diff --git a/.gitignore b/.gitignore
index 3d01925..f1fe27b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1 @@
-SOURCES/mesa-19.3.0-rc4.tar.xz
+SOURCES/mesa-20.1.2.tar.xz
diff --git a/.mesa.metadata b/.mesa.metadata
index 0dd6d67..87f9f7f 100644
--- a/.mesa.metadata
+++ b/.mesa.metadata
@@ -1 +1 @@
-f4eeb09a7dece984364a509154170a85deee9ea0 SOURCES/mesa-19.3.0-rc4.tar.xz
+b90fe9ca8c3bdad043e86cd1af93bcf83e1da3fb SOURCES/mesa-20.1.2.tar.xz
diff --git a/SOURCES/0001-gallivm-nir-fix-big-endian-64-bit-splitting-merging.patch b/SOURCES/0001-gallivm-nir-fix-big-endian-64-bit-splitting-merging.patch
new file mode 100644
index 0000000..30fc63d
--- /dev/null
+++ b/SOURCES/0001-gallivm-nir-fix-big-endian-64-bit-splitting-merging.patch
@@ -0,0 +1,45 @@
+From fcf3f45728a22250ad15db7e230545147fc28c2e Mon Sep 17 00:00:00 2001
+From: Dave Airlie <airlied@redhat.com>
+Date: Mon, 29 Jun 2020 14:59:20 +1000
+Subject: [PATCH] gallivm/nir: fix big-endian 64-bit splitting/merging.
+
+The shuffles need to be swapped to do this properly on big-endian
+---
+ src/gallium/auxiliary/gallivm/lp_bld_nir.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/src/gallium/auxiliary/gallivm/lp_bld_nir.c
+index f14475e839d..2c4135ccc05 100644
+--- a/src/gallium/auxiliary/gallivm/lp_bld_nir.c
++++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.c
+@@ -353,8 +353,13 @@ static LLVMValueRef split_64bit(struct lp_build_nir_context *bld_base,
+    LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
+    int len = bld_base->base.type.length * 2;
+    for (unsigned i = 0; i < bld_base->base.type.length; i++) {
++#if UTIL_ARCH_LITTLE_ENDIAN
+       shuffles[i] = lp_build_const_int32(gallivm, i * 2);
+       shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
++#else
++      shuffles[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
++      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2));
++#endif
+    }
+ 
+    src = LLVMBuildBitCast(gallivm->builder, src, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), len), "");
+@@ -378,8 +383,13 @@ merge_64bit(struct lp_build_nir_context *bld_base,
+    assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
+ 
+    for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
++#if UTIL_ARCH_LITTLE_ENDIAN
+       shuffles[i] = lp_build_const_int32(gallivm, i / 2);
+       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
++#else
++      shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
++      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
++#endif
+    }
+    return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
+ }
+-- 
+2.26.2
+
diff --git a/SOURCES/0001-gallivm-nir-fix-const-loading-on-big-endian-systems.patch b/SOURCES/0001-gallivm-nir-fix-const-loading-on-big-endian-systems.patch
new file mode 100644
index 0000000..33c573f
--- /dev/null
+++ b/SOURCES/0001-gallivm-nir-fix-const-loading-on-big-endian-systems.patch
@@ -0,0 +1,33 @@
+From ea7bf3941eeef8320c711a6f66b5e73077cc6e6b Mon Sep 17 00:00:00 2001
+From: Dave Airlie <airlied@redhat.com>
+Date: Mon, 29 Jun 2020 07:40:13 +1000
+Subject: [PATCH] gallivm/nir: fix const loading on big endian systems
+
+The code was expecting the lower 32-bits of the 64-bit to be
+what it wanted, don't be implicit, pull the value from the union.
+
+This should fix rendering on big endian systems since NIR was
+introduced.
+
+Fixes: 44a6b0107b37 ("gallivm: add nir->llvm translation (v2)")
+Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
+---
+ src/gallium/auxiliary/gallivm/lp_bld_nir.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/src/gallium/auxiliary/gallivm/lp_bld_nir.c
+index 9aa582a0e8a..f14475e839d 100644
+--- a/src/gallium/auxiliary/gallivm/lp_bld_nir.c
++++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.c
+@@ -865,7 +865,7 @@ static void visit_load_const(struct lp_build_nir_context *bld_base,
+    LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
+    struct lp_build_context *int_bld = get_int_bld(bld_base, true, instr->def.bit_size);
+    for (unsigned i = 0; i < instr->def.num_components; i++)
+-      result[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type, instr->value[i].u64);
++      result[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type, instr->def.bit_size == 32 ? instr->value[i].u32 : instr->value[i].u64);
+    assign_ssa_dest(bld_base, &instr->def, result);
+ }
+ 
+-- 
+2.26.2
+
diff --git a/SOURCES/0001-glsl-fix-constant-packing-for-64-bit-big-endian.patch b/SOURCES/0001-glsl-fix-constant-packing-for-64-bit-big-endian.patch
new file mode 100644
index 0000000..4e37ce3
--- /dev/null
+++ b/SOURCES/0001-glsl-fix-constant-packing-for-64-bit-big-endian.patch
@@ -0,0 +1,81 @@
+From 5fc0b580cecb1529659d5d3719412fb7cbffac0d Mon Sep 17 00:00:00 2001
+From: Dave Airlie <airlied@redhat.com>
+Date: Mon, 29 Jun 2020 13:26:56 +1000
+Subject: [PATCH] glsl: fix constant packing for 64-bit big endian.
+
+In a piglit run on s390 a lot of double tests fail, explicitly
+packing/shifting things rather than using memcpy seems to help
+---
+ src/compiler/glsl/ir_constant_expression.cpp | 15 +++++++++++++++
+ src/compiler/glsl/ir_expression_operation.py | 20 ++++++++++----------
+ 2 files changed, 25 insertions(+), 10 deletions(-)
+
+diff --git a/src/compiler/glsl/ir_constant_expression.cpp b/src/compiler/glsl/ir_constant_expression.cpp
+index 636196886b3..595cc821797 100644
+--- a/src/compiler/glsl/ir_constant_expression.cpp
++++ b/src/compiler/glsl/ir_constant_expression.cpp
+@@ -452,6 +452,21 @@ isub64_saturate(int64_t a, int64_t b)
+    return a - b;
+ }
+ 
++static uint64_t
++pack_2x32(uint32_t a, uint32_t b)
++{
++   uint64_t v = a;
++   v |= (uint64_t)b << 32;
++   return v;
++}
++
++static void
++unpack_2x32(uint64_t p, uint32_t *a, uint32_t *b)
++{
++   *a = p & 0xffffffff;
++   *b = (p >> 32);
++}
++
+ /**
+  * Get the constant that is ultimately referenced by an r-value, in a constant
+  * expression evaluation context.
+diff --git a/src/compiler/glsl/ir_expression_operation.py b/src/compiler/glsl/ir_expression_operation.py
+index d2c4d41024f..1c4e6b358e1 100644
+--- a/src/compiler/glsl/ir_expression_operation.py
++++ b/src/compiler/glsl/ir_expression_operation.py
+@@ -560,14 +560,14 @@ ir_expression_operation = [
+    operation("saturate", 1, printable_name="sat", source_types=(float_type,), c_expression="CLAMP({src0}, 0.0f, 1.0f)"),
+ 
+    # Double packing, part of ARB_gpu_shader_fp64.
+-   operation("pack_double_2x32", 1, printable_name="packDouble2x32", source_types=(uint_type,), dest_type=double_type, c_expression="memcpy(&data.d[0], &op[0]->value.u[0], sizeof(double))", flags=frozenset((horizontal_operation, non_assign_operation))),
+-   operation("unpack_double_2x32", 1, printable_name="unpackDouble2x32", source_types=(double_type,), dest_type=uint_type, c_expression="memcpy(&data.u[0], &op[0]->value.d[0], sizeof(double))", flags=frozenset((horizontal_operation, non_assign_operation))),
++   operation("pack_double_2x32", 1, printable_name="packDouble2x32", source_types=(uint_type,), dest_type=double_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
++   operation("unpack_double_2x32", 1, printable_name="unpackDouble2x32", source_types=(double_type,), dest_type=uint_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
+ 
+    # Sampler/Image packing, part of ARB_bindless_texture.
+-   operation("pack_sampler_2x32", 1, printable_name="packSampler2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="memcpy(&data.u64[0], &op[0]->value.u[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
+-   operation("pack_image_2x32", 1, printable_name="packImage2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="memcpy(&data.u64[0], &op[0]->value.u[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
+-   operation("unpack_sampler_2x32", 1, printable_name="unpackSampler2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="memcpy(&data.u[0], &op[0]->value.u64[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
+-   operation("unpack_image_2x32", 1, printable_name="unpackImage2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="memcpy(&data.u[0], &op[0]->value.u64[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
++   operation("pack_sampler_2x32", 1, printable_name="packSampler2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
++   operation("pack_image_2x32", 1, printable_name="packImage2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
++   operation("unpack_sampler_2x32", 1, printable_name="unpackSampler2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
++   operation("unpack_image_2x32", 1, printable_name="unpackImage2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
+ 
+    operation("frexp_sig", 1),
+    operation("frexp_exp", 1),
+@@ -592,10 +592,10 @@ ir_expression_operation = [
+    operation("ssbo_unsized_array_length", 1),
+ 
+    # 64-bit integer packing ops.
+-   operation("pack_int_2x32", 1, printable_name="packInt2x32", source_types=(int_type,), dest_type=int64_type, c_expression="memcpy(&data.i64[0], &op[0]->value.i[0], sizeof(int64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
+-   operation("pack_uint_2x32", 1, printable_name="packUint2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="memcpy(&data.u64[0], &op[0]->value.u[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
+-   operation("unpack_int_2x32", 1, printable_name="unpackInt2x32", source_types=(int64_type,), dest_type=int_type, c_expression="memcpy(&data.i[0], &op[0]->value.i64[0], sizeof(int64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
+-   operation("unpack_uint_2x32", 1, printable_name="unpackUint2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="memcpy(&data.u[0], &op[0]->value.u64[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
++   operation("pack_int_2x32", 1, printable_name="packInt2x32", source_types=(int_type,), dest_type=int64_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
++   operation("pack_uint_2x32", 1, printable_name="packUint2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
++   operation("unpack_int_2x32", 1, printable_name="unpackInt2x32", source_types=(int64_type,), dest_type=int_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
++   operation("unpack_uint_2x32", 1, printable_name="unpackUint2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
+ 
+    operation("add", 2, printable_name="+", source_types=numeric_types, c_expression="{src0} + {src1}", flags=vector_scalar_operation),
+    operation("sub", 2, printable_name="-", source_types=numeric_types, c_expression="{src0} - {src1}", flags=vector_scalar_operation),
+-- 
+2.26.2
+
diff --git a/SOURCES/0001-llvmpipe-ppc-fix-if-ifdef-confusion-in-backport.patch b/SOURCES/0001-llvmpipe-ppc-fix-if-ifdef-confusion-in-backport.patch
deleted file mode 100644
index c2d5a67..0000000
--- a/SOURCES/0001-llvmpipe-ppc-fix-if-ifdef-confusion-in-backport.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From 27d0c526ec926de8eca10917b4a1b68080f45187 Mon Sep 17 00:00:00 2001
-From: Dave Airlie <airlied@redhat.com>
-Date: Thu, 21 Nov 2019 05:53:03 +1000
-Subject: [PATCH] llvmpipe/ppc: fix if/ifdef confusion in backport.
-
-Fixes: 32aba91c07f (llvmpipe: use ppc64le/ppc64 Large code model for JIT-compiled shaders)
----
- src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
-index ee27f346254..89d3fb9133b 100644
---- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
-+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
-@@ -469,7 +469,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
-     */
-    builder.setCodeModel(CodeModel::Large);
- 
--#if PIPE_ARCH_LITTLE_ENDIAN
-+#ifdef PIPE_ARCH_LITTLE_ENDIAN
-    /*
-     * Versions of LLVM prior to 4.0 lacked a table entry for "POWER8NVL",
-     * resulting in (big-endian) "generic" being returned on
--- 
-2.21.0
-
diff --git a/SOURCES/0001-nir-use-bitfield_insert-instead-of-bfi-in-nir_lower_.patch b/SOURCES/0001-nir-use-bitfield_insert-instead-of-bfi-in-nir_lower_.patch
new file mode 100644
index 0000000..0daf825
--- /dev/null
+++ b/SOURCES/0001-nir-use-bitfield_insert-instead-of-bfi-in-nir_lower_.patch
@@ -0,0 +1,34 @@
+From d3ec950f0d8492b980a91844ffd744d7e7824277 Mon Sep 17 00:00:00 2001
+From: Ben Skeggs <bskeggs@redhat.com>
+Date: Sat, 6 Jun 2020 16:58:00 +1000
+Subject: [PATCH] nir: use bitfield_insert instead of bfi in
+ nir_lower_double_ops
+
+NVIDIA hardware doesn't have an equivalent to bfi, but we do already have
+a lowering for bitfield_insert->bfi.
+
+Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
+Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
+Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5373>
+---
+ src/compiler/nir/nir_lower_double_ops.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c
+index f9c93a910a5..73226fd62ef 100644
+--- a/src/compiler/nir/nir_lower_double_ops.c
++++ b/src/compiler/nir/nir_lower_double_ops.c
+@@ -49,7 +49,9 @@ set_exponent(nir_builder *b, nir_ssa_def *src, nir_ssa_def *exp)
+    /* The exponent is bits 52-62, or 20-30 of the high word, so set the exponent
+     * to 1023
+     */
+-   nir_ssa_def *new_hi = nir_bfi(b, nir_imm_int(b, 0x7ff00000), exp, hi);
++   nir_ssa_def *new_hi = nir_bitfield_insert(b, hi, exp,
++                                             nir_imm_int(b, 20),
++                                             nir_imm_int(b, 11));
+    /* recombine */
+    return nir_pack_64_2x32_split(b, lo, new_hi);
+ }
+-- 
+2.26.2
+
diff --git a/SOURCES/Makefile b/SOURCES/Makefile
index a3b6b3b..c431c49 100644
--- a/SOURCES/Makefile
+++ b/SOURCES/Makefile
@@ -1,4 +1,4 @@
-VERSION ?= 19.3.0-rc4
+VERSION ?= 20.1.2
 SANITIZE ?= 1
 
 DIRNAME = mesa-${VERSION}
@@ -10,7 +10,7 @@ clean:
 	rm -f mesa-${VERSION}.tar.xz
 
 clone: clean
-	curl -O https://mesa.freedesktop.org/archive/mesa-${VERSION}.tar.xz
+	curl -O https://archive.mesa3d.org/mesa-${VERSION}.tar.xz
 	tar xf mesa-${VERSION}.tar.xz
 
 sanitize: clone vl_mpeg12_decoder.c vl_decoder.c
diff --git a/SOURCES/nouveau-tu1xx-support.patch b/SOURCES/nouveau-tu1xx-support.patch
new file mode 100644
index 0000000..1134f43
--- /dev/null
+++ b/SOURCES/nouveau-tu1xx-support.patch
@@ -0,0 +1,9921 @@
+diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources
+index 6c360992a53..9de8168fbd9 100644
+--- a/src/gallium/drivers/nouveau/Makefile.sources
++++ b/src/gallium/drivers/nouveau/Makefile.sources
+@@ -151,6 +151,14 @@ NVC0_CODEGEN_SOURCES := \
+ 	codegen/nv50_ir_target_nvc0.h
+ 
+ NVC0_C_SOURCES := \
++	nvc0/cla0c0qmd.h \
++	nvc0/clc0c0qmd.h \
++	nvc0/clc3c0qmd.h \
++	nvc0/drf.h \
++	nvc0/qmd.h \
++	nvc0/qmda0c0.c \
++	nvc0/qmdc0c0.c \
++	nvc0/qmdc3c0.c \
+ 	nvc0/gm107_texture.xml.h \
+ 	nvc0/nvc0_3d.xml.h \
+ 	nvc0/nvc0_compute.c \
+diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+index 42ee969c66b..d58c0d206ec 100644
+--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+@@ -67,8 +67,10 @@ enum operation
+    OP_AND,
+    OP_OR,
+    OP_XOR,
++   OP_LOP3_LUT,
+    OP_SHL,
+    OP_SHR,
++   OP_SHF,
+    OP_MAX,
+    OP_MIN,
+    OP_SAT, // CLAMP(f32, 0.0, 1.0)
+@@ -116,6 +118,7 @@ enum operation
+    OP_PINTERP,
+    OP_EMIT,    // emit vertex
+    OP_RESTART, // restart primitive
++   OP_FINAL, // finish emitting primitives
+    OP_TEX,
+    OP_TXB, // texture bias
+    OP_TXL, // texure lod
+@@ -151,7 +154,10 @@ enum operation
+    OP_INSBF,  // insert first src1[8:15] bits of src0 into src2 at src1[0:7]
+    OP_EXTBF,  // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK
+    OP_BFIND,  // find highest/lowest set bit
++   OP_BREV,   // bitfield reverse
++   OP_BMSK,   // bitfield mask
+    OP_PERMT,  // dst = bytes from src2,src0 selected by src1 (nvc0's src order)
++   OP_SGXT,
+    OP_ATOM,
+    OP_BAR,    // execution barrier, sources = { id, thread count, predicate }
+    OP_VADD,   // byte/word vector operations
+@@ -167,6 +173,7 @@ enum operation
+    OP_SHFL, // warp shuffle
+    OP_VOTE,
+    OP_BUFQ, // buffer query
++   OP_WARPSYNC,
+    OP_LAST
+ };
+ 
+@@ -254,11 +261,29 @@ enum operation
+ #define NV50_IR_SUBOP_VOTE_ALL 0
+ #define NV50_IR_SUBOP_VOTE_ANY 1
+ #define NV50_IR_SUBOP_VOTE_UNI 2
++#define NV50_IR_SUBOP_LOP3_LUT_SRC0 0xf0
++#define NV50_IR_SUBOP_LOP3_LUT_SRC1 0xcc
++#define NV50_IR_SUBOP_LOP3_LUT_SRC2 0xaa
++#define NV50_IR_SUBOP_LOP3_LUT(exp) ({         \
++      uint8_t a = NV50_IR_SUBOP_LOP3_LUT_SRC0; \
++      uint8_t b = NV50_IR_SUBOP_LOP3_LUT_SRC1; \
++      uint8_t c = NV50_IR_SUBOP_LOP3_LUT_SRC2; \
++      (uint8_t)(exp);                          \
++})
++#define NV50_IR_SUBOP_BMSK_C (0 << 0)
++#define NV50_IR_SUBOP_BMSK_W (1 << 0)
+ 
+ #define NV50_IR_SUBOP_MINMAX_LOW  1
+ #define NV50_IR_SUBOP_MINMAX_MED  2
+ #define NV50_IR_SUBOP_MINMAX_HIGH 3
+ 
++#define NV50_IR_SUBOP_SHF_L  (0 << 0)
++#define NV50_IR_SUBOP_SHF_R  (1 << 0)
++#define NV50_IR_SUBOP_SHF_LO (0 << 1)
++#define NV50_IR_SUBOP_SHF_HI (1 << 1)
++#define NV50_IR_SUBOP_SHF_C  (0 << 2)
++#define NV50_IR_SUBOP_SHF_W  (1 << 2)
++
+ // xmad(src0, src1, 0) << 16 + src2
+ #define NV50_IR_SUBOP_XMAD_PSL (1 << 0)
+ // (xmad(src0, src1, src2) & 0xffff) | (src1 << 16)
+@@ -900,7 +925,7 @@ public:
+ 
+    uint16_t subOp; // quadop, 1 for mul-high, etc.
+ 
+-   unsigned encSize    : 4; // encoding size in bytes
++   unsigned encSize    : 5; // encoding size in bytes
+    unsigned saturate   : 1; // to [0.0f, 1.0f]
+    unsigned join       : 1; // converge control flow (use OP_JOIN until end)
+    unsigned fixed      : 1; // prevent dead code elimination
+diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+index 5dc0e24c5dc..63ea7f5e7e8 100644
+--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+@@ -29,6 +29,8 @@
+ #include "tgsi/tgsi_parse.h"
+ #include "tgsi/tgsi_scan.h"
+ 
++struct nir_shader_compiler_options;
++
+ /*
+  * This struct constitutes linkage information in TGSI terminology.
+  *
+@@ -70,10 +72,12 @@ struct nv50_ir_prog_symbol
+    uint32_t offset;
+ };
+ 
++#define NVISA_GF100_CHIPSET    0xc0
+ #define NVISA_GK104_CHIPSET    0xe0
+ #define NVISA_GK20A_CHIPSET    0xea
+ #define NVISA_GM107_CHIPSET    0x110
+ #define NVISA_GM200_CHIPSET    0x120
++#define NVISA_GV100_CHIPSET    0x140
+ 
+ struct nv50_ir_prog_info
+ {
+@@ -200,6 +204,9 @@ struct nv50_ir_prog_info
+ extern "C" {
+ #endif
+ 
++const struct nir_shader_compiler_options *
++nv50_ir_nir_shader_compiler_options(int chipset);
++
+ extern int nv50_ir_generate_code(struct nv50_ir_prog_info *);
+ 
+ extern void nv50_ir_relocate_code(void *relocData, uint32_t *code,
+diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+index e244bd0d610..dd8e1ab86c4 100644
+--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+@@ -23,6 +23,7 @@
+  */
+ 
+ #include "codegen/nv50_ir_target_gm107.h"
++#include "codegen/nv50_ir_sched_gm107.h"
+ 
+ //#define GM107_DEBUG_SCHED_DATA
+ 
+@@ -170,6 +171,7 @@ private:
+    void emitBFI();
+    void emitBFE();
+    void emitFLO();
++   void emitPRMT();
+ 
+    void emitLDSTs(int, DataType);
+    void emitLDSTc(int);
+@@ -2371,6 +2373,33 @@ CodeEmitterGM107::emitFLO()
+    emitGPR  (0x00, insn->def(0));
+ }
+ 
++void
++CodeEmitterGM107::emitPRMT()
++{
++   switch (insn->src(1).getFile()) {
++   case FILE_GPR:
++      emitInsn(0x5bc00000);
++      emitGPR (0x14, insn->src(1));
++      break;
++   case FILE_MEMORY_CONST:
++      emitInsn(0x4bc00000);
++      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
++      break;
++   case FILE_IMMEDIATE:
++      emitInsn(0x36c00000);
++      emitIMMD(0x14, 19, insn->src(1));
++      break;
++   default:
++      assert(!"bad src1 file");
++      break;
++   }
++
++   emitField(0x30, 3, insn->subOp);
++   emitGPR  (0x27, insn->src(2));
++   emitGPR  (0x08, insn->src(0));
++   emitGPR  (0x00, insn->def(0));
++}
++
+ /*******************************************************************************
+  * memory
+  ******************************************************************************/
+@@ -3537,6 +3566,9 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
+    case OP_BFIND:
+       emitFLO();
+       break;
++   case OP_PERMT:
++      emitPRMT();
++      break;
+    case OP_SLCT:
+       if (isFloatType(insn->dType))
+          emitFCMP();
+@@ -3742,156 +3774,6 @@ CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const
+  * sched data calculator
+  ******************************************************************************/
+ 
+-class SchedDataCalculatorGM107 : public Pass
+-{
+-public:
+-   SchedDataCalculatorGM107(const TargetGM107 *targ) : targ(targ) {}
+-
+-private:
+-   struct RegScores
+-   {
+-      struct ScoreData {
+-         int r[256];
+-         int p[8];
+-         int c;
+-      } rd, wr;
+-      int base;
+-
+-      void rebase(const int base)
+-      {
+-         const int delta = this->base - base;
+-         if (!delta)
+-            return;
+-         this->base = 0;
+-
+-         for (int i = 0; i < 256; ++i) {
+-            rd.r[i] += delta;
+-            wr.r[i] += delta;
+-         }
+-         for (int i = 0; i < 8; ++i) {
+-            rd.p[i] += delta;
+-            wr.p[i] += delta;
+-         }
+-         rd.c += delta;
+-         wr.c += delta;
+-      }
+-      void wipe()
+-      {
+-         memset(&rd, 0, sizeof(rd));
+-         memset(&wr, 0, sizeof(wr));
+-      }
+-      int getLatest(const ScoreData& d) const
+-      {
+-         int max = 0;
+-         for (int i = 0; i < 256; ++i)
+-            if (d.r[i] > max)
+-               max = d.r[i];
+-         for (int i = 0; i < 8; ++i)
+-            if (d.p[i] > max)
+-               max = d.p[i];
+-         if (d.c > max)
+-            max = d.c;
+-         return max;
+-      }
+-      inline int getLatestRd() const
+-      {
+-         return getLatest(rd);
+-      }
+-      inline int getLatestWr() const
+-      {
+-         return getLatest(wr);
+-      }
+-      inline int getLatest() const
+-      {
+-         return MAX2(getLatestRd(), getLatestWr());
+-      }
+-      void setMax(const RegScores *that)
+-      {
+-         for (int i = 0; i < 256; ++i) {
+-            rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
+-            wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
+-         }
+-         for (int i = 0; i < 8; ++i) {
+-            rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
+-            wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
+-         }
+-         rd.c = MAX2(rd.c, that->rd.c);
+-         wr.c = MAX2(wr.c, that->wr.c);
+-      }
+-      void print(int cycle)
+-      {
+-         for (int i = 0; i < 256; ++i) {
+-            if (rd.r[i] > cycle)
+-               INFO("rd $r%i @ %i\n", i, rd.r[i]);
+-            if (wr.r[i] > cycle)
+-               INFO("wr $r%i @ %i\n", i, wr.r[i]);
+-         }
+-         for (int i = 0; i < 8; ++i) {
+-            if (rd.p[i] > cycle)
+-               INFO("rd $p%i @ %i\n", i, rd.p[i]);
+-            if (wr.p[i] > cycle)
+-               INFO("wr $p%i @ %i\n", i, wr.p[i]);
+-         }
+-         if (rd.c > cycle)
+-            INFO("rd $c @ %i\n", rd.c);
+-         if (wr.c > cycle)
+-            INFO("wr $c @ %i\n", wr.c);
+-      }
+-   };
+-
+-   RegScores *score; // for current BB
+-   std::vector<RegScores> scoreBoards;
+-
+-   const TargetGM107 *targ;
+-   bool visit(Function *);
+-   bool visit(BasicBlock *);
+-
+-   void commitInsn(const Instruction *, int);
+-   int calcDelay(const Instruction *, int) const;
+-   void setDelay(Instruction *, int, const Instruction *);
+-   void recordWr(const Value *, int, int);
+-   void checkRd(const Value *, int, int&) const;
+-
+-   inline void emitYield(Instruction *);
+-   inline void emitStall(Instruction *, uint8_t);
+-   inline void emitReuse(Instruction *, uint8_t);
+-   inline void emitWrDepBar(Instruction *, uint8_t);
+-   inline void emitRdDepBar(Instruction *, uint8_t);
+-   inline void emitWtDepBar(Instruction *, uint8_t);
+-
+-   inline int getStall(const Instruction *) const;
+-   inline int getWrDepBar(const Instruction *) const;
+-   inline int getRdDepBar(const Instruction *) const;
+-   inline int getWtDepBar(const Instruction *) const;
+-
+-   void setReuseFlag(Instruction *);
+-
+-   inline void printSchedInfo(int, const Instruction *) const;
+-
+-   struct LiveBarUse {
+-      LiveBarUse(Instruction *insn, Instruction *usei)
+-         : insn(insn), usei(usei) { }
+-      Instruction *insn;
+-      Instruction *usei;
+-   };
+-
+-   struct LiveBarDef {
+-      LiveBarDef(Instruction *insn, Instruction *defi)
+-         : insn(insn), defi(defi) { }
+-      Instruction *insn;
+-      Instruction *defi;
+-   };
+-
+-   bool insertBarriers(BasicBlock *);
+-
+-   bool doesInsnWriteTo(const Instruction *insn, const Value *val) const;
+-   Instruction *findFirstUse(const Instruction *) const;
+-   Instruction *findFirstDef(const Instruction *) const;
+-
+-   bool needRdDepBar(const Instruction *) const;
+-   bool needWrDepBar(const Instruction *) const;
+-};
+-
+ inline void
+ SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt)
+ {
+diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp
+new file mode 100644
+index 00000000000..0fbd47ccf88
+--- /dev/null
++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp
+@@ -0,0 +1,2011 @@
++/*
++ * Copyright 2020 Red Hat Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ */
++#include "codegen/nv50_ir_emit_gv100.h"
++#include "codegen/nv50_ir_sched_gm107.h"
++
++namespace nv50_ir {
++
++/*******************************************************************************
++ * instruction format helpers
++ ******************************************************************************/
++
++#define FA_NODEF (1 << 0)
++#define FA_RRR   (1 << 1)
++#define FA_RRI   (1 << 2)
++#define FA_RRC   (1 << 3)
++#define FA_RIR   (1 << 4)
++#define FA_RCR   (1 << 5)
++
++#define FA_SRC_MASK 0x0ff
++#define FA_SRC_NEG  0x100
++#define FA_SRC_ABS  0x200
++
++#define EMPTY -1
++#define __(a) (a) // no source modifiers
++#define _A(a) ((a) | FA_SRC_ABS)
++#define N_(a) ((a) | FA_SRC_NEG)
++#define NA(a) ((a) | FA_SRC_NEG | FA_SRC_ABS)
++
++void
++CodeEmitterGV100::emitFormA_I32(int src)
++{
++   emitIMMD(32, 32, insn->src(src));
++   if (insn->src(src).mod.abs())
++      code[1] &= 0x7fffffff;
++   if (insn->src(src).mod.neg())
++      code[1] ^= 0x80000000;
++}
++
++void
++CodeEmitterGV100::emitFormA_RRC(uint16_t op, int src1, int src2)
++{
++   emitInsn(op);
++   if (src1 >= 0) {
++      emitNEG (75, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG));
++      emitABS (74, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS));
++      emitGPR (64, insn->src(src1 & FA_SRC_MASK));
++   }
++   if (src2 >= 0) {
++      emitNEG (63, (src2 & FA_SRC_MASK), (src2 & FA_SRC_NEG));
++      emitABS (62, (src2 & FA_SRC_MASK), (src2 & FA_SRC_ABS));
++      emitCBUF(54, -1, 38, 0, 2, insn->src(src2 & FA_SRC_MASK));
++   }
++}
++
++void
++CodeEmitterGV100::emitFormA_RRI(uint16_t op, int src1, int src2)
++{
++   emitInsn(op);
++   if (src1 >= 0) {
++      emitNEG (75, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG));
++      emitABS (74, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS));
++      emitGPR (64, insn->src(src1 & FA_SRC_MASK));
++   }
++   if (src2 >= 0)
++      emitFormA_I32(src2 & FA_SRC_MASK);
++}
++
++void
++CodeEmitterGV100::emitFormA_RRR(uint16_t op, int src1, int src2)
++{
++   emitInsn(op);
++   if (src2 >= 0) {
++      emitNEG (75, (src2 & FA_SRC_MASK), (src2 & FA_SRC_NEG));
++      emitABS (74, (src2 & FA_SRC_MASK), (src2 & FA_SRC_ABS));
++      emitGPR (64, insn->src(src2 & FA_SRC_MASK));
++   }
++
++   if (src1 >= 0) {
++      emitNEG (63, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG));
++      emitABS (62, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS));
++      emitGPR (32, insn->src(src1 & FA_SRC_MASK));
++   }
++}
++
++void
++CodeEmitterGV100::emitFormA(uint16_t op, uint8_t forms, // Picks a Form-A variant from the register files of src1/src2, then encodes src0 and the destination.
++                            int src0, int src1, int src2) // forms: FA_* bitmask of variants the caller allows (plus FA_NODEF); srcN: source index with modifier flags, negative if unused
++{
++   switch ((src1 < 0) ? FILE_GPR : insn->src(src1 & FA_SRC_MASK).getFile()) { // an unused src1 is dispatched as if it were a GPR
++   case FILE_GPR:
++      switch ((src2 < 0) ? FILE_GPR : insn->src(src2 & FA_SRC_MASK).getFile()) { // same convention for an unused src2
++      case FILE_GPR:
++         assert(forms & FA_RRR);
++         emitFormA_RRR((1 << 9) | op, src1, src2); // the variant number (1..5) is OR'd in at bit 9 of the value handed to emitInsn()
++         break;
++      case FILE_IMMEDIATE:
++         assert(forms & FA_RRI);
++         emitFormA_RRI((2 << 9) | op, src1, src2);
++         break;
++      case FILE_MEMORY_CONST:
++         assert(forms & FA_RRC);
++         emitFormA_RRC((3 << 9) | op, src1, src2);
++         break;
++      default:
++         assert(!"bad src2 file");
++         break;
++      }
++      break;
++   case FILE_IMMEDIATE:
++      assert((src2 < 0) || insn->src(src2 & FA_SRC_MASK).getFile() == FILE_GPR);
++      assert(forms & FA_RIR);
++      emitFormA_RRI((4 << 9) | op, src2, src1); // arguments swapped: the GPR (src2) goes in the helper's register slot, the immediate (src1) in its last slot
++      break;
++   case FILE_MEMORY_CONST:
++      assert((src2 < 0) || insn->src(src2 & FA_SRC_MASK).getFile() == FILE_GPR);
++      assert(forms & FA_RCR);
++      emitFormA_RRC((5 << 9) | op, src2, src1); // same swap as the RIR case, with a constant-buffer operand
++      break;
++   default:
++      assert(!"bad src1 file");
++      break;
++   }
++
++   if (src0 >= 0) {
++      assert(insn->src(src0 & FA_SRC_MASK).getFile() == FILE_GPR); // src0 is only ever encoded as a GPR
++      emitABS(73, (src0 & FA_SRC_MASK), (src0 & FA_SRC_ABS));
++      emitNEG(72, (src0 & FA_SRC_MASK), (src0 & FA_SRC_NEG));
++      emitGPR(24, insn->src(src0 & FA_SRC_MASK));
++   }
++
++   if (!(forms & FA_NODEF)) // FA_NODEF suppresses the GPR destination; callers that pass it encode their own results (e.g. predicates)
++      emitGPR(16, insn->def(0));
++}
++
++/*******************************************************************************
++ * control
++ ******************************************************************************/
++
++void
++CodeEmitterGV100::emitBRA() // Encodes a direct, relative branch (emitInsn 0x947).
++{
++   const FlowInstruction *insn = this->insn->asFlow(); // local 'insn' shadows the member, typed as a flow instruction
++   int64_t target = ((int64_t)insn->target.bb->binPos - (codeSize + 0x10)) / 4; // distance from (codeSize + 0x10) to the target block, divided by 4; 0x10 is presumably this instruction's size — TODO confirm
++
++   assert(!insn->indirect && !insn->absolute); // only direct relative branches are handled here
++
++   emitInsn (0x947);
++   emitField(34, 48, target);
++   emitPRED (87);
++   emitField(86, 2, 0); // ./.INC/.DEC
++}
++
++void
++CodeEmitterGV100::emitEXIT() // Encodes EXIT (emitInsn 0x94d) with both option fields left at 0.
++{
++   emitInsn (0x94d);
++   emitNOT  (90);
++   emitPRED (87);
++   emitField(85, 1, 0); // .NO_ATEXIT
++   emitField(84, 2, 0); // ./.KEEPREFCOUNT/.PREEMPTED/.INVALID3
++}
++
++void
++CodeEmitterGV100::emitKILL() // Encodes KILL: emitInsn 0x95b plus an operand-less predicate at position 87.
++{
++   emitInsn(0x95b);
++   emitPRED(87);
++}
++
++void
++CodeEmitterGV100::emitNOP() // Encodes NOP: emitInsn 0x918 with no further fields.
++{
++   emitInsn(0x918);
++}
++
++void
++CodeEmitterGV100::emitWARPSYNC() // Encodes WARPSYNC as Form-A op 0x148 with source 0 in the src1 slot and no GPR destination.
++{
++   emitFormA(0x148, FA_NODEF | FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); // FA_NODEF: emitFormA() skips the destination register
++   emitNOT  (90);
++   emitPRED (87);
++}
++
++/*******************************************************************************
++ * movement / conversion
++ ******************************************************************************/
++
++void
++CodeEmitterGV100::emitCS2R() // Encodes CS2R (emitInsn 0x805): source 0 via emitSYS at 72, destination GPR at 16.
++{
++   emitInsn(0x805);
++   emitSYS (72, insn->src(0));
++   emitGPR (16, insn->def(0));
++}
++
++void
++CodeEmitterGV100::emitF2F() // Encodes a float-to-float conversion; the opcode depends on whether an 8-byte type is involved.
++{
++   if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8) // neither side is 8 bytes wide
++      emitFormA(0x104, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
++   else
++      emitFormA(0x110, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY); // at least one side is 8 bytes wide
++   emitField(84, 2, util_logbase2(typeSizeof(insn->sType))); // log2 of the source type size in bytes
++   emitFMZ  (80, 1);
++   emitRND  (78);
++   emitField(75, 2, util_logbase2(typeSizeof(insn->dType))); // log2 of the destination type size in bytes
++   emitField(60, 2, insn->subOp); // ./.H1/.INVALID2/.INVALID3
++}
++
++void
++CodeEmitterGV100::emitF2I() // Encodes a float-to-integer conversion; the opcode depends on whether an 8-byte type is involved.
++{
++   if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8) // neither side is 8 bytes wide
++      emitFormA(0x105, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
++   else
++      emitFormA(0x111, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
++   emitField(84, 2, util_logbase2(typeSizeof(insn->sType))); // log2 of the source type size in bytes
++   emitFMZ  (80, 1);
++   emitRND  (78);
++   emitField(77, 1, 0); // .NTZ
++   emitField(75, 2, util_logbase2(typeSizeof(insn->dType))); // log2 of the destination type size in bytes
++   emitField(72, 1, isSignedType(insn->dType)); // set when the integer result type is signed
++}
++
++void
++CodeEmitterGV100::emitFRND() // Encodes a float round-to-integral op; handles OP_CVT (mode from insn->rnd) and OP_FLOOR/OP_CEIL/OP_TRUNC.
++{
++   int subop = 0; // value written at bit 78; stays 0 if an assert below fires in a build without asserts
++
++   switch (insn->op) {
++   case OP_CVT:
++      switch (insn->rnd) { // only the ROUND_*I modes are accepted for OP_CVT
++      case ROUND_NI: subop = 0; break;
++      case ROUND_MI: subop = 1; break;
++      case ROUND_PI: subop = 2; break;
++      case ROUND_ZI: subop = 3; break;
++      default:
++         assert(!"invalid FRND mode");
++         break;
++      }
++      break;
++   case OP_FLOOR: subop = 1; break; // same code as ROUND_MI
++   case OP_CEIL : subop = 2; break; // same code as ROUND_PI
++   case OP_TRUNC: subop = 3; break; // same code as ROUND_ZI
++   default:
++      assert(!"invalid FRND opcode");
++      break;
++   }
++
++   if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8) // opcode depends on whether an 8-byte type is involved
++      emitFormA(0x107, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
++   else
++      emitFormA(0x113, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
++   emitField(84, 2, util_logbase2(typeSizeof(insn->sType))); // log2 of the source type size in bytes
++   emitFMZ  (80, 1);
++   emitField(78, 2, subop);
++   emitField(75, 2, util_logbase2(typeSizeof(insn->dType))); // log2 of the destination type size in bytes
++}
++
++void
++CodeEmitterGV100::emitI2F() // Encodes an integer-to-float conversion; the opcode depends on whether an 8-byte type is involved.
++{
++   if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8) // neither side is 8 bytes wide
++      emitFormA(0x106, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
++   else
++      emitFormA(0x112, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
++   emitField(84, 2, util_logbase2(typeSizeof(insn->sType))); // log2 of the source type size in bytes
++   emitRND  (78);
++   emitField(75, 2, util_logbase2(typeSizeof(insn->dType))); // log2 of the destination type size in bytes
++   emitField(74, 1, isSignedType(insn->sType)); // set when the integer source type is signed
++   if (typeSizeof(insn->sType) == 2)
++      emitField(60, 2, insn->subOp >> 1); // 2-byte sources: subOp is halved before encoding — presumably converts a byte selector to a half-word selector; TODO confirm
++   else
++      emitField(60, 2, insn->subOp); // ./.B1/.B2/.B3
++}
++
++void
++CodeEmitterGV100::emitMOV() // Encodes a move; the encoding is chosen from the destination file and, for GPR destinations, the source file.
++{
++   switch (insn->def(0).getFile()) {
++   case FILE_GPR:
++      switch (insn->src(0).getFile()) {
++      case FILE_GPR:
++      case FILE_MEMORY_CONST:
++      case FILE_IMMEDIATE:
++         emitFormA(0x002, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); // emitFormA() selects the variant from the source's file
++         emitField(72, 4, insn->lanes);
++         break;
++      case FILE_PREDICATE: // predicate -> GPR uses a separate encoding (0x807) with an all-ones 32-bit field and the source predicate at 87
++         emitInsn (0x807);
++         emitGPR  (16, insn->def(0));
++         emitGPR  (24);
++         emitField(32, 32, 0xffffffff);
++         emitField(90,  1, 1);
++         emitPRED (87, insn->src(0));
++         break;
++      default:
++         assert(!"bad src file");
++         break;
++      }
++      break;
++   case FILE_PREDICATE: // GPR -> predicate: encoding 0x20c with condition CC_NE; operand-less emitGPR(32) is presumably the zero register, i.e. "src != 0" — TODO confirm
++      emitInsn (0x20c);
++      emitPRED (87);
++      emitPRED (84);
++      emitNOT  (71);
++      emitPRED (68);
++      emitPRED (81, insn->def(0));
++      emitCond3(76, CC_NE);
++      emitGPR  (24, insn->src(0));
++      emitGPR  (32);
++      break;
++   default:
++      assert(!"bad dst file");
++      break;
++   }
++}
++
++void
++CodeEmitterGV100::emitPRMT() // Encodes PRMT as Form-A op 0x016 with three sources; insn->subOp goes into a 3-bit field at 72.
++{
++   emitFormA(0x016, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), __(2));
++   emitField(72, 3, insn->subOp);
++}
++
++void
++CodeEmitterGV100::emitS2R() // Encodes S2R (emitInsn 0x919): source 0 via emitSYS at 72, destination GPR at 16.
++{
++   emitInsn(0x919);
++   emitSYS (72, insn->src(0));
++   emitGPR (16, insn->def(0));
++}
++
++static void
++selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data) // Fixup callback: rewrites one bit of already-emitted code according to data.force_persample_interp.
++{
++   int loc = entry->loc; // index of the instruction's first 32-bit word within 'code'
++   if (data.force_persample_interp)
++      code[loc + 2] |= 1 << 26; // bit 26 of the third word, i.e. instruction bit 90 (2 * 32 + 26)
++   else
++      code[loc + 2] &= ~(1 << 26);
++}
++
++void
++CodeEmitterGV100::emitSEL() // Encodes a select: Form-A op 0x007 with sources 0 and 1, selected by the predicate in source 2.
++{
++   emitFormA(0x007, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY);
++   emitNOT  (90, insn->src(2));
++   emitPRED (87, insn->src(2));
++   if (insn->subOp == 1)
++      addInterp(0, 0, selpFlip); // registers selpFlip so this instruction's bit 90 can be patched later from force_persample_interp
++}
++
++void
++CodeEmitterGV100::emitSHFL() // Encodes SHFL; one of four opcodes is chosen from the GPR/immediate combination of sources 1 and 2.
++{
++   switch (insn->src(1).getFile()) {
++   case FILE_GPR:
++      switch (insn->src(2).getFile()) {
++      case FILE_GPR:
++         emitInsn(0x389); // src1 GPR, src2 GPR
++         emitGPR (64, insn->src(2));
++         break;
++      case FILE_IMMEDIATE:
++         emitInsn(0x589); // src1 GPR, src2 immediate
++         emitIMMD(40, 13, insn->src(2));
++         break;
++      default:
++         assert(!"bad src2 file");
++         break;
++      }
++      emitGPR(32, insn->src(1));
++      break;
++   case FILE_IMMEDIATE:
++      switch (insn->src(2).getFile()) {
++      case FILE_GPR:
++         emitInsn(0x989); // src1 immediate, src2 GPR
++         emitGPR (64, insn->src(2));
++         break;
++      case FILE_IMMEDIATE:
++         emitInsn(0xf89); // src1 immediate, src2 immediate
++         emitIMMD(40, 13, insn->src(2));
++         break;
++      default:
++         assert(!"bad src2 file");
++         break;
++      }
++      emitIMMD(53, 5, insn->src(1));
++      break;
++   default:
++      assert(!"bad src1 file");
++      break;
++   }
++
++   if (insn->defExists(1)) // the optional second definition is a predicate result
++      emitPRED(81, insn->def(1));
++   else
++      emitPRED(81); // no predicate result requested; the operand-less form presumably encodes a "discard" predicate — TODO confirm
++
++   emitField(58, 2, insn->subOp);
++   emitGPR  (24, insn->src(0));
++   emitGPR  (16, insn->def(0));
++}
++
++/*******************************************************************************
++ * fp32
++ ******************************************************************************/
++
++void
++CodeEmitterGV100::emitFADD() // Encodes a 32-bit float add (Form-A op 0x021); the second operand's slot depends on its register file.
++{
++   if (insn->src(1).getFile() == FILE_GPR)
++      emitFormA(0x021, FA_RRR         , NA(0), NA(1), EMPTY); // GPR second operand goes in the src1 slot
++   else
++      emitFormA(0x021, FA_RRI | FA_RRC, NA(0), EMPTY, NA(1)); // immediate / constant-buffer second operand goes in the src2 slot
++   emitFMZ  (80, 1);
++   emitRND  (78);
++   emitSAT  (77);
++}
++
++void
++CodeEmitterGV100::emitFFMA() // Encodes a 32-bit float fused multiply-add (Form-A op 0x023) with three sources.
++{
++   emitFormA(0x023, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, NA(0), NA(1), NA(2));
++   emitField(80, 1, insn->ftz); // ftz and dnz are written as separate one-bit flags (bits 80 and 76)
++   emitRND  (78);
++   emitSAT  (77);
++   emitField(76, 1, insn->dnz);
++}
++
++void
++CodeEmitterGV100::emitFMNMX() // Encodes a 32-bit float min/max (Form-A op 0x009); bit 90 is set for OP_MAX and clear otherwise.
++{
++   emitFormA(0x009, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);
++   emitField(90, 1, insn->op == OP_MAX);
++   emitPRED (87);
++   emitFMZ  (80, 1);
++}
++
++void
++CodeEmitterGV100::emitFMUL() // Encodes a 32-bit float multiply (Form-A op 0x020) with two sources.
++{
++   emitFormA(0x020, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);
++   emitField(80, 1, insn->ftz); // ftz and dnz are written as separate one-bit flags (bits 80 and 76)
++   emitPDIV (84);
++   emitRND  (78);
++   emitSAT  (77);
++   emitField(76, 1, insn->dnz);
++}
++
++void
++CodeEmitterGV100::emitFSET_BF() // Encodes a 32-bit float compare with a GPR result (Form-A op 0x00a), optionally combined with a predicate in source 2.
++{
++   const CmpInstruction *insn = this->insn->asCmp(); // local 'insn' shadows the member, typed as a compare instruction
++
++   emitFormA(0x00a, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);
++   emitFMZ  (80, 1);
++   emitCond4(76, insn->setCond);
++
++   if (insn->op != OP_SET) { // OP_SET_AND/OR/XOR: encode the combine operation and the predicate it combines with
++      switch (insn->op) {
++      case OP_SET_AND: emitField(74, 2, 0); break;
++      case OP_SET_OR : emitField(74, 2, 1); break;
++      case OP_SET_XOR: emitField(74, 2, 2); break;
++      default:
++         assert(!"invalid set op");
++         break;
++      }
++      emitNOT (90, insn->src(2));
++      emitPRED(87, insn->src(2));
++   } else {
++      emitPRED(87); // plain OP_SET: no combining predicate operand
++   }
++}
++
++void
++CodeEmitterGV100::emitFSETP() // Encodes a 32-bit float compare with predicate results (Form-A op 0x00b, no GPR destination).
++{
++   const CmpInstruction *insn = this->insn->asCmp(); // local 'insn' shadows the member, typed as a compare instruction
++
++   emitFormA(0x00b, FA_NODEF | FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);
++   emitFMZ  (80, 1);
++   emitCond4(76, insn->setCond);
++
++   if (insn->op != OP_SET) { // OP_SET_AND/OR/XOR: encode the combine operation and the predicate it combines with
++      switch (insn->op) {
++      case OP_SET_AND: emitField(74, 2, 0); break;
++      case OP_SET_OR : emitField(74, 2, 1); break;
++      case OP_SET_XOR: emitField(74, 2, 2); break;
++      default:
++         assert(!"invalid set op");
++         break;
++      }
++      emitNOT (90, insn->src(2));
++      emitPRED(87, insn->src(2));
++   } else {
++      emitPRED(87); // plain OP_SET: no combining predicate operand
++   }
++
++   if (insn->defExists(1)) // optional second predicate result at position 84
++      emitPRED(84, insn->def(1));
++   else
++      emitPRED(84);
++   emitPRED(81, insn->def(0)); // primary predicate result
++}
++
++void
++CodeEmitterGV100::emitFSWZADD() // Encodes FSWZADD (emitInsn 0x822) after remapping the four 2-bit selectors in insn->subOp.
++{
++   uint8_t subOp = 0; // remapped copy of insn->subOp, built up two bits at a time
++
++   // NP/PN swapped vs SM60
++   for (int i = 0; i < 4; i++) {
++      uint8_t p = ((insn->subOp >> (i * 2)) & 3); // extract selector i
++      if (p == 1 || p == 2)
++         p ^= 3; // exchanges the values 1 and 2; 0 and 3 pass through unchanged
++      subOp |= p << (i * 2);
++   }
++
++   emitInsn (0x822);
++   emitFMZ  (80, 1);
++   emitRND  (78);
++   emitField(77, 1, insn->lanes); /* abused for .ndv */
++   emitGPR  (64, insn->src(1));
++   emitField(32, 8, subOp);
++   emitGPR  (24, insn->src(0));
++   emitGPR  (16, insn->def(0));
++}
++
++void
++CodeEmitterGV100::emitMUFU() // Encodes a MUFU operation (Form-A op 0x108); the function selector is derived from insn->op.
++{
++   int mufu = 0; // 4-bit function selector written at bit 74
++
++   switch (insn->op) {
++   case OP_COS : mufu = 0; break;
++   case OP_SIN : mufu = 1; break;
++   case OP_EX2 : mufu = 2; break;
++   case OP_LG2 : mufu = 3; break;
++   case OP_RCP : mufu = 4 + 2 * insn->subOp; break; // subOp 0 -> 4, subOp 1 -> 6
++   case OP_RSQ : mufu = 5 + 2 * insn->subOp; break; // subOp 0 -> 5, subOp 1 -> 7
++   case OP_SQRT: mufu = 8; break;
++   default:
++      assert(!"invalid mufu");
++      break;
++   }
++
++   emitFormA(0x108, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
++   emitField(74, 4, mufu);
++}
++
++/*******************************************************************************
++ * fp64
++ ******************************************************************************/
++
++void
++CodeEmitterGV100::emitDADD() // Encodes a 64-bit float add (Form-A op 0x029).
++{
++   emitFormA(0x029, FA_RRR | FA_RRI | FA_RRC, NA(0), EMPTY, NA(1)); // the second operand is always passed in the src2 slot; src1 is left EMPTY
++   emitRND(78);
++}
++
++void
++CodeEmitterGV100::emitDFMA() // Encodes a 64-bit float fused multiply-add (Form-A op 0x02b) with three sources and a rounding field at 78.
++{
++   emitFormA(0x02b, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, NA(0), NA(1), NA(2));
++   emitRND(78);
++}
++
++void
++CodeEmitterGV100::emitDMUL() // Encodes a 64-bit float multiply (Form-A op 0x028) with two sources and a rounding field at 78.
++{
++   emitFormA(0x028, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);
++   emitRND(78);
++}
++
++void
++CodeEmitterGV100::emitDSETP() // Encodes a 64-bit float compare with predicate results (Form-A op 0x02a, no GPR destination).
++{
++   const CmpInstruction *insn = this->insn->asCmp(); // local 'insn' shadows the member, typed as a compare instruction
++
++   if (insn->src(1).getFile() == FILE_GPR)
++      emitFormA(0x02a, FA_NODEF | FA_RRR         , NA(0), NA(1), EMPTY); // GPR second operand goes in the src1 slot
++   else
++      emitFormA(0x02a, FA_NODEF | FA_RRI | FA_RRC, NA(0), EMPTY, NA(1)); // immediate / constant-buffer second operand goes in the src2 slot
++
++   if (insn->op != OP_SET) { // OP_SET_AND/OR/XOR: encode the combine operation and the predicate it combines with
++      switch (insn->op) {
++      case OP_SET_AND: emitField(74, 2, 0); break;
++      case OP_SET_OR : emitField(74, 2, 1); break;
++      case OP_SET_XOR: emitField(74, 2, 2); break;
++      default:
++         assert(!"invalid set op");
++         break;
++      }
++      emitNOT (90, insn->src(2));
++      emitPRED(87, insn->src(2));
++   } else {
++      emitPRED(87); // plain OP_SET: no combining predicate operand
++   }
++
++   if (insn->defExists(1)) // optional second predicate result at position 84
++      emitPRED(84, insn->def(1));
++   else
++      emitPRED(84);
++   emitPRED (81, insn->def(0)); // primary predicate result
++   emitCond4(76, insn->setCond);
++}
++
++/*******************************************************************************
++ * integer
++ ******************************************************************************/
++
++void
++CodeEmitterGV100::emitBMSK() // Encodes BMSK as Form-A op 0x01b with two sources; insn->subOp selects the mode bit at 75.
++{
++   emitFormA(0x01b, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY);
++   emitField(75, 1, insn->subOp); // .C/.W
++}
++
++void
++CodeEmitterGV100::emitBREV() // Encodes BREV as Form-A op 0x101 with a single source in the src1 slot.
++{
++   emitFormA(0x101, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
++}
++
++void
++CodeEmitterGV100::emitFLO() // Encodes FLO as Form-A op 0x100 with a single source in the src1 slot.
++{
++   emitFormA(0x100, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
++   emitPRED (81);
++   emitField(74, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT); // set only for the BFIND_SAMT sub-operation
++   emitField(73, 1, isSignedType(insn->dType)); // set when the destination type is signed
++   emitNOT  (63, insn->src(0)); // source 0's NOT modifier; position 63 is also where emitFormA_RRR() puts its src1 NEG flag
++}
++
++void
++CodeEmitterGV100::emitIABS() // Encodes IABS as Form-A op 0x013 with a single source in the src1 slot.
++{
++   emitFormA(0x013, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
++}
++
++void
++CodeEmitterGV100::emitIADD3() // Encodes IADD3 (Form-A op 0x010) using only two of its three sources, with optional carry in/out predicates.
++{
++//   emitFormA(0x010, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(1), N_(2));
++   emitFormA(0x010, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(1), EMPTY); // third source intentionally omitted for now (see the XXX below)
++   emitGPR  (64); //XXX: fix when switching back to N_(2)
++   emitPRED (84, NULL); // .CC1
++   emitPRED (81, insn->flagsDef >= 0 ? insn->getDef(insn->flagsDef) : NULL); // flags definition, if any, is written as a predicate at 81
++   if (insn->flagsSrc >= 0) { // a flags source switches on the .X / .X1 fields and supplies the predicate at 87
++      emitField(74, 1, 1); // .X
++      emitPRED (87, insn->getSrc(insn->flagsSrc));
++      emitField(77, 4, 0xf); // .X1
++   }
++}
++
++void
++CodeEmitterGV100::emitIMAD() // Encodes IMAD as Form-A op 0x024 with three sources; only source 2 is passed through N_().
++{
++   emitFormA(0x024, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), N_(2));
++   emitField(73, 1, isSignedType(insn->sType)); // set when the source type is signed
++}
++
++void
++CodeEmitterGV100::emitIMAD_WIDE() // Encodes IMAD.WIDE as Form-A op 0x025; unlike emitIMAD(), FA_RRI is not in the allowed forms.
++{
++   emitFormA(0x025, FA_RRR |          FA_RRC | FA_RIR | FA_RCR, __(0), __(1), N_(2));
++   emitPRED (81);
++   emitField(73, 1, isSignedType(insn->sType)); // set when the source type is signed
++}
++
++void
++CodeEmitterGV100::emitISETP() // Encodes an integer compare with predicate results (Form-A op 0x00c, no GPR destination).
++{
++   const CmpInstruction *insn = this->insn->asCmp(); // local 'insn' shadows the member, typed as a compare instruction
++
++   emitFormA(0x00c, FA_NODEF | FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY);
++
++   if (insn->op != OP_SET) { // OP_SET_AND/OR/XOR: encode the combine operation and the predicate it combines with
++      switch (insn->op) {
++      case OP_SET_AND: emitField(74, 2, 0); break;
++      case OP_SET_OR : emitField(74, 2, 1); break;
++      case OP_SET_XOR: emitField(74, 2, 2); break;
++      default:
++         assert(!"invalid set op");
++         break;
++      }
++      emitNOT (90, insn->src(2));
++      emitPRED(87, insn->src(2));
++   } else {
++      emitPRED(87); // plain OP_SET: no combining predicate operand
++   }
++
++   //XXX: CC->pred
++   if (insn->flagsSrc >= 0) {
++      assert(0); // flags-source path is marked unfinished; with asserts compiled out the field below is still written
++      emitField(68, 4, 6);
++   } else {
++      emitNOT (71);
++      if (!insn->subOp) // with a subOp set, position 68 is written by the .EX block at the end instead
++         emitPRED(68);
++   }
++
++   if (insn->defExists(1)) // optional second predicate result at position 84
++      emitPRED(84, insn->def(1));
++   else
++      emitPRED(84);
++   emitPRED (81, insn->def(0)); // primary predicate result
++   emitCond3(76, insn->setCond);
++   emitField(73, 1, isSignedType(insn->sType)); // set when the source type is signed
++
++   if (insn->subOp) { // .EX
++      assert(0); // .EX path is likewise guarded as unsupported in assert-enabled builds
++      emitField(72, 1, 1);
++      emitPRED (68, insn->srcExists(3) ? insn->src(3) : insn->src(2)); // uses source 3 when present, otherwise source 2
++   }
++}
++
++void
++CodeEmitterGV100::emitLEA() // Encodes LEA as Form-A op 0x011; source 1 must be an immediate and is encoded separately as a 5-bit field.
++{
++   assert(insn->src(1).get()->asImm()); // source 1 is required to be an immediate value
++
++   emitFormA(0x011, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(2), EMPTY); // sources 0 and 2 take the Form-A operand slots
++   emitPRED (81);
++   emitIMMD (75, 5, insn->src(1));
++   emitGPR  (64); // operand-less register at 64 — presumably the zero/null register; TODO confirm
++}
++
++void
++CodeEmitterGV100::emitLOP3_LUT() // Encodes LOP3.LUT as Form-A op 0x012 with three sources; insn->subOp fills an 8-bit field at 72.
++{
++   emitFormA(0x012, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), __(2));
++   emitField(90, 1, 1);
++   emitPRED (87);
++   emitPRED (81);
++   emitField(80, 1, 0); // .PAND
++   emitField(72, 8, insn->subOp); // presumably the 3-input truth table, given the LUT name — TODO confirm
++}
++
++void
++CodeEmitterGV100::emitPOPC() // Encodes POPC as Form-A op 0x109 with a single source in the src1 slot.
++{
++   emitFormA(0x109, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
++   emitNOT  (63, insn->src(0)); // source 0's NOT modifier at position 63
++}
++
++void
++CodeEmitterGV100::emitSGXT() // Encodes SGXT as Form-A op 0x01a with two sources; both option bits are fixed constants.
++{
++   emitFormA(0x01a, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY);
++   emitField(75, 1, 0); // .W
++   emitField(73, 1, 1); // /.U32
++}
++
++void
++CodeEmitterGV100::emitSHF() // Encodes SHF as Form-A op 0x019 with three sources; flags come from insn->subOp bits, the type code from insn->sType.
++{
++   emitFormA(0x019, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), __(2));
++   emitField(80, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_HI)); // each subOp flag is normalised to 0/1 with !!
++   emitField(76, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_R));
++   emitField(75, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_W));
++
++   switch (insn->sType) {
++   case TYPE_S64: emitField(73, 2, 0); break;
++   case TYPE_U64: emitField(73, 2, 1); break;
++   case TYPE_S32: emitField(73, 2, 2); break;
++   case TYPE_U32:
++   default: // any other source type is encoded like TYPE_U32
++      emitField(73, 2, 3);
++      break;
++   }
++}
++
++/*******************************************************************************
++ * load/stores
++ ******************************************************************************/
++
++void
++CodeEmitterGV100::emitALD() // Encodes ALD (emitInsn 0x321); the access size is derived from the destination register size.
++{
++   emitInsn (0x321);
++   emitField(74, 2, (insn->getDef(0)->reg.size / 4) - 1); // number of 4-byte units in the destination, minus one
++   emitGPR  (32, insn->src(0).getIndirect(1)); // second indirect of source 0
++   emitO    (79);
++   emitP    (76);
++   emitADDR (24, 40, 10, 0, insn->src(0));
++   emitGPR  (16, insn->def(0));
++}
++
++void
++CodeEmitterGV100::emitAST() // Encodes AST (emitInsn 0x322); the access size is derived from insn->dType.
++{
++   emitInsn (0x322);
++   emitField(74, 2, (typeSizeof(insn->dType) / 4) - 1); // number of 4-byte units in dType, minus one
++   emitGPR  (64, insn->src(0).getIndirect(1)); // second indirect of source 0
++   emitP    (76);
++   emitADDR (24, 40, 10, 0, insn->src(0));
++   emitGPR  (32, insn->src(1)); // source 1 is the data register
++}
++
++void
++CodeEmitterGV100::emitATOM() // Encodes ATOM; compare-and-swap uses opcode 0x38b, every other sub-operation uses 0x38a.
++{
++   unsigned subOp, dType; // subOp is only assigned and used on the non-CAS path
++
++   if (insn->subOp != NV50_IR_SUBOP_ATOM_CAS) {
++      emitInsn(0x38a);
++
++      if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
++         subOp = 8; // EXCH is remapped to 8; all other sub-operations are encoded with their IR value
++      else
++         subOp = insn->subOp;
++      emitField(87, 4, subOp);
++
++      switch (insn->dType) {
++      case TYPE_U32 : dType = 0; break;
++      case TYPE_S32 : dType = 1; break;
++      case TYPE_U64 : dType = 2; break;
++      case TYPE_F32 : dType = 3; break;
++      case TYPE_B128: dType = 4; break;
++      case TYPE_S64 : dType = 5; break;
++      default:
++         assert(!"unexpected dType");
++         dType = 0; // falls back to the TYPE_U32 code when asserts are compiled out
++         break;
++      }
++      emitField(73, 3, dType);
++   } else {
++      emitInsn(0x38b);
++
++      switch (insn->dType) { // the CAS path accepts only TYPE_U32 and TYPE_U64
++      case TYPE_U32: dType = 0; break;
++      case TYPE_U64: dType = 2; break;
++      default:
++         assert(!"unexpected dType");
++         dType = 0;
++         break;
++      }
++      emitField(73, 3, dType);
++   }
++
++   emitPRED (81);
++   emitField(79, 2, 1);
++   emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8); // set when the address register is 8 bytes wide; getIndirect(0) is dereferenced without a null check
++   emitGPR  (32, insn->src(1));
++   emitADDR (24, 40, 24, 0, insn->src(0));
++   emitGPR  (16, insn->def(0));
++}
++
++void
++CodeEmitterGV100::emitATOMS() // Encodes ATOMS; compare-and-swap uses opcode 0x38d, every other sub-operation uses 0x38c.
++{
++   unsigned dType, subOp; // subOp is only assigned and used on the non-CAS path
++
++   if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
++      switch (insn->dType) {
++      case TYPE_U32: dType = 0; break;
++      case TYPE_S32: dType = 1; break;
++      case TYPE_U64: dType = 2; break;
++      default: assert(!"unexpected dType"); dType = 0; break; // falls back to the TYPE_U32 code when asserts are compiled out
++      }
++
++      emitInsn (0x38d);
++      emitField(87, 1, 0); // ATOMS.CAS/ATOMS.CAST
++      emitField(73, 2, dType);
++      emitGPR  (64, insn->src(2)); // CAS carries an extra register operand (source 2)
++   } else {
++      emitInsn(0x38c);
++
++      if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
++         subOp = 8; // EXCH is remapped to 8; all other sub-operations are encoded with their IR value
++      else
++         subOp = insn->subOp;
++      emitField(87, 4, subOp);
++
++      switch (insn->dType) {
++      case TYPE_U32: dType = 0; break;
++      case TYPE_S32: dType = 1; break;
++      case TYPE_U64: dType = 2; break;
++      default: assert(!"unexpected dType"); dType = 0; break;
++      }
++
++      emitField(73, 2, dType);
++   }
++
++   emitGPR  (32, insn->src(1));
++   emitADDR (24, 40, 24, 0, insn->src(0));
++   emitGPR  (16, insn->def(0));
++}
++
++void
++CodeEmitterGV100::emitIPA() // Encodes IPA (emitInsn 0x326); interpolation mode goes to bits 78.., sample mode to bits 76...
++{
++   emitInsn (0x326);
++   emitPRED (81, insn->defExists(1) ? insn->def(1) : NULL); // optional predicate result from the second definition
++
++   switch (insn->getInterpMode()) {
++   case NV50_IR_INTERP_LINEAR     : // LINEAR shares the encoding of PERSPECTIVE
++   case NV50_IR_INTERP_PERSPECTIVE: emitField(78, 2, 0); break;
++   case NV50_IR_INTERP_FLAT       : emitField(78, 2, 1); break;
++   case NV50_IR_INTERP_SC         : emitField(78, 2, 2); break;
++   default:
++      assert(!"invalid ipa mode");
++      break;
++   }
++
++   if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET) {
++      switch (insn->getSampleMode()) {
++      case NV50_IR_INTERP_DEFAULT : emitField(76, 2, 0); break;
++      case NV50_IR_INTERP_CENTROID: emitField(76, 2, 1); break;
++      default: // other sample modes write nothing to the field and do not assert
++         break;
++      }
++      emitGPR  (32); // no offset operand in these modes
++   } else {
++      emitField(76, 2, 2);
++      emitGPR  (32, insn->src(1)); // OFFSET mode takes its offset from source 1
++   }
++
++   assert(!insn->src(0).isIndirect(0)); // indirect addressing of the attribute is not handled here
++   emitADDR (-1, 64, 8, 2, insn->src(0));
++   emitGPR  (16, insn->def(0));
++}
++
++void
++CodeEmitterGV100::emitISBERD() // Encodes ISBERD (emitInsn 0x923): source GPR at 24, destination GPR at 16.
++{
++   emitInsn(0x923);
++   emitGPR (24, insn->src(0));
++   emitGPR (16, insn->def(0));
++}
++
++void
++CodeEmitterGV100::emitLDSTc(int pos) // Writes insn->cache as a 2-bit code at 'pos'.
++{
++   int mode = 0; // stays 0 (same as CACHE_CA) if the assert below is compiled out
++
++   switch (insn->cache) {
++   case CACHE_CA: mode = 0; break;
++   case CACHE_CG: mode = 1; break;
++   case CACHE_CS: mode = 2; break;
++   case CACHE_CV: mode = 3; break;
++   default:
++      assert(!"invalid caching mode");
++      break;
++   }
++
++   emitField(pos, 2, mode);
++}
++
++void
++CodeEmitterGV100::emitLDSTs(int pos, DataType type) // Writes a 3-bit access-size code for 'type' at 'pos'.
++{
++   int data = 0; // stays 0 (same as unsigned 1-byte) if the assert below is compiled out
++
++   switch (typeSizeof(type)) { // signedness only matters for the 1- and 2-byte sizes
++   case  1: data = isSignedType(type) ? 1 : 0; break;
++   case  2: data = isSignedType(type) ? 3 : 2; break;
++   case  4: data = 4; break;
++   case  8: data = 5; break;
++   case 16: data = 6; break;
++   default:
++      assert(!"bad type");
++      break;
++   }
++
++   emitField(pos, 3, data);
++}
++
++void
++CodeEmitterGV100::emitLD() // Encodes LD (emitInsn 0x980) with fixed fields at 79 and 77 and a size code from insn->dType.
++{
++   emitInsn (0x980);
++   emitField(79, 2, 2); // .CONSTANT/./.STRONG/.MMIO
++   emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS
++   emitLDSTs(73, insn->dType);
++   emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8); // set when the address register is 8 bytes wide; getIndirect(0) is dereferenced without a null check
++   emitADDR (24, 32, 32, 0, insn->src(0));
++   emitGPR  (16, insn->def(0));
++}
++
++void
++CodeEmitterGV100::emitLDC() // Encodes LDC as Form-A op 0x182, allowing only the FA_RCR (constant-buffer operand) form.
++{
++   emitFormA(0x182, FA_RCR, EMPTY, __(0), EMPTY);
++   emitField(78, 2, insn->subOp);
++   emitLDSTs(73, insn->dType);
++   emitGPR  (24, insn->src(0).getIndirect(0)); // first indirect of source 0 as the register operand at 24
++}
++
++void
++CodeEmitterGV100::emitLDL() // Encodes LDL (emitInsn 0x983) with a size code from insn->dType.
++{
++   emitInsn (0x983);
++   emitField(84, 3, 1); // .EF/./.EL/.LU/.EU/.NA/.INVALID6/.INVALID7
++   emitLDSTs(73, insn->dType);
++   emitADDR (24, 40, 24, 0, insn->src(0));
++   emitGPR  (16, insn->def(0));
++}
++
++void
++CodeEmitterGV100::emitLDS() // Encodes LDS (emitInsn 0x984) with a size code from insn->dType.
++{
++   emitInsn (0x984);
++   emitLDSTs(73, insn->dType);
++   emitADDR (24, 40, 24, 0, insn->src(0));
++   emitGPR  (16, insn->def(0));
++}
++
++void
++CodeEmitterGV100::emitOUT() // Encodes OUT (Form-A op 0x124) for OP_EMIT / OP_RESTART / OP_FINAL; a cut flag and an emit flag are packed at bits 78..79.
++{
++   const int cut  = insn->op == OP_RESTART || insn->subOp; // cut is requested by OP_RESTART or by any non-zero subOp
++   const int emit = insn->op == OP_EMIT;
++
++   if (insn->op != OP_FINAL)
++      emitFormA(0x124, FA_RRR | FA_RIR, __(0), __(1), EMPTY);
++   else
++      emitFormA(0x124, FA_RRR | FA_RIR, __(0), EMPTY, EMPTY); // OP_FINAL passes no second source
++   emitField(78, 2, (cut << 1) | emit); // bit 79 = cut, bit 78 = emit
++}
++
++void
++CodeEmitterGV100::emitRED() // Encodes RED (emitInsn 0x98e); no destination register is encoded.
++{
++   unsigned dType;
++
++   switch (insn->dType) {
++   case TYPE_U32: dType = 0; break;
++   case TYPE_S32: dType = 1; break;
++   case TYPE_U64: dType = 2; break;
++   case TYPE_F32: dType = 3; break;
++   case TYPE_B128: dType = 4; break;
++   case TYPE_S64: dType = 5; break;
++   default: assert(!"unexpected dType"); dType = 0; break; // falls back to the TYPE_U32 code when asserts are compiled out
++   }
++
++   emitInsn (0x98e);
++   emitField(87, 3, insn->subOp); // 3-bit field here, whereas emitATOM() writes a 4-bit sub-operation at the same position
++   emitField(84, 3, 1); // 0=.EF, 1=, 2=.EL, 3=.LU, 4=.EU, 5=.NA
++   emitField(79, 2, 2); // .INVALID0/./.STRONG/.INVALID3
++   emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS
++   emitField(73, 3, dType);
++   emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8); // set when the address register is 8 bytes wide; getIndirect(0) is dereferenced without a null check
++   emitGPR  (32, insn->src(1));
++   emitADDR (24, 40, 24, 0, insn->src(0));
++}
++
++void
++CodeEmitterGV100::emitST() // Encodes ST (emitInsn 0x385) with fixed fields at 79 and 77 and a size code from insn->dType.
++{
++   emitInsn (0x385);
++   emitField(79, 2, 2); // .INVALID0/./.STRONG/.MMIO
++   emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS
++   emitLDSTs(73, insn->dType);
++   emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8); // set when the address register is 8 bytes wide; getIndirect(0) is dereferenced without a null check
++   emitGPR  (64, insn->src(1)); // source 1 is the data register
++   emitADDR (24, 32, 32, 0, insn->src(0));
++}
++
++void
++CodeEmitterGV100::emitSTL() // Encodes STL (emitInsn 0x387) with a size code from insn->dType.
++{
++   emitInsn (0x387);
++   emitField(84, 3, 1); // .EF/./.EL/.LU/.EU/.NA/.INVALID6/.INVALID7
++   emitLDSTs(73, insn->dType);
++   emitADDR (24, 40, 24, 0, insn->src(0));
++   emitGPR  (32, insn->src(1)); // source 1 is the data register
++}
++
++void
++CodeEmitterGV100::emitSTS() // Encodes STS (emitInsn 0x388) with a size code from insn->dType.
++{
++   emitInsn (0x388);
++   emitLDSTs(73, insn->dType);
++   emitADDR (24, 40, 24, 0, insn->src(0));
++   emitGPR  (32, insn->src(1)); // source 1 is the data register
++}
++
++/*******************************************************************************
++ * texture
++ ******************************************************************************/
++
++void
++CodeEmitterGV100::emitTEXs(int pos) // Emits the second texture-argument register at 'pos', or the operand-less register if there is none.
++{
++   int src1 = insn->predSrc == 1 ? 2 : 1; // skip index 1 when it is occupied by the instruction's predicate
++   if (insn->srcExists(src1))
++      emitGPR(pos, insn->src(src1));
++   else
++      emitGPR(pos);
++}
++
++void
++CodeEmitterGV100::emitTEX() // Encodes TEX/TXB/TXL; opcode 0xb60 for a fixed texture index, 0x361 when rIndirectSrc is set.
++{
++   const TexInstruction *insn = this->insn->asTex(); // local 'insn' shadows the member, typed as a texture instruction
++   int lodm = 0; // 3-bit LOD-mode code written at bit 87
++
++   if (!insn->tex.levelZero) {
++      switch (insn->op) {
++      case OP_TEX: lodm = 0; break;
++      case OP_TXB: lodm = 2; break;
++      case OP_TXL: lodm = 3; break;
++      default:
++         assert(!"invalid tex op");
++         break;
++      }
++   } else {
++      lodm = 1; // levelZero overrides the per-opcode mode
++   }
++
++   if (insn->tex.rIndirectSrc < 0) { // no indirect texture source: encode the aux constant-buffer slot and texture index directly
++      emitInsn (0xb60);
++      emitField(54, 5, prog->driver->io.auxCBSlot);
++      emitField(40, 14, insn->tex.r);
++   } else {
++      emitInsn (0x361);
++      emitField(59, 1, 1); // .B
++   }
++   emitField(90, 1, insn->tex.liveOnly); // .NODEP
++   emitField(87, 3, lodm);
++   emitField(84, 3, 1); // 0=.EF, 1=, 2=.EL, 3=.LU, 4=.EU, 5=.NA
++   emitField(78, 1, insn->tex.target.isShadow()); // .DC
++   emitField(77, 1, insn->tex.derivAll); // .NDV
++   emitField(76, 1, insn->tex.useOffsets == 1); // .AOFFI
++   emitPRED (81);
++   emitGPR  (64, insn->def(1)); // second destination register; def(1) is accessed without a defExists() check
++   emitGPR  (16, insn->def(0));
++   emitGPR  (24, insn->src(0));
++   emitTEXs (32);
++   emitField(63, 1, insn->tex.target.isArray());
++   emitField(61, 2, insn->tex.target.isCube() ? 3 : // cube targets use code 3, everything else uses dimension count minus one
++                    insn->tex.target.getDim() - 1);
++   emitField(72, 4, insn->tex.mask);
++}
++
++void
++CodeEmitterGV100::emitTLD() // Encodes TLD; opcode 0xb66 for a fixed texture index, 0x367 when rIndirectSrc is set.
++{
++   const TexInstruction *insn = this->insn->asTex(); // local 'insn' shadows the member, typed as a texture instruction
++
++   if (insn->tex.rIndirectSrc < 0) { // no indirect texture source: encode the aux constant-buffer slot and texture index directly
++      emitInsn (0xb66);
++      emitField(54, 5, prog->driver->io.auxCBSlot);
++      emitField(40, 14, insn->tex.r);
++   } else {
++      emitInsn (0x367);
++      emitField(59, 1, 1); // .B
++   }
++   emitField(90, 1, insn->tex.liveOnly);
++   emitField(87, 3, insn->tex.levelZero ? 1 /* .LZ */ : 3 /* .LL */);
++   emitPRED (81);
++   emitField(78, 1, insn->tex.target.isMS()); // set for multisample targets
++   emitField(76, 1, insn->tex.useOffsets == 1);
++   emitField(72, 4, insn->tex.mask);
++   emitGPR  (64, insn->def(1)); // second destination register; def(1) is accessed without a defExists() check
++   emitField(63, 1, insn->tex.target.isArray());
++   emitField(61, 2, insn->tex.target.isCube() ? 3 : // cube targets use code 3, everything else uses dimension count minus one
++                    insn->tex.target.getDim() - 1);
++   emitTEXs (32);
++   emitGPR  (24, insn->src(0));
++   emitGPR  (16, insn->def(0));
++}
++
++void
++CodeEmitterGV100::emitTLD4() // Encodes TLD4; opcode 0xb63 for a fixed texture index, 0x364 when rIndirectSrc is set.
++{
++   const TexInstruction *insn = this->insn->asTex(); // local 'insn' shadows the member, typed as a texture instruction
++
++   if (insn->tex.rIndirectSrc < 0) { // no indirect texture source: encode the aux constant-buffer slot and texture index directly
++      emitInsn (0xb63);
++      emitField(54, 5, prog->driver->io.auxCBSlot);
++      emitField(40, 14, insn->tex.r);
++   } else {
++      emitInsn (0x364);
++      emitField(59, 1, 1); // .B
++   }
++   emitField(90, 1, insn->tex.liveOnly);
++   emitField(87, 2, insn->tex.gatherComp); // component to gather
++   emitField(84, 1, 1); // !.EF
++   emitPRED (81);
++   emitField(78, 1, insn->tex.target.isShadow());
++   emitField(77, 2, insn->tex.useOffsets == 4); // NOTE(review): width 2 here and on the next line makes the declared ranges overlap (77-78, 76-77); values are 0/1 so likely harmless, but width 1 looks intended — confirm
++   emitField(76, 2, insn->tex.useOffsets == 1);
++   emitField(72, 4, insn->tex.mask);
++   emitGPR  (64, insn->def(1)); // second destination register; def(1) is accessed without a defExists() check
++   emitField(63, 1, insn->tex.target.isArray());
++   emitField(61, 2, insn->tex.target.isCube() ? 3 : // cube targets use code 3, everything else uses dimension count minus one
++                    insn->tex.target.getDim() - 1);
++   emitTEXs (32);
++   emitGPR  (24, insn->src(0));
++   emitGPR  (16, insn->def(0));
++}
++
++void
++CodeEmitterGV100::emitTMML() // Encodes TMML; opcode 0xb69 for a fixed texture index, 0x36a when rIndirectSrc is set.
++{
++   const TexInstruction *insn = this->insn->asTex(); // local 'insn' shadows the member, typed as a texture instruction
++
++   if (insn->tex.rIndirectSrc < 0) { // no indirect texture source: encode the aux constant-buffer slot and texture index directly
++      emitInsn (0xb69);
++      emitField(54, 5, prog->driver->io.auxCBSlot);
++      emitField(40, 14, insn->tex.r);
++   } else {
++      emitInsn (0x36a);
++      emitField(59, 1, 1); // .B
++   }
++   emitField(90, 1, insn->tex.liveOnly);
++   emitField(77, 1, insn->tex.derivAll);
++   emitField(72, 4, insn->tex.mask);
++   emitGPR  (64, insn->def(1)); // second destination register; def(1) is accessed without a defExists() check
++   emitField(63, 1, insn->tex.target.isArray());
++   emitField(61, 2, insn->tex.target.isCube() ? 3 : // cube targets use code 3, everything else uses dimension count minus one
++                    insn->tex.target.getDim() - 1);
++   emitTEXs (32);
++   emitGPR  (24, insn->src(0));
++   emitGPR  (16, insn->def(0));
++}
++
++void
++CodeEmitterGV100::emitTXD() // Encodes TXD; opcode 0xb6c for a fixed texture index, 0x36d when rIndirectSrc is set.
++{
++   const TexInstruction *insn = this->insn->asTex(); // local 'insn' shadows the member, typed as a texture instruction
++
++   if (insn->tex.rIndirectSrc < 0) { // no indirect texture source: encode the aux constant-buffer slot and texture index directly
++      emitInsn (0xb6c);
++      emitField(54, 5, prog->driver->io.auxCBSlot);
++      emitField(40, 14, insn->tex.r);
++   } else {
++      emitInsn (0x36d);
++      emitField(59, 1, 1); // .B
++   }
++   emitField(90, 1, insn->tex.liveOnly);
++   emitPRED (81);
++   emitField(76, 1, insn->tex.useOffsets == 1);
++   emitField(72, 4, insn->tex.mask);
++   emitGPR  (64, insn->def(1)); // second destination register; def(1) is accessed without a defExists() check
++   emitField(63, 1, insn->tex.target.isArray());
++   emitField(61, 2, insn->tex.target.isCube() ? 3 : // cube targets use code 3, everything else uses dimension count minus one
++                    insn->tex.target.getDim() - 1);
++   emitTEXs (32);
++   emitGPR  (24, insn->src(0));
++   emitGPR  (16, insn->def(0));
++}
++
++void
++CodeEmitterGV100::emitTXQ() // Encodes TXQ; opcode 0xb6f for a fixed texture index, 0x370 when rIndirectSrc is set.
++{
++   const TexInstruction *insn = this->insn->asTex(); // local 'insn' shadows the member, typed as a texture instruction
++   int type = 0; // 2-bit query code written at bit 62; stays 0 (same as TXQ_DIMS) if the assert below is compiled out
++
++   switch (insn->tex.query) {
++   case TXQ_DIMS           : type = 0x00; break;
++   case TXQ_TYPE           : type = 0x01; break;
++   case TXQ_SAMPLE_POSITION: type = 0x02; break;
++   default:
++      assert(!"invalid txq query");
++      break;
++   }
++
++   if (insn->tex.rIndirectSrc < 0) { // no indirect texture source: encode the aux constant-buffer slot and texture index directly
++      emitInsn (0xb6f);
++      emitField(54, 5, prog->driver->io.auxCBSlot);
++      emitField(40, 14, insn->tex.r);
++   } else {
++      emitInsn (0x370);
++      emitField(59, 1, 1); // .B
++   }
++   emitField(90, 1, insn->tex.liveOnly);
++   emitField(72, 4, insn->tex.mask);
++   emitGPR  (64, insn->def(1)); // second destination register; def(1) is accessed without a defExists() check
++   emitField(62, 2, type);
++   emitGPR  (24, insn->src(0));
++   emitGPR  (16, insn->def(0));
++}
++
++/*******************************************************************************
++ * surface
++ ******************************************************************************/
++
++void
++CodeEmitterGV100::emitSUHandle(const int s) // Encodes the surface handle taken from source 's'; only the GPR case is implemented.
++{
++   const TexInstruction *insn = this->insn->asTex(); // local 'insn' shadows the member, typed as a texture instruction
++
++   assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP); // only valid for the surface opcode range
++
++   if (insn->src(s).getFile() == FILE_GPR) {
++      emitGPR(64, insn->src(s));
++   } else {
++      assert(0); // immediate handles are unfinished; the code below only runs when asserts are compiled out
++      //XXX: not done
++      ImmediateValue *imm = insn->getSrc(s)->asImm();
++      assert(imm);
++      emitField(0x33, 1, 1);
++      emitField(0x24, 13, imm->reg.data.u32);
++   }
++}
++
++void
++CodeEmitterGV100::emitSUTarget() // writes the surface target code for the current insn
++{
++   const TexInstruction *insn = this->insn->asTex();
++   int target = 0; // 0 is kept for TEX_TARGET_1D (asserted in the final else)
++   // Translate tex.target into the 3-bit code emitted at bit 61.
++   assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
++
++   if (insn->tex.target == TEX_TARGET_BUFFER) {
++      target = 1;
++   } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {
++      target = 2;
++   } else if (insn->tex.target == TEX_TARGET_2D ||
++              insn->tex.target == TEX_TARGET_RECT) {
++      target = 3;
++   } else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||
++              insn->tex.target == TEX_TARGET_CUBE ||
++              insn->tex.target == TEX_TARGET_CUBE_ARRAY) {
++      target = 4; // cube and cube-array share the 2D-array code
++   } else if (insn->tex.target == TEX_TARGET_3D) {
++      target = 5;
++   } else {
++      assert(insn->tex.target == TEX_TARGET_1D);
++   }
++   emitField(61, 3, target);
++}
++
++void
++CodeEmitterGV100::emitSUATOM() // encodes OP_SUREDB/OP_SUREDP (see emitInstruction)
++{
++   const TexInstruction *insn = this->insn->asTex();
++   uint8_t type = 0, subOp; // type 0 is what TYPE_U32 ends up with
++
++   if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)
++      emitInsn(0x396);   // SUATOM.D.CAS
++   else
++      emitInsn(0x394);   // SUATOM.D
++
++   emitSUTarget();
++
++   // destination type
++   switch (insn->dType) {
++   case TYPE_S32: type = 1; break;
++   case TYPE_U64: type = 2; break;
++   case TYPE_F32: type = 3; break;
++   case TYPE_S64: type = 5; break;
++   default:
++      assert(insn->dType == TYPE_U32);
++      break;
++   }
++
++   // atomic operation
++   if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
++      subOp = 0;
++   } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
++      subOp = 8;
++   } else {
++      subOp = insn->subOp; // every other sub-op is encoded with its IR value unchanged
++   }
++
++   emitField(87, 4, subOp);
++   emitPRED (81); // no operand passed: emitPRED writes 7
++   emitField(79, 2, 1);
++   emitField(73, 3, type);
++   emitField(72, 1, 0); // .BA
++   emitGPR  (32, insn->src(1));
++   emitGPR  (24, insn->src(0));
++   emitGPR  (16, insn->def(0));
++
++   emitSUHandle(2); // source 2 is encoded as the surface handle
++}
++
++void
++CodeEmitterGV100::emitSULD() // encodes OP_SULDB/OP_SULDP (see emitInstruction)
++{
++   const TexInstruction *insn = this->insn->asTex();
++   int type = 0; // only used on the OP_SULDB path
++
++   if (insn->op == OP_SULDB) {
++      emitInsn(0x99a);
++      emitSUTarget();
++
++      switch (insn->dType) { // data type -> 3-bit code written at bit 73
++      case TYPE_U8:   type = 0; break;
++      case TYPE_S8:   type = 1; break;
++      case TYPE_U16:  type = 2; break;
++      case TYPE_S16:  type = 3; break;
++      case TYPE_U32:  type = 4; break;
++      case TYPE_U64:  type = 5; break;
++      case TYPE_B128: type = 6; break;
++      default:
++         assert(0); // with asserts disabled, type stays 0 (same as TYPE_U8)
++         break;
++      }
++   //   emitLDSTc(0x18);
++      emitField(73, 3, type);
++   } else {
++      emitInsn(0x998);
++      emitSUTarget();
++      emitField(72, 4, 0xf); // rgba
++   }
++
++   emitPRED (81); // no operand passed: emitPRED writes 7
++   emitField(79, 2, 1);
++
++   emitGPR  (16, insn->def(0));
++   emitGPR  (24, insn->src(0));
++
++   emitSUHandle(1); // source 1 is encoded as the surface handle
++}
++
++void
++CodeEmitterGV100::emitSUST() // encodes OP_SUSTB/OP_SUSTP (see emitInstruction)
++{
++   const TexInstruction *insn = this->insn->asTex();
++   // Both store ops share this one encoding; the OP_SUSTB special case is disabled.
++   emitInsn(0x99c); // SUST.P
++#if 0
++   if (insn->op == OP_SUSTB)
++      emitField(0x34, 1, 1);
++#endif
++   emitSUTarget();
++
++
++#if 0
++   emitLDSTc(0x18);
++#endif
++
++   emitField(79, 2, 1);
++   emitField(72, 4, 0xf); // rgba
++   emitGPR(32, insn->src(1));
++   emitGPR(24, insn->src(0));
++   emitSUHandle(2); // source 2 is encoded as the surface handle
++}
++
++/*******************************************************************************
++ * misc
++ ******************************************************************************/
++
++void
++CodeEmitterGV100::emitAL2P() // encodes OP_AFETCH (dispatched from emitInstruction)
++{
++   emitInsn (0x920);
++   emitO    (79); // bit set when src(0) is in FILE_SHADER_OUTPUT
++   emitField(74, 2, (insn->getDef(0)->reg.size / 4) - 1); // presumably a dword count minus one — confirm reg.size is in bytes
++   emitField(40, 11, insn->src(0).get()->reg.data.offset);
++   emitGPR  (24, insn->src(0).getIndirect(0)); // emitGPR writes 255 for a null indirect
++   emitGPR  (16, insn->def(0));
++}
++
++void
++CodeEmitterGV100::emitBAR() // encodes OP_BAR (dispatched from emitInstruction)
++{
++   uint8_t subop, redop = 0x00; // subop is assigned on every path of the switch below
++
++   // 80
++   //    01: DEFER_BLOCKING
++   // 78:77
++   //    00: SYNC
++   //    01: ARV
++   //    02: RED
++   //    03: SCAN
++   // 75:74
++   //    00: RED.POPC
++   //    01: RED.AND
++   //    02: RED.OR
++
++   switch (insn->subOp) {
++   case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; redop = 0x00; break;
++   case NV50_IR_SUBOP_BAR_RED_AND : subop = 0x02; redop = 0x01; break;
++   case NV50_IR_SUBOP_BAR_RED_OR  : subop = 0x02; redop = 0x02; break;
++   case NV50_IR_SUBOP_BAR_ARRIVE  : subop = 0x01; break;
++   default:
++      subop = 0x00; // SYNC in the table above
++      assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);
++      break;
++   }
++
++   if (insn->src(0).getFile() == FILE_GPR) { // form 1: src(0) in a register
++      emitInsn ((1 << 9) | 0x11d);
++      emitGPR  (32, insn->src(0)); //XXX: nvdisasm shows src0==src1
++   } else {
++      ImmediateValue *imm = insn->getSrc(0)->asImm();
++      assert(imm);
++      if (insn->src(1).getFile() == FILE_GPR) { // form 4: immediate src(0), register src(1)
++         emitInsn ((4 << 9) | 0x11d);
++         emitGPR  (32, insn->src(1));
++      } else {
++         emitInsn ((5 << 9) | 0x11d); // form 5: immediate src(0), no register operand
++      }
++      emitField(54, 4, imm->reg.data.u32);
++   }
++
++   emitField(77, 2, subop);
++   emitField(74, 2, redop);
++   // src(2) is encoded as a predicate input unless it is the instruction's own predicate.
++   if (insn->srcExists(2) && (insn->predSrc != 2)) {
++      emitField(90, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));
++      emitPRED (87, insn->src(2));
++   } else {
++      emitField(87, 3, 7); // same value emitPRED writes for "no predicate"
++   }
++}
++
++void
++CodeEmitterGV100::emitCCTL() // encodes OP_CCTL (dispatched from emitInstruction)
++{
++   if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL)
++      emitInsn(0x98f);
++   else
++      emitInsn(0x990); // every non-global memory file uses this opcode
++   emitField(87, 4, insn->subOp);
++   emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8); // NOTE(review): getIndirect(0) is dereferenced unchecked — confirm it is never NULL here
++   emitADDR (24, 32, 32, 0, insn->src(0));
++}
++
++void
++CodeEmitterGV100::emitMEMBAR() // encodes OP_MEMBAR (dispatched from emitInstruction)
++{
++   emitInsn (0x992);
++   switch (NV50_IR_SUBOP_MEMBAR_SCOPE(insn->subOp)) { // scope -> 3-bit field at bit 76
++   case NV50_IR_SUBOP_MEMBAR_CTA: emitField(76, 3, 0); break;
++   case NV50_IR_SUBOP_MEMBAR_GL : emitField(76, 3, 2); break;
++   case NV50_IR_SUBOP_MEMBAR_SYS: emitField(76, 3, 3); break;
++   default:
++      assert(!"invalid scope"); // with asserts disabled the field is left at 0
++      break;
++   }
++}
++
++void
++CodeEmitterGV100::emitPIXLD() // encodes OP_PIXLD (dispatched from emitInstruction)
++{
++   emitInsn (0x925);
++   switch (insn->subOp) { // only two sub-ops are handled; others assert
++   case NV50_IR_SUBOP_PIXLD_COVMASK : emitField(78, 3, 1); break; // .COVMASK
++   case NV50_IR_SUBOP_PIXLD_SAMPLEID: emitField(78, 3, 3); break; // .MY_INDEX
++   default:
++      assert(0);
++      break;
++   }
++   emitPRED (71); // no operand passed: emitPRED writes 7
++   emitGPR  (16, insn->def(0));
++}
++
++void
++CodeEmitterGV100::emitPLOP3_LUT() // encodes AND/OR/XOR whose def(0) is a predicate
++{
++   uint8_t op[2] = {}; // op[1] is never changed, so the field at bit 16 is written as 0
++   // op[0] = the logic op applied to 0xf0 and 0xcc (presumably src0/src1 truth-table columns — confirm)
++   switch (insn->op) {
++   case OP_AND: op[0] = 0xf0 & 0xcc; break;
++   case OP_OR : op[0] = 0xf0 | 0xcc; break;
++   case OP_XOR: op[0] = 0xf0 ^ 0xcc; break;
++   default:
++      assert(!"invalid PLOP3");
++      break;
++   }
++
++   emitInsn(0x81c);
++   emitNOT (90, insn->src(0));
++   emitPRED(87, insn->src(0));
++   emitPRED(84); // def(1)
++   emitPRED(81, insn->def(0));
++   emitNOT (80, insn->src(1));
++   emitPRED(77, insn->src(1));
++   emitField(72, 5, op[0] >> 3); // upper five bits of op[0]
++   emitNOT (71); // src(2)
++   emitPRED(68); // src(2)
++   emitField(64, 3, op[0] & 7); // lower three bits of op[0]
++   emitField(16, 8, op[1]);
++}
++
++void
++CodeEmitterGV100::emitVOTE() // encodes OP_VOTE (dispatched from emitInstruction)
++{
++   const ImmediateValue *imm;
++   uint32_t u32;
++   // r / p: index of the last GPR / predicate def found, or -1 when there is none.
++   int r = -1, p = -1;
++   for (int i = 0; insn->defExists(i); i++) {
++      if (insn->def(i).getFile() == FILE_GPR)
++         r = i;
++      else if (insn->def(i).getFile() == FILE_PREDICATE)
++         p = i;
++   }
++
++   emitInsn (0x806);
++   emitField(72, 2, insn->subOp);
++   if (r >= 0)
++      emitGPR  (16, insn->def(r));
++   else
++      emitGPR  (16); // no GPR def: emitGPR writes 255
++   if (p >= 0)
++      emitPRED (81, insn->def(p));
++   else
++      emitPRED (81); // no predicate def: emitPRED writes 7
++
++   switch (insn->src(0).getFile()) {
++   case FILE_PREDICATE:
++      emitField(90, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));
++      emitPRED (87, insn->src(0));
++      break;
++   case FILE_IMMEDIATE: // constant 0/1 input: predicate field 7, negated when the value is 0
++      imm = insn->getSrc(0)->asImm();
++      assert(imm);
++      u32 = imm->reg.data.u32;
++      assert(u32 == 0 || u32 == 1);
++      emitField(90, 1, u32 == 0);
++      emitPRED (87);
++      break;
++   default:
++      assert(!"Unhandled src");
++      break;
++   }
++}
++
++bool
++CodeEmitterGV100::emitInstruction(Instruction *i) // encode i at `code`; always returns true
++{
++   insn = i; // the emit* helpers read the current instruction through this member
++
++   switch (insn->op) { // unsupported cases assert, and emit a NOP when asserts are disabled
++   case OP_ABS:
++      assert(!isFloatType(insn->dType));
++      emitIABS();
++      break;
++   case OP_ADD:
++      if (isFloatType(insn->dType)) {
++         if (insn->dType == TYPE_F32)
++            emitFADD();
++         else
++            emitDADD(); // any float type other than F32 takes the D* (double) emitter
++      } else {
++         emitIADD3();
++      }
++      break;
++   case OP_AFETCH:
++      emitAL2P();
++      break;
++   case OP_AND:
++   case OP_OR:
++   case OP_XOR:
++      if (insn->def(0).getFile() == FILE_PREDICATE) { // only predicate logic ops are handled here
++         emitPLOP3_LUT();
++      } else {
++         assert(!"invalid logop");
++         emitNOP();
++      }
++      break;
++   case OP_ATOM:
++      if (insn->src(0).getFile() == FILE_MEMORY_SHARED)
++         emitATOMS();
++      else
++         if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS) // result unused: RED form
++            emitRED();
++         else
++            emitATOM();
++      break;
++   case OP_BAR:
++      emitBAR();
++      break;
++   case OP_BFIND:
++      emitFLO();
++      break;
++   case OP_BMSK:
++      emitBMSK();
++      break;
++   case OP_BREV:
++      emitBREV();
++      break;
++   case OP_BRA:
++   case OP_JOIN: //XXX
++      emitBRA();
++      break;
++   case OP_CCTL:
++      emitCCTL();
++      break;
++   case OP_CEIL:
++   case OP_CVT:
++   case OP_FLOOR:
++   case OP_TRUNC:
++      if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
++                                 insn->src(0).getFile() == FILE_PREDICATE)) {
++         emitMOV(); // conversions to or from a predicate are emitted as a move
++      } else if (isFloatType(insn->dType)) {
++         if (isFloatType(insn->sType)) {
++            if (insn->sType == insn->dType)
++               emitFRND(); // same float type on both sides: rounding only
++            else
++               emitF2F();
++         } else {
++            emitI2F();
++         }
++      } else {
++         if (isFloatType(insn->sType)) {
++            emitF2I();
++         } else {
++            assert(!"I2I");
++            emitNOP();
++         }
++      }
++      break;
++   case OP_COS:
++   case OP_EX2:
++   case OP_LG2:
++   case OP_RCP:
++   case OP_RSQ:
++   case OP_SIN:
++   case OP_SQRT:
++      emitMUFU();
++      break;
++   case OP_DISCARD:
++      emitKILL();
++      break;
++   case OP_EMIT:
++   case OP_FINAL:
++   case OP_RESTART:
++      emitOUT();
++      break;
++   case OP_EXIT:
++      emitEXIT();
++      break;
++   case OP_EXPORT:
++      emitAST();
++      break;
++   case OP_FMA:
++   case OP_MAD:
++      if (isFloatType(insn->dType)) {
++         if (insn->dType == TYPE_F32)
++            emitFFMA();
++         else
++            emitDFMA();
++      } else {
++         if (typeSizeof(insn->dType) != 8)
++            emitIMAD();
++         else
++            emitIMAD_WIDE(); // 8-byte integer destination
++      }
++      break;
++   case OP_JOINAT: //XXX
++      emitNOP();
++      break;
++   case OP_LINTERP:
++      emitIPA();
++      break;
++   case OP_LOAD:
++      switch (insn->src(0).getFile()) { // pick the load emitter by memory file
++      case FILE_MEMORY_CONST : emitLDC(); break;
++      case FILE_MEMORY_LOCAL : emitLDL(); break;
++      case FILE_MEMORY_SHARED: emitLDS(); break;
++      case FILE_MEMORY_GLOBAL: emitLD(); break;
++      default:
++         assert(!"invalid load");
++         emitNOP();
++         break;
++      }
++      break;
++   case OP_LOP3_LUT:
++      emitLOP3_LUT();
++      break;
++   case OP_MAX:
++   case OP_MIN:
++      if (isFloatType(insn->dType)) {
++         if (insn->dType == TYPE_F32) { // only F32 min/max is emitted here
++            emitFMNMX();
++         } else {
++            assert(!"invalid FMNMX");
++            emitNOP();
++         }
++      } else {
++         assert(!"invalid MNMX");
++         emitNOP();
++      }
++      break;
++   case OP_MEMBAR:
++      emitMEMBAR();
++      break;
++   case OP_MOV:
++      emitMOV();
++      break;
++   case OP_MUL:
++      if (isFloatType(insn->dType)) {
++         if (insn->dType == TYPE_F32)
++            emitFMUL();
++         else
++            emitDMUL();
++      } else {
++         assert(!"invalid IMUL"); // integer multiply is not emitted from OP_MUL
++         emitNOP();
++      }
++      break;
++   case OP_PERMT:
++      emitPRMT();
++      break;
++   case OP_PFETCH:
++      emitISBERD();
++      break;
++   case OP_PIXLD:
++      emitPIXLD();
++      break;
++   case OP_POPCNT:
++      emitPOPC();
++      break;
++   case OP_QUADOP:
++      emitFSWZADD();
++      break;
++   case OP_RDSV:
++      if (targ->isCS2RSV(insn->getSrc(0)->reg.data.sv.sv)) // the target decides CS2R vs S2R
++         emitCS2R();
++      else
++         emitS2R();
++      break;
++   case OP_SELP:
++      emitSEL();
++      break;
++   case OP_SET:
++   case OP_SET_AND:
++   case OP_SET_OR:
++   case OP_SET_XOR:
++      if (insn->def(0).getFile() != FILE_PREDICATE) { // non-predicate result: only F32 is handled
++         if (isFloatType(insn->dType)) {
++            if (insn->dType == TYPE_F32) {
++               emitFSET_BF();
++            } else {
++               assert(!"invalid FSET");
++               emitNOP();
++            }
++         } else {
++            assert(!"invalid SET");
++            emitNOP();
++         }
++      } else { // predicate result: emitter chosen by source type
++         if (isFloatType(insn->sType))
++            if (insn->sType == TYPE_F64)
++               emitDSETP();
++            else
++               emitFSETP();
++         else
++            emitISETP();
++      }
++      break;
++   case OP_SGXT:
++      emitSGXT();
++      break;
++   case OP_SHF:
++      emitSHF();
++      break;
++   case OP_SHFL:
++      emitSHFL();
++      break;
++   case OP_SHLADD:
++      emitLEA();
++      break;
++   case OP_STORE:
++      switch (insn->src(0).getFile()) { // pick the store emitter by memory file
++      case FILE_MEMORY_LOCAL : emitSTL(); break;
++      case FILE_MEMORY_SHARED: emitSTS(); break;
++      case FILE_MEMORY_GLOBAL: emitST(); break;
++      default:
++         assert(!"invalid store");
++         emitNOP();
++         break;
++      }
++      break;
++   case OP_SULDB:
++   case OP_SULDP:
++      emitSULD();
++      break;
++   case OP_SUREDB:
++   case OP_SUREDP:
++      emitSUATOM();
++      break;
++   case OP_SUSTB:
++   case OP_SUSTP:
++      emitSUST();
++      break;
++   case OP_TEX:
++   case OP_TXB:
++   case OP_TXL:
++      emitTEX();
++      break;
++   case OP_TXD:
++      emitTXD();
++      break;
++   case OP_TXF:
++      emitTLD();
++      break;
++   case OP_TXG:
++      emitTLD4();
++      break;
++   case OP_TXLQ:
++      emitTMML();
++      break;
++   case OP_TXQ:
++      emitTXQ();
++      break;
++   case OP_VFETCH:
++      emitALD();
++      break;
++   case OP_VOTE:
++      emitVOTE();
++      break;
++   case OP_WARPSYNC:
++      emitWARPSYNC();
++      break;
++   default:
++      assert(!"invalid opcode");
++      emitNOP();
++      break;
++   }
++   // Keep the low 9 bits of word 3 and place insn->sched in the bits above them.
++   code[3] &= 0x000001ff;
++   code[3] |= insn->sched << 9;
++   code += 4; // advance past the four words just written
++   codeSize += 16;
++   return true;
++}
++
++void
++CodeEmitterGV100::prepareEmission(BasicBlock *bb) // assign bb its binPos/binSize and append it to bbArray
++{
++   Function *func = bb->getFunction();
++   Instruction *i;
++   int j;
++   // Skip blocks at the end of bbArray that have no code (binSize == 0).
++   for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);
++   // Remove BRA exits that target bb, since bb is placed right after `in`.
++   for (; j >= 0; --j) {
++      BasicBlock *in = func->bbArray[j];
++      Instruction *exit = in->getExit();
++
++      if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
++         in->binSize -= 16; // one 16-byte instruction is removed
++         func->binSize -= 16;
++
++         for (++j; j < func->bbCount; ++j) // note: reuses j, which ends up equal to bbCount
++            func->bbArray[j]->binPos -= 16;
++
++         in->remove(exit);
++      }
++      bb->binPos = in->binPos + in->binSize;
++      if (in->binSize) // no more no-op branches to bb
++         break;
++   }
++   func->bbArray[func->bbCount++] = bb;
++
++   if (!bb->getExit()) // no exit instruction: binSize is left untouched
++      return;
++
++   for (i = bb->getEntry(); i; i = i->next) {
++      i->encSize = getMinEncodingSize(i); // 16 for every instruction in this emitter
++      bb->binSize += i->encSize;
++   }
++
++   assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 16));
++
++   func->binSize += bb->binSize;
++}
++
++void
++CodeEmitterGV100::prepareEmission(Function *func) // base-class preparation, then scheduling data
++{
++   SchedDataCalculatorGM107 sched(targ); // the GM107 scheduling calculator is reused for this target
++   CodeEmitter::prepareEmission(func);
++   sched.run(func, true, true); // NOTE(review): meaning of the two flags is not visible here — see SchedDataCalculatorGM107
++}
++
++void
++CodeEmitterGV100::prepareEmission(Program *prog) // lay out all functions back to back
++{
++   for (ArrayList::Iterator fi = prog->allFuncs.iterator();
++        !fi.end(); fi.next()) {
++      Function *func = reinterpret_cast<Function *>(fi.get());
++      func->binPos = prog->binSize; // each function starts where the previous one ended
++      prepareEmission(func);
++      prog->binSize += func->binSize;
++   }
++   // The member is only set here, after the per-function preparation has run.
++   this->prog = prog;
++}
++
++CodeEmitterGV100::CodeEmitterGV100(TargetGV100 *target)
++   : CodeEmitter(target), targ(target) // keep a GV100-typed pointer alongside the base class
++{
++   code = NULL; // the emitter starts with no output buffer
++   codeSize = codeSizeLimit = 0;
++   relocInfo = NULL;
++}
++};
+diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.h
+new file mode 100644
+index 00000000000..e97bf6580a1
+--- /dev/null
++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.h
+@@ -0,0 +1,403 @@
++/*
++ * Copyright 2020 Red Hat Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ */
++#ifndef __NV50_IR_EMIT_GV100_H__
++#define __NV50_IR_EMIT_GV100_H__
++#include "codegen/nv50_ir_target_gv100.h"
++
++namespace nv50_ir {
++
++class CodeEmitterGV100 : public CodeEmitter { // emitter for TargetGV100; every instruction is 16 bytes
++public:
++   CodeEmitterGV100(TargetGV100 *target);
++
++   virtual bool emitInstruction(Instruction *);
++   virtual uint32_t getMinEncodingSize(const Instruction *) const { return 16; }
++
++private:
++   const Program *prog;
++   const TargetGV100 *targ;
++   const Instruction *insn; // instruction currently being encoded (set by emitInstruction)
++
++   virtual void prepareEmission(Program *);
++   virtual void prepareEmission(Function *);
++   virtual void prepareEmission(BasicBlock *);
++
++   inline void emitInsn(uint32_t op) { // start an instruction: opcode word, zeroed rest, predicate
++      code[0] = op;
++      code[1] = 0;
++      code[2] = 0;
++      code[3] = 0;
++      if (insn->predSrc >= 0) {
++         emitField(12, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id);
++         emitField(15, 1, insn->cc == CC_NOT_P); // predicate is negated
++      } else {
++         emitField(12, 3, 7); // 7 is written when the instruction has no predicate
++      }
++   };
++
++   inline void emitField(int b, int s, uint64_t v) { // OR the low s bits of v in at bit b
++      if (b >= 0) { // a negative position writes nothing
++         uint64_t m = ~0ULL >> (64 - s); // mask of the low s bits; s must be in 1..64
++         uint64_t d = v & m;
++         assert(!(v & ~m) || (v & ~m) == ~m); // bits above the field must be all 0 or all 1
++         if (b < 64 && b + s > 64) { // field straddles the two 64-bit halves
++            *(uint64_t *)&code[0] |= d << b;
++            *(uint64_t *)&code[2] |= d >> (64 - b);
++         } else {
++            *(uint64_t *)&code[(b/64*2)] |= d << (b & 0x3f);
++         }
++      }
++   };
++
++   inline void emitABS(int pos, int src, bool supported) // set bit pos if source src has abs
++   {
++      if (insn->src(src).mod.abs()) {
++         assert(supported);
++         emitField(pos, 1, 1);
++      }
++   }
++
++   inline void emitABS(int pos, int src)
++   {
++      emitABS(pos, src, true);
++   }
++
++   inline void emitNEG(int pos, int src, bool supported) { // set bit pos if source src has neg
++      if (insn->src(src).mod.neg()) {
++         assert(supported);
++         emitField(pos, 1, 1);
++      }
++   }
++
++   inline void emitNEG(int pos, int src) {
++      emitNEG(pos, src, true);
++   }
++
++   inline void emitNOT(int pos) {
++      emitField(pos, 1, 0); // ORs in 0, so the bit keeps its current value
++   };
++
++   inline void emitNOT(int pos, const ValueRef &ref) {
++      emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT)));
++   }
++
++   inline void emitSAT(int pos) {
++      emitField(pos, 1, insn->saturate);
++   }
++
++   inline void emitRND(int rmp, RoundMode rnd, int rip) { // rmp: 2-bit mode pos, rip: integer-round bit pos
++      int rm = 0, ri = 0;
++      switch (rnd) {
++      case ROUND_NI: ri = 1; // fallthrough
++      case ROUND_N : rm = 0; break;
++      case ROUND_MI: ri = 1; // fallthrough
++      case ROUND_M : rm = 1; break;
++      case ROUND_PI: ri = 1; // fallthrough
++      case ROUND_P : rm = 2; break;
++      case ROUND_ZI: ri = 1; // fallthrough
++      case ROUND_Z : rm = 3; break;
++      default:
++         assert(!"invalid round mode");
++         break;
++      }
++      emitField(rip, 1, ri);
++      emitField(rmp, 2, rm);
++   }
++
++   inline void emitRND(int pos) {
++      emitRND(pos, insn->rnd, -1); // rip = -1: emitField skips the integer-round bit
++   }
++
++   inline void emitFMZ(int pos, int len) {
++      emitField(pos, len, insn->dnz << 1 | insn->ftz); // dnz in bit 1, ftz in bit 0
++   }
++
++   inline void emitPDIV(int pos) {
++      emitField(pos, 3, insn->postFactor + 4);
++   }
++
++   inline void emitO(int pos) {
++      emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT);
++   }
++
++   inline void emitP(int pos) {
++      emitField(pos, 1, insn->perPatch);
++   }
++
++   inline void emitCond3(int pos, CondCode code) { // 3-bit condition: *U variants share the plain code
++      int data = 0;
++
++      switch (code) {
++      case CC_FL : data = 0x00; break;
++      case CC_LTU:
++      case CC_LT : data = 0x01; break;
++      case CC_EQU:
++      case CC_EQ : data = 0x02; break;
++      case CC_LEU:
++      case CC_LE : data = 0x03; break;
++      case CC_GTU:
++      case CC_GT : data = 0x04; break;
++      case CC_NEU:
++      case CC_NE : data = 0x05; break;
++      case CC_GEU:
++      case CC_GE : data = 0x06; break;
++      case CC_TR : data = 0x07; break;
++      default:
++         assert(!"invalid cond3");
++         break;
++      }
++
++      emitField(pos, 3, data);
++   }
++
++   inline void emitCond4(int pos, CondCode code) { // 4-bit condition: *U variants get distinct codes
++      int data = 0;
++
++      switch (code) {
++      case CC_FL: data = 0x00; break;
++      case CC_LT: data = 0x01; break;
++      case CC_EQ: data = 0x02; break;
++      case CC_LE: data = 0x03; break;
++      case CC_GT: data = 0x04; break;
++      case CC_NE: data = 0x05; break;
++      case CC_GE: data = 0x06; break;
++   //   case CC_NUM: data = 0x07; break;
++   //   case CC_NAN: data = 0x08; break;
++      case CC_LTU: data = 0x09; break;
++      case CC_EQU: data = 0x0a; break;
++      case CC_LEU: data = 0x0b; break;
++      case CC_GTU: data = 0x0c; break;
++      case CC_NEU: data = 0x0d; break;
++      case CC_GEU: data = 0x0e; break;
++      case CC_TR:  data = 0x0f; break;
++      default:
++         assert(!"invalid cond4");
++         break;
++      }
++
++      emitField(pos, 4, data);
++   }
++
++   inline void emitSYS(int pos, const Value *val) { // translate a system value to its 8-bit id
++      int id = val ? val->reg.data.id : -1; // -1 (no value) lands in the default case
++
++      switch (id) {
++      case SV_LANEID         : id = 0x00; break;
++      case SV_VERTEX_COUNT   : id = 0x10; break;
++      case SV_INVOCATION_ID  : id = 0x11; break;
++      case SV_THREAD_KILL    : id = 0x13; break;
++      case SV_INVOCATION_INFO: id = 0x1d; break;
++      case SV_COMBINED_TID   : id = 0x20; break;
++      case SV_TID            : id = 0x21 + val->reg.data.sv.index; break;
++      case SV_CTAID          : id = 0x25 + val->reg.data.sv.index; break;
++      case SV_LANEMASK_EQ    : id = 0x38; break;
++      case SV_LANEMASK_LT    : id = 0x39; break;
++      case SV_LANEMASK_LE    : id = 0x3a; break;
++      case SV_LANEMASK_GT    : id = 0x3b; break;
++      case SV_LANEMASK_GE    : id = 0x3c; break;
++      case SV_CLOCK          : id = 0x50 + val->reg.data.sv.index; break;
++      default:
++         assert(!"invalid system value");
++         id = 0; // with asserts disabled, unknown values encode as 0 (same as SV_LANEID)
++         break;
++      }
++
++      emitField(pos, 8, id);
++   }
++
++   inline void emitSYS(int pos, const ValueRef &ref) {
++      emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);
++   }
++
++   inline void emitGPR(int pos, const Value *val, int off) { // 8-bit register id + off, or 255
++      emitField(pos, 8, val && !val->inFile(FILE_FLAGS) ?
++                val->reg.data.id + off: 255); // 255 for a null value or one in FILE_FLAGS
++   }
++
++   inline void emitGPR(int pos, const Value *v) {
++      emitGPR(pos, v, 0);
++   }
++
++   inline void emitGPR(int pos) {
++      emitGPR(pos, (const Value *)NULL);
++   }
++
++   inline void emitGPR(int pos, const ValueRef &ref) {
++      emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);
++   }
++
++   inline void emitGPR(int pos, const ValueRef *ref) {
++      emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);
++   }
++
++   inline void emitGPR(int pos, const ValueDef &def) {
++      emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);
++   }
++
++   inline void emitGPR(int pos, const ValueDef &def, int off) {
++      emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL, off);
++   }
++
++   inline void emitPRED(int pos, const Value *val) {
++      emitField(pos, 3, val ? val->reg.data.id : 7); // 7 is written when there is no value
++   };
++
++   inline void emitPRED(int pos) {
++      emitPRED(pos, (const Value *)NULL);
++   }
++
++   inline void emitPRED(int pos, const ValueRef &ref) {
++      emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);
++   }
++
++   inline void emitPRED(int pos, const ValueDef &def) {
++      emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL);
++   }
++
++   inline void emitCBUF(int buf, int gpr, int off, int len, int align,
++                        const ValueRef &ref) { // note: len is not used in the body
++      const Value *v = ref.get();
++      const Symbol *s = v->asSym(); // NOTE(review): used unchecked below — confirm ref is always a symbol
++
++      assert(!(s->reg.data.offset & ((1 << align) - 1)));
++
++      emitField(buf,  5, v->reg.fileIndex);
++      if (gpr >= 0)
++         emitGPR(gpr, ref.getIndirect(0));
++      emitField(off, 16, s->reg.data.offset);
++   }
++
++   inline void emitIMMD(int pos, int len, const ValueRef &ref) {
++      const ImmediateValue *imm = ref.get()->asImm();
++      uint32_t val = imm->reg.data.u32;
++
++      if (insn->sType == TYPE_F64) { // F64: encode only the high 32 bits; low word must be zero
++         assert(!(imm->reg.data.u64 & 0x00000000ffffffffULL));
++         val = imm->reg.data.u64 >> 32;
++      }
++
++      emitField(pos, len, val);
++   }
++
++   inline void emitADDR(int gpr, int off, int len, int shr,
++                        const ValueRef &ref) { // gpr < 0 skips the indirect register field
++      const Value *v = ref.get();
++      assert(!(v->reg.data.offset & ((1 << shr) - 1))); // the bits shifted out must be zero
++      if (gpr >= 0)
++         emitGPR(gpr, ref.getIndirect(0));
++      emitField(off, len, v->reg.data.offset >> shr);
++   }
++
++   inline void emitFormA(uint16_t op, uint8_t forms, int src0, int src1, int src2);
++   inline void emitFormA_RRR(uint16_t op, int src1, int src2);
++   inline void emitFormA_RRI(uint16_t op, int src1, int src2);
++   inline void emitFormA_RRC(uint16_t op, int src1, int src2);
++   inline void emitFormA_I32(int src);
++
++   void emitBRA();
++   void emitEXIT();
++   void emitKILL();
++   void emitNOP();
++   void emitWARPSYNC();
++
++   void emitCS2R();
++   void emitF2F();
++   void emitF2I();
++   void emitFRND();
++   void emitI2F();
++   void emitMOV();
++   void emitPRMT();
++   void emitS2R();
++   void emitSEL();
++   void emitSHFL();
++
++   void emitFADD();
++   void emitFFMA();
++   void emitFMNMX();
++   void emitFMUL();
++   void emitFSET_BF();
++   void emitFSETP();
++   void emitFSWZADD();
++   void emitMUFU();
++
++   void emitDADD();
++   void emitDFMA();
++   void emitDMUL();
++   void emitDSETP();
++
++   void emitBMSK();
++   void emitBREV();
++   void emitFLO();
++   void emitIABS();
++   void emitIADD3();
++   void emitIMAD();
++   void emitIMAD_WIDE();
++   void emitISETP();
++   void emitLEA();
++   void emitLOP3_LUT();
++   void emitPOPC();
++   void emitSGXT();
++   void emitSHF();
++
++   void emitALD();
++   void emitAST();
++   void emitATOM();
++   void emitATOMS();
++   void emitIPA();
++   void emitISBERD();
++   void emitLDSTc(int);
++   void emitLDSTs(int, DataType);
++   void emitLD();
++   void emitLDC();
++   void emitLDL();
++   void emitLDS();
++   void emitOUT();
++   void emitRED();
++   void emitST();
++   void emitSTL();
++   void emitSTS();
++
++   void emitTEXs(int);
++   void emitTEX();
++   void emitTLD();
++   void emitTLD4();
++   void emitTMML();
++   void emitTXD();
++   void emitTXQ();
++
++   void emitSUHandle(const int);
++   void emitSUTarget();
++   void emitSUATOM();
++   void emitSULD();
++   void emitSUST();
++
++   void emitAL2P();
++   void emitBAR();
++   void emitCCTL();
++   void emitMEMBAR();
++   void emitPIXLD();
++   void emitPLOP3_LUT();
++   void emitVOTE();
++};
++
++};
++#endif
+diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+index bd78b76f384..69f9cfad0d6 100644
+--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+@@ -571,6 +571,10 @@ Converter::getSubOp(nir_op op)
+    case nir_op_imul_high:
+    case nir_op_umul_high:
+       return NV50_IR_SUBOP_MUL_HIGH;
++   case nir_op_ishl:
++   case nir_op_ishr:
++   case nir_op_ushr:
++      return NV50_IR_SUBOP_SHIFT_WRAP;
+    default:
+       return 0;
+    }
+@@ -1067,7 +1071,11 @@ bool Converter::assignSlots() {
+          case TGSI_SEMANTIC_COLOR:
+             if (!var->data.fb_fetch_output)
+                info->prop.fp.numColourResults++;
+-            info->prop.fp.separateFragData = true;
++
++            if (var->data.location == FRAG_RESULT_COLOR &&
++                nir->info.outputs_written & BITFIELD64_BIT(var->data.location))
++               info->prop.fp.separateFragData = true;
++
+             // sometimes we get FRAG_RESULT_DATAX with data.index 0
+             // sometimes we get FRAG_RESULT_DATA0 with data.index X
+             index = index == 0 ? var->data.index : index;
+@@ -1617,6 +1625,7 @@ Converter::visit(nir_intrinsic_instr *insn)
+ {
+    nir_intrinsic_op op = insn->intrinsic;
+    const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op];
++   unsigned dest_components = nir_intrinsic_dest_components(insn);
+ 
+    switch (op) {
+    case nir_intrinsic_load_uniform: {
+@@ -1624,7 +1633,7 @@ Converter::visit(nir_intrinsic_instr *insn)
+       const DataType dType = getDType(insn);
+       Value *indirect;
+       uint32_t coffset = getIndirect(insn, 0, 0, indirect);
+-      for (uint8_t i = 0; i < insn->num_components; ++i) {
++      for (uint8_t i = 0; i < dest_components; ++i) {
+          loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
+       }
+       break;
+@@ -1635,7 +1644,7 @@ Converter::visit(nir_intrinsic_instr *insn)
+       DataType dType = getSType(insn->src[0], false, false);
+       uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);
+ 
+-      for (uint8_t i = 0u; i < insn->num_components; ++i) {
++      for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
+          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
+             continue;
+ 
+@@ -1688,7 +1697,7 @@ Converter::visit(nir_intrinsic_instr *insn)
+          srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
+          srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
+ 
+-         for (uint8_t i = 0u; i < insn->num_components; ++i) {
++         for (uint8_t i = 0u; i < dest_components; ++i) {
+             defs.push_back(newDefs[i]);
+             mask |= 1 << i;
+          }
+@@ -1723,7 +1732,7 @@ Converter::visit(nir_intrinsic_instr *insn)
+          }
+       }
+ 
+-      for (uint8_t i = 0u; i < insn->num_components; ++i) {
++      for (uint8_t i = 0u; i < dest_components; ++i) {
+          uint32_t address = getSlotAddress(insn, idx, i);
+          Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
+          if (prog->getType() == Program::TYPE_FRAGMENT) {
+@@ -1858,7 +1867,7 @@ Converter::visit(nir_intrinsic_instr *insn)
+       SVSemantic sv = convert(op);
+       LValues &newDefs = convert(&insn->dest);
+ 
+-      for (uint8_t i = 0u; i < insn->num_components; ++i) {
++      for (uint8_t i = 0u; i < nir_intrinsic_dest_components(insn); ++i) {
+          Value *def;
+          if (typeSizeof(dType) == 8)
+             def = getSSA();
+@@ -1910,12 +1919,12 @@ Converter::visit(nir_intrinsic_instr *insn)
+ 
+       if (op == nir_intrinsic_read_first_invocation) {
+          mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
+-         mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
++         mkOp1(OP_BREV, TYPE_U32, tmp, tmp);
+          mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
+       } else
+          tmp = getSrc(&insn->src[1], 0);
+ 
+-      for (uint8_t i = 0; i < insn->num_components; ++i) {
++      for (uint8_t i = 0; i < dest_components; ++i) {
+          mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
+             ->subOp = NV50_IR_SUBOP_SHFL_IDX;
+       }
+@@ -1931,7 +1940,7 @@ Converter::visit(nir_intrinsic_instr *insn)
+ 
+       Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
+                               mkImm(baseVertex), indirectVertex);
+-      for (uint8_t i = 0u; i < insn->num_components; ++i) {
++      for (uint8_t i = 0u; i < dest_components; ++i) {
+          uint32_t address = getSlotAddress(insn, idx, i);
+          loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
+                   indirectOffset, vtxBase, info->in[idx].patch);
+@@ -1954,7 +1963,7 @@ Converter::visit(nir_intrinsic_instr *insn)
+ 
+       vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase);
+ 
+-      for (uint8_t i = 0u; i < insn->num_components; ++i) {
++      for (uint8_t i = 0u; i < dest_components; ++i) {
+          uint32_t address = getSlotAddress(insn, idx, i);
+          loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
+                   indirectOffset, vtxBase, info->in[idx].patch);
+@@ -1978,7 +1987,7 @@ Converter::visit(nir_intrinsic_instr *insn)
+       uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1;
+       uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
+ 
+-      for (uint8_t i = 0u; i < insn->num_components; ++i) {
++      for (uint8_t i = 0u; i < dest_components; ++i) {
+          loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
+                   indirectOffset, indirectIndex);
+       }
+@@ -2001,7 +2010,7 @@ Converter::visit(nir_intrinsic_instr *insn)
+       uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
+       uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
+ 
+-      for (uint8_t i = 0u; i < insn->num_components; ++i) {
++      for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
+          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
+             continue;
+          Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
+@@ -2020,7 +2029,7 @@ Converter::visit(nir_intrinsic_instr *insn)
+       uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
+       uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
+ 
+-      for (uint8_t i = 0u; i < insn->num_components; ++i)
++      for (uint8_t i = 0u; i < dest_components; ++i)
+          loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
+                   indirectOffset, indirectBuffer);
+ 
+@@ -2314,7 +2323,7 @@ Converter::visit(nir_intrinsic_instr *insn)
+       Value *indirectOffset;
+       uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
+ 
+-      for (uint8_t i = 0u; i < insn->num_components; ++i) {
++      for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
+          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
+             continue;
+          Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, sType, offset + i * typeSizeof(sType));
+@@ -2328,7 +2337,7 @@ Converter::visit(nir_intrinsic_instr *insn)
+       Value *indirectOffset;
+       uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
+ 
+-      for (uint8_t i = 0u; i < insn->num_components; ++i)
++      for (uint8_t i = 0u; i < dest_components; ++i)
+          loadFrom(FILE_MEMORY_SHARED, 0, dType, newDefs[i], offset, i, indirectOffset);
+ 
+       break;
+@@ -2367,7 +2376,7 @@ Converter::visit(nir_intrinsic_instr *insn)
+       Value *indirectOffset;
+       uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
+ 
+-      for (auto i = 0u; i < insn->num_components; ++i)
++      for (auto i = 0u; i < dest_components; ++i)
+          loadFrom(FILE_MEMORY_GLOBAL, 0, dType, newDefs[i], offset, i, indirectOffset);
+ 
+       info->io.globalAccess |= 0x1;
+@@ -2376,7 +2385,7 @@ Converter::visit(nir_intrinsic_instr *insn)
+    case nir_intrinsic_store_global: {
+       DataType sType = getSType(insn->src[0], false, false);
+ 
+-      for (auto i = 0u; i < insn->num_components; ++i) {
++      for (auto i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
+          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
+             continue;
+          if (typeSizeof(sType) == 8) {
+@@ -2774,7 +2783,7 @@ Converter::visit(nir_alu_instr *insn)
+    case nir_op_bfm: {
+       DEFAULT_CHECKS;
+       LValues &newDefs = convert(&insn->dest);
+-      mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
++      mkOp2(OP_BMSK, dType, newDefs[0], getSrc(&insn->src[1]), getSrc(&insn->src[0]))->subOp = NV50_IR_SUBOP_BMSK_W;
+       break;
+    }
+    case nir_op_bitfield_insert: {
+@@ -2794,17 +2803,69 @@ Converter::visit(nir_alu_instr *insn)
+    case nir_op_bitfield_reverse: {
+       DEFAULT_CHECKS;
+       LValues &newDefs = convert(&insn->dest);
+-      mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
++      mkOp1(OP_BREV, TYPE_U32, newDefs[0], getSrc(&insn->src[0]));
+       break;
+    }
+    case nir_op_find_lsb: {
+       DEFAULT_CHECKS;
+       LValues &newDefs = convert(&insn->dest);
+       Value *tmp = getSSA();
+-      mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
++      mkOp1(OP_BREV, TYPE_U32, tmp, getSrc(&insn->src[0]));
+       mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
+       break;
+    }
++   case nir_op_extract_u8: {
++      DEFAULT_CHECKS;
++      LValues &newDefs = convert(&insn->dest);
++      Value *prmt = getSSA();
++      mkOp2(OP_OR, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x4440));
++      mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
++      break;
++   }
++   case nir_op_extract_i8: {
++      DEFAULT_CHECKS;
++      LValues &newDefs = convert(&insn->dest);
++      Value *prmt = getSSA();
++      mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x1111), loadImm(NULL, 0x8880));
++      mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
++      break;
++   }
++   case nir_op_extract_u16: {
++      DEFAULT_CHECKS;
++      LValues &newDefs = convert(&insn->dest);
++      Value *prmt = getSSA();
++      mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x22), loadImm(NULL, 0x4410));
++      mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
++      break;
++   }
++   case nir_op_extract_i16: {
++      DEFAULT_CHECKS;
++      LValues &newDefs = convert(&insn->dest);
++      Value *prmt = getSSA();
++      mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x2222), loadImm(NULL, 0x9910));
++      mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
++      break;
++   }
++   case nir_op_urol: {
++      DEFAULT_CHECKS;
++      LValues &newDefs = convert(&insn->dest);
++      mkOp3(OP_SHF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]),
++            getSrc(&insn->src[1]), getSrc(&insn->src[0]))
++         ->subOp = NV50_IR_SUBOP_SHF_L |
++                   NV50_IR_SUBOP_SHF_W |
++                   NV50_IR_SUBOP_SHF_HI;
++      break;
++   }
++   case nir_op_uror: {
++      DEFAULT_CHECKS;
++      LValues &newDefs = convert(&insn->dest);
++      mkOp3(OP_SHF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]),
++            getSrc(&insn->src[1]), getSrc(&insn->src[0]))
++         ->subOp = NV50_IR_SUBOP_SHF_R |
++                   NV50_IR_SUBOP_SHF_W |
++                   NV50_IR_SUBOP_SHF_LO;
++      break;
++   }
+    // boolean conversions
+    case nir_op_b2f32: {
+       DEFAULT_CHECKS;
+@@ -3224,6 +3285,11 @@ Converter::run()
+    NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
+    NIR_PASS_V(nir, nir_lower_phis_to_scalar);
+ 
++   /*TODO: improve this lowering/optimisation loop so that we can use
++    *      nir_opt_idiv_const effectively before this.
++    */
++   NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_precise);
++
+    do {
+       progress = false;
+       NIR_PASS(progress, nir, nir_copy_prop);
+@@ -3285,3 +3351,125 @@ Program::makeFromNIR(struct nv50_ir_prog_info *info)
+ }
+ 
+ } // namespace nv50_ir
++
++static nir_shader_compiler_options
++nvir_nir_shader_compiler_options(int chipset) // builds the NIR option set for 'chipset'
++{
++   return {
++      .lower_fdiv = (chipset >= NVISA_GV100_CHIPSET),
++      .lower_ffma = false,
++      .fuse_ffma = false, /* nir doesn't track mad vs fma */
++      .lower_flrp16 = (chipset >= NVISA_GV100_CHIPSET),
++      .lower_flrp32 = true,
++      .lower_flrp64 = true,
++      .lower_fpow = false, // TODO: nir's lowering is broken, or we could use it
++      .lower_fsat = false,
++      .lower_fsqrt = false, // TODO: only before gm200
++      .lower_sincos = false,
++      .lower_fmod = true,
++      .lower_bitfield_extract = false,
++      .lower_bitfield_extract_to_shifts = (chipset >= NVISA_GV100_CHIPSET),
++      .lower_bitfield_insert = false,
++      .lower_bitfield_insert_to_shifts = (chipset >= NVISA_GV100_CHIPSET),
++      .lower_bitfield_insert_to_bitfield_select = false,
++      .lower_bitfield_reverse = false,
++      .lower_bit_count = false,
++      .lower_ifind_msb = false,
++      .lower_find_lsb = false,
++      .lower_uadd_carry = true, // TODO
++      .lower_usub_borrow = true, // TODO
++      .lower_mul_high = false,
++      .lower_negate = false,
++      .lower_sub = true,
++      .lower_scmp = true, // TODO: not implemented yet
++      .lower_vector_cmp = false,
++      .lower_idiv = true,
++      .lower_bitops = false,
++      .lower_isign = (chipset >= NVISA_GV100_CHIPSET),
++      .lower_fsign = (chipset >= NVISA_GV100_CHIPSET),
++      .lower_fdph = false,
++      .lower_fdot = false,
++      .fdot_replicates = false, // TODO
++      .lower_ffloor = false, // TODO
++      .lower_ffract = true,
++      .lower_fceil = false, // TODO
++      .lower_ftrunc = false,
++      .lower_ldexp = true,
++      .lower_pack_half_2x16 = true,
++      .lower_pack_unorm_2x16 = true,
++      .lower_pack_snorm_2x16 = true,
++      .lower_pack_unorm_4x8 = true,
++      .lower_pack_snorm_4x8 = true,
++      .lower_unpack_half_2x16 = true,
++      .lower_unpack_unorm_2x16 = true,
++      .lower_unpack_snorm_2x16 = true,
++      .lower_unpack_unorm_4x8 = true,
++      .lower_unpack_snorm_4x8 = true,
++      .lower_pack_split = false,
++      .lower_extract_byte = (chipset < NVISA_GM107_CHIPSET), // GM107+: Converter::visit handles extract_[ui]8
++      .lower_extract_word = (chipset < NVISA_GM107_CHIPSET), // GM107+: Converter::visit handles extract_[ui]16
++      .lower_all_io_to_temps = false,
++      .lower_all_io_to_elements = false,
++      .vertex_id_zero_based = false,
++      .lower_base_vertex = false,
++      .lower_helper_invocation = false,
++      .optimize_sample_mask_in = false,
++      .lower_cs_local_index_from_id = true,
++      .lower_cs_local_id_from_index = false,
++      .lower_device_index_to_zero = false, // TODO
++      .lower_wpos_pntc = false, // TODO
++      .lower_hadd = true, // TODO
++      .lower_add_sat = true, // TODO
++      .vectorize_io = false,
++      .lower_to_scalar = true,
++      .unify_interfaces = false,
++      .use_interpolated_input_intrinsics = true,
++      .lower_mul_2x32_64 = true, // TODO
++      .lower_rotate = (chipset < NVISA_GV100_CHIPSET), // GV100+: Converter::visit handles urol/uror
++      .has_imul24 = false,
++      .intel_vec4 = false,
++      .max_unroll_iterations = 32,
++      .lower_int64_options = (nir_lower_int64_options) (
++            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul64 : 0) |
++            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_isign64 : 0) |
++            nir_lower_divmod64 |
++            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_high64 : 0) |
++            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_mov64 : 0) |
++            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_icmp64 : 0) |
++            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_iabs64 : 0) |
++            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ineg64 : 0) |
++            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_logic64 : 0) |
++            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_minmax64 : 0) |
++            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_shift64 : 0) |
++            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_2x32_64 : 0) |
++            ((chipset >= NVISA_GM107_CHIPSET) ? nir_lower_extract64 : 0) |
++            nir_lower_ufind_msb64
++      ),
++      .lower_doubles_options = (nir_lower_doubles_options) (
++            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drcp : 0) |
++            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsqrt : 0) |
++            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drsq : 0) |
++            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dfract : 0) |
++            nir_lower_dmod |
++            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsub : 0) |
++            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ddiv : 0)
++      )
++   };
++}
++
++static const nir_shader_compiler_options gf100_nir_shader_compiler_options =
++nvir_nir_shader_compiler_options(NVISA_GF100_CHIPSET); // returned for chipsets below GM107
++static const nir_shader_compiler_options gm107_nir_shader_compiler_options =
++nvir_nir_shader_compiler_options(NVISA_GM107_CHIPSET); // returned for GM107 up to (not incl.) GV100
++static const nir_shader_compiler_options gv100_nir_shader_compiler_options =
++nvir_nir_shader_compiler_options(NVISA_GV100_CHIPSET); // returned for GV100 and newer
++
++const nir_shader_compiler_options *
++nv50_ir_nir_shader_compiler_options(int chipset)
++{
++   // Hand out the prebuilt table for the highest threshold 'chipset' reaches.
++   const bool atLeastGV100 = chipset >= NVISA_GV100_CHIPSET;
++   return atLeastGV100 ? &gv100_nir_shader_compiler_options :
++          chipset >= NVISA_GM107_CHIPSET ? &gm107_nir_shader_compiler_options :
++                                           &gf100_nir_shader_compiler_options;
++}
+diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+index 60f3d582a0b..3fd76f64de0 100644
+--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+@@ -3401,8 +3401,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
+       // ReadInvocationARB(src, findLSB(ballot(true)))
+       val0 = getScratch();
+       mkOp1(OP_VOTE, TYPE_U32, val0, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
+-      mkOp2(OP_EXTBF, TYPE_U32, val0, val0, mkImm(0x2000))
+-         ->subOp = NV50_IR_SUBOP_EXTBF_REV;
++      mkOp1(OP_BREV, TYPE_U32, val0, val0);
+       mkOp1(OP_BFIND, TYPE_U32, val0, val0)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
+       src1 = val0;
+       /* fallthrough */
+@@ -3820,8 +3819,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
+       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+          src0 = fetchSrc(0, c);
+          val0 = getScratch();
+-         geni = mkOp2(OP_EXTBF, TYPE_U32, val0, src0, mkImm(0x2000));
+-         geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
++         mkOp1(OP_BREV, TYPE_U32, val0, src0);
+          geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], val0);
+          geni->subOp = NV50_IR_SUBOP_BFIND_SAMT;
+       }
+@@ -3836,8 +3834,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
+    case TGSI_OPCODE_BREV:
+       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
+          src0 = fetchSrc(0, c);
+-         geni = mkOp2(OP_EXTBF, TYPE_U32, dst0[c], src0, mkImm(0x2000));
+-         geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
++         mkOp1(OP_BREV, TYPE_U32, dst0[c], src0);
+       }
+       break;
+    case TGSI_OPCODE_POPC:
+diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
+index 49a5f3b01f2..9fad1dcfe89 100644
+--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
+@@ -239,9 +239,8 @@ GM107LoweringPass::handlePFETCH(Instruction *i)
+    Value *tmp1 = bld.getScratch();
+    Value *tmp2 = bld.getScratch();
+    bld.mkOp1(OP_RDSV, TYPE_U32, tmp0, bld.mkSysVal(SV_INVOCATION_INFO, 0));
+-   bld.mkOp2(OP_SHR , TYPE_U32, tmp1, tmp0, bld.mkImm(16));
+-   bld.mkOp2(OP_AND , TYPE_U32, tmp0, tmp0, bld.mkImm(0xff));
+-   bld.mkOp2(OP_AND , TYPE_U32, tmp1, tmp1, bld.mkImm(0xff));
++   bld.mkOp3(OP_PERMT, TYPE_U32, tmp1, tmp0, bld.mkImm(0x4442), bld.mkImm(0));
++   bld.mkOp3(OP_PERMT, TYPE_U32, tmp0, tmp0, bld.mkImm(0x4440), bld.mkImm(0));
+    if (i->getSrc(1))
+       bld.mkOp2(OP_ADD , TYPE_U32, tmp2, i->getSrc(0), i->getSrc(1));
+    else
+diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h
+index 71e5ea6417a..dfa1d035dac 100644
+--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h
++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h
+@@ -21,6 +21,7 @@ class GM107LegalizeSSA : public NVC0LegalizeSSA
+ private:
+    virtual bool visit(Instruction *);
+ 
++protected:
+    void handlePFETCH(Instruction *);
+    void handleLOAD(Instruction *);
+ };
+diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp
+new file mode 100644
+index 00000000000..4b6df0db588
+--- /dev/null
++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp
+@@ -0,0 +1,477 @@
++/*
++ * Copyright 2020 Red Hat Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ */
++#include "codegen/nv50_ir.h"
++#include "codegen/nv50_ir_build_util.h"
++
++#include "codegen/nv50_ir_target_nvc0.h"
++#include "codegen/nv50_ir_lowering_gv100.h"
++
++#include <limits>
++
++namespace nv50_ir {
++
++bool
++GV100LegalizeSSA::handleCMP(Instruction *i) // called by visit() for OP_SLCT
++{
++   Value *pred = bld.getSSA(1, FILE_PREDICATE);
++   // Rebuild as SET(imm 0 <reversed cc> src 2) into 'pred', then SELP on it.
++   bld.mkCmp(OP_SET, reverseCondCode(i->asCmp()->setCond), TYPE_U8, pred,
++             i->sType, bld.mkImm(0), i->getSrc(2));
++   bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), i->getSrc(0), i->getSrc(1), pred);
++   return true; // visit() deletes the original instruction when true
++}
++
++// NIR deals with most of these for us, but codegen generates more in pointer
++// calculations from other lowering passes.
++bool
++GV100LegalizeSSA::handleIADD64(Instruction *i) // visit(): non-float OP_ADD with 8-byte dType
++{
++   Value *carry = bld.getSSA(1, FILE_PREDICATE);
++   Value *def[2] = { bld.getSSA(), bld.getSSA() };
++   Value *src[2][2];
++   // Obtain both sources as pairs of 32-bit words, src[s][0] and src[s][1].
++   for (int s = 0; s < 2; s++) {
++      if (i->getSrc(s)->reg.size == 8) {
++         bld.mkSplit(src[s], 4, i->getSrc(s));
++      } else {
++         src[s][0] = i->getSrc(s);
++         src[s][1] = bld.mkImm(0); // source is not 8 bytes: second word is immediate 0
++      }
++   }
++   // Two 32-bit ADDs: the [0] add defines 'carry', the [1] add consumes it.
++   bld.mkOp2(OP_ADD, TYPE_U32, def[0], src[0][0], src[1][0])->
++      setFlagsDef(1, carry);
++   bld.mkOp2(OP_ADD, TYPE_U32, def[1], src[0][1], src[1][1])->
++      setFlagsSrc(2, carry);
++   bld.mkOp2(OP_MERGE, i->dType, i->getDef(0), def[0], def[1]); // rejoin into original def
++   return true; // visit() deletes the original ADD
++}
++
++bool
++GV100LegalizeSSA::handleIMAD_HIGH(Instruction *i) // integer MUL/MAD with SUBOP_MUL_HIGH
++{
++   Value *def = bld.getSSA(8), *defs[2];
++   Value *src2;
++   // An addend that is not an immediate 0 is merged into word [1] of an 8-byte value.
++   if (i->srcExists(2) &&
++       (!i->getSrc(2)->asImm() || i->getSrc(2)->asImm()->reg.data.u32)) {
++      Value *src2s[2] = { bld.getSSA(), bld.getSSA() };
++      bld.mkMov(src2s[0], bld.mkImm(0));
++      bld.mkMov(src2s[1], i->getSrc(2));
++      src2 = bld.mkOp2(OP_MERGE, TYPE_U64, bld.getSSA(8), src2s[0], src2s[1])->getDef(0);
++   } else {
++      src2 = bld.mkImm(0); // no addend, or addend is immediate 0
++   }
++   // Perform the operation as a 64-bit MAD, signed or unsigned per i->sType.
++   bld.mkOp3(OP_MAD, isSignedType(i->sType) ? TYPE_S64 : TYPE_U64, def,
++             i->getSrc(0), i->getSrc(1), src2);
++   // Split the 8-byte result and point users of the old def at word [1].
++   bld.mkSplit(defs, 4, def);
++   i->def(0).replace(defs[1], false);
++   return true;
++}
++
++// XXX: We should be able to do this in GV100LoweringPass, but codegen messes
++//      up somehow and swaps the condcode without swapping the sources.
++//      - tests/spec/glsl-1.50/execution/geometry/primitive-id-in.shader_test
++bool
++GV100LegalizeSSA::handleIMNMX(Instruction *i) // visit(): non-float OP_MIN / OP_MAX
++{
++   Value *pred = bld.getSSA(1, FILE_PREDICATE);
++   // SET src0 < src1 (MIN) or src0 > src1 (MAX) into 'pred', then SELP on it.
++   bld.mkCmp(OP_SET, (i->op == OP_MIN) ? CC_LT : CC_GT, i->dType, pred,
++             i->sType, i->getSrc(0), i->getSrc(1));
++   bld.mkOp3(OP_SELP, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1), pred);
++   return true; // visit() deletes the original MIN/MAX
++}
++
++bool
++GV100LegalizeSSA::handleIMUL(Instruction *i)
++{
++   // Plain integer MUL is emitted as MAD with a zero addend; MUL_HIGH is delegated.
++   const bool mulHigh = (i->subOp == NV50_IR_SUBOP_MUL_HIGH);
++   if (!mulHigh)
++      bld.mkOp3(OP_MAD, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1),
++                bld.mkImm(0));
++   return mulHigh ? handleIMAD_HIGH(i) : true;
++}
++
++bool
++GV100LegalizeSSA::handleLOP2(Instruction *i) // AND/OR/XOR -> LOP3_LUT (see visit())
++{
++   uint8_t src0 = NV50_IR_SUBOP_LOP3_LUT_SRC0;
++   uint8_t src1 = NV50_IR_SUBOP_LOP3_LUT_SRC1;
++   uint8_t subOp;
++   // A NOT modifier on a source is folded into the table by inverting its column.
++   if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
++      src0 = ~src0;
++   if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
++      src1 = ~src1;
++   // Combine the two columns with the requested operation to form the LUT byte.
++   switch (i->op) {
++   case OP_AND: subOp = src0 & src1; break;
++   case OP_OR : subOp = src0 | src1; break;
++   case OP_XOR: subOp = src0 ^ src1; break;
++   default:
++      assert(!"invalid LOP2 opcode");
++      return false; // asserts compiled out: keep 'i' rather than emit an uninitialized LUT
++   }
++
++   bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), i->getSrc(0), i->getSrc(1),
++             bld.mkImm(0))->subOp = subOp;
++   return true; // visit() deletes the original two-source op
++}
++
++bool
++GV100LegalizeSSA::handleNOT(Instruction *i) // OP_NOT -> LOP3_LUT, operand passed as src 1
++{
++   bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), bld.mkImm(0), i->getSrc(0),
++             bld.mkImm(0))->subOp = (uint8_t)~NV50_IR_SUBOP_LOP3_LUT_SRC1; // inverted SRC1 column
++   return true; // visit() deletes the original NOT
++}
++
++bool
++GV100LegalizeSSA::handlePREEX2(Instruction *i) // OP_PREEX2 emits nothing here
++{
++   i->def(0).replace(i->src(0), false); // users of the result now read the source directly
++   return true; // visit() deletes the PREEX2
++}
++
++bool
++GV100LegalizeSSA::handleQUADON(Instruction *i) // QUADON is replaced by a WARPSYNC
++{
++   handleSHFL(i); // Inserts OP_WARPSYNC
++   return true; // unlike SHFL, the QUADON itself is deleted by visit()
++}
++
++bool
++GV100LegalizeSSA::handleQUADPOP(Instruction *i) // nothing is emitted in its place
++{
++   return true; // visit() simply deletes the QUADPOP
++}
++
++bool
++GV100LegalizeSSA::handleSET(Instruction *i) // SET* with a non-predicate def (see visit())
++{
++   Value *src2 = i->srcExists(2) ? i->getSrc(2) : NULL;
++   Value *pred = bld.getSSA(1, FILE_PREDICATE), *met;
++   Instruction *xsetp;
++   // 'met': 0x3f800000 (bit pattern of 1.0f) for float dType, else all ones.
++   if (isFloatType(i->dType)) {
++      if (i->sType == TYPE_F32)
++         return false; // HW has FSET.BF
++      met = bld.mkImm(0x3f800000);
++   } else {
++      met = bld.mkImm(0xffffffff);
++   }
++   // Redo the comparison into a predicate, carrying over modifiers and src 2.
++   xsetp = bld.mkCmp(i->op, i->asCmp()->setCond, TYPE_U8, pred, i->sType,
++                     i->getSrc(0), i->getSrc(1));
++   xsetp->src(0).mod = i->src(0).mod;
++   xsetp->src(1).mod = i->src(1).mod;
++   xsetp->setSrc(2, src2);
++   // SELP between immediate 0 and 'met', with the predicate source negated.
++   i = bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), bld.mkImm(0), met, pred);
++   i->src(2).mod = Modifier(NV50_IR_MOD_NOT);
++   return true; // 'i' was only rebound locally; visit() deletes the original SET
++}
++
++bool
++GV100LegalizeSSA::handleSHFL(Instruction *i) // also reused by handleQUADON()
++{
++   Instruction *sync = new_Instruction(func, OP_WARPSYNC, TYPE_NONE);
++   sync->fixed = 1;
++   sync->setSrc(0, bld.mkImm(0xffffffff)); // NOTE(review): presumably a full lane mask; confirm
++   i->bb->insertBefore(i, sync); // WARPSYNC goes immediately ahead of 'i'
++   return false; // 'i' itself is kept
++}
++
++bool
++GV100LegalizeSSA::handleSHL(Instruction *i) // OP_SHL -> OP_SHF with SUBOP_SHF_L
++{
++   if (i->src(0).getFile() != FILE_GPR) { // non-GPR value: pass it as src 2, use HI form
++      bld.mkOp3(OP_SHF, i->dType, i->getDef(0), bld.mkImm(0), i->getSrc(1),
++                i->getSrc(0))->subOp = NV50_IR_SUBOP_SHF_L |
++                                       NV50_IR_SUBOP_SHF_HI;
++   } else { // GPR value: stays src 0, with immediate 0 as src 2
++      bld.mkOp3(OP_SHF, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1),
++                bld.mkImm(0))->subOp = NV50_IR_SUBOP_SHF_L;
++   }
++   return true; // visit() deletes the original SHL
++}
++
++bool
++GV100LegalizeSSA::handleSHR(Instruction *i) // OP_SHR -> OP_SHF (R | HI), value as src 2
++{
++   bld.mkOp3(OP_SHF, i->dType, i->getDef(0), bld.mkImm(0), i->getSrc(1),
++             i->getSrc(0))->subOp = NV50_IR_SUBOP_SHF_R | NV50_IR_SUBOP_SHF_HI;
++   return true; // visit() deletes the original SHR
++}
++
++bool
++GV100LegalizeSSA::handleSUB(Instruction *i) // OP_SUB -> OP_ADD with src 1's NEG toggled
++{
++   Instruction *xadd =
++      bld.mkOp2(OP_ADD, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1));
++   xadd->src(0).mod = i->src(0).mod; // src 0 keeps its modifiers unchanged
++   xadd->src(1).mod = i->src(1).mod ^ Modifier(NV50_IR_MOD_NEG); // XOR flips NEG
++   return true; // visit() deletes the original SUB
++}
++
++bool
++GV100LegalizeSSA::visit(Instruction *i) // per-instruction dispatch to the handlers above
++{
++   bool lowered = false;
++
++   bld.setPosition(i, false); // NOTE(review): presumably builds before 'i'; confirm
++
++   switch (i->op) {
++   case OP_AND:
++   case OP_OR:
++   case OP_XOR:
++      if (i->def(0).getFile() != FILE_PREDICATE) // predicate results are left alone
++         lowered = handleLOP2(i);
++      break;
++   case OP_NOT:
++      lowered = handleNOT(i);
++      break;
++   case OP_SHL:
++      lowered = handleSHL(i);
++      break;
++   case OP_SHR:
++      lowered = handleSHR(i);
++      break;
++   case OP_SET:
++   case OP_SET_AND:
++   case OP_SET_OR:
++   case OP_SET_XOR:
++      if (i->def(0).getFile() != FILE_PREDICATE) // predicate results are left alone
++         lowered = handleSET(i);
++      break;
++   case OP_SLCT:
++      lowered = handleCMP(i);
++      break;
++   case OP_PREEX2:
++      lowered = handlePREEX2(i);
++      break;
++   case OP_MUL:
++      if (!isFloatType(i->dType)) // integer multiplies only
++         lowered = handleIMUL(i);
++      break;
++   case OP_MAD:
++      if (!isFloatType(i->dType) && i->subOp == NV50_IR_SUBOP_MUL_HIGH)
++         lowered = handleIMAD_HIGH(i);
++      break;
++   case OP_SHFL:
++      lowered = handleSHFL(i);
++      break;
++   case OP_QUADON:
++      lowered = handleQUADON(i);
++      break;
++   case OP_QUADPOP:
++      lowered = handleQUADPOP(i);
++      break;
++   case OP_SUB:
++      lowered = handleSUB(i);
++      break;
++   case OP_MAX:
++   case OP_MIN:
++      if (!isFloatType(i->dType)) // integer min/max only
++         lowered = handleIMNMX(i);
++      break;
++   case OP_ADD:
++      if (!isFloatType(i->dType) && typeSizeof(i->dType) == 8) // 64-bit integer add only
++         lowered = handleIADD64(i);
++      break;
++   case OP_PFETCH:
++      handlePFETCH(i); // result not tracked, so 'i' is never deleted on this path
++      break;
++   case OP_LOAD:
++      handleLOAD(i); // result not tracked, so 'i' is never deleted on this path
++      break;
++   default:
++      break;
++   }
++   // A handler returning true means the original instruction is no longer needed.
++   if (lowered)
++      delete_Instruction(prog, i);
++
++   return true;
++}
++
++bool
++GV100LoweringPass::handleDMNMX(Instruction *i) // visit(): OP_MIN / OP_MAX with dType F64
++{
++   Value *pred = bld.getSSA(1, FILE_PREDICATE);
++   Value *src0[2], *src1[2], *dest[2];
++   // One compare into 'pred', then each 32-bit word is selected separately.
++   bld.mkCmp(OP_SET, (i->op == OP_MIN) ? CC_LT : CC_GT, TYPE_U32, pred,
++             i->sType, i->getSrc(0), i->getSrc(1));
++   bld.mkSplit(src0, 4, i->getSrc(0));
++   bld.mkSplit(src1, 4, i->getSrc(1));
++   bld.mkSplit(dest, 4, i->getDef(0)); // NOTE(review): splits the def that MERGE writes below; confirm intent
++   bld.mkOp3(OP_SELP, TYPE_U32, dest[0], src0[0], src1[0], pred);
++   bld.mkOp3(OP_SELP, TYPE_U32, dest[1], src0[1], src1[1], pred);
++   bld.mkOp2(OP_MERGE, TYPE_U64, i->getDef(0), dest[0], dest[1]);
++   return true; // visit() deletes the original MIN/MAX
++}
++
++bool
++GV100LoweringPass::handleEXTBF(Instruction *i) // OP_EXTBF -> PERMT/BMSK/AND/SHR (+SGXT)
++{
++   Value *bit = bld.getScratch();
++   Value *cnt = bld.getScratch();
++   Value *mask = bld.getScratch();
++   Value *zero = bld.mkImm(0);
++   // src 1 is unpacked with two PERMTs into 'bit' (0x4440) and 'cnt' (0x4441).
++   bld.mkOp3(OP_PERMT, TYPE_U32, bit, i->getSrc(1), bld.mkImm(0x4440), zero);
++   bld.mkOp3(OP_PERMT, TYPE_U32, cnt, i->getSrc(1), bld.mkImm(0x4441), zero);
++   bld.mkOp2(OP_BMSK, TYPE_U32, mask, bit, cnt); // mask built from 'bit' and 'cnt'
++   bld.mkOp2(OP_AND, TYPE_U32, mask, i->getSrc(0), mask); // 'mask' now holds the masked source
++   bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), mask, bit); // shift the field right by 'bit'
++   if (isSignedType(i->dType)) // signed result: extra SGXT using 'cnt'
++      bld.mkOp2(OP_SGXT, TYPE_S32, i->getDef(0), i->getDef(0), cnt);
++
++   return true; // visit() deletes the original EXTBF
++}
++
++bool
++GV100LoweringPass::handleFLOW(Instruction *i) // visit(): OP_BREAK and OP_CONT
++{
++   i->op = OP_BRA; // rewritten in place as OP_BRA
++   return false; // so the instruction must not be deleted
++}
++
++bool
++GV100LoweringPass::handleI2I(Instruction *i) // int-to-int CVT done as sType -> F32 -> dType
++{
++   bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(0), i->sType, i->getSrc(0))->
++      subOp = i->subOp; // original subOp is copied to the first conversion only
++   bld.mkCvt(OP_CVT, i->dType, i->getDef(0), TYPE_F32, i->getDef(0)); // NOTE(review): F32 intermediate; confirm precision is acceptable
++   return true; // visit() deletes the original CVT
++}
++
++bool
++GV100LoweringPass::handleINSBF(Instruction *i) // OP_INSBF -> PERMT/BMSK/AND/SHL/LOP3_LUT
++{
++   Value *bit = bld.getScratch();
++   Value *cnt = bld.getScratch();
++   Value *mask = bld.getScratch();
++   Value *src0 = bld.getScratch();
++   Value *zero = bld.mkImm(0);
++   // src 1 is unpacked with two PERMTs into 'bit' (0x4440) and 'cnt' (0x4441).
++   bld.mkOp3(OP_PERMT, TYPE_U32, bit, i->getSrc(1), bld.mkImm(0x4440), zero);
++   bld.mkOp3(OP_PERMT, TYPE_U32, cnt, i->getSrc(1), bld.mkImm(0x4441), zero);
++   bld.mkOp2(OP_BMSK, TYPE_U32, mask, zero, cnt); // mask built from position 0 and 'cnt'
++   // Mask the inserted value first, then shift it left by 'bit'.
++   bld.mkOp2(OP_AND, TYPE_U32, src0, i->getSrc(0), mask);
++   bld.mkOp2(OP_SHL, TYPE_U32, src0, src0, bit);
++   // Shift the mask by 'bit' as well; result = src0 | (src 2 & ~mask).
++   bld.mkOp2(OP_SHL, TYPE_U32, mask, mask, bit);
++   bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), src0, i->getSrc(2), mask)->
++      subOp = NV50_IR_SUBOP_LOP3_LUT(a | (b & ~c));
++
++   return true; // visit() deletes the original INSBF
++}
++
++bool
++GV100LoweringPass::handlePINTERP(Instruction *i) // OP_PINTERP -> OP_LINTERP followed by OP_MUL
++{
++   Value *src2 = i->srcExists(2) ? i->getSrc(2) : NULL;
++   Instruction *ipa, *mul;
++   // LINTERP on src 0 (and src 2 if present), then multiply the result by src 1.
++   ipa = bld.mkOp2(OP_LINTERP, TYPE_F32, i->getDef(0), i->getSrc(0), src2);
++   ipa->ipa = i->ipa; // copy the original's ipa field to the LINTERP
++   mul = bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(0), i->getDef(0), i->getSrc(1));
++   // For INTERP_SC, LINTERP gets a predicate def and the MUL runs under CC_NOT_P of it.
++   if (i->getInterpMode() == NV50_IR_INTERP_SC) {
++      ipa->setDef(1, bld.getSSA(1, FILE_PREDICATE));
++      mul->setPredicate(CC_NOT_P, ipa->getDef(1));
++   }
++
++   return true; // visit() deletes the original PINTERP
++}
++
++bool
++GV100LoweringPass::handlePREFLOW(Instruction *i) // visit(): OP_PREBREAK and OP_PRECONT
++{
++   return true; // nothing is emitted; visit() just deletes the instruction
++}
++
++bool
++GV100LoweringPass::handlePRESIN(Instruction *i) // OP_PRESIN -> OP_MUL by a constant
++{
++   const float f = 1.0 / (2.0 * 3.14159265); // 1 / (2 * pi)
++   bld.mkOp2(OP_MUL, i->dType, i->getDef(0), i->getSrc(0), bld.mkImm(f));
++   return true; // visit() deletes the original PRESIN
++}
++
++bool
++GV100LoweringPass::visit(Instruction *i) // per-instruction dispatch to the handlers above
++{
++   bool lowered = false;
++
++   bld.setPosition(i, false); // NOTE(review): presumably builds before 'i'; confirm
++
++   switch (i->op) {
++   case OP_BREAK:
++   case OP_CONT:
++      lowered = handleFLOW(i); // rewrites 'i' in place and returns false
++      break;
++   case OP_PREBREAK:
++   case OP_PRECONT:
++      lowered = handlePREFLOW(i); // always true: these ops are dropped
++      break;
++   case OP_CVT:
++      if (i->src(0).getFile() != FILE_PREDICATE &&
++          i->def(0).getFile() != FILE_PREDICATE &&
++          !isFloatType(i->dType) && !isFloatType(i->sType)) // int-to-int, no predicates
++         lowered = handleI2I(i);
++      break;
++   case OP_EXTBF:
++      lowered = handleEXTBF(i);
++      break;
++   case OP_INSBF:
++      lowered = handleINSBF(i);
++      break;
++   case OP_MAX:
++   case OP_MIN:
++      if (i->dType == TYPE_F64) // only the F64 form is handled in this pass
++         lowered = handleDMNMX(i);
++      break;
++   case OP_PINTERP:
++      lowered = handlePINTERP(i);
++      break;
++   case OP_PRESIN:
++      lowered = handlePRESIN(i);
++      break;
++   default:
++      break;
++   }
++   // A handler returning true means the original instruction is no longer needed.
++   if (lowered)
++      delete_Instruction(prog, i);
++
++   return true;
++}
++
++} // namespace nv50_ir
+diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.h
+new file mode 100644
+index 00000000000..92fdb938244
+--- /dev/null
++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.h
+@@ -0,0 +1,79 @@
++/*
++ * Copyright 2020 Red Hat Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ */
++#ifndef __NV50_IR_LOWERING_GV100_H__
++#define __NV50_IR_LOWERING_GV100_H__
++#include "codegen/nv50_ir_lowering_gm107.h"
++
++namespace nv50_ir {
++
++class GV100LoweringPass : public Pass // per-instruction lowering pass; each handle*() rewrites one kind of operation
++{
++public:
++   GV100LoweringPass(Program *p) {
++      bld.setProgram(p); // bind the instruction builder to the program being lowered
++   }
++
++private:
++   BuildUtil bld; // builder the handlers use to emit replacement instructions
++
++   virtual bool visit(Instruction *); // dispatches on the opcode to the handlers below
++   // Each handler returns true when the visited instruction has been replaced.
++   bool handleDMNMX(Instruction *);
++   bool handleEXTBF(Instruction *);
++   bool handleFLOW(Instruction *);
++   bool handleI2I(Instruction *);
++   bool handleINSBF(Instruction *);
++   bool handlePINTERP(Instruction *);
++   bool handlePREFLOW(Instruction *);
++   bool handlePRESIN(Instruction *);
++};
++
++class GV100LegalizeSSA : public GM107LegalizeSSA // GV100 variant that overrides all three visit() levels
++{
++public:
++   GV100LegalizeSSA(Program *p) {
++      bld.setProgram(p); // bind the inherited builder to the program being processed
++   }
++
++private:
++   virtual bool visit(Function *) { return true; }   // no per-function work in this pass
++   virtual bool visit(BasicBlock *) { return true; } // no per-block work in this pass
++   virtual bool visit(Instruction *);
++   // Per-operation helpers; NOTE(review): presumably dispatched from visit(Instruction *) - confirm.
++   bool handleCMP(Instruction *);
++   bool handleIADD64(Instruction *);
++   bool handleIMAD_HIGH(Instruction *);
++   bool handleIMNMX(Instruction *);
++   bool handleIMUL(Instruction *);
++   bool handleLOP2(Instruction *);
++   bool handleNOT(Instruction *);
++   bool handlePREEX2(Instruction *);
++   bool handleQUADON(Instruction *);
++   bool handleQUADPOP(Instruction *);
++   bool handleSET(Instruction *);
++   bool handleSHFL(Instruction *);
++   bool handleSHL(Instruction *);
++   bool handleSHR(Instruction *);
++   bool handleSUB(Instruction *);
++};
++}
++#endif
+diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+index a60881000fe..f100445e9d0 100644
+--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+@@ -310,6 +310,14 @@ NVC0LegalizeSSA::handleSET(CmpInstruction *cmp)
+    cmp->sType = hTy;
+ }
+ 
++void
++NVC0LegalizeSSA::handleBREV(Instruction *i) // rewrite OP_BREV in place as OP_EXTBF with the REV sub-operation
++{
++   i->op = OP_EXTBF;
++   i->subOp = NV50_IR_SUBOP_EXTBF_REV;
++   i->setSrc(1, bld.mkImm(0x2000)); // NOTE(review): presumably width 0x20 at offset 0 (width in byte 1, offset in byte 0) - confirm
++}
++
+ bool
+ NVC0LegalizeSSA::visit(Function *fn)
+ {
+@@ -354,6 +362,9 @@ NVC0LegalizeSSA::visit(BasicBlock *bb)
+          if (typeSizeof(i->sType) == 8 && i->sType != TYPE_F64)
+             handleSET(i->asCmp());
+          break;
++      case OP_BREV:
++         handleBREV(i);
++         break;
+       default:
+          break;
+       }
+@@ -856,11 +867,11 @@ NVC0LegalizePostRA::visit(BasicBlock *bb)
+                next = hi;
+          }
+ 
+-         if (i->op == OP_SAT || i->op == OP_NEG || i->op == OP_ABS)
+-            replaceCvt(i);
+-
+          if (i->op != OP_MOV && i->op != OP_PFETCH)
+             replaceZero(i);
++
++         if (i->op == OP_SAT || i->op == OP_NEG || i->op == OP_ABS)
++            replaceCvt(i);
+       }
+    }
+    if (!bb->getEntry())
+@@ -887,6 +898,8 @@ NVC0LoweringPass::visit(Function *fn)
+       gpEmitAddress = bld.loadImm(NULL, 0)->asLValue();
+       if (fn->cfgExit) {
+          bld.setPosition(BasicBlock::get(fn->cfgExit)->getExit(), false);
++         if (prog->getTarget()->getChipset() >= NVISA_GV100_CHIPSET)
++            bld.mkOp1(OP_FINAL, TYPE_NONE, NULL, gpEmitAddress)->fixed = 1;
+          bld.mkMovToReg(0, gpEmitAddress);
+       }
+    }
+diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+index b4c405a9ea5..a4925013ee4 100644
+--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+@@ -68,6 +68,7 @@ private:
+    void handleSET(CmpInstruction *);
+    void handleTEXLOD(TexInstruction *);
+    void handleShift(Instruction *);
++   void handleBREV(Instruction *);
+ 
+ protected:
+    BuildUtil bld;
+diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+index 2f46b0e886a..3a4ec3ca561 100644
+--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+@@ -558,6 +558,19 @@ ConstantFolding::expr(Instruction *i,
+    memset(&res.data, 0, sizeof(res.data));
+ 
+    switch (i->op) {
++   case OP_SGXT: { // sign-extend the low `bits` bits of a; a width of 0 folds to 0
++      const uint32_t bits = b->data.u32;
++      if (bits > 32)
++         return; // 32 - bits would be an invalid shift count; leave the instruction unfolded
++      uint32_t data = bits ? (a->data.u32 & (0xffffffff >> (32 - bits))) : 0;
++      if (bits && bits < 32 && (data & (1u << (bits - 1))))
++         data -= 1u << bits; // field sign bit set: fill the upper bits with ones
++      res.data.u32 = data;
++      break;
++   }
++   case OP_BMSK: // mask of b bits starting at bit a; 64-bit math keeps widths 31 and 32 well defined
++      res.data.u32 = (uint32_t)((((uint64_t)1 << b->data.u32) - 1) << a->data.u32);
++      break;
+    case OP_MAD:
+    case OP_FMA:
+    case OP_MUL:
+@@ -780,6 +793,23 @@ ConstantFolding::expr(Instruction *i,
+    memset(&res.data, 0, sizeof(res.data));
+ 
+    switch (i->op) {
++   case OP_LOP3_LUT:
++      for (int n = 0; n < 32; n++) {
++         uint8_t lut = ((a->data.u32 >> n) & 1) << 2 |
++                       ((b->data.u32 >> n) & 1) << 1 |
++                       ((c->data.u32 >> n) & 1);
++         res.data.u32 |= !!(i->subOp & (1 << lut)) << n;
++      }
++      break;
++   case OP_PERMT: // fold only the default mode, and only when every selector names one of the 8 input bytes
++      if (!i->subOp && !(b->data.u32 & 0x8888)) { // a selector with bit 3 set would shift input by >= 64 bits
++         uint64_t input = (uint64_t)c->data.u32 << 32 | a->data.u32; // bytes 0-3 from a, bytes 4-7 from c
++         uint16_t permt = b->data.u32; // four 4-bit selectors, one per result byte
++         for (int n = 0 ; n < 4; n++, permt >>= 4)
++            res.data.u32 |= ((input >> ((permt & 0xf) * 8)) & 0xff) << n * 8;
++      } else
++         return; // leave the instruction unfolded
++      break;
+    case OP_INSBF: {
+       int offset = b->data.u32 & 0xff;
+       int width = (b->data.u32 >> 8) & 0xff;
+@@ -1526,6 +1556,12 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
+       i->subOp = 0;
+       break;
+    }
++   case OP_BREV: {
++      uint32_t res = util_bitreverse(imm0.reg.data.u32);
++      i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res));
++      i->op = OP_MOV;
++      break;
++   }
+    case OP_POPCNT: {
+       // Only deal with 1-arg POPCNT here
+       if (i->srcExists(1))
+diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+index 5dcbf3c3e0c..ce0d2507dc1 100644
+--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+@@ -93,8 +93,10 @@ const char *operationStr[OP_LAST + 1] =
+    "and",
+    "or",
+    "xor",
++   "lop3 lut",
+    "shl",
+    "shr",
++   "shf",
+    "max",
+    "min",
+    "sat",
+@@ -142,6 +144,7 @@ const char *operationStr[OP_LAST + 1] =
+    "pinterp",
+    "emit",
+    "restart",
++   "final",
+    "tex",
+    "texbias",
+    "texlod",
+@@ -177,7 +180,10 @@ const char *operationStr[OP_LAST + 1] =
+    "insbf",
+    "extbf",
+    "bfind",
++   "brev",
++   "bmsk",
+    "permt",
++   "sgxt",
+    "atom",
+    "bar",
+    "vadd",
+@@ -193,6 +199,7 @@ const char *operationStr[OP_LAST + 1] =
+    "shfl",
+    "vote",
+    "bufq",
++   "warpsync",
+    "(invalid)"
+ };
+ 
+diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+index 6df2664da22..4e5b21d9176 100644
+--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+@@ -988,6 +988,8 @@ GCRA::coalesce(ArrayList& insns)
+    case 0x110:
+    case 0x120:
+    case 0x130:
++   case 0x140:
++   case 0x160:
+       ret = doCoalesce(insns, JOIN_MASK_UNION);
+       break;
+    default:
+@@ -2297,13 +2299,25 @@ RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex)
+    if (isTextureOp(tex->op))
+       textureMask(tex);
+ 
+-   if (isScalarTexGM107(tex)) {
+-      handleScalarTexGM107(tex);
+-      return;
+-   }
++   if (targ->getChipset() < NVISA_GV100_CHIPSET) {
++      if (isScalarTexGM107(tex)) {
++         handleScalarTexGM107(tex);
++         return;
++      }
+ 
+-   assert(!tex->tex.scalar);
+-   condenseDefs(tex);
++      assert(!tex->tex.scalar);
++      condenseDefs(tex);
++   } else {
++      if (isTextureOp(tex->op)) {
++         int defCount = tex->defCount(0xff);
++         if (defCount > 3)
++            condenseDefs(tex, 2, 3);
++         if (defCount > 1)
++            condenseDefs(tex, 0, 1);
++      } else {
++         condenseDefs(tex);
++      }
++   }
+ 
+    if (isSurfaceOp(tex->op)) {
+       int s = tex->tex.target.getDim() +
+@@ -2485,6 +2499,8 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
+          case 0x110:
+          case 0x120:
+          case 0x130:
++         case 0x140:
++         case 0x160:
+             texConstraintGM107(tex);
+             break;
+          default:
+diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_sched_gm107.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_sched_gm107.h
+new file mode 100644
+index 00000000000..54443ae2770
+--- /dev/null
++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_sched_gm107.h
+@@ -0,0 +1,156 @@
++#ifndef __NV50_IR_SCHED_GM107_H__
++#define __NV50_IR_SCHED_GM107_H__
++namespace nv50_ir {
++
++class SchedDataCalculatorGM107 : public Pass
++{
++public:
++   SchedDataCalculatorGM107(const TargetGM107 *targ) : targ(targ) {}
++
++private:
++   struct RegScores
++   {
++      struct ScoreData {
++         int r[256];
++         int p[8];
++         int c;
++      } rd, wr;
++      int base;
++
++      void rebase(const int base)
++      {
++         const int delta = this->base - base;
++         if (!delta)
++            return;
++         this->base = 0;
++
++         for (int i = 0; i < 256; ++i) {
++            rd.r[i] += delta;
++            wr.r[i] += delta;
++         }
++         for (int i = 0; i < 8; ++i) {
++            rd.p[i] += delta;
++            wr.p[i] += delta;
++         }
++         rd.c += delta;
++         wr.c += delta;
++      }
++      void wipe()
++      {
++         memset(&rd, 0, sizeof(rd));
++         memset(&wr, 0, sizeof(wr));
++      }
++      int getLatest(const ScoreData& d) const
++      {
++         int max = 0;
++         for (int i = 0; i < 256; ++i)
++            if (d.r[i] > max)
++               max = d.r[i];
++         for (int i = 0; i < 8; ++i)
++            if (d.p[i] > max)
++               max = d.p[i];
++         if (d.c > max)
++            max = d.c;
++         return max;
++      }
++      inline int getLatestRd() const
++      {
++         return getLatest(rd);
++      }
++      inline int getLatestWr() const
++      {
++         return getLatest(wr);
++      }
++      inline int getLatest() const
++      {
++         return MAX2(getLatestRd(), getLatestWr());
++      }
++      void setMax(const RegScores *that)
++      {
++         for (int i = 0; i < 256; ++i) {
++            rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
++            wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
++         }
++         for (int i = 0; i < 8; ++i) {
++            rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
++            wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
++         }
++         rd.c = MAX2(rd.c, that->rd.c);
++         wr.c = MAX2(wr.c, that->wr.c);
++      }
++      void print(int cycle)
++      {
++         for (int i = 0; i < 256; ++i) {
++            if (rd.r[i] > cycle)
++               INFO("rd $r%i @ %i\n", i, rd.r[i]);
++            if (wr.r[i] > cycle)
++               INFO("wr $r%i @ %i\n", i, wr.r[i]);
++         }
++         for (int i = 0; i < 8; ++i) {
++            if (rd.p[i] > cycle)
++               INFO("rd $p%i @ %i\n", i, rd.p[i]);
++            if (wr.p[i] > cycle)
++               INFO("wr $p%i @ %i\n", i, wr.p[i]);
++         }
++         if (rd.c > cycle)
++            INFO("rd $c @ %i\n", rd.c);
++         if (wr.c > cycle)
++            INFO("wr $c @ %i\n", wr.c);
++      }
++   };
++
++   RegScores *score; // for current BB
++   std::vector<RegScores> scoreBoards;
++
++   const TargetGM107 *targ;
++   bool visit(Function *);
++   bool visit(BasicBlock *);
++
++   void commitInsn(const Instruction *, int);
++   int calcDelay(const Instruction *, int) const;
++   void setDelay(Instruction *, int, const Instruction *);
++   void recordWr(const Value *, int, int);
++   void checkRd(const Value *, int, int&) const;
++
++   inline void emitYield(Instruction *);
++   inline void emitStall(Instruction *, uint8_t);
++   inline void emitReuse(Instruction *, uint8_t);
++   inline void emitWrDepBar(Instruction *, uint8_t);
++   inline void emitRdDepBar(Instruction *, uint8_t);
++   inline void emitWtDepBar(Instruction *, uint8_t);
++
++   inline int getStall(const Instruction *) const;
++   inline int getWrDepBar(const Instruction *) const;
++   inline int getRdDepBar(const Instruction *) const;
++   inline int getWtDepBar(const Instruction *) const;
++
++   void setReuseFlag(Instruction *);
++
++   inline void printSchedInfo(int, const Instruction *) const;
++
++   struct LiveBarUse {
++      LiveBarUse(Instruction *insn, Instruction *usei)
++         : insn(insn), usei(usei) { }
++      Instruction *insn;
++      Instruction *usei;
++   };
++
++   struct LiveBarDef {
++      LiveBarDef(Instruction *insn, Instruction *defi)
++         : insn(insn), defi(defi) { }
++      Instruction *insn;
++      Instruction *defi;
++   };
++
++   bool insertBarriers(BasicBlock *);
++
++   bool doesInsnWriteTo(const Instruction *insn, const Value *val) const;
++   Instruction *findFirstUse(const Instruction *) const;
++   Instruction *findFirstDef(const Instruction *) const;
++
++   bool needRdDepBar(const Instruction *) const;
++   bool needWrDepBar(const Instruction *) const;
++};
++
++}; // namespace nv50_ir
++#endif
+diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+index 5c6d0570ae2..765375a47df 100644
+--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+@@ -33,7 +33,7 @@ const uint8_t Target::operationSrcNr[] =
+    2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
+    3, 3,                   // SHLADD, XMAD
+    1, 1, 1,                // ABS, NEG, NOT
+-   2, 2, 2, 2, 2,          // AND, OR, XOR, SHL, SHR
++   2, 2, 2, 3, 2, 2, 3,    // AND, OR, XOR, LOP3_LUT, SHL, SHR, SHF
+    2, 2, 1,                // MAX, MIN, SAT
+    1, 1, 1, 1,             // CEIL, FLOOR, TRUNC, CVT
+    3, 3, 3, 2, 3, 3,       // SET_AND,OR,XOR, SET, SELP, SLCT
+@@ -43,7 +43,7 @@ const uint8_t Target::operationSrcNr[] =
+    0, 0, 0,                // PRERET,CONT,BREAK
+    0, 0, 0, 0, 0, 0,       // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
+    1, 1, 1, 2, 1, 2,       // VFETCH, PFETCH, AFETCH, EXPORT, LINTERP, PINTERP
+-   1, 1,                   // EMIT, RESTART
++   1, 1, 1,                // EMIT, RESTART, FINAL
+    1, 1, 1,                // TEX, TXB, TXL,
+    1, 1, 1, 1, 1, 1, 2,    // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP
+    1, 1, 2, 2, 2, 2, 2,    // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
+@@ -51,13 +51,15 @@ const uint8_t Target::operationSrcNr[] =
+    0,                      // TEXBAR
+    1, 1,                   // DFDX, DFDY
+    1, 2, 1, 2, 0, 0,       // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
+-   2, 3, 2, 1, 3,          // POPCNT, INSBF, EXTBF, BFIND, PERMT
++   2, 3, 2, 1, 1, 2, 3,    // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK, PERMT
++   2,                      // SGXT
+    2, 2,                   // ATOM, BAR
+    2, 2, 2, 2, 3, 2,       // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
+    2, 2, 2, 1,             // VSHR, VSHL, VSEL, CCTL
+    3,                      // SHFL
+    1,                      // VOTE
+    1,                      // BUFQ
++   1,                      // WARPSYNC
+    0
+ };
+ 
+@@ -75,10 +77,10 @@ const OpClass Target::operationClass[] =
+    OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
+    OPCLASS_ARITH, OPCLASS_ARITH,
+    OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
+-   // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
++   // ABS, NEG; NOT, AND, OR, XOR, LOP3_LUT; SHL, SHR, SHF
+    OPCLASS_CONVERT, OPCLASS_CONVERT,
+-   OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
+-   OPCLASS_SHIFT, OPCLASS_SHIFT,
++   OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
++   OPCLASS_SHIFT, OPCLASS_SHIFT, OPCLASS_SHIFT,
+    // MAX, MIN
+    OPCLASS_COMPARE, OPCLASS_COMPARE,
+    // SAT, CEIL, FLOOR, TRUNC; CVT
+@@ -103,8 +105,8 @@ const OpClass Target::operationClass[] =
+    OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_STORE,
+    // LINTERP, PINTERP
+    OPCLASS_SFU, OPCLASS_SFU,
+-   // EMIT, RESTART
+-   OPCLASS_CONTROL, OPCLASS_CONTROL,
++   // EMIT, RESTART, FINAL
++   OPCLASS_CONTROL, OPCLASS_CONTROL, OPCLASS_CONTROL,
+    // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TXLQ; TEXCSAA, TEXPREP
+    OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
+    OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
+@@ -119,9 +121,9 @@ const OpClass Target::operationClass[] =
+    // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
+    OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
+    OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
+-   // POPCNT, INSBF, EXTBF, BFIND; PERMT
++   // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK; PERMT, SGXT
++   OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
+    OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
+-   OPCLASS_BITFIELD,
+    // ATOM, BAR
+    OPCLASS_ATOMIC, OPCLASS_CONTROL,
+    // VADD, VAVG, VMIN, VMAX
+@@ -136,10 +138,13 @@ const OpClass Target::operationClass[] =
+    OPCLASS_OTHER,
+    // BUFQ
+    OPCLASS_OTHER,
++   // WARPSYNC
++   OPCLASS_OTHER,
+    OPCLASS_PSEUDO // LAST
+ };
+ 
+ 
++extern Target *getTargetGV100(unsigned int chipset);
+ extern Target *getTargetGM107(unsigned int chipset);
+ extern Target *getTargetNVC0(unsigned int chipset);
+ extern Target *getTargetNV50(unsigned int chipset);
+@@ -149,6 +154,9 @@ Target *Target::create(unsigned int chipset)
+    STATIC_ASSERT(ARRAY_SIZE(operationSrcNr) == OP_LAST + 1);
+    STATIC_ASSERT(ARRAY_SIZE(operationClass) == OP_LAST + 1);
+    switch (chipset & ~0xf) {
++   case 0x160:
++   case 0x140:
++      return getTargetGV100(chipset);
+    case 0x110:
+    case 0x120:
+    case 0x130:
+diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
+index afeca14d7d1..0f7db116577 100644
+--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
+@@ -200,7 +200,7 @@ public:
+       uint8_t dstMods;
+       uint16_t srcFiles[3];
+       uint16_t dstFiles;
+-      unsigned int minEncSize  : 4;
++      unsigned int minEncSize  : 5;
+       unsigned int vector      : 1;
+       unsigned int predicate   : 1;
+       unsigned int commutative : 1;
+diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.cpp
+new file mode 100644
+index 00000000000..fd969e1ece5
+--- /dev/null
++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.cpp
+@@ -0,0 +1,594 @@
++/*
++ * Copyright 2020 Red Hat Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ */
++#include "codegen/nv50_ir_target_gv100.h"
++#include "codegen/nv50_ir_lowering_gv100.h"
++#include "codegen/nv50_ir_emit_gv100.h"
++
++namespace nv50_ir {
++
++void
++TargetGV100::initOpInfo() // fill nativeFileMap and give every opInfo[] entry its defaults
++{
++   unsigned int i, j;
++
++   static const operation commutative[] =
++   {
++      OP_ADD, OP_MUL, OP_MAD, OP_FMA, OP_MAX, OP_MIN,
++      OP_SET_AND, OP_SET_OR, OP_SET_XOR, OP_SET, OP_SELP, OP_SLCT
++   };
++
++   static const operation noDest[] =
++   {
++      OP_EXIT
++   };
++
++   static const operation noPred[] = // currently empty, so no operation has its predicate flag cleared
++   {
++   };
++
++   for (i = 0; i < DATA_FILE_COUNT; ++i)
++      nativeFileMap[i] = (DataFile)i; // identity mapping by default
++   nativeFileMap[FILE_ADDRESS] = FILE_GPR;     // address values are mapped to GPRs
++   nativeFileMap[FILE_FLAGS] = FILE_PREDICATE; // flag values are mapped to predicates
++
++   for (i = 0; i < OP_LAST; ++i) {
++      opInfo[i].variants = NULL;
++      opInfo[i].op = (operation)i;
++      opInfo[i].srcTypes = 1 << (int)TYPE_F32;
++      opInfo[i].dstTypes = 1 << (int)TYPE_F32;
++      opInfo[i].immdBits = 0;
++      opInfo[i].srcNr = operationSrcNr[i];
++
++      for (j = 0; j < opInfo[i].srcNr; ++j) {
++         opInfo[i].srcMods[j] = 0;
++         opInfo[i].srcFiles[j] = 1 << (int)FILE_GPR; // sources default to GPR only
++      }
++      opInfo[i].dstMods = 0;
++      opInfo[i].dstFiles = 1 << (int)FILE_GPR;
++
++      opInfo[i].hasDest = 1;
++      opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA);
++      opInfo[i].commutative = false; /* set below */
++      opInfo[i].pseudo = (i < OP_MOV); // operations numbered below OP_MOV are marked pseudo
++      opInfo[i].predicate = !opInfo[i].pseudo;
++      opInfo[i].flow = (i >= OP_BRA && i <= OP_JOIN);
++      opInfo[i].minEncSize = 16; // 16 does not fit a 4-bit field; the bit-field must be at least 5 bits wide
++   }
++   for (i = 0; i < ARRAY_SIZE(commutative); ++i)
++      opInfo[commutative[i]].commutative = true;
++   for (i = 0; i < ARRAY_SIZE(noDest); ++i)
++      opInfo[noDest[i]].hasDest = 0;
++   for (i = 0; i < ARRAY_SIZE(noPred); ++i)
++      opInfo[noPred[i]].predicate = 0;
++}
++
++struct opInfo {
++   struct {
++      uint8_t files;
++      uint8_t mods;
++   } src[3];
++};
++
++#define SRC_NONE 0
++#define SRC_R    (1 << FILE_GPR)
++#define SRC_I    (1 << FILE_MEMORY_CONST)
++#define SRC_C    (1 << FILE_IMMEDIATE)
++#define SRC_RC   (SRC_R |         SRC_C)
++#define SRC_RI   (SRC_R | SRC_I        )
++#define SRC_RIC  (SRC_R | SRC_I | SRC_C)
++
++#define MOD_NONE 0
++#define MOD_NEG  NV50_IR_MOD_NEG
++#define MOD_ABS  NV50_IR_MOD_ABS
++#define MOD_NOT  NV50_IR_MOD_NOT
++#define MOD_NA   (MOD_NEG | MOD_ABS)
++
++#define OPINFO(O,SA,MA,SB,MB,SC,MC)                                            \
++static struct opInfo                                                           \
++opInfo_##O = {                                                                 \
++   .src = { { SRC_##SA, MOD_##MA },                                            \
++            { SRC_##SB, MOD_##MB },                                            \
++            { SRC_##SC, MOD_##MC }},                                           \
++};
++
++
++/* Handled by GV100LegalizeSSA. */
++OPINFO(FABS     , RIC , NA  , NONE, NONE, NONE, NONE);
++OPINFO(FCMP     , R   , NONE, RIC , NONE, RIC , NONE); //XXX: use FSEL for mods
++OPINFO(FNEG     , RIC , NA  , NONE, NONE, NONE, NONE);
++OPINFO(FSET     , R   , NA  , RIC , NA  , NONE, NONE);
++OPINFO(ICMP     , R   , NONE, RIC , NONE, RIC , NONE);
++OPINFO(IMUL     , R   , NONE, RIC , NONE, NONE, NONE);
++OPINFO(INEG     , RIC , NEG , NONE, NONE, NONE, NONE);
++OPINFO(ISET     , R   , NONE, RIC , NONE, NONE, NONE);
++OPINFO(LOP2     , R   , NOT , RIC , NOT , NONE, NONE);
++OPINFO(NOT      , RIC , NONE, NONE, NONE, NONE, NONE);
++OPINFO(SAT      , RIC , NA  , NONE, NONE, NONE, NONE);
++OPINFO(SHL      , RIC , NONE, RIC , NONE, NONE, NONE);
++OPINFO(SHR      , RIC , NONE, RIC , NONE, NONE, NONE);
++OPINFO(SUB      , R   , NONE, RIC , NEG , NONE, NONE);
++OPINFO(IMNMX    , R   , NONE, RIC , NONE, NONE, NONE);
++
++/* Handled by CodeEmitterGV100. */
++OPINFO(AL2P     , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(ALD      , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(AST      , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(ATOM     , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(ATOMS    , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(BAR      , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(BRA      , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(BMSK     , R   , NONE, RIC , NONE, NONE, NONE);
++OPINFO(BREV     , RIC , NONE, NONE, NONE, NONE, NONE);
++OPINFO(CCTL     , NONE, NONE, NONE, NONE, NONE, NONE);
++//OPINFO(CS2R     , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(DADD     , R   , NA  , RIC , NA  , NONE, NONE);
++OPINFO(DFMA     , R   , NA  , RIC , NA  , RIC , NA  );
++OPINFO(DMUL     , R   , NA  , RIC , NA  , NONE, NONE);
++OPINFO(DSETP    , R   , NA  , RIC , NA  , NONE, NONE);
++OPINFO(EXIT     , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(F2F      , RIC , NA  , NONE, NONE, NONE, NONE);
++OPINFO(F2I      , RIC , NA  , NONE, NONE, NONE, NONE);
++OPINFO(FADD     , R   , NA  , RIC , NA  , NONE, NONE);
++OPINFO(FFMA     , R   , NA  , RIC , NA  , RIC , NA  );
++OPINFO(FLO      , RIC , NOT , NONE, NONE, NONE, NONE);
++OPINFO(FMNMX    , R   , NA  , RIC , NA  , NONE, NONE);
++OPINFO(FMUL     , R   , NA  , RIC , NA  , NONE, NONE);
++OPINFO(FRND     , RIC , NA  , NONE, NONE, NONE, NONE);
++OPINFO(FSET_BF  , R   , NA  , RIC , NA  , NONE, NONE);
++OPINFO(FSETP    , R   , NA  , RIC , NA  , NONE, NONE);
++OPINFO(FSWZADD  , R   , NONE, R   , NONE, NONE, NONE);
++OPINFO(I2F      , RIC , NONE, NONE, NONE, NONE, NONE);
++OPINFO(IABS     , RIC , NONE, NONE, NONE, NONE, NONE);
++OPINFO(IADD3    , R   , NEG , RIC , NEG , R   , NEG );
++OPINFO(IMAD     , R   , NONE, RIC , NONE, RIC , NEG );
++OPINFO(IMAD_WIDE, R   , NONE, RIC , NONE, RC  , NEG );
++OPINFO(IPA      , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(ISBERD   , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(ISETP    , R   , NONE, RIC , NONE, NONE, NONE);
++OPINFO(KILL     , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(LD       , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(LDC      , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(LDL      , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(LDS      , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(LEA      , R   , NEG , I   , NONE, RIC , NEG );
++OPINFO(LOP3_LUT , R   , NONE, RIC , NONE, R   , NONE);
++OPINFO(MEMBAR   , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(MOV      , RIC , NONE, NONE, NONE, NONE, NONE);
++OPINFO(MUFU     , RIC , NA  , NONE, NONE, NONE, NONE);
++OPINFO(NOP      , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(OUT      , R   , NONE, RI  , NONE, NONE, NONE);
++OPINFO(PIXLD    , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(PLOP3_LUT, NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(POPC     , RIC , NOT , NONE, NONE, NONE, NONE);
++OPINFO(PRMT     , R   , NONE, RIC , NONE, RIC , NONE);
++OPINFO(RED      , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(SGXT     , R   , NONE, RIC , NONE, NONE, NONE);
++OPINFO(S2R      , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(SEL      , R   , NONE, RIC , NONE, NONE, NONE);
++OPINFO(SHF      , R   , NONE, RIC , NONE, RIC , NONE);
++OPINFO(SHFL     , R   , NONE, R   , NONE, R   , NONE);
++OPINFO(ST       , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(STL      , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(STS      , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(SUATOM   , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(SULD     , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(SUST     , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(TEX      , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(TLD      , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(TLD4     , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(TMML     , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(TXD      , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(TXQ      , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(VOTE     , NONE, NONE, NONE, NONE, NONE, NONE);
++OPINFO(WARPSYNC , R   , NONE, NONE, NONE, NONE, NONE);
++
++static const struct opInfo *
++getOpInfo(const Instruction *i)
++{ // Picks the opInfo_* entry for i from its op, data types and register files; NULL if none applies.
++   switch (i->op) {
++   case OP_ABS:
++      if (isFloatType(i->dType))
++         return &opInfo_FABS;
++      return &opInfo_IABS;
++   case OP_ADD:
++      if (isFloatType(i->dType)) {
++         if (i->dType == TYPE_F32)
++            return &opInfo_FADD;
++         else
++            return &opInfo_DADD;
++      } else {
++         return &opInfo_IADD3;
++      }
++      break; // not reached: every path above returns
++   case OP_AFETCH: return &opInfo_AL2P;
++   case OP_AND:
++   case OP_OR:
++   case OP_XOR:
++      if (i->def(0).getFile() == FILE_PREDICATE) // predicate destination uses the PLOP3_LUT entry
++         return &opInfo_PLOP3_LUT;
++      return &opInfo_LOP2;
++   case OP_ATOM:
++      if (i->src(0).getFile() == FILE_MEMORY_SHARED)
++         return &opInfo_ATOMS;
++      else
++         if (!i->defExists(0) && i->subOp < NV50_IR_SUBOP_ATOM_CAS) // no destination and subOp below CAS
++            return &opInfo_RED;
++         else
++            return &opInfo_ATOM;
++      break; // not reached: every path above returns
++   case OP_BAR: return &opInfo_BAR;
++   case OP_BFIND: return &opInfo_FLO;
++   case OP_BMSK: return &opInfo_BMSK;
++   case OP_BREV: return &opInfo_BREV;
++   case OP_BRA:
++   case OP_JOIN: return &opInfo_BRA; //XXX
++   case OP_CCTL: return &opInfo_CCTL;
++   case OP_CEIL:
++   case OP_CVT:
++   case OP_FLOOR:
++   case OP_TRUNC:
++      if (i->op == OP_CVT && (i->def(0).getFile() == FILE_PREDICATE ||
++                                 i->src(0).getFile() == FILE_PREDICATE)) {
++         return &opInfo_MOV; // OP_CVT with a predicate on either side shares the MOV entry
++      } else if (isFloatType(i->dType)) {
++         if (isFloatType(i->sType)) {
++            if (i->sType == i->dType)
++               return &opInfo_FRND;
++            else
++               return &opInfo_F2F;
++         } else {
++            return &opInfo_I2F;
++         }
++      } else {
++         if (isFloatType(i->sType))
++            return &opInfo_F2I;
++      }
++      break; // non-float source and destination: falls out to the final return NULL
++   case OP_COS:
++   case OP_EX2:
++   case OP_LG2:
++   case OP_RCP:
++   case OP_RSQ:
++   case OP_SIN:
++   case OP_SQRT: return &opInfo_MUFU;
++   case OP_DISCARD: return &opInfo_KILL;
++   case OP_EMIT:
++   case OP_FINAL:
++   case OP_RESTART: return &opInfo_OUT;
++   case OP_EXIT: return &opInfo_EXIT;
++   case OP_EXPORT: return &opInfo_AST;
++   case OP_FMA:
++   case OP_MAD:
++      if (isFloatType(i->dType)) {
++         if (i->dType == TYPE_F32)
++            return &opInfo_FFMA;
++         else
++            return &opInfo_DFMA;
++      } else {
++         if (typeSizeof(i->dType) != 8) // 8-byte integer results use the IMAD_WIDE entry
++            return &opInfo_IMAD;
++         else
++            return &opInfo_IMAD_WIDE;
++      }
++      break; // not reached: every path above returns
++   case OP_JOINAT: return &opInfo_NOP; //XXX
++   case OP_LINTERP: return &opInfo_IPA;
++   case OP_LOAD:
++      switch (i->src(0).getFile()) {
++      case FILE_MEMORY_CONST : return &opInfo_LDC;
++      case FILE_MEMORY_LOCAL : return &opInfo_LDL;
++      case FILE_MEMORY_SHARED: return &opInfo_LDS;
++      case FILE_MEMORY_GLOBAL: return &opInfo_LD;
++      default:
++         break; // any other source file: no entry, NULL is returned below
++      }
++      break;
++   case OP_LOP3_LUT: return &opInfo_LOP3_LUT;
++   case OP_MAX:
++   case OP_MIN:
++      if (isFloatType(i->dType)) {
++         if (i->dType == TYPE_F32)
++            return &opInfo_FMNMX;
++      } else {
++         return &opInfo_IMNMX;
++      }
++      break; // float types other than F32 have no entry here and yield NULL
++   case OP_MEMBAR: return &opInfo_MEMBAR;
++   case OP_MOV: return &opInfo_MOV;
++   case OP_MUL:
++      if (isFloatType(i->dType)) {
++         if (i->dType == TYPE_F32)
++            return &opInfo_FMUL;
++         else
++            return &opInfo_DMUL;
++      }
++      return &opInfo_IMUL;
++   case OP_NEG:
++      if (isFloatType(i->dType))
++         return &opInfo_FNEG;
++      return &opInfo_INEG;
++   case OP_NOT: return &opInfo_NOT;
++   case OP_PERMT: return &opInfo_PRMT;
++   case OP_PFETCH: return &opInfo_ISBERD;
++   case OP_PIXLD: return &opInfo_PIXLD;
++   case OP_POPCNT: return &opInfo_POPC;
++   case OP_QUADOP: return &opInfo_FSWZADD;
++   case OP_RDSV:
++#if 0
++      if (targ->isCS2RSV(i->getSrc(0)->reg.data.sv.sv))
++         return &opInfo_CS2R;
++#endif
++      return &opInfo_S2R; // the CS2R selection above is compiled out
++   case OP_SAT: return &opInfo_SAT;
++   case OP_SELP: return &opInfo_SEL;
++   case OP_SET:
++   case OP_SET_AND:
++   case OP_SET_OR:
++   case OP_SET_XOR:
++      if (i->def(0).getFile() != FILE_PREDICATE) { // result goes to a non-predicate register
++         if (isFloatType(i->dType)) {
++            if (i->dType == TYPE_F32)
++               return &opInfo_FSET_BF;
++         } else {
++            if (isFloatType(i->sType))
++                  return &opInfo_FSET;
++            return &opInfo_ISET;
++         }
++      } else {
++         if (isFloatType(i->sType))
++            if (i->sType == TYPE_F64)
++               return &opInfo_DSETP;
++            else
++               return &opInfo_FSETP;
++         else
++            return &opInfo_ISETP;
++      }
++      break; // reached only for a non-predicate result with a float dType other than F32
++   case OP_SGXT: return &opInfo_SGXT;
++   case OP_SHF: return &opInfo_SHF;
++   case OP_SHFL: return &opInfo_SHFL;
++   case OP_SHL: return &opInfo_SHL;
++   case OP_SHLADD: return &opInfo_LEA;
++   case OP_SHR: return &opInfo_SHR;
++   case OP_SLCT:
++      if (isFloatType(i->sType))
++         return &opInfo_FCMP;
++      return &opInfo_ICMP;
++   case OP_STORE:
++      switch (i->src(0).getFile()) {
++      case FILE_MEMORY_LOCAL : return &opInfo_STL;
++      case FILE_MEMORY_SHARED: return &opInfo_STS;
++      case FILE_MEMORY_GLOBAL: return &opInfo_ST;
++      default:
++         break; // any other file: no entry, NULL is returned below
++      }
++      break;
++   case OP_SUB: return &opInfo_SUB;
++   case OP_SULDB:
++   case OP_SULDP: return &opInfo_SULD;
++   case OP_SUREDB:
++   case OP_SUREDP: return &opInfo_SUATOM;
++   case OP_SUSTB:
++   case OP_SUSTP: return &opInfo_SUST;
++   case OP_TEX:
++   case OP_TXB:
++   case OP_TXL: return &opInfo_TEX;
++   case OP_TXD: return &opInfo_TXD;
++   case OP_TXF: return &opInfo_TLD;
++   case OP_TXG: return &opInfo_TLD4;
++   case OP_TXLQ: return &opInfo_TMML;
++   case OP_TXQ: return &opInfo_TXQ;
++   case OP_VFETCH: return &opInfo_ALD;
++   case OP_VOTE: return &opInfo_VOTE;
++   case OP_WARPSYNC: return &opInfo_WARPSYNC;
++   default:
++      break;
++   }
++   return NULL; // no table entry for this instruction; callers must handle NULL
++}
++
++/*
++ * Saturation is reported as supported only for F32 results of
++ * OP_ADD, OP_FMA, OP_MAD and OP_MUL.
++ */
++bool
++TargetGV100::isSatSupported(const Instruction *i) const
++{
++   if (i->dType != TYPE_F32)
++      return false;
++   switch (i->op) {
++   case OP_ADD:
++   case OP_FMA:
++   case OP_MAD:
++   case OP_MUL:
++      return true;
++   default:
++      return false;
++   }
++}
++
++bool
++TargetGV100::isModSupported(const Instruction *i, int s, Modifier mod) const
++{
++   uint8_t allowed = 0; // modifier bits accepted on source s; empty if unknown
++   const struct opInfo *const desc = nv50_ir::getOpInfo(i);
++   if (desc != NULL && s < (int)ARRAY_SIZE(desc->src))
++      allowed = desc->src[s].mods;
++   return (mod & Modifier(allowed)) == mod;
++}
++
++// Accepts MAD/FMA, RSQ, the SET family and SHLADD for any type; MAX only for F32.
++bool
++TargetGV100::isOpSupported(operation op, DataType ty) const
++{
++   switch (op) {
++   case OP_MAD:
++   case OP_FMA:
++   case OP_RSQ:
++   case OP_SET:
++   case OP_SET_AND:
++   case OP_SET_OR:
++   case OP_SET_XOR:
++   case OP_SHLADD:
++      return true;
++   case OP_MAX:
++      return ty == TYPE_F32;
++   default:
++      return false;
++   }
++}
++
++/*
++ * OP_BREV is always reported as needing a barrier; every other op is
++ * decided by the TargetGM107 implementation.
++ */
++bool
++TargetGV100::isBarrierRequired(const Instruction *i) const
++{
++   if (i->op == OP_BREV)
++      return true;
++
++   return TargetGM107::isBarrierRequired(i);
++}
++
++bool
++TargetGV100::insnCanLoad(const Instruction *i, int s,
++                         const Instruction *ld) const
++{ // Reports whether the file of ld's source 0 is among the files allowed for source s of i.
++   const struct opInfo *info = nv50_ir::getOpInfo(i);
++   uint16_t files = 0; // bitmask indexed by register file; stays empty when i has no opInfo entry
++
++   if (ld->src(0).getFile() == FILE_IMMEDIATE && ld->getSrc(0)->reg.data.u64 == 0)
++      return (!i->isPseudo() &&
++              !i->asTex() &&
++              i->op != OP_EXPORT && i->op != OP_STORE); // zero immediate: decided here, opInfo is not consulted
++
++   if (ld->src(0).isIndirect(0))
++      return false; // indirectly addressed loads are always rejected
++
++   if (info && s < (int)ARRAY_SIZE(info->src)) {
++      files = info->src[s].files;
++      if ((s == 1 && i->srcExists(2) && i->src(2).getFile() != FILE_GPR) ||
++          (s == 2 && i->srcExists(1) && i->src(1).getFile() != FILE_GPR)) {
++         files &= ~(1 << FILE_MEMORY_CONST); // the other of sources 1/2 is already a non-GPR:
++         files &= ~(1 << FILE_IMMEDIATE);    // drop const-memory and immediate from the mask
++      } else
++      if ((i->op == OP_SHL || i->op == OP_SHR) &&
++          ((s == 0 && i->srcExists(1) && i->src(1).getFile() != FILE_GPR) ||
++           (s == 1 && i->srcExists(0) && i->src(0).getFile() != FILE_GPR))) {
++         files &= ~(1 << FILE_MEMORY_CONST); // same restriction for sources 0/1 of SHL and SHR
++         files &= ~(1 << FILE_IMMEDIATE);
++      }
++   }
++
++   if (ld->src(0).getFile() == FILE_IMMEDIATE) {
++      if (i->sType == TYPE_F64) {
++         if (ld->getSrc(0)->asImm()->reg.data.u64 & 0x00000000ffffffff)
++            return false; // F64 immediates are rejected when any of the low 32 bits is set
++      }
++   }
++
++   return (files & (1 << ld->src(0).getFile())); // non-zero iff ld's source file is still in the mask
++}
++
++void
++TargetGV100::getBuiltinCode(const uint32_t **code, uint32_t *size) const
++{ // Outputs: *code points at a fixed static table, *size is that table's size in bytes.
++   //XXX: find out why gv100 (tu1xx is fine) hangs without this
++   static const uint32_t builtin[] = { // const: only ever exposed through a const pointer
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
++   };
++   *code = builtin;
++   *size = sizeof(builtin);
++}
++
++uint32_t
++TargetGV100::getBuiltinOffset(int builtin) const
++{
++   return 0; // the builtin index is ignored; offset 0 is returned for every value
++}
++
++bool
++TargetGV100::runLegalizePass(Program *prog, CGStage stage) const
++{
++   if (stage == CG_STAGE_SSA) {
++      GV100LegalizeSSA ssaPass(prog);
++      return ssaPass.run(prog, false, true);
++   }
++   if (stage == CG_STAGE_POST_RA) {
++      NVC0LegalizePostRA postRaPass(prog);
++      return postRaPass.run(prog, false, true);
++   }
++   if (stage != CG_STAGE_PRE_SSA)
++      return false; // no legalization is defined for any other stage
++
++   GM107LoweringPass gm107Lowering(prog);
++   GV100LoweringPass gv100Lowering(prog);
++   gm107Lowering.run(prog, false, true);
++   gv100Lowering.run(prog, false, true);
++   return true; // the results of the two lowering runs are not propagated
++}
++
++CodeEmitter *
++TargetGV100::getCodeEmitter(Program::Type type)
++{
++   return new CodeEmitterGV100(this); // new emitter on every call; 'type' is not consulted
++}
++
++TargetGV100::TargetGV100(unsigned int chipset)
++   : TargetGM107(chipset) // the base target is constructed with the same chipset value
++{
++   initOpInfo();
++}
++
++Target *getTargetGV100(unsigned int chipset)
++{
++   return new TargetGV100(chipset); // heap-allocated; NOTE(review): caller presumably owns it - confirm
++}
++
++};
+diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.h
+new file mode 100644
+index 00000000000..897e6a22d30
+--- /dev/null
++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.h
+@@ -0,0 +1,52 @@
++/*
++ * Copyright 2020 Red Hat Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ */
++#ifndef __NV50_IR_TARGET_GV100_H__
++#define __NV50_IR_TARGET_GV100_H__
++#include "codegen/nv50_ir_target_gm107.h"
++
++namespace nv50_ir {
++
++class TargetGV100 : public TargetGM107 { // GV100 target description, layered on the GM107 target
++public:
++   TargetGV100(unsigned int chipset); // forwards chipset to TargetGM107, then calls initOpInfo()
++
++   virtual CodeEmitter *getCodeEmitter(Program::Type); // returns a newly allocated CodeEmitterGV100
++
++   virtual bool runLegalizePass(Program *, CGStage stage) const; // runs the passes for one codegen stage
++
++   virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const; // outputs a static table and its byte size
++   virtual uint32_t getBuiltinOffset(int builtin) const; // currently returns 0 for every index
++
++   virtual bool insnCanLoad(const Instruction *, int, const Instruction *) const; // (insn, source index, load)
++   virtual bool isOpSupported(operation, DataType) const;
++   virtual bool isModSupported(const Instruction *, int s, Modifier) const; // s is the source index
++   virtual bool isSatSupported(const Instruction *) const;
++
++   virtual bool isBarrierRequired(const Instruction *) const; // adds OP_BREV on top of the GM107 answer
++
++private:
++   void initOpInfo(); // called from the constructor
++   void initProps(const struct opProperties *, int); // NOTE(review): no definition visible in this chunk - confirm it is used
++};
++
++};
++#endif
+diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
+index 60134b445db..ed5b343ccba 100644
+--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
+@@ -30,7 +30,7 @@ Target *getTargetNVC0(unsigned int chipset)
+ }
+ 
+ TargetNVC0::TargetNVC0(unsigned int card) :
+-   Target(card < 0x110, false, card >= 0xe4)
++   Target(card < 0x110, false, card >= 0xe4 && card < 0x140)
+ {
+    chipset = card;
+    initOpInfo();
+diff --git a/src/gallium/drivers/nouveau/meson.build b/src/gallium/drivers/nouveau/meson.build
+index 7a1d18a6394..68cfebdf20c 100644
+--- a/src/gallium/drivers/nouveau/meson.build
++++ b/src/gallium/drivers/nouveau/meson.build
+@@ -150,17 +150,31 @@ files_libnouveau = files(
+   'codegen/nv50_ir_util.cpp',
+   'codegen/nv50_ir_util.h',
+   'codegen/unordered_set.h',
++  'codegen/nv50_ir_emit_gv100.cpp',
++  'codegen/nv50_ir_emit_gv100.h',
+   'codegen/nv50_ir_emit_gk110.cpp',
+   'codegen/nv50_ir_emit_gm107.cpp',
+   'codegen/nv50_ir_emit_nvc0.cpp',
++  'codegen/nv50_ir_lowering_gv100.cpp',
++  'codegen/nv50_ir_lowering_gv100.h',
+   'codegen/nv50_ir_lowering_gm107.cpp',
+   'codegen/nv50_ir_lowering_gm107.h',
+   'codegen/nv50_ir_lowering_nvc0.cpp',
+   'codegen/nv50_ir_lowering_nvc0.h',
++  'codegen/nv50_ir_target_gv100.cpp',
++  'codegen/nv50_ir_target_gv100.h',
+   'codegen/nv50_ir_target_gm107.cpp',
+   'codegen/nv50_ir_target_gm107.h',
+   'codegen/nv50_ir_target_nvc0.cpp',
+   'codegen/nv50_ir_target_nvc0.h',
++  'nvc0/cla0c0qmd.h',
++  'nvc0/clc0c0qmd.h',
++  'nvc0/clc3c0qmd.h',
++  'nvc0/drf.h',
++  'nvc0/qmd.h',
++  'nvc0/qmda0c0.c',
++  'nvc0/qmdc0c0.c',
++  'nvc0/qmdc3c0.c',
+   'nvc0/gm107_texture.xml.h',
+   'nvc0/nvc0_3d.xml.h',
+   'nvc0/nvc0_compute.c',
+diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
+index de9cce3812a..8606ba43c1a 100644
+--- a/src/gallium/drivers/nouveau/nouveau_screen.c
++++ b/src/gallium/drivers/nouveau/nouveau_screen.c
+@@ -188,7 +188,11 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
+    if (nv_dbg)
+       nouveau_mesa_debug = atoi(nv_dbg);
+ 
+-   screen->prefer_nir = debug_get_bool_option("NV50_PROG_USE_NIR", false);
++   if (dev->chipset < 0x140)
++      screen->prefer_nir = debug_get_bool_option("NV50_PROG_USE_NIR", false);
++   else
++      screen->prefer_nir = true;
++
+    screen->force_enable_cl = debug_get_bool_option("NOUVEAU_ENABLE_CL", false);
+    if (screen->force_enable_cl)
+       glsl_type_singleton_init_or_ref();
+diff --git a/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
+index 899d73d7398..31e7cf82233 100644
+--- a/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
++++ b/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
+@@ -218,9 +218,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #define NV50_2D_PATTERN_SELECT_BITMAP_1X64			0x00000002
+ #define NV50_2D_PATTERN_SELECT_COLOR				0x00000003
+ 
+-#define NVC0_2D_UNK02B8(i0)				       (0x000002b8 + 0x4*(i0))
+-#define NVC0_2D_UNK02B8__ESIZE					0x00000004
+-#define NVC0_2D_UNK02B8__LEN					0x00000009
++#define NVC0_2D_SET_DST_COLOR_RENDER_TO_ZETA_SURFACE  0x000002b8
+ 
+ #define NVC0_2D_UNK2DC						0x000002dc
+ 
+diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h
+index 664bfae9f64..fac195d4846 100644
+--- a/src/gallium/drivers/nouveau/nv_object.xml.h
++++ b/src/gallium/drivers/nouveau/nv_object.xml.h
+@@ -195,6 +195,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #define GM200_3D_CLASS						0x0000b197
+ #define GP100_3D_CLASS						0x0000c097
+ #define GP102_3D_CLASS						0x0000c197
++#define GV100_3D_CLASS						0x0000c397
++#define TU102_3D_CLASS						0x0000c597
+ #define NV50_2D_CLASS						0x0000502d
+ #define NVC0_2D_CLASS						0x0000902d
+ #define NV50_COMPUTE_CLASS					0x000050c0
+@@ -207,6 +209,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #define GM200_COMPUTE_CLASS					0x0000b1c0
+ #define GP100_COMPUTE_CLASS					0x0000c0c0
+ #define GP104_COMPUTE_CLASS					0x0000c1c0
++#define GV100_COMPUTE_CLASS					0x0000c3c0
++#define TU102_COMPUTE_CLASS					0x0000c5c0
+ #define NV84_CRYPT_CLASS					0x000074c1
+ #define BLOB_NVC0_PCOPY1_CLASS					0x000090b8
+ #define BLOB_NVC0_PCOPY0_CLASS					0x000090b5
+diff --git a/src/gallium/drivers/nouveau/nvc0/cla0c0qmd.h b/src/gallium/drivers/nouveau/nvc0/cla0c0qmd.h
+new file mode 100644
+index 00000000000..c0829f1cdc2
+--- /dev/null
++++ b/src/gallium/drivers/nouveau/nvc0/cla0c0qmd.h
+@@ -0,0 +1,660 @@
++/*******************************************************************************
++    Copyright (c) 2016 NVIDIA Corporation
++
++    Permission is hereby granted, free of charge, to any person obtaining a copy
++    of this software and associated documentation files (the "Software"), to
++    deal in the Software without restriction, including without limitation the
++    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
++    sell copies of the Software, and to permit persons to whom the Software is
++    furnished to do so, subject to the following conditions:
++
++        The above copyright notice and this permission notice shall be
++        included in all copies or substantial portions of the Software.
++
++    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++    DEALINGS IN THE SOFTWARE.
++
++*******************************************************************************/
++
++/* AUTO GENERATED FILE -- DO NOT EDIT */
++
++#ifndef __CLA0C0QMD_H__
++#define __CLA0C0QMD_H__
++
++/*
++** Queue Meta Data, Version 00_06
++ */
++
++// The below C preprocessor definitions describe "multi-word" structures, where
++// fields may have bit numbers beyond 32.  For example, MW(127:96) means
++// the field is in bits 0-31 of word number 3 of the structure.  The "MW(X:Y)"
++// syntax is to distinguish from similar "X:Y" single-word definitions: the
++// macros historically used for single-word definitions would fail with
++// multi-word definitions.
++//
++// See nvmisc.h:DRF_VAL_MW() in the source code of the kernel
++// interface layer of nvidia.ko for an example of how to manipulate
++// these MW(X:Y) definitions.
++
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_A                         MW(30:0)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_B                         MW(31:31)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_C                         MW(62:32)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_D                         MW(63:63)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_E                         MW(94:64)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_F                         MW(95:95)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_G                         MW(126:96)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_H                         MW(127:127)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_A_A                          MW(159:128)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_I                         MW(191:160)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_J                         MW(196:192)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_A                            MW(199:197)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_K                         MW(200:200)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_K_FALSE                   0x00000000
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_K_TRUE                    0x00000001
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_L                         MW(201:201)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_L_FALSE                   0x00000000
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_L_TRUE                    0x00000001
++#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE0                 MW(202:202)
++#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE0_FALSE           0x00000000
++#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE0_TRUE            0x00000001
++#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE1                 MW(203:203)
++#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE1_FALSE           0x00000000
++#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE1_TRUE            0x00000001
++#define NVA0C0_QMDV00_06_QMD_RESERVED_B                            MW(207:204)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_M                         MW(222:208)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_N                         MW(223:223)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_N_FALSE                   0x00000000
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_N_TRUE                    0x00000001
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_O                         MW(248:224)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_C                            MW(249:249)
++#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_HEADER_CACHE           MW(250:250)
++#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE     0x00000000
++#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE      0x00000001
++#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_SAMPLER_CACHE          MW(251:251)
++#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE    0x00000000
++#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE     0x00000001
++#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_DATA_CACHE             MW(252:252)
++#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_DATA_CACHE_FALSE       0x00000000
++#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_DATA_CACHE_TRUE        0x00000001
++#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_DATA_CACHE              MW(253:253)
++#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_DATA_CACHE_FALSE        0x00000000
++#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_DATA_CACHE_TRUE         0x00000001
++#define NVA0C0_QMDV00_06_INVALIDATE_INSTRUCTION_CACHE              MW(254:254)
++#define NVA0C0_QMDV00_06_INVALIDATE_INSTRUCTION_CACHE_FALSE        0x00000000
++#define NVA0C0_QMDV00_06_INVALIDATE_INSTRUCTION_CACHE_TRUE         0x00000001
++#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_CONSTANT_CACHE          MW(255:255)
++#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE    0x00000000
++#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE     0x00000001
++#define NVA0C0_QMDV00_06_PROGRAM_OFFSET                            MW(287:256)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_P                         MW(319:288)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_Q                         MW(327:320)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_D                            MW(335:328)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_R                         MW(351:336)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_S                         MW(357:352)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_E                            MW(365:358)
++#define NVA0C0_QMDV00_06_RELEASE_MEMBAR_TYPE                       MW(366:366)
++#define NVA0C0_QMDV00_06_RELEASE_MEMBAR_TYPE_FE_NONE               0x00000000
++#define NVA0C0_QMDV00_06_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR          0x00000001
++#define NVA0C0_QMDV00_06_CWD_MEMBAR_TYPE                           MW(369:368)
++#define NVA0C0_QMDV00_06_CWD_MEMBAR_TYPE_L1_NONE                   0x00000000
++#define NVA0C0_QMDV00_06_CWD_MEMBAR_TYPE_L1_SYSMEMBAR              0x00000001
++#define NVA0C0_QMDV00_06_CWD_MEMBAR_TYPE_L1_MEMBAR                 0x00000003
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_T                         MW(370:370)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_T_FALSE                   0x00000000
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_T_TRUE                    0x00000001
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_U                         MW(371:371)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_U_FALSE                   0x00000000
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_U_TRUE                    0x00000001
++#define NVA0C0_QMDV00_06_THROTTLED                                 MW(372:372)
++#define NVA0C0_QMDV00_06_THROTTLED_FALSE                           0x00000000
++#define NVA0C0_QMDV00_06_THROTTLED_TRUE                            0x00000001
++#define NVA0C0_QMDV00_06_QMD_RESERVED_E2_A                         MW(376:376)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_E2_B                         MW(377:377)
++#define NVA0C0_QMDV00_06_API_VISIBLE_CALL_LIMIT                    MW(378:378)
++#define NVA0C0_QMDV00_06_API_VISIBLE_CALL_LIMIT__32                0x00000000
++#define NVA0C0_QMDV00_06_API_VISIBLE_CALL_LIMIT_NO_CHECK           0x00000001
++#define NVA0C0_QMDV00_06_SHARED_MEMORY_BANK_MAPPING                MW(379:379)
++#define NVA0C0_QMDV00_06_SHARED_MEMORY_BANK_MAPPING_FOUR_BYTES_PER_BANK 0x00000000
++#define NVA0C0_QMDV00_06_SHARED_MEMORY_BANK_MAPPING_EIGHT_BYTES_PER_BANK 0x00000001
++#define NVA0C0_QMDV00_06_SAMPLER_INDEX                             MW(382:382)
++#define NVA0C0_QMDV00_06_SAMPLER_INDEX_INDEPENDENTLY               0x00000000
++#define NVA0C0_QMDV00_06_SAMPLER_INDEX_VIA_HEADER_INDEX            0x00000001
++#define NVA0C0_QMDV00_06_QMD_RESERVED_E3_A                         MW(383:383)
++#define NVA0C0_QMDV00_06_CTA_RASTER_WIDTH                          MW(415:384)
++#define NVA0C0_QMDV00_06_CTA_RASTER_HEIGHT                         MW(431:416)
++#define NVA0C0_QMDV00_06_CTA_RASTER_DEPTH                          MW(447:432)
++#define NVA0C0_QMDV00_06_CTA_RASTER_WIDTH_RESUME                   MW(479:448)
++#define NVA0C0_QMDV00_06_CTA_RASTER_HEIGHT_RESUME                  MW(495:480)
++#define NVA0C0_QMDV00_06_CTA_RASTER_DEPTH_RESUME                   MW(511:496)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_V                         MW(535:512)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_F                            MW(542:536)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_W                         MW(543:543)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_W_FALSE                   0x00000000
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_W_TRUE                    0x00000001
++#define NVA0C0_QMDV00_06_SHARED_MEMORY_SIZE                        MW(561:544)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_G                            MW(575:562)
++#define NVA0C0_QMDV00_06_QMD_VERSION                               MW(579:576)
++#define NVA0C0_QMDV00_06_QMD_MAJOR_VERSION                         MW(583:580)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_H                            MW(591:584)
++#define NVA0C0_QMDV00_06_CTA_THREAD_DIMENSION0                     MW(607:592)
++#define NVA0C0_QMDV00_06_CTA_THREAD_DIMENSION1                     MW(623:608)
++#define NVA0C0_QMDV00_06_CTA_THREAD_DIMENSION2                     MW(639:624)
++#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_VALID(i)                  MW((640+(i)*1):(640+(i)*1))
++#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_VALID_FALSE               0x00000000
++#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_VALID_TRUE                0x00000001
++#define NVA0C0_QMDV00_06_QMD_RESERVED_I                            MW(668:648)
++#define NVA0C0_QMDV00_06_L1_CONFIGURATION                          MW(671:669)
++#define NVA0C0_QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB 0x00000001
++#define NVA0C0_QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB 0x00000002
++#define NVA0C0_QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB 0x00000003
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_X                         MW(703:672)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_Y                         MW(735:704)
++#define NVA0C0_QMDV00_06_RELEASE0_ADDRESS_LOWER                    MW(767:736)
++#define NVA0C0_QMDV00_06_RELEASE0_ADDRESS_UPPER                    MW(775:768)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_J                            MW(783:776)
++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP                     MW(790:788)
++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_ADD             0x00000000
++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_MIN             0x00000001
++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_MAX             0x00000002
++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_INC             0x00000003
++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_DEC             0x00000004
++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_AND             0x00000005
++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_OR              0x00000006
++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_XOR             0x00000007
++#define NVA0C0_QMDV00_06_QMD_RESERVED_K                            MW(791:791)
++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_FORMAT                 MW(793:792)
++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_FORMAT_SIGNED_32       0x00000001
++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_ENABLE                 MW(794:794)
++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_ENABLE_FALSE           0x00000000
++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_ENABLE_TRUE            0x00000001
++#define NVA0C0_QMDV00_06_RELEASE0_STRUCTURE_SIZE                   MW(799:799)
++#define NVA0C0_QMDV00_06_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
++#define NVA0C0_QMDV00_06_RELEASE0_STRUCTURE_SIZE_ONE_WORD          0x00000001
++#define NVA0C0_QMDV00_06_RELEASE0_PAYLOAD                          MW(831:800)
++#define NVA0C0_QMDV00_06_RELEASE1_ADDRESS_LOWER                    MW(863:832)
++#define NVA0C0_QMDV00_06_RELEASE1_ADDRESS_UPPER                    MW(871:864)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_L                            MW(879:872)
++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP                     MW(886:884)
++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_ADD             0x00000000
++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_MIN             0x00000001
++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_MAX             0x00000002
++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_INC             0x00000003
++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_DEC             0x00000004
++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_AND             0x00000005
++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_OR              0x00000006
++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_XOR             0x00000007
++#define NVA0C0_QMDV00_06_QMD_RESERVED_M                            MW(887:887)
++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_FORMAT                 MW(889:888)
++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_FORMAT_SIGNED_32       0x00000001
++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_ENABLE                 MW(890:890)
++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_ENABLE_FALSE           0x00000000
++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_ENABLE_TRUE            0x00000001
++#define NVA0C0_QMDV00_06_RELEASE1_STRUCTURE_SIZE                   MW(895:895)
++#define NVA0C0_QMDV00_06_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
++#define NVA0C0_QMDV00_06_RELEASE1_STRUCTURE_SIZE_ONE_WORD          0x00000001
++#define NVA0C0_QMDV00_06_RELEASE1_PAYLOAD                          MW(927:896)
++#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_ADDR_LOWER(i)             MW((959+(i)*64):(928+(i)*64))
++#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_ADDR_UPPER(i)             MW((967+(i)*64):(960+(i)*64))
++#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_RESERVED_ADDR(i)          MW((973+(i)*64):(968+(i)*64))
++#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_INVALIDATE(i)             MW((974+(i)*64):(974+(i)*64))
++#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_INVALIDATE_FALSE          0x00000000
++#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_INVALIDATE_TRUE           0x00000001
++#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_SIZE(i)                   MW((991+(i)*64):(975+(i)*64))
++#define NVA0C0_QMDV00_06_SHADER_LOCAL_MEMORY_LOW_SIZE              MW(1463:1440)
++#define NVA0C0_QMDV00_06_QMD_RESERVED_N                            MW(1466:1464)
++#define NVA0C0_QMDV00_06_BARRIER_COUNT                             MW(1471:1467)
++#define NVA0C0_QMDV00_06_SHADER_LOCAL_MEMORY_HIGH_SIZE             MW(1495:1472)
++#define NVA0C0_QMDV00_06_REGISTER_COUNT                            MW(1503:1496)
++#define NVA0C0_QMDV00_06_SHADER_LOCAL_MEMORY_CRS_SIZE              MW(1527:1504)
++#define NVA0C0_QMDV00_06_SASS_VERSION                              MW(1535:1528)
++#define NVA0C0_QMDV00_06_QMD_SPARE_A                               MW(1567:1536)
++#define NVA0C0_QMDV00_06_QMD_SPARE_B                               MW(1599:1568)
++#define NVA0C0_QMDV00_06_QMD_SPARE_C                               MW(1631:1600)
++#define NVA0C0_QMDV00_06_QMD_SPARE_D                               MW(1663:1632)
++#define NVA0C0_QMDV00_06_QMD_SPARE_E                               MW(1695:1664)
++#define NVA0C0_QMDV00_06_QMD_SPARE_F                               MW(1727:1696)
++#define NVA0C0_QMDV00_06_QMD_SPARE_G                               MW(1759:1728)
++#define NVA0C0_QMDV00_06_QMD_SPARE_H                               MW(1791:1760)
++#define NVA0C0_QMDV00_06_QMD_SPARE_I                               MW(1823:1792)
++#define NVA0C0_QMDV00_06_QMD_SPARE_J                               MW(1855:1824)
++#define NVA0C0_QMDV00_06_QMD_SPARE_K                               MW(1887:1856)
++#define NVA0C0_QMDV00_06_QMD_SPARE_L                               MW(1919:1888)
++#define NVA0C0_QMDV00_06_QMD_SPARE_M                               MW(1951:1920)
++#define NVA0C0_QMDV00_06_QMD_SPARE_N                               MW(1983:1952)
++#define NVA0C0_QMDV00_06_DEBUG_ID_UPPER                            MW(2015:1984)
++#define NVA0C0_QMDV00_06_DEBUG_ID_LOWER                            MW(2047:2016)
++
++
++/*
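++** Queue Meta Data (QMD), Version 01_06: NVA0C0_QMDV01_06_* fields given as MW(high:low) ranges, each followed by its named values (if any)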
++ */
++
++#define NVA0C0_QMDV01_06_OUTER_PUT                                 MW(30:0)
++#define NVA0C0_QMDV01_06_OUTER_OVERFLOW                            MW(31:31)
++#define NVA0C0_QMDV01_06_OUTER_GET                                 MW(62:32)
++#define NVA0C0_QMDV01_06_OUTER_STICKY_OVERFLOW                     MW(63:63)
++#define NVA0C0_QMDV01_06_INNER_GET                                 MW(94:64)
++#define NVA0C0_QMDV01_06_INNER_OVERFLOW                            MW(95:95)
++#define NVA0C0_QMDV01_06_INNER_PUT                                 MW(126:96)
++#define NVA0C0_QMDV01_06_INNER_STICKY_OVERFLOW                     MW(127:127)
++#define NVA0C0_QMDV01_06_QMD_RESERVED_A_A                          MW(159:128)
++#define NVA0C0_QMDV01_06_SCHEDULER_NEXT_QMD_POINTER                MW(191:160)
++#define NVA0C0_QMDV01_06_QMD_GROUP_ID                              MW(197:192)
++#define NVA0C0_QMDV01_06_QMD_RESERVED_A                            MW(199:198)
++#define NVA0C0_QMDV01_06_SCHEDULE_ON_PUT_UPDATE_ENABLE             MW(200:200)
++#define NVA0C0_QMDV01_06_SCHEDULE_ON_PUT_UPDATE_ENABLE_FALSE       0x00000000
++#define NVA0C0_QMDV01_06_SCHEDULE_ON_PUT_UPDATE_ENABLE_TRUE        0x00000001
++#define NVA0C0_QMDV01_06_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST      MW(201:201)
++#define NVA0C0_QMDV01_06_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000
++#define NVA0C0_QMDV01_06_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001
++#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE0                 MW(202:202)
++#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE0_FALSE           0x00000000
++#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE0_TRUE            0x00000001
++#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE1                 MW(203:203)
++#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE1_FALSE           0x00000000
++#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE1_TRUE            0x00000001
++#define NVA0C0_QMDV01_06_REQUIRE_SCHEDULING_PCAS                   MW(204:204)
++#define NVA0C0_QMDV01_06_REQUIRE_SCHEDULING_PCAS_FALSE             0x00000000
++#define NVA0C0_QMDV01_06_REQUIRE_SCHEDULING_PCAS_TRUE              0x00000001
++#define NVA0C0_QMDV01_06_QMD_RESERVED_B                            MW(207:205)
++#define NVA0C0_QMDV01_06_SKED_PRIVATE_LIST_ADDR                    MW(222:208)
++#define NVA0C0_QMDV01_06_SKED_PRIVATE_LIST_VALID                   MW(223:223)
++#define NVA0C0_QMDV01_06_SKED_PRIVATE_LIST_VALID_FALSE             0x00000000
++#define NVA0C0_QMDV01_06_SKED_PRIVATE_LIST_VALID_TRUE              0x00000001
++#define NVA0C0_QMDV01_06_CIRCULAR_QUEUE_SIZE                       MW(248:224)
++#define NVA0C0_QMDV01_06_QMD_RESERVED_C                            MW(249:249)
++#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_HEADER_CACHE           MW(250:250)
++#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE     0x00000000
++#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE      0x00000001
++#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_SAMPLER_CACHE          MW(251:251)
++#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE    0x00000000
++#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE     0x00000001
++#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_DATA_CACHE             MW(252:252)
++#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_DATA_CACHE_FALSE       0x00000000
++#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_DATA_CACHE_TRUE        0x00000001
++#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_DATA_CACHE              MW(253:253)
++#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_DATA_CACHE_FALSE        0x00000000
++#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_DATA_CACHE_TRUE         0x00000001
++#define NVA0C0_QMDV01_06_INVALIDATE_INSTRUCTION_CACHE              MW(254:254)
++#define NVA0C0_QMDV01_06_INVALIDATE_INSTRUCTION_CACHE_FALSE        0x00000000
++#define NVA0C0_QMDV01_06_INVALIDATE_INSTRUCTION_CACHE_TRUE         0x00000001
++#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_CONSTANT_CACHE          MW(255:255)
++#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE    0x00000000
++#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE     0x00000001
++#define NVA0C0_QMDV01_06_PROGRAM_OFFSET                            MW(287:256)
++#define NVA0C0_QMDV01_06_CIRCULAR_QUEUE_ADDR_LOWER                 MW(319:288)
++#define NVA0C0_QMDV01_06_CIRCULAR_QUEUE_ADDR_UPPER                 MW(327:320)
++#define NVA0C0_QMDV01_06_QMD_RESERVED_D                            MW(335:328)
++#define NVA0C0_QMDV01_06_CIRCULAR_QUEUE_ENTRY_SIZE                 MW(351:336)
++#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_ID                    MW(357:352)
++#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE       MW(365:358)
++#define NVA0C0_QMDV01_06_RELEASE_MEMBAR_TYPE                       MW(366:366)
++#define NVA0C0_QMDV01_06_RELEASE_MEMBAR_TYPE_FE_NONE               0x00000000
++#define NVA0C0_QMDV01_06_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR          0x00000001
++#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_INCR_ENABLE           MW(367:367)
++#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE     0x00000000
++#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE      0x00000001
++#define NVA0C0_QMDV01_06_CWD_MEMBAR_TYPE                           MW(369:368)
++#define NVA0C0_QMDV01_06_CWD_MEMBAR_TYPE_L1_NONE                   0x00000000
++#define NVA0C0_QMDV01_06_CWD_MEMBAR_TYPE_L1_SYSMEMBAR              0x00000001
++#define NVA0C0_QMDV01_06_CWD_MEMBAR_TYPE_L1_MEMBAR                 0x00000003
++#define NVA0C0_QMDV01_06_SEQUENTIALLY_RUN_CTAS                     MW(370:370)
++#define NVA0C0_QMDV01_06_SEQUENTIALLY_RUN_CTAS_FALSE               0x00000000
++#define NVA0C0_QMDV01_06_SEQUENTIALLY_RUN_CTAS_TRUE                0x00000001
++#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_DECR_ENABLE           MW(371:371)
++#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE     0x00000000
++#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE      0x00000001
++#define NVA0C0_QMDV01_06_THROTTLED                                 MW(372:372)
++#define NVA0C0_QMDV01_06_THROTTLED_FALSE                           0x00000000
++#define NVA0C0_QMDV01_06_THROTTLED_TRUE                            0x00000001
++#define NVA0C0_QMDV01_06_FP32_NAN_BEHAVIOR                         MW(376:376)
++#define NVA0C0_QMDV01_06_FP32_NAN_BEHAVIOR_LEGACY                  0x00000000
++#define NVA0C0_QMDV01_06_FP32_NAN_BEHAVIOR_FP64_COMPATIBLE         0x00000001
++#define NVA0C0_QMDV01_06_FP32_F2I_NAN_BEHAVIOR                     MW(377:377)
++#define NVA0C0_QMDV01_06_FP32_F2I_NAN_BEHAVIOR_PASS_ZERO           0x00000000
++#define NVA0C0_QMDV01_06_FP32_F2I_NAN_BEHAVIOR_PASS_INDEFINITE     0x00000001
++#define NVA0C0_QMDV01_06_API_VISIBLE_CALL_LIMIT                    MW(378:378)
++#define NVA0C0_QMDV01_06_API_VISIBLE_CALL_LIMIT__32                0x00000000
++#define NVA0C0_QMDV01_06_API_VISIBLE_CALL_LIMIT_NO_CHECK           0x00000001
++#define NVA0C0_QMDV01_06_SHARED_MEMORY_BANK_MAPPING                MW(379:379)
++#define NVA0C0_QMDV01_06_SHARED_MEMORY_BANK_MAPPING_FOUR_BYTES_PER_BANK 0x00000000
++#define NVA0C0_QMDV01_06_SHARED_MEMORY_BANK_MAPPING_EIGHT_BYTES_PER_BANK 0x00000001
++#define NVA0C0_QMDV01_06_SAMPLER_INDEX                             MW(382:382)
++#define NVA0C0_QMDV01_06_SAMPLER_INDEX_INDEPENDENTLY               0x00000000
++#define NVA0C0_QMDV01_06_SAMPLER_INDEX_VIA_HEADER_INDEX            0x00000001
++#define NVA0C0_QMDV01_06_FP32_NARROW_INSTRUCTION                   MW(383:383)
++#define NVA0C0_QMDV01_06_FP32_NARROW_INSTRUCTION_KEEP_DENORMS      0x00000000
++#define NVA0C0_QMDV01_06_FP32_NARROW_INSTRUCTION_FLUSH_DENORMS     0x00000001
++#define NVA0C0_QMDV01_06_CTA_RASTER_WIDTH                          MW(415:384)
++#define NVA0C0_QMDV01_06_CTA_RASTER_HEIGHT                         MW(431:416)
++#define NVA0C0_QMDV01_06_CTA_RASTER_DEPTH                          MW(447:432)
++#define NVA0C0_QMDV01_06_CTA_RASTER_WIDTH_RESUME                   MW(479:448)
++#define NVA0C0_QMDV01_06_CTA_RASTER_HEIGHT_RESUME                  MW(495:480)
++#define NVA0C0_QMDV01_06_CTA_RASTER_DEPTH_RESUME                   MW(511:496)
++#define NVA0C0_QMDV01_06_LAUNCH_QUOTA                              MW(535:512)
++#define NVA0C0_QMDV01_06_QMD_RESERVED_F                            MW(542:536)
++#define NVA0C0_QMDV01_06_LAUNCH_QUOTA_ENABLE                       MW(543:543)
++#define NVA0C0_QMDV01_06_LAUNCH_QUOTA_ENABLE_FALSE                 0x00000000
++#define NVA0C0_QMDV01_06_LAUNCH_QUOTA_ENABLE_TRUE                  0x00000001
++#define NVA0C0_QMDV01_06_SHARED_MEMORY_SIZE                        MW(561:544)
++#define NVA0C0_QMDV01_06_QMD_RESERVED_G                            MW(575:562)
++#define NVA0C0_QMDV01_06_QMD_VERSION                               MW(579:576)
++#define NVA0C0_QMDV01_06_QMD_MAJOR_VERSION                         MW(583:580)
++#define NVA0C0_QMDV01_06_QMD_RESERVED_H                            MW(591:584)
++#define NVA0C0_QMDV01_06_CTA_THREAD_DIMENSION0                     MW(607:592)
++#define NVA0C0_QMDV01_06_CTA_THREAD_DIMENSION1                     MW(623:608)
++#define NVA0C0_QMDV01_06_CTA_THREAD_DIMENSION2                     MW(639:624)
++#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_VALID(i)                  MW((640+(i)*1):(640+(i)*1))
++#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_VALID_FALSE               0x00000000
++#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_VALID_TRUE                0x00000001
++#define NVA0C0_QMDV01_06_QMD_RESERVED_I                            MW(668:648)
++#define NVA0C0_QMDV01_06_L1_CONFIGURATION                          MW(671:669)
++#define NVA0C0_QMDV01_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB 0x00000001
++#define NVA0C0_QMDV01_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB 0x00000002
++#define NVA0C0_QMDV01_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB 0x00000003
++#define NVA0C0_QMDV01_06_SM_DISABLE_MASK_LOWER                     MW(703:672)
++#define NVA0C0_QMDV01_06_SM_DISABLE_MASK_UPPER                     MW(735:704)
++#define NVA0C0_QMDV01_06_RELEASE0_ADDRESS_LOWER                    MW(767:736)
++#define NVA0C0_QMDV01_06_RELEASE0_ADDRESS_UPPER                    MW(775:768)
++#define NVA0C0_QMDV01_06_QMD_RESERVED_J                            MW(783:776)
++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP                     MW(790:788)
++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_ADD             0x00000000
++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_MIN             0x00000001
++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_MAX             0x00000002
++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_INC             0x00000003
++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_DEC             0x00000004
++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_AND             0x00000005
++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_OR              0x00000006
++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_XOR             0x00000007
++#define NVA0C0_QMDV01_06_QMD_RESERVED_K                            MW(791:791)
++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_FORMAT                 MW(793:792)
++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_FORMAT_SIGNED_32       0x00000001
++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_ENABLE                 MW(794:794)
++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_ENABLE_FALSE           0x00000000
++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_ENABLE_TRUE            0x00000001
++#define NVA0C0_QMDV01_06_RELEASE0_STRUCTURE_SIZE                   MW(799:799)
++#define NVA0C0_QMDV01_06_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
++#define NVA0C0_QMDV01_06_RELEASE0_STRUCTURE_SIZE_ONE_WORD          0x00000001
++#define NVA0C0_QMDV01_06_RELEASE0_PAYLOAD                          MW(831:800)
++#define NVA0C0_QMDV01_06_RELEASE1_ADDRESS_LOWER                    MW(863:832)
++#define NVA0C0_QMDV01_06_RELEASE1_ADDRESS_UPPER                    MW(871:864)
++#define NVA0C0_QMDV01_06_QMD_RESERVED_L                            MW(879:872)
++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP                     MW(886:884)
++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_ADD             0x00000000
++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_MIN             0x00000001
++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_MAX             0x00000002
++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_INC             0x00000003
++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_DEC             0x00000004
++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_AND             0x00000005
++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_OR              0x00000006
++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_XOR             0x00000007
++#define NVA0C0_QMDV01_06_QMD_RESERVED_M                            MW(887:887)
++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_FORMAT                 MW(889:888)
++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_FORMAT_SIGNED_32       0x00000001
++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_ENABLE                 MW(890:890)
++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_ENABLE_FALSE           0x00000000
++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_ENABLE_TRUE            0x00000001
++#define NVA0C0_QMDV01_06_RELEASE1_STRUCTURE_SIZE                   MW(895:895)
++#define NVA0C0_QMDV01_06_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
++#define NVA0C0_QMDV01_06_RELEASE1_STRUCTURE_SIZE_ONE_WORD          0x00000001
++#define NVA0C0_QMDV01_06_RELEASE1_PAYLOAD                          MW(927:896)
++#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_ADDR_LOWER(i)             MW((959+(i)*64):(928+(i)*64))
++#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_ADDR_UPPER(i)             MW((967+(i)*64):(960+(i)*64))
++#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_RESERVED_ADDR(i)          MW((973+(i)*64):(968+(i)*64))
++#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_INVALIDATE(i)             MW((974+(i)*64):(974+(i)*64))
++#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_INVALIDATE_FALSE          0x00000000
++#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_INVALIDATE_TRUE           0x00000001
++#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_SIZE(i)                   MW((991+(i)*64):(975+(i)*64))
++#define NVA0C0_QMDV01_06_SHADER_LOCAL_MEMORY_LOW_SIZE              MW(1463:1440)
++#define NVA0C0_QMDV01_06_QMD_RESERVED_N                            MW(1466:1464)
++#define NVA0C0_QMDV01_06_BARRIER_COUNT                             MW(1471:1467)
++#define NVA0C0_QMDV01_06_SHADER_LOCAL_MEMORY_HIGH_SIZE             MW(1495:1472)
++#define NVA0C0_QMDV01_06_REGISTER_COUNT                            MW(1503:1496)
++#define NVA0C0_QMDV01_06_SHADER_LOCAL_MEMORY_CRS_SIZE              MW(1527:1504)
++#define NVA0C0_QMDV01_06_SASS_VERSION                              MW(1535:1528)
++#define NVA0C0_QMDV01_06_HW_ONLY_INNER_GET                         MW(1566:1536)
++#define NVA0C0_QMDV01_06_HW_ONLY_REQUIRE_SCHEDULING_PCAS           MW(1567:1567)
++#define NVA0C0_QMDV01_06_HW_ONLY_INNER_PUT                         MW(1598:1568)
++#define NVA0C0_QMDV01_06_HW_ONLY_SCHEDULE_ON_PUT_UPDATE_ENABLE     MW(1599:1599)
++#define NVA0C0_QMDV01_06_QUEUE_ENTRIES_PER_CTA_MINUS_ONE           MW(1606:1600)
++#define NVA0C0_QMDV01_06_QMD_RESERVED_Q                            MW(1609:1607)
++#define NVA0C0_QMDV01_06_COALESCE_WAITING_PERIOD                   MW(1617:1610)
++#define NVA0C0_QMDV01_06_QMD_RESERVED_R                            MW(1631:1618)
++#define NVA0C0_QMDV01_06_QMD_SPARE_D                               MW(1663:1632)
++#define NVA0C0_QMDV01_06_QMD_SPARE_E                               MW(1695:1664)
++#define NVA0C0_QMDV01_06_QMD_SPARE_F                               MW(1727:1696)
++#define NVA0C0_QMDV01_06_QMD_SPARE_G                               MW(1759:1728)
++#define NVA0C0_QMDV01_06_QMD_SPARE_H                               MW(1791:1760)
++#define NVA0C0_QMDV01_06_QMD_SPARE_I                               MW(1823:1792)
++#define NVA0C0_QMDV01_06_QMD_SPARE_J                               MW(1855:1824)
++#define NVA0C0_QMDV01_06_QMD_SPARE_K                               MW(1887:1856)
++#define NVA0C0_QMDV01_06_QMD_SPARE_L                               MW(1919:1888)
++#define NVA0C0_QMDV01_06_QMD_SPARE_M                               MW(1951:1920)
++#define NVA0C0_QMDV01_06_QMD_SPARE_N                               MW(1983:1952)
++#define NVA0C0_QMDV01_06_DEBUG_ID_UPPER                            MW(2015:1984)
++#define NVA0C0_QMDV01_06_DEBUG_ID_LOWER                            MW(2047:2016)
++
++
++/*
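++** Queue Meta Data (QMD), Version 01_07: NVA0C0_QMDV01_07_* fields given as MW(high:low) ranges, each followed by its named values (if any)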
++ */
++
++#define NVA0C0_QMDV01_07_OUTER_PUT                                 MW(30:0)
++#define NVA0C0_QMDV01_07_OUTER_OVERFLOW                            MW(31:31)
++#define NVA0C0_QMDV01_07_OUTER_GET                                 MW(62:32)
++#define NVA0C0_QMDV01_07_OUTER_STICKY_OVERFLOW                     MW(63:63)
++#define NVA0C0_QMDV01_07_INNER_GET                                 MW(94:64)
++#define NVA0C0_QMDV01_07_INNER_OVERFLOW                            MW(95:95)
++#define NVA0C0_QMDV01_07_INNER_PUT                                 MW(126:96)
++#define NVA0C0_QMDV01_07_INNER_STICKY_OVERFLOW                     MW(127:127)
++#define NVA0C0_QMDV01_07_QMD_RESERVED_A_A                          MW(159:128)
++#define NVA0C0_QMDV01_07_DEPENDENT_QMD_POINTER                     MW(191:160)
++#define NVA0C0_QMDV01_07_QMD_GROUP_ID                              MW(197:192)
++#define NVA0C0_QMDV01_07_QMD_RESERVED_A                            MW(200:198)
++#define NVA0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST      MW(201:201)
++#define NVA0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000
++#define NVA0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001
++#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0                 MW(202:202)
++#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0_FALSE           0x00000000
++#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0_TRUE            0x00000001
++#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1                 MW(203:203)
++#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1_FALSE           0x00000000
++#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1_TRUE            0x00000001
++#define NVA0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS                   MW(204:204)
++#define NVA0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS_FALSE             0x00000000
++#define NVA0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS_TRUE              0x00000001
++#define NVA0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE             MW(205:205)
++#define NVA0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE       0x00000000
++#define NVA0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE        0x00000001
++#define NVA0C0_QMDV01_07_DEPENDENT_QMD_TYPE                        MW(206:206)
++#define NVA0C0_QMDV01_07_DEPENDENT_QMD_TYPE_QUEUE                  0x00000000
++#define NVA0C0_QMDV01_07_DEPENDENT_QMD_TYPE_GRID                   0x00000001
++#define NVA0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY                  MW(207:207)
++#define NVA0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY_FALSE            0x00000000
++#define NVA0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY_TRUE             0x00000001
++#define NVA0C0_QMDV01_07_QMD_RESERVED_B                            MW(223:208)
++#define NVA0C0_QMDV01_07_CIRCULAR_QUEUE_SIZE                       MW(248:224)
++#define NVA0C0_QMDV01_07_QMD_RESERVED_C                            MW(249:249)
++#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE           MW(250:250)
++#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE     0x00000000
++#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE      0x00000001
++#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE          MW(251:251)
++#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE    0x00000000
++#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE     0x00000001
++#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE             MW(252:252)
++#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE_FALSE       0x00000000
++#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE_TRUE        0x00000001
++#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE              MW(253:253)
++#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE_FALSE        0x00000000
++#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE_TRUE         0x00000001
++#define NVA0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE              MW(254:254)
++#define NVA0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE_FALSE        0x00000000
++#define NVA0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE_TRUE         0x00000001
++#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE          MW(255:255)
++#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE    0x00000000
++#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE     0x00000001
++#define NVA0C0_QMDV01_07_PROGRAM_OFFSET                            MW(287:256)
++#define NVA0C0_QMDV01_07_CIRCULAR_QUEUE_ADDR_LOWER                 MW(319:288)
++#define NVA0C0_QMDV01_07_CIRCULAR_QUEUE_ADDR_UPPER                 MW(327:320)
++#define NVA0C0_QMDV01_07_QMD_RESERVED_D                            MW(335:328)
++#define NVA0C0_QMDV01_07_CIRCULAR_QUEUE_ENTRY_SIZE                 MW(351:336)
++#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_ID                    MW(357:352)
++#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE       MW(365:358)
++#define NVA0C0_QMDV01_07_RELEASE_MEMBAR_TYPE                       MW(366:366)
++#define NVA0C0_QMDV01_07_RELEASE_MEMBAR_TYPE_FE_NONE               0x00000000
++#define NVA0C0_QMDV01_07_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR          0x00000001
++#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE           MW(367:367)
++#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE     0x00000000
++#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE      0x00000001
++#define NVA0C0_QMDV01_07_CWD_MEMBAR_TYPE                           MW(369:368)
++#define NVA0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_NONE                   0x00000000
++#define NVA0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_SYSMEMBAR              0x00000001
++#define NVA0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_MEMBAR                 0x00000003
++#define NVA0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS                     MW(370:370)
++#define NVA0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS_FALSE               0x00000000
++#define NVA0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS_TRUE                0x00000001
++#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE           MW(371:371)
++#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE     0x00000000
++#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE      0x00000001
++#define NVA0C0_QMDV01_07_THROTTLED                                 MW(372:372)
++#define NVA0C0_QMDV01_07_THROTTLED_FALSE                           0x00000000
++#define NVA0C0_QMDV01_07_THROTTLED_TRUE                            0x00000001
++#define NVA0C0_QMDV01_07_FP32_NAN_BEHAVIOR                         MW(376:376)
++#define NVA0C0_QMDV01_07_FP32_NAN_BEHAVIOR_LEGACY                  0x00000000
++#define NVA0C0_QMDV01_07_FP32_NAN_BEHAVIOR_FP64_COMPATIBLE         0x00000001
++#define NVA0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR                     MW(377:377)
++#define NVA0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR_PASS_ZERO           0x00000000
++#define NVA0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR_PASS_INDEFINITE     0x00000001
++#define NVA0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT                    MW(378:378)
++#define NVA0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT__32                0x00000000
++#define NVA0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT_NO_CHECK           0x00000001
++#define NVA0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING                MW(379:379)
++#define NVA0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING_FOUR_BYTES_PER_BANK 0x00000000
++#define NVA0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING_EIGHT_BYTES_PER_BANK 0x00000001
++#define NVA0C0_QMDV01_07_SAMPLER_INDEX                             MW(382:382)
++#define NVA0C0_QMDV01_07_SAMPLER_INDEX_INDEPENDENTLY               0x00000000
++#define NVA0C0_QMDV01_07_SAMPLER_INDEX_VIA_HEADER_INDEX            0x00000001
++#define NVA0C0_QMDV01_07_FP32_NARROW_INSTRUCTION                   MW(383:383)
++#define NVA0C0_QMDV01_07_FP32_NARROW_INSTRUCTION_KEEP_DENORMS      0x00000000
++#define NVA0C0_QMDV01_07_FP32_NARROW_INSTRUCTION_FLUSH_DENORMS     0x00000001
++#define NVA0C0_QMDV01_07_CTA_RASTER_WIDTH                          MW(415:384)
++#define NVA0C0_QMDV01_07_CTA_RASTER_HEIGHT                         MW(431:416)
++#define NVA0C0_QMDV01_07_CTA_RASTER_DEPTH                          MW(447:432)
++#define NVA0C0_QMDV01_07_CTA_RASTER_WIDTH_RESUME                   MW(479:448)
++#define NVA0C0_QMDV01_07_CTA_RASTER_HEIGHT_RESUME                  MW(495:480)
++#define NVA0C0_QMDV01_07_CTA_RASTER_DEPTH_RESUME                   MW(511:496)
++#define NVA0C0_QMDV01_07_QUEUE_ENTRIES_PER_CTA_MINUS_ONE           MW(518:512)
++#define NVA0C0_QMDV01_07_COALESCE_WAITING_PERIOD                   MW(529:522)
++#define NVA0C0_QMDV01_07_SHARED_MEMORY_SIZE                        MW(561:544)
++#define NVA0C0_QMDV01_07_QMD_RESERVED_G                            MW(575:562)
++#define NVA0C0_QMDV01_07_QMD_VERSION                               MW(579:576)
++#define NVA0C0_QMDV01_07_QMD_MAJOR_VERSION                         MW(583:580)
++#define NVA0C0_QMDV01_07_QMD_RESERVED_H                            MW(591:584)
++#define NVA0C0_QMDV01_07_CTA_THREAD_DIMENSION0                     MW(607:592)
++#define NVA0C0_QMDV01_07_CTA_THREAD_DIMENSION1                     MW(623:608)
++#define NVA0C0_QMDV01_07_CTA_THREAD_DIMENSION2                     MW(639:624)
++#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_VALID(i)                  MW((640+(i)*1):(640+(i)*1))
++#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_VALID_FALSE               0x00000000
++#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_VALID_TRUE                0x00000001
++#define NVA0C0_QMDV01_07_QMD_RESERVED_I                            MW(668:648)
++#define NVA0C0_QMDV01_07_L1_CONFIGURATION                          MW(671:669)
++#define NVA0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB 0x00000001
++#define NVA0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB 0x00000002
++#define NVA0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB 0x00000003
++#define NVA0C0_QMDV01_07_SM_DISABLE_MASK_LOWER                     MW(703:672)
++#define NVA0C0_QMDV01_07_SM_DISABLE_MASK_UPPER                     MW(735:704)
++#define NVA0C0_QMDV01_07_RELEASE0_ADDRESS_LOWER                    MW(767:736)
++#define NVA0C0_QMDV01_07_RELEASE0_ADDRESS_UPPER                    MW(775:768)
++#define NVA0C0_QMDV01_07_QMD_RESERVED_J                            MW(783:776)
++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP                     MW(790:788)
++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_ADD             0x00000000
++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_MIN             0x00000001
++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_MAX             0x00000002
++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_INC             0x00000003
++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_DEC             0x00000004
++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_AND             0x00000005
++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_OR              0x00000006
++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_XOR             0x00000007
++#define NVA0C0_QMDV01_07_QMD_RESERVED_K                            MW(791:791)
++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT                 MW(793:792)
++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT_SIGNED_32       0x00000001
++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE                 MW(794:794)
++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE_FALSE           0x00000000
++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE_TRUE            0x00000001
++#define NVA0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE                   MW(799:799)
++#define NVA0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
++#define NVA0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE_ONE_WORD          0x00000001
++#define NVA0C0_QMDV01_07_RELEASE0_PAYLOAD                          MW(831:800)
++#define NVA0C0_QMDV01_07_RELEASE1_ADDRESS_LOWER                    MW(863:832)
++#define NVA0C0_QMDV01_07_RELEASE1_ADDRESS_UPPER                    MW(871:864)
++#define NVA0C0_QMDV01_07_QMD_RESERVED_L                            MW(879:872)
++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP                     MW(886:884)
++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_ADD             0x00000000
++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_MIN             0x00000001
++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_MAX             0x00000002
++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_INC             0x00000003
++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_DEC             0x00000004
++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_AND             0x00000005
++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_OR              0x00000006
++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_XOR             0x00000007
++#define NVA0C0_QMDV01_07_QMD_RESERVED_M                            MW(887:887)
++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT                 MW(889:888)
++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT_SIGNED_32       0x00000001
++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE                 MW(890:890)
++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE_FALSE           0x00000000
++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE_TRUE            0x00000001
++#define NVA0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE                   MW(895:895)
++#define NVA0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
++#define NVA0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE_ONE_WORD          0x00000001
++#define NVA0C0_QMDV01_07_RELEASE1_PAYLOAD                          MW(927:896)
++#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_ADDR_LOWER(i)             MW((959+(i)*64):(928+(i)*64))
++#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_ADDR_UPPER(i)             MW((967+(i)*64):(960+(i)*64))
++#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_RESERVED_ADDR(i)          MW((973+(i)*64):(968+(i)*64))
++#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE(i)             MW((974+(i)*64):(974+(i)*64))
++#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE_FALSE          0x00000000
++#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE_TRUE           0x00000001
++#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_SIZE(i)                   MW((991+(i)*64):(975+(i)*64))
++#define NVA0C0_QMDV01_07_SHADER_LOCAL_MEMORY_LOW_SIZE              MW(1463:1440)
++#define NVA0C0_QMDV01_07_QMD_RESERVED_N                            MW(1466:1464)
++#define NVA0C0_QMDV01_07_BARRIER_COUNT                             MW(1471:1467)
++#define NVA0C0_QMDV01_07_SHADER_LOCAL_MEMORY_HIGH_SIZE             MW(1495:1472)
++#define NVA0C0_QMDV01_07_REGISTER_COUNT                            MW(1503:1496)
++#define NVA0C0_QMDV01_07_SHADER_LOCAL_MEMORY_CRS_SIZE              MW(1527:1504)
++#define NVA0C0_QMDV01_07_SASS_VERSION                              MW(1535:1528)
++#define NVA0C0_QMDV01_07_HW_ONLY_INNER_GET                         MW(1566:1536)
++#define NVA0C0_QMDV01_07_HW_ONLY_REQUIRE_SCHEDULING_PCAS           MW(1567:1567)
++#define NVA0C0_QMDV01_07_HW_ONLY_INNER_PUT                         MW(1598:1568)
++#define NVA0C0_QMDV01_07_QMD_RESERVED_P                            MW(1599:1599)
++#define NVA0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX              MW(1629:1600)
++#define NVA0C0_QMDV01_07_QMD_RESERVED_Q                            MW(1630:1630)
++#define NVA0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID        MW(1631:1631)
++#define NVA0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE  0x00000000
++#define NVA0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE   0x00000001
++#define NVA0C0_QMDV01_07_HW_ONLY_SKED_NEXT_QMD_POINTER             MW(1663:1632)
++#define NVA0C0_QMDV01_07_QMD_SPARE_E                               MW(1695:1664)
++#define NVA0C0_QMDV01_07_QMD_SPARE_F                               MW(1727:1696)
++#define NVA0C0_QMDV01_07_QMD_SPARE_G                               MW(1759:1728)
++#define NVA0C0_QMDV01_07_QMD_SPARE_H                               MW(1791:1760)
++#define NVA0C0_QMDV01_07_QMD_SPARE_I                               MW(1823:1792)
++#define NVA0C0_QMDV01_07_QMD_SPARE_J                               MW(1855:1824)
++#define NVA0C0_QMDV01_07_QMD_SPARE_K                               MW(1887:1856)
++#define NVA0C0_QMDV01_07_QMD_SPARE_L                               MW(1919:1888)
++#define NVA0C0_QMDV01_07_QMD_SPARE_M                               MW(1951:1920)
++#define NVA0C0_QMDV01_07_QMD_SPARE_N                               MW(1983:1952)
++#define NVA0C0_QMDV01_07_DEBUG_ID_UPPER                            MW(2015:1984)
++#define NVA0C0_QMDV01_07_DEBUG_ID_LOWER                            MW(2047:2016)
++
++
++
++#endif // #ifndef __CLA0C0QMD_H__
+diff --git a/src/gallium/drivers/nouveau/nvc0/clc0c0qmd.h b/src/gallium/drivers/nouveau/nvc0/clc0c0qmd.h
+new file mode 100644
+index 00000000000..040bdcd9dcb
+--- /dev/null
++++ b/src/gallium/drivers/nouveau/nvc0/clc0c0qmd.h
+@@ -0,0 +1,665 @@
++/*******************************************************************************
++    Copyright (c) 2016 NVIDIA Corporation
++
++    Permission is hereby granted, free of charge, to any person obtaining a copy
++    of this software and associated documentation files (the "Software"), to
++    deal in the Software without restriction, including without limitation the
++    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
++    sell copies of the Software, and to permit persons to whom the Software is
++    furnished to do so, subject to the following conditions:
++
++        The above copyright notice and this permission notice shall be
++        included in all copies or substantial portions of the Software.
++
++    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++    DEALINGS IN THE SOFTWARE.
++
++*******************************************************************************/
++
++/* AUTO GENERATED FILE -- DO NOT EDIT */
++
++#ifndef __CLC0C0QMD_H__
++#define __CLC0C0QMD_H__
++
++/*
++** Queue Meta Data, Version 01_07
++ */
++
++// The below C preprocessor definitions describe "multi-word" structures, where
++// fields may have bit numbers beyond 32.  For example, MW(127:96) means
++// the field is in bits 0-31 of word number 3 of the structure.  The "MW(X:Y)"
++// syntax is to distinguish from similar "X:Y" single-word definitions: the
++// macros historically used for single-word definitions would fail with
++// multi-word definitions.
++//
++// See nvmisc.h:DRF_VAL_MW() in the source code of the kernel
++// interface layer of nvidia.ko for an example of how to manipulate
++// these MW(X:Y) definitions.
++
++#define NVC0C0_QMDV01_07_OUTER_PUT                                 MW(30:0)
++#define NVC0C0_QMDV01_07_OUTER_OVERFLOW                            MW(31:31)
++#define NVC0C0_QMDV01_07_OUTER_GET                                 MW(62:32)
++#define NVC0C0_QMDV01_07_OUTER_STICKY_OVERFLOW                     MW(63:63)
++#define NVC0C0_QMDV01_07_INNER_GET                                 MW(94:64)
++#define NVC0C0_QMDV01_07_INNER_OVERFLOW                            MW(95:95)
++#define NVC0C0_QMDV01_07_INNER_PUT                                 MW(126:96)
++#define NVC0C0_QMDV01_07_INNER_STICKY_OVERFLOW                     MW(127:127)
++#define NVC0C0_QMDV01_07_QMD_RESERVED_A_A                          MW(159:128)
++#define NVC0C0_QMDV01_07_DEPENDENT_QMD_POINTER                     MW(191:160)
++#define NVC0C0_QMDV01_07_QMD_GROUP_ID                              MW(197:192)
++#define NVC0C0_QMDV01_07_SM_GLOBAL_CACHING_ENABLE                  MW(198:198)
++#define NVC0C0_QMDV01_07_RUN_CTA_IN_ONE_SM_PARTITION               MW(199:199)
++#define NVC0C0_QMDV01_07_RUN_CTA_IN_ONE_SM_PARTITION_FALSE         0x00000000
++#define NVC0C0_QMDV01_07_RUN_CTA_IN_ONE_SM_PARTITION_TRUE          0x00000001
++#define NVC0C0_QMDV01_07_IS_QUEUE                                  MW(200:200)
++#define NVC0C0_QMDV01_07_IS_QUEUE_FALSE                            0x00000000
++#define NVC0C0_QMDV01_07_IS_QUEUE_TRUE                             0x00000001
++#define NVC0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST      MW(201:201)
++#define NVC0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000
++#define NVC0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001
++#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0                 MW(202:202)
++#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0_FALSE           0x00000000
++#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0_TRUE            0x00000001
++#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1                 MW(203:203)
++#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1_FALSE           0x00000000
++#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1_TRUE            0x00000001
++#define NVC0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS                   MW(204:204)
++#define NVC0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS_FALSE             0x00000000
++#define NVC0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS_TRUE              0x00000001
++#define NVC0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE             MW(205:205)
++#define NVC0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE       0x00000000
++#define NVC0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE        0x00000001
++#define NVC0C0_QMDV01_07_DEPENDENT_QMD_TYPE                        MW(206:206)
++#define NVC0C0_QMDV01_07_DEPENDENT_QMD_TYPE_QUEUE                  0x00000000
++#define NVC0C0_QMDV01_07_DEPENDENT_QMD_TYPE_GRID                   0x00000001
++#define NVC0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY                  MW(207:207)
++#define NVC0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY_FALSE            0x00000000
++#define NVC0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY_TRUE             0x00000001
++#define NVC0C0_QMDV01_07_QMD_RESERVED_B                            MW(223:208)
++#define NVC0C0_QMDV01_07_CIRCULAR_QUEUE_SIZE                       MW(248:224)
++#define NVC0C0_QMDV01_07_QMD_RESERVED_C                            MW(249:249)
++#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE           MW(250:250)
++#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE     0x00000000
++#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE      0x00000001
++#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE          MW(251:251)
++#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE    0x00000000
++#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE     0x00000001
++#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE             MW(252:252)
++#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE_FALSE       0x00000000
++#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE_TRUE        0x00000001
++#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE              MW(253:253)
++#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE_FALSE        0x00000000
++#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE_TRUE         0x00000001
++#define NVC0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE              MW(254:254)
++#define NVC0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE_FALSE        0x00000000
++#define NVC0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE_TRUE         0x00000001
++#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE          MW(255:255)
++#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE    0x00000000
++#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE     0x00000001
++#define NVC0C0_QMDV01_07_PROGRAM_OFFSET                            MW(287:256)
++#define NVC0C0_QMDV01_07_CIRCULAR_QUEUE_ADDR_LOWER                 MW(319:288)
++#define NVC0C0_QMDV01_07_CIRCULAR_QUEUE_ADDR_UPPER                 MW(327:320)
++#define NVC0C0_QMDV01_07_QMD_RESERVED_D                            MW(335:328)
++#define NVC0C0_QMDV01_07_CIRCULAR_QUEUE_ENTRY_SIZE                 MW(351:336)
++#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_ID                    MW(357:352)
++#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE       MW(365:358)
++#define NVC0C0_QMDV01_07_RELEASE_MEMBAR_TYPE                       MW(366:366)
++#define NVC0C0_QMDV01_07_RELEASE_MEMBAR_TYPE_FE_NONE               0x00000000
++#define NVC0C0_QMDV01_07_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR          0x00000001
++#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE           MW(367:367)
++#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE     0x00000000
++#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE      0x00000001
++#define NVC0C0_QMDV01_07_CWD_MEMBAR_TYPE                           MW(369:368)
++#define NVC0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_NONE                   0x00000000
++#define NVC0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_SYSMEMBAR              0x00000001
++#define NVC0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_MEMBAR                 0x00000003
++#define NVC0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS                     MW(370:370)
++#define NVC0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS_FALSE               0x00000000
++#define NVC0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS_TRUE                0x00000001
++#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE           MW(371:371)
++#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE     0x00000000
++#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE      0x00000001
++#define NVC0C0_QMDV01_07_THROTTLED                                 MW(372:372)
++#define NVC0C0_QMDV01_07_THROTTLED_FALSE                           0x00000000
++#define NVC0C0_QMDV01_07_THROTTLED_TRUE                            0x00000001
++#define NVC0C0_QMDV01_07_FP32_NAN_BEHAVIOR                         MW(376:376)
++#define NVC0C0_QMDV01_07_FP32_NAN_BEHAVIOR_LEGACY                  0x00000000
++#define NVC0C0_QMDV01_07_FP32_NAN_BEHAVIOR_FP64_COMPATIBLE         0x00000001
++#define NVC0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR                     MW(377:377)
++#define NVC0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR_PASS_ZERO           0x00000000
++#define NVC0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR_PASS_INDEFINITE     0x00000001
++#define NVC0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT                    MW(378:378)
++#define NVC0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT__32                0x00000000
++#define NVC0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT_NO_CHECK           0x00000001
++#define NVC0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING                MW(379:379)
++#define NVC0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING_FOUR_BYTES_PER_BANK 0x00000000
++#define NVC0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING_EIGHT_BYTES_PER_BANK 0x00000001
++#define NVC0C0_QMDV01_07_SAMPLER_INDEX                             MW(382:382)
++#define NVC0C0_QMDV01_07_SAMPLER_INDEX_INDEPENDENTLY               0x00000000
++#define NVC0C0_QMDV01_07_SAMPLER_INDEX_VIA_HEADER_INDEX            0x00000001
++#define NVC0C0_QMDV01_07_FP32_NARROW_INSTRUCTION                   MW(383:383)
++#define NVC0C0_QMDV01_07_FP32_NARROW_INSTRUCTION_KEEP_DENORMS      0x00000000
++#define NVC0C0_QMDV01_07_FP32_NARROW_INSTRUCTION_FLUSH_DENORMS     0x00000001
++#define NVC0C0_QMDV01_07_CTA_RASTER_WIDTH                          MW(415:384)
++#define NVC0C0_QMDV01_07_CTA_RASTER_HEIGHT                         MW(431:416)
++#define NVC0C0_QMDV01_07_CTA_RASTER_DEPTH                          MW(447:432)
++#define NVC0C0_QMDV01_07_CTA_RASTER_WIDTH_RESUME                   MW(479:448)
++#define NVC0C0_QMDV01_07_CTA_RASTER_HEIGHT_RESUME                  MW(495:480)
++#define NVC0C0_QMDV01_07_CTA_RASTER_DEPTH_RESUME                   MW(511:496)
++#define NVC0C0_QMDV01_07_QUEUE_ENTRIES_PER_CTA_MINUS_ONE           MW(518:512)
++#define NVC0C0_QMDV01_07_COALESCE_WAITING_PERIOD                   MW(529:522)
++#define NVC0C0_QMDV01_07_SHARED_MEMORY_SIZE                        MW(561:544)
++#define NVC0C0_QMDV01_07_QMD_RESERVED_G                            MW(575:562)
++#define NVC0C0_QMDV01_07_QMD_VERSION                               MW(579:576)
++#define NVC0C0_QMDV01_07_QMD_MAJOR_VERSION                         MW(583:580)
++#define NVC0C0_QMDV01_07_QMD_RESERVED_H                            MW(591:584)
++#define NVC0C0_QMDV01_07_CTA_THREAD_DIMENSION0                     MW(607:592)
++#define NVC0C0_QMDV01_07_CTA_THREAD_DIMENSION1                     MW(623:608)
++#define NVC0C0_QMDV01_07_CTA_THREAD_DIMENSION2                     MW(639:624)
++#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_VALID(i)                  MW((640+(i)*1):(640+(i)*1))
++#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_VALID_FALSE               0x00000000
++#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_VALID_TRUE                0x00000001
++#define NVC0C0_QMDV01_07_QMD_RESERVED_I                            MW(668:648)
++#define NVC0C0_QMDV01_07_L1_CONFIGURATION                          MW(671:669)
++#define NVC0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB 0x00000001
++#define NVC0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB 0x00000002
++#define NVC0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB 0x00000003
++#define NVC0C0_QMDV01_07_SM_DISABLE_MASK_LOWER                     MW(703:672)
++#define NVC0C0_QMDV01_07_SM_DISABLE_MASK_UPPER                     MW(735:704)
++#define NVC0C0_QMDV01_07_RELEASE0_ADDRESS_LOWER                    MW(767:736)
++#define NVC0C0_QMDV01_07_RELEASE0_ADDRESS_UPPER                    MW(775:768)
++#define NVC0C0_QMDV01_07_QMD_RESERVED_J                            MW(783:776)
++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP                     MW(790:788)
++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_ADD             0x00000000
++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_MIN             0x00000001
++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_MAX             0x00000002
++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_INC             0x00000003
++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_DEC             0x00000004
++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_AND             0x00000005
++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_OR              0x00000006
++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_XOR             0x00000007
++#define NVC0C0_QMDV01_07_QMD_RESERVED_K                            MW(791:791)
++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT                 MW(793:792)
++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT_SIGNED_32       0x00000001
++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE                 MW(794:794)
++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE_FALSE           0x00000000
++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE_TRUE            0x00000001
++#define NVC0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE                   MW(799:799)
++#define NVC0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
++#define NVC0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE_ONE_WORD          0x00000001
++#define NVC0C0_QMDV01_07_RELEASE0_PAYLOAD                          MW(831:800)
++#define NVC0C0_QMDV01_07_RELEASE1_ADDRESS_LOWER                    MW(863:832)
++#define NVC0C0_QMDV01_07_RELEASE1_ADDRESS_UPPER                    MW(871:864)
++#define NVC0C0_QMDV01_07_QMD_RESERVED_L                            MW(879:872)
++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP                     MW(886:884)
++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_ADD             0x00000000
++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_MIN             0x00000001
++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_MAX             0x00000002
++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_INC             0x00000003
++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_DEC             0x00000004
++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_AND             0x00000005
++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_OR              0x00000006
++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_XOR             0x00000007
++#define NVC0C0_QMDV01_07_QMD_RESERVED_M                            MW(887:887)
++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT                 MW(889:888)
++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT_SIGNED_32       0x00000001
++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE                 MW(890:890)
++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE_FALSE           0x00000000
++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE_TRUE            0x00000001
++#define NVC0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE                   MW(895:895)
++#define NVC0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
++#define NVC0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE_ONE_WORD          0x00000001
++#define NVC0C0_QMDV01_07_RELEASE1_PAYLOAD                          MW(927:896)
++#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_ADDR_LOWER(i)             MW((959+(i)*64):(928+(i)*64))
++#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_ADDR_UPPER(i)             MW((967+(i)*64):(960+(i)*64))
++#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_RESERVED_ADDR(i)          MW((973+(i)*64):(968+(i)*64))
++#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE(i)             MW((974+(i)*64):(974+(i)*64))
++#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE_FALSE          0x00000000
++#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE_TRUE           0x00000001
++#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_SIZE(i)                   MW((991+(i)*64):(975+(i)*64))
++#define NVC0C0_QMDV01_07_SHADER_LOCAL_MEMORY_LOW_SIZE              MW(1463:1440)
++#define NVC0C0_QMDV01_07_QMD_RESERVED_N                            MW(1466:1464)
++#define NVC0C0_QMDV01_07_BARRIER_COUNT                             MW(1471:1467)
++#define NVC0C0_QMDV01_07_SHADER_LOCAL_MEMORY_HIGH_SIZE             MW(1495:1472)
++#define NVC0C0_QMDV01_07_REGISTER_COUNT                            MW(1503:1496)
++#define NVC0C0_QMDV01_07_SHADER_LOCAL_MEMORY_CRS_SIZE              MW(1527:1504)
++#define NVC0C0_QMDV01_07_SASS_VERSION                              MW(1535:1528)
++#define NVC0C0_QMDV01_07_HW_ONLY_INNER_GET                         MW(1566:1536)
++#define NVC0C0_QMDV01_07_HW_ONLY_REQUIRE_SCHEDULING_PCAS           MW(1567:1567)
++#define NVC0C0_QMDV01_07_HW_ONLY_INNER_PUT                         MW(1598:1568)
++#define NVC0C0_QMDV01_07_HW_ONLY_SCG_TYPE                          MW(1599:1599)
++#define NVC0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX              MW(1629:1600)
++#define NVC0C0_QMDV01_07_QMD_RESERVED_Q                            MW(1630:1630)
++#define NVC0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID        MW(1631:1631)
++#define NVC0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE  0x00000000
++#define NVC0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE   0x00000001
++#define NVC0C0_QMDV01_07_HW_ONLY_SKED_NEXT_QMD_POINTER             MW(1663:1632)
++#define NVC0C0_QMDV01_07_QMD_SPARE_E                               MW(1695:1664)
++#define NVC0C0_QMDV01_07_QMD_SPARE_F                               MW(1727:1696)
++#define NVC0C0_QMDV01_07_QMD_SPARE_G                               MW(1759:1728)
++#define NVC0C0_QMDV01_07_QMD_SPARE_H                               MW(1791:1760)
++#define NVC0C0_QMDV01_07_QMD_SPARE_I                               MW(1823:1792)
++#define NVC0C0_QMDV01_07_QMD_SPARE_J                               MW(1855:1824)
++#define NVC0C0_QMDV01_07_QMD_SPARE_K                               MW(1887:1856)
++#define NVC0C0_QMDV01_07_QMD_SPARE_L                               MW(1919:1888)
++#define NVC0C0_QMDV01_07_QMD_SPARE_M                               MW(1951:1920)
++#define NVC0C0_QMDV01_07_QMD_SPARE_N                               MW(1983:1952)
++#define NVC0C0_QMDV01_07_DEBUG_ID_UPPER                            MW(2015:1984)
++#define NVC0C0_QMDV01_07_DEBUG_ID_LOWER                            MW(2047:2016)
++
++
++/*
++** Queue Meta Data, Version 02_00
++ */
++
++#define NVC0C0_QMDV02_00_OUTER_PUT                                 MW(30:0)
++#define NVC0C0_QMDV02_00_OUTER_OVERFLOW                            MW(31:31)
++#define NVC0C0_QMDV02_00_OUTER_GET                                 MW(62:32)
++#define NVC0C0_QMDV02_00_OUTER_STICKY_OVERFLOW                     MW(63:63)
++#define NVC0C0_QMDV02_00_INNER_GET                                 MW(94:64)
++#define NVC0C0_QMDV02_00_INNER_OVERFLOW                            MW(95:95)
++#define NVC0C0_QMDV02_00_INNER_PUT                                 MW(126:96)
++#define NVC0C0_QMDV02_00_INNER_STICKY_OVERFLOW                     MW(127:127)
++#define NVC0C0_QMDV02_00_QMD_RESERVED_A_A                          MW(159:128)
++#define NVC0C0_QMDV02_00_DEPENDENT_QMD_POINTER                     MW(191:160)
++#define NVC0C0_QMDV02_00_QMD_GROUP_ID                              MW(197:192)
++#define NVC0C0_QMDV02_00_SM_GLOBAL_CACHING_ENABLE                  MW(198:198)
++#define NVC0C0_QMDV02_00_RUN_CTA_IN_ONE_SM_PARTITION               MW(199:199)
++#define NVC0C0_QMDV02_00_RUN_CTA_IN_ONE_SM_PARTITION_FALSE         0x00000000
++#define NVC0C0_QMDV02_00_RUN_CTA_IN_ONE_SM_PARTITION_TRUE          0x00000001
++#define NVC0C0_QMDV02_00_IS_QUEUE                                  MW(200:200)
++#define NVC0C0_QMDV02_00_IS_QUEUE_FALSE                            0x00000000
++#define NVC0C0_QMDV02_00_IS_QUEUE_TRUE                             0x00000001
++#define NVC0C0_QMDV02_00_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST      MW(201:201)
++#define NVC0C0_QMDV02_00_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000
++#define NVC0C0_QMDV02_00_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001
++#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE0                 MW(202:202)
++#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE0_FALSE           0x00000000
++#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE0_TRUE            0x00000001
++#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE1                 MW(203:203)
++#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE1_FALSE           0x00000000
++#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE1_TRUE            0x00000001
++#define NVC0C0_QMDV02_00_REQUIRE_SCHEDULING_PCAS                   MW(204:204)
++#define NVC0C0_QMDV02_00_REQUIRE_SCHEDULING_PCAS_FALSE             0x00000000
++#define NVC0C0_QMDV02_00_REQUIRE_SCHEDULING_PCAS_TRUE              0x00000001
++#define NVC0C0_QMDV02_00_DEPENDENT_QMD_SCHEDULE_ENABLE             MW(205:205)
++#define NVC0C0_QMDV02_00_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE       0x00000000
++#define NVC0C0_QMDV02_00_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE        0x00000001
++#define NVC0C0_QMDV02_00_DEPENDENT_QMD_TYPE                        MW(206:206)
++#define NVC0C0_QMDV02_00_DEPENDENT_QMD_TYPE_QUEUE                  0x00000000
++#define NVC0C0_QMDV02_00_DEPENDENT_QMD_TYPE_GRID                   0x00000001
++#define NVC0C0_QMDV02_00_DEPENDENT_QMD_FIELD_COPY                  MW(207:207)
++#define NVC0C0_QMDV02_00_DEPENDENT_QMD_FIELD_COPY_FALSE            0x00000000
++#define NVC0C0_QMDV02_00_DEPENDENT_QMD_FIELD_COPY_TRUE             0x00000001
++#define NVC0C0_QMDV02_00_QMD_RESERVED_B                            MW(223:208)
++#define NVC0C0_QMDV02_00_CIRCULAR_QUEUE_SIZE                       MW(248:224)
++#define NVC0C0_QMDV02_00_QMD_RESERVED_C                            MW(249:249)
++#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_HEADER_CACHE           MW(250:250)
++#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE     0x00000000
++#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE      0x00000001
++#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_SAMPLER_CACHE          MW(251:251)
++#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE    0x00000000
++#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE     0x00000001
++#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_DATA_CACHE             MW(252:252)
++#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_DATA_CACHE_FALSE       0x00000000
++#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_DATA_CACHE_TRUE        0x00000001
++#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_DATA_CACHE              MW(253:253)
++#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_DATA_CACHE_FALSE        0x00000000
++#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_DATA_CACHE_TRUE         0x00000001
++#define NVC0C0_QMDV02_00_INVALIDATE_INSTRUCTION_CACHE              MW(254:254)
++#define NVC0C0_QMDV02_00_INVALIDATE_INSTRUCTION_CACHE_FALSE        0x00000000
++#define NVC0C0_QMDV02_00_INVALIDATE_INSTRUCTION_CACHE_TRUE         0x00000001
++#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_CONSTANT_CACHE          MW(255:255)
++#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE    0x00000000
++#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE     0x00000001
++#define NVC0C0_QMDV02_00_PROGRAM_OFFSET                            MW(287:256)
++#define NVC0C0_QMDV02_00_CIRCULAR_QUEUE_ADDR_LOWER                 MW(319:288)
++#define NVC0C0_QMDV02_00_CIRCULAR_QUEUE_ADDR_UPPER                 MW(327:320)
++#define NVC0C0_QMDV02_00_QMD_RESERVED_D                            MW(335:328)
++#define NVC0C0_QMDV02_00_CIRCULAR_QUEUE_ENTRY_SIZE                 MW(351:336)
++#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_ID                    MW(357:352)
++#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE       MW(365:358)
++#define NVC0C0_QMDV02_00_RELEASE_MEMBAR_TYPE                       MW(366:366)
++#define NVC0C0_QMDV02_00_RELEASE_MEMBAR_TYPE_FE_NONE               0x00000000
++#define NVC0C0_QMDV02_00_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR          0x00000001
++#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_INCR_ENABLE           MW(367:367)
++#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE     0x00000000
++#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE      0x00000001
++#define NVC0C0_QMDV02_00_CWD_MEMBAR_TYPE                           MW(369:368)
++#define NVC0C0_QMDV02_00_CWD_MEMBAR_TYPE_L1_NONE                   0x00000000
++#define NVC0C0_QMDV02_00_CWD_MEMBAR_TYPE_L1_SYSMEMBAR              0x00000001
++#define NVC0C0_QMDV02_00_CWD_MEMBAR_TYPE_L1_MEMBAR                 0x00000003
++#define NVC0C0_QMDV02_00_SEQUENTIALLY_RUN_CTAS                     MW(370:370)
++#define NVC0C0_QMDV02_00_SEQUENTIALLY_RUN_CTAS_FALSE               0x00000000
++#define NVC0C0_QMDV02_00_SEQUENTIALLY_RUN_CTAS_TRUE                0x00000001
++#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_DECR_ENABLE           MW(371:371)
++#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE     0x00000000
++#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE      0x00000001
++#define NVC0C0_QMDV02_00_THROTTLED                                 MW(372:372)
++#define NVC0C0_QMDV02_00_THROTTLED_FALSE                           0x00000000
++#define NVC0C0_QMDV02_00_THROTTLED_TRUE                            0x00000001
++#define NVC0C0_QMDV02_00_API_VISIBLE_CALL_LIMIT                    MW(378:378)
++#define NVC0C0_QMDV02_00_API_VISIBLE_CALL_LIMIT__32                0x00000000
++#define NVC0C0_QMDV02_00_API_VISIBLE_CALL_LIMIT_NO_CHECK           0x00000001
++#define NVC0C0_QMDV02_00_SAMPLER_INDEX                             MW(382:382)
++#define NVC0C0_QMDV02_00_SAMPLER_INDEX_INDEPENDENTLY               0x00000000
++#define NVC0C0_QMDV02_00_SAMPLER_INDEX_VIA_HEADER_INDEX            0x00000001
++#define NVC0C0_QMDV02_00_CTA_RASTER_WIDTH                          MW(415:384)
++#define NVC0C0_QMDV02_00_CTA_RASTER_HEIGHT                         MW(431:416)
++#define NVC0C0_QMDV02_00_QMD_RESERVED13A                           MW(447:432)
++#define NVC0C0_QMDV02_00_CTA_RASTER_DEPTH                          MW(463:448)
++#define NVC0C0_QMDV02_00_QMD_RESERVED14A                           MW(479:464)
++#define NVC0C0_QMDV02_00_QMD_RESERVED15A                           MW(511:480)
++#define NVC0C0_QMDV02_00_QUEUE_ENTRIES_PER_CTA_MINUS_ONE           MW(518:512)
++#define NVC0C0_QMDV02_00_COALESCE_WAITING_PERIOD                   MW(529:522)
++#define NVC0C0_QMDV02_00_SHARED_MEMORY_SIZE                        MW(561:544)
++#define NVC0C0_QMDV02_00_QMD_RESERVED_G                            MW(575:562)
++#define NVC0C0_QMDV02_00_QMD_VERSION                               MW(579:576)
++#define NVC0C0_QMDV02_00_QMD_MAJOR_VERSION                         MW(583:580)
++#define NVC0C0_QMDV02_00_QMD_RESERVED_H                            MW(591:584)
++#define NVC0C0_QMDV02_00_CTA_THREAD_DIMENSION0                     MW(607:592)
++#define NVC0C0_QMDV02_00_CTA_THREAD_DIMENSION1                     MW(623:608)
++#define NVC0C0_QMDV02_00_CTA_THREAD_DIMENSION2                     MW(639:624)
++#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_VALID(i)                  MW((640+(i)*1):(640+(i)*1))
++#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_VALID_FALSE               0x00000000
++#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_VALID_TRUE                0x00000001
++#define NVC0C0_QMDV02_00_QMD_RESERVED_I                            MW(671:648)
++#define NVC0C0_QMDV02_00_SM_DISABLE_MASK_LOWER                     MW(703:672)
++#define NVC0C0_QMDV02_00_SM_DISABLE_MASK_UPPER                     MW(735:704)
++#define NVC0C0_QMDV02_00_RELEASE0_ADDRESS_LOWER                    MW(767:736)
++#define NVC0C0_QMDV02_00_RELEASE0_ADDRESS_UPPER                    MW(775:768)
++#define NVC0C0_QMDV02_00_QMD_RESERVED_J                            MW(783:776)
++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP                     MW(790:788)
++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_ADD             0x00000000
++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_MIN             0x00000001
++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_MAX             0x00000002
++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_INC             0x00000003
++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_DEC             0x00000004
++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_AND             0x00000005
++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_OR              0x00000006
++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_XOR             0x00000007
++#define NVC0C0_QMDV02_00_QMD_RESERVED_K                            MW(791:791)
++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_FORMAT                 MW(793:792)
++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_FORMAT_SIGNED_32       0x00000001
++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_ENABLE                 MW(794:794)
++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_ENABLE_FALSE           0x00000000
++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_ENABLE_TRUE            0x00000001
++#define NVC0C0_QMDV02_00_RELEASE0_STRUCTURE_SIZE                   MW(799:799)
++#define NVC0C0_QMDV02_00_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
++#define NVC0C0_QMDV02_00_RELEASE0_STRUCTURE_SIZE_ONE_WORD          0x00000001
++#define NVC0C0_QMDV02_00_RELEASE0_PAYLOAD                          MW(831:800)
++#define NVC0C0_QMDV02_00_RELEASE1_ADDRESS_LOWER                    MW(863:832)
++#define NVC0C0_QMDV02_00_RELEASE1_ADDRESS_UPPER                    MW(871:864)
++#define NVC0C0_QMDV02_00_QMD_RESERVED_L                            MW(879:872)
++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP                     MW(886:884)
++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_ADD             0x00000000
++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_MIN             0x00000001
++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_MAX             0x00000002
++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_INC             0x00000003
++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_DEC             0x00000004
++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_AND             0x00000005
++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_OR              0x00000006
++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_XOR             0x00000007
++#define NVC0C0_QMDV02_00_QMD_RESERVED_M                            MW(887:887)
++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_FORMAT                 MW(889:888)
++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_FORMAT_SIGNED_32       0x00000001
++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_ENABLE                 MW(890:890)
++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_ENABLE_FALSE           0x00000000
++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_ENABLE_TRUE            0x00000001
++#define NVC0C0_QMDV02_00_RELEASE1_STRUCTURE_SIZE                   MW(895:895)
++#define NVC0C0_QMDV02_00_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
++#define NVC0C0_QMDV02_00_RELEASE1_STRUCTURE_SIZE_ONE_WORD          0x00000001
++#define NVC0C0_QMDV02_00_RELEASE1_PAYLOAD                          MW(927:896)
++#define NVC0C0_QMDV02_00_SHADER_LOCAL_MEMORY_LOW_SIZE              MW(951:928)
++#define NVC0C0_QMDV02_00_QMD_RESERVED_N                            MW(954:952)
++#define NVC0C0_QMDV02_00_BARRIER_COUNT                             MW(959:955)
++#define NVC0C0_QMDV02_00_SHADER_LOCAL_MEMORY_HIGH_SIZE             MW(983:960)
++#define NVC0C0_QMDV02_00_REGISTER_COUNT                            MW(991:984)
++#define NVC0C0_QMDV02_00_SHADER_LOCAL_MEMORY_CRS_SIZE              MW(1015:992)
++#define NVC0C0_QMDV02_00_SASS_VERSION                              MW(1023:1016)
++#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_ADDR_LOWER(i)             MW((1055+(i)*64):(1024+(i)*64))
++#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_ADDR_UPPER(i)             MW((1072+(i)*64):(1056+(i)*64))
++#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_RESERVED_ADDR(i)          MW((1073+(i)*64):(1073+(i)*64))
++#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_INVALIDATE(i)             MW((1074+(i)*64):(1074+(i)*64))
++#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_INVALIDATE_FALSE          0x00000000
++#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_INVALIDATE_TRUE           0x00000001
++#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_SIZE_SHIFTED4(i)          MW((1087+(i)*64):(1075+(i)*64))
++#define NVC0C0_QMDV02_00_HW_ONLY_INNER_GET                         MW(1566:1536)
++#define NVC0C0_QMDV02_00_HW_ONLY_REQUIRE_SCHEDULING_PCAS           MW(1567:1567)
++#define NVC0C0_QMDV02_00_HW_ONLY_INNER_PUT                         MW(1598:1568)
++#define NVC0C0_QMDV02_00_HW_ONLY_SCG_TYPE                          MW(1599:1599)
++#define NVC0C0_QMDV02_00_HW_ONLY_SPAN_LIST_HEAD_INDEX              MW(1629:1600)
++#define NVC0C0_QMDV02_00_QMD_RESERVED_Q                            MW(1630:1630)
++#define NVC0C0_QMDV02_00_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID        MW(1631:1631)
++#define NVC0C0_QMDV02_00_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE  0x00000000
++#define NVC0C0_QMDV02_00_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE   0x00000001
++#define NVC0C0_QMDV02_00_HW_ONLY_SKED_NEXT_QMD_POINTER             MW(1663:1632)
++#define NVC0C0_QMDV02_00_CTA_RASTER_WIDTH_RESUME                   MW(1695:1664)
++#define NVC0C0_QMDV02_00_CTA_RASTER_HEIGHT_RESUME                  MW(1711:1696)
++#define NVC0C0_QMDV02_00_CTA_RASTER_DEPTH_RESUME                   MW(1727:1712)
++#define NVC0C0_QMDV02_00_QMD_SPARE_G                               MW(1759:1728)
++#define NVC0C0_QMDV02_00_QMD_SPARE_H                               MW(1791:1760)
++#define NVC0C0_QMDV02_00_QMD_SPARE_I                               MW(1823:1792)
++#define NVC0C0_QMDV02_00_QMD_SPARE_J                               MW(1855:1824)
++#define NVC0C0_QMDV02_00_QMD_SPARE_K                               MW(1887:1856)
++#define NVC0C0_QMDV02_00_QMD_SPARE_L                               MW(1919:1888)
++#define NVC0C0_QMDV02_00_QMD_SPARE_M                               MW(1951:1920)
++#define NVC0C0_QMDV02_00_QMD_SPARE_N                               MW(1983:1952)
++#define NVC0C0_QMDV02_00_DEBUG_ID_UPPER                            MW(2015:1984)
++#define NVC0C0_QMDV02_00_DEBUG_ID_LOWER                            MW(2047:2016)
++
++
++/*
++** Queue Meta Data, Version 02_01
++ */
++
++#define NVC0C0_QMDV02_01_OUTER_PUT                                 MW(30:0)
++#define NVC0C0_QMDV02_01_OUTER_OVERFLOW                            MW(31:31)
++#define NVC0C0_QMDV02_01_OUTER_GET                                 MW(62:32)
++#define NVC0C0_QMDV02_01_OUTER_STICKY_OVERFLOW                     MW(63:63)
++#define NVC0C0_QMDV02_01_INNER_GET                                 MW(94:64)
++#define NVC0C0_QMDV02_01_INNER_OVERFLOW                            MW(95:95)
++#define NVC0C0_QMDV02_01_INNER_PUT                                 MW(126:96)
++#define NVC0C0_QMDV02_01_INNER_STICKY_OVERFLOW                     MW(127:127)
++#define NVC0C0_QMDV02_01_QMD_GROUP_ID                              MW(133:128)
++#define NVC0C0_QMDV02_01_SM_GLOBAL_CACHING_ENABLE                  MW(134:134)
++#define NVC0C0_QMDV02_01_RUN_CTA_IN_ONE_SM_PARTITION               MW(135:135)
++#define NVC0C0_QMDV02_01_RUN_CTA_IN_ONE_SM_PARTITION_FALSE         0x00000000
++#define NVC0C0_QMDV02_01_RUN_CTA_IN_ONE_SM_PARTITION_TRUE          0x00000001
++#define NVC0C0_QMDV02_01_IS_QUEUE                                  MW(136:136)
++#define NVC0C0_QMDV02_01_IS_QUEUE_FALSE                            0x00000000
++#define NVC0C0_QMDV02_01_IS_QUEUE_TRUE                             0x00000001
++#define NVC0C0_QMDV02_01_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST      MW(137:137)
++#define NVC0C0_QMDV02_01_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000
++#define NVC0C0_QMDV02_01_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001
++#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE0                 MW(138:138)
++#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE0_FALSE           0x00000000
++#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE0_TRUE            0x00000001
++#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE1                 MW(139:139)
++#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE1_FALSE           0x00000000
++#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE1_TRUE            0x00000001
++#define NVC0C0_QMDV02_01_REQUIRE_SCHEDULING_PCAS                   MW(140:140)
++#define NVC0C0_QMDV02_01_REQUIRE_SCHEDULING_PCAS_FALSE             0x00000000
++#define NVC0C0_QMDV02_01_REQUIRE_SCHEDULING_PCAS_TRUE              0x00000001
++#define NVC0C0_QMDV02_01_DEPENDENT_QMD_SCHEDULE_ENABLE             MW(141:141)
++#define NVC0C0_QMDV02_01_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE       0x00000000
++#define NVC0C0_QMDV02_01_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE        0x00000001
++#define NVC0C0_QMDV02_01_DEPENDENT_QMD_TYPE                        MW(142:142)
++#define NVC0C0_QMDV02_01_DEPENDENT_QMD_TYPE_QUEUE                  0x00000000
++#define NVC0C0_QMDV02_01_DEPENDENT_QMD_TYPE_GRID                   0x00000001
++#define NVC0C0_QMDV02_01_DEPENDENT_QMD_FIELD_COPY                  MW(143:143)
++#define NVC0C0_QMDV02_01_DEPENDENT_QMD_FIELD_COPY_FALSE            0x00000000
++#define NVC0C0_QMDV02_01_DEPENDENT_QMD_FIELD_COPY_TRUE             0x00000001
++#define NVC0C0_QMDV02_01_QMD_RESERVED_B                            MW(159:144)
++#define NVC0C0_QMDV02_01_CIRCULAR_QUEUE_SIZE                       MW(184:160)
++#define NVC0C0_QMDV02_01_QMD_RESERVED_C                            MW(185:185)
++#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_HEADER_CACHE           MW(186:186)
++#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE     0x00000000
++#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE      0x00000001
++#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_SAMPLER_CACHE          MW(187:187)
++#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE    0x00000000
++#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE     0x00000001
++#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_DATA_CACHE             MW(188:188)
++#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_DATA_CACHE_FALSE       0x00000000
++#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_DATA_CACHE_TRUE        0x00000001
++#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_DATA_CACHE              MW(189:189)
++#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_DATA_CACHE_FALSE        0x00000000
++#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_DATA_CACHE_TRUE         0x00000001
++#define NVC0C0_QMDV02_01_INVALIDATE_INSTRUCTION_CACHE              MW(190:190)
++#define NVC0C0_QMDV02_01_INVALIDATE_INSTRUCTION_CACHE_FALSE        0x00000000
++#define NVC0C0_QMDV02_01_INVALIDATE_INSTRUCTION_CACHE_TRUE         0x00000001
++#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_CONSTANT_CACHE          MW(191:191)
++#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE    0x00000000
++#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE     0x00000001
++#define NVC0C0_QMDV02_01_CTA_RASTER_WIDTH_RESUME                   MW(223:192)
++#define NVC0C0_QMDV02_01_CTA_RASTER_HEIGHT_RESUME                  MW(239:224)
++#define NVC0C0_QMDV02_01_CTA_RASTER_DEPTH_RESUME                   MW(255:240)
++#define NVC0C0_QMDV02_01_PROGRAM_OFFSET                            MW(287:256)
++#define NVC0C0_QMDV02_01_CIRCULAR_QUEUE_ADDR_LOWER                 MW(319:288)
++#define NVC0C0_QMDV02_01_CIRCULAR_QUEUE_ADDR_UPPER                 MW(327:320)
++#define NVC0C0_QMDV02_01_QMD_RESERVED_D                            MW(335:328)
++#define NVC0C0_QMDV02_01_CIRCULAR_QUEUE_ENTRY_SIZE                 MW(351:336)
++#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_ID                    MW(357:352)
++#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE       MW(365:358)
++#define NVC0C0_QMDV02_01_RELEASE_MEMBAR_TYPE                       MW(366:366)
++#define NVC0C0_QMDV02_01_RELEASE_MEMBAR_TYPE_FE_NONE               0x00000000
++#define NVC0C0_QMDV02_01_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR          0x00000001
++#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_INCR_ENABLE           MW(367:367)
++#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE     0x00000000
++#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE      0x00000001
++#define NVC0C0_QMDV02_01_CWD_MEMBAR_TYPE                           MW(369:368)
++#define NVC0C0_QMDV02_01_CWD_MEMBAR_TYPE_L1_NONE                   0x00000000
++#define NVC0C0_QMDV02_01_CWD_MEMBAR_TYPE_L1_SYSMEMBAR              0x00000001
++#define NVC0C0_QMDV02_01_CWD_MEMBAR_TYPE_L1_MEMBAR                 0x00000003
++#define NVC0C0_QMDV02_01_SEQUENTIALLY_RUN_CTAS                     MW(370:370)
++#define NVC0C0_QMDV02_01_SEQUENTIALLY_RUN_CTAS_FALSE               0x00000000
++#define NVC0C0_QMDV02_01_SEQUENTIALLY_RUN_CTAS_TRUE                0x00000001
++#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_DECR_ENABLE           MW(371:371)
++#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE     0x00000000
++#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE      0x00000001
++#define NVC0C0_QMDV02_01_THROTTLED                                 MW(372:372)
++#define NVC0C0_QMDV02_01_THROTTLED_FALSE                           0x00000000
++#define NVC0C0_QMDV02_01_THROTTLED_TRUE                            0x00000001
++#define NVC0C0_QMDV02_01_API_VISIBLE_CALL_LIMIT                    MW(378:378)
++#define NVC0C0_QMDV02_01_API_VISIBLE_CALL_LIMIT__32                0x00000000
++#define NVC0C0_QMDV02_01_API_VISIBLE_CALL_LIMIT_NO_CHECK           0x00000001
++#define NVC0C0_QMDV02_01_SAMPLER_INDEX                             MW(382:382)
++#define NVC0C0_QMDV02_01_SAMPLER_INDEX_INDEPENDENTLY               0x00000000
++#define NVC0C0_QMDV02_01_SAMPLER_INDEX_VIA_HEADER_INDEX            0x00000001
++#define NVC0C0_QMDV02_01_CTA_RASTER_WIDTH                          MW(415:384)
++#define NVC0C0_QMDV02_01_CTA_RASTER_HEIGHT                         MW(431:416)
++#define NVC0C0_QMDV02_01_QMD_RESERVED13A                           MW(447:432)
++#define NVC0C0_QMDV02_01_CTA_RASTER_DEPTH                          MW(463:448)
++#define NVC0C0_QMDV02_01_QMD_RESERVED14A                           MW(479:464)
++#define NVC0C0_QMDV02_01_DEPENDENT_QMD_POINTER                     MW(511:480)
++#define NVC0C0_QMDV02_01_QUEUE_ENTRIES_PER_CTA_MINUS_ONE           MW(518:512)
++#define NVC0C0_QMDV02_01_COALESCE_WAITING_PERIOD                   MW(529:522)
++#define NVC0C0_QMDV02_01_SHARED_MEMORY_SIZE                        MW(561:544)
++#define NVC0C0_QMDV02_01_QMD_RESERVED_G                            MW(575:562)
++#define NVC0C0_QMDV02_01_QMD_VERSION                               MW(579:576)
++#define NVC0C0_QMDV02_01_QMD_MAJOR_VERSION                         MW(583:580)
++#define NVC0C0_QMDV02_01_QMD_RESERVED_H                            MW(591:584)
++#define NVC0C0_QMDV02_01_CTA_THREAD_DIMENSION0                     MW(607:592)
++#define NVC0C0_QMDV02_01_CTA_THREAD_DIMENSION1                     MW(623:608)
++#define NVC0C0_QMDV02_01_CTA_THREAD_DIMENSION2                     MW(639:624)
++#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_VALID(i)                  MW((640+(i)*1):(640+(i)*1))
++#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_VALID_FALSE               0x00000000
++#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_VALID_TRUE                0x00000001
++#define NVC0C0_QMDV02_01_QMD_RESERVED_I                            MW(671:648)
++#define NVC0C0_QMDV02_01_SM_DISABLE_MASK_LOWER                     MW(703:672)
++#define NVC0C0_QMDV02_01_SM_DISABLE_MASK_UPPER                     MW(735:704)
++#define NVC0C0_QMDV02_01_RELEASE0_ADDRESS_LOWER                    MW(767:736)
++#define NVC0C0_QMDV02_01_RELEASE0_ADDRESS_UPPER                    MW(775:768)
++#define NVC0C0_QMDV02_01_QMD_RESERVED_J                            MW(783:776)
++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP                     MW(790:788)
++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_ADD             0x00000000
++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_MIN             0x00000001
++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_MAX             0x00000002
++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_INC             0x00000003
++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_DEC             0x00000004
++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_AND             0x00000005
++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_OR              0x00000006
++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_XOR             0x00000007
++#define NVC0C0_QMDV02_01_QMD_RESERVED_K                            MW(791:791)
++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_FORMAT                 MW(793:792)
++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_FORMAT_SIGNED_32       0x00000001
++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_ENABLE                 MW(794:794)
++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_ENABLE_FALSE           0x00000000
++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_ENABLE_TRUE            0x00000001
++#define NVC0C0_QMDV02_01_RELEASE0_STRUCTURE_SIZE                   MW(799:799)
++#define NVC0C0_QMDV02_01_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
++#define NVC0C0_QMDV02_01_RELEASE0_STRUCTURE_SIZE_ONE_WORD          0x00000001
++#define NVC0C0_QMDV02_01_RELEASE0_PAYLOAD                          MW(831:800)
++#define NVC0C0_QMDV02_01_RELEASE1_ADDRESS_LOWER                    MW(863:832)
++#define NVC0C0_QMDV02_01_RELEASE1_ADDRESS_UPPER                    MW(871:864)
++#define NVC0C0_QMDV02_01_QMD_RESERVED_L                            MW(879:872)
++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP                     MW(886:884)
++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_ADD             0x00000000
++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_MIN             0x00000001
++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_MAX             0x00000002
++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_INC             0x00000003
++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_DEC             0x00000004
++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_AND             0x00000005
++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_OR              0x00000006
++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_XOR             0x00000007
++#define NVC0C0_QMDV02_01_QMD_RESERVED_M                            MW(887:887)
++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_FORMAT                 MW(889:888)
++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_FORMAT_SIGNED_32       0x00000001
++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_ENABLE                 MW(890:890)
++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_ENABLE_FALSE           0x00000000
++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_ENABLE_TRUE            0x00000001
++#define NVC0C0_QMDV02_01_RELEASE1_STRUCTURE_SIZE                   MW(895:895)
++#define NVC0C0_QMDV02_01_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
++#define NVC0C0_QMDV02_01_RELEASE1_STRUCTURE_SIZE_ONE_WORD          0x00000001
++#define NVC0C0_QMDV02_01_RELEASE1_PAYLOAD                          MW(927:896)
++#define NVC0C0_QMDV02_01_SHADER_LOCAL_MEMORY_LOW_SIZE              MW(951:928)
++#define NVC0C0_QMDV02_01_QMD_RESERVED_N                            MW(954:952)
++#define NVC0C0_QMDV02_01_BARRIER_COUNT                             MW(959:955)
++#define NVC0C0_QMDV02_01_SHADER_LOCAL_MEMORY_HIGH_SIZE             MW(983:960)
++#define NVC0C0_QMDV02_01_REGISTER_COUNT                            MW(991:984)
++#define NVC0C0_QMDV02_01_SHADER_LOCAL_MEMORY_CRS_SIZE              MW(1015:992)
++#define NVC0C0_QMDV02_01_SASS_VERSION                              MW(1023:1016)
++#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_ADDR_LOWER(i)             MW((1055+(i)*64):(1024+(i)*64))
++#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_ADDR_UPPER(i)             MW((1072+(i)*64):(1056+(i)*64))
++#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_RESERVED_ADDR(i)          MW((1073+(i)*64):(1073+(i)*64))
++#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_INVALIDATE(i)             MW((1074+(i)*64):(1074+(i)*64))
++#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_INVALIDATE_FALSE          0x00000000
++#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_INVALIDATE_TRUE           0x00000001
++#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_SIZE_SHIFTED4(i)          MW((1087+(i)*64):(1075+(i)*64))
++#define NVC0C0_QMDV02_01_QMD_RESERVED_R                            MW(1567:1536)
++#define NVC0C0_QMDV02_01_QMD_RESERVED_S                            MW(1599:1568)
++#define NVC0C0_QMDV02_01_HW_ONLY_INNER_GET                         MW(1630:1600)
++#define NVC0C0_QMDV02_01_HW_ONLY_REQUIRE_SCHEDULING_PCAS           MW(1631:1631)
++#define NVC0C0_QMDV02_01_HW_ONLY_INNER_PUT                         MW(1662:1632)
++#define NVC0C0_QMDV02_01_HW_ONLY_SCG_TYPE                          MW(1663:1663)
++#define NVC0C0_QMDV02_01_HW_ONLY_SPAN_LIST_HEAD_INDEX              MW(1693:1664)
++#define NVC0C0_QMDV02_01_QMD_RESERVED_Q                            MW(1694:1694)
++#define NVC0C0_QMDV02_01_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID        MW(1695:1695)
++#define NVC0C0_QMDV02_01_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE  0x00000000
++#define NVC0C0_QMDV02_01_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE   0x00000001
++#define NVC0C0_QMDV02_01_HW_ONLY_SKED_NEXT_QMD_POINTER             MW(1727:1696)
++#define NVC0C0_QMDV02_01_QMD_SPARE_G                               MW(1759:1728)
++#define NVC0C0_QMDV02_01_QMD_SPARE_H                               MW(1791:1760)
++#define NVC0C0_QMDV02_01_QMD_SPARE_I                               MW(1823:1792)
++#define NVC0C0_QMDV02_01_QMD_SPARE_J                               MW(1855:1824)
++#define NVC0C0_QMDV02_01_QMD_SPARE_K                               MW(1887:1856)
++#define NVC0C0_QMDV02_01_QMD_SPARE_L                               MW(1919:1888)
++#define NVC0C0_QMDV02_01_QMD_SPARE_M                               MW(1951:1920)
++#define NVC0C0_QMDV02_01_QMD_SPARE_N                               MW(1983:1952)
++#define NVC0C0_QMDV02_01_DEBUG_ID_UPPER                            MW(2015:1984)
++#define NVC0C0_QMDV02_01_DEBUG_ID_LOWER                            MW(2047:2016)
++
++
++
++#endif // #ifndef __CLC0C0QMD_H__
+diff --git a/src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h b/src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h
+new file mode 100644
+index 00000000000..588cc639d32
+--- /dev/null
++++ b/src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h
+@@ -0,0 +1,245 @@
++/*******************************************************************************
++    Copyright (c) 2001-2010 NVIDIA Corporation
++
++    Permission is hereby granted, free of charge, to any person obtaining a copy
++    of this software and associated documentation files (the "Software"), to
++    deal in the Software without restriction, including without limitation the
++    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
++    sell copies of the Software, and to permit persons to whom the Software is
++    furnished to do so, subject to the following conditions:
++
++    The above copyright notice and this permission notice shall be
++    included in all copies or substantial portions of the Software.
++
++    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
++    DEALINGS IN THE SOFTWARE.
++
++*******************************************************************************/
++
++/* AUTO GENERATED FILE -- DO NOT EDIT */
++
++#ifndef __CLC3C0QMD_H__
++#define __CLC3C0QMD_H__
++
++/*
++** Queue Meta Data, Version 02_02
++ */
++
++// The below C preprocessor definitions describe "multi-word" structures, where
++// fields may have bit numbers beyond 32.  For example, MW(127:96) means
++// the field is in bits 0-31 of word number 3 of the structure.  The "MW(X:Y)"
++// syntax is to distinguish from similar "X:Y" single-word definitions: the
++// macros historically used for single-word definitions would fail with
++// multi-word definitions.
++//
++// See nvmisc.h:DRF_VAL_MW() in the source code of the kernel
++// interface layer of nvidia.ko for an example of how to manipulate
++// these MW(X:Y) definitions.
++
++#define NVC3C0_QMDV02_02_OUTER_PUT                                 MW(30:0)
++#define NVC3C0_QMDV02_02_OUTER_OVERFLOW                            MW(31:31)
++#define NVC3C0_QMDV02_02_OUTER_GET                                 MW(62:32)
++#define NVC3C0_QMDV02_02_OUTER_STICKY_OVERFLOW                     MW(63:63)
++#define NVC3C0_QMDV02_02_INNER_GET                                 MW(94:64)
++#define NVC3C0_QMDV02_02_INNER_OVERFLOW                            MW(95:95)
++#define NVC3C0_QMDV02_02_INNER_PUT                                 MW(126:96)
++#define NVC3C0_QMDV02_02_INNER_STICKY_OVERFLOW                     MW(127:127)
++#define NVC3C0_QMDV02_02_QMD_GROUP_ID                              MW(133:128)
++#define NVC3C0_QMDV02_02_SM_GLOBAL_CACHING_ENABLE                  MW(134:134)
++#define NVC3C0_QMDV02_02_RUN_CTA_IN_ONE_SM_PARTITION               MW(135:135)
++#define NVC3C0_QMDV02_02_RUN_CTA_IN_ONE_SM_PARTITION_FALSE         0x00000000
++#define NVC3C0_QMDV02_02_RUN_CTA_IN_ONE_SM_PARTITION_TRUE          0x00000001
++#define NVC3C0_QMDV02_02_IS_QUEUE                                  MW(136:136)
++#define NVC3C0_QMDV02_02_IS_QUEUE_FALSE                            0x00000000
++#define NVC3C0_QMDV02_02_IS_QUEUE_TRUE                             0x00000001
++#define NVC3C0_QMDV02_02_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST      MW(137:137)
++#define NVC3C0_QMDV02_02_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000
++#define NVC3C0_QMDV02_02_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001
++#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE0                 MW(138:138)
++#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE0_FALSE           0x00000000
++#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE0_TRUE            0x00000001
++#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE1                 MW(139:139)
++#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE1_FALSE           0x00000000
++#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE1_TRUE            0x00000001
++#define NVC3C0_QMDV02_02_REQUIRE_SCHEDULING_PCAS                   MW(140:140)
++#define NVC3C0_QMDV02_02_REQUIRE_SCHEDULING_PCAS_FALSE             0x00000000
++#define NVC3C0_QMDV02_02_REQUIRE_SCHEDULING_PCAS_TRUE              0x00000001
++#define NVC3C0_QMDV02_02_DEPENDENT_QMD_SCHEDULE_ENABLE             MW(141:141)
++#define NVC3C0_QMDV02_02_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE       0x00000000
++#define NVC3C0_QMDV02_02_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE        0x00000001
++#define NVC3C0_QMDV02_02_DEPENDENT_QMD_TYPE                        MW(142:142)
++#define NVC3C0_QMDV02_02_DEPENDENT_QMD_TYPE_QUEUE                  0x00000000
++#define NVC3C0_QMDV02_02_DEPENDENT_QMD_TYPE_GRID                   0x00000001
++#define NVC3C0_QMDV02_02_DEPENDENT_QMD_FIELD_COPY                  MW(143:143)
++#define NVC3C0_QMDV02_02_DEPENDENT_QMD_FIELD_COPY_FALSE            0x00000000
++#define NVC3C0_QMDV02_02_DEPENDENT_QMD_FIELD_COPY_TRUE             0x00000001
++#define NVC3C0_QMDV02_02_QMD_RESERVED_B                            MW(159:144)
++#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_SIZE                       MW(184:160)
++#define NVC3C0_QMDV02_02_QMD_RESERVED_C                            MW(185:185)
++#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_HEADER_CACHE           MW(186:186)
++#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE     0x00000000
++#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE      0x00000001
++#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_SAMPLER_CACHE          MW(187:187)
++#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE    0x00000000
++#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE     0x00000001
++#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_DATA_CACHE             MW(188:188)
++#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_DATA_CACHE_FALSE       0x00000000
++#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_DATA_CACHE_TRUE        0x00000001
++#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_DATA_CACHE              MW(189:189)
++#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_DATA_CACHE_FALSE        0x00000000
++#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_DATA_CACHE_TRUE         0x00000001
++#define NVC3C0_QMDV02_02_INVALIDATE_INSTRUCTION_CACHE              MW(190:190)
++#define NVC3C0_QMDV02_02_INVALIDATE_INSTRUCTION_CACHE_FALSE        0x00000000
++#define NVC3C0_QMDV02_02_INVALIDATE_INSTRUCTION_CACHE_TRUE         0x00000001
++#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_CONSTANT_CACHE          MW(191:191)
++#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE    0x00000000
++#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE     0x00000001
++#define NVC3C0_QMDV02_02_CTA_RASTER_WIDTH_RESUME                   MW(223:192)
++#define NVC3C0_QMDV02_02_CTA_RASTER_HEIGHT_RESUME                  MW(239:224)
++#define NVC3C0_QMDV02_02_CTA_RASTER_DEPTH_RESUME                   MW(255:240)
++#define NVC3C0_QMDV02_02_PROGRAM_OFFSET                            MW(287:256)
++#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_ADDR_LOWER                 MW(319:288)
++#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_ADDR_UPPER                 MW(327:320)
++#define NVC3C0_QMDV02_02_QMD_RESERVED_D                            MW(335:328)
++#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_ENTRY_SIZE                 MW(351:336)
++#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_ID                    MW(357:352)
++#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE       MW(365:358)
++#define NVC3C0_QMDV02_02_RELEASE_MEMBAR_TYPE                       MW(366:366)
++#define NVC3C0_QMDV02_02_RELEASE_MEMBAR_TYPE_FE_NONE               0x00000000
++#define NVC3C0_QMDV02_02_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR          0x00000001
++#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_INCR_ENABLE           MW(367:367)
++#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE     0x00000000
++#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE      0x00000001
++#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE                           MW(369:368)
++#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE_L1_NONE                   0x00000000
++#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE_L1_SYSMEMBAR              0x00000001
++#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE_L1_MEMBAR                 0x00000003
++#define NVC3C0_QMDV02_02_SEQUENTIALLY_RUN_CTAS                     MW(370:370)
++#define NVC3C0_QMDV02_02_SEQUENTIALLY_RUN_CTAS_FALSE               0x00000000
++#define NVC3C0_QMDV02_02_SEQUENTIALLY_RUN_CTAS_TRUE                0x00000001
++#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DECR_ENABLE           MW(371:371)
++#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE     0x00000000
++#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE      0x00000001
++#define NVC3C0_QMDV02_02_API_VISIBLE_CALL_LIMIT                    MW(378:378)
++#define NVC3C0_QMDV02_02_API_VISIBLE_CALL_LIMIT__32                0x00000000
++#define NVC3C0_QMDV02_02_API_VISIBLE_CALL_LIMIT_NO_CHECK           0x00000001
++#define NVC3C0_QMDV02_02_SAMPLER_INDEX                             MW(382:382)
++#define NVC3C0_QMDV02_02_SAMPLER_INDEX_INDEPENDENTLY               0x00000000
++#define NVC3C0_QMDV02_02_SAMPLER_INDEX_VIA_HEADER_INDEX            0x00000001
++#define NVC3C0_QMDV02_02_CTA_RASTER_WIDTH                          MW(415:384)
++#define NVC3C0_QMDV02_02_CTA_RASTER_HEIGHT                         MW(431:416)
++#define NVC3C0_QMDV02_02_QMD_RESERVED13A                           MW(447:432)
++#define NVC3C0_QMDV02_02_CTA_RASTER_DEPTH                          MW(463:448)
++#define NVC3C0_QMDV02_02_QMD_RESERVED14A                           MW(479:464)
++#define NVC3C0_QMDV02_02_DEPENDENT_QMD_POINTER                     MW(511:480)
++#define NVC3C0_QMDV02_02_QUEUE_ENTRIES_PER_CTA_MINUS_ONE           MW(518:512)
++#define NVC3C0_QMDV02_02_COALESCE_WAITING_PERIOD                   MW(529:522)
++#define NVC3C0_QMDV02_02_SHARED_MEMORY_SIZE                        MW(561:544)
++#define NVC3C0_QMDV02_02_MIN_SM_CONFIG_SHARED_MEM_SIZE             MW(568:562)
++#define NVC3C0_QMDV02_02_MAX_SM_CONFIG_SHARED_MEM_SIZE             MW(575:569)
++#define NVC3C0_QMDV02_02_QMD_VERSION                               MW(579:576)
++#define NVC3C0_QMDV02_02_QMD_MAJOR_VERSION                         MW(583:580)
++#define NVC3C0_QMDV02_02_QMD_RESERVED_H                            MW(591:584)
++#define NVC3C0_QMDV02_02_CTA_THREAD_DIMENSION0                     MW(607:592)
++#define NVC3C0_QMDV02_02_CTA_THREAD_DIMENSION1                     MW(623:608)
++#define NVC3C0_QMDV02_02_CTA_THREAD_DIMENSION2                     MW(639:624)
++#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_VALID(i)                  MW((640+(i)*1):(640+(i)*1))
++#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_VALID_FALSE               0x00000000
++#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_VALID_TRUE                0x00000001
++#define NVC3C0_QMDV02_02_REGISTER_COUNT_V                          MW(656:648)
++#define NVC3C0_QMDV02_02_TARGET_SM_CONFIG_SHARED_MEM_SIZE          MW(663:657)
++#define NVC3C0_QMDV02_02_FREE_CTA_SLOTS_EMPTY_SM                   MW(671:664)
++#define NVC3C0_QMDV02_02_SM_DISABLE_MASK_LOWER                     MW(703:672)
++#define NVC3C0_QMDV02_02_SM_DISABLE_MASK_UPPER                     MW(735:704)
++#define NVC3C0_QMDV02_02_RELEASE0_ADDRESS_LOWER                    MW(767:736)
++#define NVC3C0_QMDV02_02_RELEASE0_ADDRESS_UPPER                    MW(775:768)
++#define NVC3C0_QMDV02_02_QMD_RESERVED_J                            MW(783:776)
++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP                     MW(790:788)
++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_ADD             0x00000000
++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_MIN             0x00000001
++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_MAX             0x00000002
++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_INC             0x00000003
++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_DEC             0x00000004
++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_AND             0x00000005
++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_OR              0x00000006
++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_XOR             0x00000007
++#define NVC3C0_QMDV02_02_QMD_RESERVED_K                            MW(791:791)
++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_FORMAT                 MW(793:792)
++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_FORMAT_SIGNED_32       0x00000001
++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_ENABLE                 MW(794:794)
++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_ENABLE_FALSE           0x00000000
++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_ENABLE_TRUE            0x00000001
++#define NVC3C0_QMDV02_02_RELEASE0_STRUCTURE_SIZE                   MW(799:799)
++#define NVC3C0_QMDV02_02_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
++#define NVC3C0_QMDV02_02_RELEASE0_STRUCTURE_SIZE_ONE_WORD          0x00000001
++#define NVC3C0_QMDV02_02_RELEASE0_PAYLOAD                          MW(831:800)
++#define NVC3C0_QMDV02_02_RELEASE1_ADDRESS_LOWER                    MW(863:832)
++#define NVC3C0_QMDV02_02_RELEASE1_ADDRESS_UPPER                    MW(871:864)
++#define NVC3C0_QMDV02_02_QMD_RESERVED_L                            MW(879:872)
++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP                     MW(886:884)
++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_ADD             0x00000000
++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_MIN             0x00000001
++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_MAX             0x00000002
++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_INC             0x00000003
++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_DEC             0x00000004
++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_AND             0x00000005
++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_OR              0x00000006
++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_XOR             0x00000007
++#define NVC3C0_QMDV02_02_QMD_RESERVED_M                            MW(887:887)
++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_FORMAT                 MW(889:888)
++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_FORMAT_SIGNED_32       0x00000001
++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_ENABLE                 MW(890:890)
++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_ENABLE_FALSE           0x00000000
++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_ENABLE_TRUE            0x00000001
++#define NVC3C0_QMDV02_02_RELEASE1_STRUCTURE_SIZE                   MW(895:895)
++#define NVC3C0_QMDV02_02_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
++#define NVC3C0_QMDV02_02_RELEASE1_STRUCTURE_SIZE_ONE_WORD          0x00000001
++#define NVC3C0_QMDV02_02_RELEASE1_PAYLOAD                          MW(927:896)
++#define NVC3C0_QMDV02_02_SHADER_LOCAL_MEMORY_LOW_SIZE              MW(951:928)
++#define NVC3C0_QMDV02_02_QMD_RESERVED_N                            MW(954:952)
++#define NVC3C0_QMDV02_02_BARRIER_COUNT                             MW(959:955)
++#define NVC3C0_QMDV02_02_SHADER_LOCAL_MEMORY_HIGH_SIZE             MW(983:960)
++#define NVC3C0_QMDV02_02_REGISTER_COUNT                            MW(991:984)
++#define NVC3C0_QMDV02_02_SHADER_LOCAL_MEMORY_CRS_SIZE              MW(1015:992)
++#define NVC3C0_QMDV02_02_SASS_VERSION                              MW(1023:1016)
++#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_ADDR_LOWER(i)             MW((1055+(i)*64):(1024+(i)*64))
++#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_ADDR_UPPER(i)             MW((1072+(i)*64):(1056+(i)*64))
++#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_RESERVED_ADDR(i)          MW((1073+(i)*64):(1073+(i)*64))
++#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_INVALIDATE(i)             MW((1074+(i)*64):(1074+(i)*64))
++#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_INVALIDATE_FALSE          0x00000000
++#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_INVALIDATE_TRUE           0x00000001
++#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_SIZE_SHIFTED4(i)          MW((1087+(i)*64):(1075+(i)*64))
++#define NVC3C0_QMDV02_02_PROGRAM_ADDRESS_LOWER                     MW(1567:1536)
++#define NVC3C0_QMDV02_02_PROGRAM_ADDRESS_UPPER                     MW(1584:1568)
++#define NVC3C0_QMDV02_02_QMD_RESERVED_S                            MW(1599:1585)
++#define NVC3C0_QMDV02_02_HW_ONLY_INNER_GET                         MW(1630:1600)
++#define NVC3C0_QMDV02_02_HW_ONLY_REQUIRE_SCHEDULING_PCAS           MW(1631:1631)
++#define NVC3C0_QMDV02_02_HW_ONLY_INNER_PUT                         MW(1662:1632)
++#define NVC3C0_QMDV02_02_HW_ONLY_SCG_TYPE                          MW(1663:1663)
++#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX              MW(1693:1664)
++#define NVC3C0_QMDV02_02_QMD_RESERVED_Q                            MW(1694:1694)
++#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID        MW(1695:1695)
++#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE  0x00000000
++#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE   0x00000001
++#define NVC3C0_QMDV02_02_HW_ONLY_SKED_NEXT_QMD_POINTER             MW(1727:1696)
++#define NVC3C0_QMDV02_02_QMD_SPARE_G                               MW(1759:1728)
++#define NVC3C0_QMDV02_02_QMD_SPARE_H                               MW(1791:1760)
++#define NVC3C0_QMDV02_02_QMD_SPARE_I                               MW(1823:1792)
++#define NVC3C0_QMDV02_02_QMD_SPARE_J                               MW(1855:1824)
++#define NVC3C0_QMDV02_02_QMD_SPARE_K                               MW(1887:1856)
++#define NVC3C0_QMDV02_02_QMD_SPARE_L                               MW(1919:1888)
++#define NVC3C0_QMDV02_02_QMD_SPARE_M                               MW(1951:1920)
++#define NVC3C0_QMDV02_02_QMD_SPARE_N                               MW(1983:1952)
++#define NVC3C0_QMDV02_02_DEBUG_ID_UPPER                            MW(2015:1984)
++#define NVC3C0_QMDV02_02_DEBUG_ID_LOWER                            MW(2047:2016)
++
++
++
++#endif // #ifndef __CLC3C0QMD_H__
+diff --git a/src/gallium/drivers/nouveau/nvc0/drf.h b/src/gallium/drivers/nouveau/nvc0/drf.h
+new file mode 100644
+index 00000000000..bf95c8c3185
+--- /dev/null
++++ b/src/gallium/drivers/nouveau/nvc0/drf.h
+@@ -0,0 +1,119 @@
++/*
++ * Copyright 2019 Red Hat Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ */
++#ifndef __NVHW_DRF_H__
++#define __NVHW_DRF_H__
++
++/* Helpers common to all DRF accessors: drf is a "hi:lo" bit range, split by pasting it into a "0 ? hi:lo" / "1 ? hi:lo" expression. */
++#define DRF_LO(drf)    (0 ? drf)
++#define DRF_HI(drf)    (1 ? drf)
++#define DRF_BITS(drf)  (DRF_HI(drf) - DRF_LO(drf) + 1)
++#define DRF_MASK(drf)  (~0ULL >> (64 - DRF_BITS(drf)))
++#define DRF_SMASK(drf) (DRF_MASK(drf) << DRF_LO(drf))
++
++/* Helpers for DRF-MW accessors: an MW(hi:lo) field inside word array o, handled as a low-word (LW) part and a high-word (HW) part. */
++#define DRF_MX_MW(drf)      drf
++#define DRF_MX(drf)         DRF_MX_##drf
++#define DRF_MW(drf)         DRF_MX(drf)
++#define DRF_MW_SPANS(o,drf) (DRF_LW_IDX((o),drf) != DRF_HW_IDX((o),drf))
++#define DRF_MW_SIZE(o)      (sizeof((o)[0]) * 8)
++
++#define DRF_LW_IDX(o,drf)   (DRF_LO(DRF_MW(drf)) / DRF_MW_SIZE(o))
++#define DRF_LW_LO(o,drf)    (DRF_LO(DRF_MW(drf)) % DRF_MW_SIZE(o))
++#define DRF_LW_HI(o,drf)    (DRF_MW_SPANS((o),drf) ? (DRF_MW_SIZE(o) - 1) : DRF_HW_HI((o),drf))
++#define DRF_LW_BITS(o,drf)  (DRF_LW_HI((o),drf) - DRF_LW_LO((o),drf) + 1)
++#define DRF_LW_MASK(o,drf)  (~0ULL >> (64 - DRF_LW_BITS((o),drf)))
++#define DRF_LW_SMASK(o,drf) (DRF_LW_MASK((o),drf) << DRF_LW_LO((o),drf))
++#define DRF_LW_GET(o,drf)   (((o)[DRF_LW_IDX((o),drf)] >> DRF_LW_LO((o),drf)) & DRF_LW_MASK((o),drf))
++#define DRF_LW_VAL(o,drf,v) (((v) & DRF_LW_MASK((o),drf)) << DRF_LW_LO((o),drf))
++#define DRF_LW_CLR(o,drf)   ((o)[DRF_LW_IDX((o),drf)] & ~DRF_LW_SMASK((o),drf))
++#define DRF_LW_SET(o,drf,v) (DRF_LW_CLR((o),drf) | DRF_LW_VAL((o),drf,(v)))
++
++#define DRF_HW_IDX(o,drf)   (DRF_HI(DRF_MW(drf)) / DRF_MW_SIZE(o))
++#define DRF_HW_LO(o,drf)    0
++#define DRF_HW_HI(o,drf)    (DRF_HI(DRF_MW(drf)) % DRF_MW_SIZE(o))
++#define DRF_HW_BITS(o,drf)  (DRF_HW_HI((o),drf) - DRF_HW_LO((o),drf) + 1)
++#define DRF_HW_MASK(o,drf)  (~0ULL >> (64 - DRF_HW_BITS((o),drf)))
++#define DRF_HW_SMASK(o,drf) (DRF_HW_MASK((o),drf) << DRF_HW_LO((o),drf))
++#define DRF_HW_GET(o,drf)   ((o)[DRF_HW_IDX(o,drf)] & DRF_HW_SMASK((o),drf))
++#define DRF_HW_VAL(o,drf,v) (((long long)(v) >> DRF_LW_BITS((o),drf)) & DRF_HW_SMASK((o),drf))
++#define DRF_HW_CLR(o,drf)   ((o)[DRF_HW_IDX((o),drf)] & ~DRF_HW_SMASK((o),drf))
++#define DRF_HW_SET(o,drf,v) (DRF_HW_CLR((o),drf) | DRF_HW_VAL((o),drf,(v)))
++
++/* DRF accessors: build, extract, or replace a d_r_f field within a single integer value. */
++#define NVVAL_X(drf,v) (((v) & DRF_MASK(drf)) << DRF_LO(drf))
++#define NVVAL_N(X,d,r,f,  v) NVVAL_X(d##_##r##_##f, (v))
++#define NVVAL_I(X,d,r,f,i,v) NVVAL_X(d##_##r##_##f(i), (v))
++#define NVVAL_(X,_1,_2,_3,_4,_5,IMPL,...) IMPL
++#define NVVAL(A...) NVVAL_(X, ##A, NVVAL_I, NVVAL_N)(X, ##A)
++
++#define NVDEF_N(X,d,r,f,  v) NVVAL_X(d##_##r##_##f, d##_##r##_##f##_##v)
++#define NVDEF_I(X,d,r,f,i,v) NVVAL_X(d##_##r##_##f(i), d##_##r##_##f##_##v)
++#define NVDEF_(X,_1,_2,_3,_4,_5,IMPL,...) IMPL
++#define NVDEF(A...) NVDEF_(X, ##A, NVDEF_I, NVDEF_N)(X, ##A)
++
++#define NVVAL_GET_X(o,drf) (((o) >> DRF_LO(drf)) & DRF_MASK(drf))
++#define NVVAL_GET_N(X,o,d,r,f  ) NVVAL_GET_X(o, d##_##r##_##f)
++#define NVVAL_GET_I(X,o,d,r,f,i) NVVAL_GET_X(o, d##_##r##_##f(i))
++#define NVVAL_GET_(X,_1,_2,_3,_4,_5,IMPL,...) IMPL
++#define NVVAL_GET(A...) NVVAL_GET_(X, ##A, NVVAL_GET_I, NVVAL_GET_N)(X, ##A)
++
++#define NVVAL_SET_X(o,drf,v) (((o) & ~DRF_SMASK(drf)) | NVVAL_X(drf, (v)))
++#define NVVAL_SET_N(X,o,d,r,f,  v) NVVAL_SET_X(o, d##_##r##_##f, (v))
++#define NVVAL_SET_I(X,o,d,r,f,i,v) NVVAL_SET_X(o, d##_##r##_##f(i), (v))
++#define NVVAL_SET_(X,_1,_2,_3,_4,_5,_6,IMPL,...) IMPL
++#define NVVAL_SET(A...) NVVAL_SET_(X, ##A, NVVAL_SET_I, NVVAL_SET_N)(X, ##A)
++
++#define NVDEF_SET_N(X,o,d,r,f,  v)                                             \
++	NVVAL_SET_X(o, d##_##r##_##f,    d##_##r##_##f##_##v)
++#define NVDEF_SET_I(X,o,d,r,f,i,v)                                             \
++	NVVAL_SET_X(o, d##_##r##_##f(i), d##_##r##_##f##_##v)
++#define NVDEF_SET_(X,_1,_2,_3,_4,_5,_6,IMPL,...) IMPL
++#define NVDEF_SET(A...) NVDEF_SET_(X, ##A, NVDEF_SET_I, NVDEF_SET_N)(X, ##A)
++
++/* DRF-MW accessors: read or write an MW() field in word array o, including fields that straddle two words. */
++#define NVVAL_MW_GET_X(o,drf)                                                  \
++	((DRF_MW_SPANS((o),drf) ?                                              \
++	  (DRF_HW_GET((o),drf) << DRF_LW_BITS((o),drf)) : 0) | DRF_LW_GET((o),drf))
++#define NVVAL_MW_GET_N(X,o,d,r,f  ) NVVAL_MW_GET_X((o), d##_##r##_##f)
++#define NVVAL_MW_GET_I(X,o,d,r,f,i) NVVAL_MW_GET_X((o), d##_##r##_##f(i))
++#define NVVAL_MW_GET_(X,_1,_2,_3,_4,_5,IMPL,...) IMPL
++#define NVVAL_MW_GET(A...) NVVAL_MW_GET_(X, ##A, NVVAL_MW_GET_I, NVVAL_MW_GET_N)(X, ##A)
++
++#define NVVAL_MW_SET_X(o,drf,v) do {                                           \
++	(o)[DRF_LW_IDX((o),drf)] = DRF_LW_SET((o),drf,(v));                    \
++	if (DRF_MW_SPANS((o),drf))                                             \
++		(o)[DRF_HW_IDX((o),drf)] = DRF_HW_SET((o),drf,(v));            \
++} while(0)
++#define NVVAL_MW_SET_N(X,o,d,r,f,  v) NVVAL_MW_SET_X((o), d##_##r##_##f, (v))
++#define NVVAL_MW_SET_I(X,o,d,r,f,i,v) NVVAL_MW_SET_X((o), d##_##r##_##f(i), (v))
++#define NVVAL_MW_SET_(X,_1,_2,_3,_4,_5,_6,IMPL,...) IMPL
++#define NVVAL_MW_SET(A...)                                                     \
++	NVVAL_MW_SET_(X, ##A, NVVAL_MW_SET_I, NVVAL_MW_SET_N)(X, ##A)
++
++#define NVDEF_MW_SET_N(X,o,d,r,f,  v)                                          \
++	NVVAL_MW_SET_X(o, d##_##r##_##f,    d##_##r##_##f##_##v)
++#define NVDEF_MW_SET_I(X,o,d,r,f,i,v)                                          \
++	NVVAL_MW_SET_X(o, d##_##r##_##f(i), d##_##r##_##f##_##v)
++#define NVDEF_MW_SET_(X,_1,_2,_3,_4,_5,_6,IMPL,...) IMPL
++#define NVDEF_MW_SET(A...)                                                     \
++	NVDEF_MW_SET_(X, ##A, NVDEF_MW_SET_I, NVDEF_MW_SET_N)(X, ##A)
++#endif
+diff --git a/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h
+new file mode 100644
+index 00000000000..390741cbd04
+--- /dev/null
++++ b/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h
+@@ -0,0 +1,904 @@
++#define NV_MME_PRED_MODE_UUUU                0
++#define NV_MME_PRED_MODE_TTTT                1
++#define NV_MME_PRED_MODE_FFFF                2
++#define NV_MME_PRED_MODE_TTUU                3
++#define NV_MME_PRED_MODE_FFUU                4
++#define NV_MME_PRED_MODE_TFUU                5
++#define NV_MME_PRED_MODE_TUUU                6
++#define NV_MME_PRED_MODE_FUUU                7
++#define NV_MME_PRED_MODE_UUTT                8
++#define NV_MME_PRED_MODE_UUTF                9
++#define NV_MME_PRED_MODE_UUTU                10
++#define NV_MME_PRED_MODE_UUFT                11
++#define NV_MME_PRED_MODE_UUFF                12
++#define NV_MME_PRED_MODE_UUFU                13
++#define NV_MME_PRED_MODE_UUUT                14
++#define NV_MME_PRED_MODE_UUUF                15
++
++#define NV_MME_REG_R0                       0
++#define NV_MME_REG_R1                       1
++#define NV_MME_REG_R2                       2
++#define NV_MME_REG_R3                       3
++#define NV_MME_REG_R4                       4
++#define NV_MME_REG_R5                       5
++#define NV_MME_REG_R6                       6
++#define NV_MME_REG_R7                       7
++#define NV_MME_REG_R8                       8
++#define NV_MME_REG_R9                       9
++#define NV_MME_REG_R10                      10
++#define NV_MME_REG_R11                      11
++#define NV_MME_REG_R12                      12
++#define NV_MME_REG_R13                      13
++#define NV_MME_REG_R14                      14
++#define NV_MME_REG_R15                      15
++#define NV_MME_REG_R16                      16
++#define NV_MME_REG_R17                      17
++#define NV_MME_REG_R18                      18
++#define NV_MME_REG_R19                      19
++#define NV_MME_REG_R20                      20
++#define NV_MME_REG_R21                      21
++#define NV_MME_REG_R22                      22
++#define NV_MME_REG_R23                      23
++#define NV_MME_REG_ZERO                     24
++#define NV_MME_REG_IMMED                    25
++#define NV_MME_REG_IMMEDPAIR                26
++#define NV_MME_REG_IMMED32                  27
++#define NV_MME_REG_LOAD0                    28
++#define NV_MME_REG_LOAD1                    29
++
++#define NV_MME_ALU_ADD                    0
++#define NV_MME_ALU_ADDC                   1
++#define NV_MME_ALU_SUB                    2
++#define NV_MME_ALU_SUBB                   3
++#define NV_MME_ALU_MUL                    4
++#define NV_MME_ALU_MULH                   5
++#define NV_MME_ALU_MULU                   6
++#define NV_MME_ALU_EXTENDED               7
++#define NV_MME_ALU_CLZ                    8
++#define NV_MME_ALU_SLL                    9
++#define NV_MME_ALU_SRL                    10
++#define NV_MME_ALU_SRA                    11
++#define NV_MME_ALU_AND                    12
++#define NV_MME_ALU_NAND                   13
++#define NV_MME_ALU_OR                     14
++#define NV_MME_ALU_XOR                    15
++#define NV_MME_ALU_MERGE                  16
++#define NV_MME_ALU_SLT                    17
++#define NV_MME_ALU_SLTU                   18
++#define NV_MME_ALU_SLE                    19
++#define NV_MME_ALU_SLEU                   20
++#define NV_MME_ALU_SEQ                    21
++#define NV_MME_ALU_STATE                  22
++#define NV_MME_ALU_LOOP                   23
++#define NV_MME_ALU_JAL                    24
++#define NV_MME_ALU_BLT                    25
++#define NV_MME_ALU_BLTU                   26
++#define NV_MME_ALU_BLE                    27
++#define NV_MME_ALU_BLEU                   28
++#define NV_MME_ALU_BEQ                    29
++#define NV_MME_ALU_DREAD                  30
++#define NV_MME_ALU_DWRITE                 31
++
++#define NV_MME_OUT_NONE                 0
++#define NV_MME_OUT_ALU0                 1
++#define NV_MME_OUT_ALU1                 2
++#define NV_MME_OUT_LOAD0                3
++#define NV_MME_OUT_LOAD1                4
++#define NV_MME_OUT_IMMED0               5
++#define NV_MME_OUT_IMMED1               6
++#define NV_MME_OUT_RESERVED             7
++#define NV_MME_OUT_IMMEDHIGH0           8
++#define NV_MME_OUT_IMMEDHIGH1           9
++#define NV_MME_OUT_IMMED32_0            10
++
++#define MME_BITS(en,pm,pr,o0,d0,a0,b0,i0,o1,d1,a1,b1,i1,m0,e0,m1,e1)           \
++   ((e1) << (92 - 64) | (m1) << (89 - 64) |                                    \
++    (e0) << (85 - 64) | (m0) << (82 - 64) |                                    \
++    (i1) << (66 - 64) | (b1) >> (64 - 61)),                                    \
++   (((b1) & 7)  << (61 - 32) | (a1) << (56 - 32) |                             \
++    (d1) << (51 - 32) | (o1) << (46 - 32) |                                    \
++    (i0) >> (32 - 30)),                                                        \
++   (((i0) & 3) << 30 | (b0) << 25 | (a0) << 20 | (d0) << 15 | (o0) << 10 |     \
++    (pr) << 5 | (pm) << 1 | (en))
++
++#define MME_INSN(en,o0,d0,a0,b0,i0,m0,e0,o1,d1,a1,b1,i1,m1,e1)                 \
++   MME_BITS((en), NV_MME_PRED_MODE_UUUU, NV_MME_REG_ZERO,                      \
++            NV_MME_ALU_##o0, NV_MME_REG_##d0,                               \
++            NV_MME_REG_##a0, NV_MME_REG_##b0, (i0),                            \
++            NV_MME_ALU_##o1, NV_MME_REG_##d1,                               \
++            NV_MME_REG_##a1, NV_MME_REG_##b1, (i1),                            \
++            NV_MME_OUT_##m0, NV_MME_OUT_##e0,                                  \
++            NV_MME_OUT_##m1, NV_MME_OUT_##e1)
++
++uint32_t mmec597_per_instance_bf[] = {
++// r1 = load();      // count
++// r3 = load();      // mask
++// mthd(0x1880, 1);  // VERTEX_ARRAY_PER_INSTANCE[0]
++   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,  (1<<12)|0x1880/4, IMMED0,   NONE,
++                 ADD,   R3, LOAD1,  ZERO,                 0,   NONE,   NONE),
++// while (HW_LOOP_COUNT < r1) {
++//    send(r3 & 1);
++//    r3 >>= 1;
++// }
++   MME_INSN(0,  LOOP, ZERO,    R1,  ZERO,            0x0003,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   AND, ZERO,    R3, IMMED,                 1,   NONE,   ALU0,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   SRL,   R3,    R3, IMMED,                 1,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++};
++
++uint32_t mmec597_vertex_array_select[] = {
++// r1 = load();            // array
++// r2 = load();            // limit hi
++// r3 = load();            // limit lo
++// r4 = load();            // start hi
++// r5 = load();            // start lo
++// r6 = (r1 & 0x1f) << 2;
++// r7 = (r1 & 0x1f) << 1;
++// mthd(0x1c04 + r6, 1);   // VERTEX_ARRAY_START_HIGH[]
++// send(r4);
++// send(r5);
++// mthd(0x0600 + r7, 1);   // VERTEX_ARRAY_LIMIT_HIGH[]
++// send(r2);
++// send(r3);
++   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,                 0,   NONE,   NONE,
++                 ADD,   R2, LOAD1,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD,   R3, LOAD0,  ZERO,                 0,   NONE,   NONE,
++                 ADD,   R4, LOAD1,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD,   R5, LOAD0,  ZERO,                 0,   NONE,   NONE,
++               MERGE,   R6,  ZERO,    R1,  (2<<10)|(5<<5)|0,   NONE,   NONE),
++   MME_INSN(0, MERGE,   R7,  ZERO,    R1,  (1<<10)|(5<<5)|0,   ALU1,   NONE,
++                 ADD, ZERO,    R6, IMMED,  (1<<12)|0x1c04/4,   NONE,   NONE),
++   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,                 0,   NONE,   ALU0,
++                 ADD, ZERO,    R5,  ZERO,                 0,   NONE,   ALU1),
++   MME_INSN(1,   ADD, ZERO,    R7, IMMED,  (1<<12)|0x0600/4,   ALU0,   ALU1,
++                 ADD, ZERO,    R2,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD, ZERO,    R3,  ZERO,                 0,   NONE,   ALU0,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++};
++
++uint32_t mmec597_blend_enables[] = {
++// r1 = load();         // enable mask
++// mthd(0x1360, 1);     // NVC0_3D_BLEND_ENABLE[]
++// send((r1 >> 0) & 1);
++// send((r1 >> 1) & 1);
++// send((r1 >> 2) & 1);
++// send((r1 >> 3) & 1);
++// send((r1 >> 4) & 1);
++// send((r1 >> 5) & 1);
++// send((r1 >> 6) & 1);
++// send((r1 >> 7) & 1);
++   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,                 0, IMMED1,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,  (1<<12)|0x1360/4,   NONE,   NONE),
++   MME_INSN(0, MERGE, ZERO,  ZERO,    R1,  (0<<10)|(1<<5)|0,   NONE,   ALU0,
++               MERGE, ZERO,  ZERO,    R1,  (0<<10)|(1<<5)|1,   NONE,   ALU1),
++   MME_INSN(0, MERGE, ZERO,  ZERO,    R1,  (0<<10)|(1<<5)|2,   NONE,   ALU0,
++               MERGE, ZERO,  ZERO,    R1,  (0<<10)|(1<<5)|3,   NONE,   ALU1),
++   MME_INSN(1, MERGE, ZERO,  ZERO,    R1,  (0<<10)|(1<<5)|4,   NONE,   ALU0,
++               MERGE, ZERO,  ZERO,    R1,  (0<<10)|(1<<5)|5,   NONE,   ALU1),
++   MME_INSN(0, MERGE, ZERO,  ZERO,    R1,  (0<<10)|(1<<5)|6,   NONE,   ALU0,
++               MERGE, ZERO,  ZERO,    R1,  (0<<10)|(1<<5)|7,   NONE,   ALU1),
++};
++
++uint32_t mmec597_poly_mode_front[] = {
++// r1 = load();
++// mthd(0x0dac,0);      // POLYGON_MODE_FRONT
++// send(r1);
++// r2 = read(0x0db0);   // POLYGON_MODE_BACK
++// r3 = read(0x20c0);   // SP_SELECT[3]
++// r7 = r1 | r2;
++// r4 = read(0x2100);   // SP_SELECT[4]
++// r6 = 0x60;
++// r7 = r7 & 1;
++// if (r7 != 0)
++   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,  (0<<12)|0x0dac/4, IMMED0,   ALU0,
++               STATE,   R2, IMMED,  ZERO,          0x0db0/4,   NONE,   NONE),
++   MME_INSN(0, STATE,   R3, IMMED,  ZERO,          0x20c0/4,   NONE,   NONE,
++                  OR,   R7,    R1,    R2,                 0,   NONE,   NONE),
++   MME_INSN(0, STATE,   R4, IMMED,  ZERO,          0x2100/4,   NONE,   NONE,
++                 ADD,   R6, IMMED,  ZERO,              0x60,   NONE,   NONE),
++   MME_INSN(0,   AND,   R7,    R7, IMMED,                 1,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   BEQ, ZERO,    R7,  ZERO,    (2<<14)|0x0002,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++//    r6 = 0x200;
++   MME_INSN(0,   ADD,   R6, IMMED,  ZERO,             0x200,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++// r7 = r3 | r4;
++// r7 = r7 & 1;
++// if (r7 != 0)
++   MME_INSN(0,    OR,   R7,    R3,    R4,                 0,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   AND,   R7,    R7, IMMED,                 1,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   BEQ, ZERO,    R7,  ZERO,    (2<<14)|0x0002,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++//    r6 = 0;
++   MME_INSN(0,   ADD,   R6,  ZERO,  ZERO,                 0,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++// mthd(0x02ec, 0);
++// send(r6);
++   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,  (0<<12)|0x02ec/4, IMMED0,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD, ZERO,    R6,  ZERO,                 0,   NONE,   ALU0,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++};
++
++uint32_t mmec597_poly_mode_back[] = {
++// r1 = load();         // value forwarded to POLYGON_MODE_BACK below
++// mthd(0x0db0,0);      // POLYGON_MODE_BACK
++// send(r1);
++// r2 = read(0x0dac);   // POLYGON_MODE_FRONT
++// r3 = read(0x20c0);   // SP_SELECT[3]
++// r7 = r1 | r2;        // new back mode | current front mode
++// r4 = read(0x2100);   // SP_SELECT[4]
++// r6 = 0x60;           // default for the final send to 0x02ec
++// r7 = r7 & 1;
++// if (r7 != 0)         // bit 0 set in either polygon mode
++   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,  (0<<12)|0x0db0/4, IMMED0,   ALU0,
++               STATE,   R2, IMMED,  ZERO,          0x0dac/4,   NONE,   NONE),
++   MME_INSN(0, STATE,   R3, IMMED,  ZERO,          0x20c0/4,   NONE,   NONE,
++                  OR,   R7,    R1,    R2,                 0,   NONE,   NONE),
++   MME_INSN(0, STATE,   R4, IMMED,  ZERO,          0x2100/4,   NONE,   NONE,
++                 ADD,   R6, IMMED,  ZERO,              0x60,   NONE,   NONE),
++   MME_INSN(0,   AND,   R7,    R7, IMMED,                 1,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   BEQ, ZERO,    R7,  ZERO,    (2<<14)|0x0002,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++//    r6 = 0x200;
++   MME_INSN(0,   ADD,   R6, IMMED,  ZERO,             0x200,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++// r7 = r3 | r4;        // SP_SELECT[3] | SP_SELECT[4]
++// r7 = r7 & 1;
++// if (r7 != 0)         // bit 0 set in either SP_SELECT value
++   MME_INSN(0,    OR,   R7,    R3,    R4,                 0,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   AND,   R7,    R7, IMMED,                 1,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   BEQ, ZERO,    R7,  ZERO,    (2<<14)|0x0002,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++//    r6 = 0;
++   MME_INSN(0,   ADD,   R6,  ZERO,  ZERO,                 0,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++// mthd(0x02ec, 0);
++// send(r6);            // 0x60, 0x200 or 0 as selected above
++   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,  (0<<12)|0x02ec/4, IMMED0,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD, ZERO,    R6,  ZERO,                 0,   NONE,   ALU0,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++};
++
++uint32_t mmec597_gp_select[] = {
++// r1 = load();         // value forwarded to SP_SELECT[4] below
++// mthd(0x2100,0);      // SP_SELECT[4]
++// send(r1);
++// r2 = read(0x0dac);   // POLYGON_MODE_FRONT
++// r3 = read(0x0db0);   // POLYGON_MODE_BACK
++// r7 = r2 | r3;        // front mode | back mode
++// r4 = read(0x20c0);   // SP_SELECT[3]
++// r6 = 0x60;           // default for the final send to 0x02ec
++// r7 = r7 & 1;
++// if (r7 != 0)         // bit 0 set in either polygon mode
++   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,  (0<<12)|0x2100/4, IMMED0,   ALU0,
++               STATE,   R2, IMMED,  ZERO,          0x0dac/4,   NONE,   NONE),
++   MME_INSN(0, STATE,   R3, IMMED,  ZERO,          0x0db0/4,   NONE,   NONE,
++                  OR,   R7,    R2,    R3,                 0,   NONE,   NONE),
++   MME_INSN(0, STATE,   R4, IMMED,  ZERO,          0x20c0/4,   NONE,   NONE,
++                 ADD,   R6, IMMED,  ZERO,              0x60,   NONE,   NONE),
++   MME_INSN(0,   AND,   R7,    R7, IMMED,                 1,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   BEQ, ZERO,    R7,  ZERO,    (2<<14)|0x0002,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++//    r6 = 0x200;
++   MME_INSN(0,   ADD,   R6, IMMED,  ZERO,             0x200,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++// r7 = r1 | r4;        // new SP_SELECT[4] | current SP_SELECT[3]
++// r7 = r7 & 1;
++// if (r7 != 0)         // bit 0 set in either SP_SELECT value
++   MME_INSN(0,    OR,   R7,    R1,    R4,                 0,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   AND,   R7,    R7, IMMED,                 1,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   BEQ, ZERO,    R7,  ZERO,    (2<<14)|0x0002,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++//    r6 = 0;
++   MME_INSN(0,   ADD,   R6,  ZERO,  ZERO,                 0,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++// mthd(0x02ec, 0);
++// send(r6);            // 0x60, 0x200 or 0 as selected above
++   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,  (0<<12)|0x02ec/4, IMMED0,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD, ZERO,    R6,  ZERO,                 0,   NONE,   ALU0,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++};
++
++uint32_t mmec597_tep_select[] = {
++// r1 = load();         // value forwarded to SP_SELECT[3] below
++// mthd(0x20c0,0);      // SP_SELECT[3]
++// send(r1);
++// r2 = read(0x0dac);   // POLYGON_MODE_FRONT
++// r3 = read(0x0db0);   // POLYGON_MODE_BACK
++// r7 = r2 | r3;        // front mode | back mode
++// r4 = read(0x2100);   // SP_SELECT[4]
++// r6 = 0x60;           // default for the final send to 0x02ec
++// r7 = r7 & 1;
++// if (r7 != 0)         // bit 0 set in either polygon mode
++   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,  (0<<12)|0x20c0/4, IMMED0,   ALU0,
++               STATE,   R2, IMMED,  ZERO,          0x0dac/4,   NONE,   NONE),
++   MME_INSN(0, STATE,   R3, IMMED,  ZERO,          0x0db0/4,   NONE,   NONE,
++                  OR,   R7,    R2,    R3,                 0,   NONE,   NONE),
++   MME_INSN(0, STATE,   R4, IMMED,  ZERO,          0x2100/4,   NONE,   NONE,
++                 ADD,   R6, IMMED,  ZERO,              0x60,   NONE,   NONE),
++   MME_INSN(0,   AND,   R7,    R7, IMMED,                 1,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   BEQ, ZERO,    R7,  ZERO,    (2<<14)|0x0002,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++//    r6 = 0x200;
++   MME_INSN(0,   ADD,   R6, IMMED,  ZERO,             0x200,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++// r7 = r1 | r4;        // new SP_SELECT[3] | current SP_SELECT[4]
++// r7 = r7 & 1;
++// if (r7 != 0)         // bit 0 set in either SP_SELECT value
++   MME_INSN(0,    OR,   R7,    R1,    R4,                 0,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   AND,   R7,    R7, IMMED,                 1,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   BEQ, ZERO,    R7,  ZERO,    (2<<14)|0x0002,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++//    r6 = 0;
++   MME_INSN(0,   ADD,   R6,  ZERO,  ZERO,                 0,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++// mthd(0x02ec, 0);
++// send(r6);            // 0x60, 0x200 or 0 as selected above
++   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,  (0<<12)|0x02ec/4, IMMED0,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD, ZERO,    R6,  ZERO,                 0,   NONE,   ALU0,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++};
++
++uint32_t mmec597_draw_arrays_indirect[] = {
++// r1 = load();         // mode
++// r5 = read(0x1438);   // VB_INSTANCE_BASE (saved; re-sent at exit)
++// r6 = load();         // start_drawid
++// r7 = load();         // numparams
++   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,                0,   NONE,   NONE,
++                 ADD,   R6, LOAD1,  ZERO,                0,   NONE,   NONE),
++   MME_INSN(0,   ADD,   R7, LOAD0,  ZERO,                0,   NONE,   NONE,
++               STATE,   R5, IMMED,  ZERO,         0x1438/4,   NONE,   NONE),
++// while (HW_LOOP_COUNT < r7) {
++//    r2 = load();      // count
++//    r3 = load();      // instance_count
++//    mthd(0x0d74, 0);  // VERTEX_BUFFER_FIRST
++//    send(load());     // start
++//    r4 = load();      // start_instance
++//    if (r3) {         // nothing is emitted when instance_count is 0
++   MME_INSN(0,  LOOP, ZERO,    R7,  ZERO,            0x000c,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD,   R2, LOAD0,  ZERO,          0x0d74/4, IMMED0,   NONE,
++                 ADD,   R3, LOAD1,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,                 0,   NONE,   ALU0,
++                 ADD,   R4, LOAD1,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   BEQ, ZERO,    R3,  ZERO,    (2<<14)|0x0008,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++//       mthd(0x238c, 1);     // CB_POS
++//       send(256 + 160);
++//       send(0);             // base_vertex
++//       send(r4);            // start_instance
++//       send(r6);            // draw id
++//       mthd(0x1438, 0);     // VB_INSTANCE_BASE
++//       send(r4);
++//       r1 = r1 & ~(1<<26);  // clear INSTANCE_NEXT
++   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,  (1<<12)|0x238c/4, IMMED0, IMMED1,
++                 ADD, ZERO,  ZERO,  ZERO,         256 + 160,   NONE,   ALU0),
++   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,                 0,   NONE,   ALU0,
++                 ADD, ZERO,    R6,  ZERO,                 0,   NONE,   ALU1),
++   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,          0x1438/4, IMMED0,   ALU0,
++               MERGE,   R1,    R1,  ZERO, (26<<10)|(1<<5)|0,   NONE,   NONE),
++//       do {
++//          mthd(0x1618, 0);  // VERTEX_BEGIN_GL
++//          send(r1);         // mode
++//          mthd(0x0d78, 0);  // VERTEX_BUFFER_COUNT
++//          send(r2);         // count
++//          mthd(0x1614, 0);  // VERTEX_END_GL
++//          send(0);
++//          r1 |= (1<<26);    // set INSTANCE_NEXT
++//       } while(--r3);
++//    }
++   MME_INSN(0,   ADD, ZERO,    R1,  ZERO,          0x1618/4, IMMED0,   ALU0,
++                 ADD, ZERO,    R2,  ZERO,          0x0d78/4, IMMED1,   ALU1),
++   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,          0x1614/4, IMMED0,   ALU0,
++                 ADD,   R4, IMMED,  ZERO,                 1,   NONE,   NONE),
++   MME_INSN(0, MERGE,   R1,    R1,    R4, (26<<10)|(1<<5)|0,   NONE,   NONE,
++                 SUB,   R3,    R3, IMMED,                 1,   NONE,   NONE),
++   MME_INSN(0,   BEQ, ZERO,    R3,  ZERO,    (1<<14)|0x3ffd,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++//    r6 = r6 + 1;      // draw id for the next iteration
++// };
++   MME_INSN(0,   ADD,   R6,    R6, IMMED,                 1,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++// mthd(0x1438, 0);  // restore VB_INSTANCE_BASE
++// send(r5);         // value read at macro entry
++   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,          0x1438/4, IMMED0,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD, ZERO,    R5,  ZERO,                 0,   NONE,      ALU0,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
++};
++
++uint32_t mmec597_draw_elts_indirect[] = {
++// r1 = load();         // mode
++// r8 = read(0x1434);   // VB_ELEMENT_BASE (saved; re-sent at exit)
++// r9 = read(0x1438);   // VB_INSTANCE_BASE (saved; re-sent at exit)
++// r6 = load();         // start_drawid
++// r7 = load();         // numparams
++   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,                 0,   NONE,   NONE,
++               STATE,   R8, IMMED,  ZERO,          0x1434/4,   NONE,   NONE),
++   MME_INSN(0, STATE,   R9, IMMED,  ZERO,          0x1438/4,   NONE,   NONE,
++                 ADD,   R6, LOAD0,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD,   R7, LOAD0,  ZERO,                 0,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++// while (HW_LOOP_COUNT < r7) {
++//    r3 = load();      // count
++//    r2 = load();      // instance_count
++//    mthd(0x17dc, 0);  // INDEX_BATCH_FIRST
++//    send(load());     // start
++//    r4 = load();      // index_bias
++//    mthd(0x238c, 1);  // CB_POS
++//    send(256 + 160);
++//    send(r4);         // index_bias
++//    r5 = load();      // start_instance
++//    if (r2) {         // nothing more is emitted when instance_count is 0
++   MME_INSN(0,  LOOP, ZERO,    R7,  ZERO,            0x000d,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD,   R3, LOAD0,  ZERO,          0x17dc/4, IMMED0,   NONE,
++                 ADD,   R2, LOAD1,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,                 0,   NONE,   ALU0,
++                 ADD,   R4, LOAD1,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,  (1<<12)|0x238c/4, IMMED0, IMMED1,
++                 ADD, ZERO,    R4,  ZERO,         256 + 160,   NONE,   ALU1),
++   MME_INSN(0,   BEQ, ZERO,    R2,  ZERO,    (2<<14)|0x0008,   NONE,   NONE,
++                 ADD,   R5, LOAD0,  ZERO,                 0,   NONE,   NONE),
++//       send(r5);         // start_instance
++//       send(r6);         // draw_id
++//       mthd(0x1434, 1);  // VB_ELEMENT_BASE
++//       send(r4);         // index_bias
++//       send(r5);         // start_instance
++//       mthd(0x1118, 0);  // VERTEX_ID_BASE
++//       send(r4);         // index_bias
++//       r1 &= ~(1 << 26); // clear INSTANCE_NEXT
++   MME_INSN(0,   ADD, ZERO,    R5,  ZERO,                 0,   NONE,   ALU0,
++                 ADD, ZERO,    R6,  ZERO,                 0,   NONE,   ALU1),
++   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,  (1<<12)|0x1434/4, IMMED0,   ALU0,
++                 ADD, ZERO,    R5,  ZERO,                 0,   NONE,   ALU1),
++   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,          0x1118/4, IMMED0,   ALU0,
++               MERGE,   R1,    R1,  ZERO, (26<<10)|(1<<5)|0,   NONE,   NONE),
++//       do {
++//          mthd(0x1618, 0);  // VERTEX_BEGIN_GL
++//          send(r1);         // mode
++//          mthd(0x17e0, 0);  // INDEX_BATCH_COUNT
++//          send(r3);         // count
++//          mthd(0x1614, 0);  // VERTEX_END_GL
++//          send(0);
++//          r1 |= (1 << 26);  // set INSTANCE_NEXT
++//       } while (--r2);
++//    }
++   MME_INSN(0,   ADD, ZERO,    R1,  ZERO,          0x1618/4, IMMED0,   ALU0,
++                 ADD, ZERO,    R3,  ZERO,          0x17e0/4, IMMED1,   ALU1),
++   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,          0x1614/4, IMMED0,   ALU0,
++                 ADD,   R4, IMMED,  ZERO,                 1,   NONE,   NONE),
++   MME_INSN(0, MERGE,   R1,    R1,    R4, (26<<10)|(1<<5)|0,   NONE,   NONE,
++                 SUB,   R2,    R2, IMMED,                 1,   NONE,   NONE),
++   MME_INSN(0,   BEQ, ZERO,    R2,  ZERO,    (1<<14)|0x3ffd,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++//   r6 = r6 + 1;       // draw_id for the next iteration
++// };
++   MME_INSN(0,   ADD,   R6,    R6, IMMED,                 1,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++// mthd(0x1434, 1);  // VB_ELEMENT_BASE
++// send(r8);         // restore VB_ELEMENT_BASE
++// send(r9);         // restore VB_INSTANCE_BASE
++// mthd(0x1118, 0);  // VERTEX_ID_BASE
++// send(r8);         // restore VERTEX_ID_BASE
++   MME_INSN(1,   ADD, ZERO,    R8,  ZERO,  (1<<12)|0x1434/4, IMMED0,   ALU0,
++                 ADD, ZERO,    R9,  ZERO,                 0,   NONE,   ALU1),
++   MME_INSN(0,   ADD, ZERO,    R8,  ZERO,          0x1118/4, IMMED0,   ALU0,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++};
++
++uint32_t mmec597_draw_arrays_indirect_count[] = {
++// r1 = load();         // mode
++// r6 = load();         // start_drawid
++// r7 = load();         // numparams
++// r5 = load();         // totaldraws
++// r8 = read(0x1438);   // VB_INSTANCE_BASE (saved; re-sent at exit)
++// r5 = r5 - r6;        // remaining draws
++// if (r5 > r7)         // clamp remaining draws to numparams
++   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,                 0,   NONE,   NONE,
++                 ADD,   R6, LOAD1,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD,   R7, LOAD0,  ZERO,                 0,   NONE,   NONE,
++                 ADD,   R5, LOAD1,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0, STATE,   R8, IMMED,  ZERO,          0x1438/4,   NONE,   NONE,
++                 SUB,   R5,    R5,    R6,                 0,   NONE,   NONE),
++   MME_INSN(0,   BLE, ZERO,    R5,    R7,    (2<<14)|0x0002,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++//    r5 = r7;
++   MME_INSN(0,   ADD,   R5,    R7,  ZERO,                 0,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++// if (r5 >= 0) {
++   MME_INSN(0,   BLT, ZERO,    R5,  ZERO,    (2<<14)|0x000e,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++//    while (HW_LOOP_COUNT < r5) {
++//       r2 = load();      // count
++//       r3 = load();      // instance_count
++//       mthd(0x0d74, 0);  // VERTEX_BUFFER_FIRST
++//       send(load());     // start
++//       r4 = load();      // start_instance
++//       if (r3) {         // nothing is emitted when instance_count is 0
++   MME_INSN(0,  LOOP, ZERO,    R5,  ZERO,            0x000c,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD,   R2, LOAD0,  ZERO,          0x0d74/4, IMMED0,   NONE,
++                 ADD,   R3, LOAD1,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,                 0,   NONE,   ALU0,
++                 ADD,   R4, LOAD1,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   BEQ, ZERO,    R3,  ZERO,    (2<<14)|0x0008,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++//          mthd(0x238c, 1);  // CB_POS
++//          send(256 + 160);
++//          send(0);          // base_vertex
++//          send(r4);         // start_instance
++//          send(r6);         // draw_id
++//          mthd(0x1438, 0);  // VB_INSTANCE_BASE
++//          send(r4);
++//          r1 &= ~(1 << 26); // clear INSTANCE_NEXT
++   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,  (1<<12)|0x238c/4, IMMED0, IMMED1,
++                 ADD, ZERO,  ZERO,  ZERO,           256+160,   NONE,   ALU0),
++   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,                 0,   NONE,   ALU0,
++                 ADD, ZERO,    R6,  ZERO,                 0,   NONE,   ALU1),
++   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,          0x1438/4, IMMED0,   ALU0,
++               MERGE,   R1,    R1,  ZERO, (26<<10)|(1<<5)|0,   NONE,   NONE),
++//          do {
++//             mthd(0x1618, 0);  // VERTEX_BEGIN_GL
++//             send(r1);         // mode
++//             mthd(0x0d78, 0);  // VERTEX_BUFFER_COUNT
++//             send(r2);         // count
++//             mthd(0x1614, 0);  // VERTEX_END_GL
++//             send(0);
++//             r1 |= (1 << 26);  // set INSTANCE_NEXT
++//          } while (--r3);
++//       }
++   MME_INSN(0,   ADD, ZERO,    R1,  ZERO,          0x1618/4, IMMED0,   ALU0,
++                 ADD, ZERO,    R2,  ZERO,          0x0d78/4, IMMED1,   ALU1),
++   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,          0x1614/4, IMMED0,   ALU0,
++                 ADD,   R4, IMMED,  ZERO,                 1,   NONE,   NONE),
++   MME_INSN(0, MERGE,   R1,    R1,    R4, (26<<10)|(1<<5)|0,   NONE,   NONE,
++                 SUB,   R3,    R3, IMMED,                 1,   NONE,   NONE),
++   MME_INSN(0,   BEQ, ZERO,    R3,  ZERO,    (1<<14)|0x3ffd,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++//       r6 = r6 + 1;   // draw_id++
++//    }
++   MME_INSN(0,   ADD,   R6,    R6, IMMED,                 1,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++//    r7 = r7 - r5;  // unneeded params
++// }
++   MME_INSN(0,   SUB,   R7,    R7,    R5,                 0,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++// while (HW_LOOP_COUNT < r7) {   // consume the unneeded params
++//    load();                     // four loads per iteration, results
++//    load();                     // discarded (destination is ZERO)
++//    load();
++//    load();
++// }
++   MME_INSN(0,  LOOP, ZERO,    R7,  ZERO,            0x0003,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,                 0,   NONE,   NONE,
++                 ADD, ZERO, LOAD1,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,                 0,   NONE,   NONE,
++                 ADD, ZERO, LOAD1,  ZERO,                 0,   NONE,   NONE),
++// exit mthd(0x1438, 0);   // VB_INSTANCE_BASE
++// send(r8);               // restore the value read at macro entry
++   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,          0x1438/4, IMMED0,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD, ZERO,    R8,  ZERO,                 0,   NONE,   ALU0,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++};
++
++uint32_t mmec597_draw_elts_indirect_count[] = {
++// r8 = read(0x1434);   // VB_ELEMENT_BASE (saved; re-sent at exit)
++// r1 = load();         // presumably mode, as in mmec597_draw_elts_indirect
++// r9 = read(0x1438);   // VB_INSTANCE_BASE (saved; re-sent at exit)
++// r6 = load();         // presumably start_drawid
++// r7 = load();         // presumably numparams
++// r5 = load();         // presumably totaldraws
++// r5 = r5 - r6;        // presumably remaining draws
++// if (r5 > r7)         // clamp r5 to r7
++   MME_INSN(0, STATE,   R8, IMMED,  ZERO,          0x1434/4,   NONE,   NONE,
++                 ADD,   R1, LOAD0,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0, STATE,   R9, IMMED,  ZERO,          0x1438/4,   NONE,   NONE,
++                 ADD,   R6, LOAD0,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD,   R7, LOAD0,  ZERO,                 0,   NONE,   NONE,
++                 ADD,   R5, LOAD1,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   SUB,   R5,    R5,    R6,                 0,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   BLE, ZERO,    R5,    R7,    (2<<14)|0x0002,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++//    r5 = r7;
++   MME_INSN(0,   ADD,   R5,    R7,  ZERO,                 0,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++// if (r5 >= 0) {
++   MME_INSN(0,   BLT, ZERO,    R5,  ZERO,    (2<<14)|0x000f,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++//    while (HW_LOOP_COUNT < r5) {
++//       r3 = load();      // presumably count (sent to 0x17e0 below)
++//       r2 = load();      // presumably instance_count (inner loop counter)
++//       mthd(0x17dc, 0);  // INDEX_BATCH_FIRST
++//       send(load());
++//       r4 = load();      // presumably index_bias
++//       mthd(0x238c, 1);  // CB_POS
++//       send(256 + 160);
++//       send(r4);
++//       r10 = load();     // presumably start_instance
++//       if (r2) {
++   MME_INSN(0,  LOOP, ZERO,    R5,  ZERO,            0x000d,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD,   R3, LOAD0,  ZERO,  (0<<12)|0x17dc/4, IMMED0,   NONE,
++                 ADD,   R2, LOAD1,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,  (1<<12)|0x238c/4,   NONE,   ALU0,
++                 ADD,   R4, LOAD1,  ZERO,         256 + 160, IMMED0, IMMED1),
++   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,                 0,   NONE,   ALU0,
++                 ADD,  R10, LOAD0,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   BEQ, ZERO,    R2,  ZERO,    (2<<14)|0x0008,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++//          send(r10);
++//          send(r6);
++//          mthd(0x1434, 1);  // VB_ELEMENT_BASE
++//          send(r4);
++//          send(r10);
++//          mthd(0x1118, 0);  // VERTEX_ID_BASE
++//          send(r4);
++//          r1 &= ~(1 << 26); // clear INSTANCE_NEXT
++   MME_INSN(0,   ADD, ZERO,   R10,  ZERO,                 0,   NONE,   ALU0,
++                 ADD, ZERO,    R6,  ZERO,                 0,   NONE,   ALU1),
++   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,  (1<<12)|0x1434/4, IMMED0,   ALU0,
++                 ADD, ZERO,   R10,  ZERO,                 0,   NONE,   ALU1),
++   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,  (0<<12)|0x1118/4, IMMED0,   ALU0,
++               MERGE,   R1,    R1,  ZERO, (26<<10)|(1<<5)|0,   NONE,   NONE),
++//          do {
++//             mthd(0x1618, 0);  // VERTEX_BEGIN_GL
++//             send(r1);
++//             mthd(0x17e0, 0);  // INDEX_BATCH_COUNT
++//             send(r3);
++//             mthd(0x1614, 0);  // VERTEX_END_GL
++//             send(0);
++//             r1 |= (1 << 26);  // set INSTANCE_NEXT
++//          } while (--r2);
++//       }
++   MME_INSN(0,   ADD, ZERO,    R1,  ZERO,          0x1618/4, IMMED0,   ALU0,
++                 ADD, ZERO,    R3,  ZERO,          0x17e0/4, IMMED1,   ALU1),
++   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,          0x1614/4, IMMED0,   ALU0,
++                 ADD,   R4, IMMED,  ZERO,                 1,   NONE,   NONE),
++   MME_INSN(0, MERGE,   R1,    R1,    R4, (26<<10)|(1<<5)|0,   NONE,   NONE,
++                 SUB,   R2,    R2, IMMED,                 1,   NONE,   NONE),
++   MME_INSN(0,   BEQ, ZERO,    R2,  ZERO,    (1<<14)|0x3ffd,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++//       r6 = r6 + 1;
++//    }
++   MME_INSN(0,   ADD,   R6,    R6, IMMED,                 1,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++//    r7 = r7 - r5; // unneeded params
++// }
++   MME_INSN(0,   SUB,   R7,    R7,    R5,                 0,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++// while (HW_LOOP_COUNT < r7) {   // consume the unneeded params
++//    load();                     // five loads per iteration; the
++//    load();                     // results are discarded (the
++//    load();                     // destination is ZERO, not r2)
++//    load();
++//    load();
++// }
++   MME_INSN(0,  LOOP, ZERO,    R7,  ZERO,            0x0004,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,                 0,   NONE,   NONE,
++                 ADD, ZERO, LOAD1,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,                 0,   NONE,   NONE,
++                 ADD, ZERO, LOAD1,  ZERO,                 0,   NONE,   NONE),
++   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,                 0,   NONE,   NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++// mthd(0x1434, 1);        // VB_ELEMENT_BASE
++// send(r8);               // restore VB_ELEMENT_BASE
++// send(r9);               // restore VB_INSTANCE_BASE
++// exit mthd(0x1118, 0);   // VERTEX_ID_BASE
++// send(r8);
++   MME_INSN(1,   ADD, ZERO,    R8,  ZERO,  (1<<12)|0x1434/4, IMMED0,   ALU0,
++                 ADD, ZERO,    R9,  ZERO,                 0,   NONE,   ALU1),
++   MME_INSN(0,   ADD, ZERO,    R8,  ZERO,  (0<<12)|0x1118/4, IMMED0,   ALU0,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
++};
++
++uint32_t mmec597_query_buffer_write[] = {
++// r1 = load();   // clamp value
++// r2 = load();   // end value (lo)
++// r3 = load();   // end value (hi)
++// r4 = load();   // start value (lo)
++// r5 = load();   // start value (hi)
++// r8 = load();   // desired sequence
++// r9 = load();   // actual sequence
++// r7 = load();   // query address (hi)
++// r6 = load();   // query address (lo)
++// if (r9 >= r8) {
++   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,                 0,   NONE,      NONE,
++                 ADD,   R2, LOAD1,  ZERO,                 0,   NONE,      NONE),
++   MME_INSN(0,   ADD,   R3, LOAD0,  ZERO,                 0,   NONE,      NONE,
++                 ADD,   R4, LOAD1,  ZERO,                 0,   NONE,      NONE),
++   MME_INSN(0,   ADD,   R5, LOAD0,  ZERO,                 0,   NONE,      NONE,
++                 ADD,   R8, LOAD1,  ZERO,                 0,   NONE,      NONE),
++   MME_INSN(0,   ADD,   R9, LOAD0,  ZERO,                 0,   NONE,      NONE,
++                 ADD,   R7, LOAD1,  ZERO,                 0,   NONE,      NONE),
++   MME_INSN(0,   ADD,   R6, LOAD0,  ZERO,                 0,   NONE,      NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
++   MME_INSN(0,   BLT, ZERO,    R9,    R8,    (2<<14)|0x000e,   NONE,      NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
++//    [r3,r2] = [r3,r2] - [r5,r4];   // end - start, SUB/SUBB pair
++//    if (r1) {
++   MME_INSN(0,   SUB,   R2,    R2,    R4,                 0,   NONE,      NONE,
++                SUBB,   R3,    R3,    R5,                 0,   NONE,      NONE),
++   MME_INSN(0,   BEQ, ZERO,    R1,  ZERO,    (2<<14)|0x0004,   NONE,      NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
++//       if (r3 != 0 || r1 < r2)     // difference exceeds the clamp value
++//          r2 = r1;
++//    }
++   MME_INSN(0,   BEQ, ZERO,    R3,  ZERO,    (1<<14)|0x0002,   NONE,      NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
++   MME_INSN(0,  BLTU, ZERO,    R1,    R2,    (1<<14)|0x0002,   NONE,      NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
++   MME_INSN(0,   ADD,   R2,    R1,  ZERO,                 0,   NONE,      NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
++//    mthd(0x1b00, 1);
++//    send(r7);            // query address (hi)
++//    send(r6);            // query address (lo)
++//    send(r2);            // low word of the (possibly clamped) difference
++//    send(0x10000000);    // emitted via IMMED32_0 from 0x1000 and 0x0000
++//    if (!r1) {
++   MME_INSN(0,   ADD, ZERO,    R7,  ZERO,  (1<<12)|0x1b00/4, IMMED0,      ALU0,
++                 ADD, ZERO,    R6,  ZERO,                 0,   NONE,      ALU1),
++   MME_INSN(0,   ADD, ZERO,    R2,  ZERO,                 0,   NONE,      ALU0,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
++   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,            0x1000,   NONE, IMMED32_0,
++                 ADD, ZERO,  ZERO,  ZERO,            0x0000,   NONE,      NONE),
++   MME_INSN(0,   BEQ, ZERO,    R1,  ZERO,    (1<<14)|0x0004,   NONE,      NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
++//       [r7,r6] = [r7,r6] + 4;   // ADD/ADDC pair, sent without register writeback
++//       mthd(0x1b00, 1);
++//       send(r7);
++//       send(r6);
++//       send(r3);                // high word of the difference
++//       send(0x10000000);
++//    }
++   MME_INSN(0,   ADD, ZERO,    R6, IMMED,                 4, IMMED1,      ALU1,
++                ADDC, ZERO,    R7,  ZERO,  (1<<12)|0x1b00/4,   NONE,      ALU0),
++   MME_INSN(0,   ADD, ZERO,    R3,  ZERO,                 0,   NONE,      ALU0,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
++   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,            0x1000,   NONE, IMMED32_0,
++                 ADD, ZERO,  ZERO,  ZERO,            0x0000,   NONE,      NONE),
++//    mthd(0x0110, 0);
++//    send(0);
++   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,  (0<<12)|0x0110/4, IMMED0,      ALU0,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
++// }   // the two instructions below perform no register write or send
++   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
++   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
++};
++
++uint32_t mmec597_conservative_raster_state[] = {
++// r1 = load();
++// mthd(0x3400, 1);
++// send(0);
++// send(((r1 >> 8) & 7) << 23);   // MERGE immediate (23<<10)|(3<<5)|8
++// send(0x03800000);              // IMMED32_0 from 0x0380 and 0x0000
++// mthd(0x2310, 1);
++// send(0x00418800);              // IMMED32_0 from 0x0041 and 0x8800
++// r2 = r1 & 0xf;
++// r3 = 16;                       // trip count of the send loop below
++// r2 = r2 | (((r1 >> 4) & 0xf) << 8);   // MERGE immediate (8<<10)|(4<<5)|4
++// mthd(0x0a1c, 8);
++   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,  (1<<12)|0x3400/4, IMMED0,    IMMED1,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
++   MME_INSN(0, MERGE, ZERO,  ZERO,    R1, (23<<10)|(3<<5)|8,   NONE,      ALU0,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
++   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,            0x0380,   NONE, IMMED32_0,
++                 ADD, ZERO,  ZERO,  ZERO,            0x0000,   NONE,      NONE),
++   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,  (1<<12)|0x2310/4, IMMED0,      NONE,
++                 ADD, ZERO,  ZERO,  ZERO,            0x0000,   NONE,      NONE),
++   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,            0x0041,   NONE, IMMED32_0,
++                 ADD, ZERO,  ZERO,  ZERO,            0x8800,   NONE,      NONE),
++   MME_INSN(0,   AND,   R2,    R1, IMMED,               0xf,   NONE,      NONE,
++                 ADD,   R3,  ZERO, IMMED,                16,   NONE,      NONE),
++   MME_INSN(0, MERGE,   R2,    R2,    R1,  (8<<10)|(4<<5)|4, IMMED1,      NONE,
++                 ADD, ZERO,  ZERO,  ZERO,  (8<<12)|0x0a1c/4,   NONE,      NONE),
++// while (HW_LOOP_COUNT < r3)     // r3 == 16
++//    send(r2);
++   MME_INSN(0,  LOOP, ZERO,    R3,  ZERO,            0x0002,   NONE,      NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
++   MME_INSN(0,   ADD, ZERO,    R2,  ZERO,                 0,   NONE,      ALU0,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
++// mthd(0x1148, 0);
++// send(1);                       // emitted as IMMED1 with immediate 1
++   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,  (0<<12)|0x1148/4, IMMED0,      NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
++   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,                 1,   NONE,    IMMED1,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
++};
++
++uint32_t mmec597_compute_counter[] = {
++// r0 = load();         // loop trip count (operand of LOOP below)
++// r1 = 1;
++// r2 = 0;
++// while (HW_LOOP_COUNT < r0) {
++   MME_INSN(0,   ADD,   R0, LOAD0,  ZERO,                 0,   NONE,      NONE,
++                 ADD,   R1, IMMED,  ZERO,                 1,   NONE,      NONE),
++   MME_INSN(0,  LOOP, ZERO,    R0,  ZERO,            0x0003,   NONE,      NONE,
++                 ADD,   R2,  ZERO,  ZERO,                 0,   NONE,      NONE),
++//    r3 = load();
++//    [r2,r1] = r1 * r3;   // MULU writes r1, MULH writes r2 (presumably low/high words)
++// }
++   MME_INSN(0,   ADD,   R3, LOAD0,  ZERO,                 0,   NONE,      NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
++   MME_INSN(0,  MULU,   R1,    R1,    R3,                 0,   NONE,      NONE,
++                MULH,   R2,  ZERO,  ZERO,                 0,   NONE,      NONE),
++// r3 = read(0x3410);   // NOTE(review): STATE below has dst/src ZERO, not R3/IMMED - confirm
++// r4 = read(0x3414);   // NOTE(review): STATE below has dst/src ZERO, not R4/IMMED - confirm
++// [r4,r3] += [r2,r1];  // ADD/ADDC pair
++// mthd(0x3410, 1);
++// send(r3);
++// send(r4);
++   MME_INSN(0, STATE, ZERO,  ZERO,  ZERO,          0x3410/4,   NONE,      NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
++   MME_INSN(1, STATE, ZERO,  ZERO,  ZERO,          0x3414/4,   NONE,      NONE,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
++   MME_INSN(0,   ADD,   R3,    R3,    R1,  (1<<12)|0x3410/4, IMMED0,      ALU0,
++                ADDC,   R4,    R4,    R2,                 0,   NONE,      ALU1),
++};
++
++uint32_t mmec597_compute_counter_to_query[] = {
++// r1 = load();
++// r3 = read(0x3410);
++// r2 = load();
++// r4 = read(0x3414);
++// [r2,r1] = [r2,r1] + [r4,r3];
++// mthd(0x1b00, 1);
++// r3 = load();
++// send(r3);
++// r4 = load();
++// send(r4);
++// send(r1);
++// send(0x10000000);
++   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,                 0,   NONE,      NONE,
++               STATE,   R3, IMMED,  ZERO,          0x3410/4,   NONE,      NONE),
++   MME_INSN(0,   ADD,   R2, LOAD0,  ZERO,                 0,   NONE,      NONE,
++               STATE,   R4, IMMED,  ZERO,          0x3414/4,   NONE,      NONE),
++   MME_INSN(0,   ADD,   R1,    R1,    R3,  (1<<12)|0x1b00/4, IMMED0,      NONE,
++                ADDC,   R2,    R2,    R4,                 0,   NONE,      NONE),
++   MME_INSN(0,   ADD,   R3, LOAD0,  ZERO,                 0,   NONE,      ALU0,
++                 ADD,   R4, LOAD1,  ZERO,                 0,   NONE,      ALU1),
++   MME_INSN(0,   ADD, ZERO,    R1,  ZERO,                 0,   NONE,      ALU0,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
++   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,            0x1000,   NONE, IMMED32_0,
++                 ADD, ZERO,  ZERO,  ZERO,            0x0000,   NONE,      NONE),
++// [r3,r4] = [r3,r4] + 4;
++// mthd(0x1b00, 1);
++// send(r3);
++// send(r4);
++// send(r2);
++// send(0x10000000);
++   MME_INSN(0,   ADD, ZERO,    R4, IMMED,                 4, IMMED1,      ALU1,
++                ADDC, ZERO,    R3,  ZERO,  (1<<12)|0x1b00/4,   NONE,      ALU0),
++   MME_INSN(1,   ADD, ZERO,    R2,  ZERO,                 0,   NONE,      ALU0,
++                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
++   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,            0x1000,   NONE, IMMED32_0,
++                 ADD, ZERO,  ZERO,  ZERO,            0x0000,   NONE,      NONE),
++};
+diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
+index 221bab3105b..539bdc75022 100644
+--- a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
++++ b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
+@@ -157,6 +157,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #define NVC0_3D_UNK0220__ESIZE					0x00000004
+ #define NVC0_3D_UNK0220__LEN					0x00000028
+ 
++#define TU102_3D_INDEX_ARRAY_LIMIT_HIGH				0x00000238
++
++#define TU102_3D_INDEX_ARRAY_LIMIT_LOW				0x0000023c
++
++#define TU102_3D_SET_COLOR_RENDER_TO_ZETA_SURFACE		0x000002b8
++
+ #define NVC0_3D_UNK02C0					0x000002c0
+ 
+ #define NVC0_3D_UNK02C4					0x000002c4
+@@ -278,6 +284,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #define NVC0_3D_UNK0400__ESIZE					0x00000004
+ #define NVC0_3D_UNK0400__LEN					0x000000c0
+ 
++#define TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(i0)		       (0x00000600 + 0x8*(i0))
++#define TU102_3D_VERTEX_ARRAY_LIMIT_LOW(i0)		       (0x00000604 + 0x8*(i0))
++
+ #define NVC0_3D_TFB_STREAM(i0)				       (0x00000700 + 0x10*(i0))
+ #define NVC0_3D_TFB_STREAM__ESIZE				0x00000010
+ #define NVC0_3D_TFB_STREAM__LEN				0x00000004
+@@ -1787,6 +1796,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ #define NVC0_3D_SP_UNK14__ESIZE				0x00000004
+ #define NVC0_3D_SP_UNK14__LEN					0x00000004
+ 
++#define GV100_3D_SP_ADDRESS_HIGH(i0)			       (0x00002014 + 0x40*(i0))
++#define GV100_3D_SP_ADDRESS_LOW(i0)			       (0x00002018 + 0x40*(i0))
++
+ #define NVC0_3D_TEX_LIMITS(i0)				       (0x00002200 + 0x10*(i0))
+ #define NVC0_3D_TEX_LIMITS__ESIZE				0x00000010
+ #define NVC0_3D_TEX_LIMITS__LEN				0x00000005
+diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
+index c897e4e8b97..69131fa22d3 100644
+--- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
++++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
+@@ -37,6 +37,55 @@ nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, bool is_3d)
+    return nv50_tex_choose_tile_dims_helper(nx, ny, nz, is_3d);
+ }
+ 
++static uint32_t
++tu102_mt_choose_storage_type(struct nv50_miptree *mt, bool compressed)
++{
++   uint32_t kind;
++
++   if (unlikely(mt->base.base.bind & PIPE_BIND_CURSOR))
++      return 0;
++   if (unlikely(mt->base.base.flags & NOUVEAU_RESOURCE_FLAG_LINEAR))
++      return 0;
++
++   switch (mt->base.base.format) {
++   case PIPE_FORMAT_Z16_UNORM:
++      if (compressed)
++         kind = 0x0b; // NV_MMU_PTE_KIND_Z16_COMPRESSIBLE_DISABLE_PLC
++      else
++         kind = 0x01; // NV_MMU_PTE_KIND_Z16
++      break;
++   case PIPE_FORMAT_X8Z24_UNORM:
++   case PIPE_FORMAT_S8X24_UINT:
++   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
++      if (compressed)
++         kind = 0x0e; // NV_MMU_PTE_KIND_Z24S8_COMPRESSIBLE_DISABLE_PLC
++      else
++         kind = 0x05; // NV_MMU_PTE_KIND_Z24S8
++      break;
++   case PIPE_FORMAT_X24S8_UINT:
++   case PIPE_FORMAT_Z24X8_UNORM:
++   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
++      if (compressed)
++         kind = 0x0c; // NV_MMU_PTE_KIND_S8Z24_COMPRESSIBLE_DISABLE_PLC
++      else
++         kind = 0x03; // NV_MMU_PTE_KIND_S8Z24
++      break;
++   case PIPE_FORMAT_X32_S8X24_UINT:
++   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
++      if (compressed)
++         kind = 0x0d; // NV_MMU_PTE_KIND_ZF32_X24S8_COMPRESSIBLE_DISABLE_PLC
++      else
++         kind = 0x04; // NV_MMU_PTE_KIND_ZF32_X24S8
++      break;
++   case PIPE_FORMAT_Z32_FLOAT:
++   default: // every remaining format shares this kind; 'compressed' is ignored here
++      kind = 0x06; // presumably NV_MMU_PTE_KIND_GENERIC_MEMORY -- TODO confirm
++      break;
++   }
++
++   return kind;
++}
++
+ static uint32_t
+ nvc0_mt_choose_storage_type(struct nv50_miptree *mt, bool compressed)
+ {
+@@ -357,7 +406,10 @@ nvc0_miptree_create(struct pipe_screen *pscreen,
+    if (pt->bind & PIPE_BIND_LINEAR)
+       pt->flags |= NOUVEAU_RESOURCE_FLAG_LINEAR;
+ 
+-   bo_config.nvc0.memtype = nvc0_mt_choose_storage_type(mt, compressed);
++   if (dev->chipset < 0x160)
++      bo_config.nvc0.memtype = nvc0_mt_choose_storage_type(mt, compressed);
++   else
++      bo_config.nvc0.memtype = tu102_mt_choose_storage_type(mt, compressed);
+ 
+    if (!nvc0_miptree_init_ms_mode(mt)) {
+       FREE(mt);
+diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+index 32aa82d168c..d2b2de47c8d 100644
+--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
++++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+@@ -645,7 +645,10 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
+    prog->code_size = info->bin.codeSize;
+    prog->relocs = info->bin.relocData;
+    prog->fixups = info->bin.fixupData;
+-   prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1));
++   if (info->target >= NVISA_GV100_CHIPSET)
++      prog->num_gprs = MIN2(info->bin.maxGPR + 5, 256); //XXX: why?
++   else
++      prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1));
+    prog->cp.smem_size = info->bin.smemSize;
+    prog->num_barriers = info->numBarriers;
+ 
+@@ -734,7 +737,14 @@ nvc0_program_alloc_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
+    struct nvc0_screen *screen = nvc0->screen;
+    const bool is_cp = prog->type == PIPE_SHADER_COMPUTE;
+    int ret;
+-   uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE);
++   uint32_t size = prog->code_size;
++
++   if (!is_cp) {
++      if (screen->eng3d->oclass < TU102_3D_CLASS)
++         size += GF100_SHADER_HEADER_SIZE;
++      else
++         size += TU102_SHADER_HEADER_SIZE;
++   }
+ 
+    /* On Fermi, SP_START_ID must be aligned to 0x40.
+     * On Kepler, the first instruction must be aligned to 0x80 because
+@@ -750,7 +760,8 @@ nvc0_program_alloc_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
+    prog->code_base = prog->mem->start;
+ 
+    if (!is_cp) {
+-      if (screen->base.class_3d >= NVE4_3D_CLASS) {
++      if (screen->base.class_3d >= NVE4_3D_CLASS &&
++          screen->base.class_3d < TU102_3D_CLASS) {
+          switch (prog->mem->start & 0xff) {
+          case 0x40: prog->code_base += 0x70; break;
+          case 0x80: prog->code_base += 0x30; break;
+@@ -777,7 +788,16 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
+ {
+    struct nvc0_screen *screen = nvc0->screen;
+    const bool is_cp = prog->type == PIPE_SHADER_COMPUTE;
+-   uint32_t code_pos = prog->code_base + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE);
++   uint32_t code_pos = prog->code_base;
++   uint32_t size_sph = 0;
++
++   if (!is_cp) {
++      if (screen->eng3d->oclass < TU102_3D_CLASS)
++         size_sph = GF100_SHADER_HEADER_SIZE;
++      else
++         size_sph = TU102_SHADER_HEADER_SIZE;
++   }
++   code_pos += size_sph;
+ 
+    if (prog->relocs)
+       nv50_ir_relocate_code(prog->relocs, prog->code, code_pos,
+@@ -803,8 +823,7 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
+ 
+    if (!is_cp)
+       nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base,
+-                           NV_VRAM_DOMAIN(&screen->base),
+-                           NVC0_SHADER_HEADER_SIZE, prog->hdr);
++                           NV_VRAM_DOMAIN(&screen->base), size_sph, prog->hdr);
+ 
+    nvc0->base.push_data(&nvc0->base, screen->text, code_pos,
+                         NV_VRAM_DOMAIN(&screen->base), prog->code_size,
+@@ -817,7 +836,14 @@ nvc0_program_upload(struct nvc0_context *nvc0, struct nvc0_program *prog)
+    struct nvc0_screen *screen = nvc0->screen;
+    const bool is_cp = prog->type == PIPE_SHADER_COMPUTE;
+    int ret;
+-   uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE);
++   uint32_t size = prog->code_size;
++
++   if (!is_cp) {
++      if (screen->eng3d->oclass < TU102_3D_CLASS)
++         size += GF100_SHADER_HEADER_SIZE;
++      else
++         size += TU102_SHADER_HEADER_SIZE;
++   }
+ 
+    ret = nvc0_program_alloc_code(nvc0, prog);
+    if (ret) {
+@@ -874,8 +900,7 @@ nvc0_program_upload(struct nvc0_context *nvc0, struct nvc0_program *prog)
+             BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(FLUSH), 1);
+             PUSH_DATA (nvc0->base.pushbuf, NVC0_COMPUTE_FLUSH_CODE);
+          } else {
+-            BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(SP_START_ID(i)), 1);
+-            PUSH_DATA (nvc0->base.pushbuf, progs[i]->code_base);
++            nvc0_program_sp_start_id(nvc0, i, progs[i]);
+          }
+       }
+    }
+@@ -953,7 +978,7 @@ nvc0_program_symbol_offset(const struct nvc0_program *prog, uint32_t label)
+    unsigned base = 0;
+    unsigned i;
+    if (prog->type != PIPE_SHADER_COMPUTE)
+-      base = NVC0_SHADER_HEADER_SIZE;
++      base = GF100_SHADER_HEADER_SIZE;
+    for (i = 0; i < prog->cp.num_syms; ++i)
+       if (syms[i].label == label)
+          return prog->code_base + base + syms[i].offset;
+diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
+index 5684207aa54..2c465b342e9 100644
+--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
++++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
+@@ -15,7 +15,9 @@ struct nvc0_transform_feedback_state {
+ };
+ 
+ 
+-#define NVC0_SHADER_HEADER_SIZE (20 * 4)
++#define GF100_SHADER_HEADER_SIZE (20 * 4)
++#define TU102_SHADER_HEADER_SIZE (32 * 4)
++#define NVC0_MAX_SHADER_HEADER_SIZE TU102_SHADER_HEADER_SIZE
+ 
+ struct nvc0_program {
+    struct pipe_shader_state pipe;
+@@ -30,7 +32,7 @@ struct nvc0_program {
+    unsigned code_size;
+    unsigned parm_size; /* size of non-bindable uniforms (c0[]) */
+ 
+-   uint32_t hdr[20];
++   uint32_t hdr[NVC0_MAX_SHADER_HEADER_SIZE/4];
+    uint32_t flags[2];
+ 
+    struct {
+@@ -72,4 +74,6 @@ struct nvc0_program {
+    struct nouveau_heap *mem;
+ };
+ 
++void
++nvc0_program_sp_start_id(struct nvc0_context *, int, struct nvc0_program *);
+ #endif
+diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+index 7abbf762af2..07d74ddd50c 100644
+--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
++++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+@@ -27,15 +27,17 @@
+ #include "util/format/u_format_s3tc.h"
+ #include "util/u_screen.h"
+ #include "pipe/p_screen.h"
+-#include "compiler/nir/nir.h"
+ 
+ #include "nouveau_vp3_video.h"
+ 
++#include "codegen/nv50_ir_driver.h"
++
+ #include "nvc0/nvc0_context.h"
+ #include "nvc0/nvc0_screen.h"
+ 
+ #include "nvc0/mme/com9097.mme.h"
+ #include "nvc0/mme/com90c0.mme.h"
++#include "nvc0/mme/comc597.mme.h"
+ 
+ #include "nv50/g80_texture.xml.h"
+ 
+@@ -443,8 +445,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen,
+    case PIPE_SHADER_CAP_PREFERRED_IR:
+       return screen->prefer_nir ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI;
+    case PIPE_SHADER_CAP_SUPPORTED_IRS: {
+-      uint32_t irs = 1 << PIPE_SHADER_IR_TGSI |
+-                     1 << PIPE_SHADER_IR_NIR;
++      uint32_t irs = 1 << PIPE_SHADER_IR_NIR |
++         ((class_3d >= GV100_3D_CLASS) ? 0 : 1 << PIPE_SHADER_IR_TGSI);
+       if (screen->force_enable_cl)
+          irs |= 1 << PIPE_SHADER_IR_NIR_SERIALIZED;
+       return irs;
+@@ -467,6 +469,14 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen,
+    case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+       return shader != PIPE_SHADER_FRAGMENT;
+    case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
++      /* HW doesn't support indirect addressing of fragment program inputs
++       * on Volta.  The binary driver generates a function to handle every
++       * possible indirection, and indirectly calls the function to handle
++       * this instead.
++       */
++      if (class_3d >= GV100_3D_CLASS)
++         return shader != PIPE_SHADER_FRAGMENT;
++      return 1;
+    case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+    case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+       return 1;
+@@ -717,6 +727,26 @@ nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos,
+    return pos + size;
+ }
+ 
++static int
++tu102_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos,
++                     unsigned size, const uint32_t *data)
++{
++   struct nouveau_pushbuf *push = screen->base.pushbuf;
++
++   size /= 4; // callers pass sizeof(table); convert bytes to 32-bit words
++
++   assert((pos + size) <= 0x800);
++
++   BEGIN_NVC0(push, SUBC_3D(NVC0_GRAPH_MACRO_ID), 2);
++   PUSH_DATA (push, (m - 0x3800) / 8);
++   PUSH_DATA (push, pos);
++   BEGIN_1IC0(push, SUBC_3D(NVC0_GRAPH_MACRO_UPLOAD_POS), size + 1);
++   PUSH_DATA (push, pos);
++   PUSH_DATAp(push, data, size);
++
++   return pos + (size / 3); // presumably 3 words per macro instruction -- TODO confirm
++}
++
+ static void
+ nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class)
+ {
+@@ -728,8 +758,10 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class)
+    BEGIN_NVC0(push, SUBC_3D(0x10ec), 2);
+    PUSH_DATA (push, 0xff);
+    PUSH_DATA (push, 0xff);
+-   BEGIN_NVC0(push, SUBC_3D(0x074c), 1);
+-   PUSH_DATA (push, 0x3f);
++   if (obj_class < GV100_3D_CLASS) {
++      BEGIN_NVC0(push, SUBC_3D(0x074c), 1);
++      PUSH_DATA (push, 0x3f);
++   }
+ 
+    BEGIN_NVC0(push, SUBC_3D(0x16a8), 1);
+    PUSH_DATA (push, (3 << 16) | 3);
+@@ -761,8 +793,10 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class)
+    BEGIN_NVC0(push, SUBC_3D(0x0300), 1);
+    PUSH_DATA (push, 3);
+ 
+-   BEGIN_NVC0(push, SUBC_3D(0x02d0), 1);
+-   PUSH_DATA (push, 0x3fffff);
++   if (obj_class < GV100_3D_CLASS) {
++      BEGIN_NVC0(push, SUBC_3D(0x02d0), 1);
++      PUSH_DATA (push, 0x3fffff);
++   }
+    BEGIN_NVC0(push, SUBC_3D(0x0fdc), 1);
+    PUSH_DATA (push, 1);
+    BEGIN_NVC0(push, SUBC_3D(0x19c0), 1);
+@@ -822,6 +856,8 @@ nvc0_screen_init_compute(struct nvc0_screen *screen)
+    case 0x110:
+    case 0x120:
+    case 0x130:
++   case 0x140:
++   case 0x160:
+       return nve4_screen_compute_setup(screen, screen->base.pushbuf);
+    default:
+       return -1;
+@@ -893,13 +929,15 @@ nvc0_screen_resize_text_area(struct nvc0_screen *screen, uint64_t size)
+    nouveau_heap_init(&screen->text_heap, 0, size - 0x100);
+ 
+    /* update the code segment setup */
+-   BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2);
+-   PUSH_DATAh(push, screen->text->offset);
+-   PUSH_DATA (push, screen->text->offset);
+-   if (screen->compute) {
+-      BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2);
++   if (screen->eng3d->oclass < GV100_3D_CLASS) {
++      BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2);
+       PUSH_DATAh(push, screen->text->offset);
+       PUSH_DATA (push, screen->text->offset);
++      if (screen->compute) {
++         BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2);
++         PUSH_DATAh(push, screen->text->offset);
++         PUSH_DATA (push, screen->text->offset);
++      }
+    }
+ 
+    return 0;
+@@ -939,74 +977,14 @@ nvc0_screen_bind_cb_3d(struct nvc0_screen *screen, bool *can_serialize,
+    IMMED_NVC0(push, NVC0_3D(CB_BIND(stage)), (index << 4) | (size >= 0));
+ }
+ 
+-static const nir_shader_compiler_options nir_options = {
+-   .lower_fdiv = false,
+-   .lower_ffma = false,
+-   .fuse_ffma = false, /* nir doesn't track mad vs fma */
+-   .lower_flrp32 = true,
+-   .lower_flrp64 = true,
+-   .lower_fpow = false,
+-   .lower_fsat = false,
+-   .lower_fsqrt = false, // TODO: only before gm200
+-   .lower_fmod = true,
+-   .lower_bitfield_extract = false,
+-   .lower_bitfield_extract_to_shifts = false,
+-   .lower_bitfield_insert = false,
+-   .lower_bitfield_insert_to_shifts = false,
+-   .lower_bitfield_reverse = false,
+-   .lower_bit_count = false,
+-   .lower_ifind_msb = false,
+-   .lower_find_lsb = false,
+-   .lower_uadd_carry = true, // TODO
+-   .lower_usub_borrow = true, // TODO
+-   .lower_mul_high = false,
+-   .lower_negate = false,
+-   .lower_sub = true,
+-   .lower_scmp = true, // TODO: not implemented yet
+-   .lower_idiv = true,
+-   .lower_isign = false, // TODO
+-   .fdot_replicates = false, // TODO
+-   .lower_ffloor = false, // TODO
+-   .lower_ffract = true,
+-   .lower_fceil = false, // TODO
+-   .lower_ldexp = true,
+-   .lower_pack_half_2x16 = true,
+-   .lower_pack_unorm_2x16 = true,
+-   .lower_pack_snorm_2x16 = true,
+-   .lower_pack_unorm_4x8 = true,
+-   .lower_pack_snorm_4x8 = true,
+-   .lower_unpack_half_2x16 = true,
+-   .lower_unpack_unorm_2x16 = true,
+-   .lower_unpack_snorm_2x16 = true,
+-   .lower_unpack_unorm_4x8 = true,
+-   .lower_unpack_snorm_4x8 = true,
+-   .lower_extract_byte = true,
+-   .lower_extract_word = true,
+-   .lower_all_io_to_temps = false,
+-   .vertex_id_zero_based = false,
+-   .lower_base_vertex = false,
+-   .lower_helper_invocation = false,
+-   .lower_cs_local_index_from_id = true,
+-   .lower_cs_local_id_from_index = false,
+-   .lower_device_index_to_zero = false, // TODO
+-   .lower_wpos_pntc = false, // TODO
+-   .lower_hadd = true, // TODO
+-   .lower_add_sat = true, // TODO
+-   .use_interpolated_input_intrinsics = true,
+-   .lower_mul_2x32_64 = true, // TODO
+-   .max_unroll_iterations = 32,
+-   .lower_int64_options = nir_lower_ufind_msb64|nir_lower_divmod64, // TODO
+-   .lower_doubles_options = nir_lower_dmod, // TODO
+-   .lower_to_scalar = true,
+-};
+-
+ static const void *
+ nvc0_screen_get_compiler_options(struct pipe_screen *pscreen,
+                                  enum pipe_shader_ir ir,
+                                  enum pipe_shader_type shader)
+ {
++   struct nvc0_screen *screen = nvc0_screen(pscreen);
+    if (ir == PIPE_SHADER_IR_NIR)
+-      return &nir_options;
++      return nv50_ir_nir_shader_compiler_options(screen->base.device->chipset);
+    return NULL;
+ }
+ 
+@@ -1038,6 +1016,8 @@ nvc0_screen_create(struct nouveau_device *dev)
+    case 0x110:
+    case 0x120:
+    case 0x130:
++   case 0x140:
++   case 0x160:
+       break;
+    default:
+       return NULL;
+@@ -1104,16 +1084,19 @@ nvc0_screen_create(struct nouveau_device *dev)
+    screen->base.fence.emit = nvc0_screen_fence_emit;
+    screen->base.fence.update = nvc0_screen_fence_update;
+ 
++   if (dev->chipset < 0x140) {
++      ret = nouveau_object_new(chan, (dev->chipset < 0xe0) ? 0x1f906e : 0x906e,
++                               NVIF_CLASS_SW_GF100, NULL, 0, &screen->nvsw);
++      if (ret)
++         FAIL_SCREEN_INIT("Error creating SW object: %d\n", ret);
+ 
+-   ret = nouveau_object_new(chan, (dev->chipset < 0xe0) ? 0x1f906e : 0x906e,
+-                            NVIF_CLASS_SW_GF100, NULL, 0, &screen->nvsw);
+-   if (ret)
+-      FAIL_SCREEN_INIT("Error creating SW object: %d\n", ret);
+-
+-   BEGIN_NVC0(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1);
+-   PUSH_DATA (push, screen->nvsw->handle);
++      BEGIN_NVC0(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1);
++      PUSH_DATA (push, screen->nvsw->handle);
++   }
+ 
+    switch (dev->chipset & ~0xf) {
++   case 0x160:
++   case 0x140:
+    case 0x130:
+    case 0x120:
+    case 0x110:
+@@ -1167,6 +1150,12 @@ nvc0_screen_create(struct nouveau_device *dev)
+    PUSH_DATA (push, screen->fence.bo->offset + 16);
+ 
+    switch (dev->chipset & ~0xf) {
++   case 0x160:
++      obj_class = TU102_3D_CLASS;
++      break;
++   case 0x140:
++      obj_class = GV100_3D_CLASS;
++      break;
+    case 0x130:
+       switch (dev->chipset) {
+       case 0x130:
+@@ -1414,25 +1403,47 @@ nvc0_screen_create(struct nouveau_device *dev)
+       PUSH_DATA (push, 16384 << 16);
+    }
+ 
++   if (screen->eng3d->oclass < TU102_3D_CLASS) {
+ #define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n);
+ 
+-   i = 0;
+-   MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mme9097_per_instance_bf);
+-   MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mme9097_blend_enables);
+-   MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mme9097_vertex_array_select);
+-   MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mme9097_tep_select);
+-   MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mme9097_gp_select);
+-   MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mme9097_poly_mode_front);
+-   MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mme9097_poly_mode_back);
+-   MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mme9097_draw_arrays_indirect);
+-   MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect);
+-   MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
+-   MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
+-   MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
+-   MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mme9097_conservative_raster_state);
+-   MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mme9097_compute_counter);
+-   MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mme9097_compute_counter_to_query);
+-   MK_MACRO(NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect);
++      i = 0;
++      MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mme9097_per_instance_bf);
++      MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mme9097_blend_enables);
++      MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mme9097_vertex_array_select);
++      MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mme9097_tep_select);
++      MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mme9097_gp_select);
++      MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mme9097_poly_mode_front);
++      MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mme9097_poly_mode_back);
++      MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mme9097_draw_arrays_indirect);
++      MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect);
++      MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
++      MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
++      MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
++      MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mme9097_conservative_raster_state);
++      MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mme9097_compute_counter);
++      MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mme9097_compute_counter_to_query);
++      MK_MACRO(NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect);
++   } else {
++#undef MK_MACRO
++#define MK_MACRO(m, n) i = tu102_graph_set_macro(screen, m, i, sizeof(n), n);
++
++      i = 0;
++      MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mmec597_per_instance_bf);
++      MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mmec597_blend_enables);
++      MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mmec597_vertex_array_select);
++      MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mmec597_tep_select);
++      MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mmec597_gp_select);
++      MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mmec597_poly_mode_front);
++      MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mmec597_poly_mode_back);
++      MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mmec597_draw_arrays_indirect);
++      MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mmec597_draw_elts_indirect);
++      MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mmec597_draw_arrays_indirect_count);
++      MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mmec597_draw_elts_indirect_count);
++      MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mmec597_query_buffer_write);
++      MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mmec597_conservative_raster_state);
++      MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mmec597_compute_counter);
++      MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mmec597_compute_counter_to_query);
++   }
+ 
+    BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
+    PUSH_DATA (push, 1);
+diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
+index b7e0c8a930f..490026b2c00 100644
+--- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
++++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
+@@ -64,6 +64,22 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
+    return true; /* stream output info only */
+ }
+ 
++void
++nvc0_program_sp_start_id(struct nvc0_context *nvc0, int stage,
++                         struct nvc0_program *prog)
++{
++   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
++
++   if (nvc0->screen->eng3d->oclass < GV100_3D_CLASS) { // pre-GV100: one word, code_base only
++      BEGIN_NVC0(push, NVC0_3D(SP_START_ID(stage)), 1);
++      PUSH_DATA (push, prog->code_base);
++   } else { // GV100+: text->offset + code_base, written to SP_ADDRESS_HIGH then _LOW
++      BEGIN_NVC0(push, SUBC_3D(GV100_3D_SP_ADDRESS_HIGH(stage)), 2);
++      PUSH_DATAh(push, nvc0->screen->text->offset + prog->code_base);
++      PUSH_DATA (push, nvc0->screen->text->offset + prog->code_base);
++   }
++}
++
+ void
+ nvc0_vertprog_validate(struct nvc0_context *nvc0)
+ {
+@@ -74,9 +90,9 @@ nvc0_vertprog_validate(struct nvc0_context *nvc0)
+          return;
+    nvc0_program_update_context_state(nvc0, vp, 0);
+ 
+-   BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 2);
++   BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 1);
+    PUSH_DATA (push, 0x11);
+-   PUSH_DATA (push, vp->code_base);
++   nvc0_program_sp_start_id(nvc0, 1, vp);
+    BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(1)), 1);
+    PUSH_DATA (push, vp->num_gprs);
+ 
+@@ -152,9 +168,9 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0)
+                  fp->fp.post_depth_coverage);
+    }
+ 
+-   BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 2);
++   BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 1);
+    PUSH_DATA (push, 0x51);
+-   PUSH_DATA (push, fp->code_base);
++   nvc0_program_sp_start_id(nvc0, 5, fp);
+    BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(5)), 1);
+    PUSH_DATA (push, fp->num_gprs);
+ 
+@@ -176,9 +192,9 @@ nvc0_tctlprog_validate(struct nvc0_context *nvc0)
+          BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1);
+          PUSH_DATA (push, tp->tp.tess_mode);
+       }
+-      BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 2);
++      BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1);
+       PUSH_DATA (push, 0x21);
+-      PUSH_DATA (push, tp->code_base);
++      nvc0_program_sp_start_id(nvc0, 2, tp);
+       BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1);
+       PUSH_DATA (push, tp->num_gprs);
+    } else {
+@@ -186,9 +202,9 @@ nvc0_tctlprog_validate(struct nvc0_context *nvc0)
+       /* not a whole lot we can do to handle this failure */
+       if (!nvc0_program_validate(nvc0, tp))
+          assert(!"unable to validate empty tcp");
+-      BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 2);
++      BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1);
+       PUSH_DATA (push, 0x20);
+-      PUSH_DATA (push, tp->code_base);
++      nvc0_program_sp_start_id(nvc0, 2, tp);
+    }
+    nvc0_program_update_context_state(nvc0, tp, 1);
+ }
+@@ -206,8 +222,7 @@ nvc0_tevlprog_validate(struct nvc0_context *nvc0)
+       }
+       BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);
+       PUSH_DATA (push, 0x31);
+-      BEGIN_NVC0(push, NVC0_3D(SP_START_ID(3)), 1);
+-      PUSH_DATA (push, tp->code_base);
++      nvc0_program_sp_start_id(nvc0, 3, tp);
+       BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(3)), 1);
+       PUSH_DATA (push, tp->num_gprs);
+    } else {
+@@ -227,8 +242,7 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
+    if (gp && nvc0_program_validate(nvc0, gp) && gp->code_size) {
+       BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
+       PUSH_DATA (push, 0x41);
+-      BEGIN_NVC0(push, NVC0_3D(SP_START_ID(4)), 1);
+-      PUSH_DATA (push, gp->code_base);
++      nvc0_program_sp_start_id(nvc0, 4, gp);
+       BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(4)), 1);
+       PUSH_DATA (push, gp->num_gprs);
+    } else {
+diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+index 538effdb531..731b0b5dbf8 100644
+--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
++++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+@@ -29,6 +29,8 @@
+ #include "util/format/u_format.h"
+ #include "util/u_surface.h"
+ 
++#include "tgsi/tgsi_ureg.h"
++
+ #include "os/os_thread.h"
+ 
+ #include "nvc0/nvc0_context.h"
+@@ -138,6 +140,11 @@ nvc0_2d_texture_set(struct nouveau_pushbuf *push, bool dst,
+       PUSH_DATA (push, bo->offset + offset);
+    }
+ 
++   if (dst) {
++      IMMED_NVC0(push, SUBC_2D(NVC0_2D_SET_DST_COLOR_RENDER_TO_ZETA_SURFACE),
++                 util_format_is_depth_or_stencil(pformat));
++   }
++
+ #if 0
+    if (dst) {
+       BEGIN_NVC0(push, SUBC_2D(NVC0_2D_CLIP_X), 4);
+@@ -772,7 +779,7 @@ gm200_evaluate_depth_buffer(struct pipe_context *pipe)
+ struct nvc0_blitter
+ {
+    struct nvc0_program *fp[NV50_BLIT_MAX_TEXTURE_TYPES][NV50_BLIT_MODES];
+-   struct nvc0_program vp;
++   struct nvc0_program *vp;
+ 
+    struct nv50_tsc_entry sampler[2]; /* nearest, bilinear */
+ 
+@@ -785,6 +792,7 @@ struct nvc0_blitctx
+ {
+    struct nvc0_context *nvc0;
+    struct nvc0_program *fp;
++   struct nvc0_program *vp;
+    uint8_t mode;
+    uint16_t color_mask;
+    uint8_t filter;
+@@ -809,78 +817,27 @@ struct nvc0_blitctx
+    struct nvc0_rasterizer_stateobj rast;
+ };
+ 
+-static void
+-nvc0_blitter_make_vp(struct nvc0_blitter *blit)
++static void *
++nvc0_blitter_make_vp(struct pipe_context *pipe)
+ {
+-   static const uint32_t code_nvc0[] =
+-   {
+-      0xfff11c26, 0x06000080, /* vfetch b64 $r4:$r5 a[0x80] */
+-      0xfff01c46, 0x06000090, /* vfetch b96 $r0:$r1:$r2 a[0x90] */
+-      0x13f01c26, 0x0a7e0070, /* export b64 o[0x70] $r4:$r5 */
+-      0x03f01c46, 0x0a7e0080, /* export b96 o[0x80] $r0:$r1:$r2 */
+-      0x00001de7, 0x80000000, /* exit */
+-   };
+-   static const uint32_t code_nve4[] =
+-   {
+-      0x00000007, 0x20000000, /* sched */
+-      0xfff11c26, 0x06000080, /* vfetch b64 $r4:$r5 a[0x80] */
+-      0xfff01c46, 0x06000090, /* vfetch b96 $r0:$r1:$r2 a[0x90] */
+-      0x13f01c26, 0x0a7e0070, /* export b64 o[0x70] $r4:$r5 */
+-      0x03f01c46, 0x0a7e0080, /* export b96 o[0x80] $r0:$r1:$r2 */
+-      0x00001de7, 0x80000000, /* exit */
+-   };
+-   static const uint32_t code_gk110[] =
+-   {
+-      0x00000000, 0x08000000, /* sched */
+-      0x401ffc12, 0x7ec7fc00, /* ld b64 $r4d a[0x80] 0x0 0x0 */
+-      0x481ffc02, 0x7ecbfc00, /* ld b96 $r0t a[0x90] 0x0 0x0 */
+-      0x381ffc12, 0x7f07fc00, /* st b64 a[0x70] $r4d 0x0 0x0 */
+-      0x401ffc02, 0x7f0bfc00, /* st b96 a[0x80] $r0t 0x0 0x0 */
+-      0x001c003c, 0x18000000, /* exit */
+-   };
+-   static const uint32_t code_gm107[] =
+-   {
+-      0xe4200701, 0x001d0400, /* sched (st 0x1 wr 0x0) (st 0x1 wr 0x1) (st 0x1 wr 0x2) */
+-      0x0807ff00, 0xefd87f80, /* ld b32 $r0 a[0x80] 0x0 */
+-      0x0847ff01, 0xefd87f80, /* ld b32 $r1 a[0x84] 0x0 */
+-      0x0907ff02, 0xefd87f80, /* ld b32 $r2 a[0x90] 0x0 */
+-      0xf0200761, 0x003f8400, /* sched (st 0x1 wr 0x3) (st 0x1 wr 0x4) (st 0x1 wt 0x1) */
+-      0x0947ff03, 0xefd87f80, /* ld b32 $r3 a[0x94] 0x0 */
+-      0x0987ff04, 0xefd87f80, /* ld b32 $r4 a[0x98] 0x0 */
+-      0x0707ff00, 0xeff07f80, /* st b32 a[0x70] $r0 0x0 */
+-      0xfc2017e1, 0x011f8404, /* sched (st 0x1 wt 0x2) (st 0x1 wt 0x4) (st 0x1 wt 0x8) */
+-      0x0747ff01, 0xeff07f80, /* st b32 a[0x74] $r1 0x0 */
+-      0x0807ff02, 0xeff07f80, /* st b32 a[0x80] $r2 0x0 */
+-      0x0847ff03, 0xeff07f80, /* st b32 a[0x84] $r3 0x0 */
+-      0xfde087e1, 0x001f8000, /* sched (st 0x1 wt 0x10) (st 0xf) (st 0x0) */
+-      0x0887ff04, 0xeff07f80, /* st b32 a[0x88] $r4 0x0 */
+-      0x0007000f, 0xe3000000, /* exit */
+-   };
+-
+-   blit->vp.type = PIPE_SHADER_VERTEX;
+-   blit->vp.translated = true;
+-   if (blit->screen->base.class_3d >= GM107_3D_CLASS) {
+-      blit->vp.code = (uint32_t *)code_gm107; /* const_cast */
+-      blit->vp.code_size = sizeof(code_gm107);
+-   } else
+-   if (blit->screen->base.class_3d >= NVF0_3D_CLASS) {
+-      blit->vp.code = (uint32_t *)code_gk110; /* const_cast */
+-      blit->vp.code_size = sizeof(code_gk110);
+-   } else
+-   if (blit->screen->base.class_3d >= NVE4_3D_CLASS) {
+-      blit->vp.code = (uint32_t *)code_nve4; /* const_cast */
+-      blit->vp.code_size = sizeof(code_nve4);
+-   } else {
+-      blit->vp.code = (uint32_t *)code_nvc0; /* const_cast */
+-      blit->vp.code_size = sizeof(code_nvc0);
+-   }
+-   blit->vp.num_gprs = 6;
+-   blit->vp.vp.edgeflag = PIPE_MAX_ATTRIBS;
++   struct ureg_program *ureg;
++   struct ureg_src ipos, itex;
++   struct ureg_dst opos, otex;
++
++   ureg = ureg_create(PIPE_SHADER_VERTEX);
++   if (!ureg)
++      return NULL;
++
++   opos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
++   ipos = ureg_DECL_vs_input(ureg, 0);
++   otex = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 0);
++   itex = ureg_DECL_vs_input(ureg, 1);
++
++   ureg_MOV(ureg, ureg_writemask(opos, TGSI_WRITEMASK_XY ), ipos);
++   ureg_MOV(ureg, ureg_writemask(otex, TGSI_WRITEMASK_XYZ), itex);
++   ureg_END(ureg);
+ 
+-   blit->vp.hdr[0]  = 0x00020461; /* vertprog magic */
+-   blit->vp.hdr[4]  = 0x000ff000; /* no outputs read */
+-   blit->vp.hdr[6]  = 0x00000073; /* a[0x80].xy, a[0x90].xyz */
+-   blit->vp.hdr[13] = 0x00073000; /* o[0x70].xy, o[0x80].xyz */
++   return ureg_create_shader_and_destroy(ureg, pipe);
+ }
+ 
+ static void
+@@ -910,6 +867,20 @@ nvc0_blitter_make_sampler(struct nvc0_blitter *blit)
+       G80_TSC_1_MIP_FILTER_NONE;
+ }
+ 
++static void
++nvc0_blit_select_vp(struct nvc0_blitctx *ctx)
++{
++   struct nvc0_blitter *blitter = ctx->nvc0->screen->blitter;
++
++   if (!blitter->vp) {
++      mtx_lock(&blitter->mutex);
++      if (!blitter->vp)
++         blitter->vp = nvc0_blitter_make_vp(&ctx->nvc0->base.pipe);
++      mtx_unlock(&blitter->mutex);
++   }
++   ctx->vp = blitter->vp;
++}
++
+ static void
+ nvc0_blit_select_fp(struct nvc0_blitctx *ctx, const struct pipe_blit_info *info)
+ {
+@@ -1082,7 +1053,7 @@ nvc0_blitctx_pre_blit(struct nvc0_blitctx *ctx,
+ 
+    nvc0->rast = &ctx->rast;
+ 
+-   nvc0->vertprog = &blitter->vp;
++   nvc0->vertprog = ctx->vp;
+    nvc0->tctlprog = NULL;
+    nvc0->tevlprog = NULL;
+    nvc0->gmtyprog = NULL;
+@@ -1221,6 +1192,7 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
+    blit->filter = nv50_blit_get_filter(info);
+    blit->render_condition_enable = info->render_condition_enable;
+ 
++   nvc0_blit_select_vp(blit);
+    nvc0_blit_select_fp(blit, info);
+    nvc0_blitctx_pre_blit(blit, info);
+ 
+@@ -1266,6 +1238,11 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
+       }
+    }
+ 
++   if (screen->eng3d->oclass >= TU102_3D_CLASS) {
++      IMMED_NVC0(push, SUBC_3D(TU102_3D_SET_COLOR_RENDER_TO_ZETA_SURFACE),
++                 util_format_is_depth_or_stencil(info->dst.format));
++   }
++
+    IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 0);
+    IMMED_NVC0(push, NVC0_3D(VIEW_VOLUME_CLIP_CTRL), 0x2 |
+               NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_RANGE_0_1);
+@@ -1326,7 +1303,10 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
+    PUSH_DATAh(push, vtxbuf);
+    PUSH_DATA (push, vtxbuf);
+    PUSH_DATA (push, 0);
+-   BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
++   if (screen->eng3d->oclass < TU102_3D_CLASS)
++      BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
++   else
++      BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
+    PUSH_DATAh(push, vtxbuf + length - 1);
+    PUSH_DATA (push, vtxbuf + length - 1);
+ 
+@@ -1403,6 +1383,8 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
+ 
+    /* restore viewport transform */
+    IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 1);
++   if (screen->eng3d->oclass >= TU102_3D_CLASS)
++      IMMED_NVC0(push, SUBC_3D(TU102_3D_SET_COLOR_RENDER_TO_ZETA_SURFACE), 0);
+ }
+ 
+ static void
+@@ -1697,7 +1679,6 @@ nvc0_blitter_create(struct nvc0_screen *screen)
+ 
+    (void) mtx_init(&screen->blitter->mutex, mtx_plain);
+ 
+-   nvc0_blitter_make_vp(screen->blitter);
+    nvc0_blitter_make_sampler(screen->blitter);
+ 
+    return true;
+diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+index 92bd7eb5b8e..8287d8431b1 100644
+--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
++++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+@@ -360,7 +360,11 @@ nvc0_validate_vertex_buffers(struct nvc0_context *nvc0)
+          PUSH_DATAh(push, res->address + offset);
+          PUSH_DATA (push, res->address + offset);
+       }
+-      BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
++
++      if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS)
++         BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
++      else
++         BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
+       PUSH_DATAh(push, res->address + limit);
+       PUSH_DATA (push, res->address + limit);
+ 
+@@ -406,7 +410,11 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0)
+       PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
+       PUSH_DATAh(push, buf->address + offset);
+       PUSH_DATA (push, buf->address + offset);
+-      BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(b)), 2);
++
++      if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS)
++         BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(b)), 2);
++      else
++         BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(b)), 2);
+       PUSH_DATAh(push, buf->address + limit);
+       PUSH_DATA (push, buf->address + limit);
+ 
+@@ -961,12 +969,23 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+       assert(nouveau_resource_mapped_by_gpu(&buf->base));
+ 
+       PUSH_SPACE(push, 6);
+-      BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 5);
+-      PUSH_DATAh(push, buf->address);
+-      PUSH_DATA (push, buf->address);
+-      PUSH_DATAh(push, buf->address + buf->base.width0 - 1);
+-      PUSH_DATA (push, buf->address + buf->base.width0 - 1);
+-      PUSH_DATA (push, info->index_size >> 1);
++      if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS) {
++         BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 5);
++         PUSH_DATAh(push, buf->address);
++         PUSH_DATA (push, buf->address);
++         PUSH_DATAh(push, buf->address + buf->base.width0 - 1);
++         PUSH_DATA (push, buf->address + buf->base.width0 - 1);
++         PUSH_DATA (push, info->index_size >> 1);
++      } else {
++         BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 2);
++         PUSH_DATAh(push, buf->address);
++         PUSH_DATA (push, buf->address);
++         BEGIN_NVC0(push, SUBC_3D(TU102_3D_INDEX_ARRAY_LIMIT_HIGH), 2);
++         PUSH_DATAh(push, buf->address + buf->base.width0 - 1);
++         PUSH_DATA (push, buf->address + buf->base.width0 - 1);
++         BEGIN_NVC0(push, NVC0_3D(INDEX_FORMAT), 1);
++         PUSH_DATA (push, info->index_size >> 1);
++      }
+ 
+       BCTX_REFN(nvc0->bufctx_3d, 3D_IDX, buf, RD);
+    }
+diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
+index 8aa7088dfec..d49a5dfd2cf 100644
+--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
++++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
+@@ -228,7 +228,11 @@ nvc0_push_setup_vertex_array(struct nvc0_context *nvc0, const unsigned count)
+    BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_START_HIGH(0)), 2);
+    PUSH_DATAh(push, va);
+    PUSH_DATA (push, va);
+-   BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
++
++   if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS)
++      BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
++   else
++      BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
+    PUSH_DATAh(push, va + size - 1);
+    PUSH_DATA (push, va + size - 1);
+ 
+@@ -771,7 +775,11 @@ nvc0_push_upload_vertex_ids(struct push_context *ctx,
+    PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | index_size);
+    PUSH_DATAh(push, va);
+    PUSH_DATA (push, va);
+-   BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(1)), 2);
++
++   if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS)
++      BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(1)), 2);
++   else
++      BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(1)), 2);
+    PUSH_DATAh(push, va + info->count * index_size - 1);
+    PUSH_DATA (push, va + info->count * index_size - 1);
+ 
+diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+index 146eeb35f85..d4687b652ba 100644
+--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
++++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+@@ -27,11 +27,18 @@
+ 
+ #include "codegen/nv50_ir_driver.h"
+ 
+-#ifndef NDEBUG
+-static void nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *);
+-static void gp100_compute_dump_launch_desc(const struct gp100_cp_launch_desc *);
+-#endif
+-
++#include "drf.h"
++#include "qmd.h"
++#include "cla0c0qmd.h"
++#include "clc0c0qmd.h"
++#include "clc3c0qmd.h"
++
++#define NVA0C0_QMDV00_06_VAL_SET(p,a...) NVVAL_MW_SET((p), NVA0C0, QMDV00_06, ##a)
++#define NVA0C0_QMDV00_06_DEF_SET(p,a...) NVDEF_MW_SET((p), NVA0C0, QMDV00_06, ##a)
++#define NVC0C0_QMDV02_01_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC0C0, QMDV02_01, ##a)
++#define NVC0C0_QMDV02_01_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC0C0, QMDV02_01, ##a)
++#define NVC3C0_QMDV02_02_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC3C0, QMDV02_02, ##a)
++#define NVC3C0_QMDV02_02_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC3C0, QMDV02_02, ##a)
+ 
+ int
+ nve4_screen_compute_setup(struct nvc0_screen *screen,
+@@ -45,6 +52,12 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
+    uint64_t address;
+ 
+    switch (dev->chipset & ~0xf) {
++   case 0x160:
++      obj_class = TU102_COMPUTE_CLASS;
++      break;
++   case 0x140:
++      obj_class = GV100_COMPUTE_CLASS;
++      break;
+    case 0x100:
+    case 0xf0:
+       obj_class = NVF0_COMPUTE_CLASS; /* GK110 */
+@@ -88,24 +101,35 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
+    PUSH_DATAh(push, screen->tls->size / screen->mp_count);
+    PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
+    PUSH_DATA (push, 0xff);
+-   BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(1)), 3);
+-   PUSH_DATAh(push, screen->tls->size / screen->mp_count);
+-   PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
+-   PUSH_DATA (push, 0xff);
++   if (obj_class < GV100_COMPUTE_CLASS) {
++      BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(1)), 3);
++      PUSH_DATAh(push, screen->tls->size / screen->mp_count);
++      PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
++      PUSH_DATA (push, 0xff);
++   }
+ 
+    /* Unified address space ? Who needs that ? Certainly not OpenCL.
+     *
+     * FATAL: Buffers with addresses inside [0x1000000, 0x3000000] will NOT be
+     *  accessible. We cannot prevent that at the moment, so expect failure.
+     */
+-   BEGIN_NVC0(push, NVE4_CP(LOCAL_BASE), 1);
+-   PUSH_DATA (push, 0xff << 24);
+-   BEGIN_NVC0(push, NVE4_CP(SHARED_BASE), 1);
+-   PUSH_DATA (push, 0xfe << 24);
+-
+-   BEGIN_NVC0(push, NVE4_CP(CODE_ADDRESS_HIGH), 2);
+-   PUSH_DATAh(push, screen->text->offset);
+-   PUSH_DATA (push, screen->text->offset);
++   if (obj_class < GV100_COMPUTE_CLASS) {
++      BEGIN_NVC0(push, NVE4_CP(LOCAL_BASE), 1);
++      PUSH_DATA (push, 0xff << 24);
++      BEGIN_NVC0(push, NVE4_CP(SHARED_BASE), 1);
++      PUSH_DATA (push, 0xfe << 24);
++
++      BEGIN_NVC0(push, NVE4_CP(CODE_ADDRESS_HIGH), 2);
++      PUSH_DATAh(push, screen->text->offset);
++      PUSH_DATA (push, screen->text->offset);
++   } else {
++      BEGIN_NVC0(push, SUBC_CP(0x2a0), 2);
++      PUSH_DATAh(push, 0xfeULL << 24);
++      PUSH_DATA (push, 0xfeULL << 24);
++      BEGIN_NVC0(push, SUBC_CP(0x7b0), 2);
++      PUSH_DATAh(push, 0xffULL << 24);
++      PUSH_DATA (push, 0xffULL << 24);
++   }
+ 
+    BEGIN_NVC0(push, SUBC_CP(0x0310), 1);
+    PUSH_DATA (push, (obj_class >= NVF0_COMPUTE_CLASS) ? 0x400 : 0x300);
+@@ -542,14 +566,35 @@ nve4_compute_upload_input(struct nvc0_context *nvc0,
+    PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
+ }
+ 
+-static inline uint8_t
+-nve4_compute_derive_cache_split(struct nvc0_context *nvc0, uint32_t shared_size)
++static inline void
++gp100_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index,
++                            struct nouveau_bo *bo, uint32_t base, uint32_t size)
++{
++   uint64_t address = bo->offset + base;
++
++   assert(index < 8);
++   assert(!(base & 0xff));
++
++   NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_LOWER, index, address);
++   NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_UPPER, index, address >> 32);
++   NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_SIZE_SHIFTED4, index,
++                                 DIV_ROUND_UP(size, 16));
++   NVC0C0_QMDV02_01_DEF_SET(qmd, CONSTANT_BUFFER_VALID, index, TRUE);
++}
++
++static inline void
++nve4_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index, struct nouveau_bo *bo,
++                           uint32_t base, uint32_t size)
+ {
+-   if (shared_size > (32 << 10))
+-      return NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1;
+-   if (shared_size > (16 << 10))
+-      return NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1;
+-   return NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1;
++   uint64_t address = bo->offset + base;
++
++   assert(index < 8);
++   assert(!(base & 0xff));
++
++   NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_LOWER, index, address);
++   NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_UPPER, index, address >> 32);
++   NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_SIZE, index, size);
++   NVA0C0_QMDV00_06_DEF_SET(qmd, CONSTANT_BUFFER_VALID, index, TRUE);
+ }
+ 
+ static void
+@@ -577,92 +622,182 @@ nve4_compute_setup_buf_cb(struct nvc0_context *nvc0, bool gp100, void *desc)
+ }
+ 
+ static void
+-nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
+-                               struct nve4_cp_launch_desc *desc,
++nve4_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd,
+                                const struct pipe_grid_info *info)
+ {
+    const struct nvc0_screen *screen = nvc0->screen;
+    const struct nvc0_program *cp = nvc0->compprog;
+ 
+-   nve4_cp_launch_desc_init_default(desc);
+-
+-   desc->entry = nvc0_program_symbol_offset(cp, info->pc);
+-
+-   desc->griddim_x = info->grid[0];
+-   desc->griddim_y = info->grid[1];
+-   desc->griddim_z = info->grid[2];
+-   desc->blockdim_x = info->block[0];
+-   desc->blockdim_y = info->block[1];
+-   desc->blockdim_z = info->block[2];
+-
+-   desc->shared_size = align(cp->cp.smem_size, 0x100);
+-   desc->local_size_p = (cp->hdr[1] & 0xfffff0) + align(cp->cp.lmem_size, 0x10);
+-   desc->local_size_n = 0;
+-   desc->cstack_size = 0x800;
+-   desc->cache_split = nve4_compute_derive_cache_split(nvc0, cp->cp.smem_size);
++   NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, TRUE);
++   NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, TRUE);
++   NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_DATA_CACHE, TRUE);
++   NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_SHADER_DATA_CACHE, TRUE);
++   NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, TRUE);
++   NVA0C0_QMDV00_06_DEF_SET(qmd, RELEASE_MEMBAR_TYPE, FE_SYSMEMBAR);
++   NVA0C0_QMDV00_06_DEF_SET(qmd, CWD_MEMBAR_TYPE, L1_SYSMEMBAR);
++   NVA0C0_QMDV00_06_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK);
++   NVA0C0_QMDV00_06_VAL_SET(qmd, SASS_VERSION, 0x30);
++
++   NVA0C0_QMDV00_06_VAL_SET(qmd, PROGRAM_OFFSET,
++                                 nvc0_program_symbol_offset(cp, info->pc));
++
++   NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]);
++   NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]);
++   NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_DEPTH, info->grid[2]);
++   NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]);
++   NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]);
++   NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]);
++
++   NVA0C0_QMDV00_06_VAL_SET(qmd, SHARED_MEMORY_SIZE,
++                                 align(cp->cp.smem_size, 0x100));
++   NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE,
++                                 (cp->hdr[1] & 0xfffff0) +
++                                 align(cp->cp.lmem_size, 0x10));
++   NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0);
++   NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, 0x800);
++
++   if (cp->cp.smem_size > (32 << 10))
++      NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION,
++                                    DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB);
++   else
++   if (cp->cp.smem_size > (16 << 10))
++      NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION,
++                                    DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB);
++   else
++      NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION,
++                                    DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB);
+ 
+-   desc->gpr_alloc = cp->num_gprs;
+-   desc->bar_alloc = cp->num_barriers;
++   NVA0C0_QMDV00_06_VAL_SET(qmd, REGISTER_COUNT, cp->num_gprs);
++   NVA0C0_QMDV00_06_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers);
+ 
+    // Only bind user uniforms and the driver constant buffer through the
+    // launch descriptor because UBOs are sticked to the driver cb to avoid the
+    // limitation of 8 CBs.
+    if (nvc0->constbuf[5][0].user || cp->parm_size) {
+-      nve4_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo,
++      nve4_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo,
+                                  NVC0_CB_USR_INFO(5), 1 << 16);
+ 
+       // Later logic will attempt to bind a real buffer at position 0. That
+       // should not happen if we've bound a user buffer.
+       assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf);
+    }
+-   nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
++   nve4_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo,
+                               NVC0_CB_AUX_INFO(5), 1 << 11);
+ 
+-   nve4_compute_setup_buf_cb(nvc0, false, desc);
++   nve4_compute_setup_buf_cb(nvc0, false, qmd);
+ }
+ 
+ static void
+-gp100_compute_setup_launch_desc(struct nvc0_context *nvc0,
+-                                struct gp100_cp_launch_desc *desc,
++gp100_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd,
+                                 const struct pipe_grid_info *info)
+ {
+    const struct nvc0_screen *screen = nvc0->screen;
+    const struct nvc0_program *cp = nvc0->compprog;
+ 
+-   gp100_cp_launch_desc_init_default(desc);
++   NVC0C0_QMDV02_01_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
++   NVC0C0_QMDV02_01_DEF_SET(qmd, RELEASE_MEMBAR_TYPE, FE_SYSMEMBAR);
++   NVC0C0_QMDV02_01_DEF_SET(qmd, CWD_MEMBAR_TYPE, L1_SYSMEMBAR);
++   NVC0C0_QMDV02_01_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK);
++
++   NVC0C0_QMDV02_01_VAL_SET(qmd, PROGRAM_OFFSET,
++                                 nvc0_program_symbol_offset(cp, info->pc));
++
++   NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]);
++   NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]);
++   NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_DEPTH, info->grid[2]);
++   NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]);
++   NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]);
++   NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]);
++
++   NVC0C0_QMDV02_01_VAL_SET(qmd, SHARED_MEMORY_SIZE,
++                                 align(cp->cp.smem_size, 0x100));
++   NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE,
++                                 (cp->hdr[1] & 0xfffff0) +
++                                 align(cp->cp.lmem_size, 0x10));
++   NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0);
++   NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, 0x800);
+ 
+-   desc->entry = nvc0_program_symbol_offset(cp, info->pc);
++   NVC0C0_QMDV02_01_VAL_SET(qmd, REGISTER_COUNT, cp->num_gprs);
++   NVC0C0_QMDV02_01_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers);
+ 
+-   desc->griddim_x = info->grid[0];
+-   desc->griddim_y = info->grid[1];
+-   desc->griddim_z = info->grid[2];
+-   desc->blockdim_x = info->block[0];
+-   desc->blockdim_y = info->block[1];
+-   desc->blockdim_z = info->block[2];
++   // Only bind user uniforms and the driver constant buffer through the
++   // launch descriptor because UBOs are sticked to the driver cb to avoid the
++   // limitation of 8 CBs.
++   if (nvc0->constbuf[5][0].user || cp->parm_size) {
++      gp100_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo,
++                                  NVC0_CB_USR_INFO(5), 1 << 16);
+ 
+-   desc->shared_size = align(cp->cp.smem_size, 0x100);
+-   desc->local_size_p = (cp->hdr[1] & 0xfffff0) + align(cp->cp.lmem_size, 0x10);
+-   desc->local_size_n = 0;
+-   desc->cstack_size = 0x800;
++      // Later logic will attempt to bind a real buffer at position 0. That
++      // should not happen if we've bound a user buffer.
++      assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf);
++   }
++   gp100_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo,
++                               NVC0_CB_AUX_INFO(5), 1 << 11);
++
++   nve4_compute_setup_buf_cb(nvc0, true, qmd);
++}
++
++static int
++gv100_sm_config_smem_size(u32 size)
++{
++   if      (size > 64 * 1024) size = 96 * 1024;
++   else if (size > 32 * 1024) size = 64 * 1024;
++   else if (size > 16 * 1024) size = 32 * 1024;
++   else if (size >  8 * 1024) size = 16 * 1024;
++   else                       size =  8 * 1024;
++   return (size / 4096) + 1;
++}
+ 
+-   desc->gpr_alloc = cp->num_gprs;
+-   desc->bar_alloc = cp->num_barriers;
++static void
++gv100_compute_setup_launch_desc(struct nvc0_context *nvc0, u32 *qmd,
++                                const struct pipe_grid_info *info)
++{
++   struct nvc0_program *cp = nvc0->compprog;
++   struct nvc0_screen *screen = nvc0->screen;
++   uint64_t entry =
++      screen->text->offset + nvc0_program_symbol_offset(cp, info->pc);
++
++   NVC3C0_QMDV02_02_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
++   NVC3C0_QMDV02_02_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK);
++   NVC3C0_QMDV02_02_DEF_SET(qmd, SAMPLER_INDEX, VIA_HEADER_INDEX);
++   NVC3C0_QMDV02_02_VAL_SET(qmd, SHARED_MEMORY_SIZE,
++                                  align(cp->cp.smem_size, 0x100));
++   NVC3C0_QMDV02_02_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE,
++                                  gv100_sm_config_smem_size(8 * 1024));
++   NVC3C0_QMDV02_02_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE,
++                                  gv100_sm_config_smem_size(96 * 1024));
++   NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_VERSION, 2);
++   NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_MAJOR_VERSION, 2);
++   NVC3C0_QMDV02_02_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE,
++                                  gv100_sm_config_smem_size(cp->cp.smem_size));
++
++   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]);
++   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]);
++   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_DEPTH, info->grid[2]);
++   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]);
++   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]);
++   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]);
++   NVC3C0_QMDV02_02_VAL_SET(qmd, REGISTER_COUNT_V, cp->num_gprs);
++   NVC3C0_QMDV02_02_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers);
+ 
+    // Only bind user uniforms and the driver constant buffer through the
+    // launch descriptor because UBOs are sticked to the driver cb to avoid the
+    // limitation of 8 CBs.
+    if (nvc0->constbuf[5][0].user || cp->parm_size) {
+-      gp100_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo,
++      gp100_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo,
+                                   NVC0_CB_USR_INFO(5), 1 << 16);
+ 
+       // Later logic will attempt to bind a real buffer at position 0. That
+       // should not happen if we've bound a user buffer.
+       assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf);
+    }
+-   gp100_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
++   gp100_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo,
+                                NVC0_CB_AUX_INFO(5), 1 << 11);
+ 
+-   nve4_compute_setup_buf_cb(nvc0, true, desc);
++   nve4_compute_setup_buf_cb(nvc0, true, qmd);
++
++   NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, entry & 0xffffffff);
++   NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_UPPER, entry >> 32);
+ }
+ 
+ static inline void *
+@@ -677,6 +812,7 @@ nve4_compute_alloc_launch_desc(struct nouveau_context *nv,
+       ptr += adj;
+       *pgpuaddr += adj;
+    }
++   memset(ptr, 0x00, 256);
+    return ptr;
+ }
+ 
+@@ -734,6 +870,9 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
+    if (ret)
+       goto out;
+ 
++   if (nvc0->screen->compute->oclass >= GV100_COMPUTE_CLASS)
++      gv100_compute_setup_launch_desc(nvc0, desc, info);
++   else
+    if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS)
+       gp100_compute_setup_launch_desc(nvc0, desc, info);
+    else
+@@ -743,10 +882,14 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
+ 
+ #ifndef NDEBUG
+    if (debug_get_num_option("NV50_PROG_DEBUG", 0)) {
++      debug_printf("Queue Meta Data:\n");
++      if (nvc0->screen->compute->oclass >= GV100_COMPUTE_CLASS)
++         NVC3C0QmdDump_V02_02(desc);
++      else
+       if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS)
+-         gp100_compute_dump_launch_desc(desc);
++         NVC0C0QmdDump_V02_01(desc);
+       else
+-         nve4_compute_dump_launch_desc(desc);
++         NVA0C0QmdDump_V00_06(desc);
+    }
+ #endif
+ 
+@@ -877,115 +1020,6 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0)
+    nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES;
+ }
+ 
+-
+-#ifndef NDEBUG
+-static const char *nve4_cache_split_name(unsigned value)
+-{
+-   switch (value) {
+-   case NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1: return "16K_SHARED_48K_L1";
+-   case NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1: return "32K_SHARED_32K_L1";
+-   case NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1: return "48K_SHARED_16K_L1";
+-   default:
+-      return "(invalid)";
+-   }
+-}
+-
+-static void
+-nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *desc)
+-{
+-   const uint32_t *data = (const uint32_t *)desc;
+-   unsigned i;
+-   bool zero = false;
+-
+-   debug_printf("COMPUTE LAUNCH DESCRIPTOR:\n");
+-
+-   for (i = 0; i < sizeof(*desc); i += 4) {
+-      if (data[i / 4]) {
+-         debug_printf("[%x]: 0x%08x\n", i, data[i / 4]);
+-         zero = false;
+-      } else
+-      if (!zero) {
+-         debug_printf("...\n");
+-         zero = true;
+-      }
+-   }
+-
+-   debug_printf("entry = 0x%x\n", desc->entry);
+-   debug_printf("grid dimensions = %ux%ux%u\n",
+-                desc->griddim_x, desc->griddim_y, desc->griddim_z);
+-   debug_printf("block dimensions = %ux%ux%u\n",
+-                desc->blockdim_x, desc->blockdim_y, desc->blockdim_z);
+-   debug_printf("s[] size: 0x%x\n", desc->shared_size);
+-   debug_printf("l[] size: -0x%x / +0x%x\n",
+-                desc->local_size_n, desc->local_size_p);
+-   debug_printf("stack size: 0x%x\n", desc->cstack_size);
+-   debug_printf("barrier count: %u\n", desc->bar_alloc);
+-   debug_printf("$r count: %u\n", desc->gpr_alloc);
+-   debug_printf("cache split: %s\n", nve4_cache_split_name(desc->cache_split));
+-   debug_printf("linked tsc: %d\n", desc->linked_tsc);
+-
+-   for (i = 0; i < 8; ++i) {
+-      uint64_t address;
+-      uint32_t size = desc->cb[i].size;
+-      bool valid = !!(desc->cb_mask & (1 << i));
+-
+-      address = ((uint64_t)desc->cb[i].address_h << 32) | desc->cb[i].address_l;
+-
+-      if (!valid && !address && !size)
+-         continue;
+-      debug_printf("CB[%u]: address = 0x%"PRIx64", size 0x%x%s\n",
+-                   i, address, size, valid ? "" : "  (invalid)");
+-   }
+-}
+-
+-static void
+-gp100_compute_dump_launch_desc(const struct gp100_cp_launch_desc *desc)
+-{
+-   const uint32_t *data = (const uint32_t *)desc;
+-   unsigned i;
+-   bool zero = false;
+-
+-   debug_printf("COMPUTE LAUNCH DESCRIPTOR:\n");
+-
+-   for (i = 0; i < sizeof(*desc); i += 4) {
+-      if (data[i / 4]) {
+-         debug_printf("[%x]: 0x%08x\n", i, data[i / 4]);
+-         zero = false;
+-      } else
+-      if (!zero) {
+-         debug_printf("...\n");
+-         zero = true;
+-      }
+-   }
+-
+-   debug_printf("entry = 0x%x\n", desc->entry);
+-   debug_printf("grid dimensions = %ux%ux%u\n",
+-                desc->griddim_x, desc->griddim_y, desc->griddim_z);
+-   debug_printf("block dimensions = %ux%ux%u\n",
+-                desc->blockdim_x, desc->blockdim_y, desc->blockdim_z);
+-   debug_printf("s[] size: 0x%x\n", desc->shared_size);
+-   debug_printf("l[] size: -0x%x / +0x%x\n",
+-                desc->local_size_n, desc->local_size_p);
+-   debug_printf("stack size: 0x%x\n", desc->cstack_size);
+-   debug_printf("barrier count: %u\n", desc->bar_alloc);
+-   debug_printf("$r count: %u\n", desc->gpr_alloc);
+-   debug_printf("linked tsc: %d\n", desc->linked_tsc);
+-
+-   for (i = 0; i < 8; ++i) {
+-      uint64_t address;
+-      uint32_t size = desc->cb[i].size_sh4 << 4;
+-      bool valid = !!(desc->cb_mask & (1 << i));
+-
+-      address = ((uint64_t)desc->cb[i].address_h << 32) | desc->cb[i].address_l;
+-
+-      if (!valid && !address && !size)
+-         continue;
+-      debug_printf("CB[%u]: address = 0x%"PRIx64", size 0x%x%s\n",
+-                   i, address, size, valid ? "" : "  (invalid)");
+-   }
+-}
+-#endif
+-
+ #ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER
+ static void
+ nve4_compute_trap_info(struct nvc0_context *nvc0)
+diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
+index 7ff6935cc3d..d2599f7a71d 100644
+--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
++++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
+@@ -4,142 +4,6 @@
+ 
+ #include "nvc0/nve4_compute.xml.h"
+ 
+-struct nve4_cp_launch_desc
+-{
+-   u32 unk0[8];
+-   u32 entry;
+-   u32 unk9[2];
+-   u32 unk11_0      : 30;
+-   u32 linked_tsc   : 1;
+-   u32 unk11_31     : 1;
+-   u32 griddim_x    : 31;
+-   u32 unk12        : 1;
+-   u16 griddim_y;
+-   u16 griddim_z;
+-   u32 unk14[3];
+-   u16 shared_size; /* must be aligned to 0x100 */
+-   u16 unk17;
+-   u16 unk18;
+-   u16 blockdim_x;
+-   u16 blockdim_y;
+-   u16 blockdim_z;
+-   u32 cb_mask      : 8;
+-   u32 unk20_8      : 21;
+-   u32 cache_split  : 2;
+-   u32 unk20_31     : 1;
+-   u32 unk21[8];
+-   struct {
+-      u32 address_l;
+-      u32 address_h : 8;
+-      u32 reserved  : 7;
+-      u32 size      : 17;
+-   } cb[8];
+-   u32 local_size_p : 20;
+-   u32 unk45_20     : 7;
+-   u32 bar_alloc    : 5;
+-   u32 local_size_n : 20;
+-   u32 unk46_20     : 4;
+-   u32 gpr_alloc    : 8;
+-   u32 cstack_size  : 20;
+-   u32 unk47_20     : 12;
+-   u32 unk48[16];
+-};
+-
+-struct gp100_cp_launch_desc
+-{
+-   u32 unk0[8];
+-   u32 entry;
+-   u32 unk9[2];
+-   u32 unk11_0      : 30;
+-   u32 linked_tsc   : 1;
+-   u32 unk11_31     : 1;
+-   u32 griddim_x    : 31;
+-   u32 unk12        : 1;
+-   u16 griddim_y;
+-   u16 unk13;
+-   u16 griddim_z;
+-   u16 unk14;
+-   u32 unk15[2];
+-   u32 shared_size  : 18;
+-   u32 unk17        : 14;
+-   u16 unk18;
+-   u16 blockdim_x;
+-   u16 blockdim_y;
+-   u16 blockdim_z;
+-   u32 cb_mask      : 8;
+-   u32 unk20        : 24;
+-   u32 unk21[8];
+-   u32 local_size_p : 24;
+-   u32 unk29        : 3;
+-   u32 bar_alloc    : 5;
+-   u32 local_size_n : 24;
+-   u32 gpr_alloc    : 8;
+-   u32 cstack_size  : 24;
+-   u32 unk31        : 8;
+-   struct {
+-      u32 address_l;
+-      u32 address_h : 17;
+-      u32 reserved  : 2;
+-      u32 size_sh4  : 13;
+-   } cb[8];
+-   u32 unk48[16];
+-};
+-
+-static inline void
+-nve4_cp_launch_desc_init_default(struct nve4_cp_launch_desc *desc)
+-{
+-   memset(desc, 0, sizeof(*desc));
+-
+-   desc->unk0[7]  = 0xbc000000;
+-   desc->unk11_0  = 0x04014000;
+-   desc->unk47_20 = 0x300;
+-}
+-
+-static inline void
+-nve4_cp_launch_desc_set_cb(struct nve4_cp_launch_desc *desc,
+-                           unsigned index,
+-                           struct nouveau_bo *bo,
+-                           uint32_t base, uint32_t size)
+-{
+-   uint64_t address = bo->offset + base;
+-
+-   assert(index < 8);
+-   assert(!(base & 0xff));
+-
+-   desc->cb[index].address_l = address;
+-   desc->cb[index].address_h = address >> 32;
+-   desc->cb[index].size = size;
+-
+-   desc->cb_mask |= 1 << index;
+-}
+-
+-static inline void
+-gp100_cp_launch_desc_init_default(struct gp100_cp_launch_desc *desc)
+-{
+-   memset(desc, 0, sizeof(*desc));
+-
+-   desc->unk0[4]  = 0x40;
+-   desc->unk11_0  = 0x04014000;
+-}
+-
+-static inline void
+-gp100_cp_launch_desc_set_cb(struct gp100_cp_launch_desc *desc,
+-                            unsigned index,
+-                            struct nouveau_bo *bo,
+-                            uint32_t base, uint32_t size)
+-{
+-   uint64_t address = bo->offset + base;
+-
+-   assert(index < 8);
+-   assert(!(base & 0xff));
+-
+-   desc->cb[index].address_l = address;
+-   desc->cb[index].address_h = address >> 32;
+-   desc->cb[index].size_sh4 = DIV_ROUND_UP(size, 16);
+-
+-   desc->cb_mask |= 1 << index;
+-}
+-
+ struct nve4_mp_trap_info {
+    u32 lock;
+    u32 pc;
+diff --git a/src/gallium/drivers/nouveau/nvc0/qmd.h b/src/gallium/drivers/nouveau/nvc0/qmd.h
+new file mode 100644
+index 00000000000..86c290fe836
+--- /dev/null
++++ b/src/gallium/drivers/nouveau/nvc0/qmd.h
+@@ -0,0 +1,68 @@
++#ifndef __NVHW_QMD_H__
++#define __NVHW_QMD_H__
++#include <stdio.h>
++#include <stdint.h>
++#include "util/u_debug.h"
++#include "drf.h"
++/* NVQMD_ENUM(drf, v...): [drf##_##v] = #v array initializers; only 1, 2, 3 or 8 names defined. */
++#define NVQMD_ENUM_1(X,drf,v0)                                                 \
++   [drf##_##v0] = #v0
++#define NVQMD_ENUM_2(X,drf,v0,v1)                                              \
++   [drf##_##v0] = #v0,                                                         \
++   [drf##_##v1] = #v1
++#define NVQMD_ENUM_3(X,drf,v0,v1,v2)                                           \
++   [drf##_##v0] = #v0,                                                         \
++   [drf##_##v1] = #v1,                                                         \
++   [drf##_##v2] = #v2
++#define NVQMD_ENUM_8(X,drf,v0,v1,v2,v3,v4,v5,v6,v7)                            \
++   [drf##_##v0] = #v0,                                                         \
++   [drf##_##v1] = #v1,                                                         \
++   [drf##_##v2] = #v2,                                                         \
++   [drf##_##v3] = #v3,                                                         \
++   [drf##_##v4] = #v4,                                                         \
++   [drf##_##v5] = #v5,                                                         \
++   [drf##_##v6] = #v6,                                                         \
++   [drf##_##v7] = #v7
++
++#define NVQMD_ENUM_(X,_1,_2,_3,_4,_5,_6,_7,_8,_9,IMPL,...) IMPL /* IMPL: macro matching the arg count */
++#define NVQMD_ENUM(A...) NVQMD_ENUM_(X, ##A, NVQMD_ENUM_8, NVQMD_ENUM_7,       \
++                                             NVQMD_ENUM_6, NVQMD_ENUM_5,       \
++                                             NVQMD_ENUM_4, NVQMD_ENUM_3,       \
++                                             NVQMD_ENUM_2, NVQMD_ENUM_1)(X, ##A)
++/* NVQMD_VAL(d, r, p, f[, i], o): debug_printf field f, or f(i), of p using value format o. */
++#define NVQMD_VAL_N(X,d,r,p,f,o) do {                                          \
++   uint32_t val = NVVAL_MW_GET_X((p), d##_##r##_##f);                          \
++   debug_printf("   %-36s: "o"\n", #f, val);                                   \
++} while(0)
++#define NVQMD_VAL_I(X,d,r,p,f,i,o) do {                                        \
++   uint32_t val = NVVAL_MW_GET_X((p), d##_##r##_##f(i));                       \
++   char name[80];                                                              \
++   snprintf(name, sizeof(name), "%s(%d)", #f, i);                              \
++   debug_printf("   %-36s: "o"\n", name, val);                                 \
++} while(0)
++#define NVQMD_VAL_(X,_1,_2,_3,_4,_5,_6,IMPL,...) IMPL /* IMPL: _I with an index arg, else _N */
++#define NVQMD_VAL(A...) NVQMD_VAL_(X, ##A, NVQMD_VAL_I, NVQMD_VAL_N)(X, ##A)
++/* NVQMD_DEF/NVQMD_IDX: debug_printf field f (IDX: f(i)) of p as its enum name e, else UNKNOWN. */
++#define NVQMD_DEF(d,r,p,f,e...) do {                                           \
++   static const char *ev[] = { NVQMD_ENUM(d##_##r##_##f,##e) };                \
++   uint32_t val = NVVAL_MW_GET((p), d, r, f);                                  \
++   if (val < ARRAY_SIZE(ev) && ev[val])                                        \
++      debug_printf("   %-36s: %s\n", #f, ev[val]);                             \
++   else                                                                        \
++      debug_printf("   %-36s: UNKNOWN 0x%x\n", #f, val);                       \
++} while(0)
++#define NVQMD_IDX(d,r,p,f,i,e...) do {                                         \
++   static const char *ev[] = { NVQMD_ENUM(d##_##r##_##f,##e) };                \
++   char name[80];                                                              \
++   snprintf(name, sizeof(name), "%s(%d)", #f, i);                              \
++   uint32_t val = NVVAL_MW_GET((p), d, r, f, i);                               \
++   if (val < ARRAY_SIZE(ev) && ev[val])                                        \
++      debug_printf("   %-36s: %s\n", name, ev[val]);                           \
++   else                                                                        \
++      debug_printf("   %-36s: UNKNOWN 0x%x\n", name, val);                     \
++} while(0)
++/* Each dumps the fields of one QMD version, read from the given words, via debug_printf. */
++void NVA0C0QmdDump_V00_06(uint32_t *);
++void NVC0C0QmdDump_V02_01(uint32_t *);
++void NVC3C0QmdDump_V02_02(uint32_t *);
++#endif
+diff --git a/src/gallium/drivers/nouveau/nvc0/qmda0c0.c b/src/gallium/drivers/nouveau/nvc0/qmda0c0.c
+new file mode 100644
+index 00000000000..7103a893af5
+--- /dev/null
++++ b/src/gallium/drivers/nouveau/nvc0/qmda0c0.c
+@@ -0,0 +1,166 @@
++/*
++ * Copyright 2020 Red Hat Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ */
++#include "qmd.h"
++#include "cla0c0qmd.h"
++
++#define NVA0C0_QMDV00_06_VAL(a...) NVQMD_VAL(NVA0C0, QMDV00_06, ##a)
++#define NVA0C0_QMDV00_06_DEF(a...) NVQMD_DEF(NVA0C0, QMDV00_06, ##a)
++#define NVA0C0_QMDV00_06_IDX(a...) NVQMD_IDX(NVA0C0, QMDV00_06, ##a)
++/* Dump the NVA0C0 QMDV00_06 fields of qmd, one debug_printf line per field, in the order below. */
++void
++NVA0C0QmdDump_V00_06(uint32_t *qmd)
++{
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_A, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_B, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_C, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_D, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_E, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_F, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_G, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_H, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_A_A, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_I, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_J, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_A, "0x%x");
++   NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_K, FALSE, TRUE);
++   NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_L, FALSE, TRUE);
++   NVA0C0_QMDV00_06_DEF(qmd, SEMAPHORE_RELEASE_ENABLE0, FALSE, TRUE);
++   NVA0C0_QMDV00_06_DEF(qmd, SEMAPHORE_RELEASE_ENABLE1, FALSE, TRUE);
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_B, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_M, "0x%x");
++   NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_N, FALSE, TRUE);
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_O, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_C, "0x%x");
++   NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, FALSE, TRUE);
++   NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, FALSE, TRUE);
++   NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_TEXTURE_DATA_CACHE, FALSE, TRUE);
++   NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_SHADER_DATA_CACHE, FALSE, TRUE);
++   NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_INSTRUCTION_CACHE, FALSE, TRUE);
++   NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, FALSE, TRUE);
++   NVA0C0_QMDV00_06_VAL(qmd, PROGRAM_OFFSET, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_P, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_Q, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_D, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_R, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_S, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_E, "0x%x");
++   NVA0C0_QMDV00_06_DEF(qmd, RELEASE_MEMBAR_TYPE, FE_NONE, FE_SYSMEMBAR);
++   NVA0C0_QMDV00_06_DEF(qmd, CWD_MEMBAR_TYPE, L1_NONE, L1_SYSMEMBAR, L1_MEMBAR);
++   NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_T, FALSE, TRUE);
++   NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_U, FALSE, TRUE);
++   NVA0C0_QMDV00_06_DEF(qmd, THROTTLED, FALSE, TRUE);
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_E2_A, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_E2_B, "0x%x");
++   NVA0C0_QMDV00_06_DEF(qmd, API_VISIBLE_CALL_LIMIT, _32, NO_CHECK);
++   NVA0C0_QMDV00_06_DEF(qmd, SHARED_MEMORY_BANK_MAPPING, FOUR_BYTES_PER_BANK,
++                                                         EIGHT_BYTES_PER_BANK);
++   NVA0C0_QMDV00_06_DEF(qmd, SAMPLER_INDEX, INDEPENDENTLY, VIA_HEADER_INDEX);
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_E3_A, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_WIDTH, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_HEIGHT, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_DEPTH, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_WIDTH_RESUME, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_HEIGHT_RESUME, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_DEPTH_RESUME, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_V, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_F, "0x%x");
++   NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_W, FALSE, TRUE);
++   NVA0C0_QMDV00_06_VAL(qmd, SHARED_MEMORY_SIZE, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_G, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_VERSION, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_MAJOR_VERSION, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_H, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, CTA_THREAD_DIMENSION0, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, CTA_THREAD_DIMENSION1, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, CTA_THREAD_DIMENSION2, "0x%x");
++   for (int i = 0; i < 8; i++) /* indexed field: entries 0-7 */
++      NVA0C0_QMDV00_06_IDX(qmd, CONSTANT_BUFFER_VALID, i, FALSE, TRUE);
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_I, "0x%x");
++   NVA0C0_QMDV00_06_DEF(qmd, L1_CONFIGURATION,
++                             DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB,
++                             DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB,
++                             DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB);
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_X, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_Y, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, RELEASE0_ADDRESS_LOWER, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, RELEASE0_ADDRESS_UPPER, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_J, "0x%x");
++   NVA0C0_QMDV00_06_DEF(qmd, RELEASE0_REDUCTION_OP, RED_ADD,
++                                                    RED_MIN,
++                                                    RED_MAX,
++                                                    RED_INC,
++                                                    RED_DEC,
++                                                    RED_AND,
++                                                    RED_OR,
++                                                    RED_XOR);
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_K, "0x%x");
++   NVA0C0_QMDV00_06_DEF(qmd, RELEASE0_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32);
++   NVA0C0_QMDV00_06_DEF(qmd, RELEASE0_REDUCTION_ENABLE, FALSE, TRUE);
++   NVA0C0_QMDV00_06_DEF(qmd, RELEASE0_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD);
++   NVA0C0_QMDV00_06_VAL(qmd, RELEASE0_PAYLOAD, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, RELEASE1_ADDRESS_LOWER, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, RELEASE1_ADDRESS_UPPER, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_L, "0x%x");
++   NVA0C0_QMDV00_06_DEF(qmd, RELEASE1_REDUCTION_OP, RED_ADD,
++                                                    RED_MIN,
++                                                    RED_MAX,
++                                                    RED_INC,
++                                                    RED_DEC,
++                                                    RED_AND,
++                                                    RED_OR,
++                                                    RED_XOR);
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_M, "0x%x");
++   NVA0C0_QMDV00_06_DEF(qmd, RELEASE1_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32);
++   NVA0C0_QMDV00_06_DEF(qmd, RELEASE1_REDUCTION_ENABLE, FALSE, TRUE);
++   NVA0C0_QMDV00_06_DEF(qmd, RELEASE1_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD);
++   NVA0C0_QMDV00_06_VAL(qmd, RELEASE1_PAYLOAD, "0x%x");
++   for (int i = 0; i < 8; i++) { /* indexed constant-buffer fields: entries 0-7 */
++      NVA0C0_QMDV00_06_VAL(qmd, CONSTANT_BUFFER_ADDR_LOWER, i, "0x%x");
++      NVA0C0_QMDV00_06_VAL(qmd, CONSTANT_BUFFER_ADDR_UPPER, i, "0x%x");
++      NVA0C0_QMDV00_06_VAL(qmd, CONSTANT_BUFFER_RESERVED_ADDR, i, "0x%x");
++      NVA0C0_QMDV00_06_IDX(qmd, CONSTANT_BUFFER_INVALIDATE, i, FALSE, TRUE);
++      NVA0C0_QMDV00_06_VAL(qmd, CONSTANT_BUFFER_SIZE, i, "0x%x");
++   }
++   NVA0C0_QMDV00_06_VAL(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_N, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, BARRIER_COUNT, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, REGISTER_COUNT, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, SASS_VERSION, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_A, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_B, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_C, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_D, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_E, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_F, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_G, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_H, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_I, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_J, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_K, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_L, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_M, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_N, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, DEBUG_ID_UPPER, "0x%x");
++   NVA0C0_QMDV00_06_VAL(qmd, DEBUG_ID_LOWER, "0x%x");
++}
+diff --git a/src/gallium/drivers/nouveau/nvc0/qmdc0c0.c b/src/gallium/drivers/nouveau/nvc0/qmdc0c0.c
+new file mode 100644
+index 00000000000..945439ee0c8
+--- /dev/null
++++ b/src/gallium/drivers/nouveau/nvc0/qmdc0c0.c
+@@ -0,0 +1,165 @@
++/*
++ * Copyright 2020 Red Hat Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ */
++#include "qmd.h"
++#include "clc0c0qmd.h"
++
++#define NVC0C0_QMDV02_01_VAL(a...) NVQMD_VAL(NVC0C0, QMDV02_01, ##a)
++#define NVC0C0_QMDV02_01_DEF(a...) NVQMD_DEF(NVC0C0, QMDV02_01, ##a)
++#define NVC0C0_QMDV02_01_IDX(a...) NVQMD_IDX(NVC0C0, QMDV02_01, ##a)
++/* Dump the NVC0C0 QMDV02_01 fields of qmd, one debug_printf line per field, in the order below. */
++void
++NVC0C0QmdDump_V02_01(uint32_t *qmd)
++{
++   NVC0C0_QMDV02_01_VAL(qmd, OUTER_PUT, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, OUTER_OVERFLOW, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, OUTER_GET, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, OUTER_STICKY_OVERFLOW, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, INNER_GET, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, INNER_OVERFLOW, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, INNER_PUT, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, INNER_STICKY_OVERFLOW, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_GROUP_ID, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, SM_GLOBAL_CACHING_ENABLE, "0x%x");
++   NVC0C0_QMDV02_01_DEF(qmd, RUN_CTA_IN_ONE_SM_PARTITION, FALSE, TRUE);
++   NVC0C0_QMDV02_01_DEF(qmd, IS_QUEUE, FALSE, TRUE);
++   NVC0C0_QMDV02_01_DEF(qmd, ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST, FALSE, TRUE);
++   NVC0C0_QMDV02_01_DEF(qmd, SEMAPHORE_RELEASE_ENABLE0, FALSE, TRUE);
++   NVC0C0_QMDV02_01_DEF(qmd, SEMAPHORE_RELEASE_ENABLE1, FALSE, TRUE);
++   NVC0C0_QMDV02_01_DEF(qmd, REQUIRE_SCHEDULING_PCAS, FALSE, TRUE);
++   NVC0C0_QMDV02_01_DEF(qmd, DEPENDENT_QMD_SCHEDULE_ENABLE, FALSE, TRUE);
++   NVC0C0_QMDV02_01_DEF(qmd, DEPENDENT_QMD_TYPE, QUEUE, GRID);
++   NVC0C0_QMDV02_01_DEF(qmd, DEPENDENT_QMD_FIELD_COPY, FALSE, TRUE);
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_B, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, CIRCULAR_QUEUE_SIZE, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_C, "0x%x");
++   NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, FALSE, TRUE);
++   NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, FALSE, TRUE);
++   NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_TEXTURE_DATA_CACHE, FALSE, TRUE);
++   NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_SHADER_DATA_CACHE, FALSE, TRUE);
++   NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_INSTRUCTION_CACHE, FALSE, TRUE);
++   NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, FALSE, TRUE);
++   NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_WIDTH_RESUME, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_HEIGHT_RESUME, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_DEPTH_RESUME, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, PROGRAM_OFFSET, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, CIRCULAR_QUEUE_ADDR_LOWER, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, CIRCULAR_QUEUE_ADDR_UPPER, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_D, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, CIRCULAR_QUEUE_ENTRY_SIZE, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, CWD_REFERENCE_COUNT_ID, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, CWD_REFERENCE_COUNT_DELTA_MINUS_ONE, "0x%x");
++   NVC0C0_QMDV02_01_DEF(qmd, RELEASE_MEMBAR_TYPE, FE_NONE, FE_SYSMEMBAR);
++   NVC0C0_QMDV02_01_DEF(qmd, CWD_REFERENCE_COUNT_INCR_ENABLE, FALSE, TRUE);
++   NVC0C0_QMDV02_01_DEF(qmd, CWD_MEMBAR_TYPE, L1_NONE, L1_SYSMEMBAR, L1_MEMBAR);
++   NVC0C0_QMDV02_01_DEF(qmd, SEQUENTIALLY_RUN_CTAS, FALSE, TRUE);
++   NVC0C0_QMDV02_01_DEF(qmd, CWD_REFERENCE_COUNT_DECR_ENABLE, FALSE, TRUE);
++   NVC0C0_QMDV02_01_DEF(qmd, THROTTLED, FALSE, TRUE);
++   NVC0C0_QMDV02_01_DEF(qmd, API_VISIBLE_CALL_LIMIT, _32, NO_CHECK);
++   NVC0C0_QMDV02_01_DEF(qmd, SAMPLER_INDEX, INDEPENDENTLY, VIA_HEADER_INDEX);
++   NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_WIDTH, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_HEIGHT, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED13A, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_DEPTH, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED14A, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, DEPENDENT_QMD_POINTER, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, QUEUE_ENTRIES_PER_CTA_MINUS_ONE, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, COALESCE_WAITING_PERIOD, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, SHARED_MEMORY_SIZE, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_G, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_VERSION, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_MAJOR_VERSION, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_H, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, CTA_THREAD_DIMENSION0, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, CTA_THREAD_DIMENSION1, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, CTA_THREAD_DIMENSION2, "0x%x");
++   for (int i = 0; i < 8; i++) /* indexed field: entries 0-7 */
++      NVC0C0_QMDV02_01_IDX(qmd, CONSTANT_BUFFER_VALID, i, FALSE, TRUE);
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_I, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, SM_DISABLE_MASK_LOWER, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, SM_DISABLE_MASK_UPPER, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, RELEASE0_ADDRESS_LOWER, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, RELEASE0_ADDRESS_UPPER, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_J, "0x%x");
++   NVC0C0_QMDV02_01_DEF(qmd, RELEASE0_REDUCTION_OP, RED_ADD,
++                                                    RED_MIN,
++                                                    RED_MAX,
++                                                    RED_INC,
++                                                    RED_DEC,
++                                                    RED_AND,
++                                                    RED_OR,
++                                                    RED_XOR);
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_K, "0x%x");
++   NVC0C0_QMDV02_01_DEF(qmd, RELEASE0_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32);
++   NVC0C0_QMDV02_01_DEF(qmd, RELEASE0_REDUCTION_ENABLE, FALSE, TRUE);
++   NVC0C0_QMDV02_01_DEF(qmd, RELEASE0_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD);
++   NVC0C0_QMDV02_01_VAL(qmd, RELEASE0_PAYLOAD, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, RELEASE1_ADDRESS_LOWER, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, RELEASE1_ADDRESS_UPPER, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_L, "0x%x");
++   NVC0C0_QMDV02_01_DEF(qmd, RELEASE1_REDUCTION_OP, RED_ADD,
++                                                    RED_MIN,
++                                                    RED_MAX,
++                                                    RED_INC,
++                                                    RED_DEC,
++                                                    RED_AND,
++                                                    RED_OR,
++                                                    RED_XOR);
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_M, "0x%x");
++   NVC0C0_QMDV02_01_DEF(qmd, RELEASE1_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32);
++   NVC0C0_QMDV02_01_DEF(qmd, RELEASE1_REDUCTION_ENABLE, FALSE, TRUE);
++   NVC0C0_QMDV02_01_DEF(qmd, RELEASE1_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD);
++   NVC0C0_QMDV02_01_VAL(qmd, RELEASE1_PAYLOAD, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_N, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, BARRIER_COUNT, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, REGISTER_COUNT, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, SASS_VERSION, "0x%x");
++   for (int i = 0; i < 8; i++) { /* indexed constant-buffer fields: entries 0-7 */
++      NVC0C0_QMDV02_01_VAL(qmd, CONSTANT_BUFFER_ADDR_LOWER, i, "0x%x");
++      NVC0C0_QMDV02_01_VAL(qmd, CONSTANT_BUFFER_ADDR_UPPER, i, "0x%x");
++      NVC0C0_QMDV02_01_VAL(qmd, CONSTANT_BUFFER_RESERVED_ADDR, i, "0x%x");
++      NVC0C0_QMDV02_01_IDX(qmd, CONSTANT_BUFFER_INVALIDATE, i, FALSE, TRUE);
++      NVC0C0_QMDV02_01_VAL(qmd, CONSTANT_BUFFER_SIZE_SHIFTED4, i, "0x%x");
++   }
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_R, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_S, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_INNER_GET, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_REQUIRE_SCHEDULING_PCAS, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_INNER_PUT, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_SCG_TYPE, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_SPAN_LIST_HEAD_INDEX, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_Q, "0x%x");
++   NVC0C0_QMDV02_01_DEF(qmd, HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID, FALSE, TRUE);
++   NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_SKED_NEXT_QMD_POINTER, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_G, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_H, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_I, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_J, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_K, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_L, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_M, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_N, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, DEBUG_ID_UPPER, "0x%x");
++   NVC0C0_QMDV02_01_VAL(qmd, DEBUG_ID_LOWER, "0x%x");
++}
+diff --git a/src/gallium/drivers/nouveau/nvc0/qmdc3c0.c b/src/gallium/drivers/nouveau/nvc0/qmdc3c0.c
+new file mode 100644
+index 00000000000..c9bd8966114
+--- /dev/null
++++ b/src/gallium/drivers/nouveau/nvc0/qmdc3c0.c
+@@ -0,0 +1,168 @@
++/*
++ * Copyright 2020 Red Hat Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ */
++#include "qmd.h"
++#include "clc3c0qmd.h"
++
++#define NVC3C0_QMDV02_02_VAL(a...) NVQMD_VAL(NVC3C0, QMDV02_02, ##a)
++#define NVC3C0_QMDV02_02_DEF(a...) NVQMD_DEF(NVC3C0, QMDV02_02, ##a)
++#define NVC3C0_QMDV02_02_IDX(a...) NVQMD_IDX(NVC3C0, QMDV02_02, ##a)
++
++void
++NVC3C0QmdDump_V02_02(uint32_t *qmd)
++{
++   NVC3C0_QMDV02_02_VAL(qmd, OUTER_PUT, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, OUTER_OVERFLOW, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, OUTER_GET, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, OUTER_STICKY_OVERFLOW, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, INNER_GET, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, INNER_OVERFLOW, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, INNER_PUT, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, INNER_STICKY_OVERFLOW, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, QMD_GROUP_ID, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, SM_GLOBAL_CACHING_ENABLE, "0x%x");
++   NVC3C0_QMDV02_02_DEF(qmd, RUN_CTA_IN_ONE_SM_PARTITION, FALSE, TRUE);
++   NVC3C0_QMDV02_02_DEF(qmd, IS_QUEUE, FALSE, TRUE);
++   NVC3C0_QMDV02_02_DEF(qmd, ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST, FALSE, TRUE);
++   NVC3C0_QMDV02_02_DEF(qmd, SEMAPHORE_RELEASE_ENABLE0, FALSE, TRUE);
++   NVC3C0_QMDV02_02_DEF(qmd, SEMAPHORE_RELEASE_ENABLE1, FALSE, TRUE);
++   NVC3C0_QMDV02_02_DEF(qmd, REQUIRE_SCHEDULING_PCAS, FALSE, TRUE);
++   NVC3C0_QMDV02_02_DEF(qmd, DEPENDENT_QMD_SCHEDULE_ENABLE, FALSE, TRUE);
++   NVC3C0_QMDV02_02_DEF(qmd, DEPENDENT_QMD_TYPE, QUEUE, GRID);
++   NVC3C0_QMDV02_02_DEF(qmd, DEPENDENT_QMD_FIELD_COPY, FALSE, TRUE);
++   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_B, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_SIZE, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_C, "0x%x");
++   NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, FALSE, TRUE);
++   NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, FALSE, TRUE);
++   NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_TEXTURE_DATA_CACHE, FALSE, TRUE);
++   NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_SHADER_DATA_CACHE, FALSE, TRUE);
++   NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_INSTRUCTION_CACHE, FALSE, TRUE);
++   NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, FALSE, TRUE);
++   NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_WIDTH_RESUME, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_HEIGHT_RESUME, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_DEPTH_RESUME, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, PROGRAM_OFFSET, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_ADDR_LOWER, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_ADDR_UPPER, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_D, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_ENTRY_SIZE, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, CWD_REFERENCE_COUNT_ID, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, CWD_REFERENCE_COUNT_DELTA_MINUS_ONE, "0x%x");
++   NVC3C0_QMDV02_02_DEF(qmd, RELEASE_MEMBAR_TYPE, FE_NONE, FE_SYSMEMBAR);
++   NVC3C0_QMDV02_02_DEF(qmd, CWD_REFERENCE_COUNT_INCR_ENABLE, FALSE, TRUE);
++   NVC3C0_QMDV02_02_DEF(qmd, CWD_MEMBAR_TYPE, L1_NONE, L1_SYSMEMBAR, L1_MEMBAR);
++   NVC3C0_QMDV02_02_DEF(qmd, SEQUENTIALLY_RUN_CTAS, FALSE, TRUE);
++   NVC3C0_QMDV02_02_DEF(qmd, CWD_REFERENCE_COUNT_DECR_ENABLE, FALSE, TRUE);
++   NVC3C0_QMDV02_02_DEF(qmd, API_VISIBLE_CALL_LIMIT, _32, NO_CHECK);
++   NVC3C0_QMDV02_02_DEF(qmd, SAMPLER_INDEX, INDEPENDENTLY, VIA_HEADER_INDEX);
++   NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_WIDTH, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_HEIGHT, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED13A, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_DEPTH, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED14A, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, DEPENDENT_QMD_POINTER, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, QUEUE_ENTRIES_PER_CTA_MINUS_ONE, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, COALESCE_WAITING_PERIOD, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, SHARED_MEMORY_SIZE, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, QMD_VERSION, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, QMD_MAJOR_VERSION, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_H, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, CTA_THREAD_DIMENSION0, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, CTA_THREAD_DIMENSION1, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, CTA_THREAD_DIMENSION2, "0x%x");
++   for (int i = 0; i < 8; i++)
++      NVC3C0_QMDV02_02_IDX(qmd, CONSTANT_BUFFER_VALID, i, FALSE, TRUE);
++   NVC3C0_QMDV02_02_VAL(qmd, REGISTER_COUNT_V, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, FREE_CTA_SLOTS_EMPTY_SM, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, SM_DISABLE_MASK_LOWER, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, SM_DISABLE_MASK_UPPER, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, RELEASE0_ADDRESS_LOWER, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, RELEASE0_ADDRESS_UPPER, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_J, "0x%x");
++   NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_REDUCTION_OP, RED_ADD,
++                                                    RED_MIN,
++                                                    RED_MAX,
++                                                    RED_INC,
++                                                    RED_DEC,
++                                                    RED_AND,
++                                                    RED_OR,
++                                                    RED_XOR);
++   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_K, "0x%x");
++   NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32);
++   NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_REDUCTION_ENABLE, FALSE, TRUE);
++   NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD);
++   NVC3C0_QMDV02_02_VAL(qmd, RELEASE0_PAYLOAD, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, RELEASE1_ADDRESS_LOWER, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, RELEASE1_ADDRESS_UPPER, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_L, "0x%x");
++   NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_REDUCTION_OP, RED_ADD,
++                                                    RED_MIN,
++                                                    RED_MAX,
++                                                    RED_INC,
++                                                    RED_DEC,
++                                                    RED_AND,
++                                                    RED_OR,
++                                                    RED_XOR);
++   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_M, "0x%x");
++   NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32);
++   NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_REDUCTION_ENABLE, FALSE, TRUE);
++   NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD);
++   NVC3C0_QMDV02_02_VAL(qmd, RELEASE1_PAYLOAD, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_N, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, BARRIER_COUNT, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, REGISTER_COUNT, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, SASS_VERSION, "0x%x");
++   for (int i = 0; i < 8; i++) {
++      NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_ADDR_LOWER, i, "0x%x");
++      NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_ADDR_UPPER, i, "0x%x");
++      NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_RESERVED_ADDR, i, "0x%x");
++      NVC3C0_QMDV02_02_IDX(qmd, CONSTANT_BUFFER_INVALIDATE, i, FALSE, TRUE);
++      NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_SIZE_SHIFTED4, i, "0x%x");
++   }
++   NVC3C0_QMDV02_02_VAL(qmd, PROGRAM_ADDRESS_LOWER, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, PROGRAM_ADDRESS_UPPER, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_S, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_INNER_GET, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_REQUIRE_SCHEDULING_PCAS, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_INNER_PUT, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_SCG_TYPE, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_SPAN_LIST_HEAD_INDEX, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_Q, "0x%x");
++   NVC3C0_QMDV02_02_DEF(qmd, HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID, FALSE, TRUE);
++   NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_SKED_NEXT_QMD_POINTER, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_G, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_H, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_I, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_J, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_K, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_L, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_M, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_N, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, DEBUG_ID_UPPER, "0x%x");
++   NVC3C0_QMDV02_02_VAL(qmd, DEBUG_ID_LOWER, "0x%x");
++}
+diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
+index 5c43518afcb..d123c8a1c17 100644
+--- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
++++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
+@@ -104,6 +104,8 @@ nouveau_drm_screen_create(int fd)
+ 	case 0x110:
+ 	case 0x120:
+ 	case 0x130:
++	case 0x140:
++	case 0x160:
+ 		init = nvc0_screen_create;
+ 		break;
+ 	default:
diff --git a/SPECS/mesa.spec b/SPECS/mesa.spec
index 571fc8d..4e6c79d 100644
--- a/SPECS/mesa.spec
+++ b/SPECS/mesa.spec
@@ -12,6 +12,7 @@
 %define platform_drivers ,i965
 %define with_vmware 1
 %define with_xa     1
+%define with_iris   1
 %endif
 
 %ifarch %{ix86} x86_64
@@ -36,11 +37,11 @@
 
 %global sanitize 0
 
-%global rctag rc4
+#global rctag rc4
 
 Name:           mesa
 Summary:        Mesa graphics libraries
-Version:        19.3.0
+Version:        20.1.2
 Release:        3%{?rctag:.%{rctag}}%{?dist}
 
 License:        MIT
@@ -55,7 +56,15 @@ Source3:        Makefile
 # Fedora opts to ignore the optional part of clause 2 and treat that code as 2 clause BSD.
 Source4:        Mesa-MLAA-License-Clarification-Email.txt
 
-Patch1:		0001-llvmpipe-ppc-fix-if-ifdef-confusion-in-backport.patch
+# fix llvmpipe big-endian (#1847064)
+Patch1: 0001-gallivm-nir-fix-const-loading-on-big-endian-systems.patch
+Patch2: 0001-glsl-fix-constant-packing-for-64-bit-big-endian.patch
+Patch3: 0001-gallivm-nir-fix-big-endian-64-bit-splitting-merging.patch
+
+# Add support for NVIDIA TU11x GPUs
+Patch10: 0001-nir-use-bitfield_insert-instead-of-bfi-in-nir_lower_.patch
+Patch11: nouveau-tu1xx-support.patch
+
 BuildRequires:  gcc
 BuildRequires:  gcc-c++
 
@@ -323,7 +332,7 @@ export ASFLAGS="--generate-missing-build-notes=yes"
   -Ddri3=true \
   -Ddri-drivers=%{?dri_drivers} \
 %if 0%{?with_hardware}
-  -Dgallium-drivers=swrast,virgl,nouveau%{?with_vmware:,svga},radeonsi,r600%{?with_freedreno:,freedreno}%{?with_etnaviv:,etnaviv}%{?with_tegra:,tegra}%{?with_vc4:,vc4}%{?with_kmsro:,kmsro} \
+  -Dgallium-drivers=swrast%{?with_iris:,iris},virgl,nouveau%{?with_vmware:,svga},radeonsi,r600%{?with_freedreno:,freedreno}%{?with_etnaviv:,etnaviv}%{?with_tegra:,tegra}%{?with_vc4:,vc4}%{?with_kmsro:,kmsro} \
 %else
   -Dgallium-drivers=swrast,virgl \
 %endif
@@ -350,6 +359,7 @@ export ASFLAGS="--generate-missing-build-notes=yes"
   -Dbuild-tests=false \
   -Dselinux=true \
   -Dosmesa=gallium \
+  -Dvulkan-device-select-layer=true \
   %{nil}
 %meson_build
 
@@ -484,6 +494,7 @@ done
 %{_libdir}/dri/radeonsi_dri.so
 %ifarch %{ix86} x86_64
 %{_libdir}/dri/i965_dri.so
+%{_libdir}/dri/iris_dri.so
 %endif
 %if 0%{?with_vc4}
 %{_libdir}/dri/vc4_dri.so
@@ -532,12 +543,43 @@ done
 %{_datadir}/vulkan/icd.d/intel_icd.i686.json
 %{_datadir}/vulkan/icd.d/radeon_icd.i686.json
 %endif
+%{_libdir}/libVkLayer_MESA_device_select.so
+%{_datadir}/vulkan/implicit_layer.d/VkLayer_MESA_device_select.json
 
 %files vulkan-devel
 %{_includedir}/vulkan/
 %endif
 
 %changelog
+* Mon Jun 29 2020 Dave Airlie <airlied@redhat.com> - 20.1.2-3
+- add a follow-up fix on top of the big-endian llvmpipe fix (#1847064)
+
+* Mon Jun 29 2020 Dave Airlie <airlied@redhat.com> - 20.1.2-2
+- add another fix for big-endian llvmpipe (#1847064)
+
+* Mon Jun 29 2020 Dave Airlie <airlied@redhat.com> - 20.1.2-1
+- Update to 20.1.2
+- add fix for big-endian llvmpipe (#1847064)
+
+* Thu Jun 11 2020 Dave Airlie <airlied@redhat.com> - 20.1.1-1
+- Update to 20.1.1
+- Add support for Turing
+
+* Thu May 28 2020 Dave Airlie <airlied@redhat.com> - 20.1.0-1
+- Update to 20.1.0 final
+
+* Mon May 25 2020 Dave Airlie <airlied@redhat.com> - 20.1.0-0.1.rc4
+- Update to 20.1.0-rc4
+
+* Thu Feb 20 2020 Dave Airlie <airlied@redhat.com> - 19.3.4-2
+- Fix put image shm fallback path.
+
+* Sat Feb 15 2020 Dave Airlie <airlied@redhat.com> - 19.3.4-1
+- Update to 19.3.4 release (s390x fix)
+
+* Thu Jan 30 2020 Dave Airlie <airlied@redhat.com> - 19.3.3-1
+- Update to 19.3.3 release
+
 * Mon Nov 25 2019 Dave Airlie <airlied@redhat.com> - 19.3.0-3
 - drop khr-devel subpackage from here