diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources
index 6c360992a53..9de8168fbd9 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -151,6 +151,14 @@ NVC0_CODEGEN_SOURCES := \
codegen/nv50_ir_target_nvc0.h
NVC0_C_SOURCES := \
+ nvc0/cla0c0qmd.h \
+ nvc0/clc0c0qmd.h \
+ nvc0/clc3c0qmd.h \
+ nvc0/drf.h \
+ nvc0/qmd.h \
+ nvc0/qmda0c0.c \
+ nvc0/qmdc0c0.c \
+ nvc0/qmdc3c0.c \
nvc0/gm107_texture.xml.h \
nvc0/nvc0_3d.xml.h \
nvc0/nvc0_compute.c \
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index 42ee969c66b..d58c0d206ec 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -67,8 +67,10 @@ enum operation
OP_AND,
OP_OR,
OP_XOR,
+ OP_LOP3_LUT,
OP_SHL,
OP_SHR,
+ OP_SHF,
OP_MAX,
OP_MIN,
OP_SAT, // CLAMP(f32, 0.0, 1.0)
@@ -116,6 +118,7 @@ enum operation
OP_PINTERP,
OP_EMIT, // emit vertex
OP_RESTART, // restart primitive
+ OP_FINAL, // finish emitting primitives
OP_TEX,
OP_TXB, // texture bias
OP_TXL, // texure lod
@@ -151,7 +154,10 @@ enum operation
OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7]
OP_EXTBF, // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK
OP_BFIND, // find highest/lowest set bit
+ OP_BREV, // bitfield reverse
+ OP_BMSK, // bitfield mask
OP_PERMT, // dst = bytes from src2,src0 selected by src1 (nvc0's src order)
+ OP_SGXT,
OP_ATOM,
OP_BAR, // execution barrier, sources = { id, thread count, predicate }
OP_VADD, // byte/word vector operations
@@ -167,6 +173,7 @@ enum operation
OP_SHFL, // warp shuffle
OP_VOTE,
OP_BUFQ, // buffer query
+ OP_WARPSYNC,
OP_LAST
};
@@ -254,11 +261,29 @@ enum operation
#define NV50_IR_SUBOP_VOTE_ALL 0
#define NV50_IR_SUBOP_VOTE_ANY 1
#define NV50_IR_SUBOP_VOTE_UNI 2
+#define NV50_IR_SUBOP_LOP3_LUT_SRC0 0xf0
+#define NV50_IR_SUBOP_LOP3_LUT_SRC1 0xcc
+#define NV50_IR_SUBOP_LOP3_LUT_SRC2 0xaa
+#define NV50_IR_SUBOP_LOP3_LUT(exp) ({ /* statement expression (GNU extension) */ \
+ uint8_t a = NV50_IR_SUBOP_LOP3_LUT_SRC0; /* 0xf0 */ \
+ uint8_t b = NV50_IR_SUBOP_LOP3_LUT_SRC1; /* 0xcc */ \
+ uint8_t c = NV50_IR_SUBOP_LOP3_LUT_SRC2; /* 0xaa */ \
+ (uint8_t)(exp); /* exp may refer to a, b and c; the value is truncated to 8 bits */ \
+})
+#define NV50_IR_SUBOP_BMSK_C (0 << 0)
+#define NV50_IR_SUBOP_BMSK_W (1 << 0)
#define NV50_IR_SUBOP_MINMAX_LOW 1
#define NV50_IR_SUBOP_MINMAX_MED 2
#define NV50_IR_SUBOP_MINMAX_HIGH 3
+#define NV50_IR_SUBOP_SHF_L (0 << 0)
+#define NV50_IR_SUBOP_SHF_R (1 << 0)
+#define NV50_IR_SUBOP_SHF_LO (0 << 1)
+#define NV50_IR_SUBOP_SHF_HI (1 << 1)
+#define NV50_IR_SUBOP_SHF_C (0 << 2)
+#define NV50_IR_SUBOP_SHF_W (1 << 2)
+
// xmad(src0, src1, 0) << 16 + src2
#define NV50_IR_SUBOP_XMAD_PSL (1 << 0)
// (xmad(src0, src1, src2) & 0xffff) | (src1 << 16)
@@ -900,7 +925,7 @@ public:
uint16_t subOp; // quadop, 1 for mul-high, etc.
- unsigned encSize : 4; // encoding size in bytes
+ unsigned encSize : 5; // encoding size in bytes
unsigned saturate : 1; // to [0.0f, 1.0f]
unsigned join : 1; // converge control flow (use OP_JOIN until end)
unsigned fixed : 1; // prevent dead code elimination
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 5dc0e24c5dc..63ea7f5e7e8 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -29,6 +29,8 @@
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
+struct nir_shader_compiler_options;
+
/*
* This struct constitutes linkage information in TGSI terminology.
*
@@ -70,10 +72,12 @@ struct nv50_ir_prog_symbol
uint32_t offset;
};
+#define NVISA_GF100_CHIPSET 0xc0
#define NVISA_GK104_CHIPSET 0xe0
#define NVISA_GK20A_CHIPSET 0xea
#define NVISA_GM107_CHIPSET 0x110
#define NVISA_GM200_CHIPSET 0x120
+#define NVISA_GV100_CHIPSET 0x140
struct nv50_ir_prog_info
{
@@ -200,6 +204,9 @@ struct nv50_ir_prog_info
extern "C" {
#endif
+const struct nir_shader_compiler_options *
+nv50_ir_nir_shader_compiler_options(int chipset);
+
extern int nv50_ir_generate_code(struct nv50_ir_prog_info *);
extern void nv50_ir_relocate_code(void *relocData, uint32_t *code,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index e244bd0d610..dd8e1ab86c4 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -23,6 +23,7 @@
*/
#include "codegen/nv50_ir_target_gm107.h"
+#include "codegen/nv50_ir_sched_gm107.h"
//#define GM107_DEBUG_SCHED_DATA
@@ -170,6 +171,7 @@ private:
void emitBFI();
void emitBFE();
void emitFLO();
+ void emitPRMT();
void emitLDSTs(int, DataType);
void emitLDSTc(int);
@@ -2371,6 +2373,33 @@ CodeEmitterGM107::emitFLO()
emitGPR (0x00, insn->def(0));
}
+void
+CodeEmitterGM107::emitPRMT() // encodes OP_PERMT (called from emitInstruction)
+{
+ switch (insn->src(1).getFile()) { // the opcode variant depends on the file of src1
+ case FILE_GPR:
+ emitInsn(0x5bc00000);
+ emitGPR (0x14, insn->src(1));
+ break;
+ case FILE_MEMORY_CONST:
+ emitInsn(0x4bc00000);
+ emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
+ break;
+ case FILE_IMMEDIATE:
+ emitInsn(0x36c00000);
+ emitIMMD(0x14, 19, insn->src(1));
+ break;
+ default:
+ assert(!"bad src1 file");
+ break;
+ }
+
+ emitField(0x30, 3, insn->subOp); // subOp is passed through unchanged as a 3-wide field
+ emitGPR (0x27, insn->src(2));
+ emitGPR (0x08, insn->src(0));
+ emitGPR (0x00, insn->def(0));
+}
+
/*******************************************************************************
* memory
******************************************************************************/
@@ -3537,6 +3566,9 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
case OP_BFIND:
emitFLO();
break;
+ case OP_PERMT:
+ emitPRMT();
+ break;
case OP_SLCT:
if (isFloatType(insn->dType))
emitFCMP();
@@ -3742,156 +3774,6 @@ CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const
* sched data calculator
******************************************************************************/
-class SchedDataCalculatorGM107 : public Pass
-{
-public:
- SchedDataCalculatorGM107(const TargetGM107 *targ) : targ(targ) {}
-
-private:
- struct RegScores
- {
- struct ScoreData {
- int r[256];
- int p[8];
- int c;
- } rd, wr;
- int base;
-
- void rebase(const int base)
- {
- const int delta = this->base - base;
- if (!delta)
- return;
- this->base = 0;
-
- for (int i = 0; i < 256; ++i) {
- rd.r[i] += delta;
- wr.r[i] += delta;
- }
- for (int i = 0; i < 8; ++i) {
- rd.p[i] += delta;
- wr.p[i] += delta;
- }
- rd.c += delta;
- wr.c += delta;
- }
- void wipe()
- {
- memset(&rd, 0, sizeof(rd));
- memset(&wr, 0, sizeof(wr));
- }
- int getLatest(const ScoreData& d) const
- {
- int max = 0;
- for (int i = 0; i < 256; ++i)
- if (d.r[i] > max)
- max = d.r[i];
- for (int i = 0; i < 8; ++i)
- if (d.p[i] > max)
- max = d.p[i];
- if (d.c > max)
- max = d.c;
- return max;
- }
- inline int getLatestRd() const
- {
- return getLatest(rd);
- }
- inline int getLatestWr() const
- {
- return getLatest(wr);
- }
- inline int getLatest() const
- {
- return MAX2(getLatestRd(), getLatestWr());
- }
- void setMax(const RegScores *that)
- {
- for (int i = 0; i < 256; ++i) {
- rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
- wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
- }
- for (int i = 0; i < 8; ++i) {
- rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
- wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
- }
- rd.c = MAX2(rd.c, that->rd.c);
- wr.c = MAX2(wr.c, that->wr.c);
- }
- void print(int cycle)
- {
- for (int i = 0; i < 256; ++i) {
- if (rd.r[i] > cycle)
- INFO("rd $r%i @ %i\n", i, rd.r[i]);
- if (wr.r[i] > cycle)
- INFO("wr $r%i @ %i\n", i, wr.r[i]);
- }
- for (int i = 0; i < 8; ++i) {
- if (rd.p[i] > cycle)
- INFO("rd $p%i @ %i\n", i, rd.p[i]);
- if (wr.p[i] > cycle)
- INFO("wr $p%i @ %i\n", i, wr.p[i]);
- }
- if (rd.c > cycle)
- INFO("rd $c @ %i\n", rd.c);
- if (wr.c > cycle)
- INFO("wr $c @ %i\n", wr.c);
- }
- };
-
- RegScores *score; // for current BB
- std::vector<RegScores> scoreBoards;
-
- const TargetGM107 *targ;
- bool visit(Function *);
- bool visit(BasicBlock *);
-
- void commitInsn(const Instruction *, int);
- int calcDelay(const Instruction *, int) const;
- void setDelay(Instruction *, int, const Instruction *);
- void recordWr(const Value *, int, int);
- void checkRd(const Value *, int, int&) const;
-
- inline void emitYield(Instruction *);
- inline void emitStall(Instruction *, uint8_t);
- inline void emitReuse(Instruction *, uint8_t);
- inline void emitWrDepBar(Instruction *, uint8_t);
- inline void emitRdDepBar(Instruction *, uint8_t);
- inline void emitWtDepBar(Instruction *, uint8_t);
-
- inline int getStall(const Instruction *) const;
- inline int getWrDepBar(const Instruction *) const;
- inline int getRdDepBar(const Instruction *) const;
- inline int getWtDepBar(const Instruction *) const;
-
- void setReuseFlag(Instruction *);
-
- inline void printSchedInfo(int, const Instruction *) const;
-
- struct LiveBarUse {
- LiveBarUse(Instruction *insn, Instruction *usei)
- : insn(insn), usei(usei) { }
- Instruction *insn;
- Instruction *usei;
- };
-
- struct LiveBarDef {
- LiveBarDef(Instruction *insn, Instruction *defi)
- : insn(insn), defi(defi) { }
- Instruction *insn;
- Instruction *defi;
- };
-
- bool insertBarriers(BasicBlock *);
-
- bool doesInsnWriteTo(const Instruction *insn, const Value *val) const;
- Instruction *findFirstUse(const Instruction *) const;
- Instruction *findFirstDef(const Instruction *) const;
-
- bool needRdDepBar(const Instruction *) const;
- bool needWrDepBar(const Instruction *) const;
-};
-
inline void
SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt)
{
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp
new file mode 100644
index 00000000000..ef33743e610
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp
@@ -0,0 +1,2052 @@
+/*
+ * Copyright 2020 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "codegen/nv50_ir_emit_gv100.h"
+#include "codegen/nv50_ir_sched_gm107.h"
+
+namespace nv50_ir {
+
+/*******************************************************************************
+ * instruction format helpers
+ ******************************************************************************/
+
+#define FA_NODEF (1 << 0) // emitFormA() skips the def(0) GPR when this is set
+#define FA_RRR (1 << 1) // the remaining FA_* bits name the operand layouts emitFormA() asserts against
+#define FA_RRI (1 << 2) // src2 is an immediate
+#define FA_RRC (1 << 3) // src2 is a constant-buffer reference
+#define FA_RIR (1 << 4) // src1 is an immediate
+#define FA_RCR (1 << 5) // src1 is a constant-buffer reference
+
+#define FA_SRC_MASK 0x0ff // low bits of a source argument: the source index
+#define FA_SRC_NEG 0x100 // flag forwarded as the last argument of emitNEG()
+#define FA_SRC_ABS 0x200 // flag forwarded as the last argument of emitABS()
+
+#define EMPTY -1 // negative source argument: the operand slot is not used
+#define __(a) (a) // no source modifiers
+#define _A(a) ((a) | FA_SRC_ABS) // source index with the ABS flag
+#define N_(a) ((a) | FA_SRC_NEG) // source index with the NEG flag
+#define NA(a) ((a) | FA_SRC_NEG | FA_SRC_ABS) // source index with both flags
+
+void
+CodeEmitterGV100::emitFormA_I32(int src) // emits source `src` as a 32-wide immediate
+{
+ emitIMMD(32, 32, insn->src(src));
+ if (insn->src(src).mod.abs())
+ code[1] &= 0x7fffffff; // abs modifier: clear the top bit of code[1]
+ if (insn->src(src).mod.neg())
+ code[1] ^= 0x80000000; // neg modifier: toggle the top bit (after abs has been applied)
+}
+
+void
+CodeEmitterGV100::emitFormA_RRC(uint16_t op, int src1, int src2) // GPR operand + constant-buffer operand
+{
+ emitInsn(op);
+ if (src1 >= 0) { // a negative argument (EMPTY) leaves this operand slot untouched
+ emitNEG (75, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG));
+ emitABS (74, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS));
+ emitGPR (64, insn->src(src1 & FA_SRC_MASK)); // FA_SRC_MASK strips the modifier flags, leaving the source index
+ }
+ if (src2 >= 0) {
+ emitNEG (63, (src2 & FA_SRC_MASK), (src2 & FA_SRC_NEG));
+ emitABS (62, (src2 & FA_SRC_MASK), (src2 & FA_SRC_ABS));
+ emitCBUF(54, -1, 38, 0, 2, insn->src(src2 & FA_SRC_MASK)); // src2 is emitted through emitCBUF
+ }
+}
+
+void
+CodeEmitterGV100::emitFormA_RRI(uint16_t op, int src1, int src2) // GPR operand + 32-bit immediate operand
+{
+ emitInsn(op);
+ if (src1 >= 0) { // a negative argument (EMPTY) leaves this operand slot untouched
+ emitNEG (75, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG));
+ emitABS (74, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS));
+ emitGPR (64, insn->src(src1 & FA_SRC_MASK));
+ }
+ if (src2 >= 0)
+ emitFormA_I32(src2 & FA_SRC_MASK); // modifier flags in src2 are dropped; emitFormA_I32 reads the modifiers from the source itself
+}
+
+void
+CodeEmitterGV100::emitFormA_RRR(uint16_t op, int src1, int src2) // two GPR operands
+{
+ emitInsn(op);
+ if (src2 >= 0) { // note: here src2 is the operand emitted with emitGPR(64, ...)
+ emitNEG (75, (src2 & FA_SRC_MASK), (src2 & FA_SRC_NEG));
+ emitABS (74, (src2 & FA_SRC_MASK), (src2 & FA_SRC_ABS));
+ emitGPR (64, insn->src(src2 & FA_SRC_MASK));
+ }
+
+ if (src1 >= 0) { // ... and src1 is the operand emitted with emitGPR(32, ...)
+ emitNEG (63, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG));
+ emitABS (62, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS));
+ emitGPR (32, insn->src(src1 & FA_SRC_MASK));
+ }
+}
+
+void
+CodeEmitterGV100::emitFormA(uint16_t op, uint8_t forms, // forms: FA_* mask of layouts the caller allows
+ int src0, int src1, int src2) // srcN: source index | FA_SRC_* flags, or EMPTY
+{
+ switch ((src1 < 0) ? FILE_GPR : insn->src(src1 & FA_SRC_MASK).getFile()) { // an unused src1 is treated like a GPR
+ case FILE_GPR:
+ switch ((src2 < 0) ? FILE_GPR : insn->src(src2 & FA_SRC_MASK).getFile()) { // likewise for an unused src2
+ case FILE_GPR:
+ assert(forms & FA_RRR);
+ emitFormA_RRR((1 << 9) | op, src1, src2); // the layout number is OR'ed in above the opcode, shifted left by 9
+ break;
+ case FILE_IMMEDIATE:
+ assert(forms & FA_RRI);
+ emitFormA_RRI((2 << 9) | op, src1, src2);
+ break;
+ case FILE_MEMORY_CONST:
+ assert(forms & FA_RRC);
+ emitFormA_RRC((3 << 9) | op, src1, src2);
+ break;
+ default:
+ assert(!"bad src2 file");
+ break;
+ }
+ break;
+ case FILE_IMMEDIATE:
+ assert((src2 < 0) || insn->src(src2 & FA_SRC_MASK).getFile() == FILE_GPR);
+ assert(forms & FA_RIR);
+ emitFormA_RRI((4 << 9) | op, src2, src1); // arguments swapped: src1 is passed as the helper's immediate operand
+ break;
+ case FILE_MEMORY_CONST:
+ assert((src2 < 0) || insn->src(src2 & FA_SRC_MASK).getFile() == FILE_GPR);
+ assert(forms & FA_RCR);
+ emitFormA_RRC((5 << 9) | op, src2, src1); // arguments swapped: src1 is passed as the helper's constant-buffer operand
+ break;
+ default:
+ assert(!"bad src1 file");
+ break;
+ }
+
+ if (src0 >= 0) { // src0, when present, must be a GPR
+ assert(insn->src(src0 & FA_SRC_MASK).getFile() == FILE_GPR);
+ emitABS(73, (src0 & FA_SRC_MASK), (src0 & FA_SRC_ABS));
+ emitNEG(72, (src0 & FA_SRC_MASK), (src0 & FA_SRC_NEG));
+ emitGPR(24, insn->src(src0 & FA_SRC_MASK));
+ }
+
+ if (!(forms & FA_NODEF)) // FA_NODEF suppresses the destination register
+ emitGPR(16, insn->def(0));
+}
+
+/*******************************************************************************
+ * control
+ ******************************************************************************/
+
+void
+CodeEmitterGV100::emitBRA() // branch to insn->target.bb, encoded as a relative offset
+{
+ const FlowInstruction *insn = this->insn->asFlow(); // local view of the member insn as a FlowInstruction
+ int64_t target = ((int64_t)insn->target.bb->binPos - (codeSize + 0x10)) / 4; // distance from codeSize + 0x10 to the target block, divided by 4
+
+ assert(!insn->indirect && !insn->absolute); // indirect and absolute branches are not handled here
+
+ emitInsn (0x947);
+ emitField(34, 48, target);
+ emitPRED (87);
+ emitField(86, 2, 0); // ./.INC/.DEC
+}
+
+void
+CodeEmitterGV100::emitEXIT() // opcode 0x94d; both modifier fields below are emitted as 0
+{
+ emitInsn (0x94d);
+ emitNOT (90);
+ emitPRED (87);
+ emitField(85, 1, 0); // .NO_ATEXIT
+ emitField(84, 2, 0); // ./.KEEPREFCOUNT/.PREEMPTED/.INVALID3
+}
+
+void
+CodeEmitterGV100::emitKILL() // opcode 0x95b plus a predicate field emitted without an operand
+{
+ emitInsn(0x95b);
+ emitPRED(87);
+}
+
+void
+CodeEmitterGV100::emitNOP() // opcode 0x918, no operands
+{
+ emitInsn(0x918);
+}
+
+void
+CodeEmitterGV100::emitWARPSYNC() // form-A opcode 0x148 with a single source and no destination
+{
+ emitFormA(0x148, FA_NODEF | FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); // src(0) may be a GPR, immediate or constant
+ emitNOT (90);
+ emitPRED (87);
+}
+
+/*******************************************************************************
+ * movement / conversion
+ ******************************************************************************/
+
+void
+CodeEmitterGV100::emitCS2R() // opcode 0x805: src(0) goes through emitSYS, result in def(0)
+{
+ emitInsn(0x805);
+ emitSYS (72, insn->src(0));
+ emitGPR (16, insn->def(0));
+}
+
+/*
+ * Encode F2F.  Opcode 0x104 is used when neither the source nor the
+ * destination type is 8 bytes wide; 0x110 is used otherwise.
+ */
+void
+CodeEmitterGV100::emitF2F()
+{
+   const bool wide = typeSizeof(insn->sType) == 8 ||
+                     typeSizeof(insn->dType) == 8;
+
+   emitFormA(wide ? 0x110 : 0x104, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
+   emitField(84, 2, util_logbase2(typeSizeof(insn->sType))); // log2 of the source size
+   emitFMZ  (80, 1);
+   emitRND  (78);
+   emitField(75, 2, util_logbase2(typeSizeof(insn->dType))); // log2 of the destination size
+   emitField(60, 2, insn->subOp); // ./.H1/.INVALID2/.INVALID3
+}
+
+/*
+ * Encode F2I.  Opcode 0x105 is used when neither the source nor the
+ * destination type is 8 bytes wide; 0x111 is used otherwise.
+ */
+void
+CodeEmitterGV100::emitF2I()
+{
+   const bool wide = typeSizeof(insn->sType) == 8 ||
+                     typeSizeof(insn->dType) == 8;
+
+   emitFormA(wide ? 0x111 : 0x105, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
+   emitField(84, 2, util_logbase2(typeSizeof(insn->sType))); // log2 of the source size
+   emitFMZ  (80, 1);
+   emitRND  (78);
+   emitField(77, 1, 0); // .NTZ
+   emitField(75, 2, util_logbase2(typeSizeof(insn->dType))); // log2 of the destination size
+   emitField(72, 1, isSignedType(insn->dType)); // set for signed destination types
+}
+
+/*
+ * Encode FRND.  The 2-bit mode comes from insn->rnd for OP_CVT and from the
+ * opcode itself for OP_FLOOR / OP_CEIL / OP_TRUNC.  Opcode 0x107 is used when
+ * neither type is 8 bytes wide, 0x113 otherwise.
+ */
+void
+CodeEmitterGV100::emitFRND()
+{
+   int mode = 0;
+
+   if (insn->op == OP_CVT) {
+      switch (insn->rnd) {
+      case ROUND_NI: mode = 0; break;
+      case ROUND_MI: mode = 1; break;
+      case ROUND_PI: mode = 2; break;
+      case ROUND_ZI: mode = 3; break;
+      default:
+         assert(!"invalid FRND mode");
+         break;
+      }
+   } else {
+      switch (insn->op) {
+      case OP_FLOOR: mode = 1; break;
+      case OP_CEIL : mode = 2; break;
+      case OP_TRUNC: mode = 3; break;
+      default:
+         assert(!"invalid FRND opcode");
+         break;
+      }
+   }
+
+   const bool wide = typeSizeof(insn->sType) == 8 ||
+                     typeSizeof(insn->dType) == 8;
+
+   emitFormA(wide ? 0x113 : 0x107, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
+   emitField(84, 2, util_logbase2(typeSizeof(insn->sType)));
+   emitFMZ  (80, 1);
+   emitField(78, 2, mode);
+   emitField(75, 2, util_logbase2(typeSizeof(insn->dType)));
+}
+
+/*
+ * Encode I2F.  Opcode 0x106 is used when neither the source nor the
+ * destination type is 8 bytes wide; 0x112 is used otherwise.
+ */
+void
+CodeEmitterGV100::emitI2F()
+{
+   const bool wide = typeSizeof(insn->sType) == 8 ||
+                     typeSizeof(insn->dType) == 8;
+   // For 2-byte sources the sub-op is halved before being encoded.
+   const int select = (typeSizeof(insn->sType) == 2) ? (insn->subOp >> 1)
+                                                      : insn->subOp;
+
+   emitFormA(wide ? 0x112 : 0x106, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
+   emitField(84, 2, util_logbase2(typeSizeof(insn->sType)));
+   emitRND  (78);
+   emitField(75, 2, util_logbase2(typeSizeof(insn->dType)));
+   emitField(74, 1, isSignedType(insn->sType)); // set for signed source types
+   emitField(60, 2, select); // ./.B1/.B2/.B3
+}
+
+void
+CodeEmitterGV100::emitMOV() // move into a GPR or a predicate; the encoding depends on the def and src files
+{
+ switch (insn->def(0).getFile()) {
+ case FILE_GPR:
+ switch (insn->src(0).getFile()) {
+ case FILE_GPR:
+ case FILE_MEMORY_CONST:
+ case FILE_IMMEDIATE:
+ emitFormA(0x002, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
+ emitField(72, 4, insn->lanes);
+ break;
+ case FILE_PREDICATE: // predicate -> GPR uses a different opcode (0x807)
+ emitInsn (0x807);
+ emitGPR (16, insn->def(0));
+ emitGPR (24); // GPR field emitted without an operand
+ emitField(32, 32, 0xffffffff);
+ emitField(90, 1, 1);
+ emitPRED (87, insn->src(0)); // the source predicate goes into the field at 87
+ break;
+ default:
+ assert(!"bad src file");
+ break;
+ }
+ break;
+ case FILE_PREDICATE: // GPR -> predicate uses opcode 0x20c with condition CC_NE
+ emitInsn (0x20c);
+ emitPRED (87);
+ emitPRED (84);
+ emitNOT (71);
+ emitPRED (68);
+ emitPRED (81, insn->def(0)); // destination predicate
+ emitCond3(76, CC_NE);
+ emitGPR (24, insn->src(0));
+ emitGPR (32); // second GPR field emitted without an operand
+ break;
+ default:
+ assert(!"bad dst file");
+ break;
+ }
+}
+
+void
+CodeEmitterGV100::emitPRMT() // form-A opcode 0x016 with three unmodified sources
+{
+ emitFormA(0x016, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), __(2));
+ emitField(72, 3, insn->subOp); // subOp is passed through unchanged
+}
+
+void
+CodeEmitterGV100::emitS2R() // opcode 0x919: src(0) goes through emitSYS, result in def(0)
+{
+ emitInsn(0x919);
+ emitSYS (72, insn->src(0));
+ emitGPR (16, insn->def(0));
+}
+
+/*
+ * Fixup callback registered by emitSEL(): sets bit 26 of the third code word
+ * of the instruction at entry->loc when data.force_persample_interp is true,
+ * and clears it otherwise.
+ */
+static void
+selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
+{
+   const uint32_t flipBit = 1 << 26;
+   uint32_t &word = code[entry->loc + 2];
+
+   word = data.force_persample_interp ? (word | flipBit) : (word & ~flipBit);
+}
+
+void
+CodeEmitterGV100::emitSEL() // form-A opcode 0x007: two value sources plus a predicate taken from src(2)
+{
+ emitFormA(0x007, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY);
+ emitNOT (90, insn->src(2));
+ emitPRED (87, insn->src(2));
+ if (insn->subOp == 1)
+ addInterp(0, 0, selpFlip); // registers selpFlip as a fixup for this instruction
+}
+
+void
+CodeEmitterGV100::emitSHFL() // four opcode variants, chosen by whether src1 and src2 are GPRs or immediates
+{
+ switch (insn->src(1).getFile()) {
+ case FILE_GPR:
+ switch (insn->src(2).getFile()) {
+ case FILE_GPR:
+ emitInsn(0x389); // src1 GPR, src2 GPR
+ emitGPR (64, insn->src(2));
+ break;
+ case FILE_IMMEDIATE:
+ emitInsn(0x589); // src1 GPR, src2 immediate
+ emitIMMD(40, 13, insn->src(2));
+ break;
+ default:
+ assert(!"bad src2 file");
+ break;
+ }
+ emitGPR(32, insn->src(1));
+ break;
+ case FILE_IMMEDIATE:
+ switch (insn->src(2).getFile()) {
+ case FILE_GPR:
+ emitInsn(0x989); // src1 immediate, src2 GPR
+ emitGPR (64, insn->src(2));
+ break;
+ case FILE_IMMEDIATE:
+ emitInsn(0xf89); // src1 immediate, src2 immediate
+ emitIMMD(40, 13, insn->src(2));
+ break;
+ default:
+ assert(!"bad src2 file");
+ break;
+ }
+ emitIMMD(53, 5, insn->src(1));
+ break;
+ default:
+ assert(!"bad src1 file");
+ break;
+ }
+
+ if (insn->defExists(1)) // optional second def is a predicate
+ emitPRED(81, insn->def(1));
+ else
+ emitPRED(81); // no second def: predicate field emitted without an operand
+
+ emitField(58, 2, insn->subOp);
+ emitGPR (24, insn->src(0));
+ emitGPR (16, insn->def(0));
+}
+
+/*******************************************************************************
+ * fp32
+ ******************************************************************************/
+
+void
+CodeEmitterGV100::emitFADD() // form-A opcode 0x021; neg/abs flags passed for both sources
+{
+ if (insn->src(1).getFile() == FILE_GPR)
+ emitFormA(0x021, FA_RRR , NA(0), NA(1), EMPTY); // GPR src(1) is passed in emitFormA's src1 position
+ else
+ emitFormA(0x021, FA_RRI | FA_RRC, NA(0), EMPTY, NA(1)); // immediate/constant src(1) is passed in the src2 position instead
+ emitFMZ (80, 1);
+ emitRND (78);
+ emitSAT (77);
+}
+
+void
+CodeEmitterGV100::emitFFMA() // form-A opcode 0x023 with three sources, neg/abs flags on each
+{
+ emitFormA(0x023, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, NA(0), NA(1), NA(2));
+ emitField(80, 1, insn->ftz); // ftz flag copied from the instruction
+ emitRND (78);
+ emitSAT (77);
+ emitField(76, 1, insn->dnz); // dnz flag copied from the instruction
+}
+
+void
+CodeEmitterGV100::emitFMNMX() // form-A opcode 0x009, shared by the min and max operations
+{
+ emitFormA(0x009, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);
+ emitField(90, 1, insn->op == OP_MAX); // 1 for OP_MAX, 0 for any other op routed here
+ emitPRED (87);
+ emitFMZ (80, 1);
+}
+
+void
+CodeEmitterGV100::emitFMUL() // form-A opcode 0x020 with two sources
+{
+ emitFormA(0x020, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);
+ emitField(80, 1, insn->ftz); // ftz flag copied from the instruction
+ emitPDIV (84);
+ emitRND (78);
+ emitSAT (77);
+ emitField(76, 1, insn->dnz); // dnz flag copied from the instruction
+}
+
+void
+CodeEmitterGV100::emitFSET_BF() // form-A opcode 0x00a: compare writing def(0) as a GPR (FA_NODEF is not set)
+{
+ const CmpInstruction *insn = this->insn->asCmp(); // local view of the member insn as a CmpInstruction
+
+ emitFormA(0x00a, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);
+ emitFMZ (80, 1);
+ emitCond4(76, insn->setCond);
+
+ if (insn->op != OP_SET) { // SET_AND/OR/XOR combine the result with the predicate in src(2)
+ switch (insn->op) {
+ case OP_SET_AND: emitField(74, 2, 0); break;
+ case OP_SET_OR : emitField(74, 2, 1); break;
+ case OP_SET_XOR: emitField(74, 2, 2); break;
+ default:
+ assert(!"invalid set op");
+ break;
+ }
+ emitNOT (90, insn->src(2));
+ emitPRED(87, insn->src(2));
+ } else {
+ emitPRED(87); // plain OP_SET: predicate field emitted without an operand
+ }
+}
+
+void
+CodeEmitterGV100::emitFSETP() // form-A opcode 0x00b: compare writing predicate def(s) instead of a GPR
+{
+ const CmpInstruction *insn = this->insn->asCmp(); // local view of the member insn as a CmpInstruction
+
+ emitFormA(0x00b, FA_NODEF | FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);
+ emitFMZ (80, 1);
+ emitCond4(76, insn->setCond);
+
+ if (insn->op != OP_SET) { // SET_AND/OR/XOR combine the result with the predicate in src(2)
+ switch (insn->op) {
+ case OP_SET_AND: emitField(74, 2, 0); break;
+ case OP_SET_OR : emitField(74, 2, 1); break;
+ case OP_SET_XOR: emitField(74, 2, 2); break;
+ default:
+ assert(!"invalid set op");
+ break;
+ }
+ emitNOT (90, insn->src(2));
+ emitPRED(87, insn->src(2));
+ } else {
+ emitPRED(87); // plain OP_SET: predicate field emitted without an operand
+ }
+
+ if (insn->defExists(1)) // optional second predicate def
+ emitPRED(84, insn->def(1));
+ else
+ emitPRED(84);
+ emitPRED(81, insn->def(0)); // primary predicate def
+}
+
+/*
+ * Encode FSWZADD (opcode 0x822).  insn->subOp holds four 2-bit selectors; each
+ * selector equal to 1 or 2 is swapped with the other one before encoding.
+ */
+void
+CodeEmitterGV100::emitFSWZADD()
+{
+   uint8_t swizzle = 0;
+
+   // NP/PN swapped vs SM60
+   for (int shift = 0; shift < 8; shift += 2) {
+      uint8_t sel = (insn->subOp >> shift) & 3;
+      if (sel == 1 || sel == 2)
+         sel ^= 3;
+      swizzle |= sel << shift;
+   }
+
+   emitInsn (0x822);
+   emitFMZ  (80, 1);
+   emitRND  (78);
+   emitField(77, 1, insn->lanes); /* abused for .ndv */
+   emitGPR  (64, insn->src(1));
+   emitField(32, 8, swizzle);
+   emitGPR  (24, insn->src(0));
+   emitGPR  (16, insn->def(0));
+}
+
+void
+CodeEmitterGV100::emitMUFU() // form-A opcode 0x108; the function is selected by a 4-wide field derived from insn->op
+{
+ int mufu = 0;
+
+ switch (insn->op) {
+ case OP_COS : mufu = 0; break;
+ case OP_SIN : mufu = 1; break;
+ case OP_EX2 : mufu = 2; break;
+ case OP_LG2 : mufu = 3; break;
+ case OP_RCP : mufu = 4 + 2 * insn->subOp; break; // subOp selects between variants two selector values apart
+ case OP_RSQ : mufu = 5 + 2 * insn->subOp; break; // same scheme, interleaved with the RCP values
+ case OP_SQRT: mufu = 8; break;
+ default:
+ assert(!"invalid mufu");
+ break;
+ }
+
+ emitFormA(0x108, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
+ emitField(74, 4, mufu);
+}
+
+/*******************************************************************************
+ * fp64
+ ******************************************************************************/
+
+void
+CodeEmitterGV100::emitDADD() // form-A opcode 0x029; src(1) is always passed in emitFormA's src2 position
+{
+ emitFormA(0x029, FA_RRR | FA_RRI | FA_RRC, NA(0), EMPTY, NA(1));
+ emitRND(78);
+}
+
+void
+CodeEmitterGV100::emitDFMA() // form-A opcode 0x02b with three sources, neg/abs flags on each
+{
+ emitFormA(0x02b, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, NA(0), NA(1), NA(2));
+ emitRND(78);
+}
+
+void
+CodeEmitterGV100::emitDMUL() // form-A opcode 0x028 with two sources
+{
+ emitFormA(0x028, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);
+ emitRND(78);
+}
+
+void
+CodeEmitterGV100::emitDSETP() // form-A opcode 0x02a: compare writing predicate def(s)
+{
+ const CmpInstruction *insn = this->insn->asCmp(); // local view of the member insn as a CmpInstruction
+
+ if (insn->src(1).getFile() == FILE_GPR)
+ emitFormA(0x02a, FA_NODEF | FA_RRR , NA(0), NA(1), EMPTY); // GPR src(1) is passed in the src1 position
+ else
+ emitFormA(0x02a, FA_NODEF | FA_RRI | FA_RRC, NA(0), EMPTY, NA(1)); // immediate/constant src(1) is passed in the src2 position
+
+ if (insn->op != OP_SET) { // SET_AND/OR/XOR combine the result with the predicate in src(2)
+ switch (insn->op) {
+ case OP_SET_AND: emitField(74, 2, 0); break;
+ case OP_SET_OR : emitField(74, 2, 1); break;
+ case OP_SET_XOR: emitField(74, 2, 2); break;
+ default:
+ assert(!"invalid set op");
+ break;
+ }
+ emitNOT (90, insn->src(2));
+ emitPRED(87, insn->src(2));
+ } else {
+ emitPRED(87); // plain OP_SET: predicate field emitted without an operand
+ }
+
+ if (insn->defExists(1)) // optional second predicate def
+ emitPRED(84, insn->def(1));
+ else
+ emitPRED(84);
+ emitPRED (81, insn->def(0)); // primary predicate def
+ emitCond4(76, insn->setCond);
+}
+
+/*******************************************************************************
+ * integer
+ ******************************************************************************/
+
+void
+CodeEmitterGV100::emitBMSK() // form-A opcode 0x01b with two unmodified sources
+{
+ emitFormA(0x01b, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY);
+ emitField(75, 1, insn->subOp); // .C/.W
+}
+
+void
+CodeEmitterGV100::emitBREV() // form-A opcode 0x101 with a single unmodified source
+{
+ emitFormA(0x101, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
+}
+
+void
+CodeEmitterGV100::emitFLO() // form-A opcode 0x100 with a single source
+{
+ emitFormA(0x100, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
+ emitPRED (81);
+ emitField(74, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT); // set only for the BFIND_SAMT sub-op
+ emitField(73, 1, isSignedType(insn->dType)); // set for signed destination types
+ emitNOT (63, insn->src(0));
+}
+
+void
+CodeEmitterGV100::emitIABS() // form-A opcode 0x013 with a single unmodified source
+{
+ emitFormA(0x013, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
+}
+
+void
+CodeEmitterGV100::emitIADD3() // form-A opcode 0x010; only two of the three sources are emitted for now
+{
+// emitFormA(0x010, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(1), N_(2));
+ emitFormA(0x010, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(1), EMPTY);
+ emitGPR (64); //XXX: fix when switching back to N_(2)
+ emitPRED (84, NULL); // .CC1
+ emitPRED (81, insn->flagsDef >= 0 ? insn->getDef(insn->flagsDef) : NULL); // flags def, when present, is written as a predicate
+ if (insn->flagsSrc >= 0) { // flags source present
+ emitField(74, 1, 1); // .X
+ emitPRED (87, insn->getSrc(insn->flagsSrc));
+ emitField(77, 4, 0xf); // .X1
+ }
+}
+
+void
+CodeEmitterGV100::emitIMAD() // form-A opcode 0x024; only src(2) carries the neg flag
+{
+ emitFormA(0x024, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), N_(2));
+ emitField(73, 1, isSignedType(insn->sType)); // set for signed source types
+}
+
+void
+CodeEmitterGV100::emitIMAD_WIDE() // form-A opcode 0x025; unlike emitIMAD, FA_RRI is not in the allowed forms
+{
+ emitFormA(0x025, FA_RRR | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), N_(2));
+ emitPRED (81);
+ emitField(73, 1, isSignedType(insn->sType)); // set for signed source types
+}
+
+void
+CodeEmitterGV100::emitISETP() // form-A opcode 0x00c: integer compare writing predicate def(s)
+{
+ const CmpInstruction *insn = this->insn->asCmp(); // local view of the member insn as a CmpInstruction
+
+ emitFormA(0x00c, FA_NODEF | FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY);
+
+ if (insn->op != OP_SET) { // SET_AND/OR/XOR combine the result with the predicate in src(2)
+ switch (insn->op) {
+ case OP_SET_AND: emitField(74, 2, 0); break;
+ case OP_SET_OR : emitField(74, 2, 1); break;
+ case OP_SET_XOR: emitField(74, 2, 2); break;
+ default:
+ assert(!"invalid set op");
+ break;
+ }
+ emitNOT (90, insn->src(2));
+ emitPRED(87, insn->src(2));
+ } else {
+ emitPRED(87); // plain OP_SET: predicate field emitted without an operand
+ }
+
+ //XXX: CC->pred
+ if (insn->flagsSrc >= 0) {
+ assert(0); // a flags source is not supported yet; asserts in debug builds
+ emitField(68, 4, 6);
+ } else {
+ emitNOT (71);
+ if (!insn->subOp) // with a sub-op set, the field at 68 is written in the .EX block below
+ emitPRED(68);
+ }
+
+ if (insn->defExists(1)) // optional second predicate def
+ emitPRED(84, insn->def(1));
+ else
+ emitPRED(84);
+ emitPRED (81, insn->def(0)); // primary predicate def
+ emitCond3(76, insn->setCond);
+ emitField(73, 1, isSignedType(insn->sType)); // set for signed source types
+
+ if (insn->subOp) { // .EX
+ assert(0); // the .EX form is not supported yet; asserts in debug builds
+ emitField(72, 1, 1);
+ emitPRED (68, insn->srcExists(3) ? insn->src(3) : insn->src(2)); // prefers src(3), falls back to src(2)
+ }
+}
+
+void
+CodeEmitterGV100::emitLEA() // form-A opcode 0x011; src(1) must be an immediate
+{
+ assert(insn->src(1).get()->asImm()); // src(1) is encoded below as a 5-wide immediate
+
+ emitFormA(0x011, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(2), EMPTY); // note: src(2) is passed in the src1 position
+ emitPRED (81);
+ emitIMMD (75, 5, insn->src(1));
+ emitGPR (64); // GPR field emitted without an operand
+}
+
+void
+CodeEmitterGV100::emitLOP3_LUT() // form-A opcode 0x012 with three unmodified sources
+{
+ emitFormA(0x012, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), __(2));
+ emitField(90, 1, 1);
+ emitPRED (87);
+ emitPRED (81);
+ emitField(80, 1, 0); // .PAND
+ emitField(72, 8, insn->subOp); // the 8-bit sub-op (see NV50_IR_SUBOP_LOP3_LUT) is encoded verbatim
+}
+
+void
+CodeEmitterGV100::emitPOPC() // form-A opcode 0x109 with a single source
+{
+ emitFormA(0x109, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
+ emitNOT (63, insn->src(0));
+}
+
+void
+CodeEmitterGV100::emitSGXT() // form-A opcode 0x01a with two unmodified sources; both modifier fields are constants
+{
+ emitFormA(0x01a, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY);
+ emitField(75, 1, 0); // .W
+ emitField(73, 1, 1); // /.U32
+}
+
+void
+CodeEmitterGV100::emitSHF() // form-A opcode 0x019; NV50_IR_SUBOP_SHF_* bits in subOp select the modifiers
+{
+ emitFormA(0x019, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), __(2));
+ emitField(80, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_HI)); // HI (set) vs LO (clear)
+ emitField(76, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_R)); // R (set) vs L (clear)
+ emitField(75, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_W)); // W (set) vs C (clear)
+
+ switch (insn->sType) { // 2-wide type selector
+ case TYPE_S64: emitField(73, 2, 0); break;
+ case TYPE_U64: emitField(73, 2, 1); break;
+ case TYPE_S32: emitField(73, 2, 2); break;
+ case TYPE_U32:
+ default: // any other type is encoded like TYPE_U32
+ emitField(73, 2, 3);
+ break;
+ }
+}
+
+/*******************************************************************************
+ * load/stores
+ ******************************************************************************/
+
+void
+CodeEmitterGV100::emitALD() // opcode 0x321: load from the address in src(0) into def(0)
+{
+ emitInsn (0x321);
+ emitField(74, 2, (insn->getDef(0)->reg.size / 4) - 1); // def size in 4-byte units, minus one
+ emitGPR (32, insn->src(0).getIndirect(1)); // second indirect of src(0)
+ emitO (79);
+ emitP (76);
+ emitADDR (24, 40, 10, 0, insn->src(0));
+ emitGPR (16, insn->def(0));
+}
+
+void
+CodeEmitterGV100::emitAST() // opcode 0x322: store src(1) to the address in src(0)
+{
+ emitInsn (0x322);
+ emitField(74, 2, (typeSizeof(insn->dType) / 4) - 1); // dType size in 4-byte units, minus one
+ emitGPR (64, insn->src(0).getIndirect(1)); // second indirect of src(0)
+ emitP (76);
+ emitADDR (24, 40, 10, 0, insn->src(0));
+ emitGPR (32, insn->src(1)); // value to store
+}
+
+void
+CodeEmitterGV100::emitATOM() // opcode 0x38b for compare-and-swap, 0x38a for every other sub-op
+{
+ unsigned subOp, dType;
+
+ if (insn->subOp != NV50_IR_SUBOP_ATOM_CAS) {
+ emitInsn(0x38a);
+
+ if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
+ subOp = 8; // EXCH is remapped to 8; all other sub-ops are encoded as-is
+ else
+ subOp = insn->subOp;
+ emitField(87, 4, subOp);
+
+ switch (insn->dType) {
+ case TYPE_U32 : dType = 0; break;
+ case TYPE_S32 : dType = 1; break;
+ case TYPE_U64 : dType = 2; break;
+ case TYPE_F32 : dType = 3; break;
+ case TYPE_B128: dType = 4; break;
+ case TYPE_S64 : dType = 5; break;
+ default:
+ assert(!"unexpected dType");
+ dType = 0; // release builds fall back to the TYPE_U32 encoding
+ break;
+ }
+ emitField(73, 3, dType);
+ } else {
+ emitInsn(0x38b);
+
+ switch (insn->dType) { // CAS accepts only U32 and U64 here
+ case TYPE_U32: dType = 0; break;
+ case TYPE_U64: dType = 2; break;
+ default:
+ assert(!"unexpected dType");
+ dType = 0; // release builds fall back to the TYPE_U32 encoding
+ break;
+ }
+ emitField(73, 3, dType);
+ emitGPR (64, insn->src(2)); // CAS takes an extra operand from src(2)
+ }
+
+ emitPRED (81);
+ emitField(79, 2, 1);
+ emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8); // set when the first indirect of src(0) is 8 bytes; getIndirect(0) is dereferenced unchecked
+ emitGPR (32, insn->src(1));
+ emitADDR (24, 40, 24, 0, insn->src(0));
+ emitGPR (16, insn->def(0));
+}
+
+/*
+ * Encode ATOMS: opcode 0x38d for compare-and-swap, 0x38c for every other
+ * sub-operation.  Both opcodes share the same 2-bit data-type encoding.
+ */
+void
+CodeEmitterGV100::emitATOMS()
+{
+   unsigned dType;
+
+   // The type selector is identical for both opcodes, so resolve it once.
+   switch (insn->dType) {
+   case TYPE_U32: dType = 0; break;
+   case TYPE_S32: dType = 1; break;
+   case TYPE_U64: dType = 2; break;
+   default:
+      assert(!"unexpected dType");
+      dType = 0;
+      break;
+   }
+
+   if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
+      emitInsn (0x38d);
+      emitField(87, 1, 0); // ATOMS.CAS/ATOMS.CAST
+      emitField(73, 2, dType);
+      emitGPR  (64, insn->src(2));
+   } else {
+      // EXCH is remapped to 8; every other sub-op is encoded as-is.
+      const unsigned subOp =
+         (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) ? 8 : insn->subOp;
+
+      emitInsn (0x38c);
+      emitField(87, 4, subOp);
+      emitField(73, 2, dType);
+   }
+
+   emitGPR  (32, insn->src(1));
+   emitADDR (24, 40, 24, 0, insn->src(0));
+   emitGPR  (16, insn->def(0));
+}
+
+/*
+ * Fixup callback registered by emitIPA(): rewrites the sample-mode and
+ * interpolation-mode selectors of an already emitted IPA instruction.
+ *
+ * entry->ipa is the interpolation descriptor recorded at emit time and
+ * entry->loc indexes the instruction's first word in `code`.  When
+ * data.force_persample_interp is set, a non-flat input using the default
+ * sample mode is switched to centroid.
+ *
+ * Bits 12..15 of code[loc + 2] are cleared and refilled: the sample selector
+ * at bit 12 and the interpolation selector at bit 14.
+ */
+static void
+interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
+{
+   int ipa = entry->ipa;
+   int loc = entry->loc;
+
+   if (data.force_persample_interp &&
+       (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
+       (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
+      ipa |= NV50_IR_INTERP_CENTROID;
+   }
+
+   // Initialised so that release builds (asserts compiled out) never shift
+   // an indeterminate value into the instruction word.
+   int sample = 0;
+   switch (ipa & NV50_IR_INTERP_SAMPLE_MASK) {
+   case NV50_IR_INTERP_DEFAULT : sample = 0; break;
+   case NV50_IR_INTERP_CENTROID: sample = 1; break;
+   case NV50_IR_INTERP_OFFSET  : sample = 2; break;
+   default:
+      assert(!"invalid sample mode");
+      break;
+   }
+
+   int interp = 0;
+   switch (ipa & NV50_IR_INTERP_MODE_MASK) {
+   case NV50_IR_INTERP_LINEAR     :
+   case NV50_IR_INTERP_PERSPECTIVE: interp = 0; break;
+   case NV50_IR_INTERP_FLAT       : interp = 1; break;
+   case NV50_IR_INTERP_SC         : interp = 2; break;
+   default:
+      assert(!"invalid ipa mode");
+      break;
+   }
+
+   code[loc + 2] &= ~(0xf << 12);
+   code[loc + 2] |= sample << 12;
+   code[loc + 2] |= interp << 14;
+}
+
+void
+CodeEmitterGV100::emitIPA()
+{
+   // IPA (opcode 0x326): interpolate the attribute addressed by src(0).
+   int mode = 0, sample = 0;
+   switch (insn->getInterpMode()) {
+   case NV50_IR_INTERP_LINEAR     :
+   case NV50_IR_INTERP_PERSPECTIVE: mode = 0; break;
+   case NV50_IR_INTERP_FLAT       : mode = 1; break;
+   case NV50_IR_INTERP_SC         : mode = 2; break;
+   default: assert(!"invalid ipa mode"); break;
+   }
+   switch (insn->getSampleMode()) {
+   case NV50_IR_INTERP_DEFAULT : sample = 0; break;
+   case NV50_IR_INTERP_CENTROID: sample = 1; break;
+   case NV50_IR_INTERP_OFFSET  : sample = 2; break;
+   default: assert(!"invalid sample mode"); break;
+   }
+
+   emitInsn (0x326);
+   emitPRED (81, insn->defExists(1) ? insn->def(1) : NULL);
+   emitField(78, 2, mode);
+   emitField(76, 2, sample);
+
+   // Only the offset sample mode consumes src(1); otherwise the register at
+   // bit 32 gets the null encoding.  Both paths record an interpApply fixup.
+   if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET) {
+      emitGPR  (32, insn->src(1));
+      addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, interpApply);
+   } else {
+      emitGPR  (32);
+      addInterp(insn->ipa, 0xff, interpApply);
+   }
+   assert(!insn->src(0).isIndirect(0));
+   emitADDR (-1, 64, 8, 2, insn->src(0));
+   emitGPR  (16, insn->def(0));
+}
+
+void
+CodeEmitterGV100::emitISBERD()
+{
+   emitInsn(0x923);              // emitInstruction() routes OP_PFETCH here
+   emitGPR (16, insn->def(0));   // destination register, 8 bits at bit 16
+   emitGPR (24, insn->src(0));   // source register, 8 bits at bit 24
+}
+
+void
+CodeEmitterGV100::emitLDSTc(int posm, int poso)
+{
+   // Translate insn->cache into a 2-bit "mode" (at posm) and a 2-bit "order"
+   // (at poso).  Unknown cache ops assert and keep the CACHE_CA pair (0, 1).
+   int mode = 0, order = 1;
+   if (insn->cache == CACHE_CG) {
+      mode = 2;
+      order = 2;
+   } else if (insn->cache == CACHE_CV) {
+      mode = 3;
+      order = 2;
+   } else if (insn->cache != CACHE_CA) {
+      assert(!"invalid caching mode");
+   }
+   emitField(posm, 2, mode);
+   emitField(poso, 2, order);
+}
+
+void
+CodeEmitterGV100::emitLDSTs(int pos, DataType type)
+{
+   // 3-bit access-size field: 0/1 = 1 byte (unsigned/signed), 2/3 = 2 bytes
+   // (unsigned/signed), 4 = 4 bytes, 5 = 8 bytes, 6 = 16 bytes.
+   const unsigned bytes = typeSizeof(type);
+   int data = 0;
+
+   if (bytes == 1 || bytes == 2) {
+      data = (bytes - 1) * 2 + (isSignedType(type) ? 1 : 0);
+   } else if (bytes == 4 || bytes == 8 || bytes == 16) {
+      data = bytes == 4 ? 4 : (bytes == 8 ? 5 : 6);
+   } else {
+      assert(!"bad type");
+   }
+
+   emitField(pos, 3, data);
+}
+
+void
+CodeEmitterGV100::emitLD()
+{
+   emitInsn (0x980);                        // used for FILE_MEMORY_GLOBAL loads
+   emitGPR  (16, insn->def(0));             // destination
+   emitADDR (24, 32, 32, 0, insn->src(0));  // base GPR @24, 32-bit offset @32
+   emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8); // 8-byte address reg
+   emitLDSTs(73, insn->dType);              // access size
+   emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS
+   emitField(79, 2, 2); // .CONSTANT/./.STRONG/.MMIO
+}
+
+void
+CodeEmitterGV100::emitLDC()
+{
+   emitFormA(0x182, FA_RCR, EMPTY, __(0), EMPTY); // used for FILE_MEMORY_CONST loads
+   emitGPR  (24, insn->src(0).getIndirect(0));    // indirect address register
+   emitLDSTs(73, insn->dType);                    // access size
+   emitField(78, 2, insn->subOp);                 // sub-op emitted unchanged
+}
+
+void
+CodeEmitterGV100::emitLDL()
+{
+   emitInsn (0x983);                        // used for FILE_MEMORY_LOCAL loads
+   emitGPR  (16, insn->def(0));             // destination
+   emitADDR (24, 40, 24, 0, insn->src(0));  // base GPR @24, 24-bit offset @40
+   emitLDSTs(73, insn->dType);              // access size
+   emitField(84, 3, 1); // .EF/./.EL/.LU/.EU/.NA/.INVALID6/.INVALID7
+}
+
+void
+CodeEmitterGV100::emitLDS()
+{
+   emitInsn (0x984);                        // used for FILE_MEMORY_SHARED loads
+   emitGPR  (16, insn->def(0));             // destination
+   emitADDR (24, 40, 24, 0, insn->src(0));  // base GPR @24, 24-bit offset @40
+   emitLDSTs(73, insn->dType);              // access size
+}
+
+void
+CodeEmitterGV100::emitOUT()
+{
+   // Bit 78 = emit vertex, bit 79 = cut (OP_RESTART or a non-zero sub-op).
+   const unsigned emitBit = insn->op == OP_EMIT ? 1 : 0;
+   const unsigned cutBit = (insn->op == OP_RESTART || insn->subOp) ? 2 : 0;
+   if (insn->op == OP_FINAL)
+      emitFormA(0x124, FA_RRR | FA_RIR, __(0), EMPTY, EMPTY);
+   else
+      emitFormA(0x124, FA_RRR | FA_RIR, __(0), __(1), EMPTY);
+   emitField(78, 2, cutBit | emitBit);
+}
+
+void
+CodeEmitterGV100::emitRED()
+{
+   // RED: used by emitInstruction() for some non-shared OP_ATOM without def(0).
+   unsigned typeBits = 0;
+   switch (insn->dType) {
+   case TYPE_U32 : typeBits = 0; break;
+   case TYPE_S32 : typeBits = 1; break;
+   case TYPE_U64 : typeBits = 2; break;
+   case TYPE_F32 : typeBits = 3; break;
+   case TYPE_B128: typeBits = 4; break;
+   case TYPE_S64 : typeBits = 5; break;
+   default: assert(!"unexpected dType"); break;
+   }
+
+   emitInsn (0x98e);
+   emitADDR (24, 40, 24, 0, insn->src(0));
+   emitGPR  (32, insn->src(1));
+   emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8);
+   emitField(73, 3, typeBits);
+   emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS
+   emitField(79, 2, 2); // .INVALID0/./.STRONG/.INVALID3
+   emitField(84, 3, 1); // 0=.EF, 1=, 2=.EL, 3=.LU, 4=.EU, 5=.NA
+   emitField(87, 3, insn->subOp);
+}
+
+void
+CodeEmitterGV100::emitST()
+{
+   emitInsn (0x385);                        // used for FILE_MEMORY_GLOBAL stores
+   emitADDR (24, 32, 32, 0, insn->src(0));  // base GPR @24, 32-bit offset @32
+   emitGPR  (64, insn->src(1));             // value to store
+   emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8); // 8-byte address reg
+   emitLDSTs(73, insn->dType);              // access size
+   emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS
+   emitField(79, 2, 2); // .INVALID0/./.STRONG/.MMIO
+}
+
+void
+CodeEmitterGV100::emitSTL()
+{
+   emitInsn (0x387);                        // used for FILE_MEMORY_LOCAL stores
+   emitADDR (24, 40, 24, 0, insn->src(0));  // base GPR @24, 24-bit offset @40
+   emitGPR  (32, insn->src(1));             // value to store
+   emitLDSTs(73, insn->dType);              // access size
+   emitField(84, 3, 1); // .EF/./.EL/.LU/.EU/.NA/.INVALID6/.INVALID7
+}
+
+void
+CodeEmitterGV100::emitSTS()
+{
+   emitInsn (0x388);                        // used for FILE_MEMORY_SHARED stores
+   emitADDR (24, 40, 24, 0, insn->src(0));  // base GPR @24, 24-bit offset @40
+   emitGPR  (32, insn->src(1));             // value to store
+   emitLDSTs(73, insn->dType);              // access size
+}
+
+/*******************************************************************************
+ * texture
+ ******************************************************************************/
+
+void
+CodeEmitterGV100::emitTEXs(int pos)
+{
+   const int idx = (insn->predSrc == 1) ? 2 : 1; // step over the predicate slot
+   if (!insn->srcExists(idx))
+      emitGPR(pos);                  // no such source: null register (255)
+   else
+      emitGPR(pos, insn->src(idx));
+}
+
+void
+CodeEmitterGV100::emitTEX()
+{
+   const TexInstruction *insn = this->insn->asTex();
+   int lodm = 1; // value used when tex.levelZero is set
+
+   // Otherwise the LOD mode follows the opcode: TEX = 0, TXB = 2, TXL = 3.
+   if (!insn->tex.levelZero) {
+      if (insn->op == OP_TXB) {
+         lodm = 2;
+      } else if (insn->op == OP_TXL) {
+         lodm = 3;
+      } else {
+         lodm = 0;
+         if (insn->op != OP_TEX)
+            assert(!"invalid tex op");
+      }
+   }
+
+   if (insn->tex.rIndirectSrc >= 0) {
+      emitInsn (0x361);
+      emitField(59, 1, 1); // .B
+   } else {
+      emitInsn (0xb60);
+      emitField(40, 14, insn->tex.r);
+      emitField(54, 5, prog->driver->io.auxCBSlot);
+   }
+
+   emitGPR  (16, insn->def(0));
+   emitGPR  (24, insn->src(0));
+   emitTEXs (32);
+   emitField(61, 2, insn->tex.target.isCube() ? 3 :
+                    insn->tex.target.getDim() - 1);
+   emitField(63, 1, insn->tex.target.isArray());
+   emitGPR  (64, insn->def(1));
+   emitField(72, 4, insn->tex.mask);
+   emitField(76, 1, insn->tex.useOffsets == 1); // .AOFFI
+   emitField(77, 1, insn->tex.derivAll); // .NDV
+   emitField(78, 1, insn->tex.target.isShadow()); // .DC
+   emitPRED (81);
+   emitField(84, 3, 1); // 0=.EF, 1=, 2=.EL, 3=.LU, 4=.EU, 5=.NA
+   emitField(87, 3, lodm);
+   emitField(90, 1, insn->tex.liveOnly); // .NODEP
+}
+
+void
+CodeEmitterGV100::emitTLD()
+{
+   const TexInstruction *insn = this->insn->asTex();
+
+   if (insn->tex.rIndirectSrc >= 0) {
+      emitInsn (0x367);
+      emitField(59, 1, 1); // .B
+   } else {
+      emitInsn (0xb66);
+      emitField(40, 14, insn->tex.r);
+      emitField(54, 5, prog->driver->io.auxCBSlot);
+   }
+   emitGPR  (16, insn->def(0));
+   emitGPR  (24, insn->src(0));
+   emitTEXs (32);
+   emitField(61, 2, insn->tex.target.isCube() ? 3 :
+                    insn->tex.target.getDim() - 1);
+   emitField(63, 1, insn->tex.target.isArray());
+   emitGPR  (64, insn->def(1));
+   emitField(72, 4, insn->tex.mask);
+   emitField(76, 1, insn->tex.useOffsets == 1);
+   emitField(78, 1, insn->tex.target.isMS());
+   emitPRED (81);
+   emitField(87, 3, insn->tex.levelZero ? 1 /* .LZ */ : 3 /* .LL */);
+   emitField(90, 1, insn->tex.liveOnly);
+}
+
+void
+CodeEmitterGV100::emitTLD4()
+{
+   const TexInstruction *insn = this->insn->asTex();
+   int offsets = 0; // 2-bit offset mode: 0, 1 or 4 offsets -> 0, 1, 2
+
+   if (insn->tex.useOffsets == 4)
+      offsets = 2;
+   else if (insn->tex.useOffsets == 1)
+      offsets = 1;
+   else if (insn->tex.useOffsets != 0)
+      assert(!"invalid offsets count");
+
+   if (insn->tex.rIndirectSrc >= 0) {
+      emitInsn (0x364);
+      emitField(59, 1, 1); // .B
+   } else {
+      emitInsn (0xb63);
+      emitField(40, 14, insn->tex.r);
+      emitField(54, 5, prog->driver->io.auxCBSlot);
+   }
+   emitGPR  (16, insn->def(0));
+   emitGPR  (24, insn->src(0));
+   emitTEXs (32);
+   emitField(61, 2, insn->tex.target.isCube() ? 3 :
+                    insn->tex.target.getDim() - 1);
+   emitField(63, 1, insn->tex.target.isArray());
+   emitGPR  (64, insn->def(1));
+   emitField(72, 4, insn->tex.mask);
+   emitField(76, 2, offsets);
+   emitField(78, 1, insn->tex.target.isShadow());
+   emitPRED (81);
+   emitField(84, 1, 1); // !.EF
+   emitField(87, 2, insn->tex.gatherComp);
+   emitField(90, 1, insn->tex.liveOnly);
+}
+
+void
+CodeEmitterGV100::emitTMML()
+{
+   const TexInstruction *insn = this->insn->asTex();
+
+   if (insn->tex.rIndirectSrc >= 0) {
+      emitInsn (0x36a);
+      emitField(59, 1, 1); // .B
+   } else {
+      emitInsn (0xb69);
+      emitField(40, 14, insn->tex.r);
+      emitField(54, 5, prog->driver->io.auxCBSlot);
+   }
+   emitGPR  (16, insn->def(0));
+   emitGPR  (24, insn->src(0));
+   emitTEXs (32);
+   emitField(61, 2, insn->tex.target.isCube() ? 3 :
+                    insn->tex.target.getDim() - 1);
+   emitField(63, 1, insn->tex.target.isArray());
+   emitGPR  (64, insn->def(1));
+   emitField(72, 4, insn->tex.mask);
+   emitField(77, 1, insn->tex.derivAll);
+   emitField(90, 1, insn->tex.liveOnly);
+}
+
+void
+CodeEmitterGV100::emitTXD()
+{
+   const TexInstruction *insn = this->insn->asTex();
+
+   if (insn->tex.rIndirectSrc >= 0) {
+      emitInsn (0x36d);
+      emitField(59, 1, 1); // .B
+   } else {
+      emitInsn (0xb6c);
+      emitField(40, 14, insn->tex.r);
+      emitField(54, 5, prog->driver->io.auxCBSlot);
+   }
+   emitGPR  (16, insn->def(0));
+   emitGPR  (24, insn->src(0));
+   emitTEXs (32);
+   emitField(61, 2, insn->tex.target.isCube() ? 3 :
+                    insn->tex.target.getDim() - 1);
+   emitField(63, 1, insn->tex.target.isArray());
+   emitGPR  (64, insn->def(1));
+   emitField(72, 4, insn->tex.mask);
+   emitField(76, 1, insn->tex.useOffsets == 1);
+   emitPRED (81);
+   emitField(90, 1, insn->tex.liveOnly);
+}
+
+void
+CodeEmitterGV100::emitTXQ()
+{
+   const TexInstruction *insn = this->insn->asTex();
+   int type = 0x00; // TXQ_DIMS
+
+   // 2-bit query selector at bit 62; any query other than the three handled
+   // here asserts and is encoded like TXQ_DIMS.
+   if (insn->tex.query == TXQ_TYPE)
+      type = 0x01;
+   else if (insn->tex.query == TXQ_SAMPLE_POSITION)
+      type = 0x02;
+   else if (insn->tex.query != TXQ_DIMS)
+      assert(!"invalid txq query");
+
+   if (insn->tex.rIndirectSrc >= 0) {
+      emitInsn (0x370);
+      emitField(59, 1, 1); // .B
+   } else {
+      emitInsn (0xb6f);
+      emitField(40, 14, insn->tex.r);
+      emitField(54, 5, prog->driver->io.auxCBSlot);
+   }
+   emitGPR  (16, insn->def(0));
+   emitGPR  (24, insn->src(0));
+   emitField(62, 2, type);
+   emitGPR  (64, insn->def(1));
+   emitField(72, 4, insn->tex.mask);
+   emitField(90, 1, insn->tex.liveOnly);
+}
+
+/*******************************************************************************
+ * surface
+ ******************************************************************************/
+
+void
+CodeEmitterGV100::emitSUHandle(const int s)
+{
+   const TexInstruction *insn = this->insn->asTex();
+
+   assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
+
+   // A handle held in a register is encoded at bit 64.
+   if (insn->src(s).getFile() == FILE_GPR) {
+      emitGPR(64, insn->src(s));
+      return;
+   }
+   assert(0); //XXX: not done
+   ImmediateValue *imm = insn->getSrc(s)->asImm();
+   assert(imm);
+   emitField(0x33, 1, 1);
+   emitField(0x24, 13, imm->reg.data.u32);
+}
+
+void
+CodeEmitterGV100::emitSUTarget()
+{
+   const TexInstruction *insn = this->insn->asTex();
+   int target = 0; // 1D, the only target not matched explicitly below
+
+   assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
+   // 3-bit target at bit 61; cubes share the 2D-array code, RECT shares 2D.
+   if (insn->tex.target == TEX_TARGET_3D) {
+      target = 5;
+   } else if (insn->tex.target == TEX_TARGET_CUBE_ARRAY ||
+              insn->tex.target == TEX_TARGET_CUBE ||
+              insn->tex.target == TEX_TARGET_2D_ARRAY) {
+      target = 4;
+   } else if (insn->tex.target == TEX_TARGET_RECT ||
+              insn->tex.target == TEX_TARGET_2D) {
+      target = 3;
+   } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {
+      target = 2;
+   } else if (insn->tex.target == TEX_TARGET_BUFFER) {
+      target = 1;
+   } else {
+      assert(insn->tex.target == TEX_TARGET_1D);
+   }
+   emitField(61, 3, target);
+}
+
+void
+CodeEmitterGV100::emitSUATOM()
+{
+   // Surface atomic; emitInstruction() routes OP_SUREDB/OP_SUREDP here.
+   const TexInstruction *insn = this->insn->asTex();
+   const bool isCAS = insn->subOp == NV50_IR_SUBOP_ATOM_CAS;
+   uint8_t type, subOp;
+
+   // destination type (anything not listed must be u32, which encodes as 0)
+   switch (insn->dType) {
+   case TYPE_S32: type = 1; break;
+   case TYPE_U64: type = 2; break;
+   case TYPE_F32: type = 3; break;
+   case TYPE_S64: type = 5; break;
+   default:
+      assert(insn->dType == TYPE_U32);
+      type = 0;
+      break;
+   }
+
+   // atomic operation: compare-and-swap is selected by the opcode, so its
+   // field stays 0; exchange is 8; everything else is emitted unchanged
+   if (isCAS)
+      subOp = 0;
+   else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
+      subOp = 8;
+   else
+      subOp = insn->subOp;
+
+   if (isCAS)
+      emitInsn(0x396); // SUATOM.D.CAS
+   else
+      emitInsn(0x394); // SUATOM.D
+   emitSUTarget();
+   emitSUHandle(2);
+   emitGPR  (16, insn->def(0));
+   emitGPR  (24, insn->src(0));
+   emitGPR  (32, insn->src(1));
+   emitField(72, 1, 0); // .BA
+   emitField(73, 3, type);
+   emitField(79, 2, 1);
+   emitPRED (81);
+   emitField(87, 4, subOp);
+}
+
+void
+CodeEmitterGV100::emitSULD()
+{
+   const TexInstruction *insn = this->insn->asTex();
+
+   if (insn->op != OP_SULDB) {
+      // not OP_SULDB (emitInstruction() only passes OP_SULDP otherwise)
+      emitInsn (0x998);
+      emitField(72, 4, 0xf); // rgba
+   } else {
+      // OP_SULDB: a 3-bit field derived from the destination type
+      int type = 0;
+
+      switch (insn->dType) {
+      case TYPE_U8  : type = 0; break;
+      case TYPE_S8  : type = 1; break;
+      case TYPE_U16 : type = 2; break;
+      case TYPE_S16 : type = 3; break;
+      case TYPE_U32 : type = 4; break;
+      case TYPE_U64 : type = 5; break;
+      case TYPE_B128: type = 6; break;
+      default:
+         assert(0);
+         break;
+      }
+      emitInsn (0x99a);
+      emitField(73, 3, type);
+   }
+
+   emitSUTarget();
+   emitSUHandle(1);
+
+   emitGPR  (16, insn->def(0));
+   emitGPR  (24, insn->src(0));
+   emitLDSTc(77, 79);
+   emitPRED (81);
+}
+
+void
+CodeEmitterGV100::emitSUST()
+{
+   const TexInstruction *insn = this->insn->asTex();
+
+   emitInsn(0x99c); // SUST.P
+#if 0
+   if (insn->op == OP_SUSTB)
+      emitField(0x34, 1, 1);
+#endif
+   emitSUTarget();
+   emitSUHandle(2);
+
+   emitGPR  (24, insn->src(0));
+   emitGPR  (32, insn->src(1));
+   emitField(72, 4, 0xf); // rgba
+   emitLDSTc(77, 79);
+}
+
+/*******************************************************************************
+ * misc
+ ******************************************************************************/
+
+void
+CodeEmitterGV100::emitAL2P()
+{
+   emitInsn (0x920);                            // emitInstruction() routes OP_AFETCH here
+   emitGPR  (16, insn->def(0));                 // destination
+   emitGPR  (24, insn->src(0).getIndirect(0));  // indirect address register
+   emitField(40, 11, insn->src(0).get()->reg.data.offset);
+   emitField(74, 2, (insn->getDef(0)->reg.size / 4) - 1); // dwords minus one
+   emitO    (79);                               // set when src(0) is a shader output
+}
+
+void
+CodeEmitterGV100::emitBAR()
+{
+   // Field layout used below:
+   //   80     01: DEFER_BLOCKING
+   //   78:77  00: SYNC, 01: ARV, 02: RED, 03: SCAN
+   //   75:74  00: RED.POPC, 01: RED.AND, 02: RED.OR
+   uint8_t mode = 0x00, reduce = 0x00;
+
+   if (insn->subOp == NV50_IR_SUBOP_BAR_ARRIVE) {
+      mode = 0x01;
+   } else if (insn->subOp == NV50_IR_SUBOP_BAR_RED_POPC) {
+      mode = 0x02;
+      reduce = 0x00;
+   } else if (insn->subOp == NV50_IR_SUBOP_BAR_RED_AND) {
+      mode = 0x02;
+      reduce = 0x01;
+   } else if (insn->subOp == NV50_IR_SUBOP_BAR_RED_OR) {
+      mode = 0x02;
+      reduce = 0x02;
+   } else {
+      assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);
+   }
+
+   // The opcode variant (bits 9 and up) depends on the files of src(0) and
+   // src(1): register src(0), immediate src(0) with register src(1), or
+   // immediate src(0) without a register operand.
+   if (insn->src(0).getFile() != FILE_GPR) {
+      ImmediateValue *imm = insn->getSrc(0)->asImm();
+      assert(imm);
+      if (insn->src(1).getFile() != FILE_GPR) {
+         emitInsn ((5 << 9) | 0x11d);
+      } else {
+         emitInsn ((4 << 9) | 0x11d);
+         emitGPR  (32, insn->src(1));
+      }
+      emitField(54, 4, imm->reg.data.u32);
+   } else {
+      emitInsn ((1 << 9) | 0x11d);
+      emitGPR  (32, insn->src(0)); //XXX: nvdisasm shows src0==src1
+   }
+
+   emitField(74, 2, reduce);
+   emitField(77, 2, mode);
+
+   // src(2), unless it is the guard predicate, is an explicit predicate input.
+   if (!insn->srcExists(2) || insn->predSrc == 2) {
+      emitField(87, 3, 7);
+   } else {
+      emitPRED (87, insn->src(2));
+      emitField(90, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));
+   }
+}
+
+void
+CodeEmitterGV100::emitCCTL()
+{
+   // opcode 0x98f when src(0) is in the global file, 0x990 for anything else
+   const bool isGlobal = insn->src(0).getFile() == FILE_MEMORY_GLOBAL;
+
+   emitInsn (isGlobal ? 0x98f : 0x990);
+   emitADDR (24, 32, 32, 0, insn->src(0));
+   emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8);
+   emitField(87, 4, insn->subOp);
+}
+
+void
+CodeEmitterGV100::emitMEMBAR()
+{
+   int scope = 0; // 3-bit scope at bit 76; stays 0 for CTA and unknown scopes
+   switch (NV50_IR_SUBOP_MEMBAR_SCOPE(insn->subOp)) {
+   case NV50_IR_SUBOP_MEMBAR_CTA: scope = 0; break;
+   case NV50_IR_SUBOP_MEMBAR_GL : scope = 2; break;
+   case NV50_IR_SUBOP_MEMBAR_SYS: scope = 3; break;
+   default: assert(!"invalid scope"); break;
+   }
+   emitInsn (0x992);
+   emitField(76, 3, scope);
+}
+
+void
+CodeEmitterGV100::emitPIXLD()
+{
+   int mode = 0; // 3-bit selector at bit 78
+   switch (insn->subOp) {
+   case NV50_IR_SUBOP_PIXLD_COVMASK : mode = 1; break; // .COVMASK
+   case NV50_IR_SUBOP_PIXLD_SAMPLEID: mode = 3; break; // .MY_INDEX
+   default: assert(0); break;
+   }
+   emitInsn (0x925);
+   emitGPR  (16, insn->def(0));
+   emitPRED (71);
+   emitField(78, 3, mode);
+}
+
+void
+CodeEmitterGV100::emitPLOP3_LUT()
+{
+   // Table built from 0xf0 and 0xcc; low 3 bits go to bit 64, high 5 to bit 72.
+   uint8_t lut = 0;
+   if (insn->op == OP_AND)
+      lut = 0xf0 & 0xcc;
+   else if (insn->op == OP_OR)
+      lut = 0xf0 | 0xcc;
+   else if (insn->op == OP_XOR)
+      lut = 0xf0 ^ 0xcc;
+   else
+      assert(!"invalid PLOP3");
+
+   emitInsn (0x81c);
+   emitField(16, 8, 0); // always zero for these ops
+   emitField(64, 3, lut & 7);
+   emitPRED (68); // src(2)
+   emitNOT  (71); // src(2)
+   emitField(72, 5, lut >> 3);
+   emitPRED (77, insn->src(1));
+   emitNOT  (80, insn->src(1));
+   emitPRED (81, insn->def(0));
+   emitPRED (84); // def(1)
+   emitPRED (87, insn->src(0));
+   emitNOT  (90, insn->src(0));
+}
+
+void
+CodeEmitterGV100::emitVOTE()
+{
+   // Locate the register and predicate results among the defs (the last one
+   // of each kind wins).  Either may be missing, in which case the null
+   // encoding is emitted for it.
+   int gprDef = -1, predDef = -1;
+   for (int d = 0; insn->defExists(d); ++d) {
+      if (insn->def(d).getFile() == FILE_GPR)
+         gprDef = d;
+      else if (insn->def(d).getFile() == FILE_PREDICATE)
+         predDef = d;
+   }
+
+   emitInsn (0x806);
+   emitField(72, 2, insn->subOp);
+   if (gprDef < 0)
+      emitGPR (16);
+   else
+      emitGPR (16, insn->def(gprDef));
+
+   if (predDef < 0)
+      emitPRED(81);
+   else
+      emitPRED(81, insn->def(predDef));
+
+   // src(0) is either a predicate (bit 90 set when it carries a NOT modifier)
+   // or the immediate 0/1, encoded as predicate 7 with bit 90 set for 0.
+   if (insn->src(0).getFile() == FILE_PREDICATE) {
+      emitPRED (87, insn->src(0));
+      emitField(90, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));
+   } else if (insn->src(0).getFile() == FILE_IMMEDIATE) {
+      const ImmediateValue *imm = insn->getSrc(0)->asImm();
+      assert(imm);
+      const uint32_t u32 = imm->reg.data.u32;
+      assert(u32 == 0 || u32 == 1);
+      emitPRED (87);
+      emitField(90, 1, u32 == 0);
+   } else {
+      // neither a predicate nor an immediate
+      assert(!"Unhandled src");
+   }
+}
+
+bool // always returns true
+CodeEmitterGV100::emitInstruction(Instruction *i) // encode one IR instruction at `code` via the per-op emit*() helpers
+{
+ insn = i; // every emit*() helper reads the current instruction through this member
+
+ switch (insn->op) {
+ case OP_ABS:
+ assert(!isFloatType(insn->dType)); // only the integer form is encoded here
+ emitIABS();
+ break;
+ case OP_ADD:
+ if (isFloatType(insn->dType)) {
+ if (insn->dType == TYPE_F32)
+ emitFADD();
+ else
+ emitDADD();
+ } else {
+ emitIADD3();
+ }
+ break;
+ case OP_AFETCH:
+ emitAL2P();
+ break;
+ case OP_AND:
+ case OP_OR:
+ case OP_XOR:
+ if (insn->def(0).getFile() == FILE_PREDICATE) { // only predicate results are encoded here
+ emitPLOP3_LUT();
+ } else {
+ assert(!"invalid logop");
+ emitNOP();
+ }
+ break;
+ case OP_ATOM:
+ if (insn->src(0).getFile() == FILE_MEMORY_SHARED)
+ emitATOMS();
+ else
+ if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS) // result unused: use the RED encoder
+ emitRED();
+ else
+ emitATOM();
+ break;
+ case OP_BAR:
+ emitBAR();
+ break;
+ case OP_BFIND:
+ emitFLO();
+ break;
+ case OP_BMSK:
+ emitBMSK();
+ break;
+ case OP_BREV:
+ emitBREV();
+ break;
+ case OP_BRA:
+ case OP_JOIN: //XXX
+ emitBRA();
+ break;
+ case OP_CCTL:
+ emitCCTL();
+ break;
+ case OP_CEIL:
+ case OP_CVT:
+ case OP_FLOOR:
+ case OP_TRUNC:
+ if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
+ insn->src(0).getFile() == FILE_PREDICATE)) { // conversions to/from a predicate go through emitMOV()
+ emitMOV();
+ } else if (isFloatType(insn->dType)) {
+ if (isFloatType(insn->sType)) {
+ if (insn->sType == insn->dType) // same float type: rounding only
+ emitFRND();
+ else
+ emitF2F();
+ } else {
+ emitI2F();
+ }
+ } else {
+ if (isFloatType(insn->sType)) {
+ emitF2I();
+ } else {
+ assert(!"I2I"); // integer-to-integer conversion has no encoder here
+ emitNOP();
+ }
+ }
+ break;
+ case OP_COS:
+ case OP_EX2:
+ case OP_LG2:
+ case OP_RCP:
+ case OP_RSQ:
+ case OP_SIN:
+ case OP_SQRT:
+ emitMUFU();
+ break;
+ case OP_DISCARD:
+ emitKILL();
+ break;
+ case OP_EMIT:
+ case OP_FINAL:
+ case OP_RESTART:
+ emitOUT();
+ break;
+ case OP_EXIT:
+ emitEXIT();
+ break;
+ case OP_EXPORT:
+ emitAST();
+ break;
+ case OP_FMA:
+ case OP_MAD:
+ if (isFloatType(insn->dType)) {
+ if (insn->dType == TYPE_F32)
+ emitFFMA();
+ else
+ emitDFMA();
+ } else {
+ if (typeSizeof(insn->dType) != 8)
+ emitIMAD();
+ else
+ emitIMAD_WIDE(); // 8-byte integer result
+ }
+ break;
+ case OP_JOINAT: //XXX
+ emitNOP();
+ break;
+ case OP_LINTERP:
+ emitIPA();
+ break;
+ case OP_LOAD:
+ switch (insn->src(0).getFile()) { // the memory file of the address selects the load encoder
+ case FILE_MEMORY_CONST : emitLDC(); break;
+ case FILE_MEMORY_LOCAL : emitLDL(); break;
+ case FILE_MEMORY_SHARED: emitLDS(); break;
+ case FILE_MEMORY_GLOBAL: emitLD(); break;
+ default:
+ assert(!"invalid load");
+ emitNOP();
+ break;
+ }
+ break;
+ case OP_LOP3_LUT:
+ emitLOP3_LUT();
+ break;
+ case OP_MAX:
+ case OP_MIN:
+ if (isFloatType(insn->dType)) {
+ if (insn->dType == TYPE_F32) { // only f32 min/max is encoded here
+ emitFMNMX();
+ } else {
+ assert(!"invalid FMNMX");
+ emitNOP();
+ }
+ } else {
+ assert(!"invalid MNMX");
+ emitNOP();
+ }
+ break;
+ case OP_MEMBAR:
+ emitMEMBAR();
+ break;
+ case OP_MOV:
+ emitMOV();
+ break;
+ case OP_MUL:
+ if (isFloatType(insn->dType)) {
+ if (insn->dType == TYPE_F32)
+ emitFMUL();
+ else
+ emitDMUL();
+ } else {
+ assert(!"invalid IMUL"); // integer multiply has no encoder here
+ emitNOP();
+ }
+ break;
+ case OP_PERMT:
+ emitPRMT();
+ break;
+ case OP_PFETCH:
+ emitISBERD();
+ break;
+ case OP_PIXLD:
+ emitPIXLD();
+ break;
+ case OP_POPCNT:
+ emitPOPC();
+ break;
+ case OP_QUADOP:
+ emitFSWZADD();
+ break;
+ case OP_RDSV:
+ if (targ->isCS2RSV(insn->getSrc(0)->reg.data.sv.sv)) // the target decides which system values use CS2R
+ emitCS2R();
+ else
+ emitS2R();
+ break;
+ case OP_SELP:
+ emitSEL();
+ break;
+ case OP_SET:
+ case OP_SET_AND:
+ case OP_SET_OR:
+ case OP_SET_XOR:
+ if (insn->def(0).getFile() != FILE_PREDICATE) { // non-predicate result: only the f32 form is encoded
+ if (isFloatType(insn->dType)) {
+ if (insn->dType == TYPE_F32) {
+ emitFSET_BF();
+ } else {
+ assert(!"invalid FSET");
+ emitNOP();
+ }
+ } else {
+ assert(!"invalid SET");
+ emitNOP();
+ }
+ } else { // predicate result: the source type selects the encoder
+ if (isFloatType(insn->sType))
+ if (insn->sType == TYPE_F64)
+ emitDSETP();
+ else
+ emitFSETP();
+ else
+ emitISETP();
+ }
+ break;
+ case OP_SGXT:
+ emitSGXT();
+ break;
+ case OP_SHF:
+ emitSHF();
+ break;
+ case OP_SHFL:
+ emitSHFL();
+ break;
+ case OP_SHLADD:
+ emitLEA();
+ break;
+ case OP_STORE:
+ switch (insn->src(0).getFile()) { // the memory file of the address selects the store encoder
+ case FILE_MEMORY_LOCAL : emitSTL(); break;
+ case FILE_MEMORY_SHARED: emitSTS(); break;
+ case FILE_MEMORY_GLOBAL: emitST(); break;
+ default:
+ assert(!"invalid store");
+ emitNOP();
+ break;
+ }
+ break;
+ case OP_SULDB:
+ case OP_SULDP:
+ emitSULD();
+ break;
+ case OP_SUREDB:
+ case OP_SUREDP:
+ emitSUATOM();
+ break;
+ case OP_SUSTB:
+ case OP_SUSTP:
+ emitSUST();
+ break;
+ case OP_TEX:
+ case OP_TXB:
+ case OP_TXL:
+ emitTEX();
+ break;
+ case OP_TXD:
+ emitTXD();
+ break;
+ case OP_TXF:
+ emitTLD();
+ break;
+ case OP_TXG:
+ emitTLD4();
+ break;
+ case OP_TXLQ:
+ emitTMML();
+ break;
+ case OP_TXQ:
+ emitTXQ();
+ break;
+ case OP_VFETCH:
+ emitALD();
+ break;
+ case OP_VOTE:
+ emitVOTE();
+ break;
+ case OP_WARPSYNC:
+ emitWARPSYNC();
+ break;
+ default:
+ assert(!"invalid opcode"); // builds without asserts emit a NOP instead
+ emitNOP();
+ break;
+ }
+
+ code[3] &= 0x000001ff; // keep only the low 9 bits of the fourth dword...
+ code[3] |= insn->sched << 9; // ...and place the instruction's sched value above them
+ code += 4; // advance by one instruction: four 32-bit words
+ codeSize += 16; // i.e. 16 bytes
+ return true;
+}
+
+void
+CodeEmitterGV100::prepareEmission(BasicBlock *bb) // assign bb's binPos/binSize and append it to func->bbArray
+{
+ Function *func = bb->getFunction();
+ Instruction *i;
+ int j;
+
+ for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j); // skip trailing blocks with binSize 0
+
+ for (; j >= 0; --j) { // walk backwards over the blocks already placed
+ BasicBlock *in = func->bbArray[j];
+ Instruction *exit = in->getExit();
+
+ if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) { // `in` ends in a branch to bb
+ in->binSize -= 16; // drop that branch: one 16-byte instruction
+ func->binSize -= 16;
+
+ for (++j; j < func->bbCount; ++j) // NOTE: reuses the outer index; j == bbCount afterwards
+ func->bbArray[j]->binPos -= 16; // later blocks move up by the removed instruction
+
+ in->remove(exit);
+ }
+ bb->binPos = in->binPos + in->binSize; // bb starts right after `in`
+ if (in->binSize) // no more no-op branches to bb
+ break;
+ }
+ func->bbArray[func->bbCount++] = bb;
+
+ if (!bb->getExit()) // nothing to size when the block has no exit instruction
+ return;
+
+ for (i = bb->getEntry(); i; i = i->next) { // size every instruction and accumulate the block size
+ i->encSize = getMinEncodingSize(i);
+ bb->binSize += i->encSize;
+ }
+
+ assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 16));
+
+ func->binSize += bb->binSize;
+}
+
+void
+CodeEmitterGV100::prepareEmission(Function *func) // base-class preparation followed by a scheduling pass
+{
+ SchedDataCalculatorGM107 sched(targ); // the GM107 scheduling calculator is reused for this target
+ CodeEmitter::prepareEmission(func); // base-class preparation runs first
+ sched.run(func, true, true); // then the scheduling pass runs over the function
+}
+
+void
+CodeEmitterGV100::prepareEmission(Program *prog)
+{
+   // Each function starts at the current program size, then adds its own size.
+   ArrayList::Iterator it = prog->allFuncs.iterator();
+   for (; !it.end(); it.next()) {
+      Function *fn = reinterpret_cast<Function *>(it.get());
+      fn->binPos = prog->binSize;
+      prepareEmission(fn);
+      prog->binSize += fn->binSize;
+   }
+   this->prog = prog;
+}
+
+CodeEmitterGV100::CodeEmitterGV100(TargetGV100 *target)
+   : CodeEmitter(target), prog(NULL), targ(target), insn(NULL) // no dangling prog/insn before first use
+{
+   code = NULL;                   // no output buffer attached yet
+   codeSize = codeSizeLimit = 0;
+   relocInfo = NULL;              // no relocation info yet
+}
+};
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.h
new file mode 100644
index 00000000000..15ab717e460
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.h
@@ -0,0 +1,403 @@
+/*
+ * Copyright 2020 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __NV50_IR_EMIT_GV100_H__
+#define __NV50_IR_EMIT_GV100_H__
+#include "codegen/nv50_ir_target_gv100.h"
+
+namespace nv50_ir {
+
+class CodeEmitterGV100 : public CodeEmitter { // emits 16-byte (four dword) instructions through `code`
+public:
+ CodeEmitterGV100(TargetGV100 *target);
+
+ virtual bool emitInstruction(Instruction *);
+ virtual uint32_t getMinEncodingSize(const Instruction *) const { return 16; } // every instruction is 16 bytes
+
+private:
+ const Program *prog; // set by prepareEmission(Program *)
+ const TargetGV100 *targ;
+ const Instruction *insn; // instruction currently being encoded
+
+ virtual void prepareEmission(Program *);
+ virtual void prepareEmission(Function *);
+ virtual void prepareEmission(BasicBlock *);
+
+ inline void emitInsn(uint32_t op) { // start a new instruction: opcode word plus guard predicate
+ code[0] = op;
+ code[1] = 0;
+ code[2] = 0;
+ code[3] = 0;
+ if (insn->predSrc >= 0) {
+ emitField(12, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id); // predicate register id
+ emitField(15, 1, insn->cc == CC_NOT_P); // bit 15 set when cc is CC_NOT_P
+ } else {
+ emitField(12, 3, 7); // no predicate source: id 7
+ }
+ };
+
+ inline void emitField(int b, int s, uint64_t v) { // OR the low s bits of v into the instruction at bit b
+ if (b >= 0) { // a negative position means "no such field"
+ uint64_t m = ~0ULL >> (64 - s); // mask of s low bits (s must be 1..64)
+ uint64_t d = v & m;
+ assert(!(v & ~m) || (v & ~m) == ~m); // bits cut off must be all zeros or all ones
+ if (b < 64 && b + s > 64) { // field straddles the two 64-bit halves
+ *(uint64_t *)&code[0] |= d << b;
+ *(uint64_t *)&code[2] |= d >> (64 - b);
+ } else {
+ *(uint64_t *)&code[(b/64*2)] |= d << (b & 0x3f); // field lies entirely in one half
+ }
+ }
+ };
+
+ inline void emitABS(int pos, int src, bool supported) // set bit pos when source src has the abs modifier
+ {
+ if (insn->src(src).mod.abs()) {
+ assert(supported);
+ emitField(pos, 1, 1);
+ }
+ }
+
+ inline void emitABS(int pos, int src)
+ {
+ emitABS(pos, src, true);
+ }
+
+ inline void emitNEG(int pos, int src, bool supported) { // set bit pos when source src has the neg modifier
+ if (insn->src(src).mod.neg()) {
+ assert(supported);
+ emitField(pos, 1, 1);
+ }
+ }
+
+ inline void emitNEG(int pos, int src) {
+ emitNEG(pos, src, true);
+ }
+
+ inline void emitNOT(int pos) { // emits a zero bit, so the field is left clear
+ emitField(pos, 1, 0);
+ };
+
+ inline void emitNOT(int pos, const ValueRef &ref) { // bit set when ref carries NV50_IR_MOD_NOT
+ emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT)));
+ }
+
+ inline void emitSAT(int pos) {
+ emitField(pos, 1, insn->saturate);
+ }
+
+ inline void emitRND(int rmp, RoundMode rnd, int rip) { // 2-bit mode at rmp, 1-bit flag for the *I modes at rip
+ int rm = 0, ri = 0;
+ switch (rnd) {
+ case ROUND_NI: ri = 1; // fallthrough
+ case ROUND_N : rm = 0; break;
+ case ROUND_MI: ri = 1; // fallthrough
+ case ROUND_M : rm = 1; break;
+ case ROUND_PI: ri = 1; // fallthrough
+ case ROUND_P : rm = 2; break;
+ case ROUND_ZI: ri = 1; // fallthrough
+ case ROUND_Z : rm = 3; break;
+ default:
+ assert(!"invalid round mode");
+ break;
+ }
+ emitField(rip, 1, ri); // skipped by emitField when rip is negative
+ emitField(rmp, 2, rm);
+ }
+
+ inline void emitRND(int pos) { // mode of the current instruction; no flag bit
+ emitRND(pos, insn->rnd, -1);
+ }
+
+ inline void emitFMZ(int pos, int len) { // dnz in bit 1, ftz in bit 0
+ emitField(pos, len, insn->dnz << 1 | insn->ftz);
+ }
+
+ inline void emitPDIV(int pos) {
+ emitField(pos, 3, insn->postFactor + 4);
+ }
+
+ inline void emitO(int pos) { // set when src(0) is in FILE_SHADER_OUTPUT
+ emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT);
+ }
+
+ inline void emitP(int pos) {
+ emitField(pos, 1, insn->perPatch);
+ }
+
+ inline void emitCond3(int pos, CondCode code) { // 3-bit condition; the *U codes share their plain code's encoding
+ int data = 0;
+
+ switch (code) {
+ case CC_FL : data = 0x00; break;
+ case CC_LTU:
+ case CC_LT : data = 0x01; break;
+ case CC_EQU:
+ case CC_EQ : data = 0x02; break;
+ case CC_LEU:
+ case CC_LE : data = 0x03; break;
+ case CC_GTU:
+ case CC_GT : data = 0x04; break;
+ case CC_NEU:
+ case CC_NE : data = 0x05; break;
+ case CC_GEU:
+ case CC_GE : data = 0x06; break;
+ case CC_TR : data = 0x07; break;
+ default:
+ assert(!"invalid cond3");
+ break;
+ }
+
+ emitField(pos, 3, data);
+ }
+
+ inline void emitCond4(int pos, CondCode code) { // 4-bit condition; the *U codes get their own encodings
+ int data = 0;
+
+ switch (code) {
+ case CC_FL: data = 0x00; break;
+ case CC_LT: data = 0x01; break;
+ case CC_EQ: data = 0x02; break;
+ case CC_LE: data = 0x03; break;
+ case CC_GT: data = 0x04; break;
+ case CC_NE: data = 0x05; break;
+ case CC_GE: data = 0x06; break;
+ // case CC_NUM: data = 0x07; break;
+ // case CC_NAN: data = 0x08; break;
+ case CC_LTU: data = 0x09; break;
+ case CC_EQU: data = 0x0a; break;
+ case CC_LEU: data = 0x0b; break;
+ case CC_GTU: data = 0x0c; break;
+ case CC_NEU: data = 0x0d; break;
+ case CC_GEU: data = 0x0e; break;
+ case CC_TR: data = 0x0f; break;
+ default:
+ assert(!"invalid cond4");
+ break;
+ }
+
+ emitField(pos, 4, data);
+ }
+
+ inline void emitSYS(int pos, const Value *val) { // map a system value to its 8-bit id
+ int id = val ? val->reg.data.id : -1; // a null value hits the default case below
+
+ switch (id) {
+ case SV_LANEID : id = 0x00; break;
+ case SV_VERTEX_COUNT : id = 0x10; break;
+ case SV_INVOCATION_ID : id = 0x11; break;
+ case SV_THREAD_KILL : id = 0x13; break;
+ case SV_INVOCATION_INFO: id = 0x1d; break;
+ case SV_COMBINED_TID : id = 0x20; break;
+ case SV_TID : id = 0x21 + val->reg.data.sv.index; break;
+ case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break;
+ case SV_LANEMASK_EQ : id = 0x38; break;
+ case SV_LANEMASK_LT : id = 0x39; break;
+ case SV_LANEMASK_LE : id = 0x3a; break;
+ case SV_LANEMASK_GT : id = 0x3b; break;
+ case SV_LANEMASK_GE : id = 0x3c; break;
+ case SV_CLOCK : id = 0x50 + val->reg.data.sv.index; break;
+ default:
+ assert(!"invalid system value");
+ id = 0;
+ break;
+ }
+
+ emitField(pos, 8, id);
+ }
+
+ inline void emitSYS(int pos, const ValueRef &ref) {
+ emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);
+ }
+
+ inline void emitGPR(int pos, const Value *val, int off) { // 8-bit register id plus off
+ emitField(pos, 8, val && !val->inFile(FILE_FLAGS) ?
+ val->reg.data.id + off: 255); // 255 for a null value or a FILE_FLAGS value
+ }
+
+ inline void emitGPR(int pos, const Value *v) {
+ emitGPR(pos, v, 0);
+ }
+
+ inline void emitGPR(int pos) { // no value: emits 255
+ emitGPR(pos, (const Value *)NULL);
+ }
+
+ inline void emitGPR(int pos, const ValueRef &ref) {
+ emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);
+ }
+
+ inline void emitGPR(int pos, const ValueRef *ref) { // a null ref emits 255
+ emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);
+ }
+
+ inline void emitGPR(int pos, const ValueDef &def) {
+ emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);
+ }
+
+ inline void emitGPR(int pos, const ValueDef &def, int off) {
+ emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL, off);
+ }
+
+ inline void emitPRED(int pos, const Value *val) { // 3-bit predicate id; 7 for a null value
+ emitField(pos, 3, val ? val->reg.data.id : 7);
+ };
+
+ inline void emitPRED(int pos) { // no value: emits 7
+ emitPRED(pos, (const Value *)NULL);
+ }
+
+ inline void emitPRED(int pos, const ValueRef &ref) {
+ emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);
+ }
+
+ inline void emitPRED(int pos, const ValueDef &def) {
+ emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL);
+ }
+
+ inline void emitCBUF(int buf, int gpr, int off, int len, int align, // NOTE: len is not used by the body
+ const ValueRef &ref) {
+ const Value *v = ref.get();
+ const Symbol *s = v->asSym();
+
+ assert(!(s->reg.data.offset & ((1 << align) - 1))); // offset must be a multiple of 1 << align
+
+ emitField(buf, 5, v->reg.fileIndex); // 5-bit buffer index
+ if (gpr >= 0) // a negative gpr means there is no indirect register field
+ emitGPR(gpr, ref.getIndirect(0));
+ emitField(off, 16, s->reg.data.offset); // always a 16-bit offset field
+ }
+
+ inline void emitIMMD(int pos, int len, const ValueRef &ref) { // immediate operand; f64 sources use the upper 32 bits
+ const ImmediateValue *imm = ref.get()->asImm();
+ uint32_t val = imm->reg.data.u32;
+
+ if (insn->sType == TYPE_F64) {
+ assert(!(imm->reg.data.u64 & 0x00000000ffffffffULL)); // the low 32 bits must be zero
+ val = imm->reg.data.u64 >> 32;
+ }
+
+ emitField(pos, len, val);
+ }
+
+ inline void emitADDR(int gpr, int off, int len, int shr, // base register at gpr (if >= 0), offset >> shr at off
+ const ValueRef &ref) {
+ const Value *v = ref.get();
+ assert(!(v->reg.data.offset & ((1 << shr) - 1))); // bits shifted out must be zero
+ if (gpr >= 0)
+ emitGPR(gpr, ref.getIndirect(0));
+ emitField(off, len, v->reg.data.offset >> shr);
+ }
+
+ inline void emitFormA(uint16_t op, uint8_t forms, int src0, int src1, int src2);
+ inline void emitFormA_RRR(uint16_t op, int src1, int src2);
+ inline void emitFormA_RRI(uint16_t op, int src1, int src2);
+ inline void emitFormA_RRC(uint16_t op, int src1, int src2);
+ inline void emitFormA_I32(int src);
+
+ void emitBRA();
+ void emitEXIT();
+ void emitKILL();
+ void emitNOP();
+ void emitWARPSYNC();
+
+ void emitCS2R();
+ void emitF2F();
+ void emitF2I();
+ void emitFRND();
+ void emitI2F();
+ void emitMOV();
+ void emitPRMT();
+ void emitS2R();
+ void emitSEL();
+ void emitSHFL();
+
+ void emitFADD();
+ void emitFFMA();
+ void emitFMNMX();
+ void emitFMUL();
+ void emitFSET_BF();
+ void emitFSETP();
+ void emitFSWZADD();
+ void emitMUFU();
+
+ void emitDADD();
+ void emitDFMA();
+ void emitDMUL();
+ void emitDSETP();
+
+ void emitBMSK();
+ void emitBREV();
+ void emitFLO();
+ void emitIABS();
+ void emitIADD3();
+ void emitIMAD();
+ void emitIMAD_WIDE();
+ void emitISETP();
+ void emitLEA();
+ void emitLOP3_LUT();
+ void emitPOPC();
+ void emitSGXT();
+ void emitSHF();
+
+ void emitALD();
+ void emitAST();
+ void emitATOM();
+ void emitATOMS();
+ void emitIPA();
+ void emitISBERD();
+ void emitLDSTc(int, int); // positions of the cache mode and order fields
+ void emitLDSTs(int, DataType); // position of the access-size field, data type
+ void emitLD();
+ void emitLDC();
+ void emitLDL();
+ void emitLDS();
+ void emitOUT();
+ void emitRED();
+ void emitST();
+ void emitSTL();
+ void emitSTS();
+
+ void emitTEXs(int); // bit position of the second texture source register
+ void emitTEX();
+ void emitTLD();
+ void emitTLD4();
+ void emitTMML();
+ void emitTXD();
+ void emitTXQ();
+
+ void emitSUHandle(const int); // index of the source holding the surface handle
+ void emitSUTarget();
+ void emitSUATOM();
+ void emitSULD();
+ void emitSUST();
+
+ void emitAL2P();
+ void emitBAR();
+ void emitCCTL();
+ void emitMEMBAR();
+ void emitPIXLD();
+ void emitPLOP3_LUT();
+ void emitVOTE();
+};
+
+};
+#endif
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index bd78b76f384..eee9aa67256 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -170,6 +170,7 @@ private:
NirArrayLMemOffsets regToLmemOffset;
NirBlockMap blocks;
unsigned int curLoopDepth;
+ unsigned int curIfDepth;
BasicBlock *exit;
Value *zero;
@@ -188,6 +189,7 @@ Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info)
: ConverterCommon(prog, info),
nir(nir),
curLoopDepth(0),
+ curIfDepth(0),
clipVertexOutput(-1)
{
zero = mkImm((uint32_t)0);
@@ -571,6 +573,10 @@ Converter::getSubOp(nir_op op)
case nir_op_imul_high:
case nir_op_umul_high:
return NV50_IR_SUBOP_MUL_HIGH;
+ case nir_op_ishl:
+ case nir_op_ishr:
+ case nir_op_ushr:
+ return NV50_IR_SUBOP_SHIFT_WRAP;
default:
return 0;
}
@@ -909,7 +915,7 @@ calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
uint16_t slots;
switch (stage) {
case Program::TYPE_GEOMETRY:
- slots = type->uniform_locations();
+ slots = type->count_attribute_slots(false);
if (input)
slots /= info.gs.vertices_in;
break;
@@ -917,9 +923,9 @@ calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
case Program::TYPE_TESSELLATION_EVAL:
// remove first dimension
if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL))
- slots = type->uniform_locations();
+ slots = type->count_attribute_slots(false);
else
- slots = type->fields.array->uniform_locations();
+ slots = type->fields.array->count_attribute_slots(false);
break;
default:
slots = type->count_attribute_slots(false);
@@ -929,6 +935,24 @@ calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info,
return slots;
}
+// Returns the component mask that a variable of the given type contributes
+// to one of its attribute slots.  `slot` is the index of the slot within the
+// variable (callers pass their per-variable loop counter).  The result has
+// its low bits set and is not yet shifted by the variable's location_frac;
+// callers apply that shift themselves.
+static uint8_t
+getMaskForType(const glsl_type *type, uint8_t slot) {
+   const glsl_type *elem = type->without_array();
+   uint16_t bits = elem->components();
+
+   // a component count of 0 is treated as a full four-component slot
+   if (!bits)
+      bits = 4;
+
+   if (glsl_base_type_is_64bit(elem->base_type)) {
+      // every 64-bit component takes two mask bits; whatever does not fit
+      // into the four bits of an even slot is reported for the odd slot
+      bits *= 2;
+      if (bits > 4)
+         bits = (slot % 2) ? bits - 4 : 4;
+   }
+
+   return (1 << bits) - 1;
+}
+
bool Converter::assignSlots() {
unsigned name;
unsigned index;
@@ -981,16 +1005,8 @@ bool Converter::assignSlots() {
const glsl_type *type = var->type;
int slot = var->data.location;
uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var);
- uint32_t comp = type->is_array() ? type->without_array()->component_slots()
- : type->component_slots();
- uint32_t frac = var->data.location_frac;
uint32_t vary = var->data.driver_location;
- if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
- if (comp > 2)
- slots *= 2;
- }
-
assert(vary + slots <= PIPE_MAX_SHADER_INPUTS);
switch(prog->getType()) {
@@ -1014,6 +1030,8 @@ bool Converter::assignSlots() {
info->numPatchConstants = MAX2(info->numPatchConstants, index + slots);
break;
case Program::TYPE_VERTEX:
+ if (slot >= VERT_ATTRIB_GENERIC0)
+ slot = VERT_ATTRIB_GENERIC0 + vary;
vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index);
switch (name) {
case TGSI_SEMANTIC_EDGEFLAG:
@@ -1029,17 +1047,12 @@ bool Converter::assignSlots() {
}
for (uint16_t i = 0u; i < slots; ++i, ++vary) {
- info->in[vary].id = vary;
- info->in[vary].patch = var->data.patch;
- info->in[vary].sn = name;
- info->in[vary].si = index + i;
- if (glsl_base_type_is_64bit(type->without_array()->base_type))
- if (i & 0x1)
- info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
- else
- info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
- else
- info->in[vary].mask |= ((1 << comp) - 1) << frac;
+ nv50_ir_varying *v = &info->in[vary];
+
+ v->patch = var->data.patch;
+ v->sn = name;
+ v->si = index + i;
+ v->mask |= getMaskForType(type, i) << var->data.location_frac;
}
info->numInputs = std::max<uint8_t>(info->numInputs, vary);
}
@@ -1048,16 +1061,8 @@ bool Converter::assignSlots() {
const glsl_type *type = var->type;
int slot = var->data.location;
uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var);
- uint32_t comp = type->is_array() ? type->without_array()->component_slots()
- : type->component_slots();
- uint32_t frac = var->data.location_frac;
uint32_t vary = var->data.driver_location;
- if (glsl_base_type_is_64bit(type->without_array()->base_type)) {
- if (comp > 2)
- slots *= 2;
- }
-
assert(vary < PIPE_MAX_SHADER_OUTPUTS);
switch(prog->getType()) {
@@ -1067,7 +1072,11 @@ bool Converter::assignSlots() {
case TGSI_SEMANTIC_COLOR:
if (!var->data.fb_fetch_output)
info->prop.fp.numColourResults++;
- info->prop.fp.separateFragData = true;
+
+ if (var->data.location == FRAG_RESULT_COLOR &&
+ nir->info.outputs_written & BITFIELD64_BIT(var->data.location))
+ info->prop.fp.separateFragData = true;
+
// sometimes we get FRAG_RESULT_DATAX with data.index 0
// sometimes we get FRAG_RESULT_DATA0 with data.index X
index = index == 0 ? var->data.index : index;
@@ -1118,20 +1127,14 @@ bool Converter::assignSlots() {
}
for (uint16_t i = 0u; i < slots; ++i, ++vary) {
- info->out[vary].id = vary;
- info->out[vary].patch = var->data.patch;
- info->out[vary].sn = name;
- info->out[vary].si = index + i;
- if (glsl_base_type_is_64bit(type->without_array()->base_type))
- if (i & 0x1)
- info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4);
- else
- info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf);
- else
- info->out[vary].mask |= ((1 << comp) - 1) << frac;
+ nv50_ir_varying *v = &info->out[vary];
+ v->patch = var->data.patch;
+ v->sn = name;
+ v->si = index + i;
+ v->mask |= getMaskForType(type, i) << var->data.location_frac;
if (nir->info.outputs_read & 1ull << slot)
- info->out[vary].oread = 1;
+ v->oread = 1;
}
info->numOutputs = std::max<uint8_t>(info->numOutputs, vary);
}
@@ -1275,6 +1278,7 @@ Converter::parseNIR()
info->bin.tlsSpace = 0;
info->io.clipDistances = nir->info.clip_distance_array_size;
info->io.cullDistances = nir->info.cull_distance_array_size;
+ info->io.layer_viewport_relative = nir->info.layer_viewport_relative;
switch(prog->getType()) {
case Program::TYPE_COMPUTE:
@@ -1291,7 +1295,7 @@ Converter::parseNIR()
info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage;
info->prop.fp.readsSampleLocations =
(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS);
- info->prop.fp.usesDiscard = nir->info.fs.uses_discard;
+ info->prop.fp.usesDiscard = nir->info.fs.uses_discard || nir->info.fs.uses_demote;
info->prop.fp.usesSampleMaskIn =
!!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN);
break;
@@ -1426,64 +1430,69 @@ Converter::visit(nir_block *block)
bool
Converter::visit(nir_if *nif)
{
+ curIfDepth++;
+
DataType sType = getSType(nif->condition, false, false);
Value *src = getSrc(&nif->condition, 0);
nir_block *lastThen = nir_if_last_then_block(nif);
nir_block *lastElse = nir_if_last_else_block(nif);
- assert(!lastThen->successors[1]);
- assert(!lastElse->successors[1]);
-
+ BasicBlock *headBB = bb;
BasicBlock *ifBB = convert(nir_if_first_then_block(nif));
BasicBlock *elseBB = convert(nir_if_first_else_block(nif));
bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE);
bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE);
- // we only insert joinats, if both nodes end up at the end of the if again.
- // the reason for this to not happens are breaks/continues/ret/... which
- // have their own handling
- if (lastThen->successors[0] == lastElse->successors[0])
- bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]),
- CC_ALWAYS, NULL);
-
+ bool insertJoins = lastThen->successors[0] == lastElse->successors[0];
mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType);
foreach_list_typed(nir_cf_node, node, node, &nif->then_list) {
if (!visit(node))
return false;
}
+
setPosition(convert(lastThen), true);
- if (!bb->getExit() ||
- !bb->getExit()->asFlow() ||
- bb->getExit()->asFlow()->op == OP_JOIN) {
+ if (!bb->isTerminated()) {
BasicBlock *tailBB = convert(lastThen->successors[0]);
mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
+ } else {
+ insertJoins = insertJoins && bb->getExit()->op == OP_BRA;
}
foreach_list_typed(nir_cf_node, node, node, &nif->else_list) {
if (!visit(node))
return false;
}
+
setPosition(convert(lastElse), true);
- if (!bb->getExit() ||
- !bb->getExit()->asFlow() ||
- bb->getExit()->asFlow()->op == OP_JOIN) {
+ if (!bb->isTerminated()) {
BasicBlock *tailBB = convert(lastElse->successors[0]);
mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL);
bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD);
+ } else {
+ insertJoins = insertJoins && bb->getExit()->op == OP_BRA;
}
- if (lastThen->successors[0] == lastElse->successors[0]) {
- setPosition(convert(lastThen->successors[0]), true);
+ /* only insert joins for the outermost if */
+ if (--curIfDepth)
+ insertJoins = false;
+
+ /* we made sure that all threads would converge at the same block */
+ if (insertJoins) {
+ BasicBlock *conv = convert(lastThen->successors[0]);
+ setPosition(headBB->getExit(), false);
+ headBB->joinAt = mkFlow(OP_JOINAT, conv, CC_ALWAYS, NULL);
+ setPosition(conv, false);
mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1;
}
return true;
}
+// TODO: add convergence handling for loops
bool
Converter::visit(nir_loop *loop)
{
@@ -1491,8 +1500,8 @@ Converter::visit(nir_loop *loop)
func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth);
BasicBlock *loopBB = convert(nir_loop_first_block(loop));
- BasicBlock *tailBB =
- convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
+ BasicBlock *tailBB = convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)));
+
bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE);
mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL);
@@ -1503,19 +1512,15 @@ Converter::visit(nir_loop *loop)
if (!visit(node))
return false;
}
- Instruction *insn = bb->getExit();
- if (bb->cfg.incidentCount() != 0) {
- if (!insn || !insn->asFlow()) {
- mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
- bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
- } else if (insn && insn->op == OP_BRA && !insn->getPredicate() &&
- tailBB->cfg.incidentCount() == 0) {
- // RA doesn't like having blocks around with no incident edge,
- // so we create a fake one to make it happy
- bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
- }
+
+ if (!bb->isTerminated()) {
+ mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL);
+ bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
}
+ if (tailBB->cfg.incidentCount() == 0)
+ loopBB->cfg.attach(&tailBB->cfg, Graph::Edge::TREE);
+
curLoopDepth -= 1;
return true;
@@ -1560,6 +1565,7 @@ Converter::convert(nir_intrinsic_op intr)
return SV_DRAWID;
case nir_intrinsic_load_front_face:
return SV_FACE;
+ case nir_intrinsic_is_helper_invocation:
case nir_intrinsic_load_helper_invocation:
return SV_THREAD_KILL;
case nir_intrinsic_load_instance_id:
@@ -1617,6 +1623,7 @@ Converter::visit(nir_intrinsic_instr *insn)
{
nir_intrinsic_op op = insn->intrinsic;
const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op];
+ unsigned dest_components = nir_intrinsic_dest_components(insn);
switch (op) {
case nir_intrinsic_load_uniform: {
@@ -1624,7 +1631,7 @@ Converter::visit(nir_intrinsic_instr *insn)
const DataType dType = getDType(insn);
Value *indirect;
uint32_t coffset = getIndirect(insn, 0, 0, indirect);
- for (uint8_t i = 0; i < insn->num_components; ++i) {
+ for (uint8_t i = 0; i < dest_components; ++i) {
loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
}
break;
@@ -1635,7 +1642,7 @@ Converter::visit(nir_intrinsic_instr *insn)
DataType dType = getSType(insn->src[0], false, false);
uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);
- for (uint8_t i = 0u; i < insn->num_components; ++i) {
+ for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
if (!((1u << i) & nir_intrinsic_write_mask(insn)))
continue;
@@ -1652,6 +1659,7 @@ Converter::visit(nir_intrinsic_instr *insn)
break;
}
case Program::TYPE_GEOMETRY:
+ case Program::TYPE_TESSELLATION_EVAL:
case Program::TYPE_VERTEX: {
if (info->io.genUserClip > 0 && idx == (uint32_t)clipVertexOutput) {
mkMov(clipVtx[i], src);
@@ -1688,7 +1696,7 @@ Converter::visit(nir_intrinsic_instr *insn)
srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
- for (uint8_t i = 0u; i < insn->num_components; ++i) {
+ for (uint8_t i = 0u; i < dest_components; ++i) {
defs.push_back(newDefs[i]);
mask |= 1 << i;
}
@@ -1715,15 +1723,25 @@ Converter::visit(nir_intrinsic_instr *insn)
// see load_barycentric_* handling
if (prog->getType() == Program::TYPE_FRAGMENT) {
- mode = translateInterpMode(&vary, nvirOp);
if (op == nir_intrinsic_load_interpolated_input) {
ImmediateValue immMode;
if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode))
- mode |= immMode.reg.data.u32;
+ mode = immMode.reg.data.u32;
+ }
+ if (mode == NV50_IR_INTERP_DEFAULT)
+ mode |= translateInterpMode(&vary, nvirOp);
+ else {
+ if (vary.linear) {
+ nvirOp = OP_LINTERP;
+ mode |= NV50_IR_INTERP_LINEAR;
+ } else {
+ nvirOp = OP_PINTERP;
+ mode |= NV50_IR_INTERP_PERSPECTIVE;
+ }
}
}
- for (uint8_t i = 0u; i < insn->num_components; ++i) {
+ for (uint8_t i = 0u; i < dest_components; ++i) {
uint32_t address = getSlotAddress(insn, idx, i);
Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
if (prog->getType() == Program::TYPE_FRAGMENT) {
@@ -1814,9 +1832,11 @@ Converter::visit(nir_intrinsic_instr *insn)
loadImm(newDefs[1], mode);
break;
}
+ case nir_intrinsic_demote:
case nir_intrinsic_discard:
mkOp(OP_DISCARD, TYPE_NONE, NULL);
break;
+ case nir_intrinsic_demote_if:
case nir_intrinsic_discard_if: {
Value *pred = getSSA(1, FILE_PREDICATE);
if (insn->num_components > 1) {
@@ -1832,6 +1852,7 @@ Converter::visit(nir_intrinsic_instr *insn)
case nir_intrinsic_load_base_instance:
case nir_intrinsic_load_draw_id:
case nir_intrinsic_load_front_face:
+ case nir_intrinsic_is_helper_invocation:
case nir_intrinsic_load_helper_invocation:
case nir_intrinsic_load_instance_id:
case nir_intrinsic_load_invocation_id:
@@ -1858,7 +1879,7 @@ Converter::visit(nir_intrinsic_instr *insn)
SVSemantic sv = convert(op);
LValues &newDefs = convert(&insn->dest);
- for (uint8_t i = 0u; i < insn->num_components; ++i) {
+ for (uint8_t i = 0u; i < nir_intrinsic_dest_components(insn); ++i) {
Value *def;
if (typeSizeof(dType) == 8)
def = getSSA();
@@ -1910,12 +1931,12 @@ Converter::visit(nir_intrinsic_instr *insn)
if (op == nir_intrinsic_read_first_invocation) {
mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
- mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ mkOp1(OP_BREV, TYPE_U32, tmp, tmp);
mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
} else
tmp = getSrc(&insn->src[1], 0);
- for (uint8_t i = 0; i < insn->num_components; ++i) {
+ for (uint8_t i = 0; i < dest_components; ++i) {
mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
->subOp = NV50_IR_SUBOP_SHFL_IDX;
}
@@ -1931,7 +1952,7 @@ Converter::visit(nir_intrinsic_instr *insn)
Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
mkImm(baseVertex), indirectVertex);
- for (uint8_t i = 0u; i < insn->num_components; ++i) {
+ for (uint8_t i = 0u; i < dest_components; ++i) {
uint32_t address = getSlotAddress(insn, idx, i);
loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
indirectOffset, vtxBase, info->in[idx].patch);
@@ -1954,19 +1975,24 @@ Converter::visit(nir_intrinsic_instr *insn)
vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase);
- for (uint8_t i = 0u; i < insn->num_components; ++i) {
+ for (uint8_t i = 0u; i < dest_components; ++i) {
uint32_t address = getSlotAddress(insn, idx, i);
loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
indirectOffset, vtxBase, info->in[idx].patch);
}
break;
}
- case nir_intrinsic_emit_vertex:
+ case nir_intrinsic_emit_vertex: {
if (info->io.genUserClip > 0)
handleUserClipPlanes();
- // fallthrough
+ uint32_t idx = nir_intrinsic_stream_id(insn);
+ mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
+ break;
+ }
case nir_intrinsic_end_primitive: {
uint32_t idx = nir_intrinsic_stream_id(insn);
+ if (idx)
+ break;
mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1;
break;
}
@@ -1978,7 +2004,7 @@ Converter::visit(nir_intrinsic_instr *insn)
uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1;
uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
- for (uint8_t i = 0u; i < insn->num_components; ++i) {
+ for (uint8_t i = 0u; i < dest_components; ++i) {
loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
indirectOffset, indirectIndex);
}
@@ -2001,7 +2027,7 @@ Converter::visit(nir_intrinsic_instr *insn)
uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
- for (uint8_t i = 0u; i < insn->num_components; ++i) {
+ for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
if (!((1u << i) & nir_intrinsic_write_mask(insn)))
continue;
Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
@@ -2020,7 +2046,7 @@ Converter::visit(nir_intrinsic_instr *insn)
uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
- for (uint8_t i = 0u; i < insn->num_components; ++i)
+ for (uint8_t i = 0u; i < dest_components; ++i)
loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
indirectOffset, indirectBuffer);
@@ -2314,7 +2340,7 @@ Converter::visit(nir_intrinsic_instr *insn)
Value *indirectOffset;
uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
- for (uint8_t i = 0u; i < insn->num_components; ++i) {
+ for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
if (!((1u << i) & nir_intrinsic_write_mask(insn)))
continue;
Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, sType, offset + i * typeSizeof(sType));
@@ -2328,7 +2354,7 @@ Converter::visit(nir_intrinsic_instr *insn)
Value *indirectOffset;
uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
- for (uint8_t i = 0u; i < insn->num_components; ++i)
+ for (uint8_t i = 0u; i < dest_components; ++i)
loadFrom(FILE_MEMORY_SHARED, 0, dType, newDefs[i], offset, i, indirectOffset);
break;
@@ -2367,7 +2393,7 @@ Converter::visit(nir_intrinsic_instr *insn)
Value *indirectOffset;
uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
- for (auto i = 0u; i < insn->num_components; ++i)
+ for (auto i = 0u; i < dest_components; ++i)
loadFrom(FILE_MEMORY_GLOBAL, 0, dType, newDefs[i], offset, i, indirectOffset);
info->io.globalAccess |= 0x1;
@@ -2376,7 +2402,7 @@ Converter::visit(nir_intrinsic_instr *insn)
case nir_intrinsic_store_global: {
DataType sType = getSType(insn->src[0], false, false);
- for (auto i = 0u; i < insn->num_components; ++i) {
+ for (auto i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
if (!((1u << i) & nir_intrinsic_write_mask(insn)))
continue;
if (typeSizeof(sType) == 8) {
@@ -2418,7 +2444,6 @@ Converter::visit(nir_jump_instr *insn)
case nir_jump_continue: {
bool isBreak = insn->type == nir_jump_break;
nir_block *block = insn->instr.block;
- assert(!block->successors[1]);
BasicBlock *target = convert(block->successors[0]);
mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL);
bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK);
@@ -2774,7 +2799,7 @@ Converter::visit(nir_alu_instr *insn)
case nir_op_bfm: {
DEFAULT_CHECKS;
LValues &newDefs = convert(&insn->dest);
- mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
+ mkOp2(OP_BMSK, dType, newDefs[0], getSrc(&insn->src[1]), getSrc(&insn->src[0]))->subOp = NV50_IR_SUBOP_BMSK_W;
break;
}
case nir_op_bitfield_insert: {
@@ -2794,17 +2819,69 @@ Converter::visit(nir_alu_instr *insn)
case nir_op_bitfield_reverse: {
DEFAULT_CHECKS;
LValues &newDefs = convert(&insn->dest);
- mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ mkOp1(OP_BREV, TYPE_U32, newDefs[0], getSrc(&insn->src[0]));
break;
}
case nir_op_find_lsb: {
DEFAULT_CHECKS;
LValues &newDefs = convert(&insn->dest);
Value *tmp = getSSA();
- mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ mkOp1(OP_BREV, TYPE_U32, tmp, getSrc(&insn->src[0]));
mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
break;
}
+ case nir_op_extract_u8: {
+ DEFAULT_CHECKS;
+ LValues &newDefs = convert(&insn->dest);
+ Value *prmt = getSSA();
+ mkOp2(OP_OR, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x4440));
+ mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
+ break;
+ }
+ case nir_op_extract_i8: {
+ DEFAULT_CHECKS;
+ LValues &newDefs = convert(&insn->dest);
+ Value *prmt = getSSA();
+ mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x1111), loadImm(NULL, 0x8880));
+ mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
+ break;
+ }
+ case nir_op_extract_u16: {
+ DEFAULT_CHECKS;
+ LValues &newDefs = convert(&insn->dest);
+ Value *prmt = getSSA();
+ mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x22), loadImm(NULL, 0x4410));
+ mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
+ break;
+ }
+ case nir_op_extract_i16: {
+ DEFAULT_CHECKS;
+ LValues &newDefs = convert(&insn->dest);
+ Value *prmt = getSSA();
+ mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x2222), loadImm(NULL, 0x9910));
+ mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
+ break;
+ }
+ case nir_op_urol: {
+ DEFAULT_CHECKS;
+ LValues &newDefs = convert(&insn->dest);
+ mkOp3(OP_SHF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]),
+ getSrc(&insn->src[1]), getSrc(&insn->src[0]))
+ ->subOp = NV50_IR_SUBOP_SHF_L |
+ NV50_IR_SUBOP_SHF_W |
+ NV50_IR_SUBOP_SHF_HI;
+ break;
+ }
+ case nir_op_uror: {
+ DEFAULT_CHECKS;
+ LValues &newDefs = convert(&insn->dest);
+ mkOp3(OP_SHF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]),
+ getSrc(&insn->src[1]), getSrc(&insn->src[0]))
+ ->subOp = NV50_IR_SUBOP_SHF_R |
+ NV50_IR_SUBOP_SHF_W |
+ NV50_IR_SUBOP_SHF_LO;
+ break;
+ }
// boolean conversions
case nir_op_b2f32: {
DEFAULT_CHECKS;
@@ -2990,14 +3067,11 @@ Converter::handleDeref(nir_deref_instr *deref, Value * &indirect, const nir_vari
CacheMode
Converter::convert(enum gl_access_qualifier access)
{
- switch (access) {
- case ACCESS_VOLATILE:
+ if (access & ACCESS_VOLATILE)
return CACHE_CV;
- case ACCESS_COHERENT:
+ if (access & ACCESS_COHERENT)
return CACHE_CG;
- default:
- return CACHE_CA;
- }
+ return CACHE_CA;
}
CacheMode
@@ -3224,6 +3298,11 @@ Converter::run()
NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
NIR_PASS_V(nir, nir_lower_phis_to_scalar);
+ /*TODO: improve this lowering/optimisation loop so that we can use
+ * nir_opt_idiv_const effectively before this.
+ */
+ NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_precise);
+
do {
progress = false;
NIR_PASS(progress, nir, nir_copy_prop);
@@ -3285,3 +3364,125 @@ Program::makeFromNIR(struct nv50_ir_prog_info *info)
}
} // namespace nv50_ir
+
+// Builds the nir_shader_compiler_options used for the given chipset and
+// returns them by value.
+//
+// Most fields are constants.  The chipset-dependent ones are switched on two
+// thresholds:
+//  - NVISA_GM107_CHIPSET: extract byte/word lowering (requested below it)
+//    and nir_lower_extract64 (requested from it on);
+//  - NVISA_GV100_CHIPSET: fdiv, flrp16, bitfield extract/insert to shifts,
+//    isign/fsign and most int64/double lowerings (requested from it on),
+//    and rotate lowering (requested below it).
+static nir_shader_compiler_options
+nvir_nir_shader_compiler_options(int chipset)
+{
+ nir_shader_compiler_options op = {}; // start with every option zeroed
+ op.lower_fdiv = (chipset >= NVISA_GV100_CHIPSET);
+ op.lower_ffma = false;
+ op.fuse_ffma = false; /* nir doesn't track mad vs fma */
+ op.lower_flrp16 = (chipset >= NVISA_GV100_CHIPSET);
+ op.lower_flrp32 = true;
+ op.lower_flrp64 = true;
+ op.lower_fpow = false; // TODO: nir's lowering is broken, or we could use it
+ op.lower_fsat = false;
+ op.lower_fsqrt = false; // TODO: only before gm200
+ op.lower_sincos = false;
+ op.lower_fmod = true;
+ op.lower_bitfield_extract = false;
+ op.lower_bitfield_extract_to_shifts = (chipset >= NVISA_GV100_CHIPSET);
+ op.lower_bitfield_insert = false;
+ op.lower_bitfield_insert_to_shifts = (chipset >= NVISA_GV100_CHIPSET);
+ op.lower_bitfield_insert_to_bitfield_select = false;
+ op.lower_bitfield_reverse = false;
+ op.lower_bit_count = false;
+ op.lower_ifind_msb = false;
+ op.lower_find_lsb = false;
+ op.lower_uadd_carry = true; // TODO
+ op.lower_usub_borrow = true; // TODO
+ op.lower_mul_high = false;
+ op.lower_negate = false;
+ op.lower_sub = true;
+ op.lower_scmp = true; // TODO: not implemented yet
+ op.lower_vector_cmp = false;
+ op.lower_idiv = true;
+ op.lower_bitops = false;
+ op.lower_isign = (chipset >= NVISA_GV100_CHIPSET);
+ op.lower_fsign = (chipset >= NVISA_GV100_CHIPSET);
+ op.lower_fdph = false;
+ op.lower_fdot = false;
+ op.fdot_replicates = false; // TODO
+ op.lower_ffloor = false; // TODO
+ op.lower_ffract = true;
+ op.lower_fceil = false; // TODO
+ op.lower_ftrunc = false;
+ op.lower_ldexp = true;
+ op.lower_pack_half_2x16 = true;
+ op.lower_pack_unorm_2x16 = true;
+ op.lower_pack_snorm_2x16 = true;
+ op.lower_pack_unorm_4x8 = true;
+ op.lower_pack_snorm_4x8 = true;
+ op.lower_unpack_half_2x16 = true;
+ op.lower_unpack_unorm_2x16 = true;
+ op.lower_unpack_snorm_2x16 = true;
+ op.lower_unpack_unorm_4x8 = true;
+ op.lower_unpack_snorm_4x8 = true;
+ op.lower_pack_split = false;
+ op.lower_extract_byte = (chipset < NVISA_GM107_CHIPSET);
+ op.lower_extract_word = (chipset < NVISA_GM107_CHIPSET);
+ op.lower_all_io_to_temps = false;
+ op.lower_all_io_to_elements = false;
+ op.vertex_id_zero_based = false;
+ op.lower_base_vertex = false;
+ op.lower_helper_invocation = false;
+ op.optimize_sample_mask_in = false;
+ op.lower_cs_local_index_from_id = true;
+ op.lower_cs_local_id_from_index = false;
+ op.lower_device_index_to_zero = false; // TODO
+ op.lower_wpos_pntc = false; // TODO
+ op.lower_hadd = true; // TODO
+ op.lower_add_sat = true; // TODO
+ op.vectorize_io = false;
+ op.lower_to_scalar = false;
+ op.unify_interfaces = false;
+ op.use_interpolated_input_intrinsics = true;
+ op.lower_mul_2x32_64 = true; // TODO
+ op.lower_rotate = (chipset < NVISA_GV100_CHIPSET);
+ op.has_imul24 = false;
+ op.intel_vec4 = false;
+ op.max_unroll_iterations = 32;
+ // 64-bit integer lowerings: divmod64 and ufind_msb64 are always requested,
+ // the remaining flags depend on the chipset thresholds below
+ op.lower_int64_options = (nir_lower_int64_options) (
+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul64 : 0) |
+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_isign64 : 0) |
+ nir_lower_divmod64 |
+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_high64 : 0) |
+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_mov64 : 0) |
+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_icmp64 : 0) |
+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_iabs64 : 0) |
+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ineg64 : 0) |
+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_logic64 : 0) |
+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_minmax64 : 0) |
+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_shift64 : 0) |
+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_2x32_64 : 0) |
+ ((chipset >= NVISA_GM107_CHIPSET) ? nir_lower_extract64 : 0) |
+ nir_lower_ufind_msb64
+ );
+ // double lowerings: dmod is always requested, the rest only from GV100 on
+ op.lower_doubles_options = (nir_lower_doubles_options) (
+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drcp : 0) |
+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsqrt : 0) |
+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drsq : 0) |
+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dfract : 0) |
+ nir_lower_dmod |
+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsub : 0) |
+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ddiv : 0)
+ );
+ return op;
+}
+
+// One immutable options table per chipset threshold (GF100, GM107, GV100),
+// each filled in by calling nvir_nir_shader_compiler_options() from its
+// initializer.
+static const nir_shader_compiler_options gf100_nir_shader_compiler_options =
+nvir_nir_shader_compiler_options(NVISA_GF100_CHIPSET);
+static const nir_shader_compiler_options gm107_nir_shader_compiler_options =
+nvir_nir_shader_compiler_options(NVISA_GM107_CHIPSET);
+static const nir_shader_compiler_options gv100_nir_shader_compiler_options =
+nvir_nir_shader_compiler_options(NVISA_GV100_CHIPSET);
+
+// Returns a pointer to the prebuilt NIR compiler options matching `chipset`:
+// the GV100 table when chipset >= NVISA_GV100_CHIPSET, otherwise the GM107
+// table when chipset >= NVISA_GM107_CHIPSET, otherwise the GF100 table.
+// The returned object is a file-level static, so callers must not free it.
+const nir_shader_compiler_options *
+nv50_ir_nir_shader_compiler_options(int chipset)
+{
+   const bool atLeastGV100 = chipset >= NVISA_GV100_CHIPSET;
+   const bool atLeastGM107 = chipset >= NVISA_GM107_CHIPSET;
+
+   // checked from the newest threshold down, same order as a chain of ifs
+   return atLeastGV100 ? &gv100_nir_shader_compiler_options
+        : atLeastGM107 ? &gm107_nir_shader_compiler_options
+                       : &gf100_nir_shader_compiler_options;
+}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 60f3d582a0b..3fd76f64de0 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -3401,8 +3401,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
// ReadInvocationARB(src, findLSB(ballot(true)))
val0 = getScratch();
mkOp1(OP_VOTE, TYPE_U32, val0, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
- mkOp2(OP_EXTBF, TYPE_U32, val0, val0, mkImm(0x2000))
- ->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ mkOp1(OP_BREV, TYPE_U32, val0, val0);
mkOp1(OP_BFIND, TYPE_U32, val0, val0)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
src1 = val0;
/* fallthrough */
@@ -3820,8 +3819,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
val0 = getScratch();
- geni = mkOp2(OP_EXTBF, TYPE_U32, val0, src0, mkImm(0x2000));
- geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ mkOp1(OP_BREV, TYPE_U32, val0, src0);
geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], val0);
geni->subOp = NV50_IR_SUBOP_BFIND_SAMT;
}
@@ -3836,8 +3834,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
case TGSI_OPCODE_BREV:
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
src0 = fetchSrc(0, c);
- geni = mkOp2(OP_EXTBF, TYPE_U32, dst0[c], src0, mkImm(0x2000));
- geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
+ mkOp1(OP_BREV, TYPE_U32, dst0[c], src0);
}
break;
case TGSI_OPCODE_POPC:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
index 49a5f3b01f2..9fad1dcfe89 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
@@ -239,9 +239,8 @@ GM107LoweringPass::handlePFETCH(Instruction *i)
Value *tmp1 = bld.getScratch();
Value *tmp2 = bld.getScratch();
bld.mkOp1(OP_RDSV, TYPE_U32, tmp0, bld.mkSysVal(SV_INVOCATION_INFO, 0));
- bld.mkOp2(OP_SHR , TYPE_U32, tmp1, tmp0, bld.mkImm(16));
- bld.mkOp2(OP_AND , TYPE_U32, tmp0, tmp0, bld.mkImm(0xff));
- bld.mkOp2(OP_AND , TYPE_U32, tmp1, tmp1, bld.mkImm(0xff));
+ bld.mkOp3(OP_PERMT, TYPE_U32, tmp1, tmp0, bld.mkImm(0x4442), bld.mkImm(0));
+ bld.mkOp3(OP_PERMT, TYPE_U32, tmp0, tmp0, bld.mkImm(0x4440), bld.mkImm(0));
if (i->getSrc(1))
bld.mkOp2(OP_ADD , TYPE_U32, tmp2, i->getSrc(0), i->getSrc(1));
else
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h
index 71e5ea6417a..dfa1d035dac 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h
@@ -21,6 +21,7 @@ class GM107LegalizeSSA : public NVC0LegalizeSSA
private:
virtual bool visit(Instruction *);
+protected:
void handlePFETCH(Instruction *);
void handleLOAD(Instruction *);
};
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp
new file mode 100644
index 00000000000..644d4928327
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp
@@ -0,0 +1,481 @@
+/*
+ * Copyright 2020 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "codegen/nv50_ir.h"
+#include "codegen/nv50_ir_build_util.h"
+
+#include "codegen/nv50_ir_target_nvc0.h"
+#include "codegen/nv50_ir_lowering_gv100.h"
+
+#include <limits>
+
+namespace nv50_ir {
+
+bool
+GV100LegalizeSSA::handleCMP(Instruction *i)
+{
+   // SLCT becomes: flag = SET(0 <reversed cc> src2); dst = SELP(src0, src1, flag)
+   Value *const flag = bld.getSSA(1, FILE_PREDICATE);
+   Instruction *const set = bld.mkCmp(OP_SET, reverseCondCode(i->asCmp()->setCond), TYPE_U8, flag, i->sType, bld.mkImm(0), i->getSrc(2));
+   set->ftz = i->ftz;
+   bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), i->getSrc(0), i->getSrc(1), flag);
+   return true;
+}
+
+// Split a 64-bit integer ADD into two 32-bit ADDs chained through a carry
+// predicate; NIR handles most, but other lowering passes emit more of them.
+bool
+GV100LegalizeSSA::handleIADD64(Instruction *i)
+{
+   Value *cf = bld.getSSA(1, FILE_PREDICATE);
+   Value *half[2] = { bld.getSSA(), bld.getSSA() };
+   Value *opnd[2][2];
+
+   for (int n = 0; n < 2; ++n) {
+      Value *const v = i->getSrc(n);
+      if (v->reg.size != 8) {
+         opnd[n][0] = v;
+         opnd[n][1] = bld.mkImm(0);
+      } else {
+         bld.mkSplit(opnd[n], 4, v);
+      }
+   }
+
+   // the first ADD defines the carry flag, the second one consumes it
+   bld.mkOp2(OP_ADD, TYPE_U32, half[0], opnd[0][0], opnd[1][0])->setFlagsDef(1, cf);
+   bld.mkOp2(OP_ADD, TYPE_U32, half[1], opnd[0][1], opnd[1][1])->setFlagsSrc(2, cf);
+   bld.mkOp2(OP_MERGE, i->dType, i->getDef(0), half[0], half[1]);
+   return true;
+}
+
+bool
+GV100LegalizeSSA::handleIMAD_HIGH(Instruction *i)
+{
+   Value *wide = bld.getSSA(8), *halves[2], *addend;
+
+   const bool zeroAddend = !i->srcExists(2) ||
+      (i->getSrc(2)->asImm() && !i->getSrc(2)->asImm()->reg.data.u32);
+   if (zeroAddend) {
+      addend = bld.mkImm(0);
+   } else {
+      // 64-bit addend: first 32-bit component is zero, second one is src2
+      Value *parts[2] = { bld.getSSA(), bld.getSSA() };
+      bld.mkMov(parts[0], bld.mkImm(0));
+      bld.mkMov(parts[1], i->getSrc(2));
+      addend = bld.mkOp2(OP_MERGE, TYPE_U64, bld.getSSA(8), parts[0], parts[1])->getDef(0);
+   }
+
+   const DataType ty = isSignedType(i->sType) ? TYPE_S64 : TYPE_U64;
+   bld.mkOp3(OP_MAD, ty, wide, i->getSrc(0), i->getSrc(1), addend);
+   bld.mkSplit(halves, 4, wide);
+   i->def(0).replace(halves[1], false);
+   return true;
+}
+
+// XXX: This belongs in GV100LoweringPass, but there codegen ends up swapping
+//      the condition code without also swapping the sources.  See:
+// - tests/spec/glsl-1.50/execution/geometry/primitive-id-in.shader_test
+bool
+GV100LegalizeSSA::handleIMNMX(Instruction *i)
+{
+   const CondCode cc = (i->op == OP_MIN) ? CC_LT : CC_GT;
+   Value *const pick = bld.getSSA(1, FILE_PREDICATE);
+
+   bld.mkCmp(OP_SET, cc, i->dType, pick, i->sType, i->getSrc(0), i->getSrc(1));
+   bld.mkOp3(OP_SELP, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1), pick);
+   return true;
+}
+
+bool
+GV100LegalizeSSA::handleIMUL(Instruction *i)
+{
+   // A plain integer multiply is re-emitted as a MAD with a zero addend.
+   if (i->subOp != NV50_IR_SUBOP_MUL_HIGH) {
+      bld.mkOp3(OP_MAD, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1), bld.mkImm(0));
+      return true;
+   }
+   return handleIMAD_HIGH(i);
+}
+
+bool
+GV100LegalizeSSA::handleLOP2(Instruction *i)
+{
+   // Fold AND/OR/XOR (and NOT source modifiers) into a LOP3_LUT truth table.
+   uint8_t src0 = NV50_IR_SUBOP_LOP3_LUT_SRC0;
+   uint8_t src1 = NV50_IR_SUBOP_LOP3_LUT_SRC1;
+   uint8_t subOp;
+
+   if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
+      src0 = ~src0;
+   if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
+      src1 = ~src1;
+
+   switch (i->op) {
+   case OP_AND: subOp = src0 & src1; break;
+   case OP_OR : subOp = src0 | src1; break;
+   case OP_XOR: subOp = src0 ^ src1; break;
+   default:
+      assert(!"invalid LOP2 opcode");
+      return false; // keep the instruction rather than emit an uninitialized LUT
+   }
+
+   bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), i->getSrc(0), i->getSrc(1), bld.mkImm(0))->subOp = subOp;
+   return true;
+}
+
+bool
+GV100LegalizeSSA::handleNOT(Instruction *i)
+{
+   Instruction *lop3 = bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), bld.mkImm(0), i->getSrc(0), bld.mkImm(0));
+   lop3->subOp = (uint8_t)~NV50_IR_SUBOP_LOP3_LUT_SRC1; // LUT = complement of the second source
+   return true;
+}
+
+bool
+GV100LegalizeSSA::handlePREEX2(Instruction *i)
+{
+ i->def(0).replace(i->src(0), false); // hand src(0) to replace() on def(0); no new instruction is built
+ return true; // visit() deletes the instruction when a handler returns true
+}
+
+bool
+GV100LegalizeSSA::handleQUADON(Instruction *i)
+{
+ handleSHFL(i); // Inserts OP_WARPSYNC before i; its false return value is ignored here
+ return true; // unlike SHFL, report i as handled so visit() deletes it
+}
+
+bool
+GV100LegalizeSSA::handleQUADPOP(Instruction *i)
+{
+ return true; // nothing is emitted; visit() deletes the instruction
+}
+
+bool
+GV100LegalizeSSA::handleSET(Instruction *i)
+{
+   Value *const extra = i->srcExists(2) ? i->getSrc(2) : NULL;
+   Value *const flag = bld.getSSA(1, FILE_PREDICATE);
+   const bool fdst = isFloatType(i->dType);
+
+   if (fdst && i->sType == TYPE_F32)
+      return false; // HW has FSET.BF
+
+   // Result for the SELP's second slot: 0x3f800000 (1.0f) or all ones.
+   Value *const onTrue = fdst ? bld.mkImm(0x3f800000) : bld.mkImm(0xffffffff);
+
+   // Compare into a predicate, carrying over source modifiers, src2 and ftz.
+   Instruction *const cmp = bld.mkCmp(i->op, i->asCmp()->setCond, TYPE_U8, flag,
+                                      i->sType, i->getSrc(0), i->getSrc(1));
+   cmp->src(0).mod = i->src(0).mod;
+   cmp->src(1).mod = i->src(1).mod;
+   cmp->setSrc(2, extra);
+   cmp->ftz = i->ftz;
+
+   Instruction *const sel =
+      bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), bld.mkImm(0), onTrue, flag);
+   sel->src(2).mod = Modifier(NV50_IR_MOD_NOT);
+   return true;
+}
+
+bool
+GV100LegalizeSSA::handleSHFL(Instruction *i)
+{
+   Instruction *const warpsync = new_Instruction(func, OP_WARPSYNC, TYPE_NONE);
+   warpsync->setSrc(0, bld.mkImm(0xffffffff)); // all-ones immediate operand
+   warpsync->fixed = 1;
+   i->bb->insertBefore(i, warpsync);
+   return false; // i itself is kept; only the WARPSYNC was added in front of it
+}
+
+bool
+GV100LegalizeSSA::handleShift(Instruction *i)
+{
+   const bool left = (i->op == OP_SHL);
+   Value *const zero = bld.mkImm(0);
+   Value *first = zero, *third = zero;
+   uint8_t mode = left ? NV50_IR_SUBOP_SHF_L : NV50_IR_SUBOP_SHF_R;
+
+   // SHF gets the shifted value in either its first or its third source and
+   // zero in the other.  Only a left shift of a GPR uses the first slot.
+   if (left && i->src(0).getFile() == FILE_GPR) {
+      first = i->getSrc(0);
+   } else {
+      third = i->getSrc(0);
+      mode |= NV50_IR_SUBOP_SHF_HI;
+   }
+   if (i->subOp & NV50_IR_SUBOP_SHIFT_WRAP)
+      mode |= NV50_IR_SUBOP_SHF_W;
+
+   bld.mkOp3(OP_SHF, i->dType, i->getDef(0), first, i->getSrc(1), third)->subOp = mode;
+   return true;
+}
+
+bool
+GV100LegalizeSSA::handleSUB(Instruction *i)
+{
+   // a - b  ==>  a + (-b): same operands, NEG toggled on the second source.
+   Instruction *add = bld.mkOp2(OP_ADD, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1));
+   add->ftz = i->ftz;
+   add->src(0).mod = i->src(0).mod;
+   add->src(1).mod = i->src(1).mod ^ Modifier(NV50_IR_MOD_NEG);
+   return true;
+}
+
+bool
+GV100LegalizeSSA::visit(Instruction *i)
+{
+   bool replaced = false; // true once a handler has emitted a substitute for i
+
+   bld.setPosition(i, false);
+   if (i->sType == TYPE_F32 && i->dType != TYPE_F16 &&
+       prog->getType() != Program::TYPE_COMPUTE)
+      handleFTZ(i);
+
+   switch (i->op) {
+   case OP_AND:
+   case OP_OR:
+   case OP_XOR:
+      // instructions writing a predicate are left as they are
+      replaced = i->def(0).getFile() != FILE_PREDICATE && handleLOP2(i);
+      break;
+   case OP_NOT:
+      replaced = handleNOT(i);
+      break;
+   case OP_SHL:
+   case OP_SHR:
+      replaced = handleShift(i);
+      break;
+   case OP_SET:
+   case OP_SET_AND:
+   case OP_SET_OR:
+   case OP_SET_XOR:
+      // instructions writing a predicate are left as they are
+      replaced = i->def(0).getFile() != FILE_PREDICATE && handleSET(i);
+      break;
+   case OP_SLCT:
+      replaced = handleCMP(i);
+      break;
+   case OP_PREEX2:
+      replaced = handlePREEX2(i);
+      break;
+   case OP_MUL:
+      // integer types only
+      replaced = !isFloatType(i->dType) && handleIMUL(i);
+      break;
+   case OP_MAD:
+      // integer MUL_HIGH variant only
+      replaced = !isFloatType(i->dType) && i->subOp == NV50_IR_SUBOP_MUL_HIGH && handleIMAD_HIGH(i);
+      break;
+   case OP_SHFL:
+      replaced = handleSHFL(i);
+      break;
+   case OP_QUADON:
+      replaced = handleQUADON(i);
+      break;
+   case OP_QUADPOP:
+      replaced = handleQUADPOP(i);
+      break;
+   case OP_SUB:
+      replaced = handleSUB(i);
+      break;
+   case OP_MAX:
+   case OP_MIN:
+      // integer types only
+      replaced = !isFloatType(i->dType) && handleIMNMX(i);
+      break;
+   case OP_ADD:
+      // 8-byte integer types only
+      replaced = !isFloatType(i->dType) && typeSizeof(i->dType) == 8 && handleIADD64(i);
+      break;
+   case OP_PFETCH:
+      handlePFETCH(i);
+      break;
+   case OP_LOAD:
+      handleLOAD(i);
+      break;
+   default:
+      break;
+   }
+
+   if (replaced)
+      delete_Instruction(prog, i);
+
+   return true;
+}
+
+bool
+GV100LoweringPass::handleDMNMX(Instruction *i)
+{
+   const CondCode cc = (i->op == OP_MIN) ? CC_LT : CC_GT;
+   Value *const pick = bld.getSSA(1, FILE_PREDICATE);
+   Value *a[2], *b[2], *out[2];
+   // Compare once, then select each 32-bit half with the same predicate.
+   bld.mkCmp(OP_SET, cc, TYPE_U32, pick, i->sType, i->getSrc(0), i->getSrc(1));
+   bld.mkSplit(a, 4, i->getSrc(0));
+   bld.mkSplit(b, 4, i->getSrc(1));
+   bld.mkSplit(out, 4, i->getDef(0));
+   for (int h = 0; h < 2; ++h)
+      bld.mkOp3(OP_SELP, TYPE_U32, out[h], a[h], b[h], pick);
+   bld.mkOp2(OP_MERGE, TYPE_U64, i->getDef(0), out[0], out[1]);
+   return true;
+}
+
+bool
+GV100LoweringPass::handleEXTBF(Instruction *i)
+{
+   Value *const pos = bld.getScratch();
+   Value *const len = bld.getScratch();
+   Value *const field = bld.getScratch();
+   Value *const zero = bld.mkImm(0);
+   Value *const dst = i->getDef(0), *const desc = i->getSrc(1);
+   // src1 is 0xNNKK: byte 0 goes to pos, byte 1 goes to len
+   bld.mkOp3(OP_PERMT, TYPE_U32, pos, desc, bld.mkImm(0x4440), zero);
+   bld.mkOp3(OP_PERMT, TYPE_U32, len, desc, bld.mkImm(0x4441), zero);
+   bld.mkOp2(OP_BMSK, TYPE_U32, field, pos, len);
+   bld.mkOp2(OP_AND, TYPE_U32, field, i->getSrc(0), field);
+   bld.mkOp2(OP_SHR, TYPE_U32, dst, field, pos);
+   if (isSignedType(i->dType))
+      bld.mkOp2(OP_SGXT, TYPE_S32, dst, dst, len);
+   return true;
+}
+
+bool
+GV100LoweringPass::handleFLOW(Instruction *i)
+{
+ i->op = OP_BRA; // BREAK/CONT (see visit()) are rewritten in place to OP_BRA
+ return false; // modified, not replaced: visit() must not delete it
+}
+
+bool
+GV100LoweringPass::handleI2I(Instruction *i)
+{
+   Value *const dst = i->getDef(0); // also holds the F32 intermediate
+   bld.mkCvt(OP_CVT, TYPE_F32, dst, i->sType, i->getSrc(0))->subOp = i->subOp;
+   bld.mkCvt(OP_CVT, i->dType, dst, TYPE_F32, dst);
+   return true;
+}
+
+bool
+GV100LoweringPass::handleINSBF(Instruction *i)
+{
+   Value *const pos = bld.getScratch();
+   Value *const len = bld.getScratch();
+   Value *const field = bld.getScratch();
+   Value *const ins = bld.getScratch();
+   Value *const zero = bld.mkImm(0);
+   // src1 byte 0 -> pos, byte 1 -> len; field = BMSK(0, len)
+   bld.mkOp3(OP_PERMT, TYPE_U32, pos, i->getSrc(1), bld.mkImm(0x4440), zero);
+   bld.mkOp3(OP_PERMT, TYPE_U32, len, i->getSrc(1), bld.mkImm(0x4441), zero);
+   bld.mkOp2(OP_BMSK, TYPE_U32, field, zero, len);
+   // ins = (src0 & field) << pos
+   bld.mkOp2(OP_AND, TYPE_U32, ins, i->getSrc(0), field);
+   bld.mkOp2(OP_SHL, TYPE_U32, ins, ins, pos);
+   // dst = LOP3(ins, src2, field << pos) with LUT a | (b & ~c)
+   bld.mkOp2(OP_SHL, TYPE_U32, field, field, pos);
+   Instruction *const lop3 = bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), ins, i->getSrc(2), field);
+   lop3->subOp = NV50_IR_SUBOP_LOP3_LUT(a | (b & ~c));
+
+   return true;
+}
+
+bool
+GV100LoweringPass::handlePINTERP(Instruction *i)
+{
+   // PINTERP is split into a LINTERP followed by a multiply with src1.
+   Value *const dst = i->getDef(0);
+   Value *const third = i->srcExists(2) ? i->getSrc(2) : NULL;
+
+   Instruction *const interp = bld.mkOp2(OP_LINTERP, TYPE_F32, dst, i->getSrc(0), third);
+   interp->ipa = i->ipa;
+   Instruction *const scale = bld.mkOp2(OP_MUL, TYPE_F32, dst, dst, i->getSrc(1));
+
+   if (i->getInterpMode() == NV50_IR_INTERP_SC) {
+      interp->setDef(1, bld.getSSA(1, FILE_PREDICATE));
+      scale->setPredicate(CC_NOT_P, interp->getDef(1));
+   }
+   return true;
+}
+
+bool
+GV100LoweringPass::handlePREFLOW(Instruction *i)
+{
+ return true; // PREBREAK/PRECONT get no replacement; visit() deletes them
+}
+
+bool
+GV100LoweringPass::handlePRESIN(Instruction *i)
+{
+   const float invTwoPi = 1.0 / (2.0 * 3.14159265); // PRESIN is a multiply by 1/(2*pi)
+   bld.mkOp2(OP_MUL, i->dType, i->getDef(0), i->getSrc(0), bld.mkImm(invTwoPi));
+   return true;
+}
+
+bool
+GV100LoweringPass::visit(Instruction *i)
+{
+   bool replaced = false; // true once a handler has emitted a substitute for i
+
+   bld.setPosition(i, false);
+
+   switch (i->op) {
+   case OP_BREAK:
+   case OP_CONT:
+      replaced = handleFLOW(i);
+      break;
+   case OP_PREBREAK:
+   case OP_PRECONT:
+      replaced = handlePREFLOW(i);
+      break;
+   case OP_CVT:
+      // integer-to-integer only, and neither side may be a predicate
+      replaced = i->src(0).getFile() != FILE_PREDICATE &&
+                 i->def(0).getFile() != FILE_PREDICATE &&
+                 !isFloatType(i->dType) && !isFloatType(i->sType) && handleI2I(i);
+      break;
+   case OP_EXTBF:
+      replaced = handleEXTBF(i);
+      break;
+   case OP_INSBF:
+      replaced = handleINSBF(i);
+      break;
+   case OP_MAX:
+   case OP_MIN:
+      // F64 only
+      replaced = i->dType == TYPE_F64 && handleDMNMX(i);
+      break;
+   case OP_PINTERP:
+      replaced = handlePINTERP(i);
+      break;
+   case OP_PRESIN:
+      replaced = handlePRESIN(i);
+      break;
+   default:
+      break;
+   }
+
+   if (replaced)
+      delete_Instruction(prog, i);
+
+   return true;
+}
+
+} // namespace nv50_ir
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.h
new file mode 100644
index 00000000000..d918c6e83eb
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2020 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __NV50_IR_LOWERING_GV100_H__
+#define __NV50_IR_LOWERING_GV100_H__
+#include "codegen/nv50_ir_lowering_gm107.h"
+
+namespace nv50_ir {
+
+class GV100LoweringPass : public Pass // per-instruction lowering pass for the GV100 target
+{
+public:
+ GV100LoweringPass(Program *p) {
+ bld.setProgram(p); // the builder emits into the program being lowered
+ }
+
+private:
+ BuildUtil bld; // used by the handlers to create replacement instructions
+
+ virtual bool visit(Instruction *); // dispatches on the opcode; deletes the instruction if a handler returns true
+
+ bool handleDMNMX(Instruction *); // F64 MIN/MAX
+ bool handleEXTBF(Instruction *); // bitfield extract
+ bool handleFLOW(Instruction *); // BREAK/CONT
+ bool handleI2I(Instruction *); // integer-to-integer CVT
+ bool handleINSBF(Instruction *); // bitfield insert
+ bool handlePINTERP(Instruction *); // PINTERP
+ bool handlePREFLOW(Instruction *); // PREBREAK/PRECONT
+ bool handlePRESIN(Instruction *); // PRESIN
+};
+
+class GV100LegalizeSSA : public GM107LegalizeSSA // per-instruction SSA legalization for the GV100 target
+{
+public:
+ GV100LegalizeSSA(Program *p) {
+ bld.setProgram(p); // bld is inherited; point it at the program being legalized
+ }
+
+private:
+ virtual bool visit(Function *) { return true; } // no per-function work here; simply returns true
+ virtual bool visit(BasicBlock *) { return true; } // no per-block work here; simply returns true
+ virtual bool visit(Instruction *); // dispatches on the opcode; deletes the instruction if a handler returns true
+
+ bool handleCMP(Instruction *); // SLCT
+ bool handleIADD64(Instruction *); // 64-bit integer ADD
+ bool handleIMAD_HIGH(Instruction *); // integer MAD/MUL with the MUL_HIGH subop
+ bool handleIMNMX(Instruction *); // integer MIN/MAX
+ bool handleIMUL(Instruction *); // integer MUL
+ bool handleLOP2(Instruction *); // AND/OR/XOR
+ bool handleNOT(Instruction *); // NOT
+ bool handlePREEX2(Instruction *); // PREEX2
+ bool handleQUADON(Instruction *); // QUADON
+ bool handleQUADPOP(Instruction *); // QUADPOP
+ bool handleSET(Instruction *); // SET, SET_AND, SET_OR, SET_XOR
+ bool handleSHFL(Instruction *); // SHFL (also used by handleQUADON)
+ bool handleShift(Instruction *); // SHL/SHR
+ bool handleSUB(Instruction *); // SUB
+};
+}
+#endif
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index a60881000fe..067f9abaca8 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -310,6 +310,14 @@ NVC0LegalizeSSA::handleSET(CmpInstruction *cmp)
cmp->sType = hTy;
}
+void
+NVC0LegalizeSSA::handleBREV(Instruction *i)
+{
+   i->setSrc(1, bld.mkImm(0x2000)); // EXTBF descriptor 0xNNKK: NN = 0x20, KK = 0
+   i->subOp = NV50_IR_SUBOP_EXTBF_REV;
+   i->op = OP_EXTBF;
+}
+
bool
NVC0LegalizeSSA::visit(Function *fn)
{
@@ -354,6 +362,9 @@ NVC0LegalizeSSA::visit(BasicBlock *bb)
if (typeSizeof(i->sType) == 8 && i->sType != TYPE_F64)
handleSET(i->asCmp());
break;
+ case OP_BREV:
+ handleBREV(i);
+ break;
default:
break;
}
@@ -856,11 +867,11 @@ NVC0LegalizePostRA::visit(BasicBlock *bb)
next = hi;
}
- if (i->op == OP_SAT || i->op == OP_NEG || i->op == OP_ABS)
- replaceCvt(i);
-
if (i->op != OP_MOV && i->op != OP_PFETCH)
replaceZero(i);
+
+ if (i->op == OP_SAT || i->op == OP_NEG || i->op == OP_ABS)
+ replaceCvt(i);
}
}
if (!bb->getEntry())
@@ -887,6 +898,8 @@ NVC0LoweringPass::visit(Function *fn)
gpEmitAddress = bld.loadImm(NULL, 0)->asLValue();
if (fn->cfgExit) {
bld.setPosition(BasicBlock::get(fn->cfgExit)->getExit(), false);
+ if (prog->getTarget()->getChipset() >= NVISA_GV100_CHIPSET)
+ bld.mkOp1(OP_FINAL, TYPE_NONE, NULL, gpEmitAddress)->fixed = 1;
bld.mkMovToReg(0, gpEmitAddress);
}
}
@@ -1714,7 +1727,8 @@ NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl)
cctl->setPredicate(cas->cc, cas->getPredicate());
}
- if (cas->subOp == NV50_IR_SUBOP_ATOM_CAS) {
+ if (cas->subOp == NV50_IR_SUBOP_ATOM_CAS &&
+ targ->getChipset() < NVISA_GV100_CHIPSET) {
// CAS is crazy. It's 2nd source is a double reg, and the 3rd source
// should be set to the high part of the double reg or bad things will
// happen elsewhere in the universe.
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index b4c405a9ea5..8c99427d3c0 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -64,12 +64,14 @@ private:
void handleDIV(Instruction *); // integer division, modulus
void handleRCPRSQLib(Instruction *, Value *[]);
void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt
- void handleFTZ(Instruction *);
void handleSET(CmpInstruction *);
void handleTEXLOD(TexInstruction *);
void handleShift(Instruction *);
+ void handleBREV(Instruction *);
protected:
+ void handleFTZ(Instruction *);
+
BuildUtil bld;
};
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 2f46b0e886a..3a4ec3ca561 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -558,6 +558,19 @@ ConstantFolding::expr(Instruction *i,
memset(&res.data, 0, sizeof(res.data));
switch (i->op) {
+ case OP_SGXT: {
+ int bits = b->data.u32;
+ if (bits) {
+ uint32_t data = a->data.u32 & (0xffffffff >> (32 - bits));
+ if (bits < 32 && (data & (1 << (bits - 1))))
+ data = data - (1 << bits);
+ res.data.u32 = data;
+ }
+ break;
+ }
+ case OP_BMSK:
+ res.data.u32 = ((1 << b->data.u32) - 1) << a->data.u32;
+ break;
case OP_MAD:
case OP_FMA:
case OP_MUL:
@@ -780,6 +793,23 @@ ConstantFolding::expr(Instruction *i,
memset(&res.data, 0, sizeof(res.data));
switch (i->op) {
+ case OP_LOP3_LUT:
+ for (int n = 0; n < 32; n++) {
+ uint8_t lut = ((a->data.u32 >> n) & 1) << 2 |
+ ((b->data.u32 >> n) & 1) << 1 |
+ ((c->data.u32 >> n) & 1);
+ res.data.u32 |= !!(i->subOp & (1 << lut)) << n;
+ }
+ break;
+ case OP_PERMT:
+ if (!i->subOp) {
+ uint64_t input = (uint64_t)c->data.u32 << 32 | a->data.u32;
+ uint16_t permt = b->data.u32;
+ for (int n = 0 ; n < 4; n++, permt >>= 4)
+ res.data.u32 |= ((input >> ((permt & 0xf) * 8)) & 0xff) << n * 8;
+ } else
+ return;
+ break;
case OP_INSBF: {
int offset = b->data.u32 & 0xff;
int width = (b->data.u32 >> 8) & 0xff;
@@ -1526,6 +1556,12 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
i->subOp = 0;
break;
}
+ case OP_BREV: {
+ uint32_t res = util_bitreverse(imm0.reg.data.u32);
+ i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res));
+ i->op = OP_MOV;
+ break;
+ }
case OP_POPCNT: {
// Only deal with 1-arg POPCNT here
if (i->srcExists(1))
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index 5dcbf3c3e0c..ce0d2507dc1 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -93,8 +93,10 @@ const char *operationStr[OP_LAST + 1] =
"and",
"or",
"xor",
+ "lop3 lut",
"shl",
"shr",
+ "shf",
"max",
"min",
"sat",
@@ -142,6 +144,7 @@ const char *operationStr[OP_LAST + 1] =
"pinterp",
"emit",
"restart",
+ "final",
"tex",
"texbias",
"texlod",
@@ -177,7 +180,10 @@ const char *operationStr[OP_LAST + 1] =
"insbf",
"extbf",
"bfind",
+ "brev",
+ "bmsk",
"permt",
+ "sgxt",
"atom",
"bar",
"vadd",
@@ -193,6 +199,7 @@ const char *operationStr[OP_LAST + 1] =
"shfl",
"vote",
"bufq",
+ "warpsync",
"(invalid)"
};
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 6df2664da22..4e5b21d9176 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -988,6 +988,8 @@ GCRA::coalesce(ArrayList& insns)
case 0x110:
case 0x120:
case 0x130:
+ case 0x140:
+ case 0x160:
ret = doCoalesce(insns, JOIN_MASK_UNION);
break;
default:
@@ -2297,13 +2299,25 @@ RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex)
if (isTextureOp(tex->op))
textureMask(tex);
- if (isScalarTexGM107(tex)) {
- handleScalarTexGM107(tex);
- return;
- }
+ if (targ->getChipset() < NVISA_GV100_CHIPSET) {
+ if (isScalarTexGM107(tex)) {
+ handleScalarTexGM107(tex);
+ return;
+ }
- assert(!tex->tex.scalar);
- condenseDefs(tex);
+ assert(!tex->tex.scalar);
+ condenseDefs(tex);
+ } else {
+ if (isTextureOp(tex->op)) {
+ int defCount = tex->defCount(0xff);
+ if (defCount > 3)
+ condenseDefs(tex, 2, 3);
+ if (defCount > 1)
+ condenseDefs(tex, 0, 1);
+ } else {
+ condenseDefs(tex);
+ }
+ }
if (isSurfaceOp(tex->op)) {
int s = tex->tex.target.getDim() +
@@ -2485,6 +2499,8 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
case 0x110:
case 0x120:
case 0x130:
+ case 0x140:
+ case 0x160:
texConstraintGM107(tex);
break;
default:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_sched_gm107.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_sched_gm107.h
new file mode 100644
index 00000000000..54443ae2770
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_sched_gm107.h
@@ -0,0 +1,156 @@
+#ifndef __NV50_IR_SCHED_GM107_H__
+#define __NV50_IR_SCHED_GM107_H__
+namespace nv50_ir {
+
+class SchedDataCalculatorGM107 : public Pass // pass that computes scheduling data; only declarations are visible in this header
+{
+public:
+ SchedDataCalculatorGM107(const TargetGM107 *targ) : targ(targ) {}
+
+private:
+ struct RegScores // per-register read/write scores plus a base value
+ {
+ struct ScoreData {
+ int r[256]; // one score per $r register (see print())
+ int p[8]; // one score per $p register (see print())
+ int c; // score for $c (see print())
+ } rd, wr; // rd: read scores, wr: write scores
+ int base;
+
+ void rebase(const int base) // shift every score by (this->base - base)
+ {
+ const int delta = this->base - base;
+ if (!delta)
+ return;
+ this->base = 0; // NOTE(review): stores 0, not the new base argument - confirm this is intended
+
+ for (int i = 0; i < 256; ++i) {
+ rd.r[i] += delta;
+ wr.r[i] += delta;
+ }
+ for (int i = 0; i < 8; ++i) {
+ rd.p[i] += delta;
+ wr.p[i] += delta;
+ }
+ rd.c += delta;
+ wr.c += delta;
+ }
+ void wipe() // zero all read and write scores; base is left untouched
+ {
+ memset(&rd, 0, sizeof(rd));
+ memset(&wr, 0, sizeof(wr));
+ }
+ int getLatest(const ScoreData& d) const // largest score in d, never less than 0
+ {
+ int max = 0;
+ for (int i = 0; i < 256; ++i)
+ if (d.r[i] > max)
+ max = d.r[i];
+ for (int i = 0; i < 8; ++i)
+ if (d.p[i] > max)
+ max = d.p[i];
+ if (d.c > max)
+ max = d.c;
+ return max;
+ }
+ inline int getLatestRd() const
+ {
+ return getLatest(rd);
+ }
+ inline int getLatestWr() const
+ {
+ return getLatest(wr);
+ }
+ inline int getLatest() const // larger of the latest read and latest write score
+ {
+ return MAX2(getLatestRd(), getLatestWr());
+ }
+ void setMax(const RegScores *that) // element-wise maximum with another score set; base is not merged
+ {
+ for (int i = 0; i < 256; ++i) {
+ rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
+ wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
+ }
+ for (int i = 0; i < 8; ++i) {
+ rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
+ wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
+ }
+ rd.c = MAX2(rd.c, that->rd.c);
+ wr.c = MAX2(wr.c, that->wr.c);
+ }
+ void print(int cycle) // log every score that is greater than cycle
+ {
+ for (int i = 0; i < 256; ++i) {
+ if (rd.r[i] > cycle)
+ INFO("rd $r%i @ %i\n", i, rd.r[i]);
+ if (wr.r[i] > cycle)
+ INFO("wr $r%i @ %i\n", i, wr.r[i]);
+ }
+ for (int i = 0; i < 8; ++i) {
+ if (rd.p[i] > cycle)
+ INFO("rd $p%i @ %i\n", i, rd.p[i]);
+ if (wr.p[i] > cycle)
+ INFO("wr $p%i @ %i\n", i, wr.p[i]);
+ }
+ if (rd.c > cycle)
+ INFO("rd $c @ %i\n", rd.c);
+ if (wr.c > cycle)
+ INFO("wr $c @ %i\n", wr.c);
+ }
+ };
+
+ RegScores *score; // for current BB
+ std::vector<RegScores> scoreBoards;
+
+ const TargetGM107 *targ;
+ bool visit(Function *);
+ bool visit(BasicBlock *);
+
+ void commitInsn(const Instruction *, int);
+ int calcDelay(const Instruction *, int) const;
+ void setDelay(Instruction *, int, const Instruction *);
+ void recordWr(const Value *, int, int);
+ void checkRd(const Value *, int, int&) const;
+
+ inline void emitYield(Instruction *);
+ inline void emitStall(Instruction *, uint8_t);
+ inline void emitReuse(Instruction *, uint8_t);
+ inline void emitWrDepBar(Instruction *, uint8_t);
+ inline void emitRdDepBar(Instruction *, uint8_t);
+ inline void emitWtDepBar(Instruction *, uint8_t);
+
+ inline int getStall(const Instruction *) const;
+ inline int getWrDepBar(const Instruction *) const;
+ inline int getRdDepBar(const Instruction *) const;
+ inline int getWtDepBar(const Instruction *) const;
+
+ void setReuseFlag(Instruction *);
+
+ inline void printSchedInfo(int, const Instruction *) const;
+
+ struct LiveBarUse { // plain pair of instruction pointers: insn and usei
+ LiveBarUse(Instruction *insn, Instruction *usei)
+ : insn(insn), usei(usei) { }
+ Instruction *insn;
+ Instruction *usei;
+ };
+
+ struct LiveBarDef { // plain pair of instruction pointers: insn and defi
+ LiveBarDef(Instruction *insn, Instruction *defi)
+ : insn(insn), defi(defi) { }
+ Instruction *insn;
+ Instruction *defi;
+ };
+
+ bool insertBarriers(BasicBlock *);
+
+ bool doesInsnWriteTo(const Instruction *insn, const Value *val) const;
+ Instruction *findFirstUse(const Instruction *) const;
+ Instruction *findFirstDef(const Instruction *) const;
+
+ bool needRdDepBar(const Instruction *) const;
+ bool needWrDepBar(const Instruction *) const;
+};
+
+}; // namespace nv50_ir
+#endif
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index 5c6d0570ae2..765375a47df 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -33,7 +33,7 @@ const uint8_t Target::operationSrcNr[] =
2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
3, 3, // SHLADD, XMAD
1, 1, 1, // ABS, NEG, NOT
- 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR
+ 2, 2, 2, 3, 2, 2, 3, // AND, OR, XOR, LOP3_LUT, SHL, SHR, SHF
2, 2, 1, // MAX, MIN, SAT
1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT
3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT
@@ -43,7 +43,7 @@ const uint8_t Target::operationSrcNr[] =
0, 0, 0, // PRERET,CONT,BREAK
0, 0, 0, 0, 0, 0, // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
1, 1, 1, 2, 1, 2, // VFETCH, PFETCH, AFETCH, EXPORT, LINTERP, PINTERP
- 1, 1, // EMIT, RESTART
+ 1, 1, 1, // EMIT, RESTART, FINAL
1, 1, 1, // TEX, TXB, TXL,
1, 1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP
1, 1, 2, 2, 2, 2, 2, // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
@@ -51,13 +51,15 @@ const uint8_t Target::operationSrcNr[] =
0, // TEXBAR
1, 1, // DFDX, DFDY
1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
- 2, 3, 2, 1, 3, // POPCNT, INSBF, EXTBF, BFIND, PERMT
+ 2, 3, 2, 1, 1, 2, 3, // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK, PERMT
+ 2, // SGXT
2, 2, // ATOM, BAR
2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL
3, // SHFL
1, // VOTE
1, // BUFQ
+ 1, // WARPSYNC
0
};
@@ -75,10 +77,10 @@ const OpClass Target::operationClass[] =
OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
OPCLASS_ARITH, OPCLASS_ARITH,
OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
- // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
+ // ABS, NEG; NOT, AND, OR, XOR, LOP3_LUT; SHL, SHR, SHF
OPCLASS_CONVERT, OPCLASS_CONVERT,
- OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
- OPCLASS_SHIFT, OPCLASS_SHIFT,
+ OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
+ OPCLASS_SHIFT, OPCLASS_SHIFT, OPCLASS_SHIFT,
// MAX, MIN
OPCLASS_COMPARE, OPCLASS_COMPARE,
// SAT, CEIL, FLOOR, TRUNC; CVT
@@ -103,8 +105,8 @@ const OpClass Target::operationClass[] =
OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_STORE,
// LINTERP, PINTERP
OPCLASS_SFU, OPCLASS_SFU,
- // EMIT, RESTART
- OPCLASS_CONTROL, OPCLASS_CONTROL,
+ // EMIT, RESTART, FINAL
+ OPCLASS_CONTROL, OPCLASS_CONTROL, OPCLASS_CONTROL,
// TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TXLQ; TEXCSAA, TEXPREP
OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
@@ -119,9 +121,9 @@ const OpClass Target::operationClass[] =
// DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
- // POPCNT, INSBF, EXTBF, BFIND; PERMT
+ // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK; PERMT, SGXT
+ OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
- OPCLASS_BITFIELD,
// ATOM, BAR
OPCLASS_ATOMIC, OPCLASS_CONTROL,
// VADD, VAVG, VMIN, VMAX
@@ -136,10 +138,13 @@ const OpClass Target::operationClass[] =
OPCLASS_OTHER,
// BUFQ
OPCLASS_OTHER,
+ // WARPSYNC
+ OPCLASS_OTHER,
OPCLASS_PSEUDO // LAST
};
+extern Target *getTargetGV100(unsigned int chipset);
extern Target *getTargetGM107(unsigned int chipset);
extern Target *getTargetNVC0(unsigned int chipset);
extern Target *getTargetNV50(unsigned int chipset);
@@ -149,6 +154,9 @@ Target *Target::create(unsigned int chipset)
STATIC_ASSERT(ARRAY_SIZE(operationSrcNr) == OP_LAST + 1);
STATIC_ASSERT(ARRAY_SIZE(operationClass) == OP_LAST + 1);
switch (chipset & ~0xf) {
+ case 0x160:
+ case 0x140:
+ return getTargetGV100(chipset);
case 0x110:
case 0x120:
case 0x130:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
index afeca14d7d1..0f7db116577 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
@@ -200,7 +200,7 @@ public:
uint8_t dstMods;
uint16_t srcFiles[3];
uint16_t dstFiles;
- unsigned int minEncSize : 4;
+ unsigned int minEncSize : 5;
unsigned int vector : 1;
unsigned int predicate : 1;
unsigned int commutative : 1;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.cpp
new file mode 100644
index 00000000000..fd969e1ece5
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.cpp
@@ -0,0 +1,594 @@
+/*
+ * Copyright 2020 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "codegen/nv50_ir_target_gv100.h"
+#include "codegen/nv50_ir_lowering_gv100.h"
+#include "codegen/nv50_ir_emit_gv100.h"
+
+namespace nv50_ir {
+/* Builds opInfo[] for GV100: uniform defaults for every op, then the commutative/noDest/noPred overrides. */
+void
+TargetGV100::initOpInfo()
+{
+ unsigned int i, j;
+
+ static const operation commutative[] =
+ {
+ OP_ADD, OP_MUL, OP_MAD, OP_FMA, OP_MAX, OP_MIN,
+ OP_SET_AND, OP_SET_OR, OP_SET_XOR, OP_SET, OP_SELP, OP_SLCT
+ };
+
+ static const operation noDest[] =
+ {
+ OP_EXIT
+ };
+ /* NOTE(review): currently empty, so the noPred loop below never runs; a zero-length array relies on a compiler extension. */
+ static const operation noPred[] =
+ {
+ };
+ /* Each data file maps to itself, except ADDRESS -> GPR and FLAGS -> PREDICATE. */
+ for (i = 0; i < DATA_FILE_COUNT; ++i)
+ nativeFileMap[i] = (DataFile)i;
+ nativeFileMap[FILE_ADDRESS] = FILE_GPR;
+ nativeFileMap[FILE_FLAGS] = FILE_PREDICATE;
+ /* Defaults for every op: F32 source/dest types, GPR-only operands, no modifiers, no immediate bits. */
+ for (i = 0; i < OP_LAST; ++i) {
+ opInfo[i].variants = NULL;
+ opInfo[i].op = (operation)i;
+ opInfo[i].srcTypes = 1 << (int)TYPE_F32;
+ opInfo[i].dstTypes = 1 << (int)TYPE_F32;
+ opInfo[i].immdBits = 0;
+ opInfo[i].srcNr = operationSrcNr[i];
+
+ for (j = 0; j < opInfo[i].srcNr; ++j) {
+ opInfo[i].srcMods[j] = 0;
+ opInfo[i].srcFiles[j] = 1 << (int)FILE_GPR;
+ }
+ opInfo[i].dstMods = 0;
+ opInfo[i].dstFiles = 1 << (int)FILE_GPR;
+
+ opInfo[i].hasDest = 1;
+ opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA);
+ opInfo[i].commutative = false; /* set below */
+ opInfo[i].pseudo = (i < OP_MOV); /* ops enumerated before OP_MOV are pseudo */
+ opInfo[i].predicate = !opInfo[i].pseudo;
+ opInfo[i].flow = (i >= OP_BRA && i <= OP_JOIN);
+ opInfo[i].minEncSize = 16; /* does not fit in 4 bits, hence the 5-bit minEncSize bitfield */
+ }
+ for (i = 0; i < ARRAY_SIZE(commutative); ++i)
+ opInfo[commutative[i]].commutative = true;
+ for (i = 0; i < ARRAY_SIZE(noDest); ++i)
+ opInfo[noDest[i]].hasDest = 0;
+ for (i = 0; i < ARRAY_SIZE(noPred); ++i)
+ opInfo[noPred[i]].predicate = 0;
+}
+/* Operand constraints of one instruction form: per source slot, the allowed files and the allowed modifiers. */
+struct opInfo {
+ struct {
+ uint8_t files; /* OR of (1 << FILE_*) bits, built from the SRC_* masks */
+ uint8_t mods; /* OR of NV50_IR_MOD_* bits, built from the MOD_* masks */
+ } src[3];
+};
+/* Source-file masks. NOTE(review): SRC_I selects FILE_MEMORY_CONST and SRC_C selects FILE_IMMEDIATE - confirm the naming. */
+#define SRC_NONE 0
+#define SRC_R    (1 << FILE_GPR)
+#define SRC_I    (1 << FILE_MEMORY_CONST)
+#define SRC_C    (1 << FILE_IMMEDIATE)
+#define SRC_RC   (SRC_R |         SRC_C)
+#define SRC_RI   (SRC_R | SRC_I        )
+#define SRC_RIC  (SRC_R | SRC_I | SRC_C)
+/* Source-modifier masks; MOD_NA allows negate and absolute value together. */
+#define MOD_NONE 0
+#define MOD_NEG  NV50_IR_MOD_NEG
+#define MOD_ABS  NV50_IR_MOD_ABS
+#define MOD_NOT  NV50_IR_MOD_NOT
+#define MOD_NA   (MOD_NEG | MOD_ABS)
+/* Defines the read-only table opInfo_<O>; the invocation supplies the terminating ';'. */
+#define OPINFO(O,SA,MA,SB,MB,SC,MC)                                            \
+static const struct opInfo                                                     \
+opInfo_##O = {                                                                 \
+   .src = { { SRC_##SA, MOD_##MA },                                            \
+            { SRC_##SB, MOD_##MB },                                            \
+            { SRC_##SC, MOD_##MC }},                                           \
+}
+
+/* Constraint tables: OPINFO(name, src0 files, src0 mods, src1 files, src1 mods, src2 files, src2 mods). */
+/* Handled by GV100LegalizeSSA. */
+OPINFO(FABS , RIC , NA , NONE, NONE, NONE, NONE);
+OPINFO(FCMP , R , NONE, RIC , NONE, RIC , NONE); //XXX: use FSEL for mods
+OPINFO(FNEG , RIC , NA , NONE, NONE, NONE, NONE);
+OPINFO(FSET , R , NA , RIC , NA , NONE, NONE);
+OPINFO(ICMP , R , NONE, RIC , NONE, RIC , NONE);
+OPINFO(IMUL , R , NONE, RIC , NONE, NONE, NONE);
+OPINFO(INEG , RIC , NEG , NONE, NONE, NONE, NONE);
+OPINFO(ISET , R , NONE, RIC , NONE, NONE, NONE);
+OPINFO(LOP2 , R , NOT , RIC , NOT , NONE, NONE);
+OPINFO(NOT , RIC , NONE, NONE, NONE, NONE, NONE);
+OPINFO(SAT , RIC , NA , NONE, NONE, NONE, NONE);
+OPINFO(SHL , RIC , NONE, RIC , NONE, NONE, NONE);
+OPINFO(SHR , RIC , NONE, RIC , NONE, NONE, NONE);
+OPINFO(SUB , R , NONE, RIC , NEG , NONE, NONE);
+OPINFO(IMNMX , R , NONE, RIC , NONE, NONE, NONE);
+
+/* Handled by CodeEmitterGV100. */
+OPINFO(AL2P , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(ALD , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(AST , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(ATOM , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(ATOMS , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(BAR , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(BRA , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(BMSK , R , NONE, RIC , NONE, NONE, NONE);
+OPINFO(BREV , RIC , NONE, NONE, NONE, NONE, NONE);
+OPINFO(CCTL , NONE, NONE, NONE, NONE, NONE, NONE);
+//OPINFO(CS2R , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(DADD , R , NA , RIC , NA , NONE, NONE);
+OPINFO(DFMA , R , NA , RIC , NA , RIC , NA );
+OPINFO(DMUL , R , NA , RIC , NA , NONE, NONE);
+OPINFO(DSETP , R , NA , RIC , NA , NONE, NONE);
+OPINFO(EXIT , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(F2F , RIC , NA , NONE, NONE, NONE, NONE);
+OPINFO(F2I , RIC , NA , NONE, NONE, NONE, NONE);
+OPINFO(FADD , R , NA , RIC , NA , NONE, NONE);
+OPINFO(FFMA , R , NA , RIC , NA , RIC , NA );
+OPINFO(FLO , RIC , NOT , NONE, NONE, NONE, NONE);
+OPINFO(FMNMX , R , NA , RIC , NA , NONE, NONE);
+OPINFO(FMUL , R , NA , RIC , NA , NONE, NONE);
+OPINFO(FRND , RIC , NA , NONE, NONE, NONE, NONE);
+OPINFO(FSET_BF , R , NA , RIC , NA , NONE, NONE);
+OPINFO(FSETP , R , NA , RIC , NA , NONE, NONE);
+OPINFO(FSWZADD , R , NONE, R , NONE, NONE, NONE);
+OPINFO(I2F , RIC , NONE, NONE, NONE, NONE, NONE);
+OPINFO(IABS , RIC , NONE, NONE, NONE, NONE, NONE);
+OPINFO(IADD3 , R , NEG , RIC , NEG , R , NEG );
+OPINFO(IMAD , R , NONE, RIC , NONE, RIC , NEG );
+OPINFO(IMAD_WIDE, R , NONE, RIC , NONE, RC , NEG );
+OPINFO(IPA , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(ISBERD , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(ISETP , R , NONE, RIC , NONE, NONE, NONE);
+OPINFO(KILL , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(LD , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(LDC , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(LDL , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(LDS , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(LEA , R , NEG , I , NONE, RIC , NEG ); /* NOTE(review): src1 "I" expands to FILE_MEMORY_CONST - confirm an immediate was not intended */
+OPINFO(LOP3_LUT , R , NONE, RIC , NONE, R , NONE);
+OPINFO(MEMBAR , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(MOV , RIC , NONE, NONE, NONE, NONE, NONE);
+OPINFO(MUFU , RIC , NA , NONE, NONE, NONE, NONE);
+OPINFO(NOP , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(OUT , R , NONE, RI , NONE, NONE, NONE);
+OPINFO(PIXLD , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(PLOP3_LUT, NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(POPC , RIC , NOT , NONE, NONE, NONE, NONE);
+OPINFO(PRMT , R , NONE, RIC , NONE, RIC , NONE);
+OPINFO(RED , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(SGXT , R , NONE, RIC , NONE, NONE, NONE);
+OPINFO(S2R , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(SEL , R , NONE, RIC , NONE, NONE, NONE);
+OPINFO(SHF , R , NONE, RIC , NONE, RIC , NONE);
+OPINFO(SHFL , R , NONE, R , NONE, R , NONE);
+OPINFO(ST , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(STL , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(STS , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(SUATOM , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(SULD , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(SUST , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(TEX , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(TLD , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(TLD4 , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(TMML , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(TXD , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(TXQ , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(VOTE , NONE, NONE, NONE, NONE, NONE, NONE);
+OPINFO(WARPSYNC , R , NONE, NONE, NONE, NONE, NONE);
+/* Picks the opInfo_* constraint table for instruction i from its op, types and register files; NULL when none applies. */
+static const struct opInfo *
+getOpInfo(const Instruction *i)
+{
+   switch (i->op) {
+   case OP_ABS:
+      if (isFloatType(i->dType))
+         return &opInfo_FABS;
+      return &opInfo_IABS;
+   case OP_ADD:
+      if (isFloatType(i->dType)) {
+         if (i->dType == TYPE_F32)
+            return &opInfo_FADD;
+         else
+            return &opInfo_DADD;
+      } else {
+         return &opInfo_IADD3;
+      }
+      break; /* not reached: every branch above returns */
+   case OP_AFETCH: return &opInfo_AL2P;
+   case OP_AND:
+   case OP_OR:
+   case OP_XOR:
+      if (i->def(0).getFile() == FILE_PREDICATE)
+         return &opInfo_PLOP3_LUT;
+      return &opInfo_LOP2;
+   case OP_ATOM:
+      if (i->src(0).getFile() == FILE_MEMORY_SHARED)
+         return &opInfo_ATOMS;
+      else
+      if (!i->defExists(0) && i->subOp < NV50_IR_SUBOP_ATOM_CAS)
+         return &opInfo_RED; /* no result wanted and sub-op below CAS */
+      else
+         return &opInfo_ATOM;
+      break; /* not reached: every branch above returns */
+   case OP_BAR: return &opInfo_BAR;
+   case OP_BFIND: return &opInfo_FLO;
+   case OP_BMSK: return &opInfo_BMSK;
+   case OP_BREV: return &opInfo_BREV;
+   case OP_BRA:
+   case OP_JOIN: return &opInfo_BRA; //XXX
+   case OP_CCTL: return &opInfo_CCTL;
+   case OP_CEIL:
+   case OP_CVT:
+   case OP_FLOOR:
+   case OP_TRUNC:
+      if (i->op == OP_CVT && (i->def(0).getFile() == FILE_PREDICATE ||
+                              i->src(0).getFile() == FILE_PREDICATE)) {
+         return &opInfo_MOV;
+      } else if (isFloatType(i->dType)) {
+         if (isFloatType(i->sType)) {
+            if (i->sType == i->dType)
+               return &opInfo_FRND;
+            else
+               return &opInfo_F2F;
+         } else {
+            return &opInfo_I2F;
+         }
+      } else {
+         if (isFloatType(i->sType))
+            return &opInfo_F2I;
+      }
+      break; /* non-float to non-float: no table, returns NULL */
+   case OP_COS:
+   case OP_EX2:
+   case OP_LG2:
+   case OP_RCP:
+   case OP_RSQ:
+   case OP_SIN:
+   case OP_SQRT: return &opInfo_MUFU;
+   case OP_DISCARD: return &opInfo_KILL;
+   case OP_EMIT:
+   case OP_FINAL:
+   case OP_RESTART: return &opInfo_OUT;
+   case OP_EXIT: return &opInfo_EXIT;
+   case OP_EXPORT: return &opInfo_AST;
+   case OP_FMA:
+   case OP_MAD:
+      if (isFloatType(i->dType)) {
+         if (i->dType == TYPE_F32)
+            return &opInfo_FFMA;
+         else
+            return &opInfo_DFMA;
+      } else {
+         if (typeSizeof(i->dType) != 8)
+            return &opInfo_IMAD;
+         else
+            return &opInfo_IMAD_WIDE;
+      }
+      break; /* not reached: every branch above returns */
+   case OP_JOINAT: return &opInfo_NOP; //XXX
+   case OP_LINTERP: return &opInfo_IPA;
+   case OP_LOAD:
+      switch (i->src(0).getFile()) {
+      case FILE_MEMORY_CONST : return &opInfo_LDC;
+      case FILE_MEMORY_LOCAL : return &opInfo_LDL;
+      case FILE_MEMORY_SHARED: return &opInfo_LDS;
+      case FILE_MEMORY_GLOBAL: return &opInfo_LD;
+      default:
+         break;
+      }
+      break; /* any other source file: no table, returns NULL */
+   case OP_LOP3_LUT: return &opInfo_LOP3_LUT;
+   case OP_MAX:
+   case OP_MIN:
+      if (isFloatType(i->dType)) {
+         if (i->dType == TYPE_F32)
+            return &opInfo_FMNMX;
+      } else {
+         return &opInfo_IMNMX;
+      }
+      break; /* float types other than F32: no table, returns NULL */
+   case OP_MEMBAR: return &opInfo_MEMBAR;
+   case OP_MOV: return &opInfo_MOV;
+   case OP_MUL:
+      if (isFloatType(i->dType)) {
+         if (i->dType == TYPE_F32)
+            return &opInfo_FMUL;
+         else
+            return &opInfo_DMUL;
+      }
+      return &opInfo_IMUL;
+   case OP_NEG:
+      if (isFloatType(i->dType))
+         return &opInfo_FNEG;
+      return &opInfo_INEG;
+   case OP_NOT: return &opInfo_NOT;
+   case OP_PERMT: return &opInfo_PRMT;
+   case OP_PFETCH: return &opInfo_ISBERD;
+   case OP_PIXLD: return &opInfo_PIXLD;
+   case OP_POPCNT: return &opInfo_POPC;
+   case OP_QUADOP: return &opInfo_FSWZADD;
+   case OP_RDSV:
+#if 0
+      if (targ->isCS2RSV(i->getSrc(0)->reg.data.sv.sv))
+         return &opInfo_CS2R;
+#endif
+      return &opInfo_S2R;
+   case OP_SAT: return &opInfo_SAT;
+   case OP_SELP: return &opInfo_SEL;
+   case OP_SET:
+   case OP_SET_AND:
+   case OP_SET_OR:
+   case OP_SET_XOR:
+      if (i->def(0).getFile() != FILE_PREDICATE) {
+         if (isFloatType(i->dType)) {
+            if (i->dType == TYPE_F32)
+               return &opInfo_FSET_BF;
+         } else {
+            if (isFloatType(i->sType))
+               return &opInfo_FSET;
+            return &opInfo_ISET;
+         }
+      } else {
+         if (isFloatType(i->sType)) { /* braced so each else visibly pairs with its if */
+            if (i->sType == TYPE_F64)
+               return &opInfo_DSETP;
+            else
+               return &opInfo_FSETP;
+         } else
+            return &opInfo_ISETP;
+      }
+      break; /* non-predicate float result other than F32: no table, returns NULL */
+   case OP_SGXT: return &opInfo_SGXT;
+   case OP_SHF: return &opInfo_SHF;
+   case OP_SHFL: return &opInfo_SHFL;
+   case OP_SHL: return &opInfo_SHL;
+   case OP_SHLADD: return &opInfo_LEA;
+   case OP_SHR: return &opInfo_SHR;
+   case OP_SLCT:
+      if (isFloatType(i->sType))
+         return &opInfo_FCMP;
+      return &opInfo_ICMP;
+   case OP_STORE:
+      switch (i->src(0).getFile()) {
+      case FILE_MEMORY_LOCAL : return &opInfo_STL;
+      case FILE_MEMORY_SHARED: return &opInfo_STS;
+      case FILE_MEMORY_GLOBAL: return &opInfo_ST;
+      default:
+         break;
+      }
+      break; /* any other source file: no table, returns NULL */
+   case OP_SUB: return &opInfo_SUB;
+   case OP_SULDB:
+   case OP_SULDP: return &opInfo_SULD;
+   case OP_SUREDB:
+   case OP_SUREDP: return &opInfo_SUATOM;
+   case OP_SUSTB:
+   case OP_SUSTP: return &opInfo_SUST;
+   case OP_TEX:
+   case OP_TXB:
+   case OP_TXL: return &opInfo_TEX;
+   case OP_TXD: return &opInfo_TXD;
+   case OP_TXF: return &opInfo_TLD;
+   case OP_TXG: return &opInfo_TLD4;
+   case OP_TXLQ: return &opInfo_TMML;
+   case OP_TXQ: return &opInfo_TXQ;
+   case OP_VFETCH: return &opInfo_ALD;
+   case OP_VOTE: return &opInfo_VOTE;
+   case OP_WARPSYNC: return &opInfo_WARPSYNC;
+   default:
+      break;
+   }
+   return NULL; /* callers treat NULL as "no files/modifiers allowed" */
+}
+/* Returns true only for F32 ADD/FMA/MAD/MUL; every other type/op combination returns false. */
+bool
+TargetGV100::isSatSupported(const Instruction *i) const
+{
+   /* Only F32 results are candidates. */
+   if (i->dType != TYPE_F32)
+      return false;
+
+   /* Of those, only the add/multiply family qualifies. */
+   switch (i->op) {
+   case OP_ADD:
+   case OP_FMA:
+   case OP_MAD:
+   case OP_MUL:
+      return true;
+   default:
+      break;
+   }
+   return false;
+}
+/* Checks mod against the modifier mask that the table from nv50_ir::getOpInfo() gives for source s of i. */
+bool
+TargetGV100::isModSupported(const Instruction *i, int s, Modifier mod) const
+{
+   const struct opInfo *const desc = nv50_ir::getOpInfo(i);
+   /* No table, or a source slot past the end of the table, leaves the mask empty. */
+   const bool known = desc && s < (int)ARRAY_SIZE(desc->src);
+   const uint8_t allowed = known ? desc->src[s].mods : 0;
+   return (mod & Modifier(allowed)) == mod;
+}
+/* Returns true for the short list of (op, ty) pairs below; anything not listed returns false. */
+bool
+TargetGV100::isOpSupported(operation op, DataType ty) const
+{
+   switch (op) {
+   /* Accepted for every data type. */
+   case OP_MAD:
+   case OP_FMA:
+   case OP_RSQ:
+   case OP_SET:
+   case OP_SET_AND:
+   case OP_SET_OR:
+   case OP_SET_XOR:
+   case OP_SHLADD:
+      return true;
+   case OP_MAX:
+      return ty == TYPE_F32; /* MAX is accepted for F32 only */
+   default:
+      return false;
+   }
+}
+/* OP_BREV always reports true here; every other op gets the TargetGM107 answer. */
+bool
+TargetGV100::isBarrierRequired(const Instruction *i) const
+{
+   /* GV100-specific case first. */
+   if (i->op == OP_BREV)
+      return true;
+
+   /*
+    * Everything else is decided by the parent class.
+    */
+   return TargetGM107::isBarrierRequired(i);
+}
+/* Returns whether source s of i may take the source of load ld directly, judged by the file of ld's src(0). */
+bool
+TargetGV100::insnCanLoad(const Instruction *i, int s,
+ const Instruction *ld) const
+{
+ const struct opInfo *info = nv50_ir::getOpInfo(i);
+ uint16_t files = 0;
+ /* A zero immediate is accepted by any non-pseudo, non-texture instruction other than EXPORT and STORE. */
+ if (ld->src(0).getFile() == FILE_IMMEDIATE && ld->getSrc(0)->reg.data.u64 == 0)
+ return (!i->isPseudo() &&
+ !i->asTex() &&
+ i->op != OP_EXPORT && i->op != OP_STORE);
+ /* A load whose source is indirectly addressed is never accepted. */
+ if (ld->src(0).isIndirect(0))
+ return false;
+ /* Start from the table's mask; drop const/immediate when the paired source is already a non-GPR operand. */
+ if (info && s < (int)ARRAY_SIZE(info->src)) {
+ files = info->src[s].files;
+ if ((s == 1 && i->srcExists(2) && i->src(2).getFile() != FILE_GPR) ||
+ (s == 2 && i->srcExists(1) && i->src(1).getFile() != FILE_GPR)) {
+ files &= ~(1 << FILE_MEMORY_CONST);
+ files &= ~(1 << FILE_IMMEDIATE);
+ } else
+ if ((i->op == OP_SHL || i->op == OP_SHR) && /* for shifts the pairing is src0 <-> src1 */
+ ((s == 0 && i->srcExists(1) && i->src(1).getFile() != FILE_GPR) ||
+ (s == 1 && i->srcExists(0) && i->src(0).getFile() != FILE_GPR))) {
+ files &= ~(1 << FILE_MEMORY_CONST);
+ files &= ~(1 << FILE_IMMEDIATE);
+ }
+ }
+ /* With an F64 source type, an immediate whose low 32 bits are non-zero is rejected. */
+ if (ld->src(0).getFile() == FILE_IMMEDIATE) {
+ if (i->sType == TYPE_F64) {
+ if (ld->getSrc(0)->asImm()->reg.data.u64 & 0x00000000ffffffff)
+ return false;
+ }
+ }
+ /* files stays 0 (result false) when no table matched or s is past the table. */
+ return (files & (1 << ld->src(0).getFile()));
+}
+/* Hands out the built-in code blob (27 copies of one 4-dword pattern); *size receives its size in bytes. */
+void
+TargetGV100::getBuiltinCode(const uint32_t **code, uint32_t *size) const
+{
+   //XXX: find out why gv100 (tu1xx is fine) hangs without this
+   static const uint32_t builtin[] = { /* never written; callers only get a const pointer */
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
+   };
+   *code = builtin;
+   *size = sizeof(builtin);
+}
+/* Always returns offset 0, whatever the builtin index. */
+uint32_t
+TargetGV100::getBuiltinOffset(int builtin) const
+{
+ return 0; /* the builtin argument is not consulted */
+}
+/* Runs the legalization pass(es) registered for the given codegen stage; returns false for any other stage. */
+bool
+TargetGV100::runLegalizePass(Program *prog, CGStage stage) const
+{
+   if (stage == CG_STAGE_SSA) {
+      GV100LegalizeSSA ssa(prog);
+      return ssa.run(prog, false, true);
+   }
+   if (stage == CG_STAGE_POST_RA) {
+      NVC0LegalizePostRA postRA(prog);
+      return postRA.run(prog, false, true);
+   }
+   if (stage != CG_STAGE_PRE_SSA)
+      return false;
+   /* Pre-SSA: GM107 lowering runs first, then GV100 lowering; their results are not checked. */
+   GM107LoweringPass gm107(prog);
+   GV100LoweringPass gv100(prog);
+   gm107.run(prog, false, true);
+   gv100.run(prog, false, true);
+   return true;
+}
+/* Allocates a new CodeEmitterGV100 bound to this target; the program type argument is not consulted. */
+CodeEmitter *
+TargetGV100::getCodeEmitter(Program::Type type)
+{
+ return new CodeEmitterGV100(this);
+}
+/* Constructs the TargetGM107 base for this chipset, then fills in the GV100 op table via initOpInfo(). */
+TargetGV100::TargetGV100(unsigned int chipset)
+   : TargetGM107(chipset)
+{
+   initOpInfo();
+}
+/* Factory used by Target::create(): returns a heap-allocated TargetGV100 for the chipset. */
+Target *getTargetGV100(unsigned int chipset)
+{
+ return new TargetGV100(chipset);
+}
+
+};
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.h
new file mode 100644
index 00000000000..897e6a22d30
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2020 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __NV50_IR_TARGET_GV100_H__
+#define __NV50_IR_TARGET_GV100_H__
+#include "codegen/nv50_ir_target_gm107.h"
+
+namespace nv50_ir {
+/* nv50_ir target for GV100-class chipsets, layered on TargetGM107; Target::create() selects it for 0x140 and 0x160. */
+class TargetGV100 : public TargetGM107 {
+public:
+ TargetGV100(unsigned int chipset);
+
+ virtual CodeEmitter *getCodeEmitter(Program::Type); /* returns a new CodeEmitterGV100 */
+
+ virtual bool runLegalizePass(Program *, CGStage stage) const;
+ /* Built-in code blob and the offset of a builtin within it. */
+ virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const;
+ virtual uint32_t getBuiltinOffset(int builtin) const;
+ /* Operand, modifier and saturate legality queries. */
+ virtual bool insnCanLoad(const Instruction *, int, const Instruction *) const;
+ virtual bool isOpSupported(operation, DataType) const;
+ virtual bool isModSupported(const Instruction *, int s, Modifier) const;
+ virtual bool isSatSupported(const Instruction *) const;
+
+ virtual bool isBarrierRequired(const Instruction *) const;
+
+private:
+ void initOpInfo(); /* called once from the constructor */
+ void initProps(const struct opProperties *, int); /* NOTE(review): no definition in nv50_ir_target_gv100.cpp - confirm it is needed */
+};
+
+};
+#endif
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
index 60134b445db..ed5b343ccba 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
@@ -30,7 +30,7 @@ Target *getTargetNVC0(unsigned int chipset)
}
TargetNVC0::TargetNVC0(unsigned int card) :
- Target(card < 0x110, false, card >= 0xe4)
+ Target(card < 0x110, false, card >= 0xe4 && card < 0x140)
{
chipset = card;
initOpInfo();
diff --git a/src/gallium/drivers/nouveau/meson.build b/src/gallium/drivers/nouveau/meson.build
index 7a1d18a6394..68cfebdf20c 100644
--- a/src/gallium/drivers/nouveau/meson.build
+++ b/src/gallium/drivers/nouveau/meson.build
@@ -150,17 +150,31 @@ files_libnouveau = files(
'codegen/nv50_ir_util.cpp',
'codegen/nv50_ir_util.h',
'codegen/unordered_set.h',
+ 'codegen/nv50_ir_emit_gv100.cpp',
+ 'codegen/nv50_ir_emit_gv100.h',
'codegen/nv50_ir_emit_gk110.cpp',
'codegen/nv50_ir_emit_gm107.cpp',
'codegen/nv50_ir_emit_nvc0.cpp',
+ 'codegen/nv50_ir_lowering_gv100.cpp',
+ 'codegen/nv50_ir_lowering_gv100.h',
'codegen/nv50_ir_lowering_gm107.cpp',
'codegen/nv50_ir_lowering_gm107.h',
'codegen/nv50_ir_lowering_nvc0.cpp',
'codegen/nv50_ir_lowering_nvc0.h',
+ 'codegen/nv50_ir_target_gv100.cpp',
+ 'codegen/nv50_ir_target_gv100.h',
'codegen/nv50_ir_target_gm107.cpp',
'codegen/nv50_ir_target_gm107.h',
'codegen/nv50_ir_target_nvc0.cpp',
'codegen/nv50_ir_target_nvc0.h',
+ 'nvc0/cla0c0qmd.h',
+ 'nvc0/clc0c0qmd.h',
+ 'nvc0/clc3c0qmd.h',
+ 'nvc0/drf.h',
+ 'nvc0/qmd.h',
+ 'nvc0/qmda0c0.c',
+ 'nvc0/qmdc0c0.c',
+ 'nvc0/qmdc3c0.c',
'nvc0/gm107_texture.xml.h',
'nvc0/nvc0_3d.xml.h',
'nvc0/nvc0_compute.c',
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index de9cce3812a..8606ba43c1a 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -188,7 +188,11 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
if (nv_dbg)
nouveau_mesa_debug = atoi(nv_dbg);
- screen->prefer_nir = debug_get_bool_option("NV50_PROG_USE_NIR", false);
+ if (dev->chipset < 0x140)
+ screen->prefer_nir = debug_get_bool_option("NV50_PROG_USE_NIR", false);
+ else
+ screen->prefer_nir = true;
+
screen->force_enable_cl = debug_get_bool_option("NOUVEAU_ENABLE_CL", false);
if (screen->force_enable_cl)
glsl_type_singleton_init_or_ref();
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
index 899d73d7398..31e7cf82233 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
@@ -218,9 +218,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NV50_2D_PATTERN_SELECT_BITMAP_1X64 0x00000002
#define NV50_2D_PATTERN_SELECT_COLOR 0x00000003
-#define NVC0_2D_UNK02B8(i0) (0x000002b8 + 0x4*(i0))
-#define NVC0_2D_UNK02B8__ESIZE 0x00000004
-#define NVC0_2D_UNK02B8__LEN 0x00000009
+#define NVC0_2D_SET_DST_COLOR_RENDER_TO_ZETA_SURFACE 0x000002b8
#define NVC0_2D_UNK2DC 0x000002dc
diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h
index 664bfae9f64..fac195d4846 100644
--- a/src/gallium/drivers/nouveau/nv_object.xml.h
+++ b/src/gallium/drivers/nouveau/nv_object.xml.h
@@ -195,6 +195,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define GM200_3D_CLASS 0x0000b197
#define GP100_3D_CLASS 0x0000c097
#define GP102_3D_CLASS 0x0000c197
+#define GV100_3D_CLASS 0x0000c397
+#define TU102_3D_CLASS 0x0000c597
#define NV50_2D_CLASS 0x0000502d
#define NVC0_2D_CLASS 0x0000902d
#define NV50_COMPUTE_CLASS 0x000050c0
@@ -207,6 +209,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define GM200_COMPUTE_CLASS 0x0000b1c0
#define GP100_COMPUTE_CLASS 0x0000c0c0
#define GP104_COMPUTE_CLASS 0x0000c1c0
+#define GV100_COMPUTE_CLASS 0x0000c3c0
+#define TU102_COMPUTE_CLASS 0x0000c5c0
#define NV84_CRYPT_CLASS 0x000074c1
#define BLOB_NVC0_PCOPY1_CLASS 0x000090b8
#define BLOB_NVC0_PCOPY0_CLASS 0x000090b5
diff --git a/src/gallium/drivers/nouveau/nvc0/cla0c0qmd.h b/src/gallium/drivers/nouveau/nvc0/cla0c0qmd.h
new file mode 100644
index 00000000000..c0829f1cdc2
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/cla0c0qmd.h
@@ -0,0 +1,660 @@
+/*******************************************************************************
+ Copyright (c) 2016 NVIDIA Corporation
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to
+ deal in the Software without restriction, including without limitation the
+ rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ sell copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+
+*******************************************************************************/
+
+/* AUTO GENERATED FILE -- DO NOT EDIT */
+
+#ifndef __CLA0C0QMD_H__
+#define __CLA0C0QMD_H__
+
+/*
+** Queue Meta Data, Version 00_06 (every field below is an MW(high:low) bit range)
+ */
+
+// The below C preprocessor definitions describe "multi-word" structures, where
+// fields may have bit numbers beyond 32. For example, MW(127:96) means
+// the field is in bits 0-31 of word number 3 of the structure. The "MW(X:Y)"
+// syntax is to distinguish from similar "X:Y" single-word definitions: the
+// macros historically used for single-word definitions would fail with
+// multi-word definitions.
+//
+// See nvmisc.h:DRF_VAL_MW() in the source code of the kernel
+// interface layer of nvidia.ko for an example of how to manipulate
+// these MW(X:Y) definitions.
+
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_A MW(30:0) /* same bits as NVA0C0_QMDV01_06_OUTER_PUT */
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_B MW(31:31)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_C MW(62:32)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_D MW(63:63)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_E MW(94:64)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_F MW(95:95)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_G MW(126:96)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_H MW(127:127)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_A_A MW(159:128)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_I MW(191:160)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_J MW(196:192)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_A MW(199:197)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_K MW(200:200)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_K_FALSE 0x00000000
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_K_TRUE 0x00000001
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_L MW(201:201)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_L_FALSE 0x00000000
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_L_TRUE 0x00000001
+#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE0 MW(202:202)
+#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE0_FALSE 0x00000000
+#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE0_TRUE 0x00000001
+#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE1 MW(203:203)
+#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE1_FALSE 0x00000000
+#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE1_TRUE 0x00000001
+#define NVA0C0_QMDV00_06_QMD_RESERVED_B MW(207:204)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_M MW(222:208)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_N MW(223:223)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_N_FALSE 0x00000000
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_N_TRUE 0x00000001
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_O MW(248:224)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_C MW(249:249)
+#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_HEADER_CACHE MW(250:250) /* bits 250..255 hold six single-bit cache-invalidate flags */
+#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE 0x00000000
+#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE 0x00000001
+#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_SAMPLER_CACHE MW(251:251)
+#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE 0x00000000
+#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE 0x00000001
+#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_DATA_CACHE MW(252:252)
+#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_DATA_CACHE_FALSE 0x00000000
+#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_DATA_CACHE_TRUE 0x00000001
+#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_DATA_CACHE MW(253:253)
+#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_DATA_CACHE_FALSE 0x00000000
+#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_DATA_CACHE_TRUE 0x00000001
+#define NVA0C0_QMDV00_06_INVALIDATE_INSTRUCTION_CACHE MW(254:254)
+#define NVA0C0_QMDV00_06_INVALIDATE_INSTRUCTION_CACHE_FALSE 0x00000000
+#define NVA0C0_QMDV00_06_INVALIDATE_INSTRUCTION_CACHE_TRUE 0x00000001
+#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_CONSTANT_CACHE MW(255:255)
+#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE 0x00000000
+#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE 0x00000001
+#define NVA0C0_QMDV00_06_PROGRAM_OFFSET MW(287:256) /* full 32-bit word (word 8) */
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_P MW(319:288)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_Q MW(327:320)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_D MW(335:328)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_R MW(351:336)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_S MW(357:352)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_E MW(365:358)
+#define NVA0C0_QMDV00_06_RELEASE_MEMBAR_TYPE MW(366:366)
+#define NVA0C0_QMDV00_06_RELEASE_MEMBAR_TYPE_FE_NONE 0x00000000
+#define NVA0C0_QMDV00_06_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR 0x00000001
+#define NVA0C0_QMDV00_06_CWD_MEMBAR_TYPE MW(369:368) /* bit 367 has no name in version 00_06 */
+#define NVA0C0_QMDV00_06_CWD_MEMBAR_TYPE_L1_NONE 0x00000000
+#define NVA0C0_QMDV00_06_CWD_MEMBAR_TYPE_L1_SYSMEMBAR 0x00000001
+#define NVA0C0_QMDV00_06_CWD_MEMBAR_TYPE_L1_MEMBAR 0x00000003 /* value 0x00000002 has no name */
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_T MW(370:370)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_T_FALSE 0x00000000
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_T_TRUE 0x00000001
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_U MW(371:371)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_U_FALSE 0x00000000
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_U_TRUE 0x00000001
+#define NVA0C0_QMDV00_06_THROTTLED MW(372:372)
+#define NVA0C0_QMDV00_06_THROTTLED_FALSE 0x00000000
+#define NVA0C0_QMDV00_06_THROTTLED_TRUE 0x00000001
+#define NVA0C0_QMDV00_06_QMD_RESERVED_E2_A MW(376:376) /* bits 373..375 have no name */
+#define NVA0C0_QMDV00_06_QMD_RESERVED_E2_B MW(377:377)
+#define NVA0C0_QMDV00_06_API_VISIBLE_CALL_LIMIT MW(378:378)
+#define NVA0C0_QMDV00_06_API_VISIBLE_CALL_LIMIT__32 0x00000000
+#define NVA0C0_QMDV00_06_API_VISIBLE_CALL_LIMIT_NO_CHECK 0x00000001
+#define NVA0C0_QMDV00_06_SHARED_MEMORY_BANK_MAPPING MW(379:379)
+#define NVA0C0_QMDV00_06_SHARED_MEMORY_BANK_MAPPING_FOUR_BYTES_PER_BANK 0x00000000
+#define NVA0C0_QMDV00_06_SHARED_MEMORY_BANK_MAPPING_EIGHT_BYTES_PER_BANK 0x00000001
+#define NVA0C0_QMDV00_06_SAMPLER_INDEX MW(382:382) /* bits 380..381 have no name */
+#define NVA0C0_QMDV00_06_SAMPLER_INDEX_INDEPENDENTLY 0x00000000
+#define NVA0C0_QMDV00_06_SAMPLER_INDEX_VIA_HEADER_INDEX 0x00000001
+#define NVA0C0_QMDV00_06_QMD_RESERVED_E3_A MW(383:383)
+#define NVA0C0_QMDV00_06_CTA_RASTER_WIDTH MW(415:384) /* 32 bits wide; HEIGHT and DEPTH below are 16 bits each */
+#define NVA0C0_QMDV00_06_CTA_RASTER_HEIGHT MW(431:416)
+#define NVA0C0_QMDV00_06_CTA_RASTER_DEPTH MW(447:432)
+#define NVA0C0_QMDV00_06_CTA_RASTER_WIDTH_RESUME MW(479:448) /* the RESUME fields repeat the width/height/depth layout 64 bits higher */
+#define NVA0C0_QMDV00_06_CTA_RASTER_HEIGHT_RESUME MW(495:480)
+#define NVA0C0_QMDV00_06_CTA_RASTER_DEPTH_RESUME MW(511:496)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_V MW(535:512) /* same bits as NVA0C0_QMDV01_06_LAUNCH_QUOTA */
+#define NVA0C0_QMDV00_06_QMD_RESERVED_F MW(542:536)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_W MW(543:543)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_W_FALSE 0x00000000
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_W_TRUE 0x00000001
+#define NVA0C0_QMDV00_06_SHARED_MEMORY_SIZE MW(561:544) /* 18-bit field */
+#define NVA0C0_QMDV00_06_QMD_RESERVED_G MW(575:562)
+#define NVA0C0_QMDV00_06_QMD_VERSION MW(579:576) /* 4 bits, followed by the 4-bit QMD_MAJOR_VERSION */
+#define NVA0C0_QMDV00_06_QMD_MAJOR_VERSION MW(583:580)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_H MW(591:584)
+#define NVA0C0_QMDV00_06_CTA_THREAD_DIMENSION0 MW(607:592) /* three consecutive 16-bit fields */
+#define NVA0C0_QMDV00_06_CTA_THREAD_DIMENSION1 MW(623:608)
+#define NVA0C0_QMDV00_06_CTA_THREAD_DIMENSION2 MW(639:624)
+#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_VALID(i) MW((640+(i)*1):(640+(i)*1)) /* one bit per index; QMD_RESERVED_I starts at bit 648, so presumably i = 0..7 */
+#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_VALID_FALSE 0x00000000
+#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_VALID_TRUE 0x00000001
+#define NVA0C0_QMDV00_06_QMD_RESERVED_I MW(668:648)
+#define NVA0C0_QMDV00_06_L1_CONFIGURATION MW(671:669) /* 3-bit field; only values 1..3 are named below */
+#define NVA0C0_QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB 0x00000001
+#define NVA0C0_QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB 0x00000002
+#define NVA0C0_QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB 0x00000003
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_X MW(703:672)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_Y MW(735:704)
+#define NVA0C0_QMDV00_06_RELEASE0_ADDRESS_LOWER MW(767:736) /* 32 low address bits; ADDRESS_UPPER adds 8 more */
+#define NVA0C0_QMDV00_06_RELEASE0_ADDRESS_UPPER MW(775:768)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_J MW(783:776)
+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP MW(790:788) /* bits 784..787 have no name; all eight 3-bit values are named below */
+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_ADD 0x00000000
+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_MIN 0x00000001
+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_MAX 0x00000002
+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_INC 0x00000003
+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_DEC 0x00000004
+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_AND 0x00000005
+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_OR 0x00000006
+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_XOR 0x00000007
+#define NVA0C0_QMDV00_06_QMD_RESERVED_K MW(791:791)
+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_FORMAT MW(793:792) /* 2-bit field; only values 0 and 1 are named */
+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32 0x00000000
+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_FORMAT_SIGNED_32 0x00000001
+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_ENABLE MW(794:794)
+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_ENABLE_FALSE 0x00000000
+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_ENABLE_TRUE 0x00000001
+#define NVA0C0_QMDV00_06_RELEASE0_STRUCTURE_SIZE MW(799:799) /* bits 795..798 have no name */
+#define NVA0C0_QMDV00_06_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS 0x00000000
+#define NVA0C0_QMDV00_06_RELEASE0_STRUCTURE_SIZE_ONE_WORD 0x00000001
+#define NVA0C0_QMDV00_06_RELEASE0_PAYLOAD MW(831:800)
+#define NVA0C0_QMDV00_06_RELEASE1_ADDRESS_LOWER MW(863:832) /* the RELEASE1 fields repeat the RELEASE0 layout 96 bits higher */
+#define NVA0C0_QMDV00_06_RELEASE1_ADDRESS_UPPER MW(871:864)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_L MW(879:872)
+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP MW(886:884) /* bits 880..883 have no name */
+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_ADD 0x00000000
+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_MIN 0x00000001
+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_MAX 0x00000002
+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_INC 0x00000003
+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_DEC 0x00000004
+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_AND 0x00000005
+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_OR 0x00000006
+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_XOR 0x00000007
+#define NVA0C0_QMDV00_06_QMD_RESERVED_M MW(887:887)
+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_FORMAT MW(889:888)
+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32 0x00000000
+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_FORMAT_SIGNED_32 0x00000001
+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_ENABLE MW(890:890)
+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_ENABLE_FALSE 0x00000000
+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_ENABLE_TRUE 0x00000001
+#define NVA0C0_QMDV00_06_RELEASE1_STRUCTURE_SIZE MW(895:895) /* bits 891..894 have no name */
+#define NVA0C0_QMDV00_06_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS 0x00000000
+#define NVA0C0_QMDV00_06_RELEASE1_STRUCTURE_SIZE_ONE_WORD 0x00000001
+#define NVA0C0_QMDV00_06_RELEASE1_PAYLOAD MW(927:896)
+#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_ADDR_LOWER(i) MW((959+(i)*64):(928+(i)*64)) /* each index i is a 64-bit record starting at bit 928+(i)*64 */
+#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_ADDR_UPPER(i) MW((967+(i)*64):(960+(i)*64))
+#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_RESERVED_ADDR(i) MW((973+(i)*64):(968+(i)*64))
+#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_INVALIDATE(i) MW((974+(i)*64):(974+(i)*64))
+#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_INVALIDATE_FALSE 0x00000000
+#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_INVALIDATE_TRUE 0x00000001
+#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_SIZE(i) MW((991+(i)*64):(975+(i)*64)) /* 17-bit field; index 7 ends at bit 1439, just below the next field (bit 1440), so presumably i = 0..7 */
+#define NVA0C0_QMDV00_06_SHADER_LOCAL_MEMORY_LOW_SIZE MW(1463:1440)
+#define NVA0C0_QMDV00_06_QMD_RESERVED_N MW(1466:1464)
+#define NVA0C0_QMDV00_06_BARRIER_COUNT MW(1471:1467) /* 5-bit field */
+#define NVA0C0_QMDV00_06_SHADER_LOCAL_MEMORY_HIGH_SIZE MW(1495:1472)
+#define NVA0C0_QMDV00_06_REGISTER_COUNT MW(1503:1496) /* 8-bit field */
+#define NVA0C0_QMDV00_06_SHADER_LOCAL_MEMORY_CRS_SIZE MW(1527:1504)
+#define NVA0C0_QMDV00_06_SASS_VERSION MW(1535:1528)
+#define NVA0C0_QMDV00_06_QMD_SPARE_A MW(1567:1536) /* QMD_SPARE_A..N: fourteen full 32-bit words, bits 1536..1983 */
+#define NVA0C0_QMDV00_06_QMD_SPARE_B MW(1599:1568)
+#define NVA0C0_QMDV00_06_QMD_SPARE_C MW(1631:1600)
+#define NVA0C0_QMDV00_06_QMD_SPARE_D MW(1663:1632)
+#define NVA0C0_QMDV00_06_QMD_SPARE_E MW(1695:1664)
+#define NVA0C0_QMDV00_06_QMD_SPARE_F MW(1727:1696)
+#define NVA0C0_QMDV00_06_QMD_SPARE_G MW(1759:1728)
+#define NVA0C0_QMDV00_06_QMD_SPARE_H MW(1791:1760)
+#define NVA0C0_QMDV00_06_QMD_SPARE_I MW(1823:1792)
+#define NVA0C0_QMDV00_06_QMD_SPARE_J MW(1855:1824)
+#define NVA0C0_QMDV00_06_QMD_SPARE_K MW(1887:1856)
+#define NVA0C0_QMDV00_06_QMD_SPARE_L MW(1919:1888)
+#define NVA0C0_QMDV00_06_QMD_SPARE_M MW(1951:1920)
+#define NVA0C0_QMDV00_06_QMD_SPARE_N MW(1983:1952)
+#define NVA0C0_QMDV00_06_DEBUG_ID_UPPER MW(2015:1984) /* UPPER sits in the lower-numbered word, LOWER in the last word (highest bit 2047) */
+#define NVA0C0_QMDV00_06_DEBUG_ID_LOWER MW(2047:2016)
+
+
+/*
+** Queue Meta Data, Version 01_06 (every field below is an MW(high:low) bit range)
+ */
+
+#define NVA0C0_QMDV01_06_OUTER_PUT MW(30:0) /* words 0-3 each hold a 31-bit value plus a 1-bit overflow flag in the top bit */
+#define NVA0C0_QMDV01_06_OUTER_OVERFLOW MW(31:31)
+#define NVA0C0_QMDV01_06_OUTER_GET MW(62:32)
+#define NVA0C0_QMDV01_06_OUTER_STICKY_OVERFLOW MW(63:63)
+#define NVA0C0_QMDV01_06_INNER_GET MW(94:64)
+#define NVA0C0_QMDV01_06_INNER_OVERFLOW MW(95:95)
+#define NVA0C0_QMDV01_06_INNER_PUT MW(126:96)
+#define NVA0C0_QMDV01_06_INNER_STICKY_OVERFLOW MW(127:127)
+#define NVA0C0_QMDV01_06_QMD_RESERVED_A_A MW(159:128)
+#define NVA0C0_QMDV01_06_SCHEDULER_NEXT_QMD_POINTER MW(191:160)
+#define NVA0C0_QMDV01_06_QMD_GROUP_ID MW(197:192) /* 6-bit field */
+#define NVA0C0_QMDV01_06_QMD_RESERVED_A MW(199:198)
+#define NVA0C0_QMDV01_06_SCHEDULE_ON_PUT_UPDATE_ENABLE MW(200:200)
+#define NVA0C0_QMDV01_06_SCHEDULE_ON_PUT_UPDATE_ENABLE_FALSE 0x00000000
+#define NVA0C0_QMDV01_06_SCHEDULE_ON_PUT_UPDATE_ENABLE_TRUE 0x00000001
+#define NVA0C0_QMDV01_06_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST MW(201:201)
+#define NVA0C0_QMDV01_06_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000
+#define NVA0C0_QMDV01_06_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001
+#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE0 MW(202:202)
+#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE0_FALSE 0x00000000
+#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE0_TRUE 0x00000001
+#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE1 MW(203:203)
+#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE1_FALSE 0x00000000
+#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE1_TRUE 0x00000001
+#define NVA0C0_QMDV01_06_REQUIRE_SCHEDULING_PCAS MW(204:204)
+#define NVA0C0_QMDV01_06_REQUIRE_SCHEDULING_PCAS_FALSE 0x00000000
+#define NVA0C0_QMDV01_06_REQUIRE_SCHEDULING_PCAS_TRUE 0x00000001
+#define NVA0C0_QMDV01_06_QMD_RESERVED_B MW(207:205)
+#define NVA0C0_QMDV01_06_SKED_PRIVATE_LIST_ADDR MW(222:208)
+#define NVA0C0_QMDV01_06_SKED_PRIVATE_LIST_VALID MW(223:223)
+#define NVA0C0_QMDV01_06_SKED_PRIVATE_LIST_VALID_FALSE 0x00000000
+#define NVA0C0_QMDV01_06_SKED_PRIVATE_LIST_VALID_TRUE 0x00000001
+#define NVA0C0_QMDV01_06_CIRCULAR_QUEUE_SIZE MW(248:224) /* 25-bit field */
+#define NVA0C0_QMDV01_06_QMD_RESERVED_C MW(249:249)
+#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_HEADER_CACHE MW(250:250) /* bits 250..255 hold six single-bit cache-invalidate flags */
+#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE 0x00000000
+#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE 0x00000001
+#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_SAMPLER_CACHE MW(251:251)
+#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE 0x00000000
+#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE 0x00000001
+#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_DATA_CACHE MW(252:252)
+#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_DATA_CACHE_FALSE 0x00000000
+#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_DATA_CACHE_TRUE 0x00000001
+#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_DATA_CACHE MW(253:253)
+#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_DATA_CACHE_FALSE 0x00000000
+#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_DATA_CACHE_TRUE 0x00000001
+#define NVA0C0_QMDV01_06_INVALIDATE_INSTRUCTION_CACHE MW(254:254)
+#define NVA0C0_QMDV01_06_INVALIDATE_INSTRUCTION_CACHE_FALSE 0x00000000
+#define NVA0C0_QMDV01_06_INVALIDATE_INSTRUCTION_CACHE_TRUE 0x00000001
+#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_CONSTANT_CACHE MW(255:255)
+#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE 0x00000000
+#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE 0x00000001
+#define NVA0C0_QMDV01_06_PROGRAM_OFFSET MW(287:256) /* full 32-bit word (word 8) */
+#define NVA0C0_QMDV01_06_CIRCULAR_QUEUE_ADDR_LOWER MW(319:288) /* 32 low address bits; ADDR_UPPER adds 8 more */
+#define NVA0C0_QMDV01_06_CIRCULAR_QUEUE_ADDR_UPPER MW(327:320)
+#define NVA0C0_QMDV01_06_QMD_RESERVED_D MW(335:328)
+#define NVA0C0_QMDV01_06_CIRCULAR_QUEUE_ENTRY_SIZE MW(351:336)
+#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_ID MW(357:352)
+#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE MW(365:358)
+#define NVA0C0_QMDV01_06_RELEASE_MEMBAR_TYPE MW(366:366)
+#define NVA0C0_QMDV01_06_RELEASE_MEMBAR_TYPE_FE_NONE 0x00000000
+#define NVA0C0_QMDV01_06_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR 0x00000001
+#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_INCR_ENABLE MW(367:367) /* this bit has no name in the 00_06 layout */
+#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE 0x00000000
+#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE 0x00000001
+#define NVA0C0_QMDV01_06_CWD_MEMBAR_TYPE MW(369:368)
+#define NVA0C0_QMDV01_06_CWD_MEMBAR_TYPE_L1_NONE 0x00000000
+#define NVA0C0_QMDV01_06_CWD_MEMBAR_TYPE_L1_SYSMEMBAR 0x00000001
+#define NVA0C0_QMDV01_06_CWD_MEMBAR_TYPE_L1_MEMBAR 0x00000003 /* value 0x00000002 has no name */
+#define NVA0C0_QMDV01_06_SEQUENTIALLY_RUN_CTAS MW(370:370)
+#define NVA0C0_QMDV01_06_SEQUENTIALLY_RUN_CTAS_FALSE 0x00000000
+#define NVA0C0_QMDV01_06_SEQUENTIALLY_RUN_CTAS_TRUE 0x00000001
+#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_DECR_ENABLE MW(371:371)
+#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE 0x00000000
+#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE 0x00000001
+#define NVA0C0_QMDV01_06_THROTTLED MW(372:372)
+#define NVA0C0_QMDV01_06_THROTTLED_FALSE 0x00000000
+#define NVA0C0_QMDV01_06_THROTTLED_TRUE 0x00000001
+#define NVA0C0_QMDV01_06_FP32_NAN_BEHAVIOR MW(376:376) /* bits 373..375 have no name */
+#define NVA0C0_QMDV01_06_FP32_NAN_BEHAVIOR_LEGACY 0x00000000
+#define NVA0C0_QMDV01_06_FP32_NAN_BEHAVIOR_FP64_COMPATIBLE 0x00000001
+#define NVA0C0_QMDV01_06_FP32_F2I_NAN_BEHAVIOR MW(377:377)
+#define NVA0C0_QMDV01_06_FP32_F2I_NAN_BEHAVIOR_PASS_ZERO 0x00000000
+#define NVA0C0_QMDV01_06_FP32_F2I_NAN_BEHAVIOR_PASS_INDEFINITE 0x00000001
+#define NVA0C0_QMDV01_06_API_VISIBLE_CALL_LIMIT MW(378:378)
+#define NVA0C0_QMDV01_06_API_VISIBLE_CALL_LIMIT__32 0x00000000
+#define NVA0C0_QMDV01_06_API_VISIBLE_CALL_LIMIT_NO_CHECK 0x00000001
+#define NVA0C0_QMDV01_06_SHARED_MEMORY_BANK_MAPPING MW(379:379)
+#define NVA0C0_QMDV01_06_SHARED_MEMORY_BANK_MAPPING_FOUR_BYTES_PER_BANK 0x00000000
+#define NVA0C0_QMDV01_06_SHARED_MEMORY_BANK_MAPPING_EIGHT_BYTES_PER_BANK 0x00000001
+#define NVA0C0_QMDV01_06_SAMPLER_INDEX MW(382:382) /* bits 380..381 have no name */
+#define NVA0C0_QMDV01_06_SAMPLER_INDEX_INDEPENDENTLY 0x00000000
+#define NVA0C0_QMDV01_06_SAMPLER_INDEX_VIA_HEADER_INDEX 0x00000001
+#define NVA0C0_QMDV01_06_FP32_NARROW_INSTRUCTION MW(383:383)
+#define NVA0C0_QMDV01_06_FP32_NARROW_INSTRUCTION_KEEP_DENORMS 0x00000000
+#define NVA0C0_QMDV01_06_FP32_NARROW_INSTRUCTION_FLUSH_DENORMS 0x00000001
+#define NVA0C0_QMDV01_06_CTA_RASTER_WIDTH MW(415:384) /* 32 bits wide; HEIGHT and DEPTH below are 16 bits each */
+#define NVA0C0_QMDV01_06_CTA_RASTER_HEIGHT MW(431:416)
+#define NVA0C0_QMDV01_06_CTA_RASTER_DEPTH MW(447:432)
+#define NVA0C0_QMDV01_06_CTA_RASTER_WIDTH_RESUME MW(479:448) /* the RESUME fields repeat the width/height/depth layout 64 bits higher */
+#define NVA0C0_QMDV01_06_CTA_RASTER_HEIGHT_RESUME MW(495:480)
+#define NVA0C0_QMDV01_06_CTA_RASTER_DEPTH_RESUME MW(511:496)
+#define NVA0C0_QMDV01_06_LAUNCH_QUOTA MW(535:512) /* 24-bit field; its enable flag is bit 543 */
+#define NVA0C0_QMDV01_06_QMD_RESERVED_F MW(542:536)
+#define NVA0C0_QMDV01_06_LAUNCH_QUOTA_ENABLE MW(543:543)
+#define NVA0C0_QMDV01_06_LAUNCH_QUOTA_ENABLE_FALSE 0x00000000
+#define NVA0C0_QMDV01_06_LAUNCH_QUOTA_ENABLE_TRUE 0x00000001
+#define NVA0C0_QMDV01_06_SHARED_MEMORY_SIZE MW(561:544) /* 18-bit field */
+#define NVA0C0_QMDV01_06_QMD_RESERVED_G MW(575:562)
+#define NVA0C0_QMDV01_06_QMD_VERSION MW(579:576) /* 4 bits, followed by the 4-bit QMD_MAJOR_VERSION */
+#define NVA0C0_QMDV01_06_QMD_MAJOR_VERSION MW(583:580)
+#define NVA0C0_QMDV01_06_QMD_RESERVED_H MW(591:584)
+#define NVA0C0_QMDV01_06_CTA_THREAD_DIMENSION0 MW(607:592) /* three consecutive 16-bit fields */
+#define NVA0C0_QMDV01_06_CTA_THREAD_DIMENSION1 MW(623:608)
+#define NVA0C0_QMDV01_06_CTA_THREAD_DIMENSION2 MW(639:624)
+#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_VALID(i) MW((640+(i)*1):(640+(i)*1)) /* one bit per index; QMD_RESERVED_I starts at bit 648, so presumably i = 0..7 */
+#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_VALID_FALSE 0x00000000
+#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_VALID_TRUE 0x00000001
+#define NVA0C0_QMDV01_06_QMD_RESERVED_I MW(668:648)
+#define NVA0C0_QMDV01_06_L1_CONFIGURATION MW(671:669) /* 3-bit field; only values 1..3 are named below */
+#define NVA0C0_QMDV01_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB 0x00000001
+#define NVA0C0_QMDV01_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB 0x00000002
+#define NVA0C0_QMDV01_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB 0x00000003
+#define NVA0C0_QMDV01_06_SM_DISABLE_MASK_LOWER MW(703:672) /* LOWER and UPPER are two full 32-bit words */
+#define NVA0C0_QMDV01_06_SM_DISABLE_MASK_UPPER MW(735:704)
+#define NVA0C0_QMDV01_06_RELEASE0_ADDRESS_LOWER MW(767:736) /* 32 low address bits; ADDRESS_UPPER adds 8 more */
+#define NVA0C0_QMDV01_06_RELEASE0_ADDRESS_UPPER MW(775:768)
+#define NVA0C0_QMDV01_06_QMD_RESERVED_J MW(783:776)
+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP MW(790:788) /* bits 784..787 have no name; all eight 3-bit values are named below */
+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_ADD 0x00000000
+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_MIN 0x00000001
+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_MAX 0x00000002
+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_INC 0x00000003
+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_DEC 0x00000004
+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_AND 0x00000005
+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_OR 0x00000006
+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_XOR 0x00000007
+#define NVA0C0_QMDV01_06_QMD_RESERVED_K MW(791:791)
+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_FORMAT MW(793:792) /* 2-bit field; only values 0 and 1 are named */
+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32 0x00000000
+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_FORMAT_SIGNED_32 0x00000001
+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_ENABLE MW(794:794)
+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_ENABLE_FALSE 0x00000000
+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_ENABLE_TRUE 0x00000001
+#define NVA0C0_QMDV01_06_RELEASE0_STRUCTURE_SIZE MW(799:799) /* bits 795..798 have no name */
+#define NVA0C0_QMDV01_06_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS 0x00000000
+#define NVA0C0_QMDV01_06_RELEASE0_STRUCTURE_SIZE_ONE_WORD 0x00000001
+#define NVA0C0_QMDV01_06_RELEASE0_PAYLOAD MW(831:800)
+#define NVA0C0_QMDV01_06_RELEASE1_ADDRESS_LOWER MW(863:832) /* the RELEASE1 fields repeat the RELEASE0 layout 96 bits higher */
+#define NVA0C0_QMDV01_06_RELEASE1_ADDRESS_UPPER MW(871:864)
+#define NVA0C0_QMDV01_06_QMD_RESERVED_L MW(879:872)
+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP MW(886:884) /* bits 880..883 have no name */
+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_ADD 0x00000000
+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_MIN 0x00000001
+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_MAX 0x00000002
+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_INC 0x00000003
+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_DEC 0x00000004
+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_AND 0x00000005
+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_OR 0x00000006
+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_XOR 0x00000007
+#define NVA0C0_QMDV01_06_QMD_RESERVED_M MW(887:887)
+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_FORMAT MW(889:888)
+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32 0x00000000
+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_FORMAT_SIGNED_32 0x00000001
+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_ENABLE MW(890:890)
+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_ENABLE_FALSE 0x00000000
+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_ENABLE_TRUE 0x00000001
+#define NVA0C0_QMDV01_06_RELEASE1_STRUCTURE_SIZE MW(895:895) /* bits 891..894 have no name */
+#define NVA0C0_QMDV01_06_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS 0x00000000
+#define NVA0C0_QMDV01_06_RELEASE1_STRUCTURE_SIZE_ONE_WORD 0x00000001
+#define NVA0C0_QMDV01_06_RELEASE1_PAYLOAD MW(927:896)
+#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_ADDR_LOWER(i) MW((959+(i)*64):(928+(i)*64)) /* each index i is a 64-bit record starting at bit 928+(i)*64 */
+#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_ADDR_UPPER(i) MW((967+(i)*64):(960+(i)*64))
+#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_RESERVED_ADDR(i) MW((973+(i)*64):(968+(i)*64))
+#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_INVALIDATE(i) MW((974+(i)*64):(974+(i)*64))
+#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_INVALIDATE_FALSE 0x00000000
+#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_INVALIDATE_TRUE 0x00000001
+#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_SIZE(i) MW((991+(i)*64):(975+(i)*64)) /* 17-bit field; index 7 ends at bit 1439, just below the next field (bit 1440), so presumably i = 0..7 */
+#define NVA0C0_QMDV01_06_SHADER_LOCAL_MEMORY_LOW_SIZE MW(1463:1440)
+#define NVA0C0_QMDV01_06_QMD_RESERVED_N MW(1466:1464)
+#define NVA0C0_QMDV01_06_BARRIER_COUNT MW(1471:1467) /* 5-bit field */
+#define NVA0C0_QMDV01_06_SHADER_LOCAL_MEMORY_HIGH_SIZE MW(1495:1472)
+#define NVA0C0_QMDV01_06_REGISTER_COUNT MW(1503:1496) /* 8-bit field */
+#define NVA0C0_QMDV01_06_SHADER_LOCAL_MEMORY_CRS_SIZE MW(1527:1504)
+#define NVA0C0_QMDV01_06_SASS_VERSION MW(1535:1528)
+#define NVA0C0_QMDV01_06_HW_ONLY_INNER_GET MW(1566:1536) /* bits 1536..1631 are QMD_SPARE_A..C in the 00_06 layout */
+#define NVA0C0_QMDV01_06_HW_ONLY_REQUIRE_SCHEDULING_PCAS MW(1567:1567)
+#define NVA0C0_QMDV01_06_HW_ONLY_INNER_PUT MW(1598:1568)
+#define NVA0C0_QMDV01_06_HW_ONLY_SCHEDULE_ON_PUT_UPDATE_ENABLE MW(1599:1599)
+#define NVA0C0_QMDV01_06_QUEUE_ENTRIES_PER_CTA_MINUS_ONE MW(1606:1600) /* 7-bit field */
+#define NVA0C0_QMDV01_06_QMD_RESERVED_Q MW(1609:1607)
+#define NVA0C0_QMDV01_06_COALESCE_WAITING_PERIOD MW(1617:1610) /* 8-bit field */
+#define NVA0C0_QMDV01_06_QMD_RESERVED_R MW(1631:1618)
+#define NVA0C0_QMDV01_06_QMD_SPARE_D MW(1663:1632) /* QMD_SPARE_D..N: eleven full 32-bit words, bits 1632..1983 */
+#define NVA0C0_QMDV01_06_QMD_SPARE_E MW(1695:1664)
+#define NVA0C0_QMDV01_06_QMD_SPARE_F MW(1727:1696)
+#define NVA0C0_QMDV01_06_QMD_SPARE_G MW(1759:1728)
+#define NVA0C0_QMDV01_06_QMD_SPARE_H MW(1791:1760)
+#define NVA0C0_QMDV01_06_QMD_SPARE_I MW(1823:1792)
+#define NVA0C0_QMDV01_06_QMD_SPARE_J MW(1855:1824)
+#define NVA0C0_QMDV01_06_QMD_SPARE_K MW(1887:1856)
+#define NVA0C0_QMDV01_06_QMD_SPARE_L MW(1919:1888)
+#define NVA0C0_QMDV01_06_QMD_SPARE_M MW(1951:1920)
+#define NVA0C0_QMDV01_06_QMD_SPARE_N MW(1983:1952)
+#define NVA0C0_QMDV01_06_DEBUG_ID_UPPER MW(2015:1984) /* UPPER sits in the lower-numbered word, LOWER in the last word (highest bit 2047) */
+#define NVA0C0_QMDV01_06_DEBUG_ID_LOWER MW(2047:2016)
+
+
+/*
+** Queue Meta Data, Version 01_07
+ */
+
+#define NVA0C0_QMDV01_07_OUTER_PUT MW(30:0)
+#define NVA0C0_QMDV01_07_OUTER_OVERFLOW MW(31:31)
+#define NVA0C0_QMDV01_07_OUTER_GET MW(62:32)
+#define NVA0C0_QMDV01_07_OUTER_STICKY_OVERFLOW MW(63:63)
+#define NVA0C0_QMDV01_07_INNER_GET MW(94:64)
+#define NVA0C0_QMDV01_07_INNER_OVERFLOW MW(95:95)
+#define NVA0C0_QMDV01_07_INNER_PUT MW(126:96)
+#define NVA0C0_QMDV01_07_INNER_STICKY_OVERFLOW MW(127:127)
+#define NVA0C0_QMDV01_07_QMD_RESERVED_A_A MW(159:128)
+#define NVA0C0_QMDV01_07_DEPENDENT_QMD_POINTER MW(191:160)
+#define NVA0C0_QMDV01_07_QMD_GROUP_ID MW(197:192)
+#define NVA0C0_QMDV01_07_QMD_RESERVED_A MW(200:198)
+#define NVA0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST MW(201:201)
+#define NVA0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000
+#define NVA0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001
+#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0 MW(202:202)
+#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0_FALSE 0x00000000
+#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0_TRUE 0x00000001
+#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1 MW(203:203)
+#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1_FALSE 0x00000000
+#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1_TRUE 0x00000001
+#define NVA0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS MW(204:204)
+#define NVA0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS_FALSE 0x00000000
+#define NVA0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS_TRUE 0x00000001
+#define NVA0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE MW(205:205)
+#define NVA0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE 0x00000000
+#define NVA0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE 0x00000001
+#define NVA0C0_QMDV01_07_DEPENDENT_QMD_TYPE MW(206:206)
+#define NVA0C0_QMDV01_07_DEPENDENT_QMD_TYPE_QUEUE 0x00000000
+#define NVA0C0_QMDV01_07_DEPENDENT_QMD_TYPE_GRID 0x00000001
+#define NVA0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY MW(207:207)
+#define NVA0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY_FALSE 0x00000000
+#define NVA0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY_TRUE 0x00000001
+#define NVA0C0_QMDV01_07_QMD_RESERVED_B MW(223:208)
+#define NVA0C0_QMDV01_07_CIRCULAR_QUEUE_SIZE MW(248:224)
+#define NVA0C0_QMDV01_07_QMD_RESERVED_C MW(249:249)
+#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE MW(250:250)
+#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE 0x00000000
+#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE 0x00000001
+#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE MW(251:251)
+#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE 0x00000000
+#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE 0x00000001
+#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE MW(252:252)
+#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE_FALSE 0x00000000
+#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE_TRUE 0x00000001
+#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE MW(253:253) // 1-bit flag; each flag's _FALSE/_TRUE value names follow its field definition
+#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE_FALSE 0x00000000
+#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE_TRUE 0x00000001
+#define NVA0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE MW(254:254)
+#define NVA0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE_FALSE 0x00000000
+#define NVA0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE_TRUE 0x00000001
+#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE MW(255:255) // bit 255 is the top bit of 32-bit word 7
+#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE 0x00000000
+#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE 0x00000001
+#define NVA0C0_QMDV01_07_PROGRAM_OFFSET MW(287:256) // occupies all of word 8
+#define NVA0C0_QMDV01_07_CIRCULAR_QUEUE_ADDR_LOWER MW(319:288) // 32 bits here + 8 bits in _UPPER = 40 bits across the LOWER/UPPER pair
+#define NVA0C0_QMDV01_07_CIRCULAR_QUEUE_ADDR_UPPER MW(327:320)
+#define NVA0C0_QMDV01_07_QMD_RESERVED_D MW(335:328)
+#define NVA0C0_QMDV01_07_CIRCULAR_QUEUE_ENTRY_SIZE MW(351:336) // 16-bit field; units are not stated in this header
+#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_ID MW(357:352) // 6-bit field
+#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE MW(365:358) // 8-bit field; name suggests it holds (delta - 1) -- TODO confirm
+#define NVA0C0_QMDV01_07_RELEASE_MEMBAR_TYPE MW(366:366)
+#define NVA0C0_QMDV01_07_RELEASE_MEMBAR_TYPE_FE_NONE 0x00000000
+#define NVA0C0_QMDV01_07_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR 0x00000001
+#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE MW(367:367)
+#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE 0x00000000
+#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE 0x00000001
+#define NVA0C0_QMDV01_07_CWD_MEMBAR_TYPE MW(369:368) // 2-bit field; only encodings 0, 1 and 3 are named below (2 has no name)
+#define NVA0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_NONE 0x00000000
+#define NVA0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_SYSMEMBAR 0x00000001
+#define NVA0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_MEMBAR 0x00000003
+#define NVA0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS MW(370:370)
+#define NVA0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS_FALSE 0x00000000
+#define NVA0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS_TRUE 0x00000001
+#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE MW(371:371)
+#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE 0x00000000
+#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE 0x00000001
+#define NVA0C0_QMDV01_07_THROTTLED MW(372:372)
+#define NVA0C0_QMDV01_07_THROTTLED_FALSE 0x00000000
+#define NVA0C0_QMDV01_07_THROTTLED_TRUE 0x00000001
+#define NVA0C0_QMDV01_07_FP32_NAN_BEHAVIOR MW(376:376) // bits 375:373 have no definition in this layout
+#define NVA0C0_QMDV01_07_FP32_NAN_BEHAVIOR_LEGACY 0x00000000
+#define NVA0C0_QMDV01_07_FP32_NAN_BEHAVIOR_FP64_COMPATIBLE 0x00000001
+#define NVA0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR MW(377:377)
+#define NVA0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR_PASS_ZERO 0x00000000
+#define NVA0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR_PASS_INDEFINITE 0x00000001
+#define NVA0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT MW(378:378)
+#define NVA0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT__32 0x00000000
+#define NVA0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT_NO_CHECK 0x00000001
+#define NVA0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING MW(379:379)
+#define NVA0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING_FOUR_BYTES_PER_BANK 0x00000000
+#define NVA0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING_EIGHT_BYTES_PER_BANK 0x00000001
+#define NVA0C0_QMDV01_07_SAMPLER_INDEX MW(382:382) // bits 381:380 have no definition in this layout
+#define NVA0C0_QMDV01_07_SAMPLER_INDEX_INDEPENDENTLY 0x00000000
+#define NVA0C0_QMDV01_07_SAMPLER_INDEX_VIA_HEADER_INDEX 0x00000001
+#define NVA0C0_QMDV01_07_FP32_NARROW_INSTRUCTION MW(383:383) // bit 383 is the top bit of word 11
+#define NVA0C0_QMDV01_07_FP32_NARROW_INSTRUCTION_KEEP_DENORMS 0x00000000
+#define NVA0C0_QMDV01_07_FP32_NARROW_INSTRUCTION_FLUSH_DENORMS 0x00000001
+#define NVA0C0_QMDV01_07_CTA_RASTER_WIDTH MW(415:384) // word 12; width is 32 bits wide while height and depth below are 16 bits each
+#define NVA0C0_QMDV01_07_CTA_RASTER_HEIGHT MW(431:416)
+#define NVA0C0_QMDV01_07_CTA_RASTER_DEPTH MW(447:432)
+#define NVA0C0_QMDV01_07_CTA_RASTER_WIDTH_RESUME MW(479:448) // the _RESUME trio repeats the 32/16/16-bit split of the three fields above
+#define NVA0C0_QMDV01_07_CTA_RASTER_HEIGHT_RESUME MW(495:480)
+#define NVA0C0_QMDV01_07_CTA_RASTER_DEPTH_RESUME MW(511:496)
+#define NVA0C0_QMDV01_07_QUEUE_ENTRIES_PER_CTA_MINUS_ONE MW(518:512) // 7-bit field; bits 521:519 have no definition in this layout
+#define NVA0C0_QMDV01_07_COALESCE_WAITING_PERIOD MW(529:522) // 8-bit field; bits 543:530 have no definition in this layout
+#define NVA0C0_QMDV01_07_SHARED_MEMORY_SIZE MW(561:544) // 18-bit field; units are not stated in this header
+#define NVA0C0_QMDV01_07_QMD_RESERVED_G MW(575:562)
+#define NVA0C0_QMDV01_07_QMD_VERSION MW(579:576) // 4-bit field; presumably the "07" of V01_07, with QMD_MAJOR_VERSION the "01" -- TODO confirm
+#define NVA0C0_QMDV01_07_QMD_MAJOR_VERSION MW(583:580)
+#define NVA0C0_QMDV01_07_QMD_RESERVED_H MW(591:584)
+#define NVA0C0_QMDV01_07_CTA_THREAD_DIMENSION0 MW(607:592) // three consecutive 16-bit fields, DIMENSION0..DIMENSION2
+#define NVA0C0_QMDV01_07_CTA_THREAD_DIMENSION1 MW(623:608)
+#define NVA0C0_QMDV01_07_CTA_THREAD_DIMENSION2 MW(639:624)
+#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_VALID(i) MW((640+(i)*1):(640+(i)*1)) // one bit per index i, at bit 640+i; QMD_RESERVED_I starts at 648, leaving room for i = 0..7
+#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_VALID_FALSE 0x00000000
+#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_VALID_TRUE 0x00000001
+#define NVA0C0_QMDV01_07_QMD_RESERVED_I MW(668:648)
+#define NVA0C0_QMDV01_07_L1_CONFIGURATION MW(671:669) // 3-bit field; only encodings 1..3 are named below
+#define NVA0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB 0x00000001
+#define NVA0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB 0x00000002
+#define NVA0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB 0x00000003
+#define NVA0C0_QMDV01_07_SM_DISABLE_MASK_LOWER MW(703:672) // 64 mask bits split over word 21 (LOWER) and word 22 (UPPER)
+#define NVA0C0_QMDV01_07_SM_DISABLE_MASK_UPPER MW(735:704)
+#define NVA0C0_QMDV01_07_RELEASE0_ADDRESS_LOWER MW(767:736) // RELEASE0 group: address (32 bits LOWER + 8 bits UPPER), reduction controls, then a 32-bit payload
+#define NVA0C0_QMDV01_07_RELEASE0_ADDRESS_UPPER MW(775:768)
+#define NVA0C0_QMDV01_07_QMD_RESERVED_J MW(783:776)
+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP MW(790:788) // 3-bit field, all eight encodings are named below; bits 787:784 have no definition
+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_ADD 0x00000000
+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_MIN 0x00000001
+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_MAX 0x00000002
+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_INC 0x00000003
+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_DEC 0x00000004
+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_AND 0x00000005
+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_OR 0x00000006
+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_XOR 0x00000007
+#define NVA0C0_QMDV01_07_QMD_RESERVED_K MW(791:791)
+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT MW(793:792) // 2-bit field; only encodings 0 and 1 are named below
+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32 0x00000000
+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT_SIGNED_32 0x00000001
+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE MW(794:794)
+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE_FALSE 0x00000000
+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE_TRUE 0x00000001
+#define NVA0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE MW(799:799) // bits 798:795 have no definition in this layout
+#define NVA0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS 0x00000000
+#define NVA0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE_ONE_WORD 0x00000001
+#define NVA0C0_QMDV01_07_RELEASE0_PAYLOAD MW(831:800)
+#define NVA0C0_QMDV01_07_RELEASE1_ADDRESS_LOWER MW(863:832) // RELEASE1 group: same field layout as RELEASE0, every range shifted up by 96 bits (3 words)
+#define NVA0C0_QMDV01_07_RELEASE1_ADDRESS_UPPER MW(871:864)
+#define NVA0C0_QMDV01_07_QMD_RESERVED_L MW(879:872)
+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP MW(886:884) // bits 883:880 have no definition in this layout
+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_ADD 0x00000000
+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_MIN 0x00000001
+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_MAX 0x00000002
+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_INC 0x00000003
+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_DEC 0x00000004
+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_AND 0x00000005
+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_OR 0x00000006
+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_XOR 0x00000007
+#define NVA0C0_QMDV01_07_QMD_RESERVED_M MW(887:887)
+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT MW(889:888)
+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32 0x00000000
+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT_SIGNED_32 0x00000001
+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE MW(890:890)
+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE_FALSE 0x00000000
+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE_TRUE 0x00000001
+#define NVA0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE MW(895:895) // bits 894:891 have no definition in this layout
+#define NVA0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS 0x00000000
+#define NVA0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE_ONE_WORD 0x00000001
+#define NVA0C0_QMDV01_07_RELEASE1_PAYLOAD MW(927:896)
+#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_ADDR_LOWER(i) MW((959+(i)*64):(928+(i)*64)) // per-buffer descriptor i: 64 bits (two words) each, first one starting at bit 928
+#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_ADDR_UPPER(i) MW((967+(i)*64):(960+(i)*64)) // 8 further address bits
+#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_RESERVED_ADDR(i) MW((973+(i)*64):(968+(i)*64))
+#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE(i) MW((974+(i)*64):(974+(i)*64)) // 1-bit flag inside each descriptor
+#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE_FALSE 0x00000000
+#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE_TRUE 0x00000001
+#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_SIZE(i) MW((991+(i)*64):(975+(i)*64)) // 17-bit field; completes the 64-bit descriptor (bits 928..991 for i = 0)
+#define NVA0C0_QMDV01_07_SHADER_LOCAL_MEMORY_LOW_SIZE MW(1463:1440) // bit 1440 = 928 + 8*64, i.e. directly after descriptor i = 7
+#define NVA0C0_QMDV01_07_QMD_RESERVED_N MW(1466:1464)
+#define NVA0C0_QMDV01_07_BARRIER_COUNT MW(1471:1467) // 5-bit field
+#define NVA0C0_QMDV01_07_SHADER_LOCAL_MEMORY_HIGH_SIZE MW(1495:1472)
+#define NVA0C0_QMDV01_07_REGISTER_COUNT MW(1503:1496) // 8-bit field
+#define NVA0C0_QMDV01_07_SHADER_LOCAL_MEMORY_CRS_SIZE MW(1527:1504)
+#define NVA0C0_QMDV01_07_SASS_VERSION MW(1535:1528)
+#define NVA0C0_QMDV01_07_HW_ONLY_INNER_GET MW(1566:1536) // HW_ONLY_ prefix: presumably state owned by hardware rather than written by the driver -- TODO confirm
+#define NVA0C0_QMDV01_07_HW_ONLY_REQUIRE_SCHEDULING_PCAS MW(1567:1567)
+#define NVA0C0_QMDV01_07_HW_ONLY_INNER_PUT MW(1598:1568)
+#define NVA0C0_QMDV01_07_QMD_RESERVED_P MW(1599:1599)
+#define NVA0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX MW(1629:1600)
+#define NVA0C0_QMDV01_07_QMD_RESERVED_Q MW(1630:1630)
+#define NVA0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID MW(1631:1631)
+#define NVA0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE 0x00000000
+#define NVA0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE 0x00000001
+#define NVA0C0_QMDV01_07_HW_ONLY_SKED_NEXT_QMD_POINTER MW(1663:1632)
+#define NVA0C0_QMDV01_07_QMD_SPARE_E MW(1695:1664) // ten full-word spares, E..N, in words 52..61
+#define NVA0C0_QMDV01_07_QMD_SPARE_F MW(1727:1696)
+#define NVA0C0_QMDV01_07_QMD_SPARE_G MW(1759:1728)
+#define NVA0C0_QMDV01_07_QMD_SPARE_H MW(1791:1760)
+#define NVA0C0_QMDV01_07_QMD_SPARE_I MW(1823:1792)
+#define NVA0C0_QMDV01_07_QMD_SPARE_J MW(1855:1824)
+#define NVA0C0_QMDV01_07_QMD_SPARE_K MW(1887:1856)
+#define NVA0C0_QMDV01_07_QMD_SPARE_L MW(1919:1888)
+#define NVA0C0_QMDV01_07_QMD_SPARE_M MW(1951:1920)
+#define NVA0C0_QMDV01_07_QMD_SPARE_N MW(1983:1952)
+#define NVA0C0_QMDV01_07_DEBUG_ID_UPPER MW(2015:1984) // note: UPPER is in the lower-numbered word (62), LOWER in word 63
+#define NVA0C0_QMDV01_07_DEBUG_ID_LOWER MW(2047:2016) // bit 2047 is the highest bit defined: the layout spans 64 32-bit words
+
+
+
+#endif // #ifndef __CLA0C0QMD_H__
diff --git a/src/gallium/drivers/nouveau/nvc0/clc0c0qmd.h b/src/gallium/drivers/nouveau/nvc0/clc0c0qmd.h
new file mode 100644
index 00000000000..040bdcd9dcb
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/clc0c0qmd.h
@@ -0,0 +1,665 @@
+/*******************************************************************************
+ Copyright (c) 2016 NVIDIA Corporation
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to
+ deal in the Software without restriction, including without limitation the
+ rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ sell copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+
+*******************************************************************************/
+
+/* AUTO GENERATED FILE -- DO NOT EDIT */
+
+#ifndef __CLC0C0QMD_H__
+#define __CLC0C0QMD_H__
+
+/*
+** Queue Meta Data, Version 01_07
+ */
+
+// The below C preprocessor definitions describe "multi-word" structures, where
+// fields may have bit numbers beyond 32. For example, MW(127:96) means
+// the field is in bits 0-31 of word number 3 of the structure. The "MW(X:Y)"
+// syntax is to distinguish from similar "X:Y" single-word definitions: the
+// macros historically used for single-word definitions would fail with
+// multi-word definitions.
+//
+// See nvmisc.h:DRF_VAL_MW() in the source code of the kernel
+// interface layer of nvidia.ko for an example of how to manipulate
+// these MW(X:Y) definitions.
+
+#define NVC0C0_QMDV01_07_OUTER_PUT MW(30:0) // words 0-3: four 31-bit PUT/GET fields, each sharing its word with a 1-bit flag in bit 31
+#define NVC0C0_QMDV01_07_OUTER_OVERFLOW MW(31:31)
+#define NVC0C0_QMDV01_07_OUTER_GET MW(62:32)
+#define NVC0C0_QMDV01_07_OUTER_STICKY_OVERFLOW MW(63:63)
+#define NVC0C0_QMDV01_07_INNER_GET MW(94:64)
+#define NVC0C0_QMDV01_07_INNER_OVERFLOW MW(95:95)
+#define NVC0C0_QMDV01_07_INNER_PUT MW(126:96)
+#define NVC0C0_QMDV01_07_INNER_STICKY_OVERFLOW MW(127:127)
+#define NVC0C0_QMDV01_07_QMD_RESERVED_A_A MW(159:128) // all of word 4
+#define NVC0C0_QMDV01_07_DEPENDENT_QMD_POINTER MW(191:160) // all of word 5
+#define NVC0C0_QMDV01_07_QMD_GROUP_ID MW(197:192) // 6-bit field at the bottom of word 6
+#define NVC0C0_QMDV01_07_SM_GLOBAL_CACHING_ENABLE MW(198:198) // 1-bit field; unlike the flags below, no _FALSE/_TRUE names are provided for it
+#define NVC0C0_QMDV01_07_RUN_CTA_IN_ONE_SM_PARTITION MW(199:199)
+#define NVC0C0_QMDV01_07_RUN_CTA_IN_ONE_SM_PARTITION_FALSE 0x00000000
+#define NVC0C0_QMDV01_07_RUN_CTA_IN_ONE_SM_PARTITION_TRUE 0x00000001
+#define NVC0C0_QMDV01_07_IS_QUEUE MW(200:200)
+#define NVC0C0_QMDV01_07_IS_QUEUE_FALSE 0x00000000
+#define NVC0C0_QMDV01_07_IS_QUEUE_TRUE 0x00000001
+#define NVC0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST MW(201:201)
+#define NVC0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000
+#define NVC0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001
+#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0 MW(202:202) // ENABLE0/ENABLE1 are separate bits; presumably they gate the RELEASE0/RELEASE1 groups -- TODO confirm
+#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0_FALSE 0x00000000
+#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0_TRUE 0x00000001
+#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1 MW(203:203)
+#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1_FALSE 0x00000000
+#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1_TRUE 0x00000001
+#define NVC0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS MW(204:204)
+#define NVC0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS_FALSE 0x00000000
+#define NVC0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS_TRUE 0x00000001
+#define NVC0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE MW(205:205)
+#define NVC0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE 0x00000000
+#define NVC0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE 0x00000001
+#define NVC0C0_QMDV01_07_DEPENDENT_QMD_TYPE MW(206:206) // 1-bit selector: QUEUE (0) or GRID (1)
+#define NVC0C0_QMDV01_07_DEPENDENT_QMD_TYPE_QUEUE 0x00000000
+#define NVC0C0_QMDV01_07_DEPENDENT_QMD_TYPE_GRID 0x00000001
+#define NVC0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY MW(207:207)
+#define NVC0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY_FALSE 0x00000000
+#define NVC0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY_TRUE 0x00000001
+#define NVC0C0_QMDV01_07_QMD_RESERVED_B MW(223:208)
+#define NVC0C0_QMDV01_07_CIRCULAR_QUEUE_SIZE MW(248:224) // 25-bit field at the bottom of word 7
+#define NVC0C0_QMDV01_07_QMD_RESERVED_C MW(249:249)
+#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE MW(250:250) // bits 255:250 of word 7: six independent 1-bit INVALIDATE_* flags
+#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE 0x00000000
+#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE 0x00000001
+#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE MW(251:251)
+#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE 0x00000000
+#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE 0x00000001
+#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE MW(252:252)
+#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE_FALSE 0x00000000
+#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE_TRUE 0x00000001
+#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE MW(253:253)
+#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE_FALSE 0x00000000
+#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE_TRUE 0x00000001
+#define NVC0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE MW(254:254)
+#define NVC0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE_FALSE 0x00000000
+#define NVC0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE_TRUE 0x00000001
+#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE MW(255:255)
+#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE 0x00000000
+#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE 0x00000001
+#define NVC0C0_QMDV01_07_PROGRAM_OFFSET MW(287:256) // occupies all of word 8
+#define NVC0C0_QMDV01_07_CIRCULAR_QUEUE_ADDR_LOWER MW(319:288) // 32 bits here + 8 bits in _UPPER = 40 bits across the LOWER/UPPER pair
+#define NVC0C0_QMDV01_07_CIRCULAR_QUEUE_ADDR_UPPER MW(327:320)
+#define NVC0C0_QMDV01_07_QMD_RESERVED_D MW(335:328)
+#define NVC0C0_QMDV01_07_CIRCULAR_QUEUE_ENTRY_SIZE MW(351:336) // 16-bit field; units are not stated in this header
+#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_ID MW(357:352) // 6-bit field
+#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE MW(365:358) // 8-bit field; name suggests it holds (delta - 1) -- TODO confirm
+#define NVC0C0_QMDV01_07_RELEASE_MEMBAR_TYPE MW(366:366)
+#define NVC0C0_QMDV01_07_RELEASE_MEMBAR_TYPE_FE_NONE 0x00000000
+#define NVC0C0_QMDV01_07_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR 0x00000001
+#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE MW(367:367)
+#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE 0x00000000
+#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE 0x00000001
+#define NVC0C0_QMDV01_07_CWD_MEMBAR_TYPE MW(369:368) // 2-bit field; only encodings 0, 1 and 3 are named below (2 has no name)
+#define NVC0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_NONE 0x00000000
+#define NVC0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_SYSMEMBAR 0x00000001
+#define NVC0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_MEMBAR 0x00000003
+#define NVC0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS MW(370:370)
+#define NVC0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS_FALSE 0x00000000
+#define NVC0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS_TRUE 0x00000001
+#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE MW(371:371)
+#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE 0x00000000
+#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE 0x00000001
+#define NVC0C0_QMDV01_07_THROTTLED MW(372:372)
+#define NVC0C0_QMDV01_07_THROTTLED_FALSE 0x00000000
+#define NVC0C0_QMDV01_07_THROTTLED_TRUE 0x00000001
+#define NVC0C0_QMDV01_07_FP32_NAN_BEHAVIOR MW(376:376) // bits 375:373 have no definition in this layout
+#define NVC0C0_QMDV01_07_FP32_NAN_BEHAVIOR_LEGACY 0x00000000
+#define NVC0C0_QMDV01_07_FP32_NAN_BEHAVIOR_FP64_COMPATIBLE 0x00000001
+#define NVC0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR MW(377:377)
+#define NVC0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR_PASS_ZERO 0x00000000
+#define NVC0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR_PASS_INDEFINITE 0x00000001
+#define NVC0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT MW(378:378)
+#define NVC0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT__32 0x00000000
+#define NVC0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT_NO_CHECK 0x00000001
+#define NVC0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING MW(379:379)
+#define NVC0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING_FOUR_BYTES_PER_BANK 0x00000000
+#define NVC0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING_EIGHT_BYTES_PER_BANK 0x00000001
+#define NVC0C0_QMDV01_07_SAMPLER_INDEX MW(382:382) // bits 381:380 have no definition in this layout
+#define NVC0C0_QMDV01_07_SAMPLER_INDEX_INDEPENDENTLY 0x00000000
+#define NVC0C0_QMDV01_07_SAMPLER_INDEX_VIA_HEADER_INDEX 0x00000001
+#define NVC0C0_QMDV01_07_FP32_NARROW_INSTRUCTION MW(383:383) // bit 383 is the top bit of word 11
+#define NVC0C0_QMDV01_07_FP32_NARROW_INSTRUCTION_KEEP_DENORMS 0x00000000
+#define NVC0C0_QMDV01_07_FP32_NARROW_INSTRUCTION_FLUSH_DENORMS 0x00000001
+#define NVC0C0_QMDV01_07_CTA_RASTER_WIDTH MW(415:384) // word 12; width is 32 bits wide while height and depth below are 16 bits each
+#define NVC0C0_QMDV01_07_CTA_RASTER_HEIGHT MW(431:416)
+#define NVC0C0_QMDV01_07_CTA_RASTER_DEPTH MW(447:432)
+#define NVC0C0_QMDV01_07_CTA_RASTER_WIDTH_RESUME MW(479:448) // the _RESUME trio repeats the 32/16/16-bit split of the three fields above
+#define NVC0C0_QMDV01_07_CTA_RASTER_HEIGHT_RESUME MW(495:480)
+#define NVC0C0_QMDV01_07_CTA_RASTER_DEPTH_RESUME MW(511:496)
+#define NVC0C0_QMDV01_07_QUEUE_ENTRIES_PER_CTA_MINUS_ONE MW(518:512) // 7-bit field; bits 521:519 have no definition in this layout
+#define NVC0C0_QMDV01_07_COALESCE_WAITING_PERIOD MW(529:522) // 8-bit field; bits 543:530 have no definition in this layout
+#define NVC0C0_QMDV01_07_SHARED_MEMORY_SIZE MW(561:544) // 18-bit field; units are not stated in this header
+#define NVC0C0_QMDV01_07_QMD_RESERVED_G MW(575:562)
+#define NVC0C0_QMDV01_07_QMD_VERSION MW(579:576) // 4-bit field; presumably the "07" of V01_07, with QMD_MAJOR_VERSION the "01" -- TODO confirm
+#define NVC0C0_QMDV01_07_QMD_MAJOR_VERSION MW(583:580)
+#define NVC0C0_QMDV01_07_QMD_RESERVED_H MW(591:584)
+#define NVC0C0_QMDV01_07_CTA_THREAD_DIMENSION0 MW(607:592) // three consecutive 16-bit fields, DIMENSION0..DIMENSION2
+#define NVC0C0_QMDV01_07_CTA_THREAD_DIMENSION1 MW(623:608)
+#define NVC0C0_QMDV01_07_CTA_THREAD_DIMENSION2 MW(639:624)
+#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_VALID(i) MW((640+(i)*1):(640+(i)*1)) // one bit per index i, at bit 640+i; QMD_RESERVED_I starts at 648, leaving room for i = 0..7
+#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_VALID_FALSE 0x00000000
+#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_VALID_TRUE 0x00000001
+#define NVC0C0_QMDV01_07_QMD_RESERVED_I MW(668:648)
+#define NVC0C0_QMDV01_07_L1_CONFIGURATION MW(671:669) // 3-bit field; only encodings 1..3 are named below
+#define NVC0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB 0x00000001
+#define NVC0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB 0x00000002
+#define NVC0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB 0x00000003
+#define NVC0C0_QMDV01_07_SM_DISABLE_MASK_LOWER MW(703:672) // 64 mask bits split over word 21 (LOWER) and word 22 (UPPER)
+#define NVC0C0_QMDV01_07_SM_DISABLE_MASK_UPPER MW(735:704)
+#define NVC0C0_QMDV01_07_RELEASE0_ADDRESS_LOWER MW(767:736) // RELEASE0 group: address (32 bits LOWER + 8 bits UPPER), reduction controls, then a 32-bit payload
+#define NVC0C0_QMDV01_07_RELEASE0_ADDRESS_UPPER MW(775:768)
+#define NVC0C0_QMDV01_07_QMD_RESERVED_J MW(783:776)
+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP MW(790:788) // 3-bit field, all eight encodings are named below; bits 787:784 have no definition
+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_ADD 0x00000000
+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_MIN 0x00000001
+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_MAX 0x00000002
+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_INC 0x00000003
+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_DEC 0x00000004
+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_AND 0x00000005
+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_OR 0x00000006
+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_XOR 0x00000007
+#define NVC0C0_QMDV01_07_QMD_RESERVED_K MW(791:791)
+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT MW(793:792) // 2-bit field; only encodings 0 and 1 are named below
+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32 0x00000000
+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT_SIGNED_32 0x00000001
+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE MW(794:794)
+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE_FALSE 0x00000000
+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE_TRUE 0x00000001
+#define NVC0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE MW(799:799) // bits 798:795 have no definition in this layout
+#define NVC0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS 0x00000000
+#define NVC0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE_ONE_WORD 0x00000001
+#define NVC0C0_QMDV01_07_RELEASE0_PAYLOAD MW(831:800)
+#define NVC0C0_QMDV01_07_RELEASE1_ADDRESS_LOWER MW(863:832) // RELEASE1 group: same field layout as RELEASE0, every range shifted up by 96 bits (3 words)
+#define NVC0C0_QMDV01_07_RELEASE1_ADDRESS_UPPER MW(871:864)
+#define NVC0C0_QMDV01_07_QMD_RESERVED_L MW(879:872)
+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP MW(886:884) // bits 883:880 have no definition in this layout
+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_ADD 0x00000000
+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_MIN 0x00000001
+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_MAX 0x00000002
+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_INC 0x00000003
+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_DEC 0x00000004
+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_AND 0x00000005
+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_OR 0x00000006
+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_XOR 0x00000007
+#define NVC0C0_QMDV01_07_QMD_RESERVED_M MW(887:887)
+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT MW(889:888)
+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32 0x00000000
+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT_SIGNED_32 0x00000001
+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE MW(890:890)
+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE_FALSE 0x00000000
+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE_TRUE 0x00000001
+#define NVC0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE MW(895:895) // bits 894:891 have no definition in this layout
+#define NVC0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS 0x00000000
+#define NVC0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE_ONE_WORD 0x00000001
+#define NVC0C0_QMDV01_07_RELEASE1_PAYLOAD MW(927:896)
+#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_ADDR_LOWER(i) MW((959+(i)*64):(928+(i)*64)) // per-buffer descriptor i: 64 bits (two words) each, first one starting at bit 928
+#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_ADDR_UPPER(i) MW((967+(i)*64):(960+(i)*64)) // 8 further address bits
+#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_RESERVED_ADDR(i) MW((973+(i)*64):(968+(i)*64))
+#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE(i) MW((974+(i)*64):(974+(i)*64)) // 1-bit flag inside each descriptor
+#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE_FALSE 0x00000000
+#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE_TRUE 0x00000001
+#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_SIZE(i) MW((991+(i)*64):(975+(i)*64)) // 17-bit field; completes the 64-bit descriptor (bits 928..991 for i = 0)
+#define NVC0C0_QMDV01_07_SHADER_LOCAL_MEMORY_LOW_SIZE MW(1463:1440) // bit 1440 = 928 + 8*64, i.e. directly after descriptor i = 7
+#define NVC0C0_QMDV01_07_QMD_RESERVED_N MW(1466:1464)
+#define NVC0C0_QMDV01_07_BARRIER_COUNT MW(1471:1467) // 5-bit field
+#define NVC0C0_QMDV01_07_SHADER_LOCAL_MEMORY_HIGH_SIZE MW(1495:1472)
+#define NVC0C0_QMDV01_07_REGISTER_COUNT MW(1503:1496) // 8-bit field
+#define NVC0C0_QMDV01_07_SHADER_LOCAL_MEMORY_CRS_SIZE MW(1527:1504)
+#define NVC0C0_QMDV01_07_SASS_VERSION MW(1535:1528)
+#define NVC0C0_QMDV01_07_HW_ONLY_INNER_GET MW(1566:1536) // HW_ONLY_ prefix: presumably state owned by hardware rather than written by the driver -- TODO confirm
+#define NVC0C0_QMDV01_07_HW_ONLY_REQUIRE_SCHEDULING_PCAS MW(1567:1567)
+#define NVC0C0_QMDV01_07_HW_ONLY_INNER_PUT MW(1598:1568)
+#define NVC0C0_QMDV01_07_HW_ONLY_SCG_TYPE MW(1599:1599) // the NVA0C0 V01_07 layout (cla0c0qmd.h) names this same bit QMD_RESERVED_P
+#define NVC0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX MW(1629:1600)
+#define NVC0C0_QMDV01_07_QMD_RESERVED_Q MW(1630:1630)
+#define NVC0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID MW(1631:1631)
+#define NVC0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE 0x00000000
+#define NVC0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE 0x00000001
+#define NVC0C0_QMDV01_07_HW_ONLY_SKED_NEXT_QMD_POINTER MW(1663:1632)
+#define NVC0C0_QMDV01_07_QMD_SPARE_E MW(1695:1664) // ten full-word spares, E..N, in words 52..61
+#define NVC0C0_QMDV01_07_QMD_SPARE_F MW(1727:1696)
+#define NVC0C0_QMDV01_07_QMD_SPARE_G MW(1759:1728)
+#define NVC0C0_QMDV01_07_QMD_SPARE_H MW(1791:1760)
+#define NVC0C0_QMDV01_07_QMD_SPARE_I MW(1823:1792)
+#define NVC0C0_QMDV01_07_QMD_SPARE_J MW(1855:1824)
+#define NVC0C0_QMDV01_07_QMD_SPARE_K MW(1887:1856)
+#define NVC0C0_QMDV01_07_QMD_SPARE_L MW(1919:1888)
+#define NVC0C0_QMDV01_07_QMD_SPARE_M MW(1951:1920)
+#define NVC0C0_QMDV01_07_QMD_SPARE_N MW(1983:1952)
+#define NVC0C0_QMDV01_07_DEBUG_ID_UPPER MW(2015:1984) // note: UPPER is in the lower-numbered word (62), LOWER in word 63
+#define NVC0C0_QMDV01_07_DEBUG_ID_LOWER MW(2047:2016) // bit 2047 is the highest bit defined: the layout spans 64 32-bit words
+
+
+/*
+** Queue Meta Data, Version 02_00
+ */
+
+#define NVC0C0_QMDV02_00_OUTER_PUT MW(30:0) // words 0-3: four 31-bit PUT/GET fields, each sharing its word with a 1-bit flag in bit 31
+#define NVC0C0_QMDV02_00_OUTER_OVERFLOW MW(31:31)
+#define NVC0C0_QMDV02_00_OUTER_GET MW(62:32)
+#define NVC0C0_QMDV02_00_OUTER_STICKY_OVERFLOW MW(63:63)
+#define NVC0C0_QMDV02_00_INNER_GET MW(94:64)
+#define NVC0C0_QMDV02_00_INNER_OVERFLOW MW(95:95)
+#define NVC0C0_QMDV02_00_INNER_PUT MW(126:96)
+#define NVC0C0_QMDV02_00_INNER_STICKY_OVERFLOW MW(127:127)
+#define NVC0C0_QMDV02_00_QMD_RESERVED_A_A MW(159:128) // all of word 4
+#define NVC0C0_QMDV02_00_DEPENDENT_QMD_POINTER MW(191:160) // all of word 5
+#define NVC0C0_QMDV02_00_QMD_GROUP_ID MW(197:192) // 6-bit field at the bottom of word 6
+#define NVC0C0_QMDV02_00_SM_GLOBAL_CACHING_ENABLE MW(198:198) // 1-bit field; unlike the flags below, no _FALSE/_TRUE names are provided for it
+#define NVC0C0_QMDV02_00_RUN_CTA_IN_ONE_SM_PARTITION MW(199:199)
+#define NVC0C0_QMDV02_00_RUN_CTA_IN_ONE_SM_PARTITION_FALSE 0x00000000
+#define NVC0C0_QMDV02_00_RUN_CTA_IN_ONE_SM_PARTITION_TRUE 0x00000001
+#define NVC0C0_QMDV02_00_IS_QUEUE MW(200:200)
+#define NVC0C0_QMDV02_00_IS_QUEUE_FALSE 0x00000000
+#define NVC0C0_QMDV02_00_IS_QUEUE_TRUE 0x00000001
+#define NVC0C0_QMDV02_00_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST MW(201:201)
+#define NVC0C0_QMDV02_00_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000
+#define NVC0C0_QMDV02_00_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001
+#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE0 MW(202:202) // ENABLE0/ENABLE1 are separate bits; presumably they gate the RELEASE0/RELEASE1 groups -- TODO confirm
+#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE0_FALSE 0x00000000
+#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE0_TRUE 0x00000001
+#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE1 MW(203:203)
+#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE1_FALSE 0x00000000
+#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE1_TRUE 0x00000001
+#define NVC0C0_QMDV02_00_REQUIRE_SCHEDULING_PCAS MW(204:204)
+#define NVC0C0_QMDV02_00_REQUIRE_SCHEDULING_PCAS_FALSE 0x00000000
+#define NVC0C0_QMDV02_00_REQUIRE_SCHEDULING_PCAS_TRUE 0x00000001
+#define NVC0C0_QMDV02_00_DEPENDENT_QMD_SCHEDULE_ENABLE MW(205:205)
+#define NVC0C0_QMDV02_00_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE 0x00000000
+#define NVC0C0_QMDV02_00_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE 0x00000001
+#define NVC0C0_QMDV02_00_DEPENDENT_QMD_TYPE MW(206:206) // 1-bit selector: QUEUE (0) or GRID (1)
+#define NVC0C0_QMDV02_00_DEPENDENT_QMD_TYPE_QUEUE 0x00000000
+#define NVC0C0_QMDV02_00_DEPENDENT_QMD_TYPE_GRID 0x00000001
+#define NVC0C0_QMDV02_00_DEPENDENT_QMD_FIELD_COPY MW(207:207)
+#define NVC0C0_QMDV02_00_DEPENDENT_QMD_FIELD_COPY_FALSE 0x00000000
+#define NVC0C0_QMDV02_00_DEPENDENT_QMD_FIELD_COPY_TRUE 0x00000001
+#define NVC0C0_QMDV02_00_QMD_RESERVED_B MW(223:208)
+#define NVC0C0_QMDV02_00_CIRCULAR_QUEUE_SIZE MW(248:224) // 25-bit field at the bottom of word 7
+#define NVC0C0_QMDV02_00_QMD_RESERVED_C MW(249:249)
+#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_HEADER_CACHE MW(250:250) // bits 255:250 of word 7: six independent 1-bit INVALIDATE_* flags
+#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE 0x00000000
+#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE 0x00000001
+#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_SAMPLER_CACHE MW(251:251)
+#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE 0x00000000
+#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE 0x00000001
+#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_DATA_CACHE MW(252:252)
+#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_DATA_CACHE_FALSE 0x00000000
+#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_DATA_CACHE_TRUE 0x00000001
+#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_DATA_CACHE MW(253:253)
+#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_DATA_CACHE_FALSE 0x00000000
+#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_DATA_CACHE_TRUE 0x00000001
+#define NVC0C0_QMDV02_00_INVALIDATE_INSTRUCTION_CACHE MW(254:254)
+#define NVC0C0_QMDV02_00_INVALIDATE_INSTRUCTION_CACHE_FALSE 0x00000000
+#define NVC0C0_QMDV02_00_INVALIDATE_INSTRUCTION_CACHE_TRUE 0x00000001
+#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_CONSTANT_CACHE MW(255:255)
+#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE 0x00000000
+#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE 0x00000001
+#define NVC0C0_QMDV02_00_PROGRAM_OFFSET MW(287:256) // occupies all of word 8
+#define NVC0C0_QMDV02_00_CIRCULAR_QUEUE_ADDR_LOWER MW(319:288) // 32 bits here + 8 bits in _UPPER = 40 bits across the LOWER/UPPER pair
+#define NVC0C0_QMDV02_00_CIRCULAR_QUEUE_ADDR_UPPER MW(327:320)
+#define NVC0C0_QMDV02_00_QMD_RESERVED_D MW(335:328)
+#define NVC0C0_QMDV02_00_CIRCULAR_QUEUE_ENTRY_SIZE MW(351:336) // 16-bit field; units are not stated in this header
+#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_ID MW(357:352) // 6-bit field
+#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE MW(365:358) // 8-bit field; name suggests it holds (delta - 1) -- TODO confirm
+#define NVC0C0_QMDV02_00_RELEASE_MEMBAR_TYPE MW(366:366)
+#define NVC0C0_QMDV02_00_RELEASE_MEMBAR_TYPE_FE_NONE 0x00000000
+#define NVC0C0_QMDV02_00_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR 0x00000001
+#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_INCR_ENABLE MW(367:367)
+#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE 0x00000000
+#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE 0x00000001
+#define NVC0C0_QMDV02_00_CWD_MEMBAR_TYPE MW(369:368) // 2-bit field; only encodings 0, 1 and 3 are named below (2 has no name)
+#define NVC0C0_QMDV02_00_CWD_MEMBAR_TYPE_L1_NONE 0x00000000
+#define NVC0C0_QMDV02_00_CWD_MEMBAR_TYPE_L1_SYSMEMBAR 0x00000001
+#define NVC0C0_QMDV02_00_CWD_MEMBAR_TYPE_L1_MEMBAR 0x00000003
+#define NVC0C0_QMDV02_00_SEQUENTIALLY_RUN_CTAS MW(370:370)
+#define NVC0C0_QMDV02_00_SEQUENTIALLY_RUN_CTAS_FALSE 0x00000000
+#define NVC0C0_QMDV02_00_SEQUENTIALLY_RUN_CTAS_TRUE 0x00000001
+#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_DECR_ENABLE MW(371:371)
+#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE 0x00000000
+#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE 0x00000001
+#define NVC0C0_QMDV02_00_THROTTLED MW(372:372)
+#define NVC0C0_QMDV02_00_THROTTLED_FALSE 0x00000000
+#define NVC0C0_QMDV02_00_THROTTLED_TRUE 0x00000001
+#define NVC0C0_QMDV02_00_API_VISIBLE_CALL_LIMIT MW(378:378) // bits 377:373 have no definition here; V01_07 defines FP32_NAN_BEHAVIOR (376) and FP32_F2I_NAN_BEHAVIOR (377)
+#define NVC0C0_QMDV02_00_API_VISIBLE_CALL_LIMIT__32 0x00000000
+#define NVC0C0_QMDV02_00_API_VISIBLE_CALL_LIMIT_NO_CHECK 0x00000001
+#define NVC0C0_QMDV02_00_SAMPLER_INDEX MW(382:382) // bits 381:379 and 383 have no definition here; V01_07 defines SHARED_MEMORY_BANK_MAPPING (379) and FP32_NARROW_INSTRUCTION (383)
+#define NVC0C0_QMDV02_00_SAMPLER_INDEX_INDEPENDENTLY 0x00000000
+#define NVC0C0_QMDV02_00_SAMPLER_INDEX_VIA_HEADER_INDEX 0x00000001
+#define NVC0C0_QMDV02_00_CTA_RASTER_WIDTH MW(415:384) // word 12; width is 32 bits wide while height and depth below are 16 bits each
+#define NVC0C0_QMDV02_00_CTA_RASTER_HEIGHT MW(431:416)
+#define NVC0C0_QMDV02_00_QMD_RESERVED13A MW(447:432) // V01_07 places CTA_RASTER_DEPTH in these bits; V02_00 reserves them
+#define NVC0C0_QMDV02_00_CTA_RASTER_DEPTH MW(463:448) // moved relative to V01_07 (447:432): now the low half of word 14
+#define NVC0C0_QMDV02_00_QMD_RESERVED14A MW(479:464)
+#define NVC0C0_QMDV02_00_QMD_RESERVED15A MW(511:480) // V01_07 places CTA_RASTER_*_RESUME in 511:448; V02_00 has no _RESUME fields in the visible definitions
+#define NVC0C0_QMDV02_00_QUEUE_ENTRIES_PER_CTA_MINUS_ONE MW(518:512) // 7-bit field; bits 521:519 have no definition in this layout
+#define NVC0C0_QMDV02_00_COALESCE_WAITING_PERIOD MW(529:522) // 8-bit field; bits 543:530 have no definition in this layout
+#define NVC0C0_QMDV02_00_SHARED_MEMORY_SIZE MW(561:544) // 18-bit field; units are not stated in this header
+#define NVC0C0_QMDV02_00_QMD_RESERVED_G MW(575:562)
+#define NVC0C0_QMDV02_00_QMD_VERSION MW(579:576) // 4-bit field; presumably the "00" of V02_00, with QMD_MAJOR_VERSION the "02" -- TODO confirm
+#define NVC0C0_QMDV02_00_QMD_MAJOR_VERSION MW(583:580)
+#define NVC0C0_QMDV02_00_QMD_RESERVED_H MW(591:584)
+#define NVC0C0_QMDV02_00_CTA_THREAD_DIMENSION0 MW(607:592) // three consecutive 16-bit fields, DIMENSION0..DIMENSION2
+#define NVC0C0_QMDV02_00_CTA_THREAD_DIMENSION1 MW(623:608)
+#define NVC0C0_QMDV02_00_CTA_THREAD_DIMENSION2 MW(639:624)
+#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_VALID(i) MW((640+(i)*1):(640+(i)*1)) // one bit per index i, at bit 640+i; QMD_RESERVED_I starts at 648, leaving room for i = 0..7
+#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_VALID_FALSE 0x00000000
+#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_VALID_TRUE 0x00000001
+#define NVC0C0_QMDV02_00_QMD_RESERVED_I MW(671:648) // 24 bits; also covers 671:669, where V01_07 defines L1_CONFIGURATION
+#define NVC0C0_QMDV02_00_SM_DISABLE_MASK_LOWER MW(703:672) // 64 mask bits split over word 21 (LOWER) and word 22 (UPPER)
+#define NVC0C0_QMDV02_00_SM_DISABLE_MASK_UPPER MW(735:704)
+#define NVC0C0_QMDV02_00_RELEASE0_ADDRESS_LOWER MW(767:736) // RELEASE0 group: address (32 bits LOWER + 8 bits UPPER), reduction controls, then a 32-bit payload
+#define NVC0C0_QMDV02_00_RELEASE0_ADDRESS_UPPER MW(775:768)
+#define NVC0C0_QMDV02_00_QMD_RESERVED_J MW(783:776)
+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP MW(790:788) // 3-bit field, all eight encodings are named below; bits 787:784 have no definition
+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_ADD 0x00000000
+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_MIN 0x00000001
+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_MAX 0x00000002
+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_INC 0x00000003
+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_DEC 0x00000004
+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_AND 0x00000005
+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_OR 0x00000006
+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_XOR 0x00000007
+#define NVC0C0_QMDV02_00_QMD_RESERVED_K MW(791:791)
+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_FORMAT MW(793:792) // 2-bit field; only encodings 0 and 1 are named below
+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32 0x00000000
+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_FORMAT_SIGNED_32 0x00000001
+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_ENABLE MW(794:794)
+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_ENABLE_FALSE 0x00000000
+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_ENABLE_TRUE 0x00000001
+#define NVC0C0_QMDV02_00_RELEASE0_STRUCTURE_SIZE MW(799:799) // bits 798:795 have no definition in this layout
+#define NVC0C0_QMDV02_00_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS 0x00000000
+#define NVC0C0_QMDV02_00_RELEASE0_STRUCTURE_SIZE_ONE_WORD 0x00000001
+#define NVC0C0_QMDV02_00_RELEASE0_PAYLOAD MW(831:800)
+#define NVC0C0_QMDV02_00_RELEASE1_ADDRESS_LOWER MW(863:832) // RELEASE1 group: the fields visible here sit 96 bits (3 words) above their RELEASE0 counterparts
+#define NVC0C0_QMDV02_00_RELEASE1_ADDRESS_UPPER MW(871:864)
+#define NVC0C0_QMDV02_00_QMD_RESERVED_L MW(879:872)
+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP MW(886:884) // bits 883:880 have no definition in this layout
+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_ADD 0x00000000
+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_MIN 0x00000001
+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_MAX 0x00000002
+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_INC 0x00000003
+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_DEC 0x00000004
+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_AND 0x00000005
+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_OR 0x00000006
+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_XOR 0x00000007
+#define NVC0C0_QMDV02_00_QMD_RESERVED_M MW(887:887)
+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_FORMAT MW(889:888)
+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32 0x00000000
+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_FORMAT_SIGNED_32 0x00000001
+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_ENABLE MW(890:890)
+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_ENABLE_FALSE 0x00000000
+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_ENABLE_TRUE 0x00000001
+#define NVC0C0_QMDV02_00_RELEASE1_STRUCTURE_SIZE MW(895:895)
+#define NVC0C0_QMDV02_00_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS 0x00000000
+#define NVC0C0_QMDV02_00_RELEASE1_STRUCTURE_SIZE_ONE_WORD 0x00000001
+#define NVC0C0_QMDV02_00_RELEASE1_PAYLOAD MW(927:896)
+#define NVC0C0_QMDV02_00_SHADER_LOCAL_MEMORY_LOW_SIZE MW(951:928)
+#define NVC0C0_QMDV02_00_QMD_RESERVED_N MW(954:952)
+#define NVC0C0_QMDV02_00_BARRIER_COUNT MW(959:955)
+#define NVC0C0_QMDV02_00_SHADER_LOCAL_MEMORY_HIGH_SIZE MW(983:960)
+#define NVC0C0_QMDV02_00_REGISTER_COUNT MW(991:984)
+#define NVC0C0_QMDV02_00_SHADER_LOCAL_MEMORY_CRS_SIZE MW(1015:992)
+#define NVC0C0_QMDV02_00_SASS_VERSION MW(1023:1016)
+#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_ADDR_LOWER(i) MW((1055+(i)*64):(1024+(i)*64))
+#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_ADDR_UPPER(i) MW((1072+(i)*64):(1056+(i)*64))
+#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_RESERVED_ADDR(i) MW((1073+(i)*64):(1073+(i)*64))
+#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_INVALIDATE(i) MW((1074+(i)*64):(1074+(i)*64))
+#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_INVALIDATE_FALSE 0x00000000
+#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_INVALIDATE_TRUE 0x00000001
+#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_SIZE_SHIFTED4(i) MW((1087+(i)*64):(1075+(i)*64))
+#define NVC0C0_QMDV02_00_HW_ONLY_INNER_GET MW(1566:1536)
+#define NVC0C0_QMDV02_00_HW_ONLY_REQUIRE_SCHEDULING_PCAS MW(1567:1567)
+#define NVC0C0_QMDV02_00_HW_ONLY_INNER_PUT MW(1598:1568)
+#define NVC0C0_QMDV02_00_HW_ONLY_SCG_TYPE MW(1599:1599)
+#define NVC0C0_QMDV02_00_HW_ONLY_SPAN_LIST_HEAD_INDEX MW(1629:1600)
+#define NVC0C0_QMDV02_00_QMD_RESERVED_Q MW(1630:1630)
+#define NVC0C0_QMDV02_00_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID MW(1631:1631)
+#define NVC0C0_QMDV02_00_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE 0x00000000
+#define NVC0C0_QMDV02_00_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE 0x00000001
+#define NVC0C0_QMDV02_00_HW_ONLY_SKED_NEXT_QMD_POINTER MW(1663:1632)
+#define NVC0C0_QMDV02_00_CTA_RASTER_WIDTH_RESUME MW(1695:1664)
+#define NVC0C0_QMDV02_00_CTA_RASTER_HEIGHT_RESUME MW(1711:1696)
+#define NVC0C0_QMDV02_00_CTA_RASTER_DEPTH_RESUME MW(1727:1712)
+#define NVC0C0_QMDV02_00_QMD_SPARE_G MW(1759:1728)
+#define NVC0C0_QMDV02_00_QMD_SPARE_H MW(1791:1760)
+#define NVC0C0_QMDV02_00_QMD_SPARE_I MW(1823:1792)
+#define NVC0C0_QMDV02_00_QMD_SPARE_J MW(1855:1824)
+#define NVC0C0_QMDV02_00_QMD_SPARE_K MW(1887:1856)
+#define NVC0C0_QMDV02_00_QMD_SPARE_L MW(1919:1888)
+#define NVC0C0_QMDV02_00_QMD_SPARE_M MW(1951:1920)
+#define NVC0C0_QMDV02_00_QMD_SPARE_N MW(1983:1952)
+#define NVC0C0_QMDV02_00_DEBUG_ID_UPPER MW(2015:1984)
+#define NVC0C0_QMDV02_00_DEBUG_ID_LOWER MW(2047:2016)
+
+
+/*
+** Queue Meta Data, Version 02_01 -- each field is an MW(hi:lo) bit range; the highest defined bit is 2047.
+ */
+
+#define NVC0C0_QMDV02_01_OUTER_PUT MW(30:0)
+#define NVC0C0_QMDV02_01_OUTER_OVERFLOW MW(31:31)
+#define NVC0C0_QMDV02_01_OUTER_GET MW(62:32)
+#define NVC0C0_QMDV02_01_OUTER_STICKY_OVERFLOW MW(63:63)
+#define NVC0C0_QMDV02_01_INNER_GET MW(94:64)
+#define NVC0C0_QMDV02_01_INNER_OVERFLOW MW(95:95)
+#define NVC0C0_QMDV02_01_INNER_PUT MW(126:96)
+#define NVC0C0_QMDV02_01_INNER_STICKY_OVERFLOW MW(127:127)
+#define NVC0C0_QMDV02_01_QMD_GROUP_ID MW(133:128)
+#define NVC0C0_QMDV02_01_SM_GLOBAL_CACHING_ENABLE MW(134:134)
+#define NVC0C0_QMDV02_01_RUN_CTA_IN_ONE_SM_PARTITION MW(135:135)
+#define NVC0C0_QMDV02_01_RUN_CTA_IN_ONE_SM_PARTITION_FALSE 0x00000000
+#define NVC0C0_QMDV02_01_RUN_CTA_IN_ONE_SM_PARTITION_TRUE 0x00000001
+#define NVC0C0_QMDV02_01_IS_QUEUE MW(136:136)
+#define NVC0C0_QMDV02_01_IS_QUEUE_FALSE 0x00000000
+#define NVC0C0_QMDV02_01_IS_QUEUE_TRUE 0x00000001
+#define NVC0C0_QMDV02_01_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST MW(137:137)
+#define NVC0C0_QMDV02_01_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000
+#define NVC0C0_QMDV02_01_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001
+#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE0 MW(138:138)
+#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE0_FALSE 0x00000000
+#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE0_TRUE 0x00000001
+#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE1 MW(139:139)
+#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE1_FALSE 0x00000000
+#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE1_TRUE 0x00000001
+#define NVC0C0_QMDV02_01_REQUIRE_SCHEDULING_PCAS MW(140:140)
+#define NVC0C0_QMDV02_01_REQUIRE_SCHEDULING_PCAS_FALSE 0x00000000
+#define NVC0C0_QMDV02_01_REQUIRE_SCHEDULING_PCAS_TRUE 0x00000001
+#define NVC0C0_QMDV02_01_DEPENDENT_QMD_SCHEDULE_ENABLE MW(141:141)
+#define NVC0C0_QMDV02_01_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE 0x00000000
+#define NVC0C0_QMDV02_01_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE 0x00000001
+#define NVC0C0_QMDV02_01_DEPENDENT_QMD_TYPE MW(142:142)
+#define NVC0C0_QMDV02_01_DEPENDENT_QMD_TYPE_QUEUE 0x00000000
+#define NVC0C0_QMDV02_01_DEPENDENT_QMD_TYPE_GRID 0x00000001
+#define NVC0C0_QMDV02_01_DEPENDENT_QMD_FIELD_COPY MW(143:143)
+#define NVC0C0_QMDV02_01_DEPENDENT_QMD_FIELD_COPY_FALSE 0x00000000
+#define NVC0C0_QMDV02_01_DEPENDENT_QMD_FIELD_COPY_TRUE 0x00000001
+#define NVC0C0_QMDV02_01_QMD_RESERVED_B MW(159:144)
+#define NVC0C0_QMDV02_01_CIRCULAR_QUEUE_SIZE MW(184:160)
+#define NVC0C0_QMDV02_01_QMD_RESERVED_C MW(185:185)
+#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_HEADER_CACHE MW(186:186)
+#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE 0x00000000
+#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE 0x00000001
+#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_SAMPLER_CACHE MW(187:187)
+#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE 0x00000000
+#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE 0x00000001
+#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_DATA_CACHE MW(188:188)
+#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_DATA_CACHE_FALSE 0x00000000
+#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_DATA_CACHE_TRUE 0x00000001
+#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_DATA_CACHE MW(189:189)
+#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_DATA_CACHE_FALSE 0x00000000
+#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_DATA_CACHE_TRUE 0x00000001
+#define NVC0C0_QMDV02_01_INVALIDATE_INSTRUCTION_CACHE MW(190:190)
+#define NVC0C0_QMDV02_01_INVALIDATE_INSTRUCTION_CACHE_FALSE 0x00000000
+#define NVC0C0_QMDV02_01_INVALIDATE_INSTRUCTION_CACHE_TRUE 0x00000001
+#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_CONSTANT_CACHE MW(191:191)
+#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE 0x00000000
+#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE 0x00000001
+#define NVC0C0_QMDV02_01_CTA_RASTER_WIDTH_RESUME MW(223:192) /* V02_00 places the CTA_RASTER_*_RESUME fields at bits 1727:1664 instead */
+#define NVC0C0_QMDV02_01_CTA_RASTER_HEIGHT_RESUME MW(239:224)
+#define NVC0C0_QMDV02_01_CTA_RASTER_DEPTH_RESUME MW(255:240)
+#define NVC0C0_QMDV02_01_PROGRAM_OFFSET MW(287:256)
+#define NVC0C0_QMDV02_01_CIRCULAR_QUEUE_ADDR_LOWER MW(319:288)
+#define NVC0C0_QMDV02_01_CIRCULAR_QUEUE_ADDR_UPPER MW(327:320)
+#define NVC0C0_QMDV02_01_QMD_RESERVED_D MW(335:328)
+#define NVC0C0_QMDV02_01_CIRCULAR_QUEUE_ENTRY_SIZE MW(351:336)
+#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_ID MW(357:352)
+#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE MW(365:358)
+#define NVC0C0_QMDV02_01_RELEASE_MEMBAR_TYPE MW(366:366)
+#define NVC0C0_QMDV02_01_RELEASE_MEMBAR_TYPE_FE_NONE 0x00000000
+#define NVC0C0_QMDV02_01_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR 0x00000001
+#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_INCR_ENABLE MW(367:367)
+#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE 0x00000000
+#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE 0x00000001
+#define NVC0C0_QMDV02_01_CWD_MEMBAR_TYPE MW(369:368)
+#define NVC0C0_QMDV02_01_CWD_MEMBAR_TYPE_L1_NONE 0x00000000
+#define NVC0C0_QMDV02_01_CWD_MEMBAR_TYPE_L1_SYSMEMBAR 0x00000001
+#define NVC0C0_QMDV02_01_CWD_MEMBAR_TYPE_L1_MEMBAR 0x00000003
+#define NVC0C0_QMDV02_01_SEQUENTIALLY_RUN_CTAS MW(370:370)
+#define NVC0C0_QMDV02_01_SEQUENTIALLY_RUN_CTAS_FALSE 0x00000000
+#define NVC0C0_QMDV02_01_SEQUENTIALLY_RUN_CTAS_TRUE 0x00000001
+#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_DECR_ENABLE MW(371:371)
+#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE 0x00000000
+#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE 0x00000001
+#define NVC0C0_QMDV02_01_THROTTLED MW(372:372)
+#define NVC0C0_QMDV02_01_THROTTLED_FALSE 0x00000000
+#define NVC0C0_QMDV02_01_THROTTLED_TRUE 0x00000001
+#define NVC0C0_QMDV02_01_API_VISIBLE_CALL_LIMIT MW(378:378)
+#define NVC0C0_QMDV02_01_API_VISIBLE_CALL_LIMIT__32 0x00000000
+#define NVC0C0_QMDV02_01_API_VISIBLE_CALL_LIMIT_NO_CHECK 0x00000001
+#define NVC0C0_QMDV02_01_SAMPLER_INDEX MW(382:382)
+#define NVC0C0_QMDV02_01_SAMPLER_INDEX_INDEPENDENTLY 0x00000000
+#define NVC0C0_QMDV02_01_SAMPLER_INDEX_VIA_HEADER_INDEX 0x00000001
+#define NVC0C0_QMDV02_01_CTA_RASTER_WIDTH MW(415:384)
+#define NVC0C0_QMDV02_01_CTA_RASTER_HEIGHT MW(431:416)
+#define NVC0C0_QMDV02_01_QMD_RESERVED13A MW(447:432)
+#define NVC0C0_QMDV02_01_CTA_RASTER_DEPTH MW(463:448)
+#define NVC0C0_QMDV02_01_QMD_RESERVED14A MW(479:464)
+#define NVC0C0_QMDV02_01_DEPENDENT_QMD_POINTER MW(511:480) /* same bits that V02_00 names QMD_RESERVED15A */
+#define NVC0C0_QMDV02_01_QUEUE_ENTRIES_PER_CTA_MINUS_ONE MW(518:512)
+#define NVC0C0_QMDV02_01_COALESCE_WAITING_PERIOD MW(529:522)
+#define NVC0C0_QMDV02_01_SHARED_MEMORY_SIZE MW(561:544)
+#define NVC0C0_QMDV02_01_QMD_RESERVED_G MW(575:562)
+#define NVC0C0_QMDV02_01_QMD_VERSION MW(579:576)
+#define NVC0C0_QMDV02_01_QMD_MAJOR_VERSION MW(583:580)
+#define NVC0C0_QMDV02_01_QMD_RESERVED_H MW(591:584)
+#define NVC0C0_QMDV02_01_CTA_THREAD_DIMENSION0 MW(607:592)
+#define NVC0C0_QMDV02_01_CTA_THREAD_DIMENSION1 MW(623:608)
+#define NVC0C0_QMDV02_01_CTA_THREAD_DIMENSION2 MW(639:624)
+#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_VALID(i) MW((640+(i)*1):(640+(i)*1)) /* one bit per constant buffer i, at bit 640+i; bits 640..647 precede QMD_RESERVED_I */
+#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_VALID_FALSE 0x00000000
+#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_VALID_TRUE 0x00000001
+#define NVC0C0_QMDV02_01_QMD_RESERVED_I MW(671:648)
+#define NVC0C0_QMDV02_01_SM_DISABLE_MASK_LOWER MW(703:672)
+#define NVC0C0_QMDV02_01_SM_DISABLE_MASK_UPPER MW(735:704)
+#define NVC0C0_QMDV02_01_RELEASE0_ADDRESS_LOWER MW(767:736)
+#define NVC0C0_QMDV02_01_RELEASE0_ADDRESS_UPPER MW(775:768)
+#define NVC0C0_QMDV02_01_QMD_RESERVED_J MW(783:776)
+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP MW(790:788)
+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_ADD 0x00000000
+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_MIN 0x00000001
+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_MAX 0x00000002
+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_INC 0x00000003
+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_DEC 0x00000004
+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_AND 0x00000005
+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_OR 0x00000006
+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_XOR 0x00000007
+#define NVC0C0_QMDV02_01_QMD_RESERVED_K MW(791:791)
+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_FORMAT MW(793:792)
+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32 0x00000000
+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_FORMAT_SIGNED_32 0x00000001
+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_ENABLE MW(794:794)
+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_ENABLE_FALSE 0x00000000
+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_ENABLE_TRUE 0x00000001
+#define NVC0C0_QMDV02_01_RELEASE0_STRUCTURE_SIZE MW(799:799)
+#define NVC0C0_QMDV02_01_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS 0x00000000
+#define NVC0C0_QMDV02_01_RELEASE0_STRUCTURE_SIZE_ONE_WORD 0x00000001
+#define NVC0C0_QMDV02_01_RELEASE0_PAYLOAD MW(831:800)
+#define NVC0C0_QMDV02_01_RELEASE1_ADDRESS_LOWER MW(863:832)
+#define NVC0C0_QMDV02_01_RELEASE1_ADDRESS_UPPER MW(871:864)
+#define NVC0C0_QMDV02_01_QMD_RESERVED_L MW(879:872)
+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP MW(886:884)
+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_ADD 0x00000000
+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_MIN 0x00000001
+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_MAX 0x00000002
+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_INC 0x00000003
+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_DEC 0x00000004
+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_AND 0x00000005
+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_OR 0x00000006
+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_XOR 0x00000007
+#define NVC0C0_QMDV02_01_QMD_RESERVED_M MW(887:887)
+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_FORMAT MW(889:888)
+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32 0x00000000
+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_FORMAT_SIGNED_32 0x00000001
+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_ENABLE MW(890:890)
+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_ENABLE_FALSE 0x00000000
+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_ENABLE_TRUE 0x00000001
+#define NVC0C0_QMDV02_01_RELEASE1_STRUCTURE_SIZE MW(895:895)
+#define NVC0C0_QMDV02_01_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS 0x00000000
+#define NVC0C0_QMDV02_01_RELEASE1_STRUCTURE_SIZE_ONE_WORD 0x00000001
+#define NVC0C0_QMDV02_01_RELEASE1_PAYLOAD MW(927:896)
+#define NVC0C0_QMDV02_01_SHADER_LOCAL_MEMORY_LOW_SIZE MW(951:928)
+#define NVC0C0_QMDV02_01_QMD_RESERVED_N MW(954:952)
+#define NVC0C0_QMDV02_01_BARRIER_COUNT MW(959:955)
+#define NVC0C0_QMDV02_01_SHADER_LOCAL_MEMORY_HIGH_SIZE MW(983:960)
+#define NVC0C0_QMDV02_01_REGISTER_COUNT MW(991:984)
+#define NVC0C0_QMDV02_01_SHADER_LOCAL_MEMORY_CRS_SIZE MW(1015:992)
+#define NVC0C0_QMDV02_01_SASS_VERSION MW(1023:1016)
+#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_ADDR_LOWER(i) MW((1055+(i)*64):(1024+(i)*64)) /* the entry for constant buffer i occupies the 64 bits starting at bit 1024+i*64 */
+#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_ADDR_UPPER(i) MW((1072+(i)*64):(1056+(i)*64)) /* 17-bit field */
+#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_RESERVED_ADDR(i) MW((1073+(i)*64):(1073+(i)*64))
+#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_INVALIDATE(i) MW((1074+(i)*64):(1074+(i)*64))
+#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_INVALIDATE_FALSE 0x00000000
+#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_INVALIDATE_TRUE 0x00000001
+#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_SIZE_SHIFTED4(i) MW((1087+(i)*64):(1075+(i)*64)) /* 13-bit field; name suggests the size is stored shifted right by 4 -- TODO confirm */
+#define NVC0C0_QMDV02_01_QMD_RESERVED_R MW(1567:1536) /* V02_00 defines HW_ONLY_INNER_GET at 1566:1536; in this version the HW_ONLY_* fields start at bit 1600 */
+#define NVC0C0_QMDV02_01_QMD_RESERVED_S MW(1599:1568)
+#define NVC0C0_QMDV02_01_HW_ONLY_INNER_GET MW(1630:1600)
+#define NVC0C0_QMDV02_01_HW_ONLY_REQUIRE_SCHEDULING_PCAS MW(1631:1631)
+#define NVC0C0_QMDV02_01_HW_ONLY_INNER_PUT MW(1662:1632)
+#define NVC0C0_QMDV02_01_HW_ONLY_SCG_TYPE MW(1663:1663)
+#define NVC0C0_QMDV02_01_HW_ONLY_SPAN_LIST_HEAD_INDEX MW(1693:1664)
+#define NVC0C0_QMDV02_01_QMD_RESERVED_Q MW(1694:1694)
+#define NVC0C0_QMDV02_01_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID MW(1695:1695)
+#define NVC0C0_QMDV02_01_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE 0x00000000
+#define NVC0C0_QMDV02_01_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE 0x00000001
+#define NVC0C0_QMDV02_01_HW_ONLY_SKED_NEXT_QMD_POINTER MW(1727:1696)
+#define NVC0C0_QMDV02_01_QMD_SPARE_G MW(1759:1728)
+#define NVC0C0_QMDV02_01_QMD_SPARE_H MW(1791:1760)
+#define NVC0C0_QMDV02_01_QMD_SPARE_I MW(1823:1792)
+#define NVC0C0_QMDV02_01_QMD_SPARE_J MW(1855:1824)
+#define NVC0C0_QMDV02_01_QMD_SPARE_K MW(1887:1856)
+#define NVC0C0_QMDV02_01_QMD_SPARE_L MW(1919:1888)
+#define NVC0C0_QMDV02_01_QMD_SPARE_M MW(1951:1920)
+#define NVC0C0_QMDV02_01_QMD_SPARE_N MW(1983:1952)
+#define NVC0C0_QMDV02_01_DEBUG_ID_UPPER MW(2015:1984)
+#define NVC0C0_QMDV02_01_DEBUG_ID_LOWER MW(2047:2016)
+
+
+
+#endif // #ifndef __CLC0C0QMD_H__
diff --git a/src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h b/src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h
new file mode 100644
index 00000000000..588cc639d32
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h
@@ -0,0 +1,245 @@
+/*******************************************************************************
+ Copyright (c) 2001-2010 NVIDIA Corporation
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to
+ deal in the Software without restriction, including without limitation the
+ rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ sell copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+
+*******************************************************************************/
+
+/* AUTO GENERATED FILE -- DO NOT EDIT */
+
+#ifndef __CLC3C0QMD_H__
+#define __CLC3C0QMD_H__
+
+/*
+** Queue Meta Data, Version 02_02 -- each field is an MW(hi:lo) bit range; the highest defined bit is 2047.
+ */
+
+// The below C preprocessor definitions describe "multi-word" structures, where
+// fields may have bit numbers beyond 32. For example, MW(127:96) means
+// the field is in bits 0-31 of word number 3 of the structure. The "MW(X:Y)"
+// syntax is to distinguish from similar "X:Y" single-word definitions: the
+// macros historically used for single-word definitions would fail with
+// multi-word definitions.
+//
+// See nvmisc.h:DRF_VAL_MW() in the source code of the kernel
+// interface layer of nvidia.ko for an example of how to manipulate
+// these MW(X:Y) definitions.
+
+#define NVC3C0_QMDV02_02_OUTER_PUT MW(30:0)
+#define NVC3C0_QMDV02_02_OUTER_OVERFLOW MW(31:31)
+#define NVC3C0_QMDV02_02_OUTER_GET MW(62:32)
+#define NVC3C0_QMDV02_02_OUTER_STICKY_OVERFLOW MW(63:63)
+#define NVC3C0_QMDV02_02_INNER_GET MW(94:64)
+#define NVC3C0_QMDV02_02_INNER_OVERFLOW MW(95:95)
+#define NVC3C0_QMDV02_02_INNER_PUT MW(126:96)
+#define NVC3C0_QMDV02_02_INNER_STICKY_OVERFLOW MW(127:127)
+#define NVC3C0_QMDV02_02_QMD_GROUP_ID MW(133:128)
+#define NVC3C0_QMDV02_02_SM_GLOBAL_CACHING_ENABLE MW(134:134)
+#define NVC3C0_QMDV02_02_RUN_CTA_IN_ONE_SM_PARTITION MW(135:135)
+#define NVC3C0_QMDV02_02_RUN_CTA_IN_ONE_SM_PARTITION_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_RUN_CTA_IN_ONE_SM_PARTITION_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_IS_QUEUE MW(136:136)
+#define NVC3C0_QMDV02_02_IS_QUEUE_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_IS_QUEUE_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST MW(137:137)
+#define NVC3C0_QMDV02_02_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE0 MW(138:138)
+#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE0_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE0_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE1 MW(139:139)
+#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE1_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE1_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_REQUIRE_SCHEDULING_PCAS MW(140:140)
+#define NVC3C0_QMDV02_02_REQUIRE_SCHEDULING_PCAS_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_REQUIRE_SCHEDULING_PCAS_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_SCHEDULE_ENABLE MW(141:141)
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_TYPE MW(142:142)
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_TYPE_QUEUE 0x00000000
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_TYPE_GRID 0x00000001
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_FIELD_COPY MW(143:143)
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_FIELD_COPY_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_FIELD_COPY_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_QMD_RESERVED_B MW(159:144)
+#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_SIZE MW(184:160)
+#define NVC3C0_QMDV02_02_QMD_RESERVED_C MW(185:185)
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_HEADER_CACHE MW(186:186)
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_SAMPLER_CACHE MW(187:187)
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_DATA_CACHE MW(188:188)
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_DATA_CACHE_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_DATA_CACHE_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_DATA_CACHE MW(189:189)
+#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_DATA_CACHE_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_DATA_CACHE_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_INVALIDATE_INSTRUCTION_CACHE MW(190:190)
+#define NVC3C0_QMDV02_02_INVALIDATE_INSTRUCTION_CACHE_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_INVALIDATE_INSTRUCTION_CACHE_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_CONSTANT_CACHE MW(191:191)
+#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_CTA_RASTER_WIDTH_RESUME MW(223:192)
+#define NVC3C0_QMDV02_02_CTA_RASTER_HEIGHT_RESUME MW(239:224)
+#define NVC3C0_QMDV02_02_CTA_RASTER_DEPTH_RESUME MW(255:240)
+#define NVC3C0_QMDV02_02_PROGRAM_OFFSET MW(287:256)
+#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_ADDR_LOWER MW(319:288)
+#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_ADDR_UPPER MW(327:320)
+#define NVC3C0_QMDV02_02_QMD_RESERVED_D MW(335:328)
+#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_ENTRY_SIZE MW(351:336)
+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_ID MW(357:352)
+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE MW(365:358)
+#define NVC3C0_QMDV02_02_RELEASE_MEMBAR_TYPE MW(366:366)
+#define NVC3C0_QMDV02_02_RELEASE_MEMBAR_TYPE_FE_NONE 0x00000000
+#define NVC3C0_QMDV02_02_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR 0x00000001
+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_INCR_ENABLE MW(367:367)
+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE MW(369:368)
+#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE_L1_NONE 0x00000000
+#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE_L1_SYSMEMBAR 0x00000001
+#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE_L1_MEMBAR 0x00000003
+#define NVC3C0_QMDV02_02_SEQUENTIALLY_RUN_CTAS MW(370:370)
+#define NVC3C0_QMDV02_02_SEQUENTIALLY_RUN_CTAS_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_SEQUENTIALLY_RUN_CTAS_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DECR_ENABLE MW(371:371)
+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_API_VISIBLE_CALL_LIMIT MW(378:378)
+#define NVC3C0_QMDV02_02_API_VISIBLE_CALL_LIMIT__32 0x00000000
+#define NVC3C0_QMDV02_02_API_VISIBLE_CALL_LIMIT_NO_CHECK 0x00000001
+#define NVC3C0_QMDV02_02_SAMPLER_INDEX MW(382:382)
+#define NVC3C0_QMDV02_02_SAMPLER_INDEX_INDEPENDENTLY 0x00000000
+#define NVC3C0_QMDV02_02_SAMPLER_INDEX_VIA_HEADER_INDEX 0x00000001
+#define NVC3C0_QMDV02_02_CTA_RASTER_WIDTH MW(415:384)
+#define NVC3C0_QMDV02_02_CTA_RASTER_HEIGHT MW(431:416)
+#define NVC3C0_QMDV02_02_QMD_RESERVED13A MW(447:432)
+#define NVC3C0_QMDV02_02_CTA_RASTER_DEPTH MW(463:448)
+#define NVC3C0_QMDV02_02_QMD_RESERVED14A MW(479:464)
+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_POINTER MW(511:480)
+#define NVC3C0_QMDV02_02_QUEUE_ENTRIES_PER_CTA_MINUS_ONE MW(518:512)
+#define NVC3C0_QMDV02_02_COALESCE_WAITING_PERIOD MW(529:522)
+#define NVC3C0_QMDV02_02_SHARED_MEMORY_SIZE MW(561:544)
+#define NVC3C0_QMDV02_02_MIN_SM_CONFIG_SHARED_MEM_SIZE MW(568:562) /* bits 575:562 are QMD_RESERVED_G in V02_01 (clc0c0qmd.h) */
+#define NVC3C0_QMDV02_02_MAX_SM_CONFIG_SHARED_MEM_SIZE MW(575:569)
+#define NVC3C0_QMDV02_02_QMD_VERSION MW(579:576)
+#define NVC3C0_QMDV02_02_QMD_MAJOR_VERSION MW(583:580)
+#define NVC3C0_QMDV02_02_QMD_RESERVED_H MW(591:584)
+#define NVC3C0_QMDV02_02_CTA_THREAD_DIMENSION0 MW(607:592)
+#define NVC3C0_QMDV02_02_CTA_THREAD_DIMENSION1 MW(623:608)
+#define NVC3C0_QMDV02_02_CTA_THREAD_DIMENSION2 MW(639:624)
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_VALID(i) MW((640+(i)*1):(640+(i)*1)) /* one bit per constant buffer i, at bit 640+i; bits 640..647 precede REGISTER_COUNT_V */
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_VALID_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_VALID_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_REGISTER_COUNT_V MW(656:648) /* 9-bit field; bits 671:648 are QMD_RESERVED_I in V02_01; the 8-bit REGISTER_COUNT at 991:984 is still defined below */
+#define NVC3C0_QMDV02_02_TARGET_SM_CONFIG_SHARED_MEM_SIZE MW(663:657)
+#define NVC3C0_QMDV02_02_FREE_CTA_SLOTS_EMPTY_SM MW(671:664)
+#define NVC3C0_QMDV02_02_SM_DISABLE_MASK_LOWER MW(703:672)
+#define NVC3C0_QMDV02_02_SM_DISABLE_MASK_UPPER MW(735:704)
+#define NVC3C0_QMDV02_02_RELEASE0_ADDRESS_LOWER MW(767:736)
+#define NVC3C0_QMDV02_02_RELEASE0_ADDRESS_UPPER MW(775:768)
+#define NVC3C0_QMDV02_02_QMD_RESERVED_J MW(783:776)
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP MW(790:788)
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_ADD 0x00000000
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_MIN 0x00000001
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_MAX 0x00000002
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_INC 0x00000003
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_DEC 0x00000004
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_AND 0x00000005
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_OR 0x00000006
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_XOR 0x00000007
+#define NVC3C0_QMDV02_02_QMD_RESERVED_K MW(791:791)
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_FORMAT MW(793:792)
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32 0x00000000
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_FORMAT_SIGNED_32 0x00000001
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_ENABLE MW(794:794)
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_ENABLE_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_ENABLE_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_RELEASE0_STRUCTURE_SIZE MW(799:799)
+#define NVC3C0_QMDV02_02_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS 0x00000000
+#define NVC3C0_QMDV02_02_RELEASE0_STRUCTURE_SIZE_ONE_WORD 0x00000001
+#define NVC3C0_QMDV02_02_RELEASE0_PAYLOAD MW(831:800)
+#define NVC3C0_QMDV02_02_RELEASE1_ADDRESS_LOWER MW(863:832)
+#define NVC3C0_QMDV02_02_RELEASE1_ADDRESS_UPPER MW(871:864)
+#define NVC3C0_QMDV02_02_QMD_RESERVED_L MW(879:872)
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP MW(886:884)
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_ADD 0x00000000
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_MIN 0x00000001
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_MAX 0x00000002
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_INC 0x00000003
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_DEC 0x00000004
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_AND 0x00000005
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_OR 0x00000006
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_XOR 0x00000007
+#define NVC3C0_QMDV02_02_QMD_RESERVED_M MW(887:887)
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_FORMAT MW(889:888)
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32 0x00000000
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_FORMAT_SIGNED_32 0x00000001
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_ENABLE MW(890:890)
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_ENABLE_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_ENABLE_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_RELEASE1_STRUCTURE_SIZE MW(895:895)
+#define NVC3C0_QMDV02_02_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS 0x00000000
+#define NVC3C0_QMDV02_02_RELEASE1_STRUCTURE_SIZE_ONE_WORD 0x00000001
+#define NVC3C0_QMDV02_02_RELEASE1_PAYLOAD MW(927:896)
+#define NVC3C0_QMDV02_02_SHADER_LOCAL_MEMORY_LOW_SIZE MW(951:928)
+#define NVC3C0_QMDV02_02_QMD_RESERVED_N MW(954:952)
+#define NVC3C0_QMDV02_02_BARRIER_COUNT MW(959:955)
+#define NVC3C0_QMDV02_02_SHADER_LOCAL_MEMORY_HIGH_SIZE MW(983:960)
+#define NVC3C0_QMDV02_02_REGISTER_COUNT MW(991:984)
+#define NVC3C0_QMDV02_02_SHADER_LOCAL_MEMORY_CRS_SIZE MW(1015:992)
+#define NVC3C0_QMDV02_02_SASS_VERSION MW(1023:1016)
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_ADDR_LOWER(i) MW((1055+(i)*64):(1024+(i)*64)) /* the entry for constant buffer i occupies the 64 bits starting at bit 1024+i*64 */
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_ADDR_UPPER(i) MW((1072+(i)*64):(1056+(i)*64)) /* 17-bit field */
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_RESERVED_ADDR(i) MW((1073+(i)*64):(1073+(i)*64))
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_INVALIDATE(i) MW((1074+(i)*64):(1074+(i)*64))
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_INVALIDATE_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_INVALIDATE_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_SIZE_SHIFTED4(i) MW((1087+(i)*64):(1075+(i)*64)) /* 13-bit field; name suggests the size is stored shifted right by 4 -- TODO confirm */
+#define NVC3C0_QMDV02_02_PROGRAM_ADDRESS_LOWER MW(1567:1536) /* bits 1567:1536 are QMD_RESERVED_R in V02_01 (clc0c0qmd.h) */
+#define NVC3C0_QMDV02_02_PROGRAM_ADDRESS_UPPER MW(1584:1568) /* 17-bit field in the low bits of what V02_01 names QMD_RESERVED_S */
+#define NVC3C0_QMDV02_02_QMD_RESERVED_S MW(1599:1585)
+#define NVC3C0_QMDV02_02_HW_ONLY_INNER_GET MW(1630:1600)
+#define NVC3C0_QMDV02_02_HW_ONLY_REQUIRE_SCHEDULING_PCAS MW(1631:1631)
+#define NVC3C0_QMDV02_02_HW_ONLY_INNER_PUT MW(1662:1632)
+#define NVC3C0_QMDV02_02_HW_ONLY_SCG_TYPE MW(1663:1663)
+#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX MW(1693:1664)
+#define NVC3C0_QMDV02_02_QMD_RESERVED_Q MW(1694:1694)
+#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID MW(1695:1695)
+#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE 0x00000000
+#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE 0x00000001
+#define NVC3C0_QMDV02_02_HW_ONLY_SKED_NEXT_QMD_POINTER MW(1727:1696)
+#define NVC3C0_QMDV02_02_QMD_SPARE_G MW(1759:1728)
+#define NVC3C0_QMDV02_02_QMD_SPARE_H MW(1791:1760)
+#define NVC3C0_QMDV02_02_QMD_SPARE_I MW(1823:1792)
+#define NVC3C0_QMDV02_02_QMD_SPARE_J MW(1855:1824)
+#define NVC3C0_QMDV02_02_QMD_SPARE_K MW(1887:1856)
+#define NVC3C0_QMDV02_02_QMD_SPARE_L MW(1919:1888)
+#define NVC3C0_QMDV02_02_QMD_SPARE_M MW(1951:1920)
+#define NVC3C0_QMDV02_02_QMD_SPARE_N MW(1983:1952)
+#define NVC3C0_QMDV02_02_DEBUG_ID_UPPER MW(2015:1984)
+#define NVC3C0_QMDV02_02_DEBUG_ID_LOWER MW(2047:2016)
+
+
+
+#endif // #ifndef __CLC3C0QMD_H__
diff --git a/src/gallium/drivers/nouveau/nvc0/drf.h b/src/gallium/drivers/nouveau/nvc0/drf.h
new file mode 100644
index 00000000000..bf95c8c3185
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/drf.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright 2019 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __NVHW_DRF_H__
+#define __NVHW_DRF_H__
+
+/* Helpers common to all DRF accessors; a field "drf" is a bit range written hi:lo, split below with the ?: operator. */
+#define DRF_LO(drf) (0 ? drf) /* 0 ? hi:lo -> lo (lowest bit of the field) */
+#define DRF_HI(drf) (1 ? drf) /* 1 ? hi:lo -> hi (highest bit of the field) */
+#define DRF_BITS(drf) (DRF_HI(drf) - DRF_LO(drf) + 1) /* field width in bits */
+#define DRF_MASK(drf) (~0ULL >> (64 - DRF_BITS(drf))) /* right-aligned mask; the shift count is only valid for widths 1..64 */
+#define DRF_SMASK(drf) (DRF_MASK(drf) << DRF_LO(drf)) /* the mask moved to the field's position */
+
+/* Helpers for DRF-MW accessors: fields of an array "o" of words, defined as MW(hi:lo) with bit numbers running on across elements from o[0]. */
+#define DRF_MX_MW(drf) drf /* target of the paste in DRF_MX: MW(hi:lo) -> hi:lo */
+#define DRF_MX(drf) DRF_MX_##drf /* pastes DRF_MX_ onto the leading MW token */
+#define DRF_MW(drf) DRF_MX(drf) /* extra level so drf is macro-expanded to MW(hi:lo) before the paste */
+#define DRF_MW_SPANS(o,drf) (DRF_LW_IDX((o),drf) != DRF_HW_IDX((o),drf)) /* nonzero if the lowest and highest bit lie in different words */
+#define DRF_MW_SIZE(o) (sizeof((o)[0]) * 8) /* bits per element of o */
+
+#define DRF_LW_IDX(o,drf) (DRF_LO(DRF_MW(drf)) / DRF_MW_SIZE(o)) /* index of the word holding the field's lowest bit */
+#define DRF_LW_LO(o,drf) (DRF_LO(DRF_MW(drf)) % DRF_MW_SIZE(o)) /* bit offset of the field inside that word */
+#define DRF_LW_HI(o,drf) (DRF_MW_SPANS((o),drf) ? (DRF_MW_SIZE(o) - 1) : DRF_HW_HI((o),drf)) /* last field bit inside the low word */
+#define DRF_LW_BITS(o,drf) (DRF_LW_HI((o),drf) - DRF_LW_LO((o),drf) + 1) /* number of field bits stored in the low word */
+#define DRF_LW_MASK(o,drf) (~0ULL >> (64 - DRF_LW_BITS((o),drf)))
+#define DRF_LW_SMASK(o,drf) (DRF_LW_MASK((o),drf) << DRF_LW_LO((o),drf))
+#define DRF_LW_GET(o,drf) (((o)[DRF_LW_IDX((o),drf)] >> DRF_LW_LO((o),drf)) & DRF_LW_MASK((o),drf)) /* low part of the field, right-aligned */
+#define DRF_LW_VAL(o,drf,v) (((v) & DRF_LW_MASK((o),drf)) << DRF_LW_LO((o),drf)) /* low bits of v, positioned for the low word */
+#define DRF_LW_CLR(o,drf) ((o)[DRF_LW_IDX((o),drf)] & ~DRF_LW_SMASK((o),drf)) /* low word with the field bits cleared */
+#define DRF_LW_SET(o,drf,v) (DRF_LW_CLR((o),drf) | DRF_LW_VAL((o),drf,(v))) /* new low-word value; an expression, nothing is stored */
+
+#define DRF_HW_IDX(o,drf) (DRF_HI(DRF_MW(drf)) / DRF_MW_SIZE(o)) /* index of the word holding the field's highest bit */
+#define DRF_HW_LO(o,drf) 0 /* the high-word part is taken to start at bit 0 of that word */
+#define DRF_HW_HI(o,drf) (DRF_HI(DRF_MW(drf)) % DRF_MW_SIZE(o)) /* bit offset of the field's highest bit inside the high word */
+#define DRF_HW_BITS(o,drf) (DRF_HW_HI((o),drf) - DRF_HW_LO((o),drf) + 1)
+#define DRF_HW_MASK(o,drf) (~0ULL >> (64 - DRF_HW_BITS((o),drf)))
+#define DRF_HW_SMASK(o,drf) (DRF_HW_MASK((o),drf) << DRF_HW_LO((o),drf))
+#define DRF_HW_GET(o,drf) ((o)[DRF_HW_IDX(o,drf)] & DRF_HW_SMASK((o),drf)) /* high part of the field, unshifted; NVVAL_MW_GET_X shifts it up by DRF_LW_BITS */
+#define DRF_HW_VAL(o,drf,v) (((long long)(v) >> DRF_LW_BITS((o),drf)) & DRF_HW_SMASK((o),drf)) /* bits of v above those stored in the low word */
+#define DRF_HW_CLR(o,drf) ((o)[DRF_HW_IDX((o),drf)] & ~DRF_HW_SMASK((o),drf)) /* high word with the field bits cleared */
+#define DRF_HW_SET(o,drf,v) (DRF_HW_CLR((o),drf) | DRF_HW_VAL((o),drf,(v))) /* new high-word value; an expression, nothing is stored */
+
+/* DRF accessors: single-value fields named d##_##r##_##f; each variadic front end picks its _N (plain field) or _I (indexed field f(i)) form by argument count. */
+#define NVVAL_X(drf,v) (((v) & DRF_MASK(drf)) << DRF_LO(drf)) /* v masked to the field width and shifted into place */
+#define NVVAL_N(X,d,r,f, v) NVVAL_X(d##_##r##_##f, (v))
+#define NVVAL_I(X,d,r,f,i,v) NVVAL_X(d##_##r##_##f(i), (v))
+#define NVVAL_(X,_1,_2,_3,_4,_5,IMPL,...) IMPL /* selects its 7th argument */
+#define NVVAL(A...) NVVAL_(X, ##A, NVVAL_I, NVVAL_N)(X, ##A) /* (d,r,f,v) -> NVVAL_N; (d,r,f,i,v) -> NVVAL_I; "A..." and ", ##A" are GNU extensions */
+
+#define NVDEF_N(X,d,r,f, v) NVVAL_X(d##_##r##_##f, d##_##r##_##f##_##v) /* like NVVAL, but v is the suffix of the named constant d_r_f_v */
+#define NVDEF_I(X,d,r,f,i,v) NVVAL_X(d##_##r##_##f(i), d##_##r##_##f##_##v)
+#define NVDEF_(X,_1,_2,_3,_4,_5,IMPL,...) IMPL
+#define NVDEF(A...) NVDEF_(X, ##A, NVDEF_I, NVDEF_N)(X, ##A) /* (d,r,f,v) or (d,r,f,i,v) */
+
+#define NVVAL_GET_X(o,drf) (((o) >> DRF_LO(drf)) & DRF_MASK(drf)) /* extracts field drf from the value o, right-aligned */
+#define NVVAL_GET_N(X,o,d,r,f ) NVVAL_GET_X(o, d##_##r##_##f)
+#define NVVAL_GET_I(X,o,d,r,f,i) NVVAL_GET_X(o, d##_##r##_##f(i))
+#define NVVAL_GET_(X,_1,_2,_3,_4,_5,IMPL,...) IMPL /* selects its 7th argument */
+#define NVVAL_GET(A...) NVVAL_GET_(X, ##A, NVVAL_GET_I, NVVAL_GET_N)(X, ##A) /* (o,d,r,f) -> _N; (o,d,r,f,i) -> _I */
+
+#define NVVAL_SET_X(o,drf,v) (((o) & ~DRF_SMASK(drf)) | NVVAL_X(drf, (v))) /* o with field drf replaced by v; yields the value, does not assign to o */
+#define NVVAL_SET_N(X,o,d,r,f, v) NVVAL_SET_X(o, d##_##r##_##f, (v))
+#define NVVAL_SET_I(X,o,d,r,f,i,v) NVVAL_SET_X(o, d##_##r##_##f(i), (v))
+#define NVVAL_SET_(X,_1,_2,_3,_4,_5,_6,IMPL,...) IMPL /* selects its 8th argument */
+#define NVVAL_SET(A...) NVVAL_SET_(X, ##A, NVVAL_SET_I, NVVAL_SET_N)(X, ##A) /* (o,d,r,f,v) -> _N; (o,d,r,f,i,v) -> _I */
+
+#define NVDEF_SET_N(X,o,d,r,f, v) /* NVVAL_SET with the named constant d_r_f_v as the value */ \
+ NVVAL_SET_X(o, d##_##r##_##f, d##_##r##_##f##_##v)
+#define NVDEF_SET_I(X,o,d,r,f,i,v) /* same, for the indexed field f(i) */ \
+ NVVAL_SET_X(o, d##_##r##_##f(i), d##_##r##_##f##_##v)
+#define NVDEF_SET_(X,_1,_2,_3,_4,_5,_6,IMPL,...) IMPL /* selects its 8th argument */
+#define NVDEF_SET(A...) NVDEF_SET_(X, ##A, NVDEF_SET_I, NVDEF_SET_N)(X, ##A) /* (o,d,r,f,v) -> _N; (o,d,r,f,i,v) -> _I */
+
+/* DRF-MW accessors: o is an array of words; only the words holding the field's lowest and highest bit are accessed. */
+#define NVVAL_MW_GET_X(o,drf) /* low part, OR'ed with the high part shifted above it when the field spans words */ \
+ ((DRF_MW_SPANS((o),drf) ? \
+ (DRF_HW_GET((o),drf) << DRF_LW_BITS((o),drf)) : 0) | DRF_LW_GET((o),drf))
+#define NVVAL_MW_GET_N(X,o,d,r,f ) NVVAL_MW_GET_X((o), d##_##r##_##f)
+#define NVVAL_MW_GET_I(X,o,d,r,f,i) NVVAL_MW_GET_X((o), d##_##r##_##f(i))
+#define NVVAL_MW_GET_(X,_1,_2,_3,_4,_5,IMPL,...) IMPL /* selects its 7th argument */
+#define NVVAL_MW_GET(A...) NVVAL_MW_GET_(X, ##A, NVVAL_MW_GET_I, NVVAL_MW_GET_N)(X, ##A) /* (o,d,r,f) -> _N; (o,d,r,f,i) -> _I */
+
+#define NVVAL_MW_SET_X(o,drf,v) do { /* read-modify-write of the field in o[]; o and v are evaluated more than once */ \
+ (o)[DRF_LW_IDX((o),drf)] = DRF_LW_SET((o),drf,(v)); \
+ if (DRF_MW_SPANS((o),drf)) /* store the remaining high bits into the word holding the field's highest bit */ \
+ (o)[DRF_HW_IDX((o),drf)] = DRF_HW_SET((o),drf,(v)); \
+} while(0)
+#define NVVAL_MW_SET_N(X,o,d,r,f, v) NVVAL_MW_SET_X((o), d##_##r##_##f, (v))
+#define NVVAL_MW_SET_I(X,o,d,r,f,i,v) NVVAL_MW_SET_X((o), d##_##r##_##f(i), (v))
+#define NVVAL_MW_SET_(X,_1,_2,_3,_4,_5,_6,IMPL,...) IMPL /* selects its 8th argument */
+#define NVVAL_MW_SET(A...) /* (o,d,r,f,v) -> _N; (o,d,r,f,i,v) -> _I */ \
+ NVVAL_MW_SET_(X, ##A, NVVAL_MW_SET_I, NVVAL_MW_SET_N)(X, ##A)
+
+#define NVDEF_MW_SET_N(X,o,d,r,f, v) /* NVVAL_MW_SET with the named constant d_r_f_v as the value */ \
+ NVVAL_MW_SET_X(o, d##_##r##_##f, d##_##r##_##f##_##v)
+#define NVDEF_MW_SET_I(X,o,d,r,f,i,v) /* same, for the indexed field f(i) */ \
+ NVVAL_MW_SET_X(o, d##_##r##_##f(i), d##_##r##_##f##_##v)
+#define NVDEF_MW_SET_(X,_1,_2,_3,_4,_5,_6,IMPL,...) IMPL /* selects its 8th argument */
+#define NVDEF_MW_SET(A...) /* (o,d,r,f,v) -> _N; (o,d,r,f,i,v) -> _I */ \
+ NVDEF_MW_SET_(X, ##A, NVDEF_MW_SET_I, NVDEF_MW_SET_N)(X, ##A)
+#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h
new file mode 100644
index 00000000000..390741cbd04
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h
@@ -0,0 +1,904 @@
+#define NV_MME_PRED_MODE_UUUU 0 /* values for the "pm" argument of MME_BITS (placed at bit 1 of word 0); MME_INSN always passes UUUU */
+#define NV_MME_PRED_MODE_TTTT 1
+#define NV_MME_PRED_MODE_FFFF 2
+#define NV_MME_PRED_MODE_TTUU 3
+#define NV_MME_PRED_MODE_FFUU 4
+#define NV_MME_PRED_MODE_TFUU 5
+#define NV_MME_PRED_MODE_TUUU 6
+#define NV_MME_PRED_MODE_FUUU 7
+#define NV_MME_PRED_MODE_UUTT 8
+#define NV_MME_PRED_MODE_UUTF 9
+#define NV_MME_PRED_MODE_UUTU 10
+#define NV_MME_PRED_MODE_UUFT 11
+#define NV_MME_PRED_MODE_UUFF 12
+#define NV_MME_PRED_MODE_UUFU 13
+#define NV_MME_PRED_MODE_UUUT 14
+#define NV_MME_PRED_MODE_UUUF 15
+
+#define NV_MME_REG_R0 0 /* operand selectors for the d/a/b arguments of MME_INSN (pasted as NV_MME_REG_##x); MME_BITS spaces these fields 5 bits apart */
+#define NV_MME_REG_R1 1
+#define NV_MME_REG_R2 2
+#define NV_MME_REG_R3 3
+#define NV_MME_REG_R4 4
+#define NV_MME_REG_R5 5
+#define NV_MME_REG_R6 6
+#define NV_MME_REG_R7 7
+#define NV_MME_REG_R8 8
+#define NV_MME_REG_R9 9
+#define NV_MME_REG_R10 10
+#define NV_MME_REG_R11 11
+#define NV_MME_REG_R12 12
+#define NV_MME_REG_R13 13
+#define NV_MME_REG_R14 14
+#define NV_MME_REG_R15 15
+#define NV_MME_REG_R16 16
+#define NV_MME_REG_R17 17
+#define NV_MME_REG_R18 18
+#define NV_MME_REG_R19 19
+#define NV_MME_REG_R20 20
+#define NV_MME_REG_R21 21
+#define NV_MME_REG_R22 22
+#define NV_MME_REG_R23 23
+#define NV_MME_REG_ZERO 24 /* used in the macros of this file as a zero source and as a discarded destination */
+#define NV_MME_REG_IMMED 25 /* used as a source where the pseudo-code shows the slot's immediate value */
+#define NV_MME_REG_IMMEDPAIR 26
+#define NV_MME_REG_IMMED32 27
+#define NV_MME_REG_LOAD0 28 /* LOAD0/LOAD1 appear as sources where the pseudo-code shows load() */
+#define NV_MME_REG_LOAD1 29
+
+#define NV_MME_ALU_ADD 0 /* opcodes for the o0/o1 arguments of MME_INSN (pasted as NV_MME_ALU_##x) */
+#define NV_MME_ALU_ADDC 1
+#define NV_MME_ALU_SUB 2
+#define NV_MME_ALU_SUBB 3
+#define NV_MME_ALU_MUL 4
+#define NV_MME_ALU_MULH 5
+#define NV_MME_ALU_MULU 6
+#define NV_MME_ALU_EXTENDED 7
+#define NV_MME_ALU_CLZ 8
+#define NV_MME_ALU_SLL 9
+#define NV_MME_ALU_SRL 10
+#define NV_MME_ALU_SRA 11
+#define NV_MME_ALU_AND 12
+#define NV_MME_ALU_NAND 13
+#define NV_MME_ALU_OR 14
+#define NV_MME_ALU_XOR 15
+#define NV_MME_ALU_MERGE 16 /* used in this file for bit-field extract/insert; immediate looks like (dst_pos<<10)|(width<<5)|src_pos - TODO confirm */
+#define NV_MME_ALU_SLT 17
+#define NV_MME_ALU_SLTU 18
+#define NV_MME_ALU_SLE 19
+#define NV_MME_ALU_SLEU 20
+#define NV_MME_ALU_SEQ 21
+#define NV_MME_ALU_STATE 22 /* used in this file where the pseudo-code shows read(method address) */
+#define NV_MME_ALU_LOOP 23
+#define NV_MME_ALU_JAL 24
+#define NV_MME_ALU_BLT 25
+#define NV_MME_ALU_BLTU 26
+#define NV_MME_ALU_BLE 27
+#define NV_MME_ALU_BLEU 28
+#define NV_MME_ALU_BEQ 29
+#define NV_MME_ALU_DREAD 30
+#define NV_MME_ALU_DWRITE 31
+
+#define NV_MME_OUT_NONE 0 /* output selectors for the m0/e0/m1/e1 arguments of MME_INSN (pasted as NV_MME_OUT_##x) */
+#define NV_MME_OUT_ALU0 1
+#define NV_MME_OUT_ALU1 2
+#define NV_MME_OUT_LOAD0 3
+#define NV_MME_OUT_LOAD1 4
+#define NV_MME_OUT_IMMED0 5
+#define NV_MME_OUT_IMMED1 6
+#define NV_MME_OUT_RESERVED 7
+#define NV_MME_OUT_IMMEDHIGH0 8
+#define NV_MME_OUT_IMMEDHIGH1 9
+#define NV_MME_OUT_IMMED32_0 10
+
+#define MME_BITS(en,pm,pr,o0,d0,a0,b0,i0,o1,d1,a1,b1,i1,m0,e0,m1,e1) /* packs one instruction into three 32-bit initializers, highest word first; operands are cast to uint32_t so no signed shift can overflow */ \
+ ((uint32_t)(e1) << (92 - 64) | (uint32_t)(m1) << (89 - 64) | /* word 2: instruction bits 64 and up */ \
+ (uint32_t)(e0) << (85 - 64) | (uint32_t)(m0) << (82 - 64) | \
+ (uint32_t)(i1) << (66 - 64) | (uint32_t)(b1) >> (64 - 61)), /* b1 straddles words 1 and 2: its upper bits go here */ \
+ (((uint32_t)(b1) & 7) << (61 - 32) | (uint32_t)(a1) << (56 - 32) | /* word 1: instruction bits 32..63 */ \
+ (uint32_t)(d1) << (51 - 32) | (uint32_t)(o1) << (46 - 32) | \
+ (uint32_t)(i0) >> (32 - 30)), /* i0 straddles words 0 and 1: its upper bits go here */ \
+ (((uint32_t)(i0) & 3) << 30 | (uint32_t)(b0) << 25 | (uint32_t)(a0) << 20 | (uint32_t)(d0) << 15 | (uint32_t)(o0) << 10 | /* word 0: instruction bits 0..31 */ \
+ (uint32_t)(pr) << 5 | (uint32_t)(pm) << 1 | (uint32_t)(en))
+
+#define MME_INSN(en,o0,d0,a0,b0,i0,m0,e0,o1,d1,a1,b1,i1,m1,e1) /* one instruction from short names: en, then op, dst, src a, src b, immediate, m-output, e-output for slot 0 and again for slot 1 */ \
+ MME_BITS((en), NV_MME_PRED_MODE_UUUU, NV_MME_REG_ZERO, /* the pm and pr arguments are fixed */ \
+ NV_MME_ALU_##o0, NV_MME_REG_##d0, \
+ NV_MME_REG_##a0, NV_MME_REG_##b0, (i0), \
+ NV_MME_ALU_##o1, NV_MME_REG_##d1, \
+ NV_MME_REG_##a1, NV_MME_REG_##b1, (i1), \
+ NV_MME_OUT_##m0, NV_MME_OUT_##e0, /* MME_BITS takes m0,e0,m1,e1 last, unlike this macro's per-slot order */ \
+ NV_MME_OUT_##m1, NV_MME_OUT_##e1)
+
+uint32_t mmec597_per_instance_bf[] = { // per the pseudo-code below: selects method 0x1880, then sends "count" successive bits of "mask", lowest bit first
+// r1 = load(); // count
+// r3 = load(); // mask
+// mthd(0x1880, 1); // VERTEX_ARRAY_PER_INSTANCE[0]
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, (1<<12)|0x1880/4, IMMED0, NONE,
+ ADD, R3, LOAD1, ZERO, 0, NONE, NONE),
+// while (HW_LOOP_COUNT < r1) {
+// send(r3 & 1);
+// r3 >>= 1;
+// }
+ MME_INSN(0, LOOP, ZERO, R1, ZERO, 0x0003, NONE, NONE, // immediate 3 = the 2 body instructions + 1
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, AND, ZERO, R3, IMMED, 1, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, SRL, R3, R3, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, 0, NONE, NONE, // en=1; exactly one more instruction follows it in this table
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_vertex_array_select[] = { // per the pseudo-code below: sends start (hi, lo) and limit (hi, lo) to the START_HIGH/LIMIT_HIGH methods of one vertex array
+// r1 = load(); // array
+// r2 = load(); // limit hi
+// r3 = load(); // limit lo
+// r4 = load(); // start hi
+// r5 = load(); // start lo
+// r6 = (r1 & 0x1f) << 2;
+// r7 = (r1 & 0x1f) << 1;
+// mthd(0x1c04 + r6, 1); // VERTEX_ARRAY_START_HIGH[]
+// send(r4);
+// send(r5);
+// mthd(0x0600 + r7, 1); // VERTEX_ARRAY_LIMIT_HIGH[]
+// send(r2);
+// send(r3);
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R2, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R3, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R4, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R5, LOAD0, ZERO, 0, NONE, NONE,
+ MERGE, R6, ZERO, R1, (2<<10)|(5<<5)|0, NONE, NONE), // encodes "r6 = (r1 & 0x1f) << 2" from the pseudo-code
+ MME_INSN(0, MERGE, R7, ZERO, R1, (1<<10)|(5<<5)|0, ALU1, NONE, // encodes "r7 = (r1 & 0x1f) << 1"; the m-output is taken from slot 1
+ ADD, ZERO, R6, IMMED, (1<<12)|0x1c04/4, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, R5, ZERO, 0, NONE, ALU1),
+ MME_INSN(1, ADD, ZERO, R7, IMMED, (1<<12)|0x0600/4, ALU0, ALU1, // en=1; exactly one more instruction follows it in this table
+ ADD, ZERO, R2, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R3, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_blend_enables[] = { // per the pseudo-code below: sends bits 0..7 of the enable mask as eight separate values after selecting method 0x1360
+// r1 = load(); // enable mask
+// mthd(0x1360, 1); // NVC0_3D_BLEND_ENABLE[]
+// send((r1 >> 0) & 1);
+// send((r1 >> 1) & 1);
+// send((r1 >> 2) & 1);
+// send((r1 >> 3) & 1);
+// send((r1 >> 4) & 1);
+// send((r1 >> 5) & 1);
+// send((r1 >> 6) & 1);
+// send((r1 >> 7) & 1);
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, IMMED1, NONE, // the m-output uses slot 1's immediate (IMMED1)
+ ADD, ZERO, ZERO, ZERO, (1<<12)|0x1360/4, NONE, NONE),
+ MME_INSN(0, MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|0, NONE, ALU0, // each MERGE encodes one "(r1 >> k) & 1", k being the low bits of the immediate
+ MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|1, NONE, ALU1),
+ MME_INSN(0, MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|2, NONE, ALU0,
+ MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|3, NONE, ALU1),
+ MME_INSN(1, MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|4, NONE, ALU0, // en=1; exactly one more instruction follows it in this table
+ MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|5, NONE, ALU1),
+ MME_INSN(0, MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|6, NONE, ALU0,
+ MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|7, NONE, ALU1),
+};
+
+uint32_t mmec597_poly_mode_front[] = { // per the pseudo-code below: writes POLYGON_MODE_FRONT, then sends 0x60, 0x200 or 0 to method 0x02ec depending on both polygon modes and SP_SELECT[3]/[4]
+// r1 = load();
+// mthd(0x0dac,0); // POLYGON_MODE_FRONT
+// send(r1);
+// r2 = read(0x0db0); // POLYGON_MODE_BACK
+// r3 = read(0x20c0); // SP_SELECT[3]
+// r7 = r1 | r2;
+// r4 = read(0x2100); // SP_SELECT[4]
+// r6 = 0x60;
+// r7 = r7 & 1;
+// if (r7 != 0)
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, (0<<12)|0x0dac/4, IMMED0, ALU0,
+ STATE, R2, IMMED, ZERO, 0x0db0/4, NONE, NONE),
+ MME_INSN(0, STATE, R3, IMMED, ZERO, 0x20c0/4, NONE, NONE,
+ OR, R7, R1, R2, 0, NONE, NONE),
+ MME_INSN(0, STATE, R4, IMMED, ZERO, 0x2100/4, NONE, NONE,
+ ADD, R6, IMMED, ZERO, 0x60, NONE, NONE),
+ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, // per the "if" above: r7 == 0 skips the next instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r6 = 0x200;
+ MME_INSN(0, ADD, R6, IMMED, ZERO, 0x200, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r7 = r3 | r4;
+// r7 = r7 & 1;
+// if (r7 != 0)
+ MME_INSN(0, OR, R7, R3, R4, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, // per the "if" above: r7 == 0 skips the next instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r6 = 0;
+ MME_INSN(0, ADD, R6, ZERO, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x02ec, 0);
+// send(r6);
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x02ec/4, IMMED0, NONE, // en=1; exactly one more instruction follows it in this table
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R6, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_poly_mode_back[] = { // per the pseudo-code below: writes POLYGON_MODE_BACK, then sends 0x60, 0x200 or 0 to method 0x02ec depending on both polygon modes and SP_SELECT[3]/[4]
+// r1 = load();
+// mthd(0x0db0,0); // POLYGON_MODE_BACK
+// send(r1);
+// r2 = read(0x0dac); // POLYGON_MODE_FRONT
+// r3 = read(0x20c0); // SP_SELECT[3]
+// r7 = r1 | r2;
+// r4 = read(0x2100); // SP_SELECT[4]
+// r6 = 0x60;
+// r7 = r7 & 1;
+// if (r7 != 0)
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, (0<<12)|0x0db0/4, IMMED0, ALU0,
+ STATE, R2, IMMED, ZERO, 0x0dac/4, NONE, NONE),
+ MME_INSN(0, STATE, R3, IMMED, ZERO, 0x20c0/4, NONE, NONE,
+ OR, R7, R1, R2, 0, NONE, NONE),
+ MME_INSN(0, STATE, R4, IMMED, ZERO, 0x2100/4, NONE, NONE,
+ ADD, R6, IMMED, ZERO, 0x60, NONE, NONE),
+ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, // per the "if" above: r7 == 0 skips the next instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r6 = 0x200;
+ MME_INSN(0, ADD, R6, IMMED, ZERO, 0x200, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r7 = r3 | r4;
+// r7 = r7 & 1;
+// if (r7 != 0)
+ MME_INSN(0, OR, R7, R3, R4, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, // per the "if" above: r7 == 0 skips the next instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r6 = 0;
+ MME_INSN(0, ADD, R6, ZERO, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x02ec, 0);
+// send(r6);
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x02ec/4, IMMED0, NONE, // en=1; exactly one more instruction follows it in this table
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R6, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_gp_select[] = { // per the pseudo-code below: writes SP_SELECT[4], then sends 0x60, 0x200 or 0 to method 0x02ec depending on both polygon modes and SP_SELECT[3]/[4]
+// r1 = load();
+// mthd(0x2100,0); // SP_SELECT[4]
+// send(r1);
+// r2 = read(0x0dac); // POLYGON_MODE_FRONT
+// r3 = read(0x0db0); // POLYGON_MODE_BACK
+// r7 = r2 | r3;
+// r4 = read(0x20c0); // SP_SELECT[3]
+// r6 = 0x60;
+// r7 = r7 & 1;
+// if (r7 != 0)
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, (0<<12)|0x2100/4, IMMED0, ALU0,
+ STATE, R2, IMMED, ZERO, 0x0dac/4, NONE, NONE),
+ MME_INSN(0, STATE, R3, IMMED, ZERO, 0x0db0/4, NONE, NONE,
+ OR, R7, R2, R3, 0, NONE, NONE),
+ MME_INSN(0, STATE, R4, IMMED, ZERO, 0x20c0/4, NONE, NONE,
+ ADD, R6, IMMED, ZERO, 0x60, NONE, NONE),
+ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, // per the "if" above: r7 == 0 skips the next instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r6 = 0x200;
+ MME_INSN(0, ADD, R6, IMMED, ZERO, 0x200, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r7 = r1 | r4;
+// r7 = r7 & 1;
+// if (r7 != 0)
+ MME_INSN(0, OR, R7, R1, R4, 0, NONE, NONE, // r1 is the SP_SELECT[4] value just loaded, r4 the SP_SELECT[3] value read back
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, // per the "if" above: r7 == 0 skips the next instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r6 = 0;
+ MME_INSN(0, ADD, R6, ZERO, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x02ec, 0);
+// send(r6);
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x02ec/4, IMMED0, NONE, // en=1; exactly one more instruction follows it in this table
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R6, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_tep_select[] = { // per the pseudo-code below: writes SP_SELECT[3], then sends 0x60, 0x200 or 0 to method 0x02ec depending on both polygon modes and SP_SELECT[3]/[4]
+// r1 = load();
+// mthd(0x20c0,0); // SP_SELECT[3]
+// send(r1);
+// r2 = read(0x0dac); // POLYGON_MODE_FRONT
+// r3 = read(0x0db0); // POLYGON_MODE_BACK
+// r7 = r2 | r3;
+// r4 = read(0x2100); // SP_SELECT[4]
+// r6 = 0x60;
+// r7 = r7 & 1;
+// if (r7 != 0)
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, (0<<12)|0x20c0/4, IMMED0, ALU0,
+ STATE, R2, IMMED, ZERO, 0x0dac/4, NONE, NONE),
+ MME_INSN(0, STATE, R3, IMMED, ZERO, 0x0db0/4, NONE, NONE,
+ OR, R7, R2, R3, 0, NONE, NONE),
+ MME_INSN(0, STATE, R4, IMMED, ZERO, 0x2100/4, NONE, NONE,
+ ADD, R6, IMMED, ZERO, 0x60, NONE, NONE),
+ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, // per the "if" above: r7 == 0 skips the next instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r6 = 0x200;
+ MME_INSN(0, ADD, R6, IMMED, ZERO, 0x200, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r7 = r1 | r4;
+// r7 = r7 & 1;
+// if (r7 != 0)
+ MME_INSN(0, OR, R7, R1, R4, 0, NONE, NONE, // r1 is the SP_SELECT[3] value just loaded, r4 the SP_SELECT[4] value read back
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, // per the "if" above: r7 == 0 skips the next instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r6 = 0;
+ MME_INSN(0, ADD, R6, ZERO, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x02ec, 0);
+// send(r6);
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x02ec/4, IMMED0, NONE, // en=1; exactly one more instruction follows it in this table
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R6, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_draw_arrays_indirect[] = { // per the pseudo-code below: consumes numparams sets of (count, instance_count, start, start_instance), draws each set instance by instance, then restores VB_INSTANCE_BASE
+// r1 = load(); // mode
+// r5 = read(0x1438); // VB_INSTANCE_BASE
+// r6 = load(); // start_drawid
+// r7 = load(); // numparams
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R6, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R7, LOAD0, ZERO, 0, NONE, NONE,
+ STATE, R5, IMMED, ZERO, 0x1438/4, NONE, NONE),
+// while (HW_LOOP_COUNT < r7) {
+// r2 = load(); // count
+// r3 = load(); // instance_count
+// mthd(0x0d74, 0); // VERTEX_BUFFER_FIRST
+// send(load()); // start
+// r4 = load(); // start_instance
+// if (r3) {
+ MME_INSN(0, LOOP, ZERO, R7, ZERO, 0x000c, NONE, NONE, // immediate 0xc = the 11 body instructions + 1
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R2, LOAD0, ZERO, 0x0d74/4, IMMED0, NONE,
+ ADD, R3, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, ALU0,
+ ADD, R4, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R3, ZERO, (2<<14)|0x0008, NONE, NONE, // r3 == 0: offset 8 lands on the "r6 = r6 + 1" instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x238c, 1); // CB_POS
+// send(256 + 160);
+// send(0); // base_vertex
+// send(r4); // start_instance
+// send(r6); // draw id
+// mthd(0x1438, 0); // VB_INSTANCE_BASE
+// send(r4);
+// r1 = r1 & ~(1<<26); // clear INSTANCE_NEXT
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, (1<<12)|0x238c/4, IMMED0, IMMED1,
+ ADD, ZERO, ZERO, ZERO, 256 + 160, NONE, ALU0),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, R6, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, 0x1438/4, IMMED0, ALU0,
+ MERGE, R1, R1, ZERO, (26<<10)|(1<<5)|0, NONE, NONE),
+// do {
+// mthd(0x1618, 0); // VERTEX_BEGIN_GL
+// send(r1); // mode
+// mthd(0x0d78, 0); // VERTEX_BUFFER_COUNT
+// send(r2); // count
+// mthd(0x1614, 0); // VERTEX_END_GL
+// send(0);
+// r1 |= (1<<26); // set INSTANCE_NEXT
+// } while(--r3);
+// }
+ MME_INSN(0, ADD, ZERO, R1, ZERO, 0x1618/4, IMMED0, ALU0,
+ ADD, ZERO, R2, ZERO, 0x0d78/4, IMMED1, ALU1),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1614/4, IMMED0, ALU0,
+ ADD, R4, IMMED, ZERO, 1, NONE, NONE), // r4 is reused here as the constant 1 that the next MERGE inserts at bit 26
+ MME_INSN(0, MERGE, R1, R1, R4, (26<<10)|(1<<5)|0, NONE, NONE,
+ SUB, R3, R3, IMMED, 1, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R3, ZERO, (1<<14)|0x3ffd, NONE, NONE, // 0x3ffd is -3 in 14 bits: back to the first do-loop instruction while r3 != 0
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r6 = r6 + 1;
+// };
+ MME_INSN(0, ADD, R6, R6, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x1438, 0); // restore VB_INSTANCE_BASE
+// send(r5);
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, 0x1438/4, IMMED0, NONE, // en=1; exactly one more instruction follows it in this table
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R5, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_draw_elts_indirect[] = { // per the pseudo-code below: consumes numparams sets of (count, instance_count, start, index_bias, start_instance), draws each set instance by instance, then restores the saved base registers
+// r1 = load(); // mode
+// r8 = read(0x1434); // VB_ELEMENT_BASE
+// r9 = read(0x1438); // VB_INSTANCE_BASE
+// r6 = load(); // start_drawid
+// r7 = load(); // numparams
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE,
+ STATE, R8, IMMED, ZERO, 0x1434/4, NONE, NONE),
+ MME_INSN(0, STATE, R9, IMMED, ZERO, 0x1438/4, NONE, NONE,
+ ADD, R6, LOAD0, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R7, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// while (HW_LOOP_COUNT < r7) {
+// r3 = load(); // count
+// r2 = load(); // instance_count
+// mthd(0x17dc, 0); // INDEX_BATCH_FIRST
+// send(load()); // start
+// r4 = load(); // index_bias
+// mthd(0x238c, 1); // CB_POS
+// send(256 + 160);
+// send(r4); // index_bias
+// r5 = load(); // start_instance
+// if (r2) {
+ MME_INSN(0, LOOP, ZERO, R7, ZERO, 0x000d, NONE, NONE, // immediate 0xd = the 12 body instructions + 1
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R3, LOAD0, ZERO, 0x17dc/4, IMMED0, NONE,
+ ADD, R2, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, ALU0,
+ ADD, R4, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, (1<<12)|0x238c/4, IMMED0, IMMED1,
+ ADD, ZERO, R4, ZERO, 256 + 160, NONE, ALU1),
+ MME_INSN(0, BEQ, ZERO, R2, ZERO, (2<<14)|0x0008, NONE, NONE, // r2 == 0: offset 8 lands on the "r6 = r6 + 1" instruction
+ ADD, R5, LOAD0, ZERO, 0, NONE, NONE),
+// send(r5); // start_instance
+// send(r6); // draw_id
+// mthd(0x1434, 1); // VB_ELEMENT_BASE
+// send(r4); // index_bias
+// send(r5); // start_instance
+// mthd(0x1118, 0); // VERTEX_ID_BASE
+// send(r4); // index_bias
+// r1 &= ~(1 << 26); // clear INSTANCE_NEXT
+ MME_INSN(0, ADD, ZERO, R5, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, R6, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, (1<<12)|0x1434/4, IMMED0, ALU0,
+ ADD, ZERO, R5, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, 0x1118/4, IMMED0, ALU0,
+ MERGE, R1, R1, ZERO, (26<<10)|(1<<5)|0, NONE, NONE),
+// do {
+// mthd(0x1618, 0); // VERTEX_BEGIN_GL
+// send(r1); // mode
+// mthd(0x17e0, 0); // INDEX_BATCH_COUNT
+// send(r3); // count
+// mthd(0x1614, 0); // VERTEX_END_GL
+// send(0);
+// r1 |= (1 << 26); // set INSTANCE_NEXT
+// } while (--r2);
+// }
+ MME_INSN(0, ADD, ZERO, R1, ZERO, 0x1618/4, IMMED0, ALU0,
+ ADD, ZERO, R3, ZERO, 0x17e0/4, IMMED1, ALU1),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1614/4, IMMED0, ALU0,
+ ADD, R4, IMMED, ZERO, 1, NONE, NONE), // r4 is reused here as the constant 1 that the next MERGE inserts at bit 26
+ MME_INSN(0, MERGE, R1, R1, R4, (26<<10)|(1<<5)|0, NONE, NONE,
+ SUB, R2, R2, IMMED, 1, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R2, ZERO, (1<<14)|0x3ffd, NONE, NONE, // 0x3ffd is -3 in 14 bits: back to the first do-loop instruction while r2 != 0
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r6 = r6 + 1;
+// };
+ MME_INSN(0, ADD, R6, R6, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x1434, 1);
+// send(r8); // restore VB_ELEMENT_BASE
+// send(r9); // restore VB_INSTANCE_BASE
+// mthd(0x1118, 0);
+// send(r8); // restore VERTEX_ID_BASE
+ MME_INSN(1, ADD, ZERO, R8, ZERO, (1<<12)|0x1434/4, IMMED0, ALU0, // en=1; exactly one more instruction follows it in this table
+ ADD, ZERO, R9, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R8, ZERO, 0x1118/4, IMMED0, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_draw_arrays_indirect_count[] = { // per the pseudo-code below: like the arrays draw loop, but limited to (totaldraws - start_drawid) sets; the remaining parameter sets are loaded and discarded
+// r1 = load(); // mode
+// r6 = load(); // start_drawid
+// r7 = load(); // numparams
+// r5 = load(); // totaldraws
+// r8 = read(0x1438); // VB_INSTANCE_BASE
+// r5 = r5 - r6; // remaining draws
+// if (r5 > r7)
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R6, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R7, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R5, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, STATE, R8, IMMED, ZERO, 0x1438/4, NONE, NONE,
+ SUB, R5, R5, R6, 0, NONE, NONE),
+ MME_INSN(0, BLE, ZERO, R5, R7, (2<<14)|0x0002, NONE, NONE, // r5 <= r7: skip the clamp on the next instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r5 = r7;
+ MME_INSN(0, ADD, R5, R7, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// if (r5 >= 0) {
+ MME_INSN(0, BLT, ZERO, R5, ZERO, (2<<14)|0x000e, NONE, NONE, // r5 < 0: offset 0xe lands on the LOOP of the discard loop, also skipping "r7 = r7 - r5"
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// while (HW_LOOP_COUNT < r5) {
+// r2 = load(); // count
+// r3 = load(); // instance_count
+// mthd(0x0d74, 0); // VERTEX_BUFFER_FIRST
+// send(load()); // start
+// r4 = load(); // start_instance
+// if (r3) {
+ MME_INSN(0, LOOP, ZERO, R5, ZERO, 0x000c, NONE, NONE, // immediate 0xc = the 11 body instructions + 1
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R2, LOAD0, ZERO, 0x0d74/4, IMMED0, NONE,
+ ADD, R3, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, ALU0,
+ ADD, R4, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R3, ZERO, (2<<14)|0x0008, NONE, NONE, // r3 == 0: offset 8 lands on the "r6 = r6 + 1" instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x238c, 1); // CB_POS
+// send(256 + 160);
+// send(0); // base_vertex
+// send(r4); // start_instance
+// send(r6); // draw_id
+// mthd(0x1438, 0); // VB_INSTANCE_BASE
+// send(r4);
+// r1 &= ~(1 << 26); // clear INSTANCE_NEXT
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, (1<<12)|0x238c/4, IMMED0, IMMED1,
+ ADD, ZERO, ZERO, ZERO, 256+160, NONE, ALU0),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, R6, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, 0x1438/4, IMMED0, ALU0,
+ MERGE, R1, R1, ZERO, (26<<10)|(1<<5)|0, NONE, NONE),
+// do {
+// mthd(0x1618, 0); // VERTEX_BEGIN_GL
+// send(r1); // mode
+// mthd(0x0d78, 0); // VERTEX_BUFFER_COUNT
+// send(r2); // count
+// mthd(0x1614, 0); // VERTEX_END_GL
+// send(0);
+// r1 |= (1 << 26); // set INSTANCE_NEXT
+// } while (--r3);
+// }
+ MME_INSN(0, ADD, ZERO, R1, ZERO, 0x1618/4, IMMED0, ALU0,
+ ADD, ZERO, R2, ZERO, 0x0d78/4, IMMED1, ALU1),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1614/4, IMMED0, ALU0,
+ ADD, R4, IMMED, ZERO, 1, NONE, NONE), // r4 is reused here as the constant 1 that the next MERGE inserts at bit 26
+ MME_INSN(0, MERGE, R1, R1, R4, (26<<10)|(1<<5)|0, NONE, NONE,
+ SUB, R3, R3, IMMED, 1, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R3, ZERO, (1<<14)|0x3ffd, NONE, NONE, // 0x3ffd is -3 in 14 bits: back to the first do-loop instruction while r3 != 0
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r6 = r6 + 1; // draw_id++
+// }
+ MME_INSN(0, ADD, R6, R6, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r7 = r7 - r5; // unneeded params
+// }
+ MME_INSN(0, SUB, R7, R7, R5, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// while (HW_LOOP_COUNT < r7) {
+// load();
+// load();
+// load();
+// load();
+// }
+ MME_INSN(0, LOOP, ZERO, R7, ZERO, 0x0003, NONE, NONE, // immediate 3 = the 2 body instructions + 1; each pass discards 4 loaded values
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, ZERO, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, ZERO, LOAD1, ZERO, 0, NONE, NONE),
+// exit mthd(0x1438, 0); // VB_INSTANCE_BASE
+// send(r8);
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, 0x1438/4, IMMED0, NONE, // en=1; exactly one more instruction follows it in this table
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R8, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_draw_elts_indirect_count[] = { // per the pseudo-code below: like the elts draw loop, but limited to (r5 - r6) sets; the remaining parameter sets are loaded and discarded. Names below follow the sibling tables in this file.
+// r8 = read(0x1434); // VB_ELEMENT_BASE
+// r1 = load(); // mode
+// r9 = read(0x1438); // VB_INSTANCE_BASE
+// r6 = load(); // start_drawid
+// r7 = load(); // numparams
+// r5 = load(); // totaldraws
+// r5 = r5 - r6; // remaining draws
+// if (r5 > r7)
+ MME_INSN(0, STATE, R8, IMMED, ZERO, 0x1434/4, NONE, NONE,
+ ADD, R1, LOAD0, ZERO, 0, NONE, NONE),
+ MME_INSN(0, STATE, R9, IMMED, ZERO, 0x1438/4, NONE, NONE,
+ ADD, R6, LOAD0, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R7, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R5, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, SUB, R5, R5, R6, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BLE, ZERO, R5, R7, (2<<14)|0x0002, NONE, NONE, // r5 <= r7: skip the clamp on the next instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r5 = r7;
+ MME_INSN(0, ADD, R5, R7, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// if (r5 >= 0) {
+ MME_INSN(0, BLT, ZERO, R5, ZERO, (2<<14)|0x000f, NONE, NONE, // r5 < 0: offset 0xf lands on the LOOP of the discard loop, also skipping "r7 = r7 - r5"
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// while (HW_LOOP_COUNT < r5) {
+// r3 = load(); // count
+// r2 = load(); // instance_count
+// mthd(0x17dc, 0); // INDEX_BATCH_FIRST
+// send(load()); // start
+// r4 = load(); // index_bias
+// mthd(0x238c, 1); // CB_POS
+// send(256 + 160);
+// send(r4); // index_bias
+// r10 = load(); // start_instance
+// if (r2) {
+ MME_INSN(0, LOOP, ZERO, R5, ZERO, 0x000d, NONE, NONE, // immediate 0xd = the 12 body instructions + 1
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R3, LOAD0, ZERO, (0<<12)|0x17dc/4, IMMED0, NONE,
+ ADD, R2, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, (1<<12)|0x238c/4, NONE, ALU0,
+ ADD, R4, LOAD1, ZERO, 256 + 160, IMMED0, IMMED1),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, 0, NONE, ALU0,
+ ADD, R10, LOAD0, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R2, ZERO, (2<<14)|0x0008, NONE, NONE, // r2 == 0: offset 8 lands on the "r6 = r6 + 1" instruction
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// send(r10); // start_instance
+// send(r6); // draw_id
+// mthd(0x1434, 1); // VB_ELEMENT_BASE
+// send(r4); // index_bias
+// send(r10); // start_instance
+// mthd(0x1118, 0); // VERTEX_ID_BASE
+// send(r4); // index_bias
+// r1 &= ~(1 << 26); // clear INSTANCE_NEXT
+ MME_INSN(0, ADD, ZERO, R10, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, R6, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, (1<<12)|0x1434/4, IMMED0, ALU0,
+ ADD, ZERO, R10, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R4, ZERO, (0<<12)|0x1118/4, IMMED0, ALU0,
+ MERGE, R1, R1, ZERO, (26<<10)|(1<<5)|0, NONE, NONE),
+// do {
+// mthd(0x1618, 0); // VERTEX_BEGIN_GL
+// send(r1); // mode
+// mthd(0x17e0, 0); // INDEX_BATCH_COUNT
+// send(r3); // count
+// mthd(0x1614, 0); // VERTEX_END_GL
+// send(0);
+// r1 |= (1 << 26); // set INSTANCE_NEXT
+// } while (--r2);
+// }
+ MME_INSN(0, ADD, ZERO, R1, ZERO, 0x1618/4, IMMED0, ALU0,
+ ADD, ZERO, R3, ZERO, 0x17e0/4, IMMED1, ALU1),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1614/4, IMMED0, ALU0,
+ ADD, R4, IMMED, ZERO, 1, NONE, NONE), // r4 is reused here as the constant 1 that the next MERGE inserts at bit 26
+ MME_INSN(0, MERGE, R1, R1, R4, (26<<10)|(1<<5)|0, NONE, NONE,
+ SUB, R2, R2, IMMED, 1, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R2, ZERO, (1<<14)|0x3ffd, NONE, NONE, // 0x3ffd is -3 in 14 bits: back to the first do-loop instruction while r2 != 0
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r6 = r6 + 1; // draw_id++
+// }
+ MME_INSN(0, ADD, R6, R6, IMMED, 1, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// r7 = r7 - r5; // unneeded params
+// }
+ MME_INSN(0, SUB, R7, R7, R5, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// while (HW_LOOP_COUNT < r7) {
+// load(); // the five loads below are discarded (destination is ZERO)
+// load();
+// load();
+// load();
+// load();
+// }
+ MME_INSN(0, LOOP, ZERO, R7, ZERO, 0x0004, NONE, NONE, // immediate 4 = the 3 body instructions + 1
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, ZERO, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, ZERO, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x1434, 1);
+// send(r8); // restore VB_ELEMENT_BASE
+// send(r9); // restore VB_INSTANCE_BASE
+// exit mthd(0x1118, 0);
+// send(r8); // restore VERTEX_ID_BASE
+ MME_INSN(1, ADD, ZERO, R8, ZERO, (1<<12)|0x1434/4, IMMED0, ALU0, // en=1; exactly one more instruction follows it in this table
+ ADD, ZERO, R9, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R8, ZERO, (0<<12)|0x1118/4, IMMED0, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_query_buffer_write[] = {
+// r1 = load(); // clamp value (0: no clamp, and the high word is written as well)
+// r2 = load(); // end value (lo)
+// r3 = load(); // end value (hi)
+// r4 = load(); // start value (lo)
+// r5 = load(); // start value (hi)
+// r8 = load(); // desired sequence
+// r9 = load(); // actual sequence
+// r7 = load(); // query address (hi)
+// r6 = load(); // query address (lo)
+// if (r9 >= r8) {
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R2, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R3, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R4, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R5, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R8, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R9, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R7, LOAD1, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R6, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BLT, ZERO, R9, R8, (2<<14)|0x000e, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// [r3,r2] = [r3,r2] - [r5,r4];
+// if (r1) {
+ MME_INSN(0, SUB, R2, R2, R4, 0, NONE, NONE,
+ SUBB, R3, R3, R5, 0, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R1, ZERO, (2<<14)|0x0004, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// if (r3 != 0 || r1 < r2)
+// r2 = r1;
+// }
+ MME_INSN(0, BEQ, ZERO, R3, ZERO, (1<<14)|0x0002, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, BLTU, ZERO, R1, R2, (1<<14)|0x0002, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R2, R1, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x1b00, 1);
+// send(r7);
+// send(r6);
+// send(r2);
+// send(0x10000000);
+// if (!r1) {
+ MME_INSN(0, ADD, ZERO, R7, ZERO, (1<<12)|0x1b00/4, IMMED0, ALU0,
+ ADD, ZERO, R6, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R2, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1000, NONE, IMMED32_0,
+ ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE),
+ MME_INSN(0, BEQ, ZERO, R1, ZERO, (1<<14)|0x0004, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// [r7,r6] = [r7,r6] + 4;
+// mthd(0x1b00, 1);
+// send(r7);
+// send(r6);
+// send(r3);
+// send(0x10000000);
+// }
+ MME_INSN(0, ADD, ZERO, R6, IMMED, 4, IMMED1, ALU1,
+ ADDC, ZERO, R7, ZERO, (1<<12)|0x1b00/4, NONE, ALU0),
+ MME_INSN(0, ADD, ZERO, R3, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1000, NONE, IMMED32_0,
+ ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE),
+// mthd(0x0110, 0);
+// send(0);
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, (0<<12)|0x0110/4, IMMED0, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// }
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_conservative_raster_state[] = {
+// r1 = load();
+// mthd(0x3400, 1);
+// send(0);
+// send(((r1 >> 8) & 7) << 23);
+// send(0x03800000);
+// mthd(0x2310, 1);
+// send(0x00418800);
+// r2 = r1 & 0xf;
+// r3 = 16; // iteration count for the send(r2) loop below
+// r2 = r2 | (((r1 >> 4) & 0xf) << 8);
+// mthd(0x0a1c, 8); // r2 is the value sent on every loop iteration
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, (1<<12)|0x3400/4, IMMED0, IMMED1,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, MERGE, ZERO, ZERO, R1, (23<<10)|(3<<5)|8, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x0380, NONE, IMMED32_0,
+ ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, (1<<12)|0x2310/4, IMMED0, NONE,
+ ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x0041, NONE, IMMED32_0,
+ ADD, ZERO, ZERO, ZERO, 0x8800, NONE, NONE),
+ MME_INSN(0, AND, R2, R1, IMMED, 0xf, NONE, NONE,
+ ADD, R3, ZERO, IMMED, 16, NONE, NONE),
+ MME_INSN(0, MERGE, R2, R2, R1, (8<<10)|(4<<5)|4, IMMED1, NONE,
+ ADD, ZERO, ZERO, ZERO, (8<<12)|0x0a1c/4, NONE, NONE),
+// while (HW_LOOP_COUNT < r3)
+// send(r2);
+ MME_INSN(0, LOOP, ZERO, R3, ZERO, 0x0002, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, R2, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+// mthd(0x1148, 0);
+// send(1);
+ MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x1148/4, IMMED0, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 1, NONE, IMMED1,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+};
+
+uint32_t mmec597_compute_counter[] = {
+// r0 = load();
+// r1 = 1;
+// r2 = 0;
+// while (HW_LOOP_COUNT < r0) {
+ MME_INSN(0, ADD, R0, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, R1, IMMED, ZERO, 1, NONE, NONE),
+ MME_INSN(0, LOOP, ZERO, R0, ZERO, 0x0003, NONE, NONE,
+ ADD, R2, ZERO, ZERO, 0, NONE, NONE),
+// r3 = load();
+// [r2,r1] = r1 * r3; // presumably MULU/MULH yield the low/high product words - confirm
+// }
+ MME_INSN(0, ADD, R3, LOAD0, ZERO, 0, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, MULU, R1, R1, R3, 0, NONE, NONE,
+ MULH, R2, ZERO, ZERO, 0, NONE, NONE),
+// r3 = read(0x3410); // NOTE(review): both STATE ops below target ZERO, while mmec597_compute_counter_to_query targets R3/R4 - confirm
+// r4 = read(0x3414);
+// [r4,r3] += [r2,r1];
+// mthd(0x3410, 1);
+// send(r3);
+// send(r4);
+ MME_INSN(0, STATE, ZERO, ZERO, ZERO, 0x3410/4, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(1, STATE, ZERO, ZERO, ZERO, 0x3414/4, NONE, NONE,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, R3, R3, R1, (1<<12)|0x3410/4, IMMED0, ALU0,
+ ADDC, R4, R4, R2, 0, NONE, ALU1),
+};
+
+uint32_t mmec597_compute_counter_to_query[] = {
+// r1 = load(); // 64-bit addend (lo)
+// r3 = read(0x3410);
+// r2 = load(); // 64-bit addend (hi)
+// r4 = read(0x3414);
+// [r2,r1] = [r2,r1] + [r4,r3];
+// mthd(0x1b00, 1);
+// r3 = load(); // presumably query address (hi), as in mmec597_query_buffer_write - confirm
+// send(r3);
+// r4 = load(); // presumably query address (lo) - confirm
+// send(r4);
+// send(r1);
+// send(0x10000000);
+ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE,
+ STATE, R3, IMMED, ZERO, 0x3410/4, NONE, NONE),
+ MME_INSN(0, ADD, R2, LOAD0, ZERO, 0, NONE, NONE,
+ STATE, R4, IMMED, ZERO, 0x3414/4, NONE, NONE),
+ MME_INSN(0, ADD, R1, R1, R3, (1<<12)|0x1b00/4, IMMED0, NONE,
+ ADDC, R2, R2, R4, 0, NONE, NONE),
+ MME_INSN(0, ADD, R3, LOAD0, ZERO, 0, NONE, ALU0,
+ ADD, R4, LOAD1, ZERO, 0, NONE, ALU1),
+ MME_INSN(0, ADD, ZERO, R1, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1000, NONE, IMMED32_0,
+ ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE),
+// [r3,r4] = [r3,r4] + 4; // the sum is only emitted (dst is ZERO); r3/r4 themselves are not updated
+// mthd(0x1b00, 1);
+// send(r3);
+// send(r4);
+// send(r2);
+// send(0x10000000);
+ MME_INSN(0, ADD, ZERO, R4, IMMED, 4, IMMED1, ALU1,
+ ADDC, ZERO, R3, ZERO, (1<<12)|0x1b00/4, NONE, ALU0),
+ MME_INSN(1, ADD, ZERO, R2, ZERO, 0, NONE, ALU0,
+ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE),
+ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1000, NONE, IMMED32_0,
+ ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE),
+};
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
index 221bab3105b..539bdc75022 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
@@ -157,6 +157,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_UNK0220__ESIZE 0x00000004
#define NVC0_3D_UNK0220__LEN 0x00000028
+#define TU102_3D_INDEX_ARRAY_LIMIT_HIGH 0x00000238
+
+#define TU102_3D_INDEX_ARRAY_LIMIT_LOW 0x0000023c
+
+#define TU102_3D_SET_COLOR_RENDER_TO_ZETA_SURFACE 0x000002b8
+
#define NVC0_3D_UNK02C0 0x000002c0
#define NVC0_3D_UNK02C4 0x000002c4
@@ -278,6 +284,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_UNK0400__ESIZE 0x00000004
#define NVC0_3D_UNK0400__LEN 0x000000c0
+#define TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(i0) (0x00000600 + 0x8*(i0))
+#define TU102_3D_VERTEX_ARRAY_LIMIT_LOW(i0) (0x00000604 + 0x8*(i0))
+
#define NVC0_3D_TFB_STREAM(i0) (0x00000700 + 0x10*(i0))
#define NVC0_3D_TFB_STREAM__ESIZE 0x00000010
#define NVC0_3D_TFB_STREAM__LEN 0x00000004
@@ -1787,6 +1796,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define NVC0_3D_SP_UNK14__ESIZE 0x00000004
#define NVC0_3D_SP_UNK14__LEN 0x00000004
+#define GV100_3D_SP_ADDRESS_HIGH(i0) (0x00002014 + 0x40*(i0))
+#define GV100_3D_SP_ADDRESS_LOW(i0) (0x00002018 + 0x40*(i0))
+
#define NVC0_3D_TEX_LIMITS(i0) (0x00002200 + 0x10*(i0))
#define NVC0_3D_TEX_LIMITS__ESIZE 0x00000010
#define NVC0_3D_TEX_LIMITS__LEN 0x00000005
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
index c897e4e8b97..69131fa22d3 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
@@ -37,6 +37,55 @@ nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, bool is_3d)
return nv50_tex_choose_tile_dims_helper(nx, ny, nz, is_3d);
}
+static uint32_t
+tu102_mt_choose_storage_type(struct nv50_miptree *mt, bool compressed)
+{
+ uint32_t kind; /* value returned to the caller, which stores it in bo_config.nvc0.memtype */
+
+ if (unlikely(mt->base.base.bind & PIPE_BIND_CURSOR))
+ return 0; /* cursor surfaces always get kind 0 */
+ if (unlikely(mt->base.base.flags & NOUVEAU_RESOURCE_FLAG_LINEAR))
+ return 0; /* linear resources always get kind 0 */
+
+ switch (mt->base.base.format) { /* only depth/stencil formats get a dedicated kind */
+ case PIPE_FORMAT_Z16_UNORM:
+ if (compressed)
+ kind = 0x0b; // NV_MMU_PTE_KIND_Z16_COMPRESSIBLE_DISABLE_PLC
+ else
+ kind = 0x01; // NV_MMU_PTE_KIND_Z16
+ break;
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_S8X24_UINT:
+ case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+ if (compressed)
+ kind = 0x0e; // NV_MMU_PTE_KIND_Z24S8_COMPRESSIBLE_DISABLE_PLC
+ else
+ kind = 0x05; // NV_MMU_PTE_KIND_Z24S8
+ break;
+ case PIPE_FORMAT_X24S8_UINT:
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ if (compressed)
+ kind = 0x0c; // NV_MMU_PTE_KIND_S8Z24_COMPRESSIBLE_DISABLE_PLC
+ else
+ kind = 0x03; // NV_MMU_PTE_KIND_S8Z24
+ break;
+ case PIPE_FORMAT_X32_S8X24_UINT:
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ if (compressed)
+ kind = 0x0d; // NV_MMU_PTE_KIND_ZF32_X24S8_COMPRESSIBLE_DISABLE_PLC
+ else
+ kind = 0x04; // NV_MMU_PTE_KIND_ZF32_X24S8
+ break;
+ case PIPE_FORMAT_Z32_FLOAT:
+ default:
+ kind = 0x06; // NOTE(review): presumably NV_MMU_PTE_KIND_GENERIC_MEMORY; 'compressed' is ignored on this path - confirm
+ break;
+ }
+
+ return kind;
+}
+
static uint32_t
nvc0_mt_choose_storage_type(struct nv50_miptree *mt, bool compressed)
{
@@ -357,7 +406,10 @@ nvc0_miptree_create(struct pipe_screen *pscreen,
if (pt->bind & PIPE_BIND_LINEAR)
pt->flags |= NOUVEAU_RESOURCE_FLAG_LINEAR;
- bo_config.nvc0.memtype = nvc0_mt_choose_storage_type(mt, compressed);
+ if (dev->chipset < 0x160)
+ bo_config.nvc0.memtype = nvc0_mt_choose_storage_type(mt, compressed);
+ else
+ bo_config.nvc0.memtype = tu102_mt_choose_storage_type(mt, compressed);
if (!nvc0_miptree_init_ms_mode(mt)) {
FREE(mt);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 32aa82d168c..d2b2de47c8d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -645,7 +645,10 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
prog->code_size = info->bin.codeSize;
prog->relocs = info->bin.relocData;
prog->fixups = info->bin.fixupData;
- prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1));
+ if (info->target >= NVISA_GV100_CHIPSET)
+ prog->num_gprs = MIN2(info->bin.maxGPR + 5, 256); //XXX: why?
+ else
+ prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1));
prog->cp.smem_size = info->bin.smemSize;
prog->num_barriers = info->numBarriers;
@@ -734,7 +737,14 @@ nvc0_program_alloc_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
struct nvc0_screen *screen = nvc0->screen;
const bool is_cp = prog->type == PIPE_SHADER_COMPUTE;
int ret;
- uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE);
+ uint32_t size = prog->code_size;
+
+ if (!is_cp) {
+ if (screen->eng3d->oclass < TU102_3D_CLASS)
+ size += GF100_SHADER_HEADER_SIZE;
+ else
+ size += TU102_SHADER_HEADER_SIZE;
+ }
/* On Fermi, SP_START_ID must be aligned to 0x40.
* On Kepler, the first instruction must be aligned to 0x80 because
@@ -750,7 +760,8 @@ nvc0_program_alloc_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
prog->code_base = prog->mem->start;
if (!is_cp) {
- if (screen->base.class_3d >= NVE4_3D_CLASS) {
+ if (screen->base.class_3d >= NVE4_3D_CLASS &&
+ screen->base.class_3d < TU102_3D_CLASS) {
switch (prog->mem->start & 0xff) {
case 0x40: prog->code_base += 0x70; break;
case 0x80: prog->code_base += 0x30; break;
@@ -777,7 +788,16 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
{
struct nvc0_screen *screen = nvc0->screen;
const bool is_cp = prog->type == PIPE_SHADER_COMPUTE;
- uint32_t code_pos = prog->code_base + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE);
+ uint32_t code_pos = prog->code_base;
+ uint32_t size_sph = 0;
+
+ if (!is_cp) {
+ if (screen->eng3d->oclass < TU102_3D_CLASS)
+ size_sph = GF100_SHADER_HEADER_SIZE;
+ else
+ size_sph = TU102_SHADER_HEADER_SIZE;
+ }
+ code_pos += size_sph;
if (prog->relocs)
nv50_ir_relocate_code(prog->relocs, prog->code, code_pos,
@@ -803,8 +823,7 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
if (!is_cp)
nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base,
- NV_VRAM_DOMAIN(&screen->base),
- NVC0_SHADER_HEADER_SIZE, prog->hdr);
+ NV_VRAM_DOMAIN(&screen->base), size_sph, prog->hdr);
nvc0->base.push_data(&nvc0->base, screen->text, code_pos,
NV_VRAM_DOMAIN(&screen->base), prog->code_size,
@@ -817,7 +836,14 @@ nvc0_program_upload(struct nvc0_context *nvc0, struct nvc0_program *prog)
struct nvc0_screen *screen = nvc0->screen;
const bool is_cp = prog->type == PIPE_SHADER_COMPUTE;
int ret;
- uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE);
+ uint32_t size = prog->code_size;
+
+ if (!is_cp) {
+ if (screen->eng3d->oclass < TU102_3D_CLASS)
+ size += GF100_SHADER_HEADER_SIZE;
+ else
+ size += TU102_SHADER_HEADER_SIZE;
+ }
ret = nvc0_program_alloc_code(nvc0, prog);
if (ret) {
@@ -874,8 +900,7 @@ nvc0_program_upload(struct nvc0_context *nvc0, struct nvc0_program *prog)
BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(FLUSH), 1);
PUSH_DATA (nvc0->base.pushbuf, NVC0_COMPUTE_FLUSH_CODE);
} else {
- BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(SP_START_ID(i)), 1);
- PUSH_DATA (nvc0->base.pushbuf, progs[i]->code_base);
+ nvc0_program_sp_start_id(nvc0, i, progs[i]);
}
}
}
@@ -953,7 +978,7 @@ nvc0_program_symbol_offset(const struct nvc0_program *prog, uint32_t label)
unsigned base = 0;
unsigned i;
if (prog->type != PIPE_SHADER_COMPUTE)
- base = NVC0_SHADER_HEADER_SIZE;
+ base = GF100_SHADER_HEADER_SIZE;
for (i = 0; i < prog->cp.num_syms; ++i)
if (syms[i].label == label)
return prog->code_base + base + syms[i].offset;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
index 5684207aa54..2c465b342e9 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
@@ -15,7 +15,9 @@ struct nvc0_transform_feedback_state {
};
-#define NVC0_SHADER_HEADER_SIZE (20 * 4)
+#define GF100_SHADER_HEADER_SIZE (20 * 4)
+#define TU102_SHADER_HEADER_SIZE (32 * 4)
+#define NVC0_MAX_SHADER_HEADER_SIZE TU102_SHADER_HEADER_SIZE
struct nvc0_program {
struct pipe_shader_state pipe;
@@ -30,7 +32,7 @@ struct nvc0_program {
unsigned code_size;
unsigned parm_size; /* size of non-bindable uniforms (c0[]) */
- uint32_t hdr[20];
+ uint32_t hdr[NVC0_MAX_SHADER_HEADER_SIZE/4];
uint32_t flags[2];
struct {
@@ -72,4 +74,6 @@ struct nvc0_program {
struct nouveau_heap *mem;
};
+void
+nvc0_program_sp_start_id(struct nvc0_context *, int, struct nvc0_program *);
#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 7abbf762af2..07d74ddd50c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -27,15 +27,17 @@
#include "util/format/u_format_s3tc.h"
#include "util/u_screen.h"
#include "pipe/p_screen.h"
-#include "compiler/nir/nir.h"
#include "nouveau_vp3_video.h"
+#include "codegen/nv50_ir_driver.h"
+
#include "nvc0/nvc0_context.h"
#include "nvc0/nvc0_screen.h"
#include "nvc0/mme/com9097.mme.h"
#include "nvc0/mme/com90c0.mme.h"
+#include "nvc0/mme/comc597.mme.h"
#include "nv50/g80_texture.xml.h"
@@ -443,8 +445,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen,
case PIPE_SHADER_CAP_PREFERRED_IR:
return screen->prefer_nir ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI;
case PIPE_SHADER_CAP_SUPPORTED_IRS: {
- uint32_t irs = 1 << PIPE_SHADER_IR_TGSI |
- 1 << PIPE_SHADER_IR_NIR;
+ uint32_t irs = 1 << PIPE_SHADER_IR_NIR |
+ ((class_3d >= GV100_3D_CLASS) ? 0 : 1 << PIPE_SHADER_IR_TGSI);
if (screen->force_enable_cl)
irs |= 1 << PIPE_SHADER_IR_NIR_SERIALIZED;
return irs;
@@ -467,6 +469,14 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen,
case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
return shader != PIPE_SHADER_FRAGMENT;
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+ /* HW doesn't support indirect addressing of fragment program inputs
+ * on Volta. The binary driver generates a function to handle every
+ * possible indirection, and indirectly calls the function to handle
+ * this instead.
+ */
+ if (class_3d >= GV100_3D_CLASS)
+ return shader != PIPE_SHADER_FRAGMENT;
+ return 1;
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
return 1;
@@ -717,6 +727,26 @@ nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos,
return pos + size;
}
+static int
+tu102_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos,
+ unsigned size, const uint32_t *data)
+{
+ struct nouveau_pushbuf *push = screen->base.pushbuf;
+
+ size /= 4; /* callers pass sizeof() of a uint32_t array: bytes -> 32-bit words */
+
+ assert((pos + size) <= 0x800); /* NOTE(review): adds pos to a word count, yet pos advances by size / 3 below - confirm units */
+
+ BEGIN_NVC0(push, SUBC_3D(NVC0_GRAPH_MACRO_ID), 2);
+ PUSH_DATA (push, (m - 0x3800) / 8); /* macro index derived from the macro method offset m */
+ PUSH_DATA (push, pos);
+ BEGIN_1IC0(push, SUBC_3D(NVC0_GRAPH_MACRO_UPLOAD_POS), size + 1); /* one word for pos plus size data words */
+ PUSH_DATA (push, pos);
+ PUSH_DATAp(push, data, size);
+
+ return pos + (size / 3); /* next free position; presumably three words per MME_INSN - confirm */
+}
+
static void
nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class)
{
@@ -728,8 +758,10 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class)
BEGIN_NVC0(push, SUBC_3D(0x10ec), 2);
PUSH_DATA (push, 0xff);
PUSH_DATA (push, 0xff);
- BEGIN_NVC0(push, SUBC_3D(0x074c), 1);
- PUSH_DATA (push, 0x3f);
+ if (obj_class < GV100_3D_CLASS) {
+ BEGIN_NVC0(push, SUBC_3D(0x074c), 1);
+ PUSH_DATA (push, 0x3f);
+ }
BEGIN_NVC0(push, SUBC_3D(0x16a8), 1);
PUSH_DATA (push, (3 << 16) | 3);
@@ -761,8 +793,10 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class)
BEGIN_NVC0(push, SUBC_3D(0x0300), 1);
PUSH_DATA (push, 3);
- BEGIN_NVC0(push, SUBC_3D(0x02d0), 1);
- PUSH_DATA (push, 0x3fffff);
+ if (obj_class < GV100_3D_CLASS) {
+ BEGIN_NVC0(push, SUBC_3D(0x02d0), 1);
+ PUSH_DATA (push, 0x3fffff);
+ }
BEGIN_NVC0(push, SUBC_3D(0x0fdc), 1);
PUSH_DATA (push, 1);
BEGIN_NVC0(push, SUBC_3D(0x19c0), 1);
@@ -822,6 +856,8 @@ nvc0_screen_init_compute(struct nvc0_screen *screen)
case 0x110:
case 0x120:
case 0x130:
+ case 0x140:
+ case 0x160:
return nve4_screen_compute_setup(screen, screen->base.pushbuf);
default:
return -1;
@@ -893,13 +929,15 @@ nvc0_screen_resize_text_area(struct nvc0_screen *screen, uint64_t size)
nouveau_heap_init(&screen->text_heap, 0, size - 0x100);
/* update the code segment setup */
- BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, screen->text->offset);
- PUSH_DATA (push, screen->text->offset);
- if (screen->compute) {
- BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2);
+ if (screen->eng3d->oclass < GV100_3D_CLASS) {
+ BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2);
PUSH_DATAh(push, screen->text->offset);
PUSH_DATA (push, screen->text->offset);
+ if (screen->compute) {
+ BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->text->offset);
+ PUSH_DATA (push, screen->text->offset);
+ }
}
return 0;
@@ -939,74 +977,14 @@ nvc0_screen_bind_cb_3d(struct nvc0_screen *screen, bool *can_serialize,
IMMED_NVC0(push, NVC0_3D(CB_BIND(stage)), (index << 4) | (size >= 0));
}
-static const nir_shader_compiler_options nir_options = {
- .lower_fdiv = false,
- .lower_ffma = false,
- .fuse_ffma = false, /* nir doesn't track mad vs fma */
- .lower_flrp32 = true,
- .lower_flrp64 = true,
- .lower_fpow = false,
- .lower_fsat = false,
- .lower_fsqrt = false, // TODO: only before gm200
- .lower_fmod = true,
- .lower_bitfield_extract = false,
- .lower_bitfield_extract_to_shifts = false,
- .lower_bitfield_insert = false,
- .lower_bitfield_insert_to_shifts = false,
- .lower_bitfield_reverse = false,
- .lower_bit_count = false,
- .lower_ifind_msb = false,
- .lower_find_lsb = false,
- .lower_uadd_carry = true, // TODO
- .lower_usub_borrow = true, // TODO
- .lower_mul_high = false,
- .lower_negate = false,
- .lower_sub = true,
- .lower_scmp = true, // TODO: not implemented yet
- .lower_idiv = true,
- .lower_isign = false, // TODO
- .fdot_replicates = false, // TODO
- .lower_ffloor = false, // TODO
- .lower_ffract = true,
- .lower_fceil = false, // TODO
- .lower_ldexp = true,
- .lower_pack_half_2x16 = true,
- .lower_pack_unorm_2x16 = true,
- .lower_pack_snorm_2x16 = true,
- .lower_pack_unorm_4x8 = true,
- .lower_pack_snorm_4x8 = true,
- .lower_unpack_half_2x16 = true,
- .lower_unpack_unorm_2x16 = true,
- .lower_unpack_snorm_2x16 = true,
- .lower_unpack_unorm_4x8 = true,
- .lower_unpack_snorm_4x8 = true,
- .lower_extract_byte = true,
- .lower_extract_word = true,
- .lower_all_io_to_temps = false,
- .vertex_id_zero_based = false,
- .lower_base_vertex = false,
- .lower_helper_invocation = false,
- .lower_cs_local_index_from_id = true,
- .lower_cs_local_id_from_index = false,
- .lower_device_index_to_zero = false, // TODO
- .lower_wpos_pntc = false, // TODO
- .lower_hadd = true, // TODO
- .lower_add_sat = true, // TODO
- .use_interpolated_input_intrinsics = true,
- .lower_mul_2x32_64 = true, // TODO
- .max_unroll_iterations = 32,
- .lower_int64_options = nir_lower_ufind_msb64|nir_lower_divmod64, // TODO
- .lower_doubles_options = nir_lower_dmod, // TODO
- .lower_to_scalar = true,
-};
-
static const void *
nvc0_screen_get_compiler_options(struct pipe_screen *pscreen,
enum pipe_shader_ir ir,
enum pipe_shader_type shader)
{
+ struct nvc0_screen *screen = nvc0_screen(pscreen);
if (ir == PIPE_SHADER_IR_NIR)
- return &nir_options;
+ return nv50_ir_nir_shader_compiler_options(screen->base.device->chipset);
return NULL;
}
@@ -1038,6 +1016,8 @@ nvc0_screen_create(struct nouveau_device *dev)
case 0x110:
case 0x120:
case 0x130:
+ case 0x140:
+ case 0x160:
break;
default:
return NULL;
@@ -1104,16 +1084,19 @@ nvc0_screen_create(struct nouveau_device *dev)
screen->base.fence.emit = nvc0_screen_fence_emit;
screen->base.fence.update = nvc0_screen_fence_update;
+ if (dev->chipset < 0x140) {
+ ret = nouveau_object_new(chan, (dev->chipset < 0xe0) ? 0x1f906e : 0x906e,
+ NVIF_CLASS_SW_GF100, NULL, 0, &screen->nvsw);
+ if (ret)
+ FAIL_SCREEN_INIT("Error creating SW object: %d\n", ret);
- ret = nouveau_object_new(chan, (dev->chipset < 0xe0) ? 0x1f906e : 0x906e,
- NVIF_CLASS_SW_GF100, NULL, 0, &screen->nvsw);
- if (ret)
- FAIL_SCREEN_INIT("Error creating SW object: %d\n", ret);
-
- BEGIN_NVC0(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1);
- PUSH_DATA (push, screen->nvsw->handle);
+ BEGIN_NVC0(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push, screen->nvsw->handle);
+ }
switch (dev->chipset & ~0xf) {
+ case 0x160:
+ case 0x140:
case 0x130:
case 0x120:
case 0x110:
@@ -1167,6 +1150,12 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATA (push, screen->fence.bo->offset + 16);
switch (dev->chipset & ~0xf) {
+ case 0x160:
+ obj_class = TU102_3D_CLASS;
+ break;
+ case 0x140:
+ obj_class = GV100_3D_CLASS;
+ break;
case 0x130:
switch (dev->chipset) {
case 0x130:
@@ -1414,25 +1403,47 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATA (push, 16384 << 16);
}
+ if (screen->eng3d->oclass < TU102_3D_CLASS) {
#define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n);
- i = 0;
- MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mme9097_per_instance_bf);
- MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mme9097_blend_enables);
- MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mme9097_vertex_array_select);
- MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mme9097_tep_select);
- MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mme9097_gp_select);
- MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mme9097_poly_mode_front);
- MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mme9097_poly_mode_back);
- MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mme9097_draw_arrays_indirect);
- MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect);
- MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
- MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
- MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
- MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mme9097_conservative_raster_state);
- MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mme9097_compute_counter);
- MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mme9097_compute_counter_to_query);
- MK_MACRO(NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect);
+ i = 0;
+ MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mme9097_per_instance_bf);
+ MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mme9097_blend_enables);
+ MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mme9097_vertex_array_select);
+ MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mme9097_tep_select);
+ MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mme9097_gp_select);
+ MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mme9097_poly_mode_front);
+ MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mme9097_poly_mode_back);
+ MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mme9097_draw_arrays_indirect);
+ MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect);
+ MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
+ MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
+ MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
+ MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mme9097_conservative_raster_state);
+ MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mme9097_compute_counter);
+ MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mme9097_compute_counter_to_query);
+ MK_MACRO(NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect);
+ } else {
+#undef MK_MACRO
+#define MK_MACRO(m, n) i = tu102_graph_set_macro(screen, m, i, sizeof(n), n);
+
+ i = 0;
+ MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mmec597_per_instance_bf);
+ MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mmec597_blend_enables);
+ MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mmec597_vertex_array_select);
+ MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mmec597_tep_select);
+ MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mmec597_gp_select);
+ MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mmec597_poly_mode_front);
+ MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mmec597_poly_mode_back);
+ MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mmec597_draw_arrays_indirect);
+ MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mmec597_draw_elts_indirect);
+ MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mmec597_draw_arrays_indirect_count);
+ MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mmec597_draw_elts_indirect_count);
+ MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mmec597_query_buffer_write);
+ MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mmec597_conservative_raster_state);
+ MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mmec597_compute_counter);
+ MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mmec597_compute_counter_to_query);
+ }
BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
PUSH_DATA (push, 1);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
index b7e0c8a930f..490026b2c00 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
@@ -64,6 +64,22 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
return true; /* stream output info only */
}
+void
+nvc0_program_sp_start_id(struct nvc0_context *nvc0, int stage,
+ struct nvc0_program *prog)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+
+ if (nvc0->screen->eng3d->oclass < GV100_3D_CLASS) { /* 3D classes older than GV100 */
+ BEGIN_NVC0(push, NVC0_3D(SP_START_ID(stage)), 1);
+ PUSH_DATA (push, prog->code_base); /* code_base alone, without the text buffer offset */
+ } else { /* GV100 and newer: two-word ADDRESS_HIGH/LOW pair for this stage */
+ BEGIN_NVC0(push, SUBC_3D(GV100_3D_SP_ADDRESS_HIGH(stage)), 2);
+ PUSH_DATAh(push, nvc0->screen->text->offset + prog->code_base); /* presumably the upper 32 bits of text offset + code_base - confirm */
+ PUSH_DATA (push, nvc0->screen->text->offset + prog->code_base); /* same sum, pushed as the second word */
+ }
+}
+
void
nvc0_vertprog_validate(struct nvc0_context *nvc0)
{
@@ -74,9 +90,9 @@ nvc0_vertprog_validate(struct nvc0_context *nvc0)
return;
nvc0_program_update_context_state(nvc0, vp, 0);
- BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 2);
+ BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 1);
PUSH_DATA (push, 0x11);
- PUSH_DATA (push, vp->code_base);
+ nvc0_program_sp_start_id(nvc0, 1, vp);
BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(1)), 1);
PUSH_DATA (push, vp->num_gprs);
@@ -152,9 +168,9 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0)
fp->fp.post_depth_coverage);
}
- BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 2);
+ BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 1);
PUSH_DATA (push, 0x51);
- PUSH_DATA (push, fp->code_base);
+ nvc0_program_sp_start_id(nvc0, 5, fp);
BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(5)), 1);
PUSH_DATA (push, fp->num_gprs);
@@ -176,9 +192,9 @@ nvc0_tctlprog_validate(struct nvc0_context *nvc0)
BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1);
PUSH_DATA (push, tp->tp.tess_mode);
}
- BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 2);
+ BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1);
PUSH_DATA (push, 0x21);
- PUSH_DATA (push, tp->code_base);
+ nvc0_program_sp_start_id(nvc0, 2, tp);
BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1);
PUSH_DATA (push, tp->num_gprs);
} else {
@@ -186,9 +202,9 @@ nvc0_tctlprog_validate(struct nvc0_context *nvc0)
/* not a whole lot we can do to handle this failure */
if (!nvc0_program_validate(nvc0, tp))
assert(!"unable to validate empty tcp");
- BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 2);
+ BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1);
PUSH_DATA (push, 0x20);
- PUSH_DATA (push, tp->code_base);
+ nvc0_program_sp_start_id(nvc0, 2, tp);
}
nvc0_program_update_context_state(nvc0, tp, 1);
}
@@ -206,8 +222,7 @@ nvc0_tevlprog_validate(struct nvc0_context *nvc0)
}
BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);
PUSH_DATA (push, 0x31);
- BEGIN_NVC0(push, NVC0_3D(SP_START_ID(3)), 1);
- PUSH_DATA (push, tp->code_base);
+ nvc0_program_sp_start_id(nvc0, 3, tp);
BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(3)), 1);
PUSH_DATA (push, tp->num_gprs);
} else {
@@ -227,8 +242,7 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
if (gp && nvc0_program_validate(nvc0, gp) && gp->code_size) {
BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
PUSH_DATA (push, 0x41);
- BEGIN_NVC0(push, NVC0_3D(SP_START_ID(4)), 1);
- PUSH_DATA (push, gp->code_base);
+ nvc0_program_sp_start_id(nvc0, 4, gp);
BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(4)), 1);
PUSH_DATA (push, gp->num_gprs);
} else {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index 538effdb531..731b0b5dbf8 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -29,6 +29,8 @@
#include "util/format/u_format.h"
#include "util/u_surface.h"
+#include "tgsi/tgsi_ureg.h"
+
#include "os/os_thread.h"
#include "nvc0/nvc0_context.h"
@@ -138,6 +140,11 @@ nvc0_2d_texture_set(struct nouveau_pushbuf *push, bool dst,
PUSH_DATA (push, bo->offset + offset);
}
+ if (dst) {
+ IMMED_NVC0(push, SUBC_2D(NVC0_2D_SET_DST_COLOR_RENDER_TO_ZETA_SURFACE),
+ util_format_is_depth_or_stencil(pformat));
+ }
+
#if 0
if (dst) {
BEGIN_NVC0(push, SUBC_2D(NVC0_2D_CLIP_X), 4);
@@ -772,7 +779,7 @@ gm200_evaluate_depth_buffer(struct pipe_context *pipe)
struct nvc0_blitter
{
struct nvc0_program *fp[NV50_BLIT_MAX_TEXTURE_TYPES][NV50_BLIT_MODES];
- struct nvc0_program vp;
+ struct nvc0_program *vp;
struct nv50_tsc_entry sampler[2]; /* nearest, bilinear */
@@ -785,6 +792,7 @@ struct nvc0_blitctx
{
struct nvc0_context *nvc0;
struct nvc0_program *fp;
+ struct nvc0_program *vp;
uint8_t mode;
uint16_t color_mask;
uint8_t filter;
@@ -809,78 +817,27 @@ struct nvc0_blitctx
struct nvc0_rasterizer_stateobj rast;
};
-static void
-nvc0_blitter_make_vp(struct nvc0_blitter *blit)
+static void *
+nvc0_blitter_make_vp(struct pipe_context *pipe)
{
- static const uint32_t code_nvc0[] =
- {
- 0xfff11c26, 0x06000080, /* vfetch b64 $r4:$r5 a[0x80] */
- 0xfff01c46, 0x06000090, /* vfetch b96 $r0:$r1:$r2 a[0x90] */
- 0x13f01c26, 0x0a7e0070, /* export b64 o[0x70] $r4:$r5 */
- 0x03f01c46, 0x0a7e0080, /* export b96 o[0x80] $r0:$r1:$r2 */
- 0x00001de7, 0x80000000, /* exit */
- };
- static const uint32_t code_nve4[] =
- {
- 0x00000007, 0x20000000, /* sched */
- 0xfff11c26, 0x06000080, /* vfetch b64 $r4:$r5 a[0x80] */
- 0xfff01c46, 0x06000090, /* vfetch b96 $r0:$r1:$r2 a[0x90] */
- 0x13f01c26, 0x0a7e0070, /* export b64 o[0x70] $r4:$r5 */
- 0x03f01c46, 0x0a7e0080, /* export b96 o[0x80] $r0:$r1:$r2 */
- 0x00001de7, 0x80000000, /* exit */
- };
- static const uint32_t code_gk110[] =
- {
- 0x00000000, 0x08000000, /* sched */
- 0x401ffc12, 0x7ec7fc00, /* ld b64 $r4d a[0x80] 0x0 0x0 */
- 0x481ffc02, 0x7ecbfc00, /* ld b96 $r0t a[0x90] 0x0 0x0 */
- 0x381ffc12, 0x7f07fc00, /* st b64 a[0x70] $r4d 0x0 0x0 */
- 0x401ffc02, 0x7f0bfc00, /* st b96 a[0x80] $r0t 0x0 0x0 */
- 0x001c003c, 0x18000000, /* exit */
- };
- static const uint32_t code_gm107[] =
- {
- 0xe4200701, 0x001d0400, /* sched (st 0x1 wr 0x0) (st 0x1 wr 0x1) (st 0x1 wr 0x2) */
- 0x0807ff00, 0xefd87f80, /* ld b32 $r0 a[0x80] 0x0 */
- 0x0847ff01, 0xefd87f80, /* ld b32 $r1 a[0x84] 0x0 */
- 0x0907ff02, 0xefd87f80, /* ld b32 $r2 a[0x90] 0x0 */
- 0xf0200761, 0x003f8400, /* sched (st 0x1 wr 0x3) (st 0x1 wr 0x4) (st 0x1 wt 0x1) */
- 0x0947ff03, 0xefd87f80, /* ld b32 $r3 a[0x94] 0x0 */
- 0x0987ff04, 0xefd87f80, /* ld b32 $r4 a[0x98] 0x0 */
- 0x0707ff00, 0xeff07f80, /* st b32 a[0x70] $r0 0x0 */
- 0xfc2017e1, 0x011f8404, /* sched (st 0x1 wt 0x2) (st 0x1 wt 0x4) (st 0x1 wt 0x8) */
- 0x0747ff01, 0xeff07f80, /* st b32 a[0x74] $r1 0x0 */
- 0x0807ff02, 0xeff07f80, /* st b32 a[0x80] $r2 0x0 */
- 0x0847ff03, 0xeff07f80, /* st b32 a[0x84] $r3 0x0 */
- 0xfde087e1, 0x001f8000, /* sched (st 0x1 wt 0x10) (st 0xf) (st 0x0) */
- 0x0887ff04, 0xeff07f80, /* st b32 a[0x88] $r4 0x0 */
- 0x0007000f, 0xe3000000, /* exit */
- };
-
- blit->vp.type = PIPE_SHADER_VERTEX;
- blit->vp.translated = true;
- if (blit->screen->base.class_3d >= GM107_3D_CLASS) {
- blit->vp.code = (uint32_t *)code_gm107; /* const_cast */
- blit->vp.code_size = sizeof(code_gm107);
- } else
- if (blit->screen->base.class_3d >= NVF0_3D_CLASS) {
- blit->vp.code = (uint32_t *)code_gk110; /* const_cast */
- blit->vp.code_size = sizeof(code_gk110);
- } else
- if (blit->screen->base.class_3d >= NVE4_3D_CLASS) {
- blit->vp.code = (uint32_t *)code_nve4; /* const_cast */
- blit->vp.code_size = sizeof(code_nve4);
- } else {
- blit->vp.code = (uint32_t *)code_nvc0; /* const_cast */
- blit->vp.code_size = sizeof(code_nvc0);
- }
- blit->vp.num_gprs = 6;
- blit->vp.vp.edgeflag = PIPE_MAX_ATTRIBS;
+ struct ureg_program *ureg;
+ struct ureg_src ipos, itex;
+ struct ureg_dst opos, otex;
+
+ ureg = ureg_create(PIPE_SHADER_VERTEX);
+ if (!ureg)
+ return NULL;
+
+ opos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
+ ipos = ureg_DECL_vs_input(ureg, 0);
+ otex = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 0);
+ itex = ureg_DECL_vs_input(ureg, 1);
+
+ ureg_MOV(ureg, ureg_writemask(opos, TGSI_WRITEMASK_XY ), ipos);
+ ureg_MOV(ureg, ureg_writemask(otex, TGSI_WRITEMASK_XYZ), itex);
+ ureg_END(ureg);
- blit->vp.hdr[0] = 0x00020461; /* vertprog magic */
- blit->vp.hdr[4] = 0x000ff000; /* no outputs read */
- blit->vp.hdr[6] = 0x00000073; /* a[0x80].xy, a[0x90].xyz */
- blit->vp.hdr[13] = 0x00073000; /* o[0x70].xy, o[0x80].xyz */
+ return ureg_create_shader_and_destroy(ureg, pipe);
}
static void
@@ -910,6 +867,20 @@ nvc0_blitter_make_sampler(struct nvc0_blitter *blit)
G80_TSC_1_MIP_FILTER_NONE;
}
+static void /* Point ctx->vp at the blitter's shared vertex program, building it on first use. */
+nvc0_blit_select_vp(struct nvc0_blitctx *ctx)
+{
+ struct nvc0_blitter *blitter = ctx->nvc0->screen->blitter; /* the blitter is reached through the screen */
+
+ if (!blitter->vp) { /* NOTE(review): this first test reads blitter->vp without holding the mutex - confirm that is acceptable */
+ mtx_lock(&blitter->mutex);
+ if (!blitter->vp) /* re-test under the mutex so a caller that waited on the lock does not build it again */
+ blitter->vp = nvc0_blitter_make_vp(&ctx->nvc0->base.pipe);
+ mtx_unlock(&blitter->mutex);
+ }
+ ctx->vp = blitter->vp; /* still NULL if nvc0_blitter_make_vp() returned NULL */
+}
+
static void
nvc0_blit_select_fp(struct nvc0_blitctx *ctx, const struct pipe_blit_info *info)
{
@@ -1082,7 +1053,7 @@ nvc0_blitctx_pre_blit(struct nvc0_blitctx *ctx,
nvc0->rast = &ctx->rast;
- nvc0->vertprog = &blitter->vp;
+ nvc0->vertprog = ctx->vp;
nvc0->tctlprog = NULL;
nvc0->tevlprog = NULL;
nvc0->gmtyprog = NULL;
@@ -1221,6 +1192,7 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
blit->filter = nv50_blit_get_filter(info);
blit->render_condition_enable = info->render_condition_enable;
+ nvc0_blit_select_vp(blit);
nvc0_blit_select_fp(blit, info);
nvc0_blitctx_pre_blit(blit, info);
@@ -1266,6 +1238,11 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
}
}
+ if (screen->eng3d->oclass >= TU102_3D_CLASS) {
+ IMMED_NVC0(push, SUBC_3D(TU102_3D_SET_COLOR_RENDER_TO_ZETA_SURFACE),
+ util_format_is_depth_or_stencil(info->dst.format));
+ }
+
IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 0);
IMMED_NVC0(push, NVC0_3D(VIEW_VOLUME_CLIP_CTRL), 0x2 |
NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_RANGE_0_1);
@@ -1326,7 +1303,10 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
PUSH_DATAh(push, vtxbuf);
PUSH_DATA (push, vtxbuf);
PUSH_DATA (push, 0);
- BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
+ if (screen->eng3d->oclass < TU102_3D_CLASS)
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
+ else
+ BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
PUSH_DATAh(push, vtxbuf + length - 1);
PUSH_DATA (push, vtxbuf + length - 1);
@@ -1403,6 +1383,8 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
/* restore viewport transform */
IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 1);
+ if (screen->eng3d->oclass >= TU102_3D_CLASS)
+ IMMED_NVC0(push, SUBC_3D(TU102_3D_SET_COLOR_RENDER_TO_ZETA_SURFACE), 0);
}
static void
@@ -1697,7 +1679,6 @@ nvc0_blitter_create(struct nvc0_screen *screen)
(void) mtx_init(&screen->blitter->mutex, mtx_plain);
- nvc0_blitter_make_vp(screen->blitter);
nvc0_blitter_make_sampler(screen->blitter);
return true;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index 92bd7eb5b8e..8287d8431b1 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -360,7 +360,11 @@ nvc0_validate_vertex_buffers(struct nvc0_context *nvc0)
PUSH_DATAh(push, res->address + offset);
PUSH_DATA (push, res->address + offset);
}
- BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
+
+ if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS)
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
+ else
+ BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
PUSH_DATAh(push, res->address + limit);
PUSH_DATA (push, res->address + limit);
@@ -406,7 +410,11 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0)
PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
PUSH_DATAh(push, buf->address + offset);
PUSH_DATA (push, buf->address + offset);
- BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(b)), 2);
+
+ if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS)
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(b)), 2);
+ else
+ BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(b)), 2);
PUSH_DATAh(push, buf->address + limit);
PUSH_DATA (push, buf->address + limit);
@@ -961,12 +969,23 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
assert(nouveau_resource_mapped_by_gpu(&buf->base));
PUSH_SPACE(push, 6);
- BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 5);
- PUSH_DATAh(push, buf->address);
- PUSH_DATA (push, buf->address);
- PUSH_DATAh(push, buf->address + buf->base.width0 - 1);
- PUSH_DATA (push, buf->address + buf->base.width0 - 1);
- PUSH_DATA (push, info->index_size >> 1);
+ if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS) {
+ BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 5);
+ PUSH_DATAh(push, buf->address);
+ PUSH_DATA (push, buf->address);
+ PUSH_DATAh(push, buf->address + buf->base.width0 - 1);
+ PUSH_DATA (push, buf->address + buf->base.width0 - 1);
+ PUSH_DATA (push, info->index_size >> 1);
+ } else {
+ BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 2);
+ PUSH_DATAh(push, buf->address);
+ PUSH_DATA (push, buf->address);
+ BEGIN_NVC0(push, SUBC_3D(TU102_3D_INDEX_ARRAY_LIMIT_HIGH), 2);
+ PUSH_DATAh(push, buf->address + buf->base.width0 - 1);
+ PUSH_DATA (push, buf->address + buf->base.width0 - 1);
+ BEGIN_NVC0(push, NVC0_3D(INDEX_FORMAT), 1);
+ PUSH_DATA (push, info->index_size >> 1);
+ }
BCTX_REFN(nvc0->bufctx_3d, 3D_IDX, buf, RD);
}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
index 8aa7088dfec..d49a5dfd2cf 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
@@ -228,7 +228,11 @@ nvc0_push_setup_vertex_array(struct nvc0_context *nvc0, const unsigned count)
BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_START_HIGH(0)), 2);
PUSH_DATAh(push, va);
PUSH_DATA (push, va);
- BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
+
+ if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS)
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
+ else
+ BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
PUSH_DATAh(push, va + size - 1);
PUSH_DATA (push, va + size - 1);
@@ -771,7 +775,11 @@ nvc0_push_upload_vertex_ids(struct push_context *ctx,
PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | index_size);
PUSH_DATAh(push, va);
PUSH_DATA (push, va);
- BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(1)), 2);
+
+ if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS)
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(1)), 2);
+ else
+ BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(1)), 2);
PUSH_DATAh(push, va + info->count * index_size - 1);
PUSH_DATA (push, va + info->count * index_size - 1);
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index 146eeb35f85..ebbc410184b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -27,11 +27,18 @@
#include "codegen/nv50_ir_driver.h"
-#ifndef NDEBUG
-static void nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *);
-static void gp100_compute_dump_launch_desc(const struct gp100_cp_launch_desc *);
-#endif
-
+#include "drf.h"
+#include "qmd.h"
+#include "cla0c0qmd.h"
+#include "clc0c0qmd.h"
+#include "clc3c0qmd.h"
+
+#define NVA0C0_QMDV00_06_VAL_SET(p,a...) NVVAL_MW_SET((p), NVA0C0, QMDV00_06, ##a)
+#define NVA0C0_QMDV00_06_DEF_SET(p,a...) NVDEF_MW_SET((p), NVA0C0, QMDV00_06, ##a)
+#define NVC0C0_QMDV02_01_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC0C0, QMDV02_01, ##a)
+#define NVC0C0_QMDV02_01_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC0C0, QMDV02_01, ##a)
+#define NVC3C0_QMDV02_02_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC3C0, QMDV02_02, ##a)
+#define NVC3C0_QMDV02_02_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC3C0, QMDV02_02, ##a)
int
nve4_screen_compute_setup(struct nvc0_screen *screen,
@@ -45,6 +52,12 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
uint64_t address;
switch (dev->chipset & ~0xf) {
+ case 0x160:
+ obj_class = TU102_COMPUTE_CLASS;
+ break;
+ case 0x140:
+ obj_class = GV100_COMPUTE_CLASS;
+ break;
case 0x100:
case 0xf0:
obj_class = NVF0_COMPUTE_CLASS; /* GK110 */
@@ -88,24 +101,35 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
PUSH_DATAh(push, screen->tls->size / screen->mp_count);
PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
PUSH_DATA (push, 0xff);
- BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(1)), 3);
- PUSH_DATAh(push, screen->tls->size / screen->mp_count);
- PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
- PUSH_DATA (push, 0xff);
+ if (obj_class < GV100_COMPUTE_CLASS) {
+ BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(1)), 3);
+ PUSH_DATAh(push, screen->tls->size / screen->mp_count);
+ PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
+ PUSH_DATA (push, 0xff);
+ }
/* Unified address space ? Who needs that ? Certainly not OpenCL.
*
* FATAL: Buffers with addresses inside [0x1000000, 0x3000000] will NOT be
* accessible. We cannot prevent that at the moment, so expect failure.
*/
- BEGIN_NVC0(push, NVE4_CP(LOCAL_BASE), 1);
- PUSH_DATA (push, 0xff << 24);
- BEGIN_NVC0(push, NVE4_CP(SHARED_BASE), 1);
- PUSH_DATA (push, 0xfe << 24);
-
- BEGIN_NVC0(push, NVE4_CP(CODE_ADDRESS_HIGH), 2);
- PUSH_DATAh(push, screen->text->offset);
- PUSH_DATA (push, screen->text->offset);
+ if (obj_class < GV100_COMPUTE_CLASS) {
+ BEGIN_NVC0(push, NVE4_CP(LOCAL_BASE), 1);
+ PUSH_DATA (push, 0xff << 24);
+ BEGIN_NVC0(push, NVE4_CP(SHARED_BASE), 1);
+ PUSH_DATA (push, 0xfe << 24);
+
+ BEGIN_NVC0(push, NVE4_CP(CODE_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->text->offset);
+ PUSH_DATA (push, screen->text->offset);
+ } else {
+ BEGIN_NVC0(push, SUBC_CP(0x2a0), 2);
+ PUSH_DATAh(push, 0xfeULL << 24);
+ PUSH_DATA (push, 0xfeULL << 24);
+ BEGIN_NVC0(push, SUBC_CP(0x7b0), 2);
+ PUSH_DATAh(push, 0xffULL << 24);
+ PUSH_DATA (push, 0xffULL << 24);
+ }
BEGIN_NVC0(push, SUBC_CP(0x0310), 1);
PUSH_DATA (push, (obj_class >= NVF0_COMPUTE_CLASS) ? 0x400 : 0x300);
@@ -542,14 +566,35 @@ nve4_compute_upload_input(struct nvc0_context *nvc0,
PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
}
-static inline uint8_t
-nve4_compute_derive_cache_split(struct nvc0_context *nvc0, uint32_t shared_size)
+static inline void /* Fill constant-buffer slot `index` of a QMD using the NVC0C0 QMDV02_01 field layout. */
+gp100_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index,
+ struct nouveau_bo *bo, uint32_t base, uint32_t size)
+{
+ uint64_t address = bo->offset + base; /* buffer address = BO offset plus `base` */
+
+ assert(index < 8); /* only slots 0..7 are accepted */
+ assert(!(base & 0xff)); /* the low 8 bits of `base` must be clear */
+
+ NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_LOWER, index, address);
+ NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_UPPER, index, address >> 32);
+ NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_SIZE_SHIFTED4, index, /* the field stores size / 16, not the byte count */
+ DIV_ROUND_UP(size, 16));
+ NVC0C0_QMDV02_01_DEF_SET(qmd, CONSTANT_BUFFER_VALID, index, TRUE); /* mark the slot as valid */
+}
+
+static inline void
+nve4_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index, struct nouveau_bo *bo,
+ uint32_t base, uint32_t size)
{
- if (shared_size > (32 << 10))
- return NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1;
- if (shared_size > (16 << 10))
- return NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1;
- return NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1;
+ uint64_t address = bo->offset + base;
+
+ assert(index < 8);
+ assert(!(base & 0xff));
+
+ NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_LOWER, index, address);
+ NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_UPPER, index, address >> 32);
+ NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_SIZE, index, size);
+ NVA0C0_QMDV00_06_DEF_SET(qmd, CONSTANT_BUFFER_VALID, index, TRUE);
}
static void
@@ -577,92 +622,186 @@ nve4_compute_setup_buf_cb(struct nvc0_context *nvc0, bool gp100, void *desc)
}
static void
-nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
- struct nve4_cp_launch_desc *desc,
+nve4_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd,
const struct pipe_grid_info *info)
{
const struct nvc0_screen *screen = nvc0->screen;
const struct nvc0_program *cp = nvc0->compprog;
- nve4_cp_launch_desc_init_default(desc);
-
- desc->entry = nvc0_program_symbol_offset(cp, info->pc);
-
- desc->griddim_x = info->grid[0];
- desc->griddim_y = info->grid[1];
- desc->griddim_z = info->grid[2];
- desc->blockdim_x = info->block[0];
- desc->blockdim_y = info->block[1];
- desc->blockdim_z = info->block[2];
-
- desc->shared_size = align(cp->cp.smem_size, 0x100);
- desc->local_size_p = (cp->hdr[1] & 0xfffff0) + align(cp->cp.lmem_size, 0x10);
- desc->local_size_n = 0;
- desc->cstack_size = 0x800;
- desc->cache_split = nve4_compute_derive_cache_split(nvc0, cp->cp.smem_size);
+ NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, TRUE);
+ NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, TRUE);
+ NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_DATA_CACHE, TRUE);
+ NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_SHADER_DATA_CACHE, TRUE);
+ NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, TRUE);
+ NVA0C0_QMDV00_06_DEF_SET(qmd, RELEASE_MEMBAR_TYPE, FE_SYSMEMBAR);
+ NVA0C0_QMDV00_06_DEF_SET(qmd, CWD_MEMBAR_TYPE, L1_SYSMEMBAR);
+ NVA0C0_QMDV00_06_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK);
+ NVA0C0_QMDV00_06_VAL_SET(qmd, SASS_VERSION, 0x30);
+
+ NVA0C0_QMDV00_06_VAL_SET(qmd, PROGRAM_OFFSET,
+ nvc0_program_symbol_offset(cp, info->pc));
+
+ NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]);
+ NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]);
+ NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_DEPTH, info->grid[2]);
+ NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]);
+ NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]);
+ NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]);
+
+ NVA0C0_QMDV00_06_VAL_SET(qmd, SHARED_MEMORY_SIZE,
+ align(cp->cp.smem_size, 0x100));
+ NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE,
+ (cp->hdr[1] & 0xfffff0) +
+ align(cp->cp.lmem_size, 0x10));
+ NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0);
+ NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, 0x800);
+
+ if (cp->cp.smem_size > (32 << 10))
+ NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION,
+ DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB);
+ else
+ if (cp->cp.smem_size > (16 << 10))
+ NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION,
+ DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB);
+ else
+ NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION,
+ DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB);
- desc->gpr_alloc = cp->num_gprs;
- desc->bar_alloc = cp->num_barriers;
+ NVA0C0_QMDV00_06_VAL_SET(qmd, REGISTER_COUNT, cp->num_gprs);
+ NVA0C0_QMDV00_06_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers);
// Only bind user uniforms and the driver constant buffer through the
// launch descriptor because UBOs are sticked to the driver cb to avoid the
// limitation of 8 CBs.
if (nvc0->constbuf[5][0].user || cp->parm_size) {
- nve4_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo,
+ nve4_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo,
NVC0_CB_USR_INFO(5), 1 << 16);
// Later logic will attempt to bind a real buffer at position 0. That
// should not happen if we've bound a user buffer.
assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf);
}
- nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
+ nve4_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo,
NVC0_CB_AUX_INFO(5), 1 << 11);
- nve4_compute_setup_buf_cb(nvc0, false, desc);
+ nve4_compute_setup_buf_cb(nvc0, false, qmd);
}
static void
-gp100_compute_setup_launch_desc(struct nvc0_context *nvc0,
- struct gp100_cp_launch_desc *desc,
+gp100_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd,
const struct pipe_grid_info *info)
{
const struct nvc0_screen *screen = nvc0->screen;
const struct nvc0_program *cp = nvc0->compprog;
- gp100_cp_launch_desc_init_default(desc);
+ NVC0C0_QMDV02_01_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
+ NVC0C0_QMDV02_01_DEF_SET(qmd, RELEASE_MEMBAR_TYPE, FE_SYSMEMBAR);
+ NVC0C0_QMDV02_01_DEF_SET(qmd, CWD_MEMBAR_TYPE, L1_SYSMEMBAR);
+ NVC0C0_QMDV02_01_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK);
+
+ NVC0C0_QMDV02_01_VAL_SET(qmd, PROGRAM_OFFSET,
+ nvc0_program_symbol_offset(cp, info->pc));
+
+ NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]);
+ NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]);
+ NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_DEPTH, info->grid[2]);
+ NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]);
+ NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]);
+ NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]);
+
+ NVC0C0_QMDV02_01_VAL_SET(qmd, SHARED_MEMORY_SIZE,
+ align(cp->cp.smem_size, 0x100));
+ NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE,
+ (cp->hdr[1] & 0xfffff0) +
+ align(cp->cp.lmem_size, 0x10));
+ NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0);
+ NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, 0x800);
- desc->entry = nvc0_program_symbol_offset(cp, info->pc);
+ NVC0C0_QMDV02_01_VAL_SET(qmd, REGISTER_COUNT, cp->num_gprs);
+ NVC0C0_QMDV02_01_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers);
- desc->griddim_x = info->grid[0];
- desc->griddim_y = info->grid[1];
- desc->griddim_z = info->grid[2];
- desc->blockdim_x = info->block[0];
- desc->blockdim_y = info->block[1];
- desc->blockdim_z = info->block[2];
+ // Only bind user uniforms and the driver constant buffer through the
+ // launch descriptor because UBOs are sticked to the driver cb to avoid the
+ // limitation of 8 CBs.
+ if (nvc0->constbuf[5][0].user || cp->parm_size) {
+ gp100_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo,
+ NVC0_CB_USR_INFO(5), 1 << 16);
- desc->shared_size = align(cp->cp.smem_size, 0x100);
- desc->local_size_p = (cp->hdr[1] & 0xfffff0) + align(cp->cp.lmem_size, 0x10);
- desc->local_size_n = 0;
- desc->cstack_size = 0x800;
+ // Later logic will attempt to bind a real buffer at position 0. That
+ // should not happen if we've bound a user buffer.
+ assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf);
+ }
+ gp100_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo,
+ NVC0_CB_AUX_INFO(5), 1 << 11);
+
+ nve4_compute_setup_buf_cb(nvc0, true, qmd);
+}
+
+static int /* Bucket `size` to 8/16/32/64/96 KiB and return bucket / 4096 + 1 (3, 5, 9, 17 or 25). */
+gv100_sm_config_smem_size(u32 size)
+{
+ if (size > 64 * 1024) size = 96 * 1024; /* everything above 64 KiB, including values above 96 KiB, lands here */
+ else if (size > 32 * 1024) size = 64 * 1024;
+ else if (size > 16 * 1024) size = 32 * 1024;
+ else if (size > 8 * 1024) size = 16 * 1024;
+ else size = 8 * 1024; /* smallest bucket, also used for size == 0 */
+ return (size / 4096) + 1; /* result is written to the *_SM_CONFIG_SHARED_MEM_SIZE QMD fields by the caller */
+}
- desc->gpr_alloc = cp->num_gprs;
- desc->bar_alloc = cp->num_barriers;
+static void
+gv100_compute_setup_launch_desc(struct nvc0_context *nvc0, u32 *qmd,
+ const struct pipe_grid_info *info)
+{
+ struct nvc0_program *cp = nvc0->compprog;
+ struct nvc0_screen *screen = nvc0->screen;
+ uint64_t entry =
+ screen->text->offset + nvc0_program_symbol_offset(cp, info->pc);
+
+ NVC3C0_QMDV02_02_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
+ NVC3C0_QMDV02_02_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK);
+ NVC3C0_QMDV02_02_DEF_SET(qmd, SAMPLER_INDEX, INDEPENDENTLY);
+ NVC3C0_QMDV02_02_VAL_SET(qmd, SHARED_MEMORY_SIZE,
+ align(cp->cp.smem_size, 0x100));
+ NVC3C0_QMDV02_02_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE,
+ (cp->hdr[1] & 0xfffff0) +
+ align(cp->cp.lmem_size, 0x10));
+ NVC3C0_QMDV02_02_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0);
+ NVC3C0_QMDV02_02_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE,
+ gv100_sm_config_smem_size(8 * 1024));
+ NVC3C0_QMDV02_02_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE,
+ gv100_sm_config_smem_size(96 * 1024));
+ NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_VERSION, 2);
+ NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_MAJOR_VERSION, 2);
+ NVC3C0_QMDV02_02_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE,
+ gv100_sm_config_smem_size(cp->cp.smem_size));
+
+ NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]);
+ NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]);
+ NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_DEPTH, info->grid[2]);
+ NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]);
+ NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]);
+ NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]);
+ NVC3C0_QMDV02_02_VAL_SET(qmd, REGISTER_COUNT_V, cp->num_gprs);
+ NVC3C0_QMDV02_02_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers);
// Only bind user uniforms and the driver constant buffer through the
// launch descriptor because UBOs are sticked to the driver cb to avoid the
// limitation of 8 CBs.
if (nvc0->constbuf[5][0].user || cp->parm_size) {
- gp100_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo,
+ gp100_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo,
NVC0_CB_USR_INFO(5), 1 << 16);
// Later logic will attempt to bind a real buffer at position 0. That
// should not happen if we've bound a user buffer.
assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf);
}
- gp100_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
+ gp100_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo,
NVC0_CB_AUX_INFO(5), 1 << 11);
- nve4_compute_setup_buf_cb(nvc0, true, desc);
+ nve4_compute_setup_buf_cb(nvc0, true, qmd);
+
+ NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, entry & 0xffffffff);
+ NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_UPPER, entry >> 32);
}
static inline void *
@@ -677,6 +816,7 @@ nve4_compute_alloc_launch_desc(struct nouveau_context *nv,
ptr += adj;
*pgpuaddr += adj;
}
+ memset(ptr, 0x00, 256);
return ptr;
}
@@ -734,6 +874,9 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
if (ret)
goto out;
+ if (nvc0->screen->compute->oclass >= GV100_COMPUTE_CLASS)
+ gv100_compute_setup_launch_desc(nvc0, desc, info);
+ else
if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS)
gp100_compute_setup_launch_desc(nvc0, desc, info);
else
@@ -743,10 +886,14 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
#ifndef NDEBUG
if (debug_get_num_option("NV50_PROG_DEBUG", 0)) {
+ debug_printf("Queue Meta Data:\n");
+ if (nvc0->screen->compute->oclass >= GV100_COMPUTE_CLASS)
+ NVC3C0QmdDump_V02_02(desc);
+ else
if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS)
- gp100_compute_dump_launch_desc(desc);
+ NVC0C0QmdDump_V02_01(desc);
else
- nve4_compute_dump_launch_desc(desc);
+ NVA0C0QmdDump_V00_06(desc);
}
#endif
@@ -877,115 +1024,6 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0)
nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES;
}
-
-#ifndef NDEBUG
-static const char *nve4_cache_split_name(unsigned value)
-{
- switch (value) {
- case NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1: return "16K_SHARED_48K_L1";
- case NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1: return "32K_SHARED_32K_L1";
- case NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1: return "48K_SHARED_16K_L1";
- default:
- return "(invalid)";
- }
-}
-
-static void
-nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *desc)
-{
- const uint32_t *data = (const uint32_t *)desc;
- unsigned i;
- bool zero = false;
-
- debug_printf("COMPUTE LAUNCH DESCRIPTOR:\n");
-
- for (i = 0; i < sizeof(*desc); i += 4) {
- if (data[i / 4]) {
- debug_printf("[%x]: 0x%08x\n", i, data[i / 4]);
- zero = false;
- } else
- if (!zero) {
- debug_printf("...\n");
- zero = true;
- }
- }
-
- debug_printf("entry = 0x%x\n", desc->entry);
- debug_printf("grid dimensions = %ux%ux%u\n",
- desc->griddim_x, desc->griddim_y, desc->griddim_z);
- debug_printf("block dimensions = %ux%ux%u\n",
- desc->blockdim_x, desc->blockdim_y, desc->blockdim_z);
- debug_printf("s[] size: 0x%x\n", desc->shared_size);
- debug_printf("l[] size: -0x%x / +0x%x\n",
- desc->local_size_n, desc->local_size_p);
- debug_printf("stack size: 0x%x\n", desc->cstack_size);
- debug_printf("barrier count: %u\n", desc->bar_alloc);
- debug_printf("$r count: %u\n", desc->gpr_alloc);
- debug_printf("cache split: %s\n", nve4_cache_split_name(desc->cache_split));
- debug_printf("linked tsc: %d\n", desc->linked_tsc);
-
- for (i = 0; i < 8; ++i) {
- uint64_t address;
- uint32_t size = desc->cb[i].size;
- bool valid = !!(desc->cb_mask & (1 << i));
-
- address = ((uint64_t)desc->cb[i].address_h << 32) | desc->cb[i].address_l;
-
- if (!valid && !address && !size)
- continue;
- debug_printf("CB[%u]: address = 0x%"PRIx64", size 0x%x%s\n",
- i, address, size, valid ? "" : " (invalid)");
- }
-}
-
-static void
-gp100_compute_dump_launch_desc(const struct gp100_cp_launch_desc *desc)
-{
- const uint32_t *data = (const uint32_t *)desc;
- unsigned i;
- bool zero = false;
-
- debug_printf("COMPUTE LAUNCH DESCRIPTOR:\n");
-
- for (i = 0; i < sizeof(*desc); i += 4) {
- if (data[i / 4]) {
- debug_printf("[%x]: 0x%08x\n", i, data[i / 4]);
- zero = false;
- } else
- if (!zero) {
- debug_printf("...\n");
- zero = true;
- }
- }
-
- debug_printf("entry = 0x%x\n", desc->entry);
- debug_printf("grid dimensions = %ux%ux%u\n",
- desc->griddim_x, desc->griddim_y, desc->griddim_z);
- debug_printf("block dimensions = %ux%ux%u\n",
- desc->blockdim_x, desc->blockdim_y, desc->blockdim_z);
- debug_printf("s[] size: 0x%x\n", desc->shared_size);
- debug_printf("l[] size: -0x%x / +0x%x\n",
- desc->local_size_n, desc->local_size_p);
- debug_printf("stack size: 0x%x\n", desc->cstack_size);
- debug_printf("barrier count: %u\n", desc->bar_alloc);
- debug_printf("$r count: %u\n", desc->gpr_alloc);
- debug_printf("linked tsc: %d\n", desc->linked_tsc);
-
- for (i = 0; i < 8; ++i) {
- uint64_t address;
- uint32_t size = desc->cb[i].size_sh4 << 4;
- bool valid = !!(desc->cb_mask & (1 << i));
-
- address = ((uint64_t)desc->cb[i].address_h << 32) | desc->cb[i].address_l;
-
- if (!valid && !address && !size)
- continue;
- debug_printf("CB[%u]: address = 0x%"PRIx64", size 0x%x%s\n",
- i, address, size, valid ? "" : " (invalid)");
- }
-}
-#endif
-
#ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER
static void
nve4_compute_trap_info(struct nvc0_context *nvc0)
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
index 7ff6935cc3d..d2599f7a71d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
@@ -4,142 +4,6 @@
#include "nvc0/nve4_compute.xml.h"
-struct nve4_cp_launch_desc
-{
- u32 unk0[8];
- u32 entry;
- u32 unk9[2];
- u32 unk11_0 : 30;
- u32 linked_tsc : 1;
- u32 unk11_31 : 1;
- u32 griddim_x : 31;
- u32 unk12 : 1;
- u16 griddim_y;
- u16 griddim_z;
- u32 unk14[3];
- u16 shared_size; /* must be aligned to 0x100 */
- u16 unk17;
- u16 unk18;
- u16 blockdim_x;
- u16 blockdim_y;
- u16 blockdim_z;
- u32 cb_mask : 8;
- u32 unk20_8 : 21;
- u32 cache_split : 2;
- u32 unk20_31 : 1;
- u32 unk21[8];
- struct {
- u32 address_l;
- u32 address_h : 8;
- u32 reserved : 7;
- u32 size : 17;
- } cb[8];
- u32 local_size_p : 20;
- u32 unk45_20 : 7;
- u32 bar_alloc : 5;
- u32 local_size_n : 20;
- u32 unk46_20 : 4;
- u32 gpr_alloc : 8;
- u32 cstack_size : 20;
- u32 unk47_20 : 12;
- u32 unk48[16];
-};
-
-struct gp100_cp_launch_desc
-{
- u32 unk0[8];
- u32 entry;
- u32 unk9[2];
- u32 unk11_0 : 30;
- u32 linked_tsc : 1;
- u32 unk11_31 : 1;
- u32 griddim_x : 31;
- u32 unk12 : 1;
- u16 griddim_y;
- u16 unk13;
- u16 griddim_z;
- u16 unk14;
- u32 unk15[2];
- u32 shared_size : 18;
- u32 unk17 : 14;
- u16 unk18;
- u16 blockdim_x;
- u16 blockdim_y;
- u16 blockdim_z;
- u32 cb_mask : 8;
- u32 unk20 : 24;
- u32 unk21[8];
- u32 local_size_p : 24;
- u32 unk29 : 3;
- u32 bar_alloc : 5;
- u32 local_size_n : 24;
- u32 gpr_alloc : 8;
- u32 cstack_size : 24;
- u32 unk31 : 8;
- struct {
- u32 address_l;
- u32 address_h : 17;
- u32 reserved : 2;
- u32 size_sh4 : 13;
- } cb[8];
- u32 unk48[16];
-};
-
-static inline void
-nve4_cp_launch_desc_init_default(struct nve4_cp_launch_desc *desc)
-{
- memset(desc, 0, sizeof(*desc));
-
- desc->unk0[7] = 0xbc000000;
- desc->unk11_0 = 0x04014000;
- desc->unk47_20 = 0x300;
-}
-
-static inline void
-nve4_cp_launch_desc_set_cb(struct nve4_cp_launch_desc *desc,
- unsigned index,
- struct nouveau_bo *bo,
- uint32_t base, uint32_t size)
-{
- uint64_t address = bo->offset + base;
-
- assert(index < 8);
- assert(!(base & 0xff));
-
- desc->cb[index].address_l = address;
- desc->cb[index].address_h = address >> 32;
- desc->cb[index].size = size;
-
- desc->cb_mask |= 1 << index;
-}
-
-static inline void
-gp100_cp_launch_desc_init_default(struct gp100_cp_launch_desc *desc)
-{
- memset(desc, 0, sizeof(*desc));
-
- desc->unk0[4] = 0x40;
- desc->unk11_0 = 0x04014000;
-}
-
-static inline void
-gp100_cp_launch_desc_set_cb(struct gp100_cp_launch_desc *desc,
- unsigned index,
- struct nouveau_bo *bo,
- uint32_t base, uint32_t size)
-{
- uint64_t address = bo->offset + base;
-
- assert(index < 8);
- assert(!(base & 0xff));
-
- desc->cb[index].address_l = address;
- desc->cb[index].address_h = address >> 32;
- desc->cb[index].size_sh4 = DIV_ROUND_UP(size, 16);
-
- desc->cb_mask |= 1 << index;
-}
-
struct nve4_mp_trap_info {
u32 lock;
u32 pc;
diff --git a/src/gallium/drivers/nouveau/nvc0/qmd.h b/src/gallium/drivers/nouveau/nvc0/qmd.h
new file mode 100644
index 00000000000..86c290fe836
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/qmd.h
@@ -0,0 +1,121 @@
+#ifndef __NVHW_QMD_H__
+#define __NVHW_QMD_H__
+#include <stdio.h>
+#include <stdint.h>
+#include "util/u_debug.h"
+#include "drf.h"
+
+/*
+ * NVQMD_ENUM_<n>(X, drf, v0, ...) expands to <n> designated initializers of
+ * the form [drf_vK] = "vK", i.e. the body of a value -> name string table.
+ *
+ * Every arity from 1 to 8 is defined because NVQMD_ENUM() below can select
+ * any of them. Each one is written out in full rather than chained to a
+ * smaller variant: that way the value names are only ever operands of # and
+ * ##, which the preprocessor does not macro-expand (presumably so names such
+ * as TRUE/FALSE stay intact even where they are macros -- TODO confirm).
+ */
+#define NVQMD_ENUM_1(X,drf,v0) \
+   [drf##_##v0] = #v0
+#define NVQMD_ENUM_2(X,drf,v0,v1) \
+   [drf##_##v0] = #v0, \
+   [drf##_##v1] = #v1
+#define NVQMD_ENUM_3(X,drf,v0,v1,v2) \
+   [drf##_##v0] = #v0, \
+   [drf##_##v1] = #v1, \
+   [drf##_##v2] = #v2
+#define NVQMD_ENUM_4(X,drf,v0,v1,v2,v3) \
+   [drf##_##v0] = #v0, \
+   [drf##_##v1] = #v1, \
+   [drf##_##v2] = #v2, \
+   [drf##_##v3] = #v3
+#define NVQMD_ENUM_5(X,drf,v0,v1,v2,v3,v4) \
+   [drf##_##v0] = #v0, \
+   [drf##_##v1] = #v1, \
+   [drf##_##v2] = #v2, \
+   [drf##_##v3] = #v3, \
+   [drf##_##v4] = #v4
+#define NVQMD_ENUM_6(X,drf,v0,v1,v2,v3,v4,v5) \
+   [drf##_##v0] = #v0, \
+   [drf##_##v1] = #v1, \
+   [drf##_##v2] = #v2, \
+   [drf##_##v3] = #v3, \
+   [drf##_##v4] = #v4, \
+   [drf##_##v5] = #v5
+#define NVQMD_ENUM_7(X,drf,v0,v1,v2,v3,v4,v5,v6) \
+   [drf##_##v0] = #v0, \
+   [drf##_##v1] = #v1, \
+   [drf##_##v2] = #v2, \
+   [drf##_##v3] = #v3, \
+   [drf##_##v4] = #v4, \
+   [drf##_##v5] = #v5, \
+   [drf##_##v6] = #v6
+#define NVQMD_ENUM_8(X,drf,v0,v1,v2,v3,v4,v5,v6,v7) \
+   [drf##_##v0] = #v0, \
+   [drf##_##v1] = #v1, \
+   [drf##_##v2] = #v2, \
+   [drf##_##v3] = #v3, \
+   [drf##_##v4] = #v4, \
+   [drf##_##v5] = #v5, \
+   [drf##_##v6] = #v6, \
+   [drf##_##v7] = #v7
+
+/*
+ * NVQMD_ENUM(drf, v0, ...) counts its arguments and forwards them to the
+ * matching NVQMD_ENUM_<n>, where <n> is the number of value names (1..8).
+ */
+#define NVQMD_ENUM_(X,_1,_2,_3,_4,_5,_6,_7,_8,_9,IMPL,...) IMPL
+#define NVQMD_ENUM(A...) NVQMD_ENUM_(X, ##A, NVQMD_ENUM_8, NVQMD_ENUM_7, \
+                                     NVQMD_ENUM_6, NVQMD_ENUM_5, \
+                                     NVQMD_ENUM_4, NVQMD_ENUM_3, \
+                                     NVQMD_ENUM_2, NVQMD_ENUM_1)(X, ##A)
+
+/*
+ * NVQMD_VAL(d, r, p, f, o) fetches field f from p with NVVAL_MW_GET_X and
+ * prints it with the printf conversion o; NVQMD_VAL(d, r, p, f, i, o) does
+ * the same for element i of an indexed field and labels the line "f(i)".
+ * The indexed form is selected purely by argument count.
+ */
+#define NVQMD_VAL_N(X,d,r,p,f,o) do { \
+   uint32_t val = NVVAL_MW_GET_X((p), d##_##r##_##f); \
+   debug_printf(" %-36s: "o"\n", #f, val); \
+} while(0)
+#define NVQMD_VAL_I(X,d,r,p,f,i,o) do { \
+   uint32_t val = NVVAL_MW_GET_X((p), d##_##r##_##f(i)); \
+   char name[80]; \
+   snprintf(name, sizeof(name), "%s(%d)", #f, (i)); \
+   debug_printf(" %-36s: "o"\n", name, val); \
+} while(0)
+#define NVQMD_VAL_(X,_1,_2,_3,_4,_5,_6,IMPL,...) IMPL
+#define NVQMD_VAL(A...) NVQMD_VAL_(X, ##A, NVQMD_VAL_I, NVQMD_VAL_N)(X, ##A)
+
+/*
+ * NVQMD_DEF(d, r, p, f, e...) fetches field f from p with NVVAL_MW_GET and
+ * prints the matching name from the value list e, or "UNKNOWN 0x<value>"
+ * when the value has no entry in the table. NVQMD_IDX(d, r, p, f, i, e...)
+ * is the indexed counterpart and labels the line "f(i)".
+ */
+#define NVQMD_DEF(d,r,p,f,e...) do { \
+   static const char *const ev[] = { NVQMD_ENUM(d##_##r##_##f,##e) }; \
+   uint32_t val = NVVAL_MW_GET((p), d, r, f); \
+   if (val < ARRAY_SIZE(ev) && ev[val]) \
+      debug_printf(" %-36s: %s\n", #f, ev[val]); \
+   else \
+      debug_printf(" %-36s: UNKNOWN 0x%x\n", #f, val); \
+} while(0)
+#define NVQMD_IDX(d,r,p,f,i,e...) do { \
+   static const char *const ev[] = { NVQMD_ENUM(d##_##r##_##f,##e) }; \
+   char name[80]; \
+   snprintf(name, sizeof(name), "%s(%d)", #f, (i)); \
+   uint32_t val = NVVAL_MW_GET((p), d, r, f, i); \
+   if (val < ARRAY_SIZE(ev) && ev[val]) \
+      debug_printf(" %-36s: %s\n", name, ev[val]); \
+   else \
+      debug_printf(" %-36s: UNKNOWN 0x%x\n", name, val); \
+} while(0)
+
+/* Per-class dump routines; each takes a pointer to the QMD words to print. */
+void NVA0C0QmdDump_V00_06(uint32_t *);
+void NVC0C0QmdDump_V02_01(uint32_t *);
+void NVC3C0QmdDump_V02_02(uint32_t *);
+#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/qmda0c0.c b/src/gallium/drivers/nouveau/nvc0/qmda0c0.c
new file mode 100644
index 00000000000..7103a893af5
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/qmda0c0.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright 2020 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "qmd.h"
+#include "cla0c0qmd.h"
+
+#define NVA0C0_QMDV00_06_VAL(a...) NVQMD_VAL(NVA0C0, QMDV00_06, ##a) /* raw-value printer from qmd.h, bound to NVA0C0 / QMDV00_06 */
+#define NVA0C0_QMDV00_06_DEF(a...) NVQMD_DEF(NVA0C0, QMDV00_06, ##a) /* named-value printer, same binding */
+#define NVA0C0_QMDV00_06_IDX(a...) NVQMD_IDX(NVA0C0, QMDV00_06, ##a) /* indexed named-value printer, same binding */
+
+void /* Dumps every NVA0C0 QMDV00_06 field named below through debug_printf, one line per field, in this order. */
+NVA0C0QmdDump_V00_06(uint32_t *qmd) /* qmd: descriptor handed to the NVVAL_MW_GET* accessors by the printer macros */
+{
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_A, "0x%x"); /* _VAL: field value printed with the given format */
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_B, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_C, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_D, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_E, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_F, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_G, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_H, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_A_A, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_I, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_J, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_A, "0x%x");
+ NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_K, FALSE, TRUE); /* _DEF: value shown as one of the listed names, else "UNKNOWN 0x.." */
+ NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_L, FALSE, TRUE);
+ NVA0C0_QMDV00_06_DEF(qmd, SEMAPHORE_RELEASE_ENABLE0, FALSE, TRUE);
+ NVA0C0_QMDV00_06_DEF(qmd, SEMAPHORE_RELEASE_ENABLE1, FALSE, TRUE);
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_B, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_M, "0x%x");
+ NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_N, FALSE, TRUE);
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_O, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_C, "0x%x");
+ NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, FALSE, TRUE);
+ NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, FALSE, TRUE);
+ NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_TEXTURE_DATA_CACHE, FALSE, TRUE);
+ NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_SHADER_DATA_CACHE, FALSE, TRUE);
+ NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_INSTRUCTION_CACHE, FALSE, TRUE);
+ NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, FALSE, TRUE);
+ NVA0C0_QMDV00_06_VAL(qmd, PROGRAM_OFFSET, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_P, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_Q, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_D, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_R, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_S, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_E, "0x%x");
+ NVA0C0_QMDV00_06_DEF(qmd, RELEASE_MEMBAR_TYPE, FE_NONE, FE_SYSMEMBAR);
+ NVA0C0_QMDV00_06_DEF(qmd, CWD_MEMBAR_TYPE, L1_NONE, L1_SYSMEMBAR, L1_MEMBAR);
+ NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_T, FALSE, TRUE);
+ NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_U, FALSE, TRUE);
+ NVA0C0_QMDV00_06_DEF(qmd, THROTTLED, FALSE, TRUE);
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_E2_A, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_E2_B, "0x%x");
+ NVA0C0_QMDV00_06_DEF(qmd, API_VISIBLE_CALL_LIMIT, _32, NO_CHECK);
+ NVA0C0_QMDV00_06_DEF(qmd, SHARED_MEMORY_BANK_MAPPING, FOUR_BYTES_PER_BANK,
+ EIGHT_BYTES_PER_BANK);
+ NVA0C0_QMDV00_06_DEF(qmd, SAMPLER_INDEX, INDEPENDENTLY, VIA_HEADER_INDEX);
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_E3_A, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_WIDTH, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_HEIGHT, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_DEPTH, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_WIDTH_RESUME, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_HEIGHT_RESUME, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_DEPTH_RESUME, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_V, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_F, "0x%x");
+ NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_W, FALSE, TRUE);
+ NVA0C0_QMDV00_06_VAL(qmd, SHARED_MEMORY_SIZE, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_G, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_VERSION, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_MAJOR_VERSION, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_H, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, CTA_THREAD_DIMENSION0, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, CTA_THREAD_DIMENSION1, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, CTA_THREAD_DIMENSION2, "0x%x");
+ for (int i = 0; i < 8; i++) /* _IDX: indexed _DEF; one "CONSTANT_BUFFER_VALID(i)" line per index 0..7 */
+ NVA0C0_QMDV00_06_IDX(qmd, CONSTANT_BUFFER_VALID, i, FALSE, TRUE);
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_I, "0x%x");
+ NVA0C0_QMDV00_06_DEF(qmd, L1_CONFIGURATION,
+ DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB,
+ DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB,
+ DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB);
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_X, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_Y, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, RELEASE0_ADDRESS_LOWER, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, RELEASE0_ADDRESS_UPPER, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_J, "0x%x");
+ NVA0C0_QMDV00_06_DEF(qmd, RELEASE0_REDUCTION_OP, RED_ADD,
+ RED_MIN,
+ RED_MAX,
+ RED_INC,
+ RED_DEC,
+ RED_AND,
+ RED_OR,
+ RED_XOR);
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_K, "0x%x");
+ NVA0C0_QMDV00_06_DEF(qmd, RELEASE0_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32);
+ NVA0C0_QMDV00_06_DEF(qmd, RELEASE0_REDUCTION_ENABLE, FALSE, TRUE);
+ NVA0C0_QMDV00_06_DEF(qmd, RELEASE0_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD);
+ NVA0C0_QMDV00_06_VAL(qmd, RELEASE0_PAYLOAD, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, RELEASE1_ADDRESS_LOWER, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, RELEASE1_ADDRESS_UPPER, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_L, "0x%x");
+ NVA0C0_QMDV00_06_DEF(qmd, RELEASE1_REDUCTION_OP, RED_ADD,
+ RED_MIN,
+ RED_MAX,
+ RED_INC,
+ RED_DEC,
+ RED_AND,
+ RED_OR,
+ RED_XOR);
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_M, "0x%x");
+ NVA0C0_QMDV00_06_DEF(qmd, RELEASE1_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32);
+ NVA0C0_QMDV00_06_DEF(qmd, RELEASE1_REDUCTION_ENABLE, FALSE, TRUE);
+ NVA0C0_QMDV00_06_DEF(qmd, RELEASE1_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD);
+ NVA0C0_QMDV00_06_VAL(qmd, RELEASE1_PAYLOAD, "0x%x");
+ for (int i = 0; i < 8; i++) { /* the five per-index CONSTANT_BUFFER_* fields, grouped by index 0..7 */
+ NVA0C0_QMDV00_06_VAL(qmd, CONSTANT_BUFFER_ADDR_LOWER, i, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, CONSTANT_BUFFER_ADDR_UPPER, i, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, CONSTANT_BUFFER_RESERVED_ADDR, i, "0x%x");
+ NVA0C0_QMDV00_06_IDX(qmd, CONSTANT_BUFFER_INVALIDATE, i, FALSE, TRUE);
+ NVA0C0_QMDV00_06_VAL(qmd, CONSTANT_BUFFER_SIZE, i, "0x%x");
+ }
+ NVA0C0_QMDV00_06_VAL(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_N, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, BARRIER_COUNT, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, REGISTER_COUNT, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, SASS_VERSION, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_A, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_B, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_C, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_D, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_E, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_F, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_G, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_H, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_I, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_J, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_K, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_L, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_M, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_N, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, DEBUG_ID_UPPER, "0x%x");
+ NVA0C0_QMDV00_06_VAL(qmd, DEBUG_ID_LOWER, "0x%x");
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/qmdc0c0.c b/src/gallium/drivers/nouveau/nvc0/qmdc0c0.c
new file mode 100644
index 00000000000..945439ee0c8
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/qmdc0c0.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright 2020 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "qmd.h"
+#include "clc0c0qmd.h"
+
+#define NVC0C0_QMDV02_01_VAL(a...) NVQMD_VAL(NVC0C0, QMDV02_01, ##a) /* raw-value printer from qmd.h, bound to NVC0C0 / QMDV02_01 */
+#define NVC0C0_QMDV02_01_DEF(a...) NVQMD_DEF(NVC0C0, QMDV02_01, ##a) /* named-value printer, same binding */
+#define NVC0C0_QMDV02_01_IDX(a...) NVQMD_IDX(NVC0C0, QMDV02_01, ##a) /* indexed named-value printer, same binding */
+
+void /* Dumps every NVC0C0 QMDV02_01 field named below through debug_printf, one line per field, in this order. */
+NVC0C0QmdDump_V02_01(uint32_t *qmd) /* qmd: descriptor handed to the NVVAL_MW_GET* accessors by the printer macros */
+{
+ NVC0C0_QMDV02_01_VAL(qmd, OUTER_PUT, "0x%x"); /* _VAL: field value printed with the given format */
+ NVC0C0_QMDV02_01_VAL(qmd, OUTER_OVERFLOW, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, OUTER_GET, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, OUTER_STICKY_OVERFLOW, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, INNER_GET, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, INNER_OVERFLOW, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, INNER_PUT, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, INNER_STICKY_OVERFLOW, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_GROUP_ID, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, SM_GLOBAL_CACHING_ENABLE, "0x%x");
+ NVC0C0_QMDV02_01_DEF(qmd, RUN_CTA_IN_ONE_SM_PARTITION, FALSE, TRUE); /* _DEF: value shown as one of the listed names, else "UNKNOWN 0x.." */
+ NVC0C0_QMDV02_01_DEF(qmd, IS_QUEUE, FALSE, TRUE);
+ NVC0C0_QMDV02_01_DEF(qmd, ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST, FALSE, TRUE);
+ NVC0C0_QMDV02_01_DEF(qmd, SEMAPHORE_RELEASE_ENABLE0, FALSE, TRUE);
+ NVC0C0_QMDV02_01_DEF(qmd, SEMAPHORE_RELEASE_ENABLE1, FALSE, TRUE);
+ NVC0C0_QMDV02_01_DEF(qmd, REQUIRE_SCHEDULING_PCAS, FALSE, TRUE);
+ NVC0C0_QMDV02_01_DEF(qmd, DEPENDENT_QMD_SCHEDULE_ENABLE, FALSE, TRUE);
+ NVC0C0_QMDV02_01_DEF(qmd, DEPENDENT_QMD_TYPE, QUEUE, GRID);
+ NVC0C0_QMDV02_01_DEF(qmd, DEPENDENT_QMD_FIELD_COPY, FALSE, TRUE);
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_B, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, CIRCULAR_QUEUE_SIZE, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_C, "0x%x");
+ NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, FALSE, TRUE);
+ NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, FALSE, TRUE);
+ NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_TEXTURE_DATA_CACHE, FALSE, TRUE);
+ NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_SHADER_DATA_CACHE, FALSE, TRUE);
+ NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_INSTRUCTION_CACHE, FALSE, TRUE);
+ NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, FALSE, TRUE);
+ NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_WIDTH_RESUME, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_HEIGHT_RESUME, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_DEPTH_RESUME, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, PROGRAM_OFFSET, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, CIRCULAR_QUEUE_ADDR_LOWER, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, CIRCULAR_QUEUE_ADDR_UPPER, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_D, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, CIRCULAR_QUEUE_ENTRY_SIZE, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, CWD_REFERENCE_COUNT_ID, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, CWD_REFERENCE_COUNT_DELTA_MINUS_ONE, "0x%x");
+ NVC0C0_QMDV02_01_DEF(qmd, RELEASE_MEMBAR_TYPE, FE_NONE, FE_SYSMEMBAR);
+ NVC0C0_QMDV02_01_DEF(qmd, CWD_REFERENCE_COUNT_INCR_ENABLE, FALSE, TRUE);
+ NVC0C0_QMDV02_01_DEF(qmd, CWD_MEMBAR_TYPE, L1_NONE, L1_SYSMEMBAR, L1_MEMBAR);
+ NVC0C0_QMDV02_01_DEF(qmd, SEQUENTIALLY_RUN_CTAS, FALSE, TRUE);
+ NVC0C0_QMDV02_01_DEF(qmd, CWD_REFERENCE_COUNT_DECR_ENABLE, FALSE, TRUE);
+ NVC0C0_QMDV02_01_DEF(qmd, THROTTLED, FALSE, TRUE);
+ NVC0C0_QMDV02_01_DEF(qmd, API_VISIBLE_CALL_LIMIT, _32, NO_CHECK);
+ NVC0C0_QMDV02_01_DEF(qmd, SAMPLER_INDEX, INDEPENDENTLY, VIA_HEADER_INDEX);
+ NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_WIDTH, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_HEIGHT, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED13A, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_DEPTH, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED14A, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, DEPENDENT_QMD_POINTER, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, QUEUE_ENTRIES_PER_CTA_MINUS_ONE, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, COALESCE_WAITING_PERIOD, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, SHARED_MEMORY_SIZE, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_G, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_VERSION, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_MAJOR_VERSION, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_H, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, CTA_THREAD_DIMENSION0, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, CTA_THREAD_DIMENSION1, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, CTA_THREAD_DIMENSION2, "0x%x");
+ for (int i = 0; i < 8; i++) /* _IDX: indexed _DEF; one "CONSTANT_BUFFER_VALID(i)" line per index 0..7 */
+ NVC0C0_QMDV02_01_IDX(qmd, CONSTANT_BUFFER_VALID, i, FALSE, TRUE);
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_I, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, SM_DISABLE_MASK_LOWER, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, SM_DISABLE_MASK_UPPER, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, RELEASE0_ADDRESS_LOWER, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, RELEASE0_ADDRESS_UPPER, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_J, "0x%x");
+ NVC0C0_QMDV02_01_DEF(qmd, RELEASE0_REDUCTION_OP, RED_ADD,
+ RED_MIN,
+ RED_MAX,
+ RED_INC,
+ RED_DEC,
+ RED_AND,
+ RED_OR,
+ RED_XOR);
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_K, "0x%x");
+ NVC0C0_QMDV02_01_DEF(qmd, RELEASE0_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32);
+ NVC0C0_QMDV02_01_DEF(qmd, RELEASE0_REDUCTION_ENABLE, FALSE, TRUE);
+ NVC0C0_QMDV02_01_DEF(qmd, RELEASE0_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD);
+ NVC0C0_QMDV02_01_VAL(qmd, RELEASE0_PAYLOAD, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, RELEASE1_ADDRESS_LOWER, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, RELEASE1_ADDRESS_UPPER, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_L, "0x%x");
+ NVC0C0_QMDV02_01_DEF(qmd, RELEASE1_REDUCTION_OP, RED_ADD,
+ RED_MIN,
+ RED_MAX,
+ RED_INC,
+ RED_DEC,
+ RED_AND,
+ RED_OR,
+ RED_XOR);
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_M, "0x%x");
+ NVC0C0_QMDV02_01_DEF(qmd, RELEASE1_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32);
+ NVC0C0_QMDV02_01_DEF(qmd, RELEASE1_REDUCTION_ENABLE, FALSE, TRUE);
+ NVC0C0_QMDV02_01_DEF(qmd, RELEASE1_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD);
+ NVC0C0_QMDV02_01_VAL(qmd, RELEASE1_PAYLOAD, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_N, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, BARRIER_COUNT, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, REGISTER_COUNT, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, SASS_VERSION, "0x%x");
+ for (int i = 0; i < 8; i++) { /* the five per-index CONSTANT_BUFFER_* fields, grouped by index 0..7 */
+ NVC0C0_QMDV02_01_VAL(qmd, CONSTANT_BUFFER_ADDR_LOWER, i, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, CONSTANT_BUFFER_ADDR_UPPER, i, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, CONSTANT_BUFFER_RESERVED_ADDR, i, "0x%x");
+ NVC0C0_QMDV02_01_IDX(qmd, CONSTANT_BUFFER_INVALIDATE, i, FALSE, TRUE);
+ NVC0C0_QMDV02_01_VAL(qmd, CONSTANT_BUFFER_SIZE_SHIFTED4, i, "0x%x");
+ }
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_R, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_S, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_INNER_GET, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_REQUIRE_SCHEDULING_PCAS, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_INNER_PUT, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_SCG_TYPE, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_SPAN_LIST_HEAD_INDEX, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_Q, "0x%x");
+ NVC0C0_QMDV02_01_DEF(qmd, HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID, FALSE, TRUE);
+ NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_SKED_NEXT_QMD_POINTER, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_G, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_H, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_I, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_J, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_K, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_L, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_M, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_N, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, DEBUG_ID_UPPER, "0x%x");
+ NVC0C0_QMDV02_01_VAL(qmd, DEBUG_ID_LOWER, "0x%x");
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/qmdc3c0.c b/src/gallium/drivers/nouveau/nvc0/qmdc3c0.c
new file mode 100644
index 00000000000..c9bd8966114
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/qmdc3c0.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright 2020 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "qmd.h"
+#include "clc3c0qmd.h"
+
+#define NVC3C0_QMDV02_02_VAL(a...) NVQMD_VAL(NVC3C0, QMDV02_02, ##a) /* raw-value printer from qmd.h, bound to NVC3C0 / QMDV02_02 */
+#define NVC3C0_QMDV02_02_DEF(a...) NVQMD_DEF(NVC3C0, QMDV02_02, ##a) /* named-value printer, same binding */
+#define NVC3C0_QMDV02_02_IDX(a...) NVQMD_IDX(NVC3C0, QMDV02_02, ##a) /* indexed named-value printer, same binding */
+
+void /* Dumps every NVC3C0 QMDV02_02 field named below through debug_printf, one line per field, in this order. */
+NVC3C0QmdDump_V02_02(uint32_t *qmd) /* qmd: descriptor handed to the NVVAL_MW_GET* accessors by the printer macros */
+{
+ NVC3C0_QMDV02_02_VAL(qmd, OUTER_PUT, "0x%x"); /* _VAL: field value printed with the given format */
+ NVC3C0_QMDV02_02_VAL(qmd, OUTER_OVERFLOW, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, OUTER_GET, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, OUTER_STICKY_OVERFLOW, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, INNER_GET, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, INNER_OVERFLOW, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, INNER_PUT, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, INNER_STICKY_OVERFLOW, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_GROUP_ID, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, SM_GLOBAL_CACHING_ENABLE, "0x%x");
+ NVC3C0_QMDV02_02_DEF(qmd, RUN_CTA_IN_ONE_SM_PARTITION, FALSE, TRUE); /* _DEF: value shown as one of the listed names, else "UNKNOWN 0x.." */
+ NVC3C0_QMDV02_02_DEF(qmd, IS_QUEUE, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, SEMAPHORE_RELEASE_ENABLE0, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, SEMAPHORE_RELEASE_ENABLE1, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, REQUIRE_SCHEDULING_PCAS, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, DEPENDENT_QMD_SCHEDULE_ENABLE, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, DEPENDENT_QMD_TYPE, QUEUE, GRID);
+ NVC3C0_QMDV02_02_DEF(qmd, DEPENDENT_QMD_FIELD_COPY, FALSE, TRUE);
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_B, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_SIZE, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_C, "0x%x");
+ NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_TEXTURE_DATA_CACHE, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_SHADER_DATA_CACHE, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_INSTRUCTION_CACHE, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, FALSE, TRUE);
+ NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_WIDTH_RESUME, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_HEIGHT_RESUME, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_DEPTH_RESUME, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, PROGRAM_OFFSET, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_ADDR_LOWER, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_ADDR_UPPER, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_D, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_ENTRY_SIZE, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CWD_REFERENCE_COUNT_ID, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CWD_REFERENCE_COUNT_DELTA_MINUS_ONE, "0x%x");
+ NVC3C0_QMDV02_02_DEF(qmd, RELEASE_MEMBAR_TYPE, FE_NONE, FE_SYSMEMBAR);
+ NVC3C0_QMDV02_02_DEF(qmd, CWD_REFERENCE_COUNT_INCR_ENABLE, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, CWD_MEMBAR_TYPE, L1_NONE, L1_SYSMEMBAR, L1_MEMBAR);
+ NVC3C0_QMDV02_02_DEF(qmd, SEQUENTIALLY_RUN_CTAS, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, CWD_REFERENCE_COUNT_DECR_ENABLE, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, API_VISIBLE_CALL_LIMIT, _32, NO_CHECK);
+ NVC3C0_QMDV02_02_DEF(qmd, SAMPLER_INDEX, INDEPENDENTLY, VIA_HEADER_INDEX);
+ NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_WIDTH, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_HEIGHT, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED13A, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_DEPTH, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED14A, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, DEPENDENT_QMD_POINTER, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QUEUE_ENTRIES_PER_CTA_MINUS_ONE, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, COALESCE_WAITING_PERIOD, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, SHARED_MEMORY_SIZE, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_VERSION, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_MAJOR_VERSION, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_H, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CTA_THREAD_DIMENSION0, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CTA_THREAD_DIMENSION1, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CTA_THREAD_DIMENSION2, "0x%x");
+ for (int i = 0; i < 8; i++) /* _IDX: indexed _DEF; one "CONSTANT_BUFFER_VALID(i)" line per index 0..7 */
+ NVC3C0_QMDV02_02_IDX(qmd, CONSTANT_BUFFER_VALID, i, FALSE, TRUE);
+ NVC3C0_QMDV02_02_VAL(qmd, REGISTER_COUNT_V, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, FREE_CTA_SLOTS_EMPTY_SM, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, SM_DISABLE_MASK_LOWER, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, SM_DISABLE_MASK_UPPER, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, RELEASE0_ADDRESS_LOWER, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, RELEASE0_ADDRESS_UPPER, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_J, "0x%x");
+ NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_REDUCTION_OP, RED_ADD,
+ RED_MIN,
+ RED_MAX,
+ RED_INC,
+ RED_DEC,
+ RED_AND,
+ RED_OR,
+ RED_XOR);
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_K, "0x%x");
+ NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32);
+ NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_REDUCTION_ENABLE, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD);
+ NVC3C0_QMDV02_02_VAL(qmd, RELEASE0_PAYLOAD, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, RELEASE1_ADDRESS_LOWER, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, RELEASE1_ADDRESS_UPPER, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_L, "0x%x");
+ NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_REDUCTION_OP, RED_ADD,
+ RED_MIN,
+ RED_MAX,
+ RED_INC,
+ RED_DEC,
+ RED_AND,
+ RED_OR,
+ RED_XOR);
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_M, "0x%x");
+ NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32);
+ NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_REDUCTION_ENABLE, FALSE, TRUE);
+ NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD);
+ NVC3C0_QMDV02_02_VAL(qmd, RELEASE1_PAYLOAD, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_N, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, BARRIER_COUNT, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, REGISTER_COUNT, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, SASS_VERSION, "0x%x");
+ for (int i = 0; i < 8; i++) { /* the five per-index CONSTANT_BUFFER_* fields, grouped by index 0..7 */
+ NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_ADDR_LOWER, i, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_ADDR_UPPER, i, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_RESERVED_ADDR, i, "0x%x");
+ NVC3C0_QMDV02_02_IDX(qmd, CONSTANT_BUFFER_INVALIDATE, i, FALSE, TRUE);
+ NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_SIZE_SHIFTED4, i, "0x%x");
+ }
+ NVC3C0_QMDV02_02_VAL(qmd, PROGRAM_ADDRESS_LOWER, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, PROGRAM_ADDRESS_UPPER, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_S, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_INNER_GET, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_REQUIRE_SCHEDULING_PCAS, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_INNER_PUT, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_SCG_TYPE, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_SPAN_LIST_HEAD_INDEX, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_Q, "0x%x");
+ NVC3C0_QMDV02_02_DEF(qmd, HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID, FALSE, TRUE);
+ NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_SKED_NEXT_QMD_POINTER, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_G, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_H, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_I, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_J, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_K, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_L, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_M, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_N, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, DEBUG_ID_UPPER, "0x%x");
+ NVC3C0_QMDV02_02_VAL(qmd, DEBUG_ID_LOWER, "0x%x");
+}
diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
index 5c43518afcb..d123c8a1c17 100644
--- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
+++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
@@ -104,6 +104,8 @@ nouveau_drm_screen_create(int fd)
case 0x110:
case 0x120:
case 0x130:
+ case 0x140:
+ case 0x160:
init = nvc0_screen_create;
break;
default: