diff --git a/.gitignore b/.gitignore index 4b27d16..94c9bc6 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/mesa-19.3.4.tar.xz +SOURCES/mesa-20.1.4.tar.xz diff --git a/.mesa.metadata b/.mesa.metadata index 1e49d90..ca12abc 100644 --- a/.mesa.metadata +++ b/.mesa.metadata @@ -1 +1 @@ -73dccea365efef46c700bcfd87d14209381efa3d SOURCES/mesa-19.3.4.tar.xz +78243cd7152a8ba759f8f2bdfcf0a877b455e351 SOURCES/mesa-20.1.4.tar.xz diff --git a/SOURCES/0001-dri-add-another-get-shm-variant.patch b/SOURCES/0001-dri-add-another-get-shm-variant.patch deleted file mode 100644 index 2d3cc3b..0000000 --- a/SOURCES/0001-dri-add-another-get-shm-variant.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 7881d29f2c729e4a8a5af21a0abcf3db18e22918 Mon Sep 17 00:00:00 2001 -From: Dave Airlie -Date: Fri, 14 Feb 2020 15:00:13 +1000 -Subject: [PATCH 1/2] dri: add another get shm variant. - -When Brian in 02c3dad0f3b4d26e0faa5cc51d06bc50d693dcdc restricted -the shm permissions it means we hit the fallback paths in some -scenarios we hadn't before. - -When you use Xephyr to xdmcp from one user to another the new perms -stop the X server (running as user a) attaching to the SHM segments -from gnome-shell (running as user b). - -In this case however only the GLX side of the code had insight into this, -and the dri could was meant of fall back, and it worked for put image -fine but the get image path was broken, since there was no indication -in the broken case of the need to fallback. - -This adds a return type to a new interface member that lets the -caller know it has to fallback. 
- -Fixes: 02c3dad0f3b4 ("Call shmget() with permission 0600 instead of 0777") ---- - include/GL/internal/dri_interface.h | 15 ++++++++++++++- - src/gallium/state_trackers/dri/drisw.c | 3 +++ - 2 files changed, 17 insertions(+), 1 deletion(-) - -diff --git a/include/GL/internal/dri_interface.h b/include/GL/internal/dri_interface.h -index 09782c8baeb..e40106575c0 100644 ---- a/include/GL/internal/dri_interface.h -+++ b/include/GL/internal/dri_interface.h -@@ -634,7 +634,7 @@ struct __DRIdamageExtensionRec { - * SWRast Loader extension. - */ - #define __DRI_SWRAST_LOADER "DRI_SWRastLoader" --#define __DRI_SWRAST_LOADER_VERSION 5 -+#define __DRI_SWRAST_LOADER_VERSION 6 - struct __DRIswrastLoaderExtensionRec { - __DRIextension base; - -@@ -711,6 +711,19 @@ struct __DRIswrastLoaderExtensionRec { - int width, int height, int stride, - int shmid, char *shmaddr, unsigned offset, - void *loaderPrivate); -+ -+ /** -+ * get shm image to drawable (v2) -+ * -+ * There are some cases where GLX can't use SHM, but DRI -+ * still tries, we need to get a return type for when to -+ * fallback to the non-shm path. 
-+ * -+ * \since 6 -+ */ -+ GLboolean (*getImageShm2)(__DRIdrawable *readable, -+ int x, int y, int width, int height, -+ int shmid, void *loaderPrivate); - }; - - /** -diff --git a/src/gallium/state_trackers/dri/drisw.c b/src/gallium/state_trackers/dri/drisw.c -index e3fb3f1b925..df364c47391 100644 ---- a/src/gallium/state_trackers/dri/drisw.c -+++ b/src/gallium/state_trackers/dri/drisw.c -@@ -138,6 +138,9 @@ get_image_shm(__DRIdrawable *dPriv, int x, int y, int width, int height, - if (!res->screen->resource_get_handle(res->screen, NULL, res, &whandle, PIPE_HANDLE_USAGE_FRAMEBUFFER_WRITE)) - return FALSE; - -+ if (loader->base.version > 5 && loader->getImageShm2) -+ return loader->getImageShm2(dPriv, x, y, width, height, whandle.handle, dPriv->loaderPrivate); -+ - loader->getImageShm(dPriv, x, y, width, height, whandle.handle, dPriv->loaderPrivate); - return TRUE; - } --- -2.21.1 - diff --git a/SOURCES/0001-nir-use-bitfield_insert-instead-of-bfi-in-nir_lower_.patch b/SOURCES/0001-nir-use-bitfield_insert-instead-of-bfi-in-nir_lower_.patch new file mode 100644 index 0000000..0daf825 --- /dev/null +++ b/SOURCES/0001-nir-use-bitfield_insert-instead-of-bfi-in-nir_lower_.patch @@ -0,0 +1,34 @@ +From d3ec950f0d8492b980a91844ffd744d7e7824277 Mon Sep 17 00:00:00 2001 +From: Ben Skeggs +Date: Sat, 6 Jun 2020 16:58:00 +1000 +Subject: [PATCH] nir: use bitfield_insert instead of bfi in + nir_lower_double_ops + +NVIDIA hardware doesn't have an equivalent to bfi, but we do already have +a lowering for bitfield_insert->bfi. 
+ +Signed-off-by: Ben Skeggs +Reviewed-by: Jason Ekstrand +Part-of: +--- + src/compiler/nir/nir_lower_double_ops.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c +index f9c93a910a5..73226fd62ef 100644 +--- a/src/compiler/nir/nir_lower_double_ops.c ++++ b/src/compiler/nir/nir_lower_double_ops.c +@@ -49,7 +49,9 @@ set_exponent(nir_builder *b, nir_ssa_def *src, nir_ssa_def *exp) + /* The exponent is bits 52-62, or 20-30 of the high word, so set the exponent + * to 1023 + */ +- nir_ssa_def *new_hi = nir_bfi(b, nir_imm_int(b, 0x7ff00000), exp, hi); ++ nir_ssa_def *new_hi = nir_bitfield_insert(b, hi, exp, ++ nir_imm_int(b, 20), ++ nir_imm_int(b, 11)); + /* recombine */ + return nir_pack_64_2x32_split(b, lo, new_hi); + } +-- +2.26.2 + diff --git a/SOURCES/0002-glx-add-getImageShm2-path.patch b/SOURCES/0002-glx-add-getImageShm2-path.patch deleted file mode 100644 index d8062b3..0000000 --- a/SOURCES/0002-glx-add-getImageShm2-path.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 74e4f90deeae466ed19d2a31d8f62f6fc5e1709b Mon Sep 17 00:00:00 2001 -From: Dave Airlie -Date: Fri, 14 Feb 2020 15:03:24 +1000 -Subject: [PATCH 2/2] glx: add getImageShm2 path - -If the glx side shmid is -1 (because the X server failed to attach) -then we should let the caller know to fallback. 
- -Fixes: 02c3dad0f3b4 ("Call shmget() with permission 0600 instead of 0777") ---- - src/glx/drisw_glx.c | 25 +++++++++++++++++++------ - 1 file changed, 19 insertions(+), 6 deletions(-) - -diff --git a/src/glx/drisw_glx.c b/src/glx/drisw_glx.c -index 069f64d5216..dfa3218d759 100644 ---- a/src/glx/drisw_glx.c -+++ b/src/glx/drisw_glx.c -@@ -288,10 +288,10 @@ swrastGetImage(__DRIdrawable * read, - swrastGetImage2(read, x, y, w, h, 0, data, loaderPrivate); - } - --static void --swrastGetImageShm(__DRIdrawable * read, -- int x, int y, int w, int h, -- int shmid, void *loaderPrivate) -+static GLboolean -+swrastGetImageShm2(__DRIdrawable * read, -+ int x, int y, int w, int h, -+ int shmid, void *loaderPrivate) - { - struct drisw_drawable *prp = loaderPrivate; - __GLXDRIdrawable *pread = &(prp->base); -@@ -301,8 +301,11 @@ swrastGetImageShm(__DRIdrawable * read, - - if (!prp->ximage || shmid != prp->shminfo.shmid) { - if (!XCreateDrawable(prp, shmid, dpy)) -- return; -+ return GL_FALSE; - } -+ -+ if (prp->shminfo.shmid == -1) -+ return GL_FALSE; - readable = pread->xDrawable; - - ximage = prp->ximage; -@@ -312,10 +315,19 @@ swrastGetImageShm(__DRIdrawable * read, - ximage->bytes_per_line = bytes_per_line(w * ximage->bits_per_pixel, 32); - - XShmGetImage(dpy, readable, ximage, x, y, ~0L); -+ return GL_TRUE; -+} -+ -+static void -+swrastGetImageShm(__DRIdrawable * read, -+ int x, int y, int w, int h, -+ int shmid, void *loaderPrivate) -+{ -+ swrastGetImageShm2(read, x, y, w, h, shmid, loaderPrivate); - } - - static const __DRIswrastLoaderExtension swrastLoaderExtension_shm = { -- .base = {__DRI_SWRAST_LOADER, 5 }, -+ .base = {__DRI_SWRAST_LOADER, 6 }, - - .getDrawableInfo = swrastGetDrawableInfo, - .putImage = swrastPutImage, -@@ -325,6 +337,7 @@ static const __DRIswrastLoaderExtension swrastLoaderExtension_shm = { - .putImageShm = swrastPutImageShm, - .getImageShm = swrastGetImageShm, - .putImageShm2 = swrastPutImageShm2, -+ .getImageShm2 = swrastGetImageShm2, - }; - - 
static const __DRIextension *loader_extensions_shm[] = { --- -2.21.1 - diff --git a/SOURCES/Makefile b/SOURCES/Makefile index 68bdaa7..8396596 100644 --- a/SOURCES/Makefile +++ b/SOURCES/Makefile @@ -1,4 +1,4 @@ -VERSION ?= 19.3.4 +VERSION ?= 20.1.4 SANITIZE ?= 1 DIRNAME = mesa-${VERSION} @@ -10,7 +10,7 @@ clean: rm -f mesa-${VERSION}.tar.xz clone: clean - curl -O https://mesa.freedesktop.org/archive/mesa-${VERSION}.tar.xz + curl -O https://archive.mesa3d.org/mesa-${VERSION}.tar.xz tar xf mesa-${VERSION}.tar.xz sanitize: clone vl_mpeg12_decoder.c vl_decoder.c diff --git a/SOURCES/dri-shm-fix-put-image.patch b/SOURCES/dri-shm-fix-put-image.patch deleted file mode 100644 index 7548e56..0000000 --- a/SOURCES/dri-shm-fix-put-image.patch +++ /dev/null @@ -1,12 +0,0 @@ -diff -up mesa-19.3.3/src/glx/drisw_glx.c.dma mesa-19.3.3/src/glx/drisw_glx.c ---- mesa-19.3.3/src/glx/drisw_glx.c.dma 2020-02-14 12:37:42.551008273 +1000 -+++ mesa-19.3.3/src/glx/drisw_glx.c 2020-02-14 12:37:49.683081406 +1000 -@@ -199,7 +199,7 @@ swrastXPutImage(__DRIdrawable * draw, in - XShmPutImage(dpy, drawable, gc, ximage, srcx, srcy, x, y, w, h, False); - XSync(dpy, False); - } else { -- ximage->width = w; -+ ximage->width = ximage->bytes_per_line / ((ximage->bits_per_pixel + 7)/ 8); - ximage->height = h; - XPutImage(dpy, drawable, gc, ximage, srcx, srcy, x, y, w, h); - } diff --git a/SOURCES/nouveau-tu1xx-support.patch b/SOURCES/nouveau-tu1xx-support.patch new file mode 100644 index 0000000..3254466 --- /dev/null +++ b/SOURCES/nouveau-tu1xx-support.patch @@ -0,0 +1,10387 @@ +diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources +index 6c360992a53..9de8168fbd9 100644 +--- a/src/gallium/drivers/nouveau/Makefile.sources ++++ b/src/gallium/drivers/nouveau/Makefile.sources +@@ -151,6 +151,14 @@ NVC0_CODEGEN_SOURCES := \ + codegen/nv50_ir_target_nvc0.h + + NVC0_C_SOURCES := \ ++ nvc0/cla0c0qmd.h \ ++ nvc0/clc0c0qmd.h \ ++ nvc0/clc3c0qmd.h \ ++ 
nvc0/drf.h \ ++ nvc0/qmd.h \ ++ nvc0/qmda0c0.c \ ++ nvc0/qmdc0c0.c \ ++ nvc0/qmdc3c0.c \ + nvc0/gm107_texture.xml.h \ + nvc0/nvc0_3d.xml.h \ + nvc0/nvc0_compute.c \ +diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h +index 42ee969c66b..d58c0d206ec 100644 +--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h ++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h +@@ -67,8 +67,10 @@ enum operation + OP_AND, + OP_OR, + OP_XOR, ++ OP_LOP3_LUT, + OP_SHL, + OP_SHR, ++ OP_SHF, + OP_MAX, + OP_MIN, + OP_SAT, // CLAMP(f32, 0.0, 1.0) +@@ -116,6 +118,7 @@ enum operation + OP_PINTERP, + OP_EMIT, // emit vertex + OP_RESTART, // restart primitive ++ OP_FINAL, // finish emitting primitives + OP_TEX, + OP_TXB, // texture bias + OP_TXL, // texure lod +@@ -151,7 +154,10 @@ enum operation + OP_INSBF, // insert first src1[8:15] bits of src0 into src2 at src1[0:7] + OP_EXTBF, // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK + OP_BFIND, // find highest/lowest set bit ++ OP_BREV, // bitfield reverse ++ OP_BMSK, // bitfield mask + OP_PERMT, // dst = bytes from src2,src0 selected by src1 (nvc0's src order) ++ OP_SGXT, + OP_ATOM, + OP_BAR, // execution barrier, sources = { id, thread count, predicate } + OP_VADD, // byte/word vector operations +@@ -167,6 +173,7 @@ enum operation + OP_SHFL, // warp shuffle + OP_VOTE, + OP_BUFQ, // buffer query ++ OP_WARPSYNC, + OP_LAST + }; + +@@ -254,11 +261,29 @@ enum operation + #define NV50_IR_SUBOP_VOTE_ALL 0 + #define NV50_IR_SUBOP_VOTE_ANY 1 + #define NV50_IR_SUBOP_VOTE_UNI 2 ++#define NV50_IR_SUBOP_LOP3_LUT_SRC0 0xf0 ++#define NV50_IR_SUBOP_LOP3_LUT_SRC1 0xcc ++#define NV50_IR_SUBOP_LOP3_LUT_SRC2 0xaa ++#define NV50_IR_SUBOP_LOP3_LUT(exp) ({ \ ++ uint8_t a = NV50_IR_SUBOP_LOP3_LUT_SRC0; \ ++ uint8_t b = NV50_IR_SUBOP_LOP3_LUT_SRC1; \ ++ uint8_t c = NV50_IR_SUBOP_LOP3_LUT_SRC2; \ ++ (uint8_t)(exp); \ ++}) ++#define NV50_IR_SUBOP_BMSK_C (0 << 0) ++#define NV50_IR_SUBOP_BMSK_W (1 << 0) + + 
#define NV50_IR_SUBOP_MINMAX_LOW 1 + #define NV50_IR_SUBOP_MINMAX_MED 2 + #define NV50_IR_SUBOP_MINMAX_HIGH 3 + ++#define NV50_IR_SUBOP_SHF_L (0 << 0) ++#define NV50_IR_SUBOP_SHF_R (1 << 0) ++#define NV50_IR_SUBOP_SHF_LO (0 << 1) ++#define NV50_IR_SUBOP_SHF_HI (1 << 1) ++#define NV50_IR_SUBOP_SHF_C (0 << 2) ++#define NV50_IR_SUBOP_SHF_W (1 << 2) ++ + // xmad(src0, src1, 0) << 16 + src2 + #define NV50_IR_SUBOP_XMAD_PSL (1 << 0) + // (xmad(src0, src1, src2) & 0xffff) | (src1 << 16) +@@ -900,7 +925,7 @@ public: + + uint16_t subOp; // quadop, 1 for mul-high, etc. + +- unsigned encSize : 4; // encoding size in bytes ++ unsigned encSize : 5; // encoding size in bytes + unsigned saturate : 1; // to [0.0f, 1.0f] + unsigned join : 1; // converge control flow (use OP_JOIN until end) + unsigned fixed : 1; // prevent dead code elimination +diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +index 5dc0e24c5dc..63ea7f5e7e8 100644 +--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h ++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h +@@ -29,6 +29,8 @@ + #include "tgsi/tgsi_parse.h" + #include "tgsi/tgsi_scan.h" + ++struct nir_shader_compiler_options; ++ + /* + * This struct constitutes linkage information in TGSI terminology. 
+ * +@@ -70,10 +72,12 @@ struct nv50_ir_prog_symbol + uint32_t offset; + }; + ++#define NVISA_GF100_CHIPSET 0xc0 + #define NVISA_GK104_CHIPSET 0xe0 + #define NVISA_GK20A_CHIPSET 0xea + #define NVISA_GM107_CHIPSET 0x110 + #define NVISA_GM200_CHIPSET 0x120 ++#define NVISA_GV100_CHIPSET 0x140 + + struct nv50_ir_prog_info + { +@@ -200,6 +204,9 @@ struct nv50_ir_prog_info + extern "C" { + #endif + ++const struct nir_shader_compiler_options * ++nv50_ir_nir_shader_compiler_options(int chipset); ++ + extern int nv50_ir_generate_code(struct nv50_ir_prog_info *); + + extern void nv50_ir_relocate_code(void *relocData, uint32_t *code, +diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +index e244bd0d610..dd8e1ab86c4 100644 +--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp ++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp +@@ -23,6 +23,7 @@ + */ + + #include "codegen/nv50_ir_target_gm107.h" ++#include "codegen/nv50_ir_sched_gm107.h" + + //#define GM107_DEBUG_SCHED_DATA + +@@ -170,6 +171,7 @@ private: + void emitBFI(); + void emitBFE(); + void emitFLO(); ++ void emitPRMT(); + + void emitLDSTs(int, DataType); + void emitLDSTc(int); +@@ -2371,6 +2373,33 @@ CodeEmitterGM107::emitFLO() + emitGPR (0x00, insn->def(0)); + } + ++void ++CodeEmitterGM107::emitPRMT() ++{ ++ switch (insn->src(1).getFile()) { ++ case FILE_GPR: ++ emitInsn(0x5bc00000); ++ emitGPR (0x14, insn->src(1)); ++ break; ++ case FILE_MEMORY_CONST: ++ emitInsn(0x4bc00000); ++ emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1)); ++ break; ++ case FILE_IMMEDIATE: ++ emitInsn(0x36c00000); ++ emitIMMD(0x14, 19, insn->src(1)); ++ break; ++ default: ++ assert(!"bad src1 file"); ++ break; ++ } ++ ++ emitField(0x30, 3, insn->subOp); ++ emitGPR (0x27, insn->src(2)); ++ emitGPR (0x08, insn->src(0)); ++ emitGPR (0x00, insn->def(0)); ++} ++ + /******************************************************************************* + 
* memory + ******************************************************************************/ +@@ -3537,6 +3566,9 @@ CodeEmitterGM107::emitInstruction(Instruction *i) + case OP_BFIND: + emitFLO(); + break; ++ case OP_PERMT: ++ emitPRMT(); ++ break; + case OP_SLCT: + if (isFloatType(insn->dType)) + emitFCMP(); +@@ -3742,156 +3774,6 @@ CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const + * sched data calculator + ******************************************************************************/ + +-class SchedDataCalculatorGM107 : public Pass +-{ +-public: +- SchedDataCalculatorGM107(const TargetGM107 *targ) : targ(targ) {} +- +-private: +- struct RegScores +- { +- struct ScoreData { +- int r[256]; +- int p[8]; +- int c; +- } rd, wr; +- int base; +- +- void rebase(const int base) +- { +- const int delta = this->base - base; +- if (!delta) +- return; +- this->base = 0; +- +- for (int i = 0; i < 256; ++i) { +- rd.r[i] += delta; +- wr.r[i] += delta; +- } +- for (int i = 0; i < 8; ++i) { +- rd.p[i] += delta; +- wr.p[i] += delta; +- } +- rd.c += delta; +- wr.c += delta; +- } +- void wipe() +- { +- memset(&rd, 0, sizeof(rd)); +- memset(&wr, 0, sizeof(wr)); +- } +- int getLatest(const ScoreData& d) const +- { +- int max = 0; +- for (int i = 0; i < 256; ++i) +- if (d.r[i] > max) +- max = d.r[i]; +- for (int i = 0; i < 8; ++i) +- if (d.p[i] > max) +- max = d.p[i]; +- if (d.c > max) +- max = d.c; +- return max; +- } +- inline int getLatestRd() const +- { +- return getLatest(rd); +- } +- inline int getLatestWr() const +- { +- return getLatest(wr); +- } +- inline int getLatest() const +- { +- return MAX2(getLatestRd(), getLatestWr()); +- } +- void setMax(const RegScores *that) +- { +- for (int i = 0; i < 256; ++i) { +- rd.r[i] = MAX2(rd.r[i], that->rd.r[i]); +- wr.r[i] = MAX2(wr.r[i], that->wr.r[i]); +- } +- for (int i = 0; i < 8; ++i) { +- rd.p[i] = MAX2(rd.p[i], that->rd.p[i]); +- wr.p[i] = MAX2(wr.p[i], that->wr.p[i]); +- } +- rd.c = MAX2(rd.c, that->rd.c); +- wr.c = 
MAX2(wr.c, that->wr.c); +- } +- void print(int cycle) +- { +- for (int i = 0; i < 256; ++i) { +- if (rd.r[i] > cycle) +- INFO("rd $r%i @ %i\n", i, rd.r[i]); +- if (wr.r[i] > cycle) +- INFO("wr $r%i @ %i\n", i, wr.r[i]); +- } +- for (int i = 0; i < 8; ++i) { +- if (rd.p[i] > cycle) +- INFO("rd $p%i @ %i\n", i, rd.p[i]); +- if (wr.p[i] > cycle) +- INFO("wr $p%i @ %i\n", i, wr.p[i]); +- } +- if (rd.c > cycle) +- INFO("rd $c @ %i\n", rd.c); +- if (wr.c > cycle) +- INFO("wr $c @ %i\n", wr.c); +- } +- }; +- +- RegScores *score; // for current BB +- std::vector scoreBoards; +- +- const TargetGM107 *targ; +- bool visit(Function *); +- bool visit(BasicBlock *); +- +- void commitInsn(const Instruction *, int); +- int calcDelay(const Instruction *, int) const; +- void setDelay(Instruction *, int, const Instruction *); +- void recordWr(const Value *, int, int); +- void checkRd(const Value *, int, int&) const; +- +- inline void emitYield(Instruction *); +- inline void emitStall(Instruction *, uint8_t); +- inline void emitReuse(Instruction *, uint8_t); +- inline void emitWrDepBar(Instruction *, uint8_t); +- inline void emitRdDepBar(Instruction *, uint8_t); +- inline void emitWtDepBar(Instruction *, uint8_t); +- +- inline int getStall(const Instruction *) const; +- inline int getWrDepBar(const Instruction *) const; +- inline int getRdDepBar(const Instruction *) const; +- inline int getWtDepBar(const Instruction *) const; +- +- void setReuseFlag(Instruction *); +- +- inline void printSchedInfo(int, const Instruction *) const; +- +- struct LiveBarUse { +- LiveBarUse(Instruction *insn, Instruction *usei) +- : insn(insn), usei(usei) { } +- Instruction *insn; +- Instruction *usei; +- }; +- +- struct LiveBarDef { +- LiveBarDef(Instruction *insn, Instruction *defi) +- : insn(insn), defi(defi) { } +- Instruction *insn; +- Instruction *defi; +- }; +- +- bool insertBarriers(BasicBlock *); +- +- bool doesInsnWriteTo(const Instruction *insn, const Value *val) const; +- Instruction 
*findFirstUse(const Instruction *) const; +- Instruction *findFirstDef(const Instruction *) const; +- +- bool needRdDepBar(const Instruction *) const; +- bool needWrDepBar(const Instruction *) const; +-}; +- + inline void + SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt) + { +diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp +new file mode 100644 +index 00000000000..ef33743e610 +--- /dev/null ++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp +@@ -0,0 +1,2052 @@ ++/* ++ * Copyright 2020 Red Hat Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. 
++ */ ++#include "codegen/nv50_ir_emit_gv100.h" ++#include "codegen/nv50_ir_sched_gm107.h" ++ ++namespace nv50_ir { ++ ++/******************************************************************************* ++ * instruction format helpers ++ ******************************************************************************/ ++ ++#define FA_NODEF (1 << 0) ++#define FA_RRR (1 << 1) ++#define FA_RRI (1 << 2) ++#define FA_RRC (1 << 3) ++#define FA_RIR (1 << 4) ++#define FA_RCR (1 << 5) ++ ++#define FA_SRC_MASK 0x0ff ++#define FA_SRC_NEG 0x100 ++#define FA_SRC_ABS 0x200 ++ ++#define EMPTY -1 ++#define __(a) (a) // no source modifiers ++#define _A(a) ((a) | FA_SRC_ABS) ++#define N_(a) ((a) | FA_SRC_NEG) ++#define NA(a) ((a) | FA_SRC_NEG | FA_SRC_ABS) ++ ++void ++CodeEmitterGV100::emitFormA_I32(int src) ++{ ++ emitIMMD(32, 32, insn->src(src)); ++ if (insn->src(src).mod.abs()) ++ code[1] &= 0x7fffffff; ++ if (insn->src(src).mod.neg()) ++ code[1] ^= 0x80000000; ++} ++ ++void ++CodeEmitterGV100::emitFormA_RRC(uint16_t op, int src1, int src2) ++{ ++ emitInsn(op); ++ if (src1 >= 0) { ++ emitNEG (75, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG)); ++ emitABS (74, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS)); ++ emitGPR (64, insn->src(src1 & FA_SRC_MASK)); ++ } ++ if (src2 >= 0) { ++ emitNEG (63, (src2 & FA_SRC_MASK), (src2 & FA_SRC_NEG)); ++ emitABS (62, (src2 & FA_SRC_MASK), (src2 & FA_SRC_ABS)); ++ emitCBUF(54, -1, 38, 0, 2, insn->src(src2 & FA_SRC_MASK)); ++ } ++} ++ ++void ++CodeEmitterGV100::emitFormA_RRI(uint16_t op, int src1, int src2) ++{ ++ emitInsn(op); ++ if (src1 >= 0) { ++ emitNEG (75, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG)); ++ emitABS (74, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS)); ++ emitGPR (64, insn->src(src1 & FA_SRC_MASK)); ++ } ++ if (src2 >= 0) ++ emitFormA_I32(src2 & FA_SRC_MASK); ++} ++ ++void ++CodeEmitterGV100::emitFormA_RRR(uint16_t op, int src1, int src2) ++{ ++ emitInsn(op); ++ if (src2 >= 0) { ++ emitNEG (75, (src2 & FA_SRC_MASK), (src2 & FA_SRC_NEG)); ++ 
emitABS (74, (src2 & FA_SRC_MASK), (src2 & FA_SRC_ABS)); ++ emitGPR (64, insn->src(src2 & FA_SRC_MASK)); ++ } ++ ++ if (src1 >= 0) { ++ emitNEG (63, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG)); ++ emitABS (62, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS)); ++ emitGPR (32, insn->src(src1 & FA_SRC_MASK)); ++ } ++} ++ ++void ++CodeEmitterGV100::emitFormA(uint16_t op, uint8_t forms, ++ int src0, int src1, int src2) ++{ ++ switch ((src1 < 0) ? FILE_GPR : insn->src(src1 & FA_SRC_MASK).getFile()) { ++ case FILE_GPR: ++ switch ((src2 < 0) ? FILE_GPR : insn->src(src2 & FA_SRC_MASK).getFile()) { ++ case FILE_GPR: ++ assert(forms & FA_RRR); ++ emitFormA_RRR((1 << 9) | op, src1, src2); ++ break; ++ case FILE_IMMEDIATE: ++ assert(forms & FA_RRI); ++ emitFormA_RRI((2 << 9) | op, src1, src2); ++ break; ++ case FILE_MEMORY_CONST: ++ assert(forms & FA_RRC); ++ emitFormA_RRC((3 << 9) | op, src1, src2); ++ break; ++ default: ++ assert(!"bad src2 file"); ++ break; ++ } ++ break; ++ case FILE_IMMEDIATE: ++ assert((src2 < 0) || insn->src(src2 & FA_SRC_MASK).getFile() == FILE_GPR); ++ assert(forms & FA_RIR); ++ emitFormA_RRI((4 << 9) | op, src2, src1); ++ break; ++ case FILE_MEMORY_CONST: ++ assert((src2 < 0) || insn->src(src2 & FA_SRC_MASK).getFile() == FILE_GPR); ++ assert(forms & FA_RCR); ++ emitFormA_RRC((5 << 9) | op, src2, src1); ++ break; ++ default: ++ assert(!"bad src1 file"); ++ break; ++ } ++ ++ if (src0 >= 0) { ++ assert(insn->src(src0 & FA_SRC_MASK).getFile() == FILE_GPR); ++ emitABS(73, (src0 & FA_SRC_MASK), (src0 & FA_SRC_ABS)); ++ emitNEG(72, (src0 & FA_SRC_MASK), (src0 & FA_SRC_NEG)); ++ emitGPR(24, insn->src(src0 & FA_SRC_MASK)); ++ } ++ ++ if (!(forms & FA_NODEF)) ++ emitGPR(16, insn->def(0)); ++} ++ ++/******************************************************************************* ++ * control ++ ******************************************************************************/ ++ ++void ++CodeEmitterGV100::emitBRA() ++{ ++ const FlowInstruction *insn = this->insn->asFlow(); 
++ int64_t target = ((int64_t)insn->target.bb->binPos - (codeSize + 0x10)) / 4; ++ ++ assert(!insn->indirect && !insn->absolute); ++ ++ emitInsn (0x947); ++ emitField(34, 48, target); ++ emitPRED (87); ++ emitField(86, 2, 0); // ./.INC/.DEC ++} ++ ++void ++CodeEmitterGV100::emitEXIT() ++{ ++ emitInsn (0x94d); ++ emitNOT (90); ++ emitPRED (87); ++ emitField(85, 1, 0); // .NO_ATEXIT ++ emitField(84, 2, 0); // ./.KEEPREFCOUNT/.PREEMPTED/.INVALID3 ++} ++ ++void ++CodeEmitterGV100::emitKILL() ++{ ++ emitInsn(0x95b); ++ emitPRED(87); ++} ++ ++void ++CodeEmitterGV100::emitNOP() ++{ ++ emitInsn(0x918); ++} ++ ++void ++CodeEmitterGV100::emitWARPSYNC() ++{ ++ emitFormA(0x148, FA_NODEF | FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); ++ emitNOT (90); ++ emitPRED (87); ++} ++ ++/******************************************************************************* ++ * movement / conversion ++ ******************************************************************************/ ++ ++void ++CodeEmitterGV100::emitCS2R() ++{ ++ emitInsn(0x805); ++ emitSYS (72, insn->src(0)); ++ emitGPR (16, insn->def(0)); ++} ++ ++void ++CodeEmitterGV100::emitF2F() ++{ ++ if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8) ++ emitFormA(0x104, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY); ++ else ++ emitFormA(0x110, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY); ++ emitField(84, 2, util_logbase2(typeSizeof(insn->sType))); ++ emitFMZ (80, 1); ++ emitRND (78); ++ emitField(75, 2, util_logbase2(typeSizeof(insn->dType))); ++ emitField(60, 2, insn->subOp); // ./.H1/.INVALID2/.INVALID3 ++} ++ ++void ++CodeEmitterGV100::emitF2I() ++{ ++ if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8) ++ emitFormA(0x105, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY); ++ else ++ emitFormA(0x111, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY); ++ emitField(84, 2, util_logbase2(typeSizeof(insn->sType))); ++ emitFMZ (80, 1); ++ emitRND (78); ++ emitField(77, 1, 0); // .NTZ ++ emitField(75, 2, 
util_logbase2(typeSizeof(insn->dType))); ++ emitField(72, 1, isSignedType(insn->dType)); ++} ++ ++void ++CodeEmitterGV100::emitFRND() ++{ ++ int subop = 0; ++ ++ switch (insn->op) { ++ case OP_CVT: ++ switch (insn->rnd) { ++ case ROUND_NI: subop = 0; break; ++ case ROUND_MI: subop = 1; break; ++ case ROUND_PI: subop = 2; break; ++ case ROUND_ZI: subop = 3; break; ++ default: ++ assert(!"invalid FRND mode"); ++ break; ++ } ++ break; ++ case OP_FLOOR: subop = 1; break; ++ case OP_CEIL : subop = 2; break; ++ case OP_TRUNC: subop = 3; break; ++ default: ++ assert(!"invalid FRND opcode"); ++ break; ++ } ++ ++ if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8) ++ emitFormA(0x107, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY); ++ else ++ emitFormA(0x113, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY); ++ emitField(84, 2, util_logbase2(typeSizeof(insn->sType))); ++ emitFMZ (80, 1); ++ emitField(78, 2, subop); ++ emitField(75, 2, util_logbase2(typeSizeof(insn->dType))); ++} ++ ++void ++CodeEmitterGV100::emitI2F() ++{ ++ if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8) ++ emitFormA(0x106, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); ++ else ++ emitFormA(0x112, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); ++ emitField(84, 2, util_logbase2(typeSizeof(insn->sType))); ++ emitRND (78); ++ emitField(75, 2, util_logbase2(typeSizeof(insn->dType))); ++ emitField(74, 1, isSignedType(insn->sType)); ++ if (typeSizeof(insn->sType) == 2) ++ emitField(60, 2, insn->subOp >> 1); ++ else ++ emitField(60, 2, insn->subOp); // ./.B1/.B2/.B3 ++} ++ ++void ++CodeEmitterGV100::emitMOV() ++{ ++ switch (insn->def(0).getFile()) { ++ case FILE_GPR: ++ switch (insn->src(0).getFile()) { ++ case FILE_GPR: ++ case FILE_MEMORY_CONST: ++ case FILE_IMMEDIATE: ++ emitFormA(0x002, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); ++ emitField(72, 4, insn->lanes); ++ break; ++ case FILE_PREDICATE: ++ emitInsn (0x807); ++ emitGPR (16, insn->def(0)); ++ emitGPR (24); ++ 
emitField(32, 32, 0xffffffff); ++ emitField(90, 1, 1); ++ emitPRED (87, insn->src(0)); ++ break; ++ default: ++ assert(!"bad src file"); ++ break; ++ } ++ break; ++ case FILE_PREDICATE: ++ emitInsn (0x20c); ++ emitPRED (87); ++ emitPRED (84); ++ emitNOT (71); ++ emitPRED (68); ++ emitPRED (81, insn->def(0)); ++ emitCond3(76, CC_NE); ++ emitGPR (24, insn->src(0)); ++ emitGPR (32); ++ break; ++ default: ++ assert(!"bad dst file"); ++ break; ++ } ++} ++ ++void ++CodeEmitterGV100::emitPRMT() ++{ ++ emitFormA(0x016, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), __(2)); ++ emitField(72, 3, insn->subOp); ++} ++ ++void ++CodeEmitterGV100::emitS2R() ++{ ++ emitInsn(0x919); ++ emitSYS (72, insn->src(0)); ++ emitGPR (16, insn->def(0)); ++} ++ ++static void ++selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data) ++{ ++ int loc = entry->loc; ++ if (data.force_persample_interp) ++ code[loc + 2] |= 1 << 26; ++ else ++ code[loc + 2] &= ~(1 << 26); ++} ++ ++void ++CodeEmitterGV100::emitSEL() ++{ ++ emitFormA(0x007, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY); ++ emitNOT (90, insn->src(2)); ++ emitPRED (87, insn->src(2)); ++ if (insn->subOp == 1) ++ addInterp(0, 0, selpFlip); ++} ++ ++void ++CodeEmitterGV100::emitSHFL() ++{ ++ switch (insn->src(1).getFile()) { ++ case FILE_GPR: ++ switch (insn->src(2).getFile()) { ++ case FILE_GPR: ++ emitInsn(0x389); ++ emitGPR (64, insn->src(2)); ++ break; ++ case FILE_IMMEDIATE: ++ emitInsn(0x589); ++ emitIMMD(40, 13, insn->src(2)); ++ break; ++ default: ++ assert(!"bad src2 file"); ++ break; ++ } ++ emitGPR(32, insn->src(1)); ++ break; ++ case FILE_IMMEDIATE: ++ switch (insn->src(2).getFile()) { ++ case FILE_GPR: ++ emitInsn(0x989); ++ emitGPR (64, insn->src(2)); ++ break; ++ case FILE_IMMEDIATE: ++ emitInsn(0xf89); ++ emitIMMD(40, 13, insn->src(2)); ++ break; ++ default: ++ assert(!"bad src2 file"); ++ break; ++ } ++ emitIMMD(53, 5, insn->src(1)); ++ break; ++ default: ++ assert(!"bad src1 file"); ++ break; ++ 
} ++ ++ if (insn->defExists(1)) ++ emitPRED(81, insn->def(1)); ++ else ++ emitPRED(81); ++ ++ emitField(58, 2, insn->subOp); ++ emitGPR (24, insn->src(0)); ++ emitGPR (16, insn->def(0)); ++} ++ ++/******************************************************************************* ++ * fp32 ++ ******************************************************************************/ ++ ++void ++CodeEmitterGV100::emitFADD() ++{ ++ if (insn->src(1).getFile() == FILE_GPR) ++ emitFormA(0x021, FA_RRR , NA(0), NA(1), EMPTY); ++ else ++ emitFormA(0x021, FA_RRI | FA_RRC, NA(0), EMPTY, NA(1)); ++ emitFMZ (80, 1); ++ emitRND (78); ++ emitSAT (77); ++} ++ ++void ++CodeEmitterGV100::emitFFMA() ++{ ++ emitFormA(0x023, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, NA(0), NA(1), NA(2)); ++ emitField(80, 1, insn->ftz); ++ emitRND (78); ++ emitSAT (77); ++ emitField(76, 1, insn->dnz); ++} ++ ++void ++CodeEmitterGV100::emitFMNMX() ++{ ++ emitFormA(0x009, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY); ++ emitField(90, 1, insn->op == OP_MAX); ++ emitPRED (87); ++ emitFMZ (80, 1); ++} ++ ++void ++CodeEmitterGV100::emitFMUL() ++{ ++ emitFormA(0x020, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY); ++ emitField(80, 1, insn->ftz); ++ emitPDIV (84); ++ emitRND (78); ++ emitSAT (77); ++ emitField(76, 1, insn->dnz); ++} ++ ++void ++CodeEmitterGV100::emitFSET_BF() ++{ ++ const CmpInstruction *insn = this->insn->asCmp(); ++ ++ emitFormA(0x00a, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY); ++ emitFMZ (80, 1); ++ emitCond4(76, insn->setCond); ++ ++ if (insn->op != OP_SET) { ++ switch (insn->op) { ++ case OP_SET_AND: emitField(74, 2, 0); break; ++ case OP_SET_OR : emitField(74, 2, 1); break; ++ case OP_SET_XOR: emitField(74, 2, 2); break; ++ default: ++ assert(!"invalid set op"); ++ break; ++ } ++ emitNOT (90, insn->src(2)); ++ emitPRED(87, insn->src(2)); ++ } else { ++ emitPRED(87); ++ } ++} ++ ++void ++CodeEmitterGV100::emitFSETP() ++{ ++ const CmpInstruction *insn = this->insn->asCmp(); ++ ++ emitFormA(0x00b, 
FA_NODEF | FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY); ++ emitFMZ (80, 1); ++ emitCond4(76, insn->setCond); ++ ++ if (insn->op != OP_SET) { ++ switch (insn->op) { ++ case OP_SET_AND: emitField(74, 2, 0); break; ++ case OP_SET_OR : emitField(74, 2, 1); break; ++ case OP_SET_XOR: emitField(74, 2, 2); break; ++ default: ++ assert(!"invalid set op"); ++ break; ++ } ++ emitNOT (90, insn->src(2)); ++ emitPRED(87, insn->src(2)); ++ } else { ++ emitPRED(87); ++ } ++ ++ if (insn->defExists(1)) ++ emitPRED(84, insn->def(1)); ++ else ++ emitPRED(84); ++ emitPRED(81, insn->def(0)); ++} ++ ++void ++CodeEmitterGV100::emitFSWZADD() ++{ ++ uint8_t subOp = 0; ++ ++ // NP/PN swapped vs SM60 ++ for (int i = 0; i < 4; i++) { ++ uint8_t p = ((insn->subOp >> (i * 2)) & 3); ++ if (p == 1 || p == 2) ++ p ^= 3; ++ subOp |= p << (i * 2); ++ } ++ ++ emitInsn (0x822); ++ emitFMZ (80, 1); ++ emitRND (78); ++ emitField(77, 1, insn->lanes); /* abused for .ndv */ ++ emitGPR (64, insn->src(1)); ++ emitField(32, 8, subOp); ++ emitGPR (24, insn->src(0)); ++ emitGPR (16, insn->def(0)); ++} ++ ++void ++CodeEmitterGV100::emitMUFU() ++{ ++ int mufu = 0; ++ ++ switch (insn->op) { ++ case OP_COS : mufu = 0; break; ++ case OP_SIN : mufu = 1; break; ++ case OP_EX2 : mufu = 2; break; ++ case OP_LG2 : mufu = 3; break; ++ case OP_RCP : mufu = 4 + 2 * insn->subOp; break; ++ case OP_RSQ : mufu = 5 + 2 * insn->subOp; break; ++ case OP_SQRT: mufu = 8; break; ++ default: ++ assert(!"invalid mufu"); ++ break; ++ } ++ ++ emitFormA(0x108, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY); ++ emitField(74, 4, mufu); ++} ++ ++/******************************************************************************* ++ * fp64 ++ ******************************************************************************/ ++ ++void ++CodeEmitterGV100::emitDADD() ++{ ++ emitFormA(0x029, FA_RRR | FA_RRI | FA_RRC, NA(0), EMPTY, NA(1)); ++ emitRND(78); ++} ++ ++void ++CodeEmitterGV100::emitDFMA() ++{ ++ emitFormA(0x02b, FA_RRR | FA_RRI | FA_RRC | 
FA_RIR | FA_RCR, NA(0), NA(1), NA(2)); ++ emitRND(78); ++} ++ ++void ++CodeEmitterGV100::emitDMUL() ++{ ++ emitFormA(0x028, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY); ++ emitRND(78); ++} ++ ++void ++CodeEmitterGV100::emitDSETP() ++{ ++ const CmpInstruction *insn = this->insn->asCmp(); ++ ++ if (insn->src(1).getFile() == FILE_GPR) ++ emitFormA(0x02a, FA_NODEF | FA_RRR , NA(0), NA(1), EMPTY); ++ else ++ emitFormA(0x02a, FA_NODEF | FA_RRI | FA_RRC, NA(0), EMPTY, NA(1)); ++ ++ if (insn->op != OP_SET) { ++ switch (insn->op) { ++ case OP_SET_AND: emitField(74, 2, 0); break; ++ case OP_SET_OR : emitField(74, 2, 1); break; ++ case OP_SET_XOR: emitField(74, 2, 2); break; ++ default: ++ assert(!"invalid set op"); ++ break; ++ } ++ emitNOT (90, insn->src(2)); ++ emitPRED(87, insn->src(2)); ++ } else { ++ emitPRED(87); ++ } ++ ++ if (insn->defExists(1)) ++ emitPRED(84, insn->def(1)); ++ else ++ emitPRED(84); ++ emitPRED (81, insn->def(0)); ++ emitCond4(76, insn->setCond); ++} ++ ++/******************************************************************************* ++ * integer ++ ******************************************************************************/ ++ ++void ++CodeEmitterGV100::emitBMSK() ++{ ++ emitFormA(0x01b, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY); ++ emitField(75, 1, insn->subOp); // .C/.W ++} ++ ++void ++CodeEmitterGV100::emitBREV() ++{ ++ emitFormA(0x101, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); ++} ++ ++void ++CodeEmitterGV100::emitFLO() ++{ ++ emitFormA(0x100, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); ++ emitPRED (81); ++ emitField(74, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT); ++ emitField(73, 1, isSignedType(insn->dType)); ++ emitNOT (63, insn->src(0)); ++} ++ ++void ++CodeEmitterGV100::emitIABS() ++{ ++ emitFormA(0x013, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); ++} ++ ++void ++CodeEmitterGV100::emitIADD3() ++{ ++// emitFormA(0x010, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(1), N_(2)); ++ emitFormA(0x010, FA_RRR | FA_RIR | FA_RCR, 
N_(0), N_(1), EMPTY); ++ emitGPR (64); //XXX: fix when switching back to N_(2) ++ emitPRED (84, NULL); // .CC1 ++ emitPRED (81, insn->flagsDef >= 0 ? insn->getDef(insn->flagsDef) : NULL); ++ if (insn->flagsSrc >= 0) { ++ emitField(74, 1, 1); // .X ++ emitPRED (87, insn->getSrc(insn->flagsSrc)); ++ emitField(77, 4, 0xf); // .X1 ++ } ++} ++ ++void ++CodeEmitterGV100::emitIMAD() ++{ ++ emitFormA(0x024, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), N_(2)); ++ emitField(73, 1, isSignedType(insn->sType)); ++} ++ ++void ++CodeEmitterGV100::emitIMAD_WIDE() ++{ ++ emitFormA(0x025, FA_RRR | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), N_(2)); ++ emitPRED (81); ++ emitField(73, 1, isSignedType(insn->sType)); ++} ++ ++void ++CodeEmitterGV100::emitISETP() ++{ ++ const CmpInstruction *insn = this->insn->asCmp(); ++ ++ emitFormA(0x00c, FA_NODEF | FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY); ++ ++ if (insn->op != OP_SET) { ++ switch (insn->op) { ++ case OP_SET_AND: emitField(74, 2, 0); break; ++ case OP_SET_OR : emitField(74, 2, 1); break; ++ case OP_SET_XOR: emitField(74, 2, 2); break; ++ default: ++ assert(!"invalid set op"); ++ break; ++ } ++ emitNOT (90, insn->src(2)); ++ emitPRED(87, insn->src(2)); ++ } else { ++ emitPRED(87); ++ } ++ ++ //XXX: CC->pred ++ if (insn->flagsSrc >= 0) { ++ assert(0); ++ emitField(68, 4, 6); ++ } else { ++ emitNOT (71); ++ if (!insn->subOp) ++ emitPRED(68); ++ } ++ ++ if (insn->defExists(1)) ++ emitPRED(84, insn->def(1)); ++ else ++ emitPRED(84); ++ emitPRED (81, insn->def(0)); ++ emitCond3(76, insn->setCond); ++ emitField(73, 1, isSignedType(insn->sType)); ++ ++ if (insn->subOp) { // .EX ++ assert(0); ++ emitField(72, 1, 1); ++ emitPRED (68, insn->srcExists(3) ? 
insn->src(3) : insn->src(2)); ++ } ++} ++ ++void ++CodeEmitterGV100::emitLEA() ++{ ++ assert(insn->src(1).get()->asImm()); ++ ++ emitFormA(0x011, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(2), EMPTY); ++ emitPRED (81); ++ emitIMMD (75, 5, insn->src(1)); ++ emitGPR (64); ++} ++ ++void ++CodeEmitterGV100::emitLOP3_LUT() ++{ ++ emitFormA(0x012, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), __(2)); ++ emitField(90, 1, 1); ++ emitPRED (87); ++ emitPRED (81); ++ emitField(80, 1, 0); // .PAND ++ emitField(72, 8, insn->subOp); ++} ++ ++void ++CodeEmitterGV100::emitPOPC() ++{ ++ emitFormA(0x109, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY); ++ emitNOT (63, insn->src(0)); ++} ++ ++void ++CodeEmitterGV100::emitSGXT() ++{ ++ emitFormA(0x01a, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY); ++ emitField(75, 1, 0); // .W ++ emitField(73, 1, 1); // /.U32 ++} ++ ++void ++CodeEmitterGV100::emitSHF() ++{ ++ emitFormA(0x019, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), __(2)); ++ emitField(80, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_HI)); ++ emitField(76, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_R)); ++ emitField(75, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_W)); ++ ++ switch (insn->sType) { ++ case TYPE_S64: emitField(73, 2, 0); break; ++ case TYPE_U64: emitField(73, 2, 1); break; ++ case TYPE_S32: emitField(73, 2, 2); break; ++ case TYPE_U32: ++ default: ++ emitField(73, 2, 3); ++ break; ++ } ++} ++ ++/******************************************************************************* ++ * load/stores ++ ******************************************************************************/ ++ ++void ++CodeEmitterGV100::emitALD() ++{ ++ emitInsn (0x321); ++ emitField(74, 2, (insn->getDef(0)->reg.size / 4) - 1); ++ emitGPR (32, insn->src(0).getIndirect(1)); ++ emitO (79); ++ emitP (76); ++ emitADDR (24, 40, 10, 0, insn->src(0)); ++ emitGPR (16, insn->def(0)); ++} ++ ++void ++CodeEmitterGV100::emitAST() ++{ ++ emitInsn (0x322); ++ emitField(74, 2, (typeSizeof(insn->dType) / 4) - 1); ++ emitGPR (64, 
insn->src(0).getIndirect(1)); ++ emitP (76); ++ emitADDR (24, 40, 10, 0, insn->src(0)); ++ emitGPR (32, insn->src(1)); ++} ++ ++void ++CodeEmitterGV100::emitATOM() ++{ ++ unsigned subOp, dType; ++ ++ if (insn->subOp != NV50_IR_SUBOP_ATOM_CAS) { ++ emitInsn(0x38a); ++ ++ if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) ++ subOp = 8; ++ else ++ subOp = insn->subOp; ++ emitField(87, 4, subOp); ++ ++ switch (insn->dType) { ++ case TYPE_U32 : dType = 0; break; ++ case TYPE_S32 : dType = 1; break; ++ case TYPE_U64 : dType = 2; break; ++ case TYPE_F32 : dType = 3; break; ++ case TYPE_B128: dType = 4; break; ++ case TYPE_S64 : dType = 5; break; ++ default: ++ assert(!"unexpected dType"); ++ dType = 0; ++ break; ++ } ++ emitField(73, 3, dType); ++ } else { ++ emitInsn(0x38b); ++ ++ switch (insn->dType) { ++ case TYPE_U32: dType = 0; break; ++ case TYPE_U64: dType = 2; break; ++ default: ++ assert(!"unexpected dType"); ++ dType = 0; ++ break; ++ } ++ emitField(73, 3, dType); ++ emitGPR (64, insn->src(2)); ++ } ++ ++ emitPRED (81); ++ emitField(79, 2, 1); ++ emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8); ++ emitGPR (32, insn->src(1)); ++ emitADDR (24, 40, 24, 0, insn->src(0)); ++ emitGPR (16, insn->def(0)); ++} ++ ++void ++CodeEmitterGV100::emitATOMS() ++{ ++ unsigned dType, subOp; ++ ++ if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) { ++ switch (insn->dType) { ++ case TYPE_U32: dType = 0; break; ++ case TYPE_S32: dType = 1; break; ++ case TYPE_U64: dType = 2; break; ++ default: assert(!"unexpected dType"); dType = 0; break; ++ } ++ ++ emitInsn (0x38d); ++ emitField(87, 1, 0); // ATOMS.CAS/ATOMS.CAST ++ emitField(73, 2, dType); ++ emitGPR (64, insn->src(2)); ++ } else { ++ emitInsn(0x38c); ++ ++ if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) ++ subOp = 8; ++ else ++ subOp = insn->subOp; ++ emitField(87, 4, subOp); ++ ++ switch (insn->dType) { ++ case TYPE_U32: dType = 0; break; ++ case TYPE_S32: dType = 1; break; ++ case TYPE_U64: dType = 2; break; ++ default: 
assert(!"unexpected dType"); dType = 0; break; ++ } ++ ++ emitField(73, 2, dType); ++ } ++ ++ emitGPR (32, insn->src(1)); ++ emitADDR (24, 40, 24, 0, insn->src(0)); ++ emitGPR (16, insn->def(0)); ++} ++ ++static void ++interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data) ++{ ++ int ipa = entry->ipa; ++ int loc = entry->loc; ++ ++ if (data.force_persample_interp && ++ (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT && ++ (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) { ++ ipa |= NV50_IR_INTERP_CENTROID; ++ } ++ ++ int sample; ++ switch (ipa & NV50_IR_INTERP_SAMPLE_MASK) { ++ case NV50_IR_INTERP_DEFAULT : sample = 0; break; ++ case NV50_IR_INTERP_CENTROID: sample = 1; break; ++ case NV50_IR_INTERP_OFFSET : sample = 2; break; ++ default: assert(!"invalid sample mode"); ++ } ++ ++ int interp; ++ switch (ipa & NV50_IR_INTERP_MODE_MASK) { ++ case NV50_IR_INTERP_LINEAR : ++ case NV50_IR_INTERP_PERSPECTIVE: interp = 0; break; ++ case NV50_IR_INTERP_FLAT : interp = 1; break; ++ case NV50_IR_INTERP_SC : interp = 2; break; ++ default: assert(!"invalid ipa mode"); ++ } ++ ++ code[loc + 2] &= ~(0xf << 12); ++ code[loc + 2] |= sample << 12; ++ code[loc + 2] |= interp << 14; ++} ++ ++void ++CodeEmitterGV100::emitIPA() ++{ ++ emitInsn (0x326); ++ emitPRED (81, insn->defExists(1) ? 
insn->def(1) : NULL); ++ ++ switch (insn->getInterpMode()) { ++ case NV50_IR_INTERP_LINEAR : ++ case NV50_IR_INTERP_PERSPECTIVE: emitField(78, 2, 0); break; ++ case NV50_IR_INTERP_FLAT : emitField(78, 2, 1); break; ++ case NV50_IR_INTERP_SC : emitField(78, 2, 2); break; ++ default: ++ assert(!"invalid ipa mode"); ++ break; ++ } ++ ++ switch (insn->getSampleMode()) { ++ case NV50_IR_INTERP_DEFAULT : emitField(76, 2, 0); break; ++ case NV50_IR_INTERP_CENTROID: emitField(76, 2, 1); break; ++ case NV50_IR_INTERP_OFFSET : emitField(76, 2, 2); break; ++ default: ++ assert(!"invalid sample mode"); ++ break; ++ } ++ ++ if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET) { ++ emitGPR (32); ++ addInterp(insn->ipa, 0xff, interpApply); ++ } else { ++ emitGPR (32, insn->src(1)); ++ addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, interpApply); ++ } ++ ++ assert(!insn->src(0).isIndirect(0)); ++ emitADDR (-1, 64, 8, 2, insn->src(0)); ++ emitGPR (16, insn->def(0)); ++} ++ ++void ++CodeEmitterGV100::emitISBERD() ++{ ++ emitInsn(0x923); ++ emitGPR (24, insn->src(0)); ++ emitGPR (16, insn->def(0)); ++} ++ ++void ++CodeEmitterGV100::emitLDSTc(int posm, int poso) ++{ ++ int mode = 0; ++ int order = 1; ++ ++ switch (insn->cache) { ++ case CACHE_CA: mode = 0; order = 1; break; ++ case CACHE_CG: mode = 2; order = 2; break; ++ case CACHE_CV: mode = 3; order = 2; break; ++ default: ++ assert(!"invalid caching mode"); ++ break; ++ } ++ ++ emitField(poso, 2, order); ++ emitField(posm, 2, mode); ++} ++ ++void ++CodeEmitterGV100::emitLDSTs(int pos, DataType type) ++{ ++ int data = 0; ++ ++ switch (typeSizeof(type)) { ++ case 1: data = isSignedType(type) ? 1 : 0; break; ++ case 2: data = isSignedType(type) ? 
3 : 2; break; ++ case 4: data = 4; break; ++ case 8: data = 5; break; ++ case 16: data = 6; break; ++ default: ++ assert(!"bad type"); ++ break; ++ } ++ ++ emitField(pos, 3, data); ++} ++ ++void ++CodeEmitterGV100::emitLD() ++{ ++ emitInsn (0x980); ++ emitField(79, 2, 2); // .CONSTANT/./.STRONG/.MMIO ++ emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS ++ emitLDSTs(73, insn->dType); ++ emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8); ++ emitADDR (24, 32, 32, 0, insn->src(0)); ++ emitGPR (16, insn->def(0)); ++} ++ ++void ++CodeEmitterGV100::emitLDC() ++{ ++ emitFormA(0x182, FA_RCR, EMPTY, __(0), EMPTY); ++ emitField(78, 2, insn->subOp); ++ emitLDSTs(73, insn->dType); ++ emitGPR (24, insn->src(0).getIndirect(0)); ++} ++ ++void ++CodeEmitterGV100::emitLDL() ++{ ++ emitInsn (0x983); ++ emitField(84, 3, 1); // .EF/./.EL/.LU/.EU/.NA/.INVALID6/.INVALID7 ++ emitLDSTs(73, insn->dType); ++ emitADDR (24, 40, 24, 0, insn->src(0)); ++ emitGPR (16, insn->def(0)); ++} ++ ++void ++CodeEmitterGV100::emitLDS() ++{ ++ emitInsn (0x984); ++ emitLDSTs(73, insn->dType); ++ emitADDR (24, 40, 24, 0, insn->src(0)); ++ emitGPR (16, insn->def(0)); ++} ++ ++void ++CodeEmitterGV100::emitOUT() ++{ ++ const int cut = insn->op == OP_RESTART || insn->subOp; ++ const int emit = insn->op == OP_EMIT; ++ ++ if (insn->op != OP_FINAL) ++ emitFormA(0x124, FA_RRR | FA_RIR, __(0), __(1), EMPTY); ++ else ++ emitFormA(0x124, FA_RRR | FA_RIR, __(0), EMPTY, EMPTY); ++ emitField(78, 2, (cut << 1) | emit); ++} ++ ++void ++CodeEmitterGV100::emitRED() ++{ ++ unsigned dType; ++ ++ switch (insn->dType) { ++ case TYPE_U32: dType = 0; break; ++ case TYPE_S32: dType = 1; break; ++ case TYPE_U64: dType = 2; break; ++ case TYPE_F32: dType = 3; break; ++ case TYPE_B128: dType = 4; break; ++ case TYPE_S64: dType = 5; break; ++ default: assert(!"unexpected dType"); dType = 0; break; ++ } ++ ++ emitInsn (0x98e); ++ emitField(87, 3, insn->subOp); ++ emitField(84, 3, 1); // 0=.EF, 1=, 2=.EL, 3=.LU, 4=.EU, 5=.NA ++ 
emitField(79, 2, 2); // .INVALID0/./.STRONG/.INVALID3 ++ emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS ++ emitField(73, 3, dType); ++ emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8); ++ emitGPR (32, insn->src(1)); ++ emitADDR (24, 40, 24, 0, insn->src(0)); ++} ++ ++void ++CodeEmitterGV100::emitST() ++{ ++ emitInsn (0x385); ++ emitField(79, 2, 2); // .INVALID0/./.STRONG/.MMIO ++ emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS ++ emitLDSTs(73, insn->dType); ++ emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8); ++ emitGPR (64, insn->src(1)); ++ emitADDR (24, 32, 32, 0, insn->src(0)); ++} ++ ++void ++CodeEmitterGV100::emitSTL() ++{ ++ emitInsn (0x387); ++ emitField(84, 3, 1); // .EF/./.EL/.LU/.EU/.NA/.INVALID6/.INVALID7 ++ emitLDSTs(73, insn->dType); ++ emitADDR (24, 40, 24, 0, insn->src(0)); ++ emitGPR (32, insn->src(1)); ++} ++ ++void ++CodeEmitterGV100::emitSTS() ++{ ++ emitInsn (0x388); ++ emitLDSTs(73, insn->dType); ++ emitADDR (24, 40, 24, 0, insn->src(0)); ++ emitGPR (32, insn->src(1)); ++} ++ ++/******************************************************************************* ++ * texture ++ ******************************************************************************/ ++ ++void ++CodeEmitterGV100::emitTEXs(int pos) ++{ ++ int src1 = insn->predSrc == 1 ? 
2 : 1; ++ if (insn->srcExists(src1)) ++ emitGPR(pos, insn->src(src1)); ++ else ++ emitGPR(pos); ++} ++ ++void ++CodeEmitterGV100::emitTEX() ++{ ++ const TexInstruction *insn = this->insn->asTex(); ++ int lodm = 0; ++ ++ if (!insn->tex.levelZero) { ++ switch (insn->op) { ++ case OP_TEX: lodm = 0; break; ++ case OP_TXB: lodm = 2; break; ++ case OP_TXL: lodm = 3; break; ++ default: ++ assert(!"invalid tex op"); ++ break; ++ } ++ } else { ++ lodm = 1; ++ } ++ ++ if (insn->tex.rIndirectSrc < 0) { ++ emitInsn (0xb60); ++ emitField(54, 5, prog->driver->io.auxCBSlot); ++ emitField(40, 14, insn->tex.r); ++ } else { ++ emitInsn (0x361); ++ emitField(59, 1, 1); // .B ++ } ++ emitField(90, 1, insn->tex.liveOnly); // .NODEP ++ emitField(87, 3, lodm); ++ emitField(84, 3, 1); // 0=.EF, 1=, 2=.EL, 3=.LU, 4=.EU, 5=.NA ++ emitField(78, 1, insn->tex.target.isShadow()); // .DC ++ emitField(77, 1, insn->tex.derivAll); // .NDV ++ emitField(76, 1, insn->tex.useOffsets == 1); // .AOFFI ++ emitPRED (81); ++ emitGPR (64, insn->def(1)); ++ emitGPR (16, insn->def(0)); ++ emitGPR (24, insn->src(0)); ++ emitTEXs (32); ++ emitField(63, 1, insn->tex.target.isArray()); ++ emitField(61, 2, insn->tex.target.isCube() ? 3 : ++ insn->tex.target.getDim() - 1); ++ emitField(72, 4, insn->tex.mask); ++} ++ ++void ++CodeEmitterGV100::emitTLD() ++{ ++ const TexInstruction *insn = this->insn->asTex(); ++ ++ if (insn->tex.rIndirectSrc < 0) { ++ emitInsn (0xb66); ++ emitField(54, 5, prog->driver->io.auxCBSlot); ++ emitField(40, 14, insn->tex.r); ++ } else { ++ emitInsn (0x367); ++ emitField(59, 1, 1); // .B ++ } ++ emitField(90, 1, insn->tex.liveOnly); ++ emitField(87, 3, insn->tex.levelZero ? 1 /* .LZ */ : 3 /* .LL */); ++ emitPRED (81); ++ emitField(78, 1, insn->tex.target.isMS()); ++ emitField(76, 1, insn->tex.useOffsets == 1); ++ emitField(72, 4, insn->tex.mask); ++ emitGPR (64, insn->def(1)); ++ emitField(63, 1, insn->tex.target.isArray()); ++ emitField(61, 2, insn->tex.target.isCube() ? 
3 : ++ insn->tex.target.getDim() - 1); ++ emitTEXs (32); ++ emitGPR (24, insn->src(0)); ++ emitGPR (16, insn->def(0)); ++} ++ ++void ++CodeEmitterGV100::emitTLD4() ++{ ++ const TexInstruction *insn = this->insn->asTex(); ++ ++ int offsets = 0; ++ switch (insn->tex.useOffsets) { ++ case 4: offsets = 2; break; ++ case 1: offsets = 1; break; ++ case 0: offsets = 0; break; ++ default: assert(!"invalid offsets count"); break; ++ } ++ ++ if (insn->tex.rIndirectSrc < 0) { ++ emitInsn (0xb63); ++ emitField(54, 5, prog->driver->io.auxCBSlot); ++ emitField(40, 14, insn->tex.r); ++ } else { ++ emitInsn (0x364); ++ emitField(59, 1, 1); // .B ++ } ++ emitField(90, 1, insn->tex.liveOnly); ++ emitField(87, 2, insn->tex.gatherComp); ++ emitField(84, 1, 1); // !.EF ++ emitPRED (81); ++ emitField(78, 1, insn->tex.target.isShadow()); ++ emitField(76, 2, offsets); ++ emitField(72, 4, insn->tex.mask); ++ emitGPR (64, insn->def(1)); ++ emitField(63, 1, insn->tex.target.isArray()); ++ emitField(61, 2, insn->tex.target.isCube() ? 3 : ++ insn->tex.target.getDim() - 1); ++ emitTEXs (32); ++ emitGPR (24, insn->src(0)); ++ emitGPR (16, insn->def(0)); ++} ++ ++void ++CodeEmitterGV100::emitTMML() ++{ ++ const TexInstruction *insn = this->insn->asTex(); ++ ++ if (insn->tex.rIndirectSrc < 0) { ++ emitInsn (0xb69); ++ emitField(54, 5, prog->driver->io.auxCBSlot); ++ emitField(40, 14, insn->tex.r); ++ } else { ++ emitInsn (0x36a); ++ emitField(59, 1, 1); // .B ++ } ++ emitField(90, 1, insn->tex.liveOnly); ++ emitField(77, 1, insn->tex.derivAll); ++ emitField(72, 4, insn->tex.mask); ++ emitGPR (64, insn->def(1)); ++ emitField(63, 1, insn->tex.target.isArray()); ++ emitField(61, 2, insn->tex.target.isCube() ? 
3 : ++ insn->tex.target.getDim() - 1); ++ emitTEXs (32); ++ emitGPR (24, insn->src(0)); ++ emitGPR (16, insn->def(0)); ++} ++ ++void ++CodeEmitterGV100::emitTXD() ++{ ++ const TexInstruction *insn = this->insn->asTex(); ++ ++ if (insn->tex.rIndirectSrc < 0) { ++ emitInsn (0xb6c); ++ emitField(54, 5, prog->driver->io.auxCBSlot); ++ emitField(40, 14, insn->tex.r); ++ } else { ++ emitInsn (0x36d); ++ emitField(59, 1, 1); // .B ++ } ++ emitField(90, 1, insn->tex.liveOnly); ++ emitPRED (81); ++ emitField(76, 1, insn->tex.useOffsets == 1); ++ emitField(72, 4, insn->tex.mask); ++ emitGPR (64, insn->def(1)); ++ emitField(63, 1, insn->tex.target.isArray()); ++ emitField(61, 2, insn->tex.target.isCube() ? 3 : ++ insn->tex.target.getDim() - 1); ++ emitTEXs (32); ++ emitGPR (24, insn->src(0)); ++ emitGPR (16, insn->def(0)); ++} ++ ++void ++CodeEmitterGV100::emitTXQ() ++{ ++ const TexInstruction *insn = this->insn->asTex(); ++ int type = 0; ++ ++ switch (insn->tex.query) { ++ case TXQ_DIMS : type = 0x00; break; ++ case TXQ_TYPE : type = 0x01; break; ++ case TXQ_SAMPLE_POSITION: type = 0x02; break; ++ default: ++ assert(!"invalid txq query"); ++ break; ++ } ++ ++ if (insn->tex.rIndirectSrc < 0) { ++ emitInsn (0xb6f); ++ emitField(54, 5, prog->driver->io.auxCBSlot); ++ emitField(40, 14, insn->tex.r); ++ } else { ++ emitInsn (0x370); ++ emitField(59, 1, 1); // .B ++ } ++ emitField(90, 1, insn->tex.liveOnly); ++ emitField(72, 4, insn->tex.mask); ++ emitGPR (64, insn->def(1)); ++ emitField(62, 2, type); ++ emitGPR (24, insn->src(0)); ++ emitGPR (16, insn->def(0)); ++} ++ ++/******************************************************************************* ++ * surface ++ ******************************************************************************/ ++ ++void ++CodeEmitterGV100::emitSUHandle(const int s) ++{ ++ const TexInstruction *insn = this->insn->asTex(); ++ ++ assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP); ++ ++ if (insn->src(s).getFile() == FILE_GPR) { ++ emitGPR(64, 
insn->src(s)); ++ } else { ++ assert(0); ++ //XXX: not done ++ ImmediateValue *imm = insn->getSrc(s)->asImm(); ++ assert(imm); ++ emitField(0x33, 1, 1); ++ emitField(0x24, 13, imm->reg.data.u32); ++ } ++} ++ ++void ++CodeEmitterGV100::emitSUTarget() ++{ ++ const TexInstruction *insn = this->insn->asTex(); ++ int target = 0; ++ ++ assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP); ++ ++ if (insn->tex.target == TEX_TARGET_BUFFER) { ++ target = 1; ++ } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) { ++ target = 2; ++ } else if (insn->tex.target == TEX_TARGET_2D || ++ insn->tex.target == TEX_TARGET_RECT) { ++ target = 3; ++ } else if (insn->tex.target == TEX_TARGET_2D_ARRAY || ++ insn->tex.target == TEX_TARGET_CUBE || ++ insn->tex.target == TEX_TARGET_CUBE_ARRAY) { ++ target = 4; ++ } else if (insn->tex.target == TEX_TARGET_3D) { ++ target = 5; ++ } else { ++ assert(insn->tex.target == TEX_TARGET_1D); ++ } ++ emitField(61, 3, target); ++} ++ ++void ++CodeEmitterGV100::emitSUATOM() ++{ ++ const TexInstruction *insn = this->insn->asTex(); ++ uint8_t type = 0, subOp; ++ ++ if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) ++ emitInsn(0x396); // SUATOM.D.CAS ++ else ++ emitInsn(0x394); // SUATOM.D ++ ++ emitSUTarget(); ++ ++ // destination type ++ switch (insn->dType) { ++ case TYPE_S32: type = 1; break; ++ case TYPE_U64: type = 2; break; ++ case TYPE_F32: type = 3; break; ++ case TYPE_S64: type = 5; break; ++ default: ++ assert(insn->dType == TYPE_U32); ++ break; ++ } ++ ++ // atomic operation ++ if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) { ++ subOp = 0; ++ } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) { ++ subOp = 8; ++ } else { ++ subOp = insn->subOp; ++ } ++ ++ emitField(87, 4, subOp); ++ emitPRED (81); ++ emitField(79, 2, 1); ++ emitField(73, 3, type); ++ emitField(72, 1, 0); // .BA ++ emitGPR (32, insn->src(1)); ++ emitGPR (24, insn->src(0)); ++ emitGPR (16, insn->def(0)); ++ ++ emitSUHandle(2); ++} ++ ++void ++CodeEmitterGV100::emitSULD() ++{ ++ const 
TexInstruction *insn = this->insn->asTex(); ++ int type = 0; ++ ++ if (insn->op == OP_SULDB) { ++ emitInsn(0x99a); ++ emitSUTarget(); ++ ++ switch (insn->dType) { ++ case TYPE_U8: type = 0; break; ++ case TYPE_S8: type = 1; break; ++ case TYPE_U16: type = 2; break; ++ case TYPE_S16: type = 3; break; ++ case TYPE_U32: type = 4; break; ++ case TYPE_U64: type = 5; break; ++ case TYPE_B128: type = 6; break; ++ default: ++ assert(0); ++ break; ++ } ++ emitField(73, 3, type); ++ } else { ++ emitInsn(0x998); ++ emitSUTarget(); ++ emitField(72, 4, 0xf); // rgba ++ } ++ ++ emitPRED (81); ++ emitLDSTc(77, 79); ++ ++ emitGPR (16, insn->def(0)); ++ emitGPR (24, insn->src(0)); ++ ++ emitSUHandle(1); ++} ++ ++void ++CodeEmitterGV100::emitSUST() ++{ ++ const TexInstruction *insn = this->insn->asTex(); ++ ++ emitInsn(0x99c); // SUST.P ++#if 0 ++ if (insn->op == OP_SUSTB) ++ emitField(0x34, 1, 1); ++#endif ++ emitSUTarget(); ++ ++ emitLDSTc(77, 79); ++ emitField(72, 4, 0xf); // rgba ++ emitGPR(32, insn->src(1)); ++ emitGPR(24, insn->src(0)); ++ emitSUHandle(2); ++} ++ ++/******************************************************************************* ++ * misc ++ ******************************************************************************/ ++ ++void ++CodeEmitterGV100::emitAL2P() ++{ ++ emitInsn (0x920); ++ emitO (79); ++ emitField(74, 2, (insn->getDef(0)->reg.size / 4) - 1); ++ emitField(40, 11, insn->src(0).get()->reg.data.offset); ++ emitGPR (24, insn->src(0).getIndirect(0)); ++ emitGPR (16, insn->def(0)); ++} ++ ++void ++CodeEmitterGV100::emitBAR() ++{ ++ uint8_t subop, redop = 0x00; ++ ++ // 80 ++ // 01: DEFER_BLOCKING ++ // 78:77 ++ // 00: SYNC ++ // 01: ARV ++ // 02: RED ++ // 03: SCAN ++ // 75:74 ++ // 00: RED.POPC ++ // 01: RED.AND ++ // 02: RED.OR ++ ++ switch (insn->subOp) { ++ case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; redop = 0x00; break; ++ case NV50_IR_SUBOP_BAR_RED_AND : subop = 0x02; redop = 0x01; break; ++ case NV50_IR_SUBOP_BAR_RED_OR : subop = 0x02; redop = 
0x02; break; ++ case NV50_IR_SUBOP_BAR_ARRIVE : subop = 0x01; break; ++ default: ++ subop = 0x00; ++ assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC); ++ break; ++ } ++ ++ if (insn->src(0).getFile() == FILE_GPR) { ++ emitInsn ((1 << 9) | 0x11d); ++ emitGPR (32, insn->src(0)); //XXX: nvdisasm shows src0==src1 ++ } else { ++ ImmediateValue *imm = insn->getSrc(0)->asImm(); ++ assert(imm); ++ if (insn->src(1).getFile() == FILE_GPR) { ++ emitInsn ((4 << 9) | 0x11d); ++ emitGPR (32, insn->src(1)); ++ } else { ++ emitInsn ((5 << 9) | 0x11d); ++ } ++ emitField(54, 4, imm->reg.data.u32); ++ } ++ ++ emitField(77, 2, subop); ++ emitField(74, 2, redop); ++ ++ if (insn->srcExists(2) && (insn->predSrc != 2)) { ++ emitField(90, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT)); ++ emitPRED (87, insn->src(2)); ++ } else { ++ emitField(87, 3, 7); ++ } ++} ++ ++void ++CodeEmitterGV100::emitCCTL() ++{ ++ if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) ++ emitInsn(0x98f); ++ else ++ emitInsn(0x990); ++ emitField(87, 4, insn->subOp); ++ emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8); ++ emitADDR (24, 32, 32, 0, insn->src(0)); ++} ++ ++void ++CodeEmitterGV100::emitMEMBAR() ++{ ++ emitInsn (0x992); ++ switch (NV50_IR_SUBOP_MEMBAR_SCOPE(insn->subOp)) { ++ case NV50_IR_SUBOP_MEMBAR_CTA: emitField(76, 3, 0); break; ++ case NV50_IR_SUBOP_MEMBAR_GL : emitField(76, 3, 2); break; ++ case NV50_IR_SUBOP_MEMBAR_SYS: emitField(76, 3, 3); break; ++ default: ++ assert(!"invalid scope"); ++ break; ++ } ++} ++ ++void ++CodeEmitterGV100::emitPIXLD() ++{ ++ emitInsn (0x925); ++ switch (insn->subOp) { ++ case NV50_IR_SUBOP_PIXLD_COVMASK : emitField(78, 3, 1); break; // .COVMASK ++ case NV50_IR_SUBOP_PIXLD_SAMPLEID: emitField(78, 3, 3); break; // .MY_INDEX ++ default: ++ assert(0); ++ break; ++ } ++ emitPRED (71); ++ emitGPR (16, insn->def(0)); ++} ++ ++void ++CodeEmitterGV100::emitPLOP3_LUT() ++{ ++ uint8_t op[2] = {}; ++ ++ switch (insn->op) { ++ case OP_AND: op[0] = 0xf0 & 0xcc; break; ++ 
case OP_OR : op[0] = 0xf0 | 0xcc; break; ++ case OP_XOR: op[0] = 0xf0 ^ 0xcc; break; ++ default: ++ assert(!"invalid PLOP3"); ++ break; ++ } ++ ++ emitInsn(0x81c); ++ emitNOT (90, insn->src(0)); ++ emitPRED(87, insn->src(0)); ++ emitPRED(84); // def(1) ++ emitPRED(81, insn->def(0)); ++ emitNOT (80, insn->src(1)); ++ emitPRED(77, insn->src(1)); ++ emitField(72, 5, op[0] >> 3); ++ emitNOT (71); // src(2) ++ emitPRED(68); // src(2) ++ emitField(64, 3, op[0] & 7); ++ emitField(16, 8, op[1]); ++} ++ ++void ++CodeEmitterGV100::emitVOTE() ++{ ++ const ImmediateValue *imm; ++ uint32_t u32; ++ ++ int r = -1, p = -1; ++ for (int i = 0; insn->defExists(i); i++) { ++ if (insn->def(i).getFile() == FILE_GPR) ++ r = i; ++ else if (insn->def(i).getFile() == FILE_PREDICATE) ++ p = i; ++ } ++ ++ emitInsn (0x806); ++ emitField(72, 2, insn->subOp); ++ if (r >= 0) ++ emitGPR (16, insn->def(r)); ++ else ++ emitGPR (16); ++ if (p >= 0) ++ emitPRED (81, insn->def(p)); ++ else ++ emitPRED (81); ++ ++ switch (insn->src(0).getFile()) { ++ case FILE_PREDICATE: ++ emitField(90, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT)); ++ emitPRED (87, insn->src(0)); ++ break; ++ case FILE_IMMEDIATE: ++ imm = insn->getSrc(0)->asImm(); ++ assert(imm); ++ u32 = imm->reg.data.u32; ++ assert(u32 == 0 || u32 == 1); ++ emitField(90, 1, u32 == 0); ++ emitPRED (87); ++ break; ++ default: ++ assert(!"Unhandled src"); ++ break; ++ } ++} ++ ++bool ++CodeEmitterGV100::emitInstruction(Instruction *i) ++{ ++ insn = i; ++ ++ switch (insn->op) { ++ case OP_ABS: ++ assert(!isFloatType(insn->dType)); ++ emitIABS(); ++ break; ++ case OP_ADD: ++ if (isFloatType(insn->dType)) { ++ if (insn->dType == TYPE_F32) ++ emitFADD(); ++ else ++ emitDADD(); ++ } else { ++ emitIADD3(); ++ } ++ break; ++ case OP_AFETCH: ++ emitAL2P(); ++ break; ++ case OP_AND: ++ case OP_OR: ++ case OP_XOR: ++ if (insn->def(0).getFile() == FILE_PREDICATE) { ++ emitPLOP3_LUT(); ++ } else { ++ assert(!"invalid logop"); ++ emitNOP(); ++ } ++ break; ++ 
case OP_ATOM: ++ if (insn->src(0).getFile() == FILE_MEMORY_SHARED) ++ emitATOMS(); ++ else ++ if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS) ++ emitRED(); ++ else ++ emitATOM(); ++ break; ++ case OP_BAR: ++ emitBAR(); ++ break; ++ case OP_BFIND: ++ emitFLO(); ++ break; ++ case OP_BMSK: ++ emitBMSK(); ++ break; ++ case OP_BREV: ++ emitBREV(); ++ break; ++ case OP_BRA: ++ case OP_JOIN: //XXX ++ emitBRA(); ++ break; ++ case OP_CCTL: ++ emitCCTL(); ++ break; ++ case OP_CEIL: ++ case OP_CVT: ++ case OP_FLOOR: ++ case OP_TRUNC: ++ if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE || ++ insn->src(0).getFile() == FILE_PREDICATE)) { ++ emitMOV(); ++ } else if (isFloatType(insn->dType)) { ++ if (isFloatType(insn->sType)) { ++ if (insn->sType == insn->dType) ++ emitFRND(); ++ else ++ emitF2F(); ++ } else { ++ emitI2F(); ++ } ++ } else { ++ if (isFloatType(insn->sType)) { ++ emitF2I(); ++ } else { ++ assert(!"I2I"); ++ emitNOP(); ++ } ++ } ++ break; ++ case OP_COS: ++ case OP_EX2: ++ case OP_LG2: ++ case OP_RCP: ++ case OP_RSQ: ++ case OP_SIN: ++ case OP_SQRT: ++ emitMUFU(); ++ break; ++ case OP_DISCARD: ++ emitKILL(); ++ break; ++ case OP_EMIT: ++ case OP_FINAL: ++ case OP_RESTART: ++ emitOUT(); ++ break; ++ case OP_EXIT: ++ emitEXIT(); ++ break; ++ case OP_EXPORT: ++ emitAST(); ++ break; ++ case OP_FMA: ++ case OP_MAD: ++ if (isFloatType(insn->dType)) { ++ if (insn->dType == TYPE_F32) ++ emitFFMA(); ++ else ++ emitDFMA(); ++ } else { ++ if (typeSizeof(insn->dType) != 8) ++ emitIMAD(); ++ else ++ emitIMAD_WIDE(); ++ } ++ break; ++ case OP_JOINAT: //XXX ++ emitNOP(); ++ break; ++ case OP_LINTERP: ++ emitIPA(); ++ break; ++ case OP_LOAD: ++ switch (insn->src(0).getFile()) { ++ case FILE_MEMORY_CONST : emitLDC(); break; ++ case FILE_MEMORY_LOCAL : emitLDL(); break; ++ case FILE_MEMORY_SHARED: emitLDS(); break; ++ case FILE_MEMORY_GLOBAL: emitLD(); break; ++ default: ++ assert(!"invalid load"); ++ emitNOP(); ++ break; ++ } ++ break; ++ case 
OP_LOP3_LUT: ++ emitLOP3_LUT(); ++ break; ++ case OP_MAX: ++ case OP_MIN: ++ if (isFloatType(insn->dType)) { ++ if (insn->dType == TYPE_F32) { ++ emitFMNMX(); ++ } else { ++ assert(!"invalid FMNMX"); ++ emitNOP(); ++ } ++ } else { ++ assert(!"invalid MNMX"); ++ emitNOP(); ++ } ++ break; ++ case OP_MEMBAR: ++ emitMEMBAR(); ++ break; ++ case OP_MOV: ++ emitMOV(); ++ break; ++ case OP_MUL: ++ if (isFloatType(insn->dType)) { ++ if (insn->dType == TYPE_F32) ++ emitFMUL(); ++ else ++ emitDMUL(); ++ } else { ++ assert(!"invalid IMUL"); ++ emitNOP(); ++ } ++ break; ++ case OP_PERMT: ++ emitPRMT(); ++ break; ++ case OP_PFETCH: ++ emitISBERD(); ++ break; ++ case OP_PIXLD: ++ emitPIXLD(); ++ break; ++ case OP_POPCNT: ++ emitPOPC(); ++ break; ++ case OP_QUADOP: ++ emitFSWZADD(); ++ break; ++ case OP_RDSV: ++ if (targ->isCS2RSV(insn->getSrc(0)->reg.data.sv.sv)) ++ emitCS2R(); ++ else ++ emitS2R(); ++ break; ++ case OP_SELP: ++ emitSEL(); ++ break; ++ case OP_SET: ++ case OP_SET_AND: ++ case OP_SET_OR: ++ case OP_SET_XOR: ++ if (insn->def(0).getFile() != FILE_PREDICATE) { ++ if (isFloatType(insn->dType)) { ++ if (insn->dType == TYPE_F32) { ++ emitFSET_BF(); ++ } else { ++ assert(!"invalid FSET"); ++ emitNOP(); ++ } ++ } else { ++ assert(!"invalid SET"); ++ emitNOP(); ++ } ++ } else { ++ if (isFloatType(insn->sType)) ++ if (insn->sType == TYPE_F64) ++ emitDSETP(); ++ else ++ emitFSETP(); ++ else ++ emitISETP(); ++ } ++ break; ++ case OP_SGXT: ++ emitSGXT(); ++ break; ++ case OP_SHF: ++ emitSHF(); ++ break; ++ case OP_SHFL: ++ emitSHFL(); ++ break; ++ case OP_SHLADD: ++ emitLEA(); ++ break; ++ case OP_STORE: ++ switch (insn->src(0).getFile()) { ++ case FILE_MEMORY_LOCAL : emitSTL(); break; ++ case FILE_MEMORY_SHARED: emitSTS(); break; ++ case FILE_MEMORY_GLOBAL: emitST(); break; ++ default: ++ assert(!"invalid store"); ++ emitNOP(); ++ break; ++ } ++ break; ++ case OP_SULDB: ++ case OP_SULDP: ++ emitSULD(); ++ break; ++ case OP_SUREDB: ++ case OP_SUREDP: ++ emitSUATOM(); ++ break; 
++ case OP_SUSTB: ++ case OP_SUSTP: ++ emitSUST(); ++ break; ++ case OP_TEX: ++ case OP_TXB: ++ case OP_TXL: ++ emitTEX(); ++ break; ++ case OP_TXD: ++ emitTXD(); ++ break; ++ case OP_TXF: ++ emitTLD(); ++ break; ++ case OP_TXG: ++ emitTLD4(); ++ break; ++ case OP_TXLQ: ++ emitTMML(); ++ break; ++ case OP_TXQ: ++ emitTXQ(); ++ break; ++ case OP_VFETCH: ++ emitALD(); ++ break; ++ case OP_VOTE: ++ emitVOTE(); ++ break; ++ case OP_WARPSYNC: ++ emitWARPSYNC(); ++ break; ++ default: ++ assert(!"invalid opcode"); ++ emitNOP(); ++ break; ++ } ++ ++ code[3] &= 0x000001ff; ++ code[3] |= insn->sched << 9; ++ code += 4; ++ codeSize += 16; ++ return true; ++} ++ ++void ++CodeEmitterGV100::prepareEmission(BasicBlock *bb) ++{ ++ Function *func = bb->getFunction(); ++ Instruction *i; ++ int j; ++ ++ for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j); ++ ++ for (; j >= 0; --j) { ++ BasicBlock *in = func->bbArray[j]; ++ Instruction *exit = in->getExit(); ++ ++ if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) { ++ in->binSize -= 16; ++ func->binSize -= 16; ++ ++ for (++j; j < func->bbCount; ++j) ++ func->bbArray[j]->binPos -= 16; ++ ++ in->remove(exit); ++ } ++ bb->binPos = in->binPos + in->binSize; ++ if (in->binSize) // no more no-op branches to bb ++ break; ++ } ++ func->bbArray[func->bbCount++] = bb; ++ ++ if (!bb->getExit()) ++ return; ++ ++ for (i = bb->getEntry(); i; i = i->next) { ++ i->encSize = getMinEncodingSize(i); ++ bb->binSize += i->encSize; ++ } ++ ++ assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 16)); ++ ++ func->binSize += bb->binSize; ++} ++ ++void ++CodeEmitterGV100::prepareEmission(Function *func) ++{ ++ SchedDataCalculatorGM107 sched(targ); ++ CodeEmitter::prepareEmission(func); ++ sched.run(func, true, true); ++} ++ ++void ++CodeEmitterGV100::prepareEmission(Program *prog) ++{ ++ for (ArrayList::Iterator fi = prog->allFuncs.iterator(); ++ !fi.end(); fi.next()) { ++ Function *func = 
reinterpret_cast(fi.get()); ++ func->binPos = prog->binSize; ++ prepareEmission(func); ++ prog->binSize += func->binSize; ++ } ++ ++ this->prog = prog; ++} ++ ++CodeEmitterGV100::CodeEmitterGV100(TargetGV100 *target) ++ : CodeEmitter(target), targ(target) ++{ ++ code = NULL; ++ codeSize = codeSizeLimit = 0; ++ relocInfo = NULL; ++} ++}; +diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.h +new file mode 100644 +index 00000000000..15ab717e460 +--- /dev/null ++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.h +@@ -0,0 +1,403 @@ ++/* ++ * Copyright 2020 Red Hat Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. 
++ */ ++#ifndef __NV50_IR_EMIT_GV100_H__ ++#define __NV50_IR_EMIT_GV100_H__ ++#include "codegen/nv50_ir_target_gv100.h" ++ ++namespace nv50_ir { ++ ++class CodeEmitterGV100 : public CodeEmitter { ++public: ++ CodeEmitterGV100(TargetGV100 *target); ++ ++ virtual bool emitInstruction(Instruction *); ++ virtual uint32_t getMinEncodingSize(const Instruction *) const { return 16; } ++ ++private: ++ const Program *prog; ++ const TargetGV100 *targ; ++ const Instruction *insn; ++ ++ virtual void prepareEmission(Program *); ++ virtual void prepareEmission(Function *); ++ virtual void prepareEmission(BasicBlock *); ++ ++ inline void emitInsn(uint32_t op) { ++ code[0] = op; ++ code[1] = 0; ++ code[2] = 0; ++ code[3] = 0; ++ if (insn->predSrc >= 0) { ++ emitField(12, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id); ++ emitField(15, 1, insn->cc == CC_NOT_P); ++ } else { ++ emitField(12, 3, 7); ++ } ++ }; ++ ++ inline void emitField(int b, int s, uint64_t v) { ++ if (b >= 0) { ++ uint64_t m = ~0ULL >> (64 - s); ++ uint64_t d = v & m; ++ assert(!(v & ~m) || (v & ~m) == ~m); ++ if (b < 64 && b + s > 64) { ++ *(uint64_t *)&code[0] |= d << b; ++ *(uint64_t *)&code[2] |= d >> (64 - b); ++ } else { ++ *(uint64_t *)&code[(b/64*2)] |= d << (b & 0x3f); ++ } ++ } ++ }; ++ ++ inline void emitABS(int pos, int src, bool supported) ++ { ++ if (insn->src(src).mod.abs()) { ++ assert(supported); ++ emitField(pos, 1, 1); ++ } ++ } ++ ++ inline void emitABS(int pos, int src) ++ { ++ emitABS(pos, src, true); ++ } ++ ++ inline void emitNEG(int pos, int src, bool supported) { ++ if (insn->src(src).mod.neg()) { ++ assert(supported); ++ emitField(pos, 1, 1); ++ } ++ } ++ ++ inline void emitNEG(int pos, int src) { ++ emitNEG(pos, src, true); ++ } ++ ++ inline void emitNOT(int pos) { ++ emitField(pos, 1, 0); ++ }; ++ ++ inline void emitNOT(int pos, const ValueRef &ref) { ++ emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT))); ++ } ++ ++ inline void emitSAT(int pos) { ++ emitField(pos, 1, 
insn->saturate); ++ } ++ ++ inline void emitRND(int rmp, RoundMode rnd, int rip) { ++ int rm = 0, ri = 0; ++ switch (rnd) { ++ case ROUND_NI: ri = 1; ++ case ROUND_N : rm = 0; break; ++ case ROUND_MI: ri = 1; ++ case ROUND_M : rm = 1; break; ++ case ROUND_PI: ri = 1; ++ case ROUND_P : rm = 2; break; ++ case ROUND_ZI: ri = 1; ++ case ROUND_Z : rm = 3; break; ++ default: ++ assert(!"invalid round mode"); ++ break; ++ } ++ emitField(rip, 1, ri); ++ emitField(rmp, 2, rm); ++ } ++ ++ inline void emitRND(int pos) { ++ emitRND(pos, insn->rnd, -1); ++ } ++ ++ inline void emitFMZ(int pos, int len) { ++ emitField(pos, len, insn->dnz << 1 | insn->ftz); ++ } ++ ++ inline void emitPDIV(int pos) { ++ emitField(pos, 3, insn->postFactor + 4); ++ } ++ ++ inline void emitO(int pos) { ++ emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT); ++ } ++ ++ inline void emitP(int pos) { ++ emitField(pos, 1, insn->perPatch); ++ } ++ ++ inline void emitCond3(int pos, CondCode code) { ++ int data = 0; ++ ++ switch (code) { ++ case CC_FL : data = 0x00; break; ++ case CC_LTU: ++ case CC_LT : data = 0x01; break; ++ case CC_EQU: ++ case CC_EQ : data = 0x02; break; ++ case CC_LEU: ++ case CC_LE : data = 0x03; break; ++ case CC_GTU: ++ case CC_GT : data = 0x04; break; ++ case CC_NEU: ++ case CC_NE : data = 0x05; break; ++ case CC_GEU: ++ case CC_GE : data = 0x06; break; ++ case CC_TR : data = 0x07; break; ++ default: ++ assert(!"invalid cond3"); ++ break; ++ } ++ ++ emitField(pos, 3, data); ++ } ++ ++ inline void emitCond4(int pos, CondCode code) { ++ int data = 0; ++ ++ switch (code) { ++ case CC_FL: data = 0x00; break; ++ case CC_LT: data = 0x01; break; ++ case CC_EQ: data = 0x02; break; ++ case CC_LE: data = 0x03; break; ++ case CC_GT: data = 0x04; break; ++ case CC_NE: data = 0x05; break; ++ case CC_GE: data = 0x06; break; ++ // case CC_NUM: data = 0x07; break; ++ // case CC_NAN: data = 0x08; break; ++ case CC_LTU: data = 0x09; break; ++ case CC_EQU: data = 0x0a; break; ++ case 
CC_LEU: data = 0x0b; break; ++ case CC_GTU: data = 0x0c; break; ++ case CC_NEU: data = 0x0d; break; ++ case CC_GEU: data = 0x0e; break; ++ case CC_TR: data = 0x0f; break; ++ default: ++ assert(!"invalid cond4"); ++ break; ++ } ++ ++ emitField(pos, 4, data); ++ } ++ ++ inline void emitSYS(int pos, const Value *val) { ++ int id = val ? val->reg.data.id : -1; ++ ++ switch (id) { ++ case SV_LANEID : id = 0x00; break; ++ case SV_VERTEX_COUNT : id = 0x10; break; ++ case SV_INVOCATION_ID : id = 0x11; break; ++ case SV_THREAD_KILL : id = 0x13; break; ++ case SV_INVOCATION_INFO: id = 0x1d; break; ++ case SV_COMBINED_TID : id = 0x20; break; ++ case SV_TID : id = 0x21 + val->reg.data.sv.index; break; ++ case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break; ++ case SV_LANEMASK_EQ : id = 0x38; break; ++ case SV_LANEMASK_LT : id = 0x39; break; ++ case SV_LANEMASK_LE : id = 0x3a; break; ++ case SV_LANEMASK_GT : id = 0x3b; break; ++ case SV_LANEMASK_GE : id = 0x3c; break; ++ case SV_CLOCK : id = 0x50 + val->reg.data.sv.index; break; ++ default: ++ assert(!"invalid system value"); ++ id = 0; ++ break; ++ } ++ ++ emitField(pos, 8, id); ++ } ++ ++ inline void emitSYS(int pos, const ValueRef &ref) { ++ emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL); ++ } ++ ++ inline void emitGPR(int pos, const Value *val, int off) { ++ emitField(pos, 8, val && !val->inFile(FILE_FLAGS) ? ++ val->reg.data.id + off: 255); ++ } ++ ++ inline void emitGPR(int pos, const Value *v) { ++ emitGPR(pos, v, 0); ++ } ++ ++ inline void emitGPR(int pos) { ++ emitGPR(pos, (const Value *)NULL); ++ } ++ ++ inline void emitGPR(int pos, const ValueRef &ref) { ++ emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL); ++ } ++ ++ inline void emitGPR(int pos, const ValueRef *ref) { ++ emitGPR(pos, ref ? ref->rep() : (const Value *)NULL); ++ } ++ ++ inline void emitGPR(int pos, const ValueDef &def) { ++ emitGPR(pos, def.get() ? 
def.rep() : (const Value *)NULL); ++ } ++ ++ inline void emitGPR(int pos, const ValueDef &def, int off) { ++ emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL, off); ++ } ++ ++ inline void emitPRED(int pos, const Value *val) { ++ emitField(pos, 3, val ? val->reg.data.id : 7); ++ }; ++ ++ inline void emitPRED(int pos) { ++ emitPRED(pos, (const Value *)NULL); ++ } ++ ++ inline void emitPRED(int pos, const ValueRef &ref) { ++ emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL); ++ } ++ ++ inline void emitPRED(int pos, const ValueDef &def) { ++ emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL); ++ } ++ ++ inline void emitCBUF(int buf, int gpr, int off, int len, int align, ++ const ValueRef &ref) { ++ const Value *v = ref.get(); ++ const Symbol *s = v->asSym(); ++ ++ assert(!(s->reg.data.offset & ((1 << align) - 1))); ++ ++ emitField(buf, 5, v->reg.fileIndex); ++ if (gpr >= 0) ++ emitGPR(gpr, ref.getIndirect(0)); ++ emitField(off, 16, s->reg.data.offset); ++ } ++ ++ inline void emitIMMD(int pos, int len, const ValueRef &ref) { ++ const ImmediateValue *imm = ref.get()->asImm(); ++ uint32_t val = imm->reg.data.u32; ++ ++ if (insn->sType == TYPE_F64) { ++ assert(!(imm->reg.data.u64 & 0x00000000ffffffffULL)); ++ val = imm->reg.data.u64 >> 32; ++ } ++ ++ emitField(pos, len, val); ++ } ++ ++ inline void emitADDR(int gpr, int off, int len, int shr, ++ const ValueRef &ref) { ++ const Value *v = ref.get(); ++ assert(!(v->reg.data.offset & ((1 << shr) - 1))); ++ if (gpr >= 0) ++ emitGPR(gpr, ref.getIndirect(0)); ++ emitField(off, len, v->reg.data.offset >> shr); ++ } ++ ++ inline void emitFormA(uint16_t op, uint8_t forms, int src0, int src1, int src2); ++ inline void emitFormA_RRR(uint16_t op, int src1, int src2); ++ inline void emitFormA_RRI(uint16_t op, int src1, int src2); ++ inline void emitFormA_RRC(uint16_t op, int src1, int src2); ++ inline void emitFormA_I32(int src); ++ ++ void emitBRA(); ++ void emitEXIT(); ++ void emitKILL(); ++ void emitNOP(); 
++ void emitWARPSYNC(); ++ ++ void emitCS2R(); ++ void emitF2F(); ++ void emitF2I(); ++ void emitFRND(); ++ void emitI2F(); ++ void emitMOV(); ++ void emitPRMT(); ++ void emitS2R(); ++ void emitSEL(); ++ void emitSHFL(); ++ ++ void emitFADD(); ++ void emitFFMA(); ++ void emitFMNMX(); ++ void emitFMUL(); ++ void emitFSET_BF(); ++ void emitFSETP(); ++ void emitFSWZADD(); ++ void emitMUFU(); ++ ++ void emitDADD(); ++ void emitDFMA(); ++ void emitDMUL(); ++ void emitDSETP(); ++ ++ void emitBMSK(); ++ void emitBREV(); ++ void emitFLO(); ++ void emitIABS(); ++ void emitIADD3(); ++ void emitIMAD(); ++ void emitIMAD_WIDE(); ++ void emitISETP(); ++ void emitLEA(); ++ void emitLOP3_LUT(); ++ void emitPOPC(); ++ void emitSGXT(); ++ void emitSHF(); ++ ++ void emitALD(); ++ void emitAST(); ++ void emitATOM(); ++ void emitATOMS(); ++ void emitIPA(); ++ void emitISBERD(); ++ void emitLDSTc(int, int); ++ void emitLDSTs(int, DataType); ++ void emitLD(); ++ void emitLDC(); ++ void emitLDL(); ++ void emitLDS(); ++ void emitOUT(); ++ void emitRED(); ++ void emitST(); ++ void emitSTL(); ++ void emitSTS(); ++ ++ void emitTEXs(int); ++ void emitTEX(); ++ void emitTLD(); ++ void emitTLD4(); ++ void emitTMML(); ++ void emitTXD(); ++ void emitTXQ(); ++ ++ void emitSUHandle(const int); ++ void emitSUTarget(); ++ void emitSUATOM(); ++ void emitSULD(); ++ void emitSUST(); ++ ++ void emitAL2P(); ++ void emitBAR(); ++ void emitCCTL(); ++ void emitMEMBAR(); ++ void emitPIXLD(); ++ void emitPLOP3_LUT(); ++ void emitVOTE(); ++}; ++ ++}; ++#endif +diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +index bd78b76f384..eee9aa67256 100644 +--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp ++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +@@ -170,6 +170,7 @@ private: + NirArrayLMemOffsets regToLmemOffset; + NirBlockMap blocks; + unsigned int curLoopDepth; ++ unsigned int curIfDepth; + + BasicBlock 
*exit; + Value *zero; +@@ -188,6 +189,7 @@ Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info) + : ConverterCommon(prog, info), + nir(nir), + curLoopDepth(0), ++ curIfDepth(0), + clipVertexOutput(-1) + { + zero = mkImm((uint32_t)0); +@@ -571,6 +573,10 @@ Converter::getSubOp(nir_op op) + case nir_op_imul_high: + case nir_op_umul_high: + return NV50_IR_SUBOP_MUL_HIGH; ++ case nir_op_ishl: ++ case nir_op_ishr: ++ case nir_op_ushr: ++ return NV50_IR_SUBOP_SHIFT_WRAP; + default: + return 0; + } +@@ -909,7 +915,7 @@ calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info, + uint16_t slots; + switch (stage) { + case Program::TYPE_GEOMETRY: +- slots = type->uniform_locations(); ++ slots = type->count_attribute_slots(false); + if (input) + slots /= info.gs.vertices_in; + break; +@@ -917,9 +923,9 @@ calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info, + case Program::TYPE_TESSELLATION_EVAL: + // remove first dimension + if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL)) +- slots = type->uniform_locations(); ++ slots = type->count_attribute_slots(false); + else +- slots = type->fields.array->uniform_locations(); ++ slots = type->fields.array->count_attribute_slots(false); + break; + default: + slots = type->count_attribute_slots(false); +@@ -929,6 +935,24 @@ calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info, + return slots; + } + ++static uint8_t ++getMaskForType(const glsl_type *type, uint8_t slot) { ++ uint16_t comp = type->without_array()->components(); ++ comp = comp ? 
comp : 4; ++ ++ if (glsl_base_type_is_64bit(type->without_array()->base_type)) { ++ comp *= 2; ++ if (comp > 4) { ++ if (slot % 2) ++ comp -= 4; ++ else ++ comp = 4; ++ } ++ } ++ ++ return (1 << comp) - 1; ++} ++ + bool Converter::assignSlots() { + unsigned name; + unsigned index; +@@ -981,16 +1005,8 @@ bool Converter::assignSlots() { + const glsl_type *type = var->type; + int slot = var->data.location; + uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var); +- uint32_t comp = type->is_array() ? type->without_array()->component_slots() +- : type->component_slots(); +- uint32_t frac = var->data.location_frac; + uint32_t vary = var->data.driver_location; + +- if (glsl_base_type_is_64bit(type->without_array()->base_type)) { +- if (comp > 2) +- slots *= 2; +- } +- + assert(vary + slots <= PIPE_MAX_SHADER_INPUTS); + + switch(prog->getType()) { +@@ -1014,6 +1030,8 @@ bool Converter::assignSlots() { + info->numPatchConstants = MAX2(info->numPatchConstants, index + slots); + break; + case Program::TYPE_VERTEX: ++ if (slot >= VERT_ATTRIB_GENERIC0) ++ slot = VERT_ATTRIB_GENERIC0 + vary; + vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index); + switch (name) { + case TGSI_SEMANTIC_EDGEFLAG: +@@ -1029,17 +1047,12 @@ bool Converter::assignSlots() { + } + + for (uint16_t i = 0u; i < slots; ++i, ++vary) { +- info->in[vary].id = vary; +- info->in[vary].patch = var->data.patch; +- info->in[vary].sn = name; +- info->in[vary].si = index + i; +- if (glsl_base_type_is_64bit(type->without_array()->base_type)) +- if (i & 0x1) +- info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4); +- else +- info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf); +- else +- info->in[vary].mask |= ((1 << comp) - 1) << frac; ++ nv50_ir_varying *v = &info->in[vary]; ++ ++ v->patch = var->data.patch; ++ v->sn = name; ++ v->si = index + i; ++ v->mask |= getMaskForType(type, i) << var->data.location_frac; + } + info->numInputs = 
std::max(info->numInputs, vary); + } +@@ -1048,16 +1061,8 @@ bool Converter::assignSlots() { + const glsl_type *type = var->type; + int slot = var->data.location; + uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var); +- uint32_t comp = type->is_array() ? type->without_array()->component_slots() +- : type->component_slots(); +- uint32_t frac = var->data.location_frac; + uint32_t vary = var->data.driver_location; + +- if (glsl_base_type_is_64bit(type->without_array()->base_type)) { +- if (comp > 2) +- slots *= 2; +- } +- + assert(vary < PIPE_MAX_SHADER_OUTPUTS); + + switch(prog->getType()) { +@@ -1067,7 +1072,11 @@ bool Converter::assignSlots() { + case TGSI_SEMANTIC_COLOR: + if (!var->data.fb_fetch_output) + info->prop.fp.numColourResults++; +- info->prop.fp.separateFragData = true; ++ ++ if (var->data.location == FRAG_RESULT_COLOR && ++ nir->info.outputs_written & BITFIELD64_BIT(var->data.location)) ++ info->prop.fp.separateFragData = true; ++ + // sometimes we get FRAG_RESULT_DATAX with data.index 0 + // sometimes we get FRAG_RESULT_DATA0 with data.index X + index = index == 0 ? 
var->data.index : index; +@@ -1118,20 +1127,14 @@ bool Converter::assignSlots() { + } + + for (uint16_t i = 0u; i < slots; ++i, ++vary) { +- info->out[vary].id = vary; +- info->out[vary].patch = var->data.patch; +- info->out[vary].sn = name; +- info->out[vary].si = index + i; +- if (glsl_base_type_is_64bit(type->without_array()->base_type)) +- if (i & 0x1) +- info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4); +- else +- info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf); +- else +- info->out[vary].mask |= ((1 << comp) - 1) << frac; ++ nv50_ir_varying *v = &info->out[vary]; ++ v->patch = var->data.patch; ++ v->sn = name; ++ v->si = index + i; ++ v->mask |= getMaskForType(type, i) << var->data.location_frac; + + if (nir->info.outputs_read & 1ull << slot) +- info->out[vary].oread = 1; ++ v->oread = 1; + } + info->numOutputs = std::max(info->numOutputs, vary); + } +@@ -1275,6 +1278,7 @@ Converter::parseNIR() + info->bin.tlsSpace = 0; + info->io.clipDistances = nir->info.clip_distance_array_size; + info->io.cullDistances = nir->info.cull_distance_array_size; ++ info->io.layer_viewport_relative = nir->info.layer_viewport_relative; + + switch(prog->getType()) { + case Program::TYPE_COMPUTE: +@@ -1291,7 +1295,7 @@ Converter::parseNIR() + info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage; + info->prop.fp.readsSampleLocations = + (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS); +- info->prop.fp.usesDiscard = nir->info.fs.uses_discard; ++ info->prop.fp.usesDiscard = nir->info.fs.uses_discard || nir->info.fs.uses_demote; + info->prop.fp.usesSampleMaskIn = + !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN); + break; +@@ -1426,64 +1430,69 @@ Converter::visit(nir_block *block) + bool + Converter::visit(nir_if *nif) + { ++ curIfDepth++; ++ + DataType sType = getSType(nif->condition, false, false); + Value *src = getSrc(&nif->condition, 0); + + nir_block *lastThen = nir_if_last_then_block(nif); + nir_block 
*lastElse = nir_if_last_else_block(nif); + +- assert(!lastThen->successors[1]); +- assert(!lastElse->successors[1]); +- ++ BasicBlock *headBB = bb; + BasicBlock *ifBB = convert(nir_if_first_then_block(nif)); + BasicBlock *elseBB = convert(nir_if_first_else_block(nif)); + + bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE); + bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE); + +- // we only insert joinats, if both nodes end up at the end of the if again. +- // the reason for this to not happens are breaks/continues/ret/... which +- // have their own handling +- if (lastThen->successors[0] == lastElse->successors[0]) +- bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]), +- CC_ALWAYS, NULL); +- ++ bool insertJoins = lastThen->successors[0] == lastElse->successors[0]; + mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType); + + foreach_list_typed(nir_cf_node, node, node, &nif->then_list) { + if (!visit(node)) + return false; + } ++ + setPosition(convert(lastThen), true); +- if (!bb->getExit() || +- !bb->getExit()->asFlow() || +- bb->getExit()->asFlow()->op == OP_JOIN) { ++ if (!bb->isTerminated()) { + BasicBlock *tailBB = convert(lastThen->successors[0]); + mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL); + bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD); ++ } else { ++ insertJoins = insertJoins && bb->getExit()->op == OP_BRA; + } + + foreach_list_typed(nir_cf_node, node, node, &nif->else_list) { + if (!visit(node)) + return false; + } ++ + setPosition(convert(lastElse), true); +- if (!bb->getExit() || +- !bb->getExit()->asFlow() || +- bb->getExit()->asFlow()->op == OP_JOIN) { ++ if (!bb->isTerminated()) { + BasicBlock *tailBB = convert(lastElse->successors[0]); + mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL); + bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD); ++ } else { ++ insertJoins = insertJoins && bb->getExit()->op == OP_BRA; + } + +- if (lastThen->successors[0] == lastElse->successors[0]) { +- setPosition(convert(lastThen->successors[0]), true); ++ /* only insert 
joins for the most outer if */ ++ if (--curIfDepth) ++ insertJoins = false; ++ ++ /* we made sure that all threads would converge at the same block */ ++ if (insertJoins) { ++ BasicBlock *conv = convert(lastThen->successors[0]); ++ setPosition(headBB->getExit(), false); ++ headBB->joinAt = mkFlow(OP_JOINAT, conv, CC_ALWAYS, NULL); ++ setPosition(conv, false); + mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1; + } + + return true; + } + ++// TODO: add convergency + bool + Converter::visit(nir_loop *loop) + { +@@ -1491,8 +1500,8 @@ Converter::visit(nir_loop *loop) + func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth); + + BasicBlock *loopBB = convert(nir_loop_first_block(loop)); +- BasicBlock *tailBB = +- convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node))); ++ BasicBlock *tailBB = convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node))); ++ + bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE); + + mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL); +@@ -1503,19 +1512,15 @@ Converter::visit(nir_loop *loop) + if (!visit(node)) + return false; + } +- Instruction *insn = bb->getExit(); +- if (bb->cfg.incidentCount() != 0) { +- if (!insn || !insn->asFlow()) { +- mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL); +- bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK); +- } else if (insn && insn->op == OP_BRA && !insn->getPredicate() && +- tailBB->cfg.incidentCount() == 0) { +- // RA doesn't like having blocks around with no incident edge, +- // so we create a fake one to make it happy +- bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE); +- } ++ ++ if (!bb->isTerminated()) { ++ mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL); ++ bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK); + } + ++ if (tailBB->cfg.incidentCount() == 0) ++ loopBB->cfg.attach(&tailBB->cfg, Graph::Edge::TREE); ++ + curLoopDepth -= 1; + + return true; +@@ -1560,6 +1565,7 @@ Converter::convert(nir_intrinsic_op intr) + return SV_DRAWID; + case nir_intrinsic_load_front_face: + return SV_FACE; ++ 
case nir_intrinsic_is_helper_invocation: + case nir_intrinsic_load_helper_invocation: + return SV_THREAD_KILL; + case nir_intrinsic_load_instance_id: +@@ -1617,6 +1623,7 @@ Converter::visit(nir_intrinsic_instr *insn) + { + nir_intrinsic_op op = insn->intrinsic; + const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op]; ++ unsigned dest_components = nir_intrinsic_dest_components(insn); + + switch (op) { + case nir_intrinsic_load_uniform: { +@@ -1624,7 +1631,7 @@ Converter::visit(nir_intrinsic_instr *insn) + const DataType dType = getDType(insn); + Value *indirect; + uint32_t coffset = getIndirect(insn, 0, 0, indirect); +- for (uint8_t i = 0; i < insn->num_components; ++i) { ++ for (uint8_t i = 0; i < dest_components; ++i) { + loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect); + } + break; +@@ -1635,7 +1642,7 @@ Converter::visit(nir_intrinsic_instr *insn) + DataType dType = getSType(insn->src[0], false, false); + uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 
1 : 2, 0, indirect); + +- for (uint8_t i = 0u; i < insn->num_components; ++i) { ++ for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) { + if (!((1u << i) & nir_intrinsic_write_mask(insn))) + continue; + +@@ -1652,6 +1659,7 @@ Converter::visit(nir_intrinsic_instr *insn) + break; + } + case Program::TYPE_GEOMETRY: ++ case Program::TYPE_TESSELLATION_EVAL: + case Program::TYPE_VERTEX: { + if (info->io.genUserClip > 0 && idx == (uint32_t)clipVertexOutput) { + mkMov(clipVtx[i], src); +@@ -1688,7 +1696,7 @@ Converter::visit(nir_intrinsic_instr *insn) + srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0))); + srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0))); + +- for (uint8_t i = 0u; i < insn->num_components; ++i) { ++ for (uint8_t i = 0u; i < dest_components; ++i) { + defs.push_back(newDefs[i]); + mask |= 1 << i; + } +@@ -1715,15 +1723,25 @@ Converter::visit(nir_intrinsic_instr *insn) + + // see load_barycentric_* handling + if (prog->getType() == Program::TYPE_FRAGMENT) { +- mode = translateInterpMode(&vary, nvirOp); + if (op == nir_intrinsic_load_interpolated_input) { + ImmediateValue immMode; + if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode)) +- mode |= immMode.reg.data.u32; ++ mode = immMode.reg.data.u32; ++ } ++ if (mode == NV50_IR_INTERP_DEFAULT) ++ mode |= translateInterpMode(&vary, nvirOp); ++ else { ++ if (vary.linear) { ++ nvirOp = OP_LINTERP; ++ mode |= NV50_IR_INTERP_LINEAR; ++ } else { ++ nvirOp = OP_PINTERP; ++ mode |= NV50_IR_INTERP_PERSPECTIVE; ++ } + } + } + +- for (uint8_t i = 0u; i < insn->num_components; ++i) { ++ for (uint8_t i = 0u; i < dest_components; ++i) { + uint32_t address = getSlotAddress(insn, idx, i); + Symbol *sym = mkSymbol(input ? 
FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address); + if (prog->getType() == Program::TYPE_FRAGMENT) { +@@ -1814,9 +1832,11 @@ Converter::visit(nir_intrinsic_instr *insn) + loadImm(newDefs[1], mode); + break; + } ++ case nir_intrinsic_demote: + case nir_intrinsic_discard: + mkOp(OP_DISCARD, TYPE_NONE, NULL); + break; ++ case nir_intrinsic_demote_if: + case nir_intrinsic_discard_if: { + Value *pred = getSSA(1, FILE_PREDICATE); + if (insn->num_components > 1) { +@@ -1832,6 +1852,7 @@ Converter::visit(nir_intrinsic_instr *insn) + case nir_intrinsic_load_base_instance: + case nir_intrinsic_load_draw_id: + case nir_intrinsic_load_front_face: ++ case nir_intrinsic_is_helper_invocation: + case nir_intrinsic_load_helper_invocation: + case nir_intrinsic_load_instance_id: + case nir_intrinsic_load_invocation_id: +@@ -1858,7 +1879,7 @@ Converter::visit(nir_intrinsic_instr *insn) + SVSemantic sv = convert(op); + LValues &newDefs = convert(&insn->dest); + +- for (uint8_t i = 0u; i < insn->num_components; ++i) { ++ for (uint8_t i = 0u; i < nir_intrinsic_dest_components(insn); ++i) { + Value *def; + if (typeSizeof(dType) == 8) + def = getSSA(); +@@ -1910,12 +1931,12 @@ Converter::visit(nir_intrinsic_instr *insn) + + if (op == nir_intrinsic_read_first_invocation) { + mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY; +- mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV; ++ mkOp1(OP_BREV, TYPE_U32, tmp, tmp); + mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT; + } else + tmp = getSrc(&insn->src[1], 0); + +- for (uint8_t i = 0; i < insn->num_components; ++i) { ++ for (uint8_t i = 0; i < dest_components; ++i) { + mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f)) + ->subOp = NV50_IR_SUBOP_SHFL_IDX; + } +@@ -1931,7 +1952,7 @@ Converter::visit(nir_intrinsic_instr *insn) + + Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS), + mkImm(baseVertex), 
indirectVertex); +- for (uint8_t i = 0u; i < insn->num_components; ++i) { ++ for (uint8_t i = 0u; i < dest_components; ++i) { + uint32_t address = getSlotAddress(insn, idx, i); + loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0, + indirectOffset, vtxBase, info->in[idx].patch); +@@ -1954,19 +1975,24 @@ Converter::visit(nir_intrinsic_instr *insn) + + vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase); + +- for (uint8_t i = 0u; i < insn->num_components; ++i) { ++ for (uint8_t i = 0u; i < dest_components; ++i) { + uint32_t address = getSlotAddress(insn, idx, i); + loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0, + indirectOffset, vtxBase, info->in[idx].patch); + } + break; + } +- case nir_intrinsic_emit_vertex: ++ case nir_intrinsic_emit_vertex: { + if (info->io.genUserClip > 0) + handleUserClipPlanes(); +- // fallthrough ++ uint32_t idx = nir_intrinsic_stream_id(insn); ++ mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1; ++ break; ++ } + case nir_intrinsic_end_primitive: { + uint32_t idx = nir_intrinsic_stream_id(insn); ++ if (idx) ++ break; + mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1; + break; + } +@@ -1978,7 +2004,7 @@ Converter::visit(nir_intrinsic_instr *insn) + uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1; + uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset); + +- for (uint8_t i = 0u; i < insn->num_components; ++i) { ++ for (uint8_t i = 0u; i < dest_components; ++i) { + loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i, + indirectOffset, indirectIndex); + } +@@ -2001,7 +2027,7 @@ Converter::visit(nir_intrinsic_instr *insn) + uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer); + uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset); + +- for (uint8_t i = 0u; i < insn->num_components; ++i) { ++ for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) { + if (!((1u << i) & 
nir_intrinsic_write_mask(insn))) + continue; + Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType, +@@ -2020,7 +2046,7 @@ Converter::visit(nir_intrinsic_instr *insn) + uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer); + uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset); + +- for (uint8_t i = 0u; i < insn->num_components; ++i) ++ for (uint8_t i = 0u; i < dest_components; ++i) + loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i, + indirectOffset, indirectBuffer); + +@@ -2314,7 +2340,7 @@ Converter::visit(nir_intrinsic_instr *insn) + Value *indirectOffset; + uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset); + +- for (uint8_t i = 0u; i < insn->num_components; ++i) { ++ for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) { + if (!((1u << i) & nir_intrinsic_write_mask(insn))) + continue; + Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, sType, offset + i * typeSizeof(sType)); +@@ -2328,7 +2354,7 @@ Converter::visit(nir_intrinsic_instr *insn) + Value *indirectOffset; + uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset); + +- for (uint8_t i = 0u; i < insn->num_components; ++i) ++ for (uint8_t i = 0u; i < dest_components; ++i) + loadFrom(FILE_MEMORY_SHARED, 0, dType, newDefs[i], offset, i, indirectOffset); + + break; +@@ -2367,7 +2393,7 @@ Converter::visit(nir_intrinsic_instr *insn) + Value *indirectOffset; + uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset); + +- for (auto i = 0u; i < insn->num_components; ++i) ++ for (auto i = 0u; i < dest_components; ++i) + loadFrom(FILE_MEMORY_GLOBAL, 0, dType, newDefs[i], offset, i, indirectOffset); + + info->io.globalAccess |= 0x1; +@@ -2376,7 +2402,7 @@ Converter::visit(nir_intrinsic_instr *insn) + case nir_intrinsic_store_global: { + DataType sType = getSType(insn->src[0], false, false); + +- for (auto i = 0u; i < insn->num_components; ++i) { ++ for (auto i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) { + if (!((1u << 
i) & nir_intrinsic_write_mask(insn))) + continue; + if (typeSizeof(sType) == 8) { +@@ -2418,7 +2444,6 @@ Converter::visit(nir_jump_instr *insn) + case nir_jump_continue: { + bool isBreak = insn->type == nir_jump_break; + nir_block *block = insn->instr.block; +- assert(!block->successors[1]); + BasicBlock *target = convert(block->successors[0]); + mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL); + bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK); +@@ -2774,7 +2799,7 @@ Converter::visit(nir_alu_instr *insn) + case nir_op_bfm: { + DEFAULT_CHECKS; + LValues &newDefs = convert(&insn->dest); +- mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1])); ++ mkOp2(OP_BMSK, dType, newDefs[0], getSrc(&insn->src[1]), getSrc(&insn->src[0]))->subOp = NV50_IR_SUBOP_BMSK_W; + break; + } + case nir_op_bitfield_insert: { +@@ -2794,17 +2819,69 @@ Converter::visit(nir_alu_instr *insn) + case nir_op_bitfield_reverse: { + DEFAULT_CHECKS; + LValues &newDefs = convert(&insn->dest); +- mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV; ++ mkOp1(OP_BREV, TYPE_U32, newDefs[0], getSrc(&insn->src[0])); + break; + } + case nir_op_find_lsb: { + DEFAULT_CHECKS; + LValues &newDefs = convert(&insn->dest); + Value *tmp = getSSA(); +- mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV; ++ mkOp1(OP_BREV, TYPE_U32, tmp, getSrc(&insn->src[0])); + mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT; + break; + } ++ case nir_op_extract_u8: { ++ DEFAULT_CHECKS; ++ LValues &newDefs = convert(&insn->dest); ++ Value *prmt = getSSA(); ++ mkOp2(OP_OR, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x4440)); ++ mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0)); ++ break; ++ } ++ case nir_op_extract_i8: { ++ DEFAULT_CHECKS; ++ LValues &newDefs = 
convert(&insn->dest); ++ Value *prmt = getSSA(); ++ mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x1111), loadImm(NULL, 0x8880)); ++ mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0)); ++ break; ++ } ++ case nir_op_extract_u16: { ++ DEFAULT_CHECKS; ++ LValues &newDefs = convert(&insn->dest); ++ Value *prmt = getSSA(); ++ mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x22), loadImm(NULL, 0x4410)); ++ mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0)); ++ break; ++ } ++ case nir_op_extract_i16: { ++ DEFAULT_CHECKS; ++ LValues &newDefs = convert(&insn->dest); ++ Value *prmt = getSSA(); ++ mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x2222), loadImm(NULL, 0x9910)); ++ mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0)); ++ break; ++ } ++ case nir_op_urol: { ++ DEFAULT_CHECKS; ++ LValues &newDefs = convert(&insn->dest); ++ mkOp3(OP_SHF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), ++ getSrc(&insn->src[1]), getSrc(&insn->src[0])) ++ ->subOp = NV50_IR_SUBOP_SHF_L | ++ NV50_IR_SUBOP_SHF_W | ++ NV50_IR_SUBOP_SHF_HI; ++ break; ++ } ++ case nir_op_uror: { ++ DEFAULT_CHECKS; ++ LValues &newDefs = convert(&insn->dest); ++ mkOp3(OP_SHF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), ++ getSrc(&insn->src[1]), getSrc(&insn->src[0])) ++ ->subOp = NV50_IR_SUBOP_SHF_R | ++ NV50_IR_SUBOP_SHF_W | ++ NV50_IR_SUBOP_SHF_LO; ++ break; ++ } + // boolean conversions + case nir_op_b2f32: { + DEFAULT_CHECKS; +@@ -2990,14 +3067,11 @@ Converter::handleDeref(nir_deref_instr *deref, Value * &indirect, const nir_vari + CacheMode + Converter::convert(enum gl_access_qualifier access) + { +- switch (access) { +- case ACCESS_VOLATILE: ++ if (access & ACCESS_VOLATILE) + return CACHE_CV; +- case ACCESS_COHERENT: ++ if (access & ACCESS_COHERENT) + return CACHE_CG; +- default: +- return CACHE_CA; +- } ++ return CACHE_CA; + } + + CacheMode +@@ 
-3224,6 +3298,11 @@ Converter::run() + NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL); + NIR_PASS_V(nir, nir_lower_phis_to_scalar); + ++ /*TODO: improve this lowering/optimisation loop so that we can use ++ * nir_opt_idiv_const effectively before this. ++ */ ++ NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_precise); ++ + do { + progress = false; + NIR_PASS(progress, nir, nir_copy_prop); +@@ -3285,3 +3364,125 @@ Program::makeFromNIR(struct nv50_ir_prog_info *info) + } + + } // namespace nv50_ir ++ ++static nir_shader_compiler_options ++nvir_nir_shader_compiler_options(int chipset) ++{ ++ nir_shader_compiler_options op = {}; ++ op.lower_fdiv = (chipset >= NVISA_GV100_CHIPSET); ++ op.lower_ffma = false; ++ op.fuse_ffma = false; /* nir doesn't track mad vs fma */ ++ op.lower_flrp16 = (chipset >= NVISA_GV100_CHIPSET); ++ op.lower_flrp32 = true; ++ op.lower_flrp64 = true; ++ op.lower_fpow = false; // TODO: nir's lowering is broken, or we could use it ++ op.lower_fsat = false; ++ op.lower_fsqrt = false; // TODO: only before gm200 ++ op.lower_sincos = false; ++ op.lower_fmod = true; ++ op.lower_bitfield_extract = false; ++ op.lower_bitfield_extract_to_shifts = (chipset >= NVISA_GV100_CHIPSET); ++ op.lower_bitfield_insert = false; ++ op.lower_bitfield_insert_to_shifts = (chipset >= NVISA_GV100_CHIPSET); ++ op.lower_bitfield_insert_to_bitfield_select = false; ++ op.lower_bitfield_reverse = false; ++ op.lower_bit_count = false; ++ op.lower_ifind_msb = false; ++ op.lower_find_lsb = false; ++ op.lower_uadd_carry = true; // TODO ++ op.lower_usub_borrow = true; // TODO ++ op.lower_mul_high = false; ++ op.lower_negate = false; ++ op.lower_sub = true; ++ op.lower_scmp = true; // TODO: not implemented yet ++ op.lower_vector_cmp = false; ++ op.lower_idiv = true; ++ op.lower_bitops = false; ++ op.lower_isign = (chipset >= NVISA_GV100_CHIPSET); ++ op.lower_fsign = (chipset >= NVISA_GV100_CHIPSET); ++ op.lower_fdph = false; ++ op.lower_fdot = false; ++ op.fdot_replicates 
= false; // TODO ++ op.lower_ffloor = false; // TODO ++ op.lower_ffract = true; ++ op.lower_fceil = false; // TODO ++ op.lower_ftrunc = false; ++ op.lower_ldexp = true; ++ op.lower_pack_half_2x16 = true; ++ op.lower_pack_unorm_2x16 = true; ++ op.lower_pack_snorm_2x16 = true; ++ op.lower_pack_unorm_4x8 = true; ++ op.lower_pack_snorm_4x8 = true; ++ op.lower_unpack_half_2x16 = true; ++ op.lower_unpack_unorm_2x16 = true; ++ op.lower_unpack_snorm_2x16 = true; ++ op.lower_unpack_unorm_4x8 = true; ++ op.lower_unpack_snorm_4x8 = true; ++ op.lower_pack_split = false; ++ op.lower_extract_byte = (chipset < NVISA_GM107_CHIPSET); ++ op.lower_extract_word = (chipset < NVISA_GM107_CHIPSET); ++ op.lower_all_io_to_temps = false; ++ op.lower_all_io_to_elements = false; ++ op.vertex_id_zero_based = false; ++ op.lower_base_vertex = false; ++ op.lower_helper_invocation = false; ++ op.optimize_sample_mask_in = false; ++ op.lower_cs_local_index_from_id = true; ++ op.lower_cs_local_id_from_index = false; ++ op.lower_device_index_to_zero = false; // TODO ++ op.lower_wpos_pntc = false; // TODO ++ op.lower_hadd = true; // TODO ++ op.lower_add_sat = true; // TODO ++ op.vectorize_io = false; ++ op.lower_to_scalar = false; ++ op.unify_interfaces = false; ++ op.use_interpolated_input_intrinsics = true; ++ op.lower_mul_2x32_64 = true; // TODO ++ op.lower_rotate = (chipset < NVISA_GV100_CHIPSET); ++ op.has_imul24 = false; ++ op.intel_vec4 = false; ++ op.max_unroll_iterations = 32; ++ op.lower_int64_options = (nir_lower_int64_options) ( ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul64 : 0) | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_isign64 : 0) | ++ nir_lower_divmod64 | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_high64 : 0) | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_mov64 : 0) | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_icmp64 : 0) | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_iabs64 : 0) | ++ ((chipset >= NVISA_GV100_CHIPSET) ? 
nir_lower_ineg64 : 0) | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_logic64 : 0) | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_minmax64 : 0) | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_shift64 : 0) | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_2x32_64 : 0) | ++ ((chipset >= NVISA_GM107_CHIPSET) ? nir_lower_extract64 : 0) | ++ nir_lower_ufind_msb64 ++ ); ++ op.lower_doubles_options = (nir_lower_doubles_options) ( ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drcp : 0) | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsqrt : 0) | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drsq : 0) | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dfract : 0) | ++ nir_lower_dmod | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsub : 0) | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ddiv : 0) ++ ); ++ return op; ++} ++ ++static const nir_shader_compiler_options gf100_nir_shader_compiler_options = ++nvir_nir_shader_compiler_options(NVISA_GF100_CHIPSET); ++static const nir_shader_compiler_options gm107_nir_shader_compiler_options = ++nvir_nir_shader_compiler_options(NVISA_GM107_CHIPSET); ++static const nir_shader_compiler_options gv100_nir_shader_compiler_options = ++nvir_nir_shader_compiler_options(NVISA_GV100_CHIPSET); ++ ++const nir_shader_compiler_options * ++nv50_ir_nir_shader_compiler_options(int chipset) ++{ ++ if (chipset >= NVISA_GV100_CHIPSET) ++ return &gv100_nir_shader_compiler_options; ++ if (chipset >= NVISA_GM107_CHIPSET) ++ return &gm107_nir_shader_compiler_options; ++ return &gf100_nir_shader_compiler_options; ++} +diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +index 60f3d582a0b..3fd76f64de0 100644 +--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp ++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +@@ -3401,8 +3401,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) + // 
ReadInvocationARB(src, findLSB(ballot(true))) + val0 = getScratch(); + mkOp1(OP_VOTE, TYPE_U32, val0, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY; +- mkOp2(OP_EXTBF, TYPE_U32, val0, val0, mkImm(0x2000)) +- ->subOp = NV50_IR_SUBOP_EXTBF_REV; ++ mkOp1(OP_BREV, TYPE_U32, val0, val0); + mkOp1(OP_BFIND, TYPE_U32, val0, val0)->subOp = NV50_IR_SUBOP_BFIND_SAMT; + src1 = val0; + /* fallthrough */ +@@ -3820,8 +3819,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = fetchSrc(0, c); + val0 = getScratch(); +- geni = mkOp2(OP_EXTBF, TYPE_U32, val0, src0, mkImm(0x2000)); +- geni->subOp = NV50_IR_SUBOP_EXTBF_REV; ++ mkOp1(OP_BREV, TYPE_U32, val0, src0); + geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], val0); + geni->subOp = NV50_IR_SUBOP_BFIND_SAMT; + } +@@ -3836,8 +3834,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn) + case TGSI_OPCODE_BREV: + FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { + src0 = fetchSrc(0, c); +- geni = mkOp2(OP_EXTBF, TYPE_U32, dst0[c], src0, mkImm(0x2000)); +- geni->subOp = NV50_IR_SUBOP_EXTBF_REV; ++ mkOp1(OP_BREV, TYPE_U32, dst0[c], src0); + } + break; + case TGSI_OPCODE_POPC: +diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp +index 49a5f3b01f2..9fad1dcfe89 100644 +--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp ++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp +@@ -239,9 +239,8 @@ GM107LoweringPass::handlePFETCH(Instruction *i) + Value *tmp1 = bld.getScratch(); + Value *tmp2 = bld.getScratch(); + bld.mkOp1(OP_RDSV, TYPE_U32, tmp0, bld.mkSysVal(SV_INVOCATION_INFO, 0)); +- bld.mkOp2(OP_SHR , TYPE_U32, tmp1, tmp0, bld.mkImm(16)); +- bld.mkOp2(OP_AND , TYPE_U32, tmp0, tmp0, bld.mkImm(0xff)); +- bld.mkOp2(OP_AND , TYPE_U32, tmp1, tmp1, bld.mkImm(0xff)); ++ bld.mkOp3(OP_PERMT, TYPE_U32, tmp1, tmp0, bld.mkImm(0x4442), bld.mkImm(0)); ++ 
bld.mkOp3(OP_PERMT, TYPE_U32, tmp0, tmp0, bld.mkImm(0x4440), bld.mkImm(0)); + if (i->getSrc(1)) + bld.mkOp2(OP_ADD , TYPE_U32, tmp2, i->getSrc(0), i->getSrc(1)); + else +diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h +index 71e5ea6417a..dfa1d035dac 100644 +--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h ++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h +@@ -21,6 +21,7 @@ class GM107LegalizeSSA : public NVC0LegalizeSSA + private: + virtual bool visit(Instruction *); + ++protected: + void handlePFETCH(Instruction *); + void handleLOAD(Instruction *); + }; +diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp +new file mode 100644 +index 00000000000..644d4928327 +--- /dev/null ++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp +@@ -0,0 +1,481 @@ ++/* ++ * Copyright 2020 Red Hat Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ */ ++#include "codegen/nv50_ir.h" ++#include "codegen/nv50_ir_build_util.h" ++ ++#include "codegen/nv50_ir_target_nvc0.h" ++#include "codegen/nv50_ir_lowering_gv100.h" ++ ++#include ++ ++namespace nv50_ir { ++ ++bool ++GV100LegalizeSSA::handleCMP(Instruction *i) ++{ ++ Value *pred = bld.getSSA(1, FILE_PREDICATE); ++ ++ bld.mkCmp(OP_SET, reverseCondCode(i->asCmp()->setCond), TYPE_U8, pred, ++ i->sType, bld.mkImm(0), i->getSrc(2))->ftz = i->ftz; ++ bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), i->getSrc(0), i->getSrc(1), pred); ++ return true; ++} ++ ++// NIR deals with most of these for us, but codegen generates more in pointer ++// calculations from other lowering passes. ++bool ++GV100LegalizeSSA::handleIADD64(Instruction *i) ++{ ++ Value *carry = bld.getSSA(1, FILE_PREDICATE); ++ Value *def[2] = { bld.getSSA(), bld.getSSA() }; ++ Value *src[2][2]; ++ ++ for (int s = 0; s < 2; s++) { ++ if (i->getSrc(s)->reg.size == 8) { ++ bld.mkSplit(src[s], 4, i->getSrc(s)); ++ } else { ++ src[s][0] = i->getSrc(s); ++ src[s][1] = bld.mkImm(0); ++ } ++ } ++ ++ bld.mkOp2(OP_ADD, TYPE_U32, def[0], src[0][0], src[1][0])-> ++ setFlagsDef(1, carry); ++ bld.mkOp2(OP_ADD, TYPE_U32, def[1], src[0][1], src[1][1])-> ++ setFlagsSrc(2, carry); ++ bld.mkOp2(OP_MERGE, i->dType, i->getDef(0), def[0], def[1]); ++ return true; ++} ++ ++bool ++GV100LegalizeSSA::handleIMAD_HIGH(Instruction *i) ++{ ++ Value *def = bld.getSSA(8), *defs[2]; ++ Value *src2; ++ ++ if (i->srcExists(2) && ++ (!i->getSrc(2)->asImm() || i->getSrc(2)->asImm()->reg.data.u32)) { ++ Value *src2s[2] = { bld.getSSA(), bld.getSSA() }; ++ bld.mkMov(src2s[0], bld.mkImm(0)); ++ bld.mkMov(src2s[1], i->getSrc(2)); ++ src2 = bld.mkOp2(OP_MERGE, TYPE_U64, 
bld.getSSA(8), src2s[0], src2s[1])->getDef(0); ++ } else { ++ src2 = bld.mkImm(0); ++ } ++ ++ bld.mkOp3(OP_MAD, isSignedType(i->sType) ? TYPE_S64 : TYPE_U64, def, ++ i->getSrc(0), i->getSrc(1), src2); ++ ++ bld.mkSplit(defs, 4, def); ++ i->def(0).replace(defs[1], false); ++ return true; ++} ++ ++// XXX: We should be able to do this in GV100LoweringPass, but codegen messes ++// up somehow and swaps the condcode without swapping the sources. ++// - tests/spec/glsl-1.50/execution/geometry/primitive-id-in.shader_test ++bool ++GV100LegalizeSSA::handleIMNMX(Instruction *i) ++{ ++ Value *pred = bld.getSSA(1, FILE_PREDICATE); ++ ++ bld.mkCmp(OP_SET, (i->op == OP_MIN) ? CC_LT : CC_GT, i->dType, pred, ++ i->sType, i->getSrc(0), i->getSrc(1)); ++ bld.mkOp3(OP_SELP, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1), pred); ++ return true; ++} ++ ++bool ++GV100LegalizeSSA::handleIMUL(Instruction *i) ++{ ++ if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) ++ return handleIMAD_HIGH(i); ++ ++ bld.mkOp3(OP_MAD, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1), ++ bld.mkImm(0)); ++ return true; ++} ++ ++bool ++GV100LegalizeSSA::handleLOP2(Instruction *i) ++{ ++ uint8_t src0 = NV50_IR_SUBOP_LOP3_LUT_SRC0; ++ uint8_t src1 = NV50_IR_SUBOP_LOP3_LUT_SRC1; ++ uint8_t subOp; ++ ++ if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT)) ++ src0 = ~src0; ++ if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT)) ++ src1 = ~src1; ++ ++ switch (i->op) { ++ case OP_AND: subOp = src0 & src1; break; ++ case OP_OR : subOp = src0 | src1; break; ++ case OP_XOR: subOp = src0 ^ src1; break; ++ default: ++ assert(!"invalid LOP2 opcode"); ++ break; ++ } ++ ++ bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), i->getSrc(0), i->getSrc(1), ++ bld.mkImm(0))->subOp = subOp; ++ return true; ++} ++ ++bool ++GV100LegalizeSSA::handleNOT(Instruction *i) ++{ ++ bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), bld.mkImm(0), i->getSrc(0), ++ bld.mkImm(0))->subOp = (uint8_t)~NV50_IR_SUBOP_LOP3_LUT_SRC1; ++ return true; ++} ++ ++bool 
++GV100LegalizeSSA::handlePREEX2(Instruction *i) ++{ ++ i->def(0).replace(i->src(0), false); ++ return true; ++} ++ ++bool ++GV100LegalizeSSA::handleQUADON(Instruction *i) ++{ ++ handleSHFL(i); // Inserts OP_WARPSYNC ++ return true; ++} ++ ++bool ++GV100LegalizeSSA::handleQUADPOP(Instruction *i) ++{ ++ return true; ++} ++ ++bool ++GV100LegalizeSSA::handleSET(Instruction *i) ++{ ++ Value *src2 = i->srcExists(2) ? i->getSrc(2) : NULL; ++ Value *pred = bld.getSSA(1, FILE_PREDICATE), *met; ++ Instruction *xsetp; ++ ++ if (isFloatType(i->dType)) { ++ if (i->sType == TYPE_F32) ++ return false; // HW has FSET.BF ++ met = bld.mkImm(0x3f800000); ++ } else { ++ met = bld.mkImm(0xffffffff); ++ } ++ ++ xsetp = bld.mkCmp(i->op, i->asCmp()->setCond, TYPE_U8, pred, i->sType, ++ i->getSrc(0), i->getSrc(1)); ++ xsetp->src(0).mod = i->src(0).mod; ++ xsetp->src(1).mod = i->src(1).mod; ++ xsetp->setSrc(2, src2); ++ xsetp->ftz = i->ftz; ++ ++ i = bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), bld.mkImm(0), met, pred); ++ i->src(2).mod = Modifier(NV50_IR_MOD_NOT); ++ return true; ++} ++ ++bool ++GV100LegalizeSSA::handleSHFL(Instruction *i) ++{ ++ Instruction *sync = new_Instruction(func, OP_WARPSYNC, TYPE_NONE); ++ sync->fixed = 1; ++ sync->setSrc(0, bld.mkImm(0xffffffff)); ++ i->bb->insertBefore(i, sync); ++ return false; ++} ++ ++bool ++GV100LegalizeSSA::handleShift(Instruction *i) ++{ ++ Value *zero = bld.mkImm(0); ++ Value *src1 = i->getSrc(1); ++ Value *src0, *src2; ++ uint8_t subOp = i->op == OP_SHL ? 
NV50_IR_SUBOP_SHF_L : NV50_IR_SUBOP_SHF_R; ++ ++ if (i->op == OP_SHL && i->src(0).getFile() == FILE_GPR) { ++ src0 = i->getSrc(0); ++ src2 = zero; ++ } else { ++ src0 = zero; ++ src2 = i->getSrc(0); ++ subOp |= NV50_IR_SUBOP_SHF_HI; ++ } ++ if (i->subOp & NV50_IR_SUBOP_SHIFT_WRAP) ++ subOp |= NV50_IR_SUBOP_SHF_W; ++ ++ bld.mkOp3(OP_SHF, i->dType, i->getDef(0), src0, src1, src2)->subOp = subOp; ++ return true; ++} ++ ++bool ++GV100LegalizeSSA::handleSUB(Instruction *i) ++{ ++ Instruction *xadd = ++ bld.mkOp2(OP_ADD, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1)); ++ xadd->src(0).mod = i->src(0).mod; ++ xadd->src(1).mod = i->src(1).mod ^ Modifier(NV50_IR_MOD_NEG); ++ xadd->ftz = i->ftz; ++ return true; ++} ++ ++bool ++GV100LegalizeSSA::visit(Instruction *i) ++{ ++ bool lowered = false; ++ ++ bld.setPosition(i, false); ++ if (i->sType == TYPE_F32 && i->dType != TYPE_F16 && ++ prog->getType() != Program::TYPE_COMPUTE) ++ handleFTZ(i); ++ ++ switch (i->op) { ++ case OP_AND: ++ case OP_OR: ++ case OP_XOR: ++ if (i->def(0).getFile() != FILE_PREDICATE) ++ lowered = handleLOP2(i); ++ break; ++ case OP_NOT: ++ lowered = handleNOT(i); ++ break; ++ case OP_SHL: ++ case OP_SHR: ++ lowered = handleShift(i); ++ break; ++ case OP_SET: ++ case OP_SET_AND: ++ case OP_SET_OR: ++ case OP_SET_XOR: ++ if (i->def(0).getFile() != FILE_PREDICATE) ++ lowered = handleSET(i); ++ break; ++ case OP_SLCT: ++ lowered = handleCMP(i); ++ break; ++ case OP_PREEX2: ++ lowered = handlePREEX2(i); ++ break; ++ case OP_MUL: ++ if (!isFloatType(i->dType)) ++ lowered = handleIMUL(i); ++ break; ++ case OP_MAD: ++ if (!isFloatType(i->dType) && i->subOp == NV50_IR_SUBOP_MUL_HIGH) ++ lowered = handleIMAD_HIGH(i); ++ break; ++ case OP_SHFL: ++ lowered = handleSHFL(i); ++ break; ++ case OP_QUADON: ++ lowered = handleQUADON(i); ++ break; ++ case OP_QUADPOP: ++ lowered = handleQUADPOP(i); ++ break; ++ case OP_SUB: ++ lowered = handleSUB(i); ++ break; ++ case OP_MAX: ++ case OP_MIN: ++ if 
(!isFloatType(i->dType)) ++ lowered = handleIMNMX(i); ++ break; ++ case OP_ADD: ++ if (!isFloatType(i->dType) && typeSizeof(i->dType) == 8) ++ lowered = handleIADD64(i); ++ break; ++ case OP_PFETCH: ++ handlePFETCH(i); ++ break; ++ case OP_LOAD: ++ handleLOAD(i); ++ break; ++ default: ++ break; ++ } ++ ++ if (lowered) ++ delete_Instruction(prog, i); ++ ++ return true; ++} ++ ++bool ++GV100LoweringPass::handleDMNMX(Instruction *i) ++{ ++ Value *pred = bld.getSSA(1, FILE_PREDICATE); ++ Value *src0[2], *src1[2], *dest[2]; ++ ++ bld.mkCmp(OP_SET, (i->op == OP_MIN) ? CC_LT : CC_GT, TYPE_U32, pred, ++ i->sType, i->getSrc(0), i->getSrc(1)); ++ bld.mkSplit(src0, 4, i->getSrc(0)); ++ bld.mkSplit(src1, 4, i->getSrc(1)); ++ bld.mkSplit(dest, 4, i->getDef(0)); ++ bld.mkOp3(OP_SELP, TYPE_U32, dest[0], src0[0], src1[0], pred); ++ bld.mkOp3(OP_SELP, TYPE_U32, dest[1], src0[1], src1[1], pred); ++ bld.mkOp2(OP_MERGE, TYPE_U64, i->getDef(0), dest[0], dest[1]); ++ return true; ++} ++ ++bool ++GV100LoweringPass::handleEXTBF(Instruction *i) ++{ ++ Value *bit = bld.getScratch(); ++ Value *cnt = bld.getScratch(); ++ Value *mask = bld.getScratch(); ++ Value *zero = bld.mkImm(0); ++ ++ bld.mkOp3(OP_PERMT, TYPE_U32, bit, i->getSrc(1), bld.mkImm(0x4440), zero); ++ bld.mkOp3(OP_PERMT, TYPE_U32, cnt, i->getSrc(1), bld.mkImm(0x4441), zero); ++ bld.mkOp2(OP_BMSK, TYPE_U32, mask, bit, cnt); ++ bld.mkOp2(OP_AND, TYPE_U32, mask, i->getSrc(0), mask); ++ bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), mask, bit); ++ if (isSignedType(i->dType)) ++ bld.mkOp2(OP_SGXT, TYPE_S32, i->getDef(0), i->getDef(0), cnt); ++ ++ return true; ++} ++ ++bool ++GV100LoweringPass::handleFLOW(Instruction *i) ++{ ++ i->op = OP_BRA; ++ return false; ++} ++ ++bool ++GV100LoweringPass::handleI2I(Instruction *i) ++{ ++ bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(0), i->sType, i->getSrc(0))-> ++ subOp = i->subOp; ++ bld.mkCvt(OP_CVT, i->dType, i->getDef(0), TYPE_F32, i->getDef(0)); ++ return true; ++} ++ ++bool 
++GV100LoweringPass::handleINSBF(Instruction *i) ++{ ++ Value *bit = bld.getScratch(); ++ Value *cnt = bld.getScratch(); ++ Value *mask = bld.getScratch(); ++ Value *src0 = bld.getScratch(); ++ Value *zero = bld.mkImm(0); ++ ++ bld.mkOp3(OP_PERMT, TYPE_U32, bit, i->getSrc(1), bld.mkImm(0x4440), zero); ++ bld.mkOp3(OP_PERMT, TYPE_U32, cnt, i->getSrc(1), bld.mkImm(0x4441), zero); ++ bld.mkOp2(OP_BMSK, TYPE_U32, mask, zero, cnt); ++ ++ bld.mkOp2(OP_AND, TYPE_U32, src0, i->getSrc(0), mask); ++ bld.mkOp2(OP_SHL, TYPE_U32, src0, src0, bit); ++ ++ bld.mkOp2(OP_SHL, TYPE_U32, mask, mask, bit); ++ bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), src0, i->getSrc(2), mask)-> ++ subOp = NV50_IR_SUBOP_LOP3_LUT(a | (b & ~c)); ++ ++ return true; ++} ++ ++bool ++GV100LoweringPass::handlePINTERP(Instruction *i) ++{ ++ Value *src2 = i->srcExists(2) ? i->getSrc(2) : NULL; ++ Instruction *ipa, *mul; ++ ++ ipa = bld.mkOp2(OP_LINTERP, TYPE_F32, i->getDef(0), i->getSrc(0), src2); ++ ipa->ipa = i->ipa; ++ mul = bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(0), i->getDef(0), i->getSrc(1)); ++ ++ if (i->getInterpMode() == NV50_IR_INTERP_SC) { ++ ipa->setDef(1, bld.getSSA(1, FILE_PREDICATE)); ++ mul->setPredicate(CC_NOT_P, ipa->getDef(1)); ++ } ++ ++ return true; ++} ++ ++bool ++GV100LoweringPass::handlePREFLOW(Instruction *i) ++{ ++ return true; ++} ++ ++bool ++GV100LoweringPass::handlePRESIN(Instruction *i) ++{ ++ const float f = 1.0 / (2.0 * 3.14159265); ++ bld.mkOp2(OP_MUL, i->dType, i->getDef(0), i->getSrc(0), bld.mkImm(f)); ++ return true; ++} ++ ++bool ++GV100LoweringPass::visit(Instruction *i) ++{ ++ bool lowered = false; ++ ++ bld.setPosition(i, false); ++ ++ switch (i->op) { ++ case OP_BREAK: ++ case OP_CONT: ++ lowered = handleFLOW(i); ++ break; ++ case OP_PREBREAK: ++ case OP_PRECONT: ++ lowered = handlePREFLOW(i); ++ break; ++ case OP_CVT: ++ if (i->src(0).getFile() != FILE_PREDICATE && ++ i->def(0).getFile() != FILE_PREDICATE && ++ !isFloatType(i->dType) && !isFloatType(i->sType)) ++ 
lowered = handleI2I(i); ++ break; ++ case OP_EXTBF: ++ lowered = handleEXTBF(i); ++ break; ++ case OP_INSBF: ++ lowered = handleINSBF(i); ++ break; ++ case OP_MAX: ++ case OP_MIN: ++ if (i->dType == TYPE_F64) ++ lowered = handleDMNMX(i); ++ break; ++ case OP_PINTERP: ++ lowered = handlePINTERP(i); ++ break; ++ case OP_PRESIN: ++ lowered = handlePRESIN(i); ++ break; ++ default: ++ break; ++ } ++ ++ if (lowered) ++ delete_Instruction(prog, i); ++ ++ return true; ++} ++ ++} // namespace nv50_ir +diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.h +new file mode 100644 +index 00000000000..d918c6e83eb +--- /dev/null ++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.h +@@ -0,0 +1,78 @@ ++/* ++ * Copyright 2020 Red Hat Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. 
++ */ ++#ifndef __NV50_IR_LOWERING_GV100_H__ ++#define __NV50_IR_LOWERING_GV100_H__ ++#include "codegen/nv50_ir_lowering_gm107.h" ++ ++namespace nv50_ir { ++ ++class GV100LoweringPass : public Pass ++{ ++public: ++ GV100LoweringPass(Program *p) { ++ bld.setProgram(p); ++ } ++ ++private: ++ BuildUtil bld; ++ ++ virtual bool visit(Instruction *); ++ ++ bool handleDMNMX(Instruction *); ++ bool handleEXTBF(Instruction *); ++ bool handleFLOW(Instruction *); ++ bool handleI2I(Instruction *); ++ bool handleINSBF(Instruction *); ++ bool handlePINTERP(Instruction *); ++ bool handlePREFLOW(Instruction *); ++ bool handlePRESIN(Instruction *); ++}; ++ ++class GV100LegalizeSSA : public GM107LegalizeSSA ++{ ++public: ++ GV100LegalizeSSA(Program *p) { ++ bld.setProgram(p); ++ } ++ ++private: ++ virtual bool visit(Function *) { return true; } ++ virtual bool visit(BasicBlock *) { return true; } ++ virtual bool visit(Instruction *); ++ ++ bool handleCMP(Instruction *); ++ bool handleIADD64(Instruction *); ++ bool handleIMAD_HIGH(Instruction *); ++ bool handleIMNMX(Instruction *); ++ bool handleIMUL(Instruction *); ++ bool handleLOP2(Instruction *); ++ bool handleNOT(Instruction *); ++ bool handlePREEX2(Instruction *); ++ bool handleQUADON(Instruction *); ++ bool handleQUADPOP(Instruction *); ++ bool handleSET(Instruction *); ++ bool handleSHFL(Instruction *); ++ bool handleShift(Instruction *); ++ bool handleSUB(Instruction *); ++}; ++} ++#endif +diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +index a60881000fe..067f9abaca8 100644 +--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp ++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +@@ -310,6 +310,14 @@ NVC0LegalizeSSA::handleSET(CmpInstruction *cmp) + cmp->sType = hTy; + } + ++void ++NVC0LegalizeSSA::handleBREV(Instruction *i) ++{ ++ i->op = OP_EXTBF; ++ i->subOp = NV50_IR_SUBOP_EXTBF_REV; ++ i->setSrc(1, 
bld.mkImm(0x2000)); ++} ++ + bool + NVC0LegalizeSSA::visit(Function *fn) + { +@@ -354,6 +362,9 @@ NVC0LegalizeSSA::visit(BasicBlock *bb) + if (typeSizeof(i->sType) == 8 && i->sType != TYPE_F64) + handleSET(i->asCmp()); + break; ++ case OP_BREV: ++ handleBREV(i); ++ break; + default: + break; + } +@@ -856,11 +867,11 @@ NVC0LegalizePostRA::visit(BasicBlock *bb) + next = hi; + } + +- if (i->op == OP_SAT || i->op == OP_NEG || i->op == OP_ABS) +- replaceCvt(i); +- + if (i->op != OP_MOV && i->op != OP_PFETCH) + replaceZero(i); ++ ++ if (i->op == OP_SAT || i->op == OP_NEG || i->op == OP_ABS) ++ replaceCvt(i); + } + } + if (!bb->getEntry()) +@@ -887,6 +898,8 @@ NVC0LoweringPass::visit(Function *fn) + gpEmitAddress = bld.loadImm(NULL, 0)->asLValue(); + if (fn->cfgExit) { + bld.setPosition(BasicBlock::get(fn->cfgExit)->getExit(), false); ++ if (prog->getTarget()->getChipset() >= NVISA_GV100_CHIPSET) ++ bld.mkOp1(OP_FINAL, TYPE_NONE, NULL, gpEmitAddress)->fixed = 1; + bld.mkMovToReg(0, gpEmitAddress); + } + } +@@ -1714,7 +1727,8 @@ NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl) + cctl->setPredicate(cas->cc, cas->getPredicate()); + } + +- if (cas->subOp == NV50_IR_SUBOP_ATOM_CAS) { ++ if (cas->subOp == NV50_IR_SUBOP_ATOM_CAS && ++ targ->getChipset() < NVISA_GV100_CHIPSET) { + // CAS is crazy. It's 2nd source is a double reg, and the 3rd source + // should be set to the high part of the double reg or bad things will + // happen elsewhere in the universe. 
+diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +index b4c405a9ea5..8c99427d3c0 100644 +--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h ++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +@@ -64,12 +64,14 @@ private: + void handleDIV(Instruction *); // integer division, modulus + void handleRCPRSQLib(Instruction *, Value *[]); + void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt +- void handleFTZ(Instruction *); + void handleSET(CmpInstruction *); + void handleTEXLOD(TexInstruction *); + void handleShift(Instruction *); ++ void handleBREV(Instruction *); + + protected: ++ void handleFTZ(Instruction *); ++ + BuildUtil bld; + }; + +diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +index 2f46b0e886a..3a4ec3ca561 100644 +--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp ++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +@@ -558,6 +558,19 @@ ConstantFolding::expr(Instruction *i, + memset(&res.data, 0, sizeof(res.data)); + + switch (i->op) { ++ case OP_SGXT: { ++ int bits = b->data.u32; ++ if (bits) { ++ uint32_t data = a->data.u32 & (0xffffffff >> (32 - bits)); ++ if (bits < 32 && (data & (1 << (bits - 1)))) ++ data = data - (1 << bits); ++ res.data.u32 = data; ++ } ++ break; ++ } ++ case OP_BMSK: ++ res.data.u32 = ((1 << b->data.u32) - 1) << a->data.u32; ++ break; + case OP_MAD: + case OP_FMA: + case OP_MUL: +@@ -780,6 +793,23 @@ ConstantFolding::expr(Instruction *i, + memset(&res.data, 0, sizeof(res.data)); + + switch (i->op) { ++ case OP_LOP3_LUT: ++ for (int n = 0; n < 32; n++) { ++ uint8_t lut = ((a->data.u32 >> n) & 1) << 2 | ++ ((b->data.u32 >> n) & 1) << 1 | ++ ((c->data.u32 >> n) & 1); ++ res.data.u32 |= !!(i->subOp & (1 << lut)) << n; ++ } ++ break; ++ case OP_PERMT: ++ if (!i->subOp) { ++ uint64_t input = 
(uint64_t)c->data.u32 << 32 | a->data.u32; ++ uint16_t permt = b->data.u32; ++ for (int n = 0 ; n < 4; n++, permt >>= 4) ++ res.data.u32 |= ((input >> ((permt & 0xf) * 8)) & 0xff) << n * 8; ++ } else ++ return; ++ break; + case OP_INSBF: { + int offset = b->data.u32 & 0xff; + int width = (b->data.u32 >> 8) & 0xff; +@@ -1526,6 +1556,12 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) + i->subOp = 0; + break; + } ++ case OP_BREV: { ++ uint32_t res = util_bitreverse(imm0.reg.data.u32); ++ i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res)); ++ i->op = OP_MOV; ++ break; ++ } + case OP_POPCNT: { + // Only deal with 1-arg POPCNT here + if (i->srcExists(1)) +diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +index 5dcbf3c3e0c..ce0d2507dc1 100644 +--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp ++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +@@ -93,8 +93,10 @@ const char *operationStr[OP_LAST + 1] = + "and", + "or", + "xor", ++ "lop3 lut", + "shl", + "shr", ++ "shf", + "max", + "min", + "sat", +@@ -142,6 +144,7 @@ const char *operationStr[OP_LAST + 1] = + "pinterp", + "emit", + "restart", ++ "final", + "tex", + "texbias", + "texlod", +@@ -177,7 +180,10 @@ const char *operationStr[OP_LAST + 1] = + "insbf", + "extbf", + "bfind", ++ "brev", ++ "bmsk", + "permt", ++ "sgxt", + "atom", + "bar", + "vadd", +@@ -193,6 +199,7 @@ const char *operationStr[OP_LAST + 1] = + "shfl", + "vote", + "bufq", ++ "warpsync", + "(invalid)" + }; + +diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +index 6df2664da22..4e5b21d9176 100644 +--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp ++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +@@ -988,6 +988,8 @@ GCRA::coalesce(ArrayList& insns) + case 0x110: + case 0x120: + case 0x130: ++ case 0x140: ++ case 0x160: + ret = doCoalesce(insns, 
JOIN_MASK_UNION); + break; + default: +@@ -2297,13 +2299,25 @@ RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex) + if (isTextureOp(tex->op)) + textureMask(tex); + +- if (isScalarTexGM107(tex)) { +- handleScalarTexGM107(tex); +- return; +- } ++ if (targ->getChipset() < NVISA_GV100_CHIPSET) { ++ if (isScalarTexGM107(tex)) { ++ handleScalarTexGM107(tex); ++ return; ++ } + +- assert(!tex->tex.scalar); +- condenseDefs(tex); ++ assert(!tex->tex.scalar); ++ condenseDefs(tex); ++ } else { ++ if (isTextureOp(tex->op)) { ++ int defCount = tex->defCount(0xff); ++ if (defCount > 3) ++ condenseDefs(tex, 2, 3); ++ if (defCount > 1) ++ condenseDefs(tex, 0, 1); ++ } else { ++ condenseDefs(tex); ++ } ++ } + + if (isSurfaceOp(tex->op)) { + int s = tex->tex.target.getDim() + +@@ -2485,6 +2499,8 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb) + case 0x110: + case 0x120: + case 0x130: ++ case 0x140: ++ case 0x160: + texConstraintGM107(tex); + break; + default: +diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_sched_gm107.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_sched_gm107.h +new file mode 100644 +index 00000000000..54443ae2770 +--- /dev/null ++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_sched_gm107.h +@@ -0,0 +1,156 @@ ++#ifndef __NV50_IR_SCHED_GM107_H__ ++#define __NV50_IR_SCHED_GM107_H__ ++namespace nv50_ir { ++ ++class SchedDataCalculatorGM107 : public Pass ++{ ++public: ++ SchedDataCalculatorGM107(const TargetGM107 *targ) : targ(targ) {} ++ ++private: ++ struct RegScores ++ { ++ struct ScoreData { ++ int r[256]; ++ int p[8]; ++ int c; ++ } rd, wr; ++ int base; ++ ++ void rebase(const int base) ++ { ++ const int delta = this->base - base; ++ if (!delta) ++ return; ++ this->base = 0; ++ ++ for (int i = 0; i < 256; ++i) { ++ rd.r[i] += delta; ++ wr.r[i] += delta; ++ } ++ for (int i = 0; i < 8; ++i) { ++ rd.p[i] += delta; ++ wr.p[i] += delta; ++ } ++ rd.c += delta; ++ wr.c += delta; ++ } ++ void wipe() ++ { ++ memset(&rd, 0, 
sizeof(rd)); ++ memset(&wr, 0, sizeof(wr)); ++ } ++ int getLatest(const ScoreData& d) const ++ { ++ int max = 0; ++ for (int i = 0; i < 256; ++i) ++ if (d.r[i] > max) ++ max = d.r[i]; ++ for (int i = 0; i < 8; ++i) ++ if (d.p[i] > max) ++ max = d.p[i]; ++ if (d.c > max) ++ max = d.c; ++ return max; ++ } ++ inline int getLatestRd() const ++ { ++ return getLatest(rd); ++ } ++ inline int getLatestWr() const ++ { ++ return getLatest(wr); ++ } ++ inline int getLatest() const ++ { ++ return MAX2(getLatestRd(), getLatestWr()); ++ } ++ void setMax(const RegScores *that) ++ { ++ for (int i = 0; i < 256; ++i) { ++ rd.r[i] = MAX2(rd.r[i], that->rd.r[i]); ++ wr.r[i] = MAX2(wr.r[i], that->wr.r[i]); ++ } ++ for (int i = 0; i < 8; ++i) { ++ rd.p[i] = MAX2(rd.p[i], that->rd.p[i]); ++ wr.p[i] = MAX2(wr.p[i], that->wr.p[i]); ++ } ++ rd.c = MAX2(rd.c, that->rd.c); ++ wr.c = MAX2(wr.c, that->wr.c); ++ } ++ void print(int cycle) ++ { ++ for (int i = 0; i < 256; ++i) { ++ if (rd.r[i] > cycle) ++ INFO("rd $r%i @ %i\n", i, rd.r[i]); ++ if (wr.r[i] > cycle) ++ INFO("wr $r%i @ %i\n", i, wr.r[i]); ++ } ++ for (int i = 0; i < 8; ++i) { ++ if (rd.p[i] > cycle) ++ INFO("rd $p%i @ %i\n", i, rd.p[i]); ++ if (wr.p[i] > cycle) ++ INFO("wr $p%i @ %i\n", i, wr.p[i]); ++ } ++ if (rd.c > cycle) ++ INFO("rd $c @ %i\n", rd.c); ++ if (wr.c > cycle) ++ INFO("wr $c @ %i\n", wr.c); ++ } ++ }; ++ ++ RegScores *score; // for current BB ++ std::vector scoreBoards; ++ ++ const TargetGM107 *targ; ++ bool visit(Function *); ++ bool visit(BasicBlock *); ++ ++ void commitInsn(const Instruction *, int); ++ int calcDelay(const Instruction *, int) const; ++ void setDelay(Instruction *, int, const Instruction *); ++ void recordWr(const Value *, int, int); ++ void checkRd(const Value *, int, int&) const; ++ ++ inline void emitYield(Instruction *); ++ inline void emitStall(Instruction *, uint8_t); ++ inline void emitReuse(Instruction *, uint8_t); ++ inline void emitWrDepBar(Instruction *, uint8_t); ++ inline void 
emitRdDepBar(Instruction *, uint8_t); ++ inline void emitWtDepBar(Instruction *, uint8_t); ++ ++ inline int getStall(const Instruction *) const; ++ inline int getWrDepBar(const Instruction *) const; ++ inline int getRdDepBar(const Instruction *) const; ++ inline int getWtDepBar(const Instruction *) const; ++ ++ void setReuseFlag(Instruction *); ++ ++ inline void printSchedInfo(int, const Instruction *) const; ++ ++ struct LiveBarUse { ++ LiveBarUse(Instruction *insn, Instruction *usei) ++ : insn(insn), usei(usei) { } ++ Instruction *insn; ++ Instruction *usei; ++ }; ++ ++ struct LiveBarDef { ++ LiveBarDef(Instruction *insn, Instruction *defi) ++ : insn(insn), defi(defi) { } ++ Instruction *insn; ++ Instruction *defi; ++ }; ++ ++ bool insertBarriers(BasicBlock *); ++ ++ bool doesInsnWriteTo(const Instruction *insn, const Value *val) const; ++ Instruction *findFirstUse(const Instruction *) const; ++ Instruction *findFirstDef(const Instruction *) const; ++ ++ bool needRdDepBar(const Instruction *) const; ++ bool needWrDepBar(const Instruction *) const; ++}; ++ ++}; // namespace nv50_ir ++#endif +diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +index 5c6d0570ae2..765375a47df 100644 +--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp ++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp +@@ -33,7 +33,7 @@ const uint8_t Target::operationSrcNr[] = + 2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD + 3, 3, // SHLADD, XMAD + 1, 1, 1, // ABS, NEG, NOT +- 2, 2, 2, 2, 2, // AND, OR, XOR, SHL, SHR ++ 2, 2, 2, 3, 2, 2, 3, // AND, OR, XOR, LOP3_LUT, SHL, SHR, SHF + 2, 2, 1, // MAX, MIN, SAT + 1, 1, 1, 1, // CEIL, FLOOR, TRUNC, CVT + 3, 3, 3, 2, 3, 3, // SET_AND,OR,XOR, SET, SELP, SLCT +@@ -43,7 +43,7 @@ const uint8_t Target::operationSrcNr[] = + 0, 0, 0, // PRERET,CONT,BREAK + 0, 0, 0, 0, 0, 0, // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR + 1, 1, 1, 2, 1, 2, // VFETCH, 
PFETCH, AFETCH, EXPORT, LINTERP, PINTERP +- 1, 1, // EMIT, RESTART ++ 1, 1, 1, // EMIT, RESTART, FINAL + 1, 1, 1, // TEX, TXB, TXL, + 1, 1, 1, 1, 1, 1, 2, // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP + 1, 1, 2, 2, 2, 2, 2, // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA +@@ -51,13 +51,15 @@ const uint8_t Target::operationSrcNr[] = + 0, // TEXBAR + 1, 1, // DFDX, DFDY + 1, 2, 1, 2, 0, 0, // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP +- 2, 3, 2, 1, 3, // POPCNT, INSBF, EXTBF, BFIND, PERMT ++ 2, 3, 2, 1, 1, 2, 3, // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK, PERMT ++ 2, // SGXT + 2, 2, // ATOM, BAR + 2, 2, 2, 2, 3, 2, // VADD, VAVG, VMIN, VMAX, VSAD, VSET, + 2, 2, 2, 1, // VSHR, VSHL, VSEL, CCTL + 3, // SHFL + 1, // VOTE + 1, // BUFQ ++ 1, // WARPSYNC + 0 + }; + +@@ -75,10 +77,10 @@ const OpClass Target::operationClass[] = + OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, + OPCLASS_ARITH, OPCLASS_ARITH, + OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, +- // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR ++ // ABS, NEG; NOT, AND, OR, XOR, LOP3_LUT; SHL, SHR, SHF + OPCLASS_CONVERT, OPCLASS_CONVERT, +- OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, +- OPCLASS_SHIFT, OPCLASS_SHIFT, ++ OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, ++ OPCLASS_SHIFT, OPCLASS_SHIFT, OPCLASS_SHIFT, + // MAX, MIN + OPCLASS_COMPARE, OPCLASS_COMPARE, + // SAT, CEIL, FLOOR, TRUNC; CVT +@@ -103,8 +105,8 @@ const OpClass Target::operationClass[] = + OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_STORE, + // LINTERP, PINTERP + OPCLASS_SFU, OPCLASS_SFU, +- // EMIT, RESTART +- OPCLASS_CONTROL, OPCLASS_CONTROL, ++ // EMIT, RESTART, FINAL ++ OPCLASS_CONTROL, OPCLASS_CONTROL, OPCLASS_CONTROL, + // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TXLQ; TEXCSAA, TEXPREP + OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, + OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, +@@ -119,9 +121,9 @@ const OpClass 
Target::operationClass[] = + // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP + OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, + OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL, +- // POPCNT, INSBF, EXTBF, BFIND; PERMT ++ // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK; PERMT, SGXT ++ OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, + OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, +- OPCLASS_BITFIELD, + // ATOM, BAR + OPCLASS_ATOMIC, OPCLASS_CONTROL, + // VADD, VAVG, VMIN, VMAX +@@ -136,10 +138,13 @@ const OpClass Target::operationClass[] = + OPCLASS_OTHER, + // BUFQ + OPCLASS_OTHER, ++ // WARPSYNC ++ OPCLASS_OTHER, + OPCLASS_PSEUDO // LAST + }; + + ++extern Target *getTargetGV100(unsigned int chipset); + extern Target *getTargetGM107(unsigned int chipset); + extern Target *getTargetNVC0(unsigned int chipset); + extern Target *getTargetNV50(unsigned int chipset); +@@ -149,6 +154,9 @@ Target *Target::create(unsigned int chipset) + STATIC_ASSERT(ARRAY_SIZE(operationSrcNr) == OP_LAST + 1); + STATIC_ASSERT(ARRAY_SIZE(operationClass) == OP_LAST + 1); + switch (chipset & ~0xf) { ++ case 0x160: ++ case 0x140: ++ return getTargetGV100(chipset); + case 0x110: + case 0x120: + case 0x130: +diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h +index afeca14d7d1..0f7db116577 100644 +--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h ++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h +@@ -200,7 +200,7 @@ public: + uint8_t dstMods; + uint16_t srcFiles[3]; + uint16_t dstFiles; +- unsigned int minEncSize : 4; ++ unsigned int minEncSize : 5; + unsigned int vector : 1; + unsigned int predicate : 1; + unsigned int commutative : 1; +diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.cpp +new file mode 100644 +index 00000000000..fd969e1ece5 
+--- /dev/null ++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.cpp +@@ -0,0 +1,594 @@ ++/* ++ * Copyright 2020 Red Hat Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. 
++ */ ++#include "codegen/nv50_ir_target_gv100.h" ++#include "codegen/nv50_ir_lowering_gv100.h" ++#include "codegen/nv50_ir_emit_gv100.h" ++ ++namespace nv50_ir { ++ ++void ++TargetGV100::initOpInfo() ++{ ++ unsigned int i, j; ++ ++ static const operation commutative[] = ++ { ++ OP_ADD, OP_MUL, OP_MAD, OP_FMA, OP_MAX, OP_MIN, ++ OP_SET_AND, OP_SET_OR, OP_SET_XOR, OP_SET, OP_SELP, OP_SLCT ++ }; ++ ++ static const operation noDest[] = ++ { ++ OP_EXIT ++ }; ++ ++ static const operation noPred[] = ++ { ++ }; ++ ++ for (i = 0; i < DATA_FILE_COUNT; ++i) ++ nativeFileMap[i] = (DataFile)i; ++ nativeFileMap[FILE_ADDRESS] = FILE_GPR; ++ nativeFileMap[FILE_FLAGS] = FILE_PREDICATE; ++ ++ for (i = 0; i < OP_LAST; ++i) { ++ opInfo[i].variants = NULL; ++ opInfo[i].op = (operation)i; ++ opInfo[i].srcTypes = 1 << (int)TYPE_F32; ++ opInfo[i].dstTypes = 1 << (int)TYPE_F32; ++ opInfo[i].immdBits = 0; ++ opInfo[i].srcNr = operationSrcNr[i]; ++ ++ for (j = 0; j < opInfo[i].srcNr; ++j) { ++ opInfo[i].srcMods[j] = 0; ++ opInfo[i].srcFiles[j] = 1 << (int)FILE_GPR; ++ } ++ opInfo[i].dstMods = 0; ++ opInfo[i].dstFiles = 1 << (int)FILE_GPR; ++ ++ opInfo[i].hasDest = 1; ++ opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA); ++ opInfo[i].commutative = false; /* set below */ ++ opInfo[i].pseudo = (i < OP_MOV); ++ opInfo[i].predicate = !opInfo[i].pseudo; ++ opInfo[i].flow = (i >= OP_BRA && i <= OP_JOIN); ++ opInfo[i].minEncSize = 16; ++ } ++ for (i = 0; i < ARRAY_SIZE(commutative); ++i) ++ opInfo[commutative[i]].commutative = true; ++ for (i = 0; i < ARRAY_SIZE(noDest); ++i) ++ opInfo[noDest[i]].hasDest = 0; ++ for (i = 0; i < ARRAY_SIZE(noPred); ++i) ++ opInfo[noPred[i]].predicate = 0; ++} ++ ++struct opInfo { ++ struct { ++ uint8_t files; ++ uint8_t mods; ++ } src[3]; ++}; ++ ++#define SRC_NONE 0 ++#define SRC_R (1 << FILE_GPR) ++#define SRC_I (1 << FILE_MEMORY_CONST) ++#define SRC_C (1 << FILE_IMMEDIATE) ++#define SRC_RC (SRC_R | SRC_C) ++#define SRC_RI (SRC_R | SRC_I ) ++#define SRC_RIC 
(SRC_R | SRC_I | SRC_C) ++ ++#define MOD_NONE 0 ++#define MOD_NEG NV50_IR_MOD_NEG ++#define MOD_ABS NV50_IR_MOD_ABS ++#define MOD_NOT NV50_IR_MOD_NOT ++#define MOD_NA (MOD_NEG | MOD_ABS) ++ ++#define OPINFO(O,SA,MA,SB,MB,SC,MC) \ ++static struct opInfo \ ++opInfo_##O = { \ ++ .src = { { SRC_##SA, MOD_##MA }, \ ++ { SRC_##SB, MOD_##MB }, \ ++ { SRC_##SC, MOD_##MC }}, \ ++}; ++ ++ ++/* Handled by GV100LegalizeSSA. */ ++OPINFO(FABS , RIC , NA , NONE, NONE, NONE, NONE); ++OPINFO(FCMP , R , NONE, RIC , NONE, RIC , NONE); //XXX: use FSEL for mods ++OPINFO(FNEG , RIC , NA , NONE, NONE, NONE, NONE); ++OPINFO(FSET , R , NA , RIC , NA , NONE, NONE); ++OPINFO(ICMP , R , NONE, RIC , NONE, RIC , NONE); ++OPINFO(IMUL , R , NONE, RIC , NONE, NONE, NONE); ++OPINFO(INEG , RIC , NEG , NONE, NONE, NONE, NONE); ++OPINFO(ISET , R , NONE, RIC , NONE, NONE, NONE); ++OPINFO(LOP2 , R , NOT , RIC , NOT , NONE, NONE); ++OPINFO(NOT , RIC , NONE, NONE, NONE, NONE, NONE); ++OPINFO(SAT , RIC , NA , NONE, NONE, NONE, NONE); ++OPINFO(SHL , RIC , NONE, RIC , NONE, NONE, NONE); ++OPINFO(SHR , RIC , NONE, RIC , NONE, NONE, NONE); ++OPINFO(SUB , R , NONE, RIC , NEG , NONE, NONE); ++OPINFO(IMNMX , R , NONE, RIC , NONE, NONE, NONE); ++ ++/* Handled by CodeEmitterGV100. 
*/ ++OPINFO(AL2P , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(ALD , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(AST , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(ATOM , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(ATOMS , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(BAR , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(BRA , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(BMSK , R , NONE, RIC , NONE, NONE, NONE); ++OPINFO(BREV , RIC , NONE, NONE, NONE, NONE, NONE); ++OPINFO(CCTL , NONE, NONE, NONE, NONE, NONE, NONE); ++//OPINFO(CS2R , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(DADD , R , NA , RIC , NA , NONE, NONE); ++OPINFO(DFMA , R , NA , RIC , NA , RIC , NA ); ++OPINFO(DMUL , R , NA , RIC , NA , NONE, NONE); ++OPINFO(DSETP , R , NA , RIC , NA , NONE, NONE); ++OPINFO(EXIT , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(F2F , RIC , NA , NONE, NONE, NONE, NONE); ++OPINFO(F2I , RIC , NA , NONE, NONE, NONE, NONE); ++OPINFO(FADD , R , NA , RIC , NA , NONE, NONE); ++OPINFO(FFMA , R , NA , RIC , NA , RIC , NA ); ++OPINFO(FLO , RIC , NOT , NONE, NONE, NONE, NONE); ++OPINFO(FMNMX , R , NA , RIC , NA , NONE, NONE); ++OPINFO(FMUL , R , NA , RIC , NA , NONE, NONE); ++OPINFO(FRND , RIC , NA , NONE, NONE, NONE, NONE); ++OPINFO(FSET_BF , R , NA , RIC , NA , NONE, NONE); ++OPINFO(FSETP , R , NA , RIC , NA , NONE, NONE); ++OPINFO(FSWZADD , R , NONE, R , NONE, NONE, NONE); ++OPINFO(I2F , RIC , NONE, NONE, NONE, NONE, NONE); ++OPINFO(IABS , RIC , NONE, NONE, NONE, NONE, NONE); ++OPINFO(IADD3 , R , NEG , RIC , NEG , R , NEG ); ++OPINFO(IMAD , R , NONE, RIC , NONE, RIC , NEG ); ++OPINFO(IMAD_WIDE, R , NONE, RIC , NONE, RC , NEG ); ++OPINFO(IPA , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(ISBERD , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(ISETP , R , NONE, RIC , NONE, NONE, NONE); ++OPINFO(KILL , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(LD , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(LDC , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(LDL , NONE, NONE, NONE, 
NONE, NONE, NONE); ++OPINFO(LDS , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(LEA , R , NEG , I , NONE, RIC , NEG ); ++OPINFO(LOP3_LUT , R , NONE, RIC , NONE, R , NONE); ++OPINFO(MEMBAR , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(MOV , RIC , NONE, NONE, NONE, NONE, NONE); ++OPINFO(MUFU , RIC , NA , NONE, NONE, NONE, NONE); ++OPINFO(NOP , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(OUT , R , NONE, RI , NONE, NONE, NONE); ++OPINFO(PIXLD , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(PLOP3_LUT, NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(POPC , RIC , NOT , NONE, NONE, NONE, NONE); ++OPINFO(PRMT , R , NONE, RIC , NONE, RIC , NONE); ++OPINFO(RED , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(SGXT , R , NONE, RIC , NONE, NONE, NONE); ++OPINFO(S2R , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(SEL , R , NONE, RIC , NONE, NONE, NONE); ++OPINFO(SHF , R , NONE, RIC , NONE, RIC , NONE); ++OPINFO(SHFL , R , NONE, R , NONE, R , NONE); ++OPINFO(ST , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(STL , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(STS , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(SUATOM , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(SULD , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(SUST , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(TEX , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(TLD , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(TLD4 , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(TMML , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(TXD , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(TXQ , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(VOTE , NONE, NONE, NONE, NONE, NONE, NONE); ++OPINFO(WARPSYNC , R , NONE, NONE, NONE, NONE, NONE); ++ ++static const struct opInfo * ++getOpInfo(const Instruction *i) ++{ ++ switch (i->op) { ++ case OP_ABS: ++ if (isFloatType(i->dType)) ++ return &opInfo_FABS; ++ return &opInfo_IABS; ++ case OP_ADD: ++ if (isFloatType(i->dType)) { ++ if (i->dType == TYPE_F32) ++ return &opInfo_FADD; ++ else ++ return 
&opInfo_DADD; ++ } else { ++ return &opInfo_IADD3; ++ } ++ break; ++ case OP_AFETCH: return &opInfo_AL2P; ++ case OP_AND: ++ case OP_OR: ++ case OP_XOR: ++ if (i->def(0).getFile() == FILE_PREDICATE) ++ return &opInfo_PLOP3_LUT; ++ return &opInfo_LOP2; ++ case OP_ATOM: ++ if (i->src(0).getFile() == FILE_MEMORY_SHARED) ++ return &opInfo_ATOMS; ++ else ++ if (!i->defExists(0) && i->subOp < NV50_IR_SUBOP_ATOM_CAS) ++ return &opInfo_RED; ++ else ++ return &opInfo_ATOM; ++ break; ++ case OP_BAR: return &opInfo_BAR; ++ case OP_BFIND: return &opInfo_FLO; ++ case OP_BMSK: return &opInfo_BMSK; ++ case OP_BREV: return &opInfo_BREV; ++ case OP_BRA: ++ case OP_JOIN: return &opInfo_BRA; //XXX ++ case OP_CCTL: return &opInfo_CCTL; ++ case OP_CEIL: ++ case OP_CVT: ++ case OP_FLOOR: ++ case OP_TRUNC: ++ if (i->op == OP_CVT && (i->def(0).getFile() == FILE_PREDICATE || ++ i->src(0).getFile() == FILE_PREDICATE)) { ++ return &opInfo_MOV; ++ } else if (isFloatType(i->dType)) { ++ if (isFloatType(i->sType)) { ++ if (i->sType == i->dType) ++ return &opInfo_FRND; ++ else ++ return &opInfo_F2F; ++ } else { ++ return &opInfo_I2F; ++ } ++ } else { ++ if (isFloatType(i->sType)) ++ return &opInfo_F2I; ++ } ++ break; ++ case OP_COS: ++ case OP_EX2: ++ case OP_LG2: ++ case OP_RCP: ++ case OP_RSQ: ++ case OP_SIN: ++ case OP_SQRT: return &opInfo_MUFU; ++ case OP_DISCARD: return &opInfo_KILL; ++ case OP_EMIT: ++ case OP_FINAL: ++ case OP_RESTART: return &opInfo_OUT; ++ case OP_EXIT: return &opInfo_EXIT; ++ case OP_EXPORT: return &opInfo_AST; ++ case OP_FMA: ++ case OP_MAD: ++ if (isFloatType(i->dType)) { ++ if (i->dType == TYPE_F32) ++ return &opInfo_FFMA; ++ else ++ return &opInfo_DFMA; ++ } else { ++ if (typeSizeof(i->dType) != 8) ++ return &opInfo_IMAD; ++ else ++ return &opInfo_IMAD_WIDE; ++ } ++ break; ++ case OP_JOINAT: return &opInfo_NOP; //XXX ++ case OP_LINTERP: return &opInfo_IPA; ++ case OP_LOAD: ++ switch (i->src(0).getFile()) { ++ case FILE_MEMORY_CONST : return &opInfo_LDC; ++ case 
FILE_MEMORY_LOCAL : return &opInfo_LDL; ++ case FILE_MEMORY_SHARED: return &opInfo_LDS; ++ case FILE_MEMORY_GLOBAL: return &opInfo_LD; ++ default: ++ break; ++ } ++ break; ++ case OP_LOP3_LUT: return &opInfo_LOP3_LUT; ++ case OP_MAX: ++ case OP_MIN: ++ if (isFloatType(i->dType)) { ++ if (i->dType == TYPE_F32) ++ return &opInfo_FMNMX; ++ } else { ++ return &opInfo_IMNMX; ++ } ++ break; ++ case OP_MEMBAR: return &opInfo_MEMBAR; ++ case OP_MOV: return &opInfo_MOV; ++ case OP_MUL: ++ if (isFloatType(i->dType)) { ++ if (i->dType == TYPE_F32) ++ return &opInfo_FMUL; ++ else ++ return &opInfo_DMUL; ++ } ++ return &opInfo_IMUL; ++ case OP_NEG: ++ if (isFloatType(i->dType)) ++ return &opInfo_FNEG; ++ return &opInfo_INEG; ++ case OP_NOT: return &opInfo_NOT; ++ case OP_PERMT: return &opInfo_PRMT; ++ case OP_PFETCH: return &opInfo_ISBERD; ++ case OP_PIXLD: return &opInfo_PIXLD; ++ case OP_POPCNT: return &opInfo_POPC; ++ case OP_QUADOP: return &opInfo_FSWZADD; ++ case OP_RDSV: ++#if 0 ++ if (targ->isCS2RSV(i->getSrc(0)->reg.data.sv.sv)) ++ return &opInfo_CS2R; ++#endif ++ return &opInfo_S2R; ++ case OP_SAT: return &opInfo_SAT; ++ case OP_SELP: return &opInfo_SEL; ++ case OP_SET: ++ case OP_SET_AND: ++ case OP_SET_OR: ++ case OP_SET_XOR: ++ if (i->def(0).getFile() != FILE_PREDICATE) { ++ if (isFloatType(i->dType)) { ++ if (i->dType == TYPE_F32) ++ return &opInfo_FSET_BF; ++ } else { ++ if (isFloatType(i->sType)) ++ return &opInfo_FSET; ++ return &opInfo_ISET; ++ } ++ } else { ++ if (isFloatType(i->sType)) ++ if (i->sType == TYPE_F64) ++ return &opInfo_DSETP; ++ else ++ return &opInfo_FSETP; ++ else ++ return &opInfo_ISETP; ++ } ++ break; ++ case OP_SGXT: return &opInfo_SGXT; ++ case OP_SHF: return &opInfo_SHF; ++ case OP_SHFL: return &opInfo_SHFL; ++ case OP_SHL: return &opInfo_SHL; ++ case OP_SHLADD: return &opInfo_LEA; ++ case OP_SHR: return &opInfo_SHR; ++ case OP_SLCT: ++ if (isFloatType(i->sType)) ++ return &opInfo_FCMP; ++ return &opInfo_ICMP; ++ case OP_STORE: ++ switch 
(i->src(0).getFile()) { ++ case FILE_MEMORY_LOCAL : return &opInfo_STL; ++ case FILE_MEMORY_SHARED: return &opInfo_STS; ++ case FILE_MEMORY_GLOBAL: return &opInfo_ST; ++ default: ++ break; ++ } ++ break; ++ case OP_SUB: return &opInfo_SUB; ++ case OP_SULDB: ++ case OP_SULDP: return &opInfo_SULD; ++ case OP_SUREDB: ++ case OP_SUREDP: return &opInfo_SUATOM; ++ case OP_SUSTB: ++ case OP_SUSTP: return &opInfo_SUST; ++ case OP_TEX: ++ case OP_TXB: ++ case OP_TXL: return &opInfo_TEX; ++ case OP_TXD: return &opInfo_TXD; ++ case OP_TXF: return &opInfo_TLD; ++ case OP_TXG: return &opInfo_TLD4; ++ case OP_TXLQ: return &opInfo_TMML; ++ case OP_TXQ: return &opInfo_TXQ; ++ case OP_VFETCH: return &opInfo_ALD; ++ case OP_VOTE: return &opInfo_VOTE; ++ case OP_WARPSYNC: return &opInfo_WARPSYNC; ++ default: ++ break; ++ } ++ return NULL; ++} ++ ++bool ++TargetGV100::isSatSupported(const Instruction *i) const ++{ ++ switch (i->dType) { ++ case TYPE_F32: ++ switch (i->op) { ++ case OP_ADD: ++ case OP_FMA: ++ case OP_MAD: ++ case OP_MUL: return true; ++ default: ++ break; ++ } ++ break; ++ default: ++ break; ++ } ++ return false; ++} ++ ++bool ++TargetGV100::isModSupported(const Instruction *i, int s, Modifier mod) const ++{ ++ const struct opInfo *info = nv50_ir::getOpInfo(i); ++ uint8_t mods = 0; ++ if (info && s < (int)ARRAY_SIZE(info->src)) ++ mods = info->src[s].mods; ++ return (mod & Modifier(mods)) == mod; ++} ++ ++bool ++TargetGV100::isOpSupported(operation op, DataType ty) const ++{ ++ if (op == OP_MAD || op == OP_FMA) ++ return true; ++ if (ty == TYPE_F32) { ++ if (op == OP_MAX) ++ return true; ++ } ++ if (op == OP_RSQ) ++ return true; ++ if (op == OP_SET || ++ op == OP_SET_AND || ++ op == OP_SET_OR || ++ op == OP_SET_XOR) ++ return true; ++ if (op == OP_SHLADD) ++ return true; ++ return false; ++} ++ ++bool ++TargetGV100::isBarrierRequired(const Instruction *i) const ++{ ++ switch (i->op) { ++ case OP_BREV: ++ return true; ++ default: ++ break; ++ } ++ ++ return 
TargetGM107::isBarrierRequired(i); ++} ++ ++bool ++TargetGV100::insnCanLoad(const Instruction *i, int s, ++ const Instruction *ld) const ++{ ++ const struct opInfo *info = nv50_ir::getOpInfo(i); ++ uint16_t files = 0; ++ ++ if (ld->src(0).getFile() == FILE_IMMEDIATE && ld->getSrc(0)->reg.data.u64 == 0) ++ return (!i->isPseudo() && ++ !i->asTex() && ++ i->op != OP_EXPORT && i->op != OP_STORE); ++ ++ if (ld->src(0).isIndirect(0)) ++ return false; ++ ++ if (info && s < (int)ARRAY_SIZE(info->src)) { ++ files = info->src[s].files; ++ if ((s == 1 && i->srcExists(2) && i->src(2).getFile() != FILE_GPR) || ++ (s == 2 && i->srcExists(1) && i->src(1).getFile() != FILE_GPR)) { ++ files &= ~(1 << FILE_MEMORY_CONST); ++ files &= ~(1 << FILE_IMMEDIATE); ++ } else ++ if ((i->op == OP_SHL || i->op == OP_SHR) && ++ ((s == 0 && i->srcExists(1) && i->src(1).getFile() != FILE_GPR) || ++ (s == 1 && i->srcExists(0) && i->src(0).getFile() != FILE_GPR))) { ++ files &= ~(1 << FILE_MEMORY_CONST); ++ files &= ~(1 << FILE_IMMEDIATE); ++ } ++ } ++ ++ if (ld->src(0).getFile() == FILE_IMMEDIATE) { ++ if (i->sType == TYPE_F64) { ++ if (ld->getSrc(0)->asImm()->reg.data.u64 & 0x00000000ffffffff) ++ return false; ++ } ++ } ++ ++ return (files & (1 << ld->src(0).getFile())); ++} ++ ++void ++TargetGV100::getBuiltinCode(const uint32_t **code, uint32_t *size) const ++{ ++ //XXX: find out why gv100 (tu1xx is fine) hangs without this ++ static uint32_t builtin[] = { ++ 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, ++ 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, ++ 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, ++ 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, ++ 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, ++ 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, ++ 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, ++ 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, ++ 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, ++ 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, ++ 0x0000794d, 0x00000000, 0x03800000, 
0x03ffde00, ++ 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, ++ 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, ++ 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, ++ 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, ++ 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, ++ 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, ++ 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, ++ 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, ++ 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, ++ 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, ++ 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, ++ 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, ++ 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, ++ 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, ++ 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, ++ 0x0000794d, 0x00000000, 0x03800000, 0x03ffde00, ++ }; ++ *code = builtin; ++ *size = sizeof(builtin); ++} ++ ++uint32_t ++TargetGV100::getBuiltinOffset(int builtin) const ++{ ++ return 0; ++} ++ ++bool ++TargetGV100::runLegalizePass(Program *prog, CGStage stage) const ++{ ++ if (stage == CG_STAGE_PRE_SSA) { ++ GM107LoweringPass pass1(prog); ++ GV100LoweringPass pass2(prog); ++ pass1.run(prog, false, true); ++ pass2.run(prog, false, true); ++ return true; ++ } else ++ if (stage == CG_STAGE_SSA) { ++ GV100LegalizeSSA pass(prog); ++ return pass.run(prog, false, true); ++ } else ++ if (stage == CG_STAGE_POST_RA) { ++ NVC0LegalizePostRA pass(prog); ++ return pass.run(prog, false, true); ++ } ++ return false; ++} ++ ++CodeEmitter * ++TargetGV100::getCodeEmitter(Program::Type type) ++{ ++ return new CodeEmitterGV100(this); ++} ++ ++TargetGV100::TargetGV100(unsigned int chipset) ++ : TargetGM107(chipset) ++{ ++ initOpInfo(); ++}; ++ ++Target *getTargetGV100(unsigned int chipset) ++{ ++ return new TargetGV100(chipset); ++} ++ ++}; +diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.h +new file mode 100644 +index 
00000000000..897e6a22d30 +--- /dev/null ++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.h +@@ -0,0 +1,52 @@ ++/* ++ * Copyright 2020 Red Hat Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. 
++ */ ++#ifndef __NV50_IR_TARGET_GV100_H__ ++#define __NV50_IR_TARGET_GV100_H__ ++#include "codegen/nv50_ir_target_gm107.h" ++ ++namespace nv50_ir { ++ ++class TargetGV100 : public TargetGM107 { ++public: ++ TargetGV100(unsigned int chipset); ++ ++ virtual CodeEmitter *getCodeEmitter(Program::Type); ++ ++ virtual bool runLegalizePass(Program *, CGStage stage) const; ++ ++ virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const; ++ virtual uint32_t getBuiltinOffset(int builtin) const; ++ ++ virtual bool insnCanLoad(const Instruction *, int, const Instruction *) const; ++ virtual bool isOpSupported(operation, DataType) const; ++ virtual bool isModSupported(const Instruction *, int s, Modifier) const; ++ virtual bool isSatSupported(const Instruction *) const; ++ ++ virtual bool isBarrierRequired(const Instruction *) const; ++ ++private: ++ void initOpInfo(); ++ void initProps(const struct opProperties *, int); ++}; ++ ++}; ++#endif +diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp +index 60134b445db..ed5b343ccba 100644 +--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp ++++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp +@@ -30,7 +30,7 @@ Target *getTargetNVC0(unsigned int chipset) + } + + TargetNVC0::TargetNVC0(unsigned int card) : +- Target(card < 0x110, false, card >= 0xe4) ++ Target(card < 0x110, false, card >= 0xe4 && card < 0x140) + { + chipset = card; + initOpInfo(); +diff --git a/src/gallium/drivers/nouveau/meson.build b/src/gallium/drivers/nouveau/meson.build +index 7a1d18a6394..68cfebdf20c 100644 +--- a/src/gallium/drivers/nouveau/meson.build ++++ b/src/gallium/drivers/nouveau/meson.build +@@ -150,17 +150,31 @@ files_libnouveau = files( + 'codegen/nv50_ir_util.cpp', + 'codegen/nv50_ir_util.h', + 'codegen/unordered_set.h', ++ 'codegen/nv50_ir_emit_gv100.cpp', ++ 'codegen/nv50_ir_emit_gv100.h', + 
'codegen/nv50_ir_emit_gk110.cpp', + 'codegen/nv50_ir_emit_gm107.cpp', + 'codegen/nv50_ir_emit_nvc0.cpp', ++ 'codegen/nv50_ir_lowering_gv100.cpp', ++ 'codegen/nv50_ir_lowering_gv100.h', + 'codegen/nv50_ir_lowering_gm107.cpp', + 'codegen/nv50_ir_lowering_gm107.h', + 'codegen/nv50_ir_lowering_nvc0.cpp', + 'codegen/nv50_ir_lowering_nvc0.h', ++ 'codegen/nv50_ir_target_gv100.cpp', ++ 'codegen/nv50_ir_target_gv100.h', + 'codegen/nv50_ir_target_gm107.cpp', + 'codegen/nv50_ir_target_gm107.h', + 'codegen/nv50_ir_target_nvc0.cpp', + 'codegen/nv50_ir_target_nvc0.h', ++ 'nvc0/cla0c0qmd.h', ++ 'nvc0/clc0c0qmd.h', ++ 'nvc0/clc3c0qmd.h', ++ 'nvc0/drf.h', ++ 'nvc0/qmd.h', ++ 'nvc0/qmda0c0.c', ++ 'nvc0/qmdc0c0.c', ++ 'nvc0/qmdc3c0.c', + 'nvc0/gm107_texture.xml.h', + 'nvc0/nvc0_3d.xml.h', + 'nvc0/nvc0_compute.c', +diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c +index de9cce3812a..8606ba43c1a 100644 +--- a/src/gallium/drivers/nouveau/nouveau_screen.c ++++ b/src/gallium/drivers/nouveau/nouveau_screen.c +@@ -188,7 +188,11 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) + if (nv_dbg) + nouveau_mesa_debug = atoi(nv_dbg); + +- screen->prefer_nir = debug_get_bool_option("NV50_PROG_USE_NIR", false); ++ if (dev->chipset < 0x140) ++ screen->prefer_nir = debug_get_bool_option("NV50_PROG_USE_NIR", false); ++ else ++ screen->prefer_nir = true; ++ + screen->force_enable_cl = debug_get_bool_option("NOUVEAU_ENABLE_CL", false); + if (screen->force_enable_cl) + glsl_type_singleton_init_or_ref(); +diff --git a/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h +index 899d73d7398..31e7cf82233 100644 +--- a/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h ++++ b/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h +@@ -218,9 +218,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #define NV50_2D_PATTERN_SELECT_BITMAP_1X64 0x00000002 + #define NV50_2D_PATTERN_SELECT_COLOR 0x00000003 + +-#define NVC0_2D_UNK02B8(i0) (0x000002b8 + 0x4*(i0)) +-#define NVC0_2D_UNK02B8__ESIZE 0x00000004 +-#define NVC0_2D_UNK02B8__LEN 0x00000009 ++#define NVC0_2D_SET_DST_COLOR_RENDER_TO_ZETA_SURFACE 0x000002b8 + + #define NVC0_2D_UNK2DC 0x000002dc + +diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h +index 664bfae9f64..fac195d4846 100644 +--- a/src/gallium/drivers/nouveau/nv_object.xml.h ++++ b/src/gallium/drivers/nouveau/nv_object.xml.h +@@ -195,6 +195,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #define GM200_3D_CLASS 0x0000b197 + #define GP100_3D_CLASS 0x0000c097 + #define GP102_3D_CLASS 0x0000c197 ++#define GV100_3D_CLASS 0x0000c397 ++#define TU102_3D_CLASS 0x0000c597 + #define NV50_2D_CLASS 0x0000502d + #define NVC0_2D_CLASS 0x0000902d + #define NV50_COMPUTE_CLASS 0x000050c0 +@@ -207,6 +209,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #define GM200_COMPUTE_CLASS 0x0000b1c0 + #define GP100_COMPUTE_CLASS 0x0000c0c0 + #define GP104_COMPUTE_CLASS 0x0000c1c0 ++#define GV100_COMPUTE_CLASS 0x0000c3c0 ++#define TU102_COMPUTE_CLASS 0x0000c5c0 + #define NV84_CRYPT_CLASS 0x000074c1 + #define BLOB_NVC0_PCOPY1_CLASS 0x000090b8 + #define BLOB_NVC0_PCOPY0_CLASS 0x000090b5 +diff --git a/src/gallium/drivers/nouveau/nvc0/cla0c0qmd.h b/src/gallium/drivers/nouveau/nvc0/cla0c0qmd.h +new file mode 100644 +index 00000000000..c0829f1cdc2 +--- /dev/null ++++ b/src/gallium/drivers/nouveau/nvc0/cla0c0qmd.h +@@ -0,0 +1,660 @@ ++/******************************************************************************* ++ Copyright (c) 2016 NVIDIA Corporation ++ ++ Permission is hereby granted, free of charge, to any person obtaining a copy ++ of this software and associated documentation files (the "Software"), to ++ deal in the Software without restriction, including without limitation the ++ rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ sell copies of the Software, and to permit persons to whom the Software is ++ furnished to do so, subject to the following conditions: ++ ++ The above copyright notice and this permission notice shall be ++ included in all copies or substantial portions of the Software. ++ ++ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ DEALINGS IN THE SOFTWARE. 
++ ++*******************************************************************************/ ++ ++/* AUTO GENERATED FILE -- DO NOT EDIT */ ++ ++#ifndef __CLA0C0QMD_H__ ++#define __CLA0C0QMD_H__ ++ ++/* ++** Queue Meta Data, Version 00_06 ++ */ ++ ++// The below C preprocessor definitions describe "multi-word" structures, where ++// fields may have bit numbers beyond 32. For example, MW(127:96) means ++// the field is in bits 0-31 of word number 3 of the structure. The "MW(X:Y)" ++// syntax is to distinguish from similar "X:Y" single-word definitions: the ++// macros historically used for single-word definitions would fail with ++// multi-word definitions. ++// ++// See nvmisc.h:DRF_VAL_MW() in the source code of the kernel ++// interface layer of nvidia.ko for an example of how to manipulate ++// these MW(X:Y) definitions. ++ ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_A MW(30:0) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_B MW(31:31) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_C MW(62:32) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_D MW(63:63) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_E MW(94:64) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_F MW(95:95) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_G MW(126:96) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_H MW(127:127) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_A_A MW(159:128) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_I MW(191:160) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_J MW(196:192) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_A MW(199:197) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_K MW(200:200) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_K_FALSE 0x00000000 ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_K_TRUE 0x00000001 ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_L MW(201:201) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_L_FALSE 0x00000000 ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_L_TRUE 0x00000001 ++#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE0 MW(202:202) ++#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE0_FALSE 0x00000000 
++#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE0_TRUE 0x00000001 ++#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE1 MW(203:203) ++#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE1_FALSE 0x00000000 ++#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE1_TRUE 0x00000001 ++#define NVA0C0_QMDV00_06_QMD_RESERVED_B MW(207:204) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_M MW(222:208) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_N MW(223:223) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_N_FALSE 0x00000000 ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_N_TRUE 0x00000001 ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_O MW(248:224) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_C MW(249:249) ++#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_HEADER_CACHE MW(250:250) ++#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE 0x00000000 ++#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE 0x00000001 ++#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_SAMPLER_CACHE MW(251:251) ++#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE 0x00000000 ++#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE 0x00000001 ++#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_DATA_CACHE MW(252:252) ++#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_DATA_CACHE_FALSE 0x00000000 ++#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_DATA_CACHE_TRUE 0x00000001 ++#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_DATA_CACHE MW(253:253) ++#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_DATA_CACHE_FALSE 0x00000000 ++#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_DATA_CACHE_TRUE 0x00000001 ++#define NVA0C0_QMDV00_06_INVALIDATE_INSTRUCTION_CACHE MW(254:254) ++#define NVA0C0_QMDV00_06_INVALIDATE_INSTRUCTION_CACHE_FALSE 0x00000000 ++#define NVA0C0_QMDV00_06_INVALIDATE_INSTRUCTION_CACHE_TRUE 0x00000001 ++#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_CONSTANT_CACHE MW(255:255) ++#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE 0x00000000 ++#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE 0x00000001 ++#define 
NVA0C0_QMDV00_06_PROGRAM_OFFSET MW(287:256) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_P MW(319:288) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_Q MW(327:320) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_D MW(335:328) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_R MW(351:336) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_S MW(357:352) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_E MW(365:358) ++#define NVA0C0_QMDV00_06_RELEASE_MEMBAR_TYPE MW(366:366) ++#define NVA0C0_QMDV00_06_RELEASE_MEMBAR_TYPE_FE_NONE 0x00000000 ++#define NVA0C0_QMDV00_06_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR 0x00000001 ++#define NVA0C0_QMDV00_06_CWD_MEMBAR_TYPE MW(369:368) ++#define NVA0C0_QMDV00_06_CWD_MEMBAR_TYPE_L1_NONE 0x00000000 ++#define NVA0C0_QMDV00_06_CWD_MEMBAR_TYPE_L1_SYSMEMBAR 0x00000001 ++#define NVA0C0_QMDV00_06_CWD_MEMBAR_TYPE_L1_MEMBAR 0x00000003 ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_T MW(370:370) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_T_FALSE 0x00000000 ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_T_TRUE 0x00000001 ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_U MW(371:371) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_U_FALSE 0x00000000 ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_U_TRUE 0x00000001 ++#define NVA0C0_QMDV00_06_THROTTLED MW(372:372) ++#define NVA0C0_QMDV00_06_THROTTLED_FALSE 0x00000000 ++#define NVA0C0_QMDV00_06_THROTTLED_TRUE 0x00000001 ++#define NVA0C0_QMDV00_06_QMD_RESERVED_E2_A MW(376:376) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_E2_B MW(377:377) ++#define NVA0C0_QMDV00_06_API_VISIBLE_CALL_LIMIT MW(378:378) ++#define NVA0C0_QMDV00_06_API_VISIBLE_CALL_LIMIT__32 0x00000000 ++#define NVA0C0_QMDV00_06_API_VISIBLE_CALL_LIMIT_NO_CHECK 0x00000001 ++#define NVA0C0_QMDV00_06_SHARED_MEMORY_BANK_MAPPING MW(379:379) ++#define NVA0C0_QMDV00_06_SHARED_MEMORY_BANK_MAPPING_FOUR_BYTES_PER_BANK 0x00000000 ++#define NVA0C0_QMDV00_06_SHARED_MEMORY_BANK_MAPPING_EIGHT_BYTES_PER_BANK 0x00000001 ++#define NVA0C0_QMDV00_06_SAMPLER_INDEX MW(382:382) ++#define NVA0C0_QMDV00_06_SAMPLER_INDEX_INDEPENDENTLY 
0x00000000 ++#define NVA0C0_QMDV00_06_SAMPLER_INDEX_VIA_HEADER_INDEX 0x00000001 ++#define NVA0C0_QMDV00_06_QMD_RESERVED_E3_A MW(383:383) ++#define NVA0C0_QMDV00_06_CTA_RASTER_WIDTH MW(415:384) ++#define NVA0C0_QMDV00_06_CTA_RASTER_HEIGHT MW(431:416) ++#define NVA0C0_QMDV00_06_CTA_RASTER_DEPTH MW(447:432) ++#define NVA0C0_QMDV00_06_CTA_RASTER_WIDTH_RESUME MW(479:448) ++#define NVA0C0_QMDV00_06_CTA_RASTER_HEIGHT_RESUME MW(495:480) ++#define NVA0C0_QMDV00_06_CTA_RASTER_DEPTH_RESUME MW(511:496) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_V MW(535:512) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_F MW(542:536) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_W MW(543:543) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_W_FALSE 0x00000000 ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_W_TRUE 0x00000001 ++#define NVA0C0_QMDV00_06_SHARED_MEMORY_SIZE MW(561:544) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_G MW(575:562) ++#define NVA0C0_QMDV00_06_QMD_VERSION MW(579:576) ++#define NVA0C0_QMDV00_06_QMD_MAJOR_VERSION MW(583:580) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_H MW(591:584) ++#define NVA0C0_QMDV00_06_CTA_THREAD_DIMENSION0 MW(607:592) ++#define NVA0C0_QMDV00_06_CTA_THREAD_DIMENSION1 MW(623:608) ++#define NVA0C0_QMDV00_06_CTA_THREAD_DIMENSION2 MW(639:624) ++#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_VALID(i) MW((640+(i)*1):(640+(i)*1)) ++#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_VALID_FALSE 0x00000000 ++#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_VALID_TRUE 0x00000001 ++#define NVA0C0_QMDV00_06_QMD_RESERVED_I MW(668:648) ++#define NVA0C0_QMDV00_06_L1_CONFIGURATION MW(671:669) ++#define NVA0C0_QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB 0x00000001 ++#define NVA0C0_QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB 0x00000002 ++#define NVA0C0_QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB 0x00000003 ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_X MW(703:672) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_Y MW(735:704) ++#define 
NVA0C0_QMDV00_06_RELEASE0_ADDRESS_LOWER MW(767:736) ++#define NVA0C0_QMDV00_06_RELEASE0_ADDRESS_UPPER MW(775:768) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_J MW(783:776) ++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP MW(790:788) ++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_ADD 0x00000000 ++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_MIN 0x00000001 ++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_MAX 0x00000002 ++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_INC 0x00000003 ++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_DEC 0x00000004 ++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_AND 0x00000005 ++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_OR 0x00000006 ++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_XOR 0x00000007 ++#define NVA0C0_QMDV00_06_QMD_RESERVED_K MW(791:791) ++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_FORMAT MW(793:792) ++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 ++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_FORMAT_SIGNED_32 0x00000001 ++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_ENABLE MW(794:794) ++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_ENABLE_FALSE 0x00000000 ++#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_ENABLE_TRUE 0x00000001 ++#define NVA0C0_QMDV00_06_RELEASE0_STRUCTURE_SIZE MW(799:799) ++#define NVA0C0_QMDV00_06_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 ++#define NVA0C0_QMDV00_06_RELEASE0_STRUCTURE_SIZE_ONE_WORD 0x00000001 ++#define NVA0C0_QMDV00_06_RELEASE0_PAYLOAD MW(831:800) ++#define NVA0C0_QMDV00_06_RELEASE1_ADDRESS_LOWER MW(863:832) ++#define NVA0C0_QMDV00_06_RELEASE1_ADDRESS_UPPER MW(871:864) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_L MW(879:872) ++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP MW(886:884) ++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_ADD 0x00000000 ++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_MIN 0x00000001 ++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_MAX 0x00000002 ++#define 
NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_INC 0x00000003 ++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_DEC 0x00000004 ++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_AND 0x00000005 ++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_OR 0x00000006 ++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_XOR 0x00000007 ++#define NVA0C0_QMDV00_06_QMD_RESERVED_M MW(887:887) ++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_FORMAT MW(889:888) ++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 ++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_FORMAT_SIGNED_32 0x00000001 ++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_ENABLE MW(890:890) ++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_ENABLE_FALSE 0x00000000 ++#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_ENABLE_TRUE 0x00000001 ++#define NVA0C0_QMDV00_06_RELEASE1_STRUCTURE_SIZE MW(895:895) ++#define NVA0C0_QMDV00_06_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 ++#define NVA0C0_QMDV00_06_RELEASE1_STRUCTURE_SIZE_ONE_WORD 0x00000001 ++#define NVA0C0_QMDV00_06_RELEASE1_PAYLOAD MW(927:896) ++#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_ADDR_LOWER(i) MW((959+(i)*64):(928+(i)*64)) ++#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_ADDR_UPPER(i) MW((967+(i)*64):(960+(i)*64)) ++#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_RESERVED_ADDR(i) MW((973+(i)*64):(968+(i)*64)) ++#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_INVALIDATE(i) MW((974+(i)*64):(974+(i)*64)) ++#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_INVALIDATE_FALSE 0x00000000 ++#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_INVALIDATE_TRUE 0x00000001 ++#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_SIZE(i) MW((991+(i)*64):(975+(i)*64)) ++#define NVA0C0_QMDV00_06_SHADER_LOCAL_MEMORY_LOW_SIZE MW(1463:1440) ++#define NVA0C0_QMDV00_06_QMD_RESERVED_N MW(1466:1464) ++#define NVA0C0_QMDV00_06_BARRIER_COUNT MW(1471:1467) ++#define NVA0C0_QMDV00_06_SHADER_LOCAL_MEMORY_HIGH_SIZE MW(1495:1472) ++#define NVA0C0_QMDV00_06_REGISTER_COUNT MW(1503:1496) ++#define 
NVA0C0_QMDV00_06_SHADER_LOCAL_MEMORY_CRS_SIZE MW(1527:1504) ++#define NVA0C0_QMDV00_06_SASS_VERSION MW(1535:1528) ++#define NVA0C0_QMDV00_06_QMD_SPARE_A MW(1567:1536) ++#define NVA0C0_QMDV00_06_QMD_SPARE_B MW(1599:1568) ++#define NVA0C0_QMDV00_06_QMD_SPARE_C MW(1631:1600) ++#define NVA0C0_QMDV00_06_QMD_SPARE_D MW(1663:1632) ++#define NVA0C0_QMDV00_06_QMD_SPARE_E MW(1695:1664) ++#define NVA0C0_QMDV00_06_QMD_SPARE_F MW(1727:1696) ++#define NVA0C0_QMDV00_06_QMD_SPARE_G MW(1759:1728) ++#define NVA0C0_QMDV00_06_QMD_SPARE_H MW(1791:1760) ++#define NVA0C0_QMDV00_06_QMD_SPARE_I MW(1823:1792) ++#define NVA0C0_QMDV00_06_QMD_SPARE_J MW(1855:1824) ++#define NVA0C0_QMDV00_06_QMD_SPARE_K MW(1887:1856) ++#define NVA0C0_QMDV00_06_QMD_SPARE_L MW(1919:1888) ++#define NVA0C0_QMDV00_06_QMD_SPARE_M MW(1951:1920) ++#define NVA0C0_QMDV00_06_QMD_SPARE_N MW(1983:1952) ++#define NVA0C0_QMDV00_06_DEBUG_ID_UPPER MW(2015:1984) ++#define NVA0C0_QMDV00_06_DEBUG_ID_LOWER MW(2047:2016) ++ ++ ++/* ++** Queue Meta Data, Version 01_06 ++ */ ++ ++#define NVA0C0_QMDV01_06_OUTER_PUT MW(30:0) ++#define NVA0C0_QMDV01_06_OUTER_OVERFLOW MW(31:31) ++#define NVA0C0_QMDV01_06_OUTER_GET MW(62:32) ++#define NVA0C0_QMDV01_06_OUTER_STICKY_OVERFLOW MW(63:63) ++#define NVA0C0_QMDV01_06_INNER_GET MW(94:64) ++#define NVA0C0_QMDV01_06_INNER_OVERFLOW MW(95:95) ++#define NVA0C0_QMDV01_06_INNER_PUT MW(126:96) ++#define NVA0C0_QMDV01_06_INNER_STICKY_OVERFLOW MW(127:127) ++#define NVA0C0_QMDV01_06_QMD_RESERVED_A_A MW(159:128) ++#define NVA0C0_QMDV01_06_SCHEDULER_NEXT_QMD_POINTER MW(191:160) ++#define NVA0C0_QMDV01_06_QMD_GROUP_ID MW(197:192) ++#define NVA0C0_QMDV01_06_QMD_RESERVED_A MW(199:198) ++#define NVA0C0_QMDV01_06_SCHEDULE_ON_PUT_UPDATE_ENABLE MW(200:200) ++#define NVA0C0_QMDV01_06_SCHEDULE_ON_PUT_UPDATE_ENABLE_FALSE 0x00000000 ++#define NVA0C0_QMDV01_06_SCHEDULE_ON_PUT_UPDATE_ENABLE_TRUE 0x00000001 ++#define NVA0C0_QMDV01_06_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST MW(201:201) ++#define 
NVA0C0_QMDV01_06_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000 ++#define NVA0C0_QMDV01_06_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001 ++#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE0 MW(202:202) ++#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE0_FALSE 0x00000000 ++#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE0_TRUE 0x00000001 ++#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE1 MW(203:203) ++#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE1_FALSE 0x00000000 ++#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE1_TRUE 0x00000001 ++#define NVA0C0_QMDV01_06_REQUIRE_SCHEDULING_PCAS MW(204:204) ++#define NVA0C0_QMDV01_06_REQUIRE_SCHEDULING_PCAS_FALSE 0x00000000 ++#define NVA0C0_QMDV01_06_REQUIRE_SCHEDULING_PCAS_TRUE 0x00000001 ++#define NVA0C0_QMDV01_06_QMD_RESERVED_B MW(207:205) ++#define NVA0C0_QMDV01_06_SKED_PRIVATE_LIST_ADDR MW(222:208) ++#define NVA0C0_QMDV01_06_SKED_PRIVATE_LIST_VALID MW(223:223) ++#define NVA0C0_QMDV01_06_SKED_PRIVATE_LIST_VALID_FALSE 0x00000000 ++#define NVA0C0_QMDV01_06_SKED_PRIVATE_LIST_VALID_TRUE 0x00000001 ++#define NVA0C0_QMDV01_06_CIRCULAR_QUEUE_SIZE MW(248:224) ++#define NVA0C0_QMDV01_06_QMD_RESERVED_C MW(249:249) ++#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_HEADER_CACHE MW(250:250) ++#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE 0x00000000 ++#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE 0x00000001 ++#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_SAMPLER_CACHE MW(251:251) ++#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE 0x00000000 ++#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE 0x00000001 ++#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_DATA_CACHE MW(252:252) ++#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_DATA_CACHE_FALSE 0x00000000 ++#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_DATA_CACHE_TRUE 0x00000001 ++#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_DATA_CACHE MW(253:253) ++#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_DATA_CACHE_FALSE 0x00000000 ++#define 
NVA0C0_QMDV01_06_INVALIDATE_SHADER_DATA_CACHE_TRUE 0x00000001 ++#define NVA0C0_QMDV01_06_INVALIDATE_INSTRUCTION_CACHE MW(254:254) ++#define NVA0C0_QMDV01_06_INVALIDATE_INSTRUCTION_CACHE_FALSE 0x00000000 ++#define NVA0C0_QMDV01_06_INVALIDATE_INSTRUCTION_CACHE_TRUE 0x00000001 ++#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_CONSTANT_CACHE MW(255:255) ++#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE 0x00000000 ++#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE 0x00000001 ++#define NVA0C0_QMDV01_06_PROGRAM_OFFSET MW(287:256) ++#define NVA0C0_QMDV01_06_CIRCULAR_QUEUE_ADDR_LOWER MW(319:288) ++#define NVA0C0_QMDV01_06_CIRCULAR_QUEUE_ADDR_UPPER MW(327:320) ++#define NVA0C0_QMDV01_06_QMD_RESERVED_D MW(335:328) ++#define NVA0C0_QMDV01_06_CIRCULAR_QUEUE_ENTRY_SIZE MW(351:336) ++#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_ID MW(357:352) ++#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE MW(365:358) ++#define NVA0C0_QMDV01_06_RELEASE_MEMBAR_TYPE MW(366:366) ++#define NVA0C0_QMDV01_06_RELEASE_MEMBAR_TYPE_FE_NONE 0x00000000 ++#define NVA0C0_QMDV01_06_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR 0x00000001 ++#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_INCR_ENABLE MW(367:367) ++#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE 0x00000000 ++#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE 0x00000001 ++#define NVA0C0_QMDV01_06_CWD_MEMBAR_TYPE MW(369:368) ++#define NVA0C0_QMDV01_06_CWD_MEMBAR_TYPE_L1_NONE 0x00000000 ++#define NVA0C0_QMDV01_06_CWD_MEMBAR_TYPE_L1_SYSMEMBAR 0x00000001 ++#define NVA0C0_QMDV01_06_CWD_MEMBAR_TYPE_L1_MEMBAR 0x00000003 ++#define NVA0C0_QMDV01_06_SEQUENTIALLY_RUN_CTAS MW(370:370) ++#define NVA0C0_QMDV01_06_SEQUENTIALLY_RUN_CTAS_FALSE 0x00000000 ++#define NVA0C0_QMDV01_06_SEQUENTIALLY_RUN_CTAS_TRUE 0x00000001 ++#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_DECR_ENABLE MW(371:371) ++#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE 0x00000000 ++#define 
NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE 0x00000001 ++#define NVA0C0_QMDV01_06_THROTTLED MW(372:372) ++#define NVA0C0_QMDV01_06_THROTTLED_FALSE 0x00000000 ++#define NVA0C0_QMDV01_06_THROTTLED_TRUE 0x00000001 ++#define NVA0C0_QMDV01_06_FP32_NAN_BEHAVIOR MW(376:376) ++#define NVA0C0_QMDV01_06_FP32_NAN_BEHAVIOR_LEGACY 0x00000000 ++#define NVA0C0_QMDV01_06_FP32_NAN_BEHAVIOR_FP64_COMPATIBLE 0x00000001 ++#define NVA0C0_QMDV01_06_FP32_F2I_NAN_BEHAVIOR MW(377:377) ++#define NVA0C0_QMDV01_06_FP32_F2I_NAN_BEHAVIOR_PASS_ZERO 0x00000000 ++#define NVA0C0_QMDV01_06_FP32_F2I_NAN_BEHAVIOR_PASS_INDEFINITE 0x00000001 ++#define NVA0C0_QMDV01_06_API_VISIBLE_CALL_LIMIT MW(378:378) ++#define NVA0C0_QMDV01_06_API_VISIBLE_CALL_LIMIT__32 0x00000000 ++#define NVA0C0_QMDV01_06_API_VISIBLE_CALL_LIMIT_NO_CHECK 0x00000001 ++#define NVA0C0_QMDV01_06_SHARED_MEMORY_BANK_MAPPING MW(379:379) ++#define NVA0C0_QMDV01_06_SHARED_MEMORY_BANK_MAPPING_FOUR_BYTES_PER_BANK 0x00000000 ++#define NVA0C0_QMDV01_06_SHARED_MEMORY_BANK_MAPPING_EIGHT_BYTES_PER_BANK 0x00000001 ++#define NVA0C0_QMDV01_06_SAMPLER_INDEX MW(382:382) ++#define NVA0C0_QMDV01_06_SAMPLER_INDEX_INDEPENDENTLY 0x00000000 ++#define NVA0C0_QMDV01_06_SAMPLER_INDEX_VIA_HEADER_INDEX 0x00000001 ++#define NVA0C0_QMDV01_06_FP32_NARROW_INSTRUCTION MW(383:383) ++#define NVA0C0_QMDV01_06_FP32_NARROW_INSTRUCTION_KEEP_DENORMS 0x00000000 ++#define NVA0C0_QMDV01_06_FP32_NARROW_INSTRUCTION_FLUSH_DENORMS 0x00000001 ++#define NVA0C0_QMDV01_06_CTA_RASTER_WIDTH MW(415:384) ++#define NVA0C0_QMDV01_06_CTA_RASTER_HEIGHT MW(431:416) ++#define NVA0C0_QMDV01_06_CTA_RASTER_DEPTH MW(447:432) ++#define NVA0C0_QMDV01_06_CTA_RASTER_WIDTH_RESUME MW(479:448) ++#define NVA0C0_QMDV01_06_CTA_RASTER_HEIGHT_RESUME MW(495:480) ++#define NVA0C0_QMDV01_06_CTA_RASTER_DEPTH_RESUME MW(511:496) ++#define NVA0C0_QMDV01_06_LAUNCH_QUOTA MW(535:512) ++#define NVA0C0_QMDV01_06_QMD_RESERVED_F MW(542:536) ++#define NVA0C0_QMDV01_06_LAUNCH_QUOTA_ENABLE MW(543:543) ++#define 
NVA0C0_QMDV01_06_LAUNCH_QUOTA_ENABLE_FALSE 0x00000000 ++#define NVA0C0_QMDV01_06_LAUNCH_QUOTA_ENABLE_TRUE 0x00000001 ++#define NVA0C0_QMDV01_06_SHARED_MEMORY_SIZE MW(561:544) ++#define NVA0C0_QMDV01_06_QMD_RESERVED_G MW(575:562) ++#define NVA0C0_QMDV01_06_QMD_VERSION MW(579:576) ++#define NVA0C0_QMDV01_06_QMD_MAJOR_VERSION MW(583:580) ++#define NVA0C0_QMDV01_06_QMD_RESERVED_H MW(591:584) ++#define NVA0C0_QMDV01_06_CTA_THREAD_DIMENSION0 MW(607:592) ++#define NVA0C0_QMDV01_06_CTA_THREAD_DIMENSION1 MW(623:608) ++#define NVA0C0_QMDV01_06_CTA_THREAD_DIMENSION2 MW(639:624) ++#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_VALID(i) MW((640+(i)*1):(640+(i)*1)) ++#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_VALID_FALSE 0x00000000 ++#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_VALID_TRUE 0x00000001 ++#define NVA0C0_QMDV01_06_QMD_RESERVED_I MW(668:648) ++#define NVA0C0_QMDV01_06_L1_CONFIGURATION MW(671:669) ++#define NVA0C0_QMDV01_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB 0x00000001 ++#define NVA0C0_QMDV01_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB 0x00000002 ++#define NVA0C0_QMDV01_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB 0x00000003 ++#define NVA0C0_QMDV01_06_SM_DISABLE_MASK_LOWER MW(703:672) ++#define NVA0C0_QMDV01_06_SM_DISABLE_MASK_UPPER MW(735:704) ++#define NVA0C0_QMDV01_06_RELEASE0_ADDRESS_LOWER MW(767:736) ++#define NVA0C0_QMDV01_06_RELEASE0_ADDRESS_UPPER MW(775:768) ++#define NVA0C0_QMDV01_06_QMD_RESERVED_J MW(783:776) ++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP MW(790:788) ++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_ADD 0x00000000 ++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_MIN 0x00000001 ++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_MAX 0x00000002 ++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_INC 0x00000003 ++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_DEC 0x00000004 ++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_AND 0x00000005 ++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_OR 
0x00000006 ++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_XOR 0x00000007 ++#define NVA0C0_QMDV01_06_QMD_RESERVED_K MW(791:791) ++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_FORMAT MW(793:792) ++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 ++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_FORMAT_SIGNED_32 0x00000001 ++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_ENABLE MW(794:794) ++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_ENABLE_FALSE 0x00000000 ++#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_ENABLE_TRUE 0x00000001 ++#define NVA0C0_QMDV01_06_RELEASE0_STRUCTURE_SIZE MW(799:799) ++#define NVA0C0_QMDV01_06_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 ++#define NVA0C0_QMDV01_06_RELEASE0_STRUCTURE_SIZE_ONE_WORD 0x00000001 ++#define NVA0C0_QMDV01_06_RELEASE0_PAYLOAD MW(831:800) ++#define NVA0C0_QMDV01_06_RELEASE1_ADDRESS_LOWER MW(863:832) ++#define NVA0C0_QMDV01_06_RELEASE1_ADDRESS_UPPER MW(871:864) ++#define NVA0C0_QMDV01_06_QMD_RESERVED_L MW(879:872) ++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP MW(886:884) ++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_ADD 0x00000000 ++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_MIN 0x00000001 ++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_MAX 0x00000002 ++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_INC 0x00000003 ++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_DEC 0x00000004 ++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_AND 0x00000005 ++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_OR 0x00000006 ++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_XOR 0x00000007 ++#define NVA0C0_QMDV01_06_QMD_RESERVED_M MW(887:887) ++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_FORMAT MW(889:888) ++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 ++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_FORMAT_SIGNED_32 0x00000001 ++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_ENABLE MW(890:890) ++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_ENABLE_FALSE 
0x00000000 ++#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_ENABLE_TRUE 0x00000001 ++#define NVA0C0_QMDV01_06_RELEASE1_STRUCTURE_SIZE MW(895:895) ++#define NVA0C0_QMDV01_06_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 ++#define NVA0C0_QMDV01_06_RELEASE1_STRUCTURE_SIZE_ONE_WORD 0x00000001 ++#define NVA0C0_QMDV01_06_RELEASE1_PAYLOAD MW(927:896) ++#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_ADDR_LOWER(i) MW((959+(i)*64):(928+(i)*64)) ++#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_ADDR_UPPER(i) MW((967+(i)*64):(960+(i)*64)) ++#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_RESERVED_ADDR(i) MW((973+(i)*64):(968+(i)*64)) ++#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_INVALIDATE(i) MW((974+(i)*64):(974+(i)*64)) ++#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_INVALIDATE_FALSE 0x00000000 ++#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_INVALIDATE_TRUE 0x00000001 ++#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_SIZE(i) MW((991+(i)*64):(975+(i)*64)) ++#define NVA0C0_QMDV01_06_SHADER_LOCAL_MEMORY_LOW_SIZE MW(1463:1440) ++#define NVA0C0_QMDV01_06_QMD_RESERVED_N MW(1466:1464) ++#define NVA0C0_QMDV01_06_BARRIER_COUNT MW(1471:1467) ++#define NVA0C0_QMDV01_06_SHADER_LOCAL_MEMORY_HIGH_SIZE MW(1495:1472) ++#define NVA0C0_QMDV01_06_REGISTER_COUNT MW(1503:1496) ++#define NVA0C0_QMDV01_06_SHADER_LOCAL_MEMORY_CRS_SIZE MW(1527:1504) ++#define NVA0C0_QMDV01_06_SASS_VERSION MW(1535:1528) ++#define NVA0C0_QMDV01_06_HW_ONLY_INNER_GET MW(1566:1536) ++#define NVA0C0_QMDV01_06_HW_ONLY_REQUIRE_SCHEDULING_PCAS MW(1567:1567) ++#define NVA0C0_QMDV01_06_HW_ONLY_INNER_PUT MW(1598:1568) ++#define NVA0C0_QMDV01_06_HW_ONLY_SCHEDULE_ON_PUT_UPDATE_ENABLE MW(1599:1599) ++#define NVA0C0_QMDV01_06_QUEUE_ENTRIES_PER_CTA_MINUS_ONE MW(1606:1600) ++#define NVA0C0_QMDV01_06_QMD_RESERVED_Q MW(1609:1607) ++#define NVA0C0_QMDV01_06_COALESCE_WAITING_PERIOD MW(1617:1610) ++#define NVA0C0_QMDV01_06_QMD_RESERVED_R MW(1631:1618) ++#define NVA0C0_QMDV01_06_QMD_SPARE_D MW(1663:1632) ++#define NVA0C0_QMDV01_06_QMD_SPARE_E MW(1695:1664) ++#define 
NVA0C0_QMDV01_06_QMD_SPARE_F MW(1727:1696) ++#define NVA0C0_QMDV01_06_QMD_SPARE_G MW(1759:1728) ++#define NVA0C0_QMDV01_06_QMD_SPARE_H MW(1791:1760) ++#define NVA0C0_QMDV01_06_QMD_SPARE_I MW(1823:1792) ++#define NVA0C0_QMDV01_06_QMD_SPARE_J MW(1855:1824) ++#define NVA0C0_QMDV01_06_QMD_SPARE_K MW(1887:1856) ++#define NVA0C0_QMDV01_06_QMD_SPARE_L MW(1919:1888) ++#define NVA0C0_QMDV01_06_QMD_SPARE_M MW(1951:1920) ++#define NVA0C0_QMDV01_06_QMD_SPARE_N MW(1983:1952) ++#define NVA0C0_QMDV01_06_DEBUG_ID_UPPER MW(2015:1984) ++#define NVA0C0_QMDV01_06_DEBUG_ID_LOWER MW(2047:2016) ++ ++ ++/* ++** Queue Meta Data, Version 01_07 ++ */ ++ ++#define NVA0C0_QMDV01_07_OUTER_PUT MW(30:0) ++#define NVA0C0_QMDV01_07_OUTER_OVERFLOW MW(31:31) ++#define NVA0C0_QMDV01_07_OUTER_GET MW(62:32) ++#define NVA0C0_QMDV01_07_OUTER_STICKY_OVERFLOW MW(63:63) ++#define NVA0C0_QMDV01_07_INNER_GET MW(94:64) ++#define NVA0C0_QMDV01_07_INNER_OVERFLOW MW(95:95) ++#define NVA0C0_QMDV01_07_INNER_PUT MW(126:96) ++#define NVA0C0_QMDV01_07_INNER_STICKY_OVERFLOW MW(127:127) ++#define NVA0C0_QMDV01_07_QMD_RESERVED_A_A MW(159:128) ++#define NVA0C0_QMDV01_07_DEPENDENT_QMD_POINTER MW(191:160) ++#define NVA0C0_QMDV01_07_QMD_GROUP_ID MW(197:192) ++#define NVA0C0_QMDV01_07_QMD_RESERVED_A MW(200:198) ++#define NVA0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST MW(201:201) ++#define NVA0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000 ++#define NVA0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001 ++#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0 MW(202:202) ++#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0_FALSE 0x00000000 ++#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0_TRUE 0x00000001 ++#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1 MW(203:203) ++#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1_FALSE 0x00000000 ++#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1_TRUE 0x00000001 ++#define NVA0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS MW(204:204) ++#define 
NVA0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS_FALSE 0x00000000 ++#define NVA0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS_TRUE 0x00000001 ++#define NVA0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE MW(205:205) ++#define NVA0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE 0x00000000 ++#define NVA0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE 0x00000001 ++#define NVA0C0_QMDV01_07_DEPENDENT_QMD_TYPE MW(206:206) ++#define NVA0C0_QMDV01_07_DEPENDENT_QMD_TYPE_QUEUE 0x00000000 ++#define NVA0C0_QMDV01_07_DEPENDENT_QMD_TYPE_GRID 0x00000001 ++#define NVA0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY MW(207:207) ++#define NVA0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY_FALSE 0x00000000 ++#define NVA0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY_TRUE 0x00000001 ++#define NVA0C0_QMDV01_07_QMD_RESERVED_B MW(223:208) ++#define NVA0C0_QMDV01_07_CIRCULAR_QUEUE_SIZE MW(248:224) ++#define NVA0C0_QMDV01_07_QMD_RESERVED_C MW(249:249) ++#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE MW(250:250) ++#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE 0x00000000 ++#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE 0x00000001 ++#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE MW(251:251) ++#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE 0x00000000 ++#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE 0x00000001 ++#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE MW(252:252) ++#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE_FALSE 0x00000000 ++#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE_TRUE 0x00000001 ++#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE MW(253:253) ++#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE_FALSE 0x00000000 ++#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE_TRUE 0x00000001 ++#define NVA0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE MW(254:254) ++#define NVA0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE_FALSE 0x00000000 ++#define NVA0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE_TRUE 0x00000001 ++#define 
NVA0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE MW(255:255) ++#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE 0x00000000 ++#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE 0x00000001 ++#define NVA0C0_QMDV01_07_PROGRAM_OFFSET MW(287:256) ++#define NVA0C0_QMDV01_07_CIRCULAR_QUEUE_ADDR_LOWER MW(319:288) ++#define NVA0C0_QMDV01_07_CIRCULAR_QUEUE_ADDR_UPPER MW(327:320) ++#define NVA0C0_QMDV01_07_QMD_RESERVED_D MW(335:328) ++#define NVA0C0_QMDV01_07_CIRCULAR_QUEUE_ENTRY_SIZE MW(351:336) ++#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_ID MW(357:352) ++#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE MW(365:358) ++#define NVA0C0_QMDV01_07_RELEASE_MEMBAR_TYPE MW(366:366) ++#define NVA0C0_QMDV01_07_RELEASE_MEMBAR_TYPE_FE_NONE 0x00000000 ++#define NVA0C0_QMDV01_07_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR 0x00000001 ++#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE MW(367:367) ++#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE 0x00000000 ++#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE 0x00000001 ++#define NVA0C0_QMDV01_07_CWD_MEMBAR_TYPE MW(369:368) ++#define NVA0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_NONE 0x00000000 ++#define NVA0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_SYSMEMBAR 0x00000001 ++#define NVA0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_MEMBAR 0x00000003 ++#define NVA0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS MW(370:370) ++#define NVA0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS_FALSE 0x00000000 ++#define NVA0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS_TRUE 0x00000001 ++#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE MW(371:371) ++#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE 0x00000000 ++#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE 0x00000001 ++#define NVA0C0_QMDV01_07_THROTTLED MW(372:372) ++#define NVA0C0_QMDV01_07_THROTTLED_FALSE 0x00000000 ++#define NVA0C0_QMDV01_07_THROTTLED_TRUE 0x00000001 ++#define NVA0C0_QMDV01_07_FP32_NAN_BEHAVIOR MW(376:376) ++#define NVA0C0_QMDV01_07_FP32_NAN_BEHAVIOR_LEGACY 
0x00000000 ++#define NVA0C0_QMDV01_07_FP32_NAN_BEHAVIOR_FP64_COMPATIBLE 0x00000001 ++#define NVA0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR MW(377:377) ++#define NVA0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR_PASS_ZERO 0x00000000 ++#define NVA0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR_PASS_INDEFINITE 0x00000001 ++#define NVA0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT MW(378:378) ++#define NVA0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT__32 0x00000000 ++#define NVA0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT_NO_CHECK 0x00000001 ++#define NVA0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING MW(379:379) ++#define NVA0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING_FOUR_BYTES_PER_BANK 0x00000000 ++#define NVA0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING_EIGHT_BYTES_PER_BANK 0x00000001 ++#define NVA0C0_QMDV01_07_SAMPLER_INDEX MW(382:382) ++#define NVA0C0_QMDV01_07_SAMPLER_INDEX_INDEPENDENTLY 0x00000000 ++#define NVA0C0_QMDV01_07_SAMPLER_INDEX_VIA_HEADER_INDEX 0x00000001 ++#define NVA0C0_QMDV01_07_FP32_NARROW_INSTRUCTION MW(383:383) ++#define NVA0C0_QMDV01_07_FP32_NARROW_INSTRUCTION_KEEP_DENORMS 0x00000000 ++#define NVA0C0_QMDV01_07_FP32_NARROW_INSTRUCTION_FLUSH_DENORMS 0x00000001 ++#define NVA0C0_QMDV01_07_CTA_RASTER_WIDTH MW(415:384) ++#define NVA0C0_QMDV01_07_CTA_RASTER_HEIGHT MW(431:416) ++#define NVA0C0_QMDV01_07_CTA_RASTER_DEPTH MW(447:432) ++#define NVA0C0_QMDV01_07_CTA_RASTER_WIDTH_RESUME MW(479:448) ++#define NVA0C0_QMDV01_07_CTA_RASTER_HEIGHT_RESUME MW(495:480) ++#define NVA0C0_QMDV01_07_CTA_RASTER_DEPTH_RESUME MW(511:496) ++#define NVA0C0_QMDV01_07_QUEUE_ENTRIES_PER_CTA_MINUS_ONE MW(518:512) ++#define NVA0C0_QMDV01_07_COALESCE_WAITING_PERIOD MW(529:522) ++#define NVA0C0_QMDV01_07_SHARED_MEMORY_SIZE MW(561:544) ++#define NVA0C0_QMDV01_07_QMD_RESERVED_G MW(575:562) ++#define NVA0C0_QMDV01_07_QMD_VERSION MW(579:576) ++#define NVA0C0_QMDV01_07_QMD_MAJOR_VERSION MW(583:580) ++#define NVA0C0_QMDV01_07_QMD_RESERVED_H MW(591:584) ++#define NVA0C0_QMDV01_07_CTA_THREAD_DIMENSION0 MW(607:592) ++#define 
NVA0C0_QMDV01_07_CTA_THREAD_DIMENSION1 MW(623:608) ++#define NVA0C0_QMDV01_07_CTA_THREAD_DIMENSION2 MW(639:624) ++#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_VALID(i) MW((640+(i)*1):(640+(i)*1)) ++#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_VALID_FALSE 0x00000000 ++#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_VALID_TRUE 0x00000001 ++#define NVA0C0_QMDV01_07_QMD_RESERVED_I MW(668:648) ++#define NVA0C0_QMDV01_07_L1_CONFIGURATION MW(671:669) ++#define NVA0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB 0x00000001 ++#define NVA0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB 0x00000002 ++#define NVA0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB 0x00000003 ++#define NVA0C0_QMDV01_07_SM_DISABLE_MASK_LOWER MW(703:672) ++#define NVA0C0_QMDV01_07_SM_DISABLE_MASK_UPPER MW(735:704) ++#define NVA0C0_QMDV01_07_RELEASE0_ADDRESS_LOWER MW(767:736) ++#define NVA0C0_QMDV01_07_RELEASE0_ADDRESS_UPPER MW(775:768) ++#define NVA0C0_QMDV01_07_QMD_RESERVED_J MW(783:776) ++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP MW(790:788) ++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_ADD 0x00000000 ++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_MIN 0x00000001 ++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_MAX 0x00000002 ++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_INC 0x00000003 ++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_DEC 0x00000004 ++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_AND 0x00000005 ++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_OR 0x00000006 ++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_XOR 0x00000007 ++#define NVA0C0_QMDV01_07_QMD_RESERVED_K MW(791:791) ++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT MW(793:792) ++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 ++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT_SIGNED_32 0x00000001 ++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE MW(794:794) ++#define 
NVA0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE_FALSE 0x00000000 ++#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE_TRUE 0x00000001 ++#define NVA0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE MW(799:799) ++#define NVA0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 ++#define NVA0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE_ONE_WORD 0x00000001 ++#define NVA0C0_QMDV01_07_RELEASE0_PAYLOAD MW(831:800) ++#define NVA0C0_QMDV01_07_RELEASE1_ADDRESS_LOWER MW(863:832) ++#define NVA0C0_QMDV01_07_RELEASE1_ADDRESS_UPPER MW(871:864) ++#define NVA0C0_QMDV01_07_QMD_RESERVED_L MW(879:872) ++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP MW(886:884) ++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_ADD 0x00000000 ++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_MIN 0x00000001 ++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_MAX 0x00000002 ++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_INC 0x00000003 ++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_DEC 0x00000004 ++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_AND 0x00000005 ++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_OR 0x00000006 ++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_XOR 0x00000007 ++#define NVA0C0_QMDV01_07_QMD_RESERVED_M MW(887:887) ++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT MW(889:888) ++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 ++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT_SIGNED_32 0x00000001 ++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE MW(890:890) ++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE_FALSE 0x00000000 ++#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE_TRUE 0x00000001 ++#define NVA0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE MW(895:895) ++#define NVA0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 ++#define NVA0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE_ONE_WORD 0x00000001 ++#define NVA0C0_QMDV01_07_RELEASE1_PAYLOAD MW(927:896) ++#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_ADDR_LOWER(i) MW((959+(i)*64):(928+(i)*64)) 
++#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_ADDR_UPPER(i) MW((967+(i)*64):(960+(i)*64)) ++#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_RESERVED_ADDR(i) MW((973+(i)*64):(968+(i)*64)) ++#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE(i) MW((974+(i)*64):(974+(i)*64)) ++#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE_FALSE 0x00000000 ++#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE_TRUE 0x00000001 ++#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_SIZE(i) MW((991+(i)*64):(975+(i)*64)) ++#define NVA0C0_QMDV01_07_SHADER_LOCAL_MEMORY_LOW_SIZE MW(1463:1440) ++#define NVA0C0_QMDV01_07_QMD_RESERVED_N MW(1466:1464) ++#define NVA0C0_QMDV01_07_BARRIER_COUNT MW(1471:1467) ++#define NVA0C0_QMDV01_07_SHADER_LOCAL_MEMORY_HIGH_SIZE MW(1495:1472) ++#define NVA0C0_QMDV01_07_REGISTER_COUNT MW(1503:1496) ++#define NVA0C0_QMDV01_07_SHADER_LOCAL_MEMORY_CRS_SIZE MW(1527:1504) ++#define NVA0C0_QMDV01_07_SASS_VERSION MW(1535:1528) ++#define NVA0C0_QMDV01_07_HW_ONLY_INNER_GET MW(1566:1536) ++#define NVA0C0_QMDV01_07_HW_ONLY_REQUIRE_SCHEDULING_PCAS MW(1567:1567) ++#define NVA0C0_QMDV01_07_HW_ONLY_INNER_PUT MW(1598:1568) ++#define NVA0C0_QMDV01_07_QMD_RESERVED_P MW(1599:1599) ++#define NVA0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX MW(1629:1600) ++#define NVA0C0_QMDV01_07_QMD_RESERVED_Q MW(1630:1630) ++#define NVA0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID MW(1631:1631) ++#define NVA0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE 0x00000000 ++#define NVA0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE 0x00000001 ++#define NVA0C0_QMDV01_07_HW_ONLY_SKED_NEXT_QMD_POINTER MW(1663:1632) ++#define NVA0C0_QMDV01_07_QMD_SPARE_E MW(1695:1664) ++#define NVA0C0_QMDV01_07_QMD_SPARE_F MW(1727:1696) ++#define NVA0C0_QMDV01_07_QMD_SPARE_G MW(1759:1728) ++#define NVA0C0_QMDV01_07_QMD_SPARE_H MW(1791:1760) ++#define NVA0C0_QMDV01_07_QMD_SPARE_I MW(1823:1792) ++#define NVA0C0_QMDV01_07_QMD_SPARE_J MW(1855:1824) ++#define NVA0C0_QMDV01_07_QMD_SPARE_K MW(1887:1856) ++#define 
NVA0C0_QMDV01_07_QMD_SPARE_L MW(1919:1888) ++#define NVA0C0_QMDV01_07_QMD_SPARE_M MW(1951:1920) ++#define NVA0C0_QMDV01_07_QMD_SPARE_N MW(1983:1952) ++#define NVA0C0_QMDV01_07_DEBUG_ID_UPPER MW(2015:1984) ++#define NVA0C0_QMDV01_07_DEBUG_ID_LOWER MW(2047:2016) ++ ++ ++ ++#endif // #ifndef __CLA0C0QMD_H__ +diff --git a/src/gallium/drivers/nouveau/nvc0/clc0c0qmd.h b/src/gallium/drivers/nouveau/nvc0/clc0c0qmd.h +new file mode 100644 +index 00000000000..040bdcd9dcb +--- /dev/null ++++ b/src/gallium/drivers/nouveau/nvc0/clc0c0qmd.h +@@ -0,0 +1,665 @@ ++/******************************************************************************* ++ Copyright (c) 2016 NVIDIA Corporation ++ ++ Permission is hereby granted, free of charge, to any person obtaining a copy ++ of this software and associated documentation files (the "Software"), to ++ deal in the Software without restriction, including without limitation the ++ rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ sell copies of the Software, and to permit persons to whom the Software is ++ furnished to do so, subject to the following conditions: ++ ++ The above copyright notice and this permission notice shall be ++ included in all copies or substantial portions of the Software. ++ ++ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ DEALINGS IN THE SOFTWARE. 
++ ++*******************************************************************************/ ++ ++/* AUTO GENERATED FILE -- DO NOT EDIT */ ++ ++#ifndef __CLC0C0QMD_H__ ++#define __CLC0C0QMD_H__ ++ ++/* ++** Queue Meta Data, Version 01_07 ++ */ ++ ++// The below C preprocessor definitions describe "multi-word" structures, where ++// fields may have bit numbers beyond 32. For example, MW(127:96) means ++// the field is in bits 0-31 of word number 3 of the structure. The "MW(X:Y)" ++// syntax is to distinguish from similar "X:Y" single-word definitions: the ++// macros historically used for single-word definitions would fail with ++// multi-word definitions. ++// ++// See nvmisc.h:DRF_VAL_MW() in the source code of the kernel ++// interface layer of nvidia.ko for an example of how to manipulate ++// these MW(X:Y) definitions. ++ ++#define NVC0C0_QMDV01_07_OUTER_PUT MW(30:0) ++#define NVC0C0_QMDV01_07_OUTER_OVERFLOW MW(31:31) ++#define NVC0C0_QMDV01_07_OUTER_GET MW(62:32) ++#define NVC0C0_QMDV01_07_OUTER_STICKY_OVERFLOW MW(63:63) ++#define NVC0C0_QMDV01_07_INNER_GET MW(94:64) ++#define NVC0C0_QMDV01_07_INNER_OVERFLOW MW(95:95) ++#define NVC0C0_QMDV01_07_INNER_PUT MW(126:96) ++#define NVC0C0_QMDV01_07_INNER_STICKY_OVERFLOW MW(127:127) ++#define NVC0C0_QMDV01_07_QMD_RESERVED_A_A MW(159:128) ++#define NVC0C0_QMDV01_07_DEPENDENT_QMD_POINTER MW(191:160) ++#define NVC0C0_QMDV01_07_QMD_GROUP_ID MW(197:192) ++#define NVC0C0_QMDV01_07_SM_GLOBAL_CACHING_ENABLE MW(198:198) ++#define NVC0C0_QMDV01_07_RUN_CTA_IN_ONE_SM_PARTITION MW(199:199) ++#define NVC0C0_QMDV01_07_RUN_CTA_IN_ONE_SM_PARTITION_FALSE 0x00000000 ++#define NVC0C0_QMDV01_07_RUN_CTA_IN_ONE_SM_PARTITION_TRUE 0x00000001 ++#define NVC0C0_QMDV01_07_IS_QUEUE MW(200:200) ++#define NVC0C0_QMDV01_07_IS_QUEUE_FALSE 0x00000000 ++#define NVC0C0_QMDV01_07_IS_QUEUE_TRUE 0x00000001 ++#define NVC0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST MW(201:201) ++#define NVC0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000 
++#define NVC0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001 ++#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0 MW(202:202) ++#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0_FALSE 0x00000000 ++#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0_TRUE 0x00000001 ++#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1 MW(203:203) ++#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1_FALSE 0x00000000 ++#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1_TRUE 0x00000001 ++#define NVC0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS MW(204:204) ++#define NVC0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS_FALSE 0x00000000 ++#define NVC0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS_TRUE 0x00000001 ++#define NVC0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE MW(205:205) ++#define NVC0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE 0x00000000 ++#define NVC0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE 0x00000001 ++#define NVC0C0_QMDV01_07_DEPENDENT_QMD_TYPE MW(206:206) ++#define NVC0C0_QMDV01_07_DEPENDENT_QMD_TYPE_QUEUE 0x00000000 ++#define NVC0C0_QMDV01_07_DEPENDENT_QMD_TYPE_GRID 0x00000001 ++#define NVC0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY MW(207:207) ++#define NVC0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY_FALSE 0x00000000 ++#define NVC0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY_TRUE 0x00000001 ++#define NVC0C0_QMDV01_07_QMD_RESERVED_B MW(223:208) ++#define NVC0C0_QMDV01_07_CIRCULAR_QUEUE_SIZE MW(248:224) ++#define NVC0C0_QMDV01_07_QMD_RESERVED_C MW(249:249) ++#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE MW(250:250) ++#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE 0x00000000 ++#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE 0x00000001 ++#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE MW(251:251) ++#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE 0x00000000 ++#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE 0x00000001 ++#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE MW(252:252) ++#define 
NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE_FALSE 0x00000000 ++#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE_TRUE 0x00000001 ++#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE MW(253:253) ++#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE_FALSE 0x00000000 ++#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE_TRUE 0x00000001 ++#define NVC0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE MW(254:254) ++#define NVC0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE_FALSE 0x00000000 ++#define NVC0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE_TRUE 0x00000001 ++#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE MW(255:255) ++#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE 0x00000000 ++#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE 0x00000001 ++#define NVC0C0_QMDV01_07_PROGRAM_OFFSET MW(287:256) ++#define NVC0C0_QMDV01_07_CIRCULAR_QUEUE_ADDR_LOWER MW(319:288) ++#define NVC0C0_QMDV01_07_CIRCULAR_QUEUE_ADDR_UPPER MW(327:320) ++#define NVC0C0_QMDV01_07_QMD_RESERVED_D MW(335:328) ++#define NVC0C0_QMDV01_07_CIRCULAR_QUEUE_ENTRY_SIZE MW(351:336) ++#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_ID MW(357:352) ++#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE MW(365:358) ++#define NVC0C0_QMDV01_07_RELEASE_MEMBAR_TYPE MW(366:366) ++#define NVC0C0_QMDV01_07_RELEASE_MEMBAR_TYPE_FE_NONE 0x00000000 ++#define NVC0C0_QMDV01_07_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR 0x00000001 ++#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE MW(367:367) ++#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE 0x00000000 ++#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE 0x00000001 ++#define NVC0C0_QMDV01_07_CWD_MEMBAR_TYPE MW(369:368) ++#define NVC0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_NONE 0x00000000 ++#define NVC0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_SYSMEMBAR 0x00000001 ++#define NVC0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_MEMBAR 0x00000003 ++#define NVC0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS MW(370:370) ++#define 
NVC0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS_FALSE 0x00000000 ++#define NVC0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS_TRUE 0x00000001 ++#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE MW(371:371) ++#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE 0x00000000 ++#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE 0x00000001 ++#define NVC0C0_QMDV01_07_THROTTLED MW(372:372) ++#define NVC0C0_QMDV01_07_THROTTLED_FALSE 0x00000000 ++#define NVC0C0_QMDV01_07_THROTTLED_TRUE 0x00000001 ++#define NVC0C0_QMDV01_07_FP32_NAN_BEHAVIOR MW(376:376) ++#define NVC0C0_QMDV01_07_FP32_NAN_BEHAVIOR_LEGACY 0x00000000 ++#define NVC0C0_QMDV01_07_FP32_NAN_BEHAVIOR_FP64_COMPATIBLE 0x00000001 ++#define NVC0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR MW(377:377) ++#define NVC0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR_PASS_ZERO 0x00000000 ++#define NVC0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR_PASS_INDEFINITE 0x00000001 ++#define NVC0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT MW(378:378) ++#define NVC0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT__32 0x00000000 ++#define NVC0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT_NO_CHECK 0x00000001 ++#define NVC0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING MW(379:379) ++#define NVC0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING_FOUR_BYTES_PER_BANK 0x00000000 ++#define NVC0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING_EIGHT_BYTES_PER_BANK 0x00000001 ++#define NVC0C0_QMDV01_07_SAMPLER_INDEX MW(382:382) ++#define NVC0C0_QMDV01_07_SAMPLER_INDEX_INDEPENDENTLY 0x00000000 ++#define NVC0C0_QMDV01_07_SAMPLER_INDEX_VIA_HEADER_INDEX 0x00000001 ++#define NVC0C0_QMDV01_07_FP32_NARROW_INSTRUCTION MW(383:383) ++#define NVC0C0_QMDV01_07_FP32_NARROW_INSTRUCTION_KEEP_DENORMS 0x00000000 ++#define NVC0C0_QMDV01_07_FP32_NARROW_INSTRUCTION_FLUSH_DENORMS 0x00000001 ++#define NVC0C0_QMDV01_07_CTA_RASTER_WIDTH MW(415:384) ++#define NVC0C0_QMDV01_07_CTA_RASTER_HEIGHT MW(431:416) ++#define NVC0C0_QMDV01_07_CTA_RASTER_DEPTH MW(447:432) ++#define NVC0C0_QMDV01_07_CTA_RASTER_WIDTH_RESUME MW(479:448) ++#define 
NVC0C0_QMDV01_07_CTA_RASTER_HEIGHT_RESUME MW(495:480) ++#define NVC0C0_QMDV01_07_CTA_RASTER_DEPTH_RESUME MW(511:496) ++#define NVC0C0_QMDV01_07_QUEUE_ENTRIES_PER_CTA_MINUS_ONE MW(518:512) ++#define NVC0C0_QMDV01_07_COALESCE_WAITING_PERIOD MW(529:522) ++#define NVC0C0_QMDV01_07_SHARED_MEMORY_SIZE MW(561:544) ++#define NVC0C0_QMDV01_07_QMD_RESERVED_G MW(575:562) ++#define NVC0C0_QMDV01_07_QMD_VERSION MW(579:576) ++#define NVC0C0_QMDV01_07_QMD_MAJOR_VERSION MW(583:580) ++#define NVC0C0_QMDV01_07_QMD_RESERVED_H MW(591:584) ++#define NVC0C0_QMDV01_07_CTA_THREAD_DIMENSION0 MW(607:592) ++#define NVC0C0_QMDV01_07_CTA_THREAD_DIMENSION1 MW(623:608) ++#define NVC0C0_QMDV01_07_CTA_THREAD_DIMENSION2 MW(639:624) ++#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_VALID(i) MW((640+(i)*1):(640+(i)*1)) ++#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_VALID_FALSE 0x00000000 ++#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_VALID_TRUE 0x00000001 ++#define NVC0C0_QMDV01_07_QMD_RESERVED_I MW(668:648) ++#define NVC0C0_QMDV01_07_L1_CONFIGURATION MW(671:669) ++#define NVC0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB 0x00000001 ++#define NVC0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB 0x00000002 ++#define NVC0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB 0x00000003 ++#define NVC0C0_QMDV01_07_SM_DISABLE_MASK_LOWER MW(703:672) ++#define NVC0C0_QMDV01_07_SM_DISABLE_MASK_UPPER MW(735:704) ++#define NVC0C0_QMDV01_07_RELEASE0_ADDRESS_LOWER MW(767:736) ++#define NVC0C0_QMDV01_07_RELEASE0_ADDRESS_UPPER MW(775:768) ++#define NVC0C0_QMDV01_07_QMD_RESERVED_J MW(783:776) ++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP MW(790:788) ++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_ADD 0x00000000 ++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_MIN 0x00000001 ++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_MAX 0x00000002 ++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_INC 0x00000003 ++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_DEC 
0x00000004 ++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_AND 0x00000005 ++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_OR 0x00000006 ++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_XOR 0x00000007 ++#define NVC0C0_QMDV01_07_QMD_RESERVED_K MW(791:791) ++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT MW(793:792) ++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 ++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT_SIGNED_32 0x00000001 ++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE MW(794:794) ++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE_FALSE 0x00000000 ++#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE_TRUE 0x00000001 ++#define NVC0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE MW(799:799) ++#define NVC0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 ++#define NVC0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE_ONE_WORD 0x00000001 ++#define NVC0C0_QMDV01_07_RELEASE0_PAYLOAD MW(831:800) ++#define NVC0C0_QMDV01_07_RELEASE1_ADDRESS_LOWER MW(863:832) ++#define NVC0C0_QMDV01_07_RELEASE1_ADDRESS_UPPER MW(871:864) ++#define NVC0C0_QMDV01_07_QMD_RESERVED_L MW(879:872) ++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP MW(886:884) ++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_ADD 0x00000000 ++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_MIN 0x00000001 ++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_MAX 0x00000002 ++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_INC 0x00000003 ++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_DEC 0x00000004 ++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_AND 0x00000005 ++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_OR 0x00000006 ++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_XOR 0x00000007 ++#define NVC0C0_QMDV01_07_QMD_RESERVED_M MW(887:887) ++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT MW(889:888) ++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 ++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT_SIGNED_32 
0x00000001 ++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE MW(890:890) ++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE_FALSE 0x00000000 ++#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE_TRUE 0x00000001 ++#define NVC0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE MW(895:895) ++#define NVC0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 ++#define NVC0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE_ONE_WORD 0x00000001 ++#define NVC0C0_QMDV01_07_RELEASE1_PAYLOAD MW(927:896) ++#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_ADDR_LOWER(i) MW((959+(i)*64):(928+(i)*64)) ++#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_ADDR_UPPER(i) MW((967+(i)*64):(960+(i)*64)) ++#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_RESERVED_ADDR(i) MW((973+(i)*64):(968+(i)*64)) ++#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE(i) MW((974+(i)*64):(974+(i)*64)) ++#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE_FALSE 0x00000000 ++#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE_TRUE 0x00000001 ++#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_SIZE(i) MW((991+(i)*64):(975+(i)*64)) ++#define NVC0C0_QMDV01_07_SHADER_LOCAL_MEMORY_LOW_SIZE MW(1463:1440) ++#define NVC0C0_QMDV01_07_QMD_RESERVED_N MW(1466:1464) ++#define NVC0C0_QMDV01_07_BARRIER_COUNT MW(1471:1467) ++#define NVC0C0_QMDV01_07_SHADER_LOCAL_MEMORY_HIGH_SIZE MW(1495:1472) ++#define NVC0C0_QMDV01_07_REGISTER_COUNT MW(1503:1496) ++#define NVC0C0_QMDV01_07_SHADER_LOCAL_MEMORY_CRS_SIZE MW(1527:1504) ++#define NVC0C0_QMDV01_07_SASS_VERSION MW(1535:1528) ++#define NVC0C0_QMDV01_07_HW_ONLY_INNER_GET MW(1566:1536) ++#define NVC0C0_QMDV01_07_HW_ONLY_REQUIRE_SCHEDULING_PCAS MW(1567:1567) ++#define NVC0C0_QMDV01_07_HW_ONLY_INNER_PUT MW(1598:1568) ++#define NVC0C0_QMDV01_07_HW_ONLY_SCG_TYPE MW(1599:1599) ++#define NVC0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX MW(1629:1600) ++#define NVC0C0_QMDV01_07_QMD_RESERVED_Q MW(1630:1630) ++#define NVC0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID MW(1631:1631) ++#define 
NVC0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE 0x00000000 ++#define NVC0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE 0x00000001 ++#define NVC0C0_QMDV01_07_HW_ONLY_SKED_NEXT_QMD_POINTER MW(1663:1632) ++#define NVC0C0_QMDV01_07_QMD_SPARE_E MW(1695:1664) ++#define NVC0C0_QMDV01_07_QMD_SPARE_F MW(1727:1696) ++#define NVC0C0_QMDV01_07_QMD_SPARE_G MW(1759:1728) ++#define NVC0C0_QMDV01_07_QMD_SPARE_H MW(1791:1760) ++#define NVC0C0_QMDV01_07_QMD_SPARE_I MW(1823:1792) ++#define NVC0C0_QMDV01_07_QMD_SPARE_J MW(1855:1824) ++#define NVC0C0_QMDV01_07_QMD_SPARE_K MW(1887:1856) ++#define NVC0C0_QMDV01_07_QMD_SPARE_L MW(1919:1888) ++#define NVC0C0_QMDV01_07_QMD_SPARE_M MW(1951:1920) ++#define NVC0C0_QMDV01_07_QMD_SPARE_N MW(1983:1952) ++#define NVC0C0_QMDV01_07_DEBUG_ID_UPPER MW(2015:1984) ++#define NVC0C0_QMDV01_07_DEBUG_ID_LOWER MW(2047:2016) ++ ++ ++/* ++** Queue Meta Data, Version 02_00 ++ */ ++ ++#define NVC0C0_QMDV02_00_OUTER_PUT MW(30:0) ++#define NVC0C0_QMDV02_00_OUTER_OVERFLOW MW(31:31) ++#define NVC0C0_QMDV02_00_OUTER_GET MW(62:32) ++#define NVC0C0_QMDV02_00_OUTER_STICKY_OVERFLOW MW(63:63) ++#define NVC0C0_QMDV02_00_INNER_GET MW(94:64) ++#define NVC0C0_QMDV02_00_INNER_OVERFLOW MW(95:95) ++#define NVC0C0_QMDV02_00_INNER_PUT MW(126:96) ++#define NVC0C0_QMDV02_00_INNER_STICKY_OVERFLOW MW(127:127) ++#define NVC0C0_QMDV02_00_QMD_RESERVED_A_A MW(159:128) ++#define NVC0C0_QMDV02_00_DEPENDENT_QMD_POINTER MW(191:160) ++#define NVC0C0_QMDV02_00_QMD_GROUP_ID MW(197:192) ++#define NVC0C0_QMDV02_00_SM_GLOBAL_CACHING_ENABLE MW(198:198) ++#define NVC0C0_QMDV02_00_RUN_CTA_IN_ONE_SM_PARTITION MW(199:199) ++#define NVC0C0_QMDV02_00_RUN_CTA_IN_ONE_SM_PARTITION_FALSE 0x00000000 ++#define NVC0C0_QMDV02_00_RUN_CTA_IN_ONE_SM_PARTITION_TRUE 0x00000001 ++#define NVC0C0_QMDV02_00_IS_QUEUE MW(200:200) ++#define NVC0C0_QMDV02_00_IS_QUEUE_FALSE 0x00000000 ++#define NVC0C0_QMDV02_00_IS_QUEUE_TRUE 0x00000001 ++#define NVC0C0_QMDV02_00_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST MW(201:201) 
++#define NVC0C0_QMDV02_00_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000 ++#define NVC0C0_QMDV02_00_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001 ++#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE0 MW(202:202) ++#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE0_FALSE 0x00000000 ++#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE0_TRUE 0x00000001 ++#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE1 MW(203:203) ++#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE1_FALSE 0x00000000 ++#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE1_TRUE 0x00000001 ++#define NVC0C0_QMDV02_00_REQUIRE_SCHEDULING_PCAS MW(204:204) ++#define NVC0C0_QMDV02_00_REQUIRE_SCHEDULING_PCAS_FALSE 0x00000000 ++#define NVC0C0_QMDV02_00_REQUIRE_SCHEDULING_PCAS_TRUE 0x00000001 ++#define NVC0C0_QMDV02_00_DEPENDENT_QMD_SCHEDULE_ENABLE MW(205:205) ++#define NVC0C0_QMDV02_00_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE 0x00000000 ++#define NVC0C0_QMDV02_00_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE 0x00000001 ++#define NVC0C0_QMDV02_00_DEPENDENT_QMD_TYPE MW(206:206) ++#define NVC0C0_QMDV02_00_DEPENDENT_QMD_TYPE_QUEUE 0x00000000 ++#define NVC0C0_QMDV02_00_DEPENDENT_QMD_TYPE_GRID 0x00000001 ++#define NVC0C0_QMDV02_00_DEPENDENT_QMD_FIELD_COPY MW(207:207) ++#define NVC0C0_QMDV02_00_DEPENDENT_QMD_FIELD_COPY_FALSE 0x00000000 ++#define NVC0C0_QMDV02_00_DEPENDENT_QMD_FIELD_COPY_TRUE 0x00000001 ++#define NVC0C0_QMDV02_00_QMD_RESERVED_B MW(223:208) ++#define NVC0C0_QMDV02_00_CIRCULAR_QUEUE_SIZE MW(248:224) ++#define NVC0C0_QMDV02_00_QMD_RESERVED_C MW(249:249) ++#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_HEADER_CACHE MW(250:250) ++#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE 0x00000000 ++#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE 0x00000001 ++#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_SAMPLER_CACHE MW(251:251) ++#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE 0x00000000 ++#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE 0x00000001 ++#define 
NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_DATA_CACHE MW(252:252) ++#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_DATA_CACHE_FALSE 0x00000000 ++#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_DATA_CACHE_TRUE 0x00000001 ++#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_DATA_CACHE MW(253:253) ++#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_DATA_CACHE_FALSE 0x00000000 ++#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_DATA_CACHE_TRUE 0x00000001 ++#define NVC0C0_QMDV02_00_INVALIDATE_INSTRUCTION_CACHE MW(254:254) ++#define NVC0C0_QMDV02_00_INVALIDATE_INSTRUCTION_CACHE_FALSE 0x00000000 ++#define NVC0C0_QMDV02_00_INVALIDATE_INSTRUCTION_CACHE_TRUE 0x00000001 ++#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_CONSTANT_CACHE MW(255:255) ++#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE 0x00000000 ++#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE 0x00000001 ++#define NVC0C0_QMDV02_00_PROGRAM_OFFSET MW(287:256) ++#define NVC0C0_QMDV02_00_CIRCULAR_QUEUE_ADDR_LOWER MW(319:288) ++#define NVC0C0_QMDV02_00_CIRCULAR_QUEUE_ADDR_UPPER MW(327:320) ++#define NVC0C0_QMDV02_00_QMD_RESERVED_D MW(335:328) ++#define NVC0C0_QMDV02_00_CIRCULAR_QUEUE_ENTRY_SIZE MW(351:336) ++#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_ID MW(357:352) ++#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE MW(365:358) ++#define NVC0C0_QMDV02_00_RELEASE_MEMBAR_TYPE MW(366:366) ++#define NVC0C0_QMDV02_00_RELEASE_MEMBAR_TYPE_FE_NONE 0x00000000 ++#define NVC0C0_QMDV02_00_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR 0x00000001 ++#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_INCR_ENABLE MW(367:367) ++#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE 0x00000000 ++#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE 0x00000001 ++#define NVC0C0_QMDV02_00_CWD_MEMBAR_TYPE MW(369:368) ++#define NVC0C0_QMDV02_00_CWD_MEMBAR_TYPE_L1_NONE 0x00000000 ++#define NVC0C0_QMDV02_00_CWD_MEMBAR_TYPE_L1_SYSMEMBAR 0x00000001 ++#define NVC0C0_QMDV02_00_CWD_MEMBAR_TYPE_L1_MEMBAR 0x00000003 ++#define 
NVC0C0_QMDV02_00_SEQUENTIALLY_RUN_CTAS MW(370:370) ++#define NVC0C0_QMDV02_00_SEQUENTIALLY_RUN_CTAS_FALSE 0x00000000 ++#define NVC0C0_QMDV02_00_SEQUENTIALLY_RUN_CTAS_TRUE 0x00000001 ++#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_DECR_ENABLE MW(371:371) ++#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE 0x00000000 ++#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE 0x00000001 ++#define NVC0C0_QMDV02_00_THROTTLED MW(372:372) ++#define NVC0C0_QMDV02_00_THROTTLED_FALSE 0x00000000 ++#define NVC0C0_QMDV02_00_THROTTLED_TRUE 0x00000001 ++#define NVC0C0_QMDV02_00_API_VISIBLE_CALL_LIMIT MW(378:378) ++#define NVC0C0_QMDV02_00_API_VISIBLE_CALL_LIMIT__32 0x00000000 ++#define NVC0C0_QMDV02_00_API_VISIBLE_CALL_LIMIT_NO_CHECK 0x00000001 ++#define NVC0C0_QMDV02_00_SAMPLER_INDEX MW(382:382) ++#define NVC0C0_QMDV02_00_SAMPLER_INDEX_INDEPENDENTLY 0x00000000 ++#define NVC0C0_QMDV02_00_SAMPLER_INDEX_VIA_HEADER_INDEX 0x00000001 ++#define NVC0C0_QMDV02_00_CTA_RASTER_WIDTH MW(415:384) ++#define NVC0C0_QMDV02_00_CTA_RASTER_HEIGHT MW(431:416) ++#define NVC0C0_QMDV02_00_QMD_RESERVED13A MW(447:432) ++#define NVC0C0_QMDV02_00_CTA_RASTER_DEPTH MW(463:448) ++#define NVC0C0_QMDV02_00_QMD_RESERVED14A MW(479:464) ++#define NVC0C0_QMDV02_00_QMD_RESERVED15A MW(511:480) ++#define NVC0C0_QMDV02_00_QUEUE_ENTRIES_PER_CTA_MINUS_ONE MW(518:512) ++#define NVC0C0_QMDV02_00_COALESCE_WAITING_PERIOD MW(529:522) ++#define NVC0C0_QMDV02_00_SHARED_MEMORY_SIZE MW(561:544) ++#define NVC0C0_QMDV02_00_QMD_RESERVED_G MW(575:562) ++#define NVC0C0_QMDV02_00_QMD_VERSION MW(579:576) ++#define NVC0C0_QMDV02_00_QMD_MAJOR_VERSION MW(583:580) ++#define NVC0C0_QMDV02_00_QMD_RESERVED_H MW(591:584) ++#define NVC0C0_QMDV02_00_CTA_THREAD_DIMENSION0 MW(607:592) ++#define NVC0C0_QMDV02_00_CTA_THREAD_DIMENSION1 MW(623:608) ++#define NVC0C0_QMDV02_00_CTA_THREAD_DIMENSION2 MW(639:624) ++#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_VALID(i) MW((640+(i)*1):(640+(i)*1)) ++#define 
NVC0C0_QMDV02_00_CONSTANT_BUFFER_VALID_FALSE 0x00000000 ++#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_VALID_TRUE 0x00000001 ++#define NVC0C0_QMDV02_00_QMD_RESERVED_I MW(671:648) ++#define NVC0C0_QMDV02_00_SM_DISABLE_MASK_LOWER MW(703:672) ++#define NVC0C0_QMDV02_00_SM_DISABLE_MASK_UPPER MW(735:704) ++#define NVC0C0_QMDV02_00_RELEASE0_ADDRESS_LOWER MW(767:736) ++#define NVC0C0_QMDV02_00_RELEASE0_ADDRESS_UPPER MW(775:768) ++#define NVC0C0_QMDV02_00_QMD_RESERVED_J MW(783:776) ++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP MW(790:788) ++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_ADD 0x00000000 ++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_MIN 0x00000001 ++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_MAX 0x00000002 ++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_INC 0x00000003 ++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_DEC 0x00000004 ++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_AND 0x00000005 ++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_OR 0x00000006 ++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_XOR 0x00000007 ++#define NVC0C0_QMDV02_00_QMD_RESERVED_K MW(791:791) ++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_FORMAT MW(793:792) ++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 ++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_FORMAT_SIGNED_32 0x00000001 ++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_ENABLE MW(794:794) ++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_ENABLE_FALSE 0x00000000 ++#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_ENABLE_TRUE 0x00000001 ++#define NVC0C0_QMDV02_00_RELEASE0_STRUCTURE_SIZE MW(799:799) ++#define NVC0C0_QMDV02_00_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 ++#define NVC0C0_QMDV02_00_RELEASE0_STRUCTURE_SIZE_ONE_WORD 0x00000001 ++#define NVC0C0_QMDV02_00_RELEASE0_PAYLOAD MW(831:800) ++#define NVC0C0_QMDV02_00_RELEASE1_ADDRESS_LOWER MW(863:832) ++#define NVC0C0_QMDV02_00_RELEASE1_ADDRESS_UPPER MW(871:864) ++#define NVC0C0_QMDV02_00_QMD_RESERVED_L MW(879:872) 
++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP MW(886:884) ++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_ADD 0x00000000 ++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_MIN 0x00000001 ++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_MAX 0x00000002 ++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_INC 0x00000003 ++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_DEC 0x00000004 ++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_AND 0x00000005 ++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_OR 0x00000006 ++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_XOR 0x00000007 ++#define NVC0C0_QMDV02_00_QMD_RESERVED_M MW(887:887) ++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_FORMAT MW(889:888) ++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 ++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_FORMAT_SIGNED_32 0x00000001 ++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_ENABLE MW(890:890) ++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_ENABLE_FALSE 0x00000000 ++#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_ENABLE_TRUE 0x00000001 ++#define NVC0C0_QMDV02_00_RELEASE1_STRUCTURE_SIZE MW(895:895) ++#define NVC0C0_QMDV02_00_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 ++#define NVC0C0_QMDV02_00_RELEASE1_STRUCTURE_SIZE_ONE_WORD 0x00000001 ++#define NVC0C0_QMDV02_00_RELEASE1_PAYLOAD MW(927:896) ++#define NVC0C0_QMDV02_00_SHADER_LOCAL_MEMORY_LOW_SIZE MW(951:928) ++#define NVC0C0_QMDV02_00_QMD_RESERVED_N MW(954:952) ++#define NVC0C0_QMDV02_00_BARRIER_COUNT MW(959:955) ++#define NVC0C0_QMDV02_00_SHADER_LOCAL_MEMORY_HIGH_SIZE MW(983:960) ++#define NVC0C0_QMDV02_00_REGISTER_COUNT MW(991:984) ++#define NVC0C0_QMDV02_00_SHADER_LOCAL_MEMORY_CRS_SIZE MW(1015:992) ++#define NVC0C0_QMDV02_00_SASS_VERSION MW(1023:1016) ++#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_ADDR_LOWER(i) MW((1055+(i)*64):(1024+(i)*64)) ++#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_ADDR_UPPER(i) MW((1072+(i)*64):(1056+(i)*64)) ++#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_RESERVED_ADDR(i) 
MW((1073+(i)*64):(1073+(i)*64)) ++#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_INVALIDATE(i) MW((1074+(i)*64):(1074+(i)*64)) ++#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_INVALIDATE_FALSE 0x00000000 ++#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_INVALIDATE_TRUE 0x00000001 ++#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_SIZE_SHIFTED4(i) MW((1087+(i)*64):(1075+(i)*64)) ++#define NVC0C0_QMDV02_00_HW_ONLY_INNER_GET MW(1566:1536) ++#define NVC0C0_QMDV02_00_HW_ONLY_REQUIRE_SCHEDULING_PCAS MW(1567:1567) ++#define NVC0C0_QMDV02_00_HW_ONLY_INNER_PUT MW(1598:1568) ++#define NVC0C0_QMDV02_00_HW_ONLY_SCG_TYPE MW(1599:1599) ++#define NVC0C0_QMDV02_00_HW_ONLY_SPAN_LIST_HEAD_INDEX MW(1629:1600) ++#define NVC0C0_QMDV02_00_QMD_RESERVED_Q MW(1630:1630) ++#define NVC0C0_QMDV02_00_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID MW(1631:1631) ++#define NVC0C0_QMDV02_00_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE 0x00000000 ++#define NVC0C0_QMDV02_00_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE 0x00000001 ++#define NVC0C0_QMDV02_00_HW_ONLY_SKED_NEXT_QMD_POINTER MW(1663:1632) ++#define NVC0C0_QMDV02_00_CTA_RASTER_WIDTH_RESUME MW(1695:1664) ++#define NVC0C0_QMDV02_00_CTA_RASTER_HEIGHT_RESUME MW(1711:1696) ++#define NVC0C0_QMDV02_00_CTA_RASTER_DEPTH_RESUME MW(1727:1712) ++#define NVC0C0_QMDV02_00_QMD_SPARE_G MW(1759:1728) ++#define NVC0C0_QMDV02_00_QMD_SPARE_H MW(1791:1760) ++#define NVC0C0_QMDV02_00_QMD_SPARE_I MW(1823:1792) ++#define NVC0C0_QMDV02_00_QMD_SPARE_J MW(1855:1824) ++#define NVC0C0_QMDV02_00_QMD_SPARE_K MW(1887:1856) ++#define NVC0C0_QMDV02_00_QMD_SPARE_L MW(1919:1888) ++#define NVC0C0_QMDV02_00_QMD_SPARE_M MW(1951:1920) ++#define NVC0C0_QMDV02_00_QMD_SPARE_N MW(1983:1952) ++#define NVC0C0_QMDV02_00_DEBUG_ID_UPPER MW(2015:1984) ++#define NVC0C0_QMDV02_00_DEBUG_ID_LOWER MW(2047:2016) ++ ++ ++/* ++** Queue Meta Data, Version 02_01 ++ */ ++ ++#define NVC0C0_QMDV02_01_OUTER_PUT MW(30:0) ++#define NVC0C0_QMDV02_01_OUTER_OVERFLOW MW(31:31) ++#define NVC0C0_QMDV02_01_OUTER_GET MW(62:32) ++#define 
NVC0C0_QMDV02_01_OUTER_STICKY_OVERFLOW MW(63:63) ++#define NVC0C0_QMDV02_01_INNER_GET MW(94:64) ++#define NVC0C0_QMDV02_01_INNER_OVERFLOW MW(95:95) ++#define NVC0C0_QMDV02_01_INNER_PUT MW(126:96) ++#define NVC0C0_QMDV02_01_INNER_STICKY_OVERFLOW MW(127:127) ++#define NVC0C0_QMDV02_01_QMD_GROUP_ID MW(133:128) ++#define NVC0C0_QMDV02_01_SM_GLOBAL_CACHING_ENABLE MW(134:134) ++#define NVC0C0_QMDV02_01_RUN_CTA_IN_ONE_SM_PARTITION MW(135:135) ++#define NVC0C0_QMDV02_01_RUN_CTA_IN_ONE_SM_PARTITION_FALSE 0x00000000 ++#define NVC0C0_QMDV02_01_RUN_CTA_IN_ONE_SM_PARTITION_TRUE 0x00000001 ++#define NVC0C0_QMDV02_01_IS_QUEUE MW(136:136) ++#define NVC0C0_QMDV02_01_IS_QUEUE_FALSE 0x00000000 ++#define NVC0C0_QMDV02_01_IS_QUEUE_TRUE 0x00000001 ++#define NVC0C0_QMDV02_01_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST MW(137:137) ++#define NVC0C0_QMDV02_01_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000 ++#define NVC0C0_QMDV02_01_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001 ++#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE0 MW(138:138) ++#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE0_FALSE 0x00000000 ++#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE0_TRUE 0x00000001 ++#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE1 MW(139:139) ++#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE1_FALSE 0x00000000 ++#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE1_TRUE 0x00000001 ++#define NVC0C0_QMDV02_01_REQUIRE_SCHEDULING_PCAS MW(140:140) ++#define NVC0C0_QMDV02_01_REQUIRE_SCHEDULING_PCAS_FALSE 0x00000000 ++#define NVC0C0_QMDV02_01_REQUIRE_SCHEDULING_PCAS_TRUE 0x00000001 ++#define NVC0C0_QMDV02_01_DEPENDENT_QMD_SCHEDULE_ENABLE MW(141:141) ++#define NVC0C0_QMDV02_01_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE 0x00000000 ++#define NVC0C0_QMDV02_01_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE 0x00000001 ++#define NVC0C0_QMDV02_01_DEPENDENT_QMD_TYPE MW(142:142) ++#define NVC0C0_QMDV02_01_DEPENDENT_QMD_TYPE_QUEUE 0x00000000 ++#define NVC0C0_QMDV02_01_DEPENDENT_QMD_TYPE_GRID 0x00000001 ++#define 
NVC0C0_QMDV02_01_DEPENDENT_QMD_FIELD_COPY MW(143:143) ++#define NVC0C0_QMDV02_01_DEPENDENT_QMD_FIELD_COPY_FALSE 0x00000000 ++#define NVC0C0_QMDV02_01_DEPENDENT_QMD_FIELD_COPY_TRUE 0x00000001 ++#define NVC0C0_QMDV02_01_QMD_RESERVED_B MW(159:144) ++#define NVC0C0_QMDV02_01_CIRCULAR_QUEUE_SIZE MW(184:160) ++#define NVC0C0_QMDV02_01_QMD_RESERVED_C MW(185:185) ++#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_HEADER_CACHE MW(186:186) ++#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE 0x00000000 ++#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE 0x00000001 ++#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_SAMPLER_CACHE MW(187:187) ++#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE 0x00000000 ++#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE 0x00000001 ++#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_DATA_CACHE MW(188:188) ++#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_DATA_CACHE_FALSE 0x00000000 ++#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_DATA_CACHE_TRUE 0x00000001 ++#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_DATA_CACHE MW(189:189) ++#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_DATA_CACHE_FALSE 0x00000000 ++#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_DATA_CACHE_TRUE 0x00000001 ++#define NVC0C0_QMDV02_01_INVALIDATE_INSTRUCTION_CACHE MW(190:190) ++#define NVC0C0_QMDV02_01_INVALIDATE_INSTRUCTION_CACHE_FALSE 0x00000000 ++#define NVC0C0_QMDV02_01_INVALIDATE_INSTRUCTION_CACHE_TRUE 0x00000001 ++#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_CONSTANT_CACHE MW(191:191) ++#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE 0x00000000 ++#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE 0x00000001 ++#define NVC0C0_QMDV02_01_CTA_RASTER_WIDTH_RESUME MW(223:192) ++#define NVC0C0_QMDV02_01_CTA_RASTER_HEIGHT_RESUME MW(239:224) ++#define NVC0C0_QMDV02_01_CTA_RASTER_DEPTH_RESUME MW(255:240) ++#define NVC0C0_QMDV02_01_PROGRAM_OFFSET MW(287:256) ++#define NVC0C0_QMDV02_01_CIRCULAR_QUEUE_ADDR_LOWER MW(319:288) ++#define 
NVC0C0_QMDV02_01_CIRCULAR_QUEUE_ADDR_UPPER MW(327:320) ++#define NVC0C0_QMDV02_01_QMD_RESERVED_D MW(335:328) ++#define NVC0C0_QMDV02_01_CIRCULAR_QUEUE_ENTRY_SIZE MW(351:336) ++#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_ID MW(357:352) ++#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE MW(365:358) ++#define NVC0C0_QMDV02_01_RELEASE_MEMBAR_TYPE MW(366:366) ++#define NVC0C0_QMDV02_01_RELEASE_MEMBAR_TYPE_FE_NONE 0x00000000 ++#define NVC0C0_QMDV02_01_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR 0x00000001 ++#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_INCR_ENABLE MW(367:367) ++#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE 0x00000000 ++#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE 0x00000001 ++#define NVC0C0_QMDV02_01_CWD_MEMBAR_TYPE MW(369:368) ++#define NVC0C0_QMDV02_01_CWD_MEMBAR_TYPE_L1_NONE 0x00000000 ++#define NVC0C0_QMDV02_01_CWD_MEMBAR_TYPE_L1_SYSMEMBAR 0x00000001 ++#define NVC0C0_QMDV02_01_CWD_MEMBAR_TYPE_L1_MEMBAR 0x00000003 ++#define NVC0C0_QMDV02_01_SEQUENTIALLY_RUN_CTAS MW(370:370) ++#define NVC0C0_QMDV02_01_SEQUENTIALLY_RUN_CTAS_FALSE 0x00000000 ++#define NVC0C0_QMDV02_01_SEQUENTIALLY_RUN_CTAS_TRUE 0x00000001 ++#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_DECR_ENABLE MW(371:371) ++#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE 0x00000000 ++#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE 0x00000001 ++#define NVC0C0_QMDV02_01_THROTTLED MW(372:372) ++#define NVC0C0_QMDV02_01_THROTTLED_FALSE 0x00000000 ++#define NVC0C0_QMDV02_01_THROTTLED_TRUE 0x00000001 ++#define NVC0C0_QMDV02_01_API_VISIBLE_CALL_LIMIT MW(378:378) ++#define NVC0C0_QMDV02_01_API_VISIBLE_CALL_LIMIT__32 0x00000000 ++#define NVC0C0_QMDV02_01_API_VISIBLE_CALL_LIMIT_NO_CHECK 0x00000001 ++#define NVC0C0_QMDV02_01_SAMPLER_INDEX MW(382:382) ++#define NVC0C0_QMDV02_01_SAMPLER_INDEX_INDEPENDENTLY 0x00000000 ++#define NVC0C0_QMDV02_01_SAMPLER_INDEX_VIA_HEADER_INDEX 0x00000001 ++#define NVC0C0_QMDV02_01_CTA_RASTER_WIDTH MW(415:384) ++#define 
NVC0C0_QMDV02_01_CTA_RASTER_HEIGHT MW(431:416) ++#define NVC0C0_QMDV02_01_QMD_RESERVED13A MW(447:432) ++#define NVC0C0_QMDV02_01_CTA_RASTER_DEPTH MW(463:448) ++#define NVC0C0_QMDV02_01_QMD_RESERVED14A MW(479:464) ++#define NVC0C0_QMDV02_01_DEPENDENT_QMD_POINTER MW(511:480) ++#define NVC0C0_QMDV02_01_QUEUE_ENTRIES_PER_CTA_MINUS_ONE MW(518:512) ++#define NVC0C0_QMDV02_01_COALESCE_WAITING_PERIOD MW(529:522) ++#define NVC0C0_QMDV02_01_SHARED_MEMORY_SIZE MW(561:544) ++#define NVC0C0_QMDV02_01_QMD_RESERVED_G MW(575:562) ++#define NVC0C0_QMDV02_01_QMD_VERSION MW(579:576) ++#define NVC0C0_QMDV02_01_QMD_MAJOR_VERSION MW(583:580) ++#define NVC0C0_QMDV02_01_QMD_RESERVED_H MW(591:584) ++#define NVC0C0_QMDV02_01_CTA_THREAD_DIMENSION0 MW(607:592) ++#define NVC0C0_QMDV02_01_CTA_THREAD_DIMENSION1 MW(623:608) ++#define NVC0C0_QMDV02_01_CTA_THREAD_DIMENSION2 MW(639:624) ++#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_VALID(i) MW((640+(i)*1):(640+(i)*1)) ++#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_VALID_FALSE 0x00000000 ++#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_VALID_TRUE 0x00000001 ++#define NVC0C0_QMDV02_01_QMD_RESERVED_I MW(671:648) ++#define NVC0C0_QMDV02_01_SM_DISABLE_MASK_LOWER MW(703:672) ++#define NVC0C0_QMDV02_01_SM_DISABLE_MASK_UPPER MW(735:704) ++#define NVC0C0_QMDV02_01_RELEASE0_ADDRESS_LOWER MW(767:736) ++#define NVC0C0_QMDV02_01_RELEASE0_ADDRESS_UPPER MW(775:768) ++#define NVC0C0_QMDV02_01_QMD_RESERVED_J MW(783:776) ++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP MW(790:788) ++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_ADD 0x00000000 ++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_MIN 0x00000001 ++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_MAX 0x00000002 ++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_INC 0x00000003 ++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_DEC 0x00000004 ++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_AND 0x00000005 ++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_OR 0x00000006 ++#define 
NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_XOR 0x00000007 ++#define NVC0C0_QMDV02_01_QMD_RESERVED_K MW(791:791) ++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_FORMAT MW(793:792) ++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 ++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_FORMAT_SIGNED_32 0x00000001 ++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_ENABLE MW(794:794) ++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_ENABLE_FALSE 0x00000000 ++#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_ENABLE_TRUE 0x00000001 ++#define NVC0C0_QMDV02_01_RELEASE0_STRUCTURE_SIZE MW(799:799) ++#define NVC0C0_QMDV02_01_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 ++#define NVC0C0_QMDV02_01_RELEASE0_STRUCTURE_SIZE_ONE_WORD 0x00000001 ++#define NVC0C0_QMDV02_01_RELEASE0_PAYLOAD MW(831:800) ++#define NVC0C0_QMDV02_01_RELEASE1_ADDRESS_LOWER MW(863:832) ++#define NVC0C0_QMDV02_01_RELEASE1_ADDRESS_UPPER MW(871:864) ++#define NVC0C0_QMDV02_01_QMD_RESERVED_L MW(879:872) ++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP MW(886:884) ++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_ADD 0x00000000 ++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_MIN 0x00000001 ++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_MAX 0x00000002 ++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_INC 0x00000003 ++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_DEC 0x00000004 ++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_AND 0x00000005 ++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_OR 0x00000006 ++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_XOR 0x00000007 ++#define NVC0C0_QMDV02_01_QMD_RESERVED_M MW(887:887) ++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_FORMAT MW(889:888) ++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 ++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_FORMAT_SIGNED_32 0x00000001 ++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_ENABLE MW(890:890) ++#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_ENABLE_FALSE 0x00000000 ++#define 
NVC0C0_QMDV02_01_RELEASE1_REDUCTION_ENABLE_TRUE 0x00000001 ++#define NVC0C0_QMDV02_01_RELEASE1_STRUCTURE_SIZE MW(895:895) ++#define NVC0C0_QMDV02_01_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 ++#define NVC0C0_QMDV02_01_RELEASE1_STRUCTURE_SIZE_ONE_WORD 0x00000001 ++#define NVC0C0_QMDV02_01_RELEASE1_PAYLOAD MW(927:896) ++#define NVC0C0_QMDV02_01_SHADER_LOCAL_MEMORY_LOW_SIZE MW(951:928) ++#define NVC0C0_QMDV02_01_QMD_RESERVED_N MW(954:952) ++#define NVC0C0_QMDV02_01_BARRIER_COUNT MW(959:955) ++#define NVC0C0_QMDV02_01_SHADER_LOCAL_MEMORY_HIGH_SIZE MW(983:960) ++#define NVC0C0_QMDV02_01_REGISTER_COUNT MW(991:984) ++#define NVC0C0_QMDV02_01_SHADER_LOCAL_MEMORY_CRS_SIZE MW(1015:992) ++#define NVC0C0_QMDV02_01_SASS_VERSION MW(1023:1016) ++#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_ADDR_LOWER(i) MW((1055+(i)*64):(1024+(i)*64)) ++#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_ADDR_UPPER(i) MW((1072+(i)*64):(1056+(i)*64)) ++#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_RESERVED_ADDR(i) MW((1073+(i)*64):(1073+(i)*64)) ++#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_INVALIDATE(i) MW((1074+(i)*64):(1074+(i)*64)) ++#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_INVALIDATE_FALSE 0x00000000 ++#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_INVALIDATE_TRUE 0x00000001 ++#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_SIZE_SHIFTED4(i) MW((1087+(i)*64):(1075+(i)*64)) ++#define NVC0C0_QMDV02_01_QMD_RESERVED_R MW(1567:1536) ++#define NVC0C0_QMDV02_01_QMD_RESERVED_S MW(1599:1568) ++#define NVC0C0_QMDV02_01_HW_ONLY_INNER_GET MW(1630:1600) ++#define NVC0C0_QMDV02_01_HW_ONLY_REQUIRE_SCHEDULING_PCAS MW(1631:1631) ++#define NVC0C0_QMDV02_01_HW_ONLY_INNER_PUT MW(1662:1632) ++#define NVC0C0_QMDV02_01_HW_ONLY_SCG_TYPE MW(1663:1663) ++#define NVC0C0_QMDV02_01_HW_ONLY_SPAN_LIST_HEAD_INDEX MW(1693:1664) ++#define NVC0C0_QMDV02_01_QMD_RESERVED_Q MW(1694:1694) ++#define NVC0C0_QMDV02_01_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID MW(1695:1695) ++#define NVC0C0_QMDV02_01_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE 0x00000000 ++#define 
NVC0C0_QMDV02_01_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE 0x00000001 ++#define NVC0C0_QMDV02_01_HW_ONLY_SKED_NEXT_QMD_POINTER MW(1727:1696) ++#define NVC0C0_QMDV02_01_QMD_SPARE_G MW(1759:1728) ++#define NVC0C0_QMDV02_01_QMD_SPARE_H MW(1791:1760) ++#define NVC0C0_QMDV02_01_QMD_SPARE_I MW(1823:1792) ++#define NVC0C0_QMDV02_01_QMD_SPARE_J MW(1855:1824) ++#define NVC0C0_QMDV02_01_QMD_SPARE_K MW(1887:1856) ++#define NVC0C0_QMDV02_01_QMD_SPARE_L MW(1919:1888) ++#define NVC0C0_QMDV02_01_QMD_SPARE_M MW(1951:1920) ++#define NVC0C0_QMDV02_01_QMD_SPARE_N MW(1983:1952) ++#define NVC0C0_QMDV02_01_DEBUG_ID_UPPER MW(2015:1984) ++#define NVC0C0_QMDV02_01_DEBUG_ID_LOWER MW(2047:2016) ++ ++ ++ ++#endif // #ifndef __CLC0C0QMD_H__ +diff --git a/src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h b/src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h +new file mode 100644 +index 00000000000..588cc639d32 +--- /dev/null ++++ b/src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h +@@ -0,0 +1,245 @@ ++/******************************************************************************* ++ Copyright (c) 2001-2010 NVIDIA Corporation ++ ++ Permission is hereby granted, free of charge, to any person obtaining a copy ++ of this software and associated documentation files (the "Software"), to ++ deal in the Software without restriction, including without limitation the ++ rights to use, copy, modify, merge, publish, distribute, sublicense, and/or ++ sell copies of the Software, and to permit persons to whom the Software is ++ furnished to do so, subject to the following conditions: ++ ++ The above copyright notice and this permission notice shall be ++ included in all copies or substantial portions of the Software. ++ ++ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ++ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ DEALINGS IN THE SOFTWARE. ++ ++*******************************************************************************/ ++ ++/* AUTO GENERATED FILE -- DO NOT EDIT */ ++ ++#ifndef __CLC3C0QMD_H__ ++#define __CLC3C0QMD_H__ ++ ++/* ++** Queue Meta Data, Version 02_02 ++ */ ++ ++// The below C preprocessor definitions describe "multi-word" structures, where ++// fields may have bit numbers beyond 32. For example, MW(127:96) means ++// the field is in bits 0-31 of word number 3 of the structure. The "MW(X:Y)" ++// syntax is to distinguish from similar "X:Y" single-word definitions: the ++// macros historically used for single-word definitions would fail with ++// multi-word definitions. ++// ++// See nvmisc.h:DRF_VAL_MW() in the source code of the kernel ++// interface layer of nvidia.ko for an example of how to manipulate ++// these MW(X:Y) definitions. 
++ ++#define NVC3C0_QMDV02_02_OUTER_PUT MW(30:0) ++#define NVC3C0_QMDV02_02_OUTER_OVERFLOW MW(31:31) ++#define NVC3C0_QMDV02_02_OUTER_GET MW(62:32) ++#define NVC3C0_QMDV02_02_OUTER_STICKY_OVERFLOW MW(63:63) ++#define NVC3C0_QMDV02_02_INNER_GET MW(94:64) ++#define NVC3C0_QMDV02_02_INNER_OVERFLOW MW(95:95) ++#define NVC3C0_QMDV02_02_INNER_PUT MW(126:96) ++#define NVC3C0_QMDV02_02_INNER_STICKY_OVERFLOW MW(127:127) ++#define NVC3C0_QMDV02_02_QMD_GROUP_ID MW(133:128) ++#define NVC3C0_QMDV02_02_SM_GLOBAL_CACHING_ENABLE MW(134:134) ++#define NVC3C0_QMDV02_02_RUN_CTA_IN_ONE_SM_PARTITION MW(135:135) ++#define NVC3C0_QMDV02_02_RUN_CTA_IN_ONE_SM_PARTITION_FALSE 0x00000000 ++#define NVC3C0_QMDV02_02_RUN_CTA_IN_ONE_SM_PARTITION_TRUE 0x00000001 ++#define NVC3C0_QMDV02_02_IS_QUEUE MW(136:136) ++#define NVC3C0_QMDV02_02_IS_QUEUE_FALSE 0x00000000 ++#define NVC3C0_QMDV02_02_IS_QUEUE_TRUE 0x00000001 ++#define NVC3C0_QMDV02_02_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST MW(137:137) ++#define NVC3C0_QMDV02_02_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000 ++#define NVC3C0_QMDV02_02_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001 ++#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE0 MW(138:138) ++#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE0_FALSE 0x00000000 ++#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE0_TRUE 0x00000001 ++#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE1 MW(139:139) ++#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE1_FALSE 0x00000000 ++#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE1_TRUE 0x00000001 ++#define NVC3C0_QMDV02_02_REQUIRE_SCHEDULING_PCAS MW(140:140) ++#define NVC3C0_QMDV02_02_REQUIRE_SCHEDULING_PCAS_FALSE 0x00000000 ++#define NVC3C0_QMDV02_02_REQUIRE_SCHEDULING_PCAS_TRUE 0x00000001 ++#define NVC3C0_QMDV02_02_DEPENDENT_QMD_SCHEDULE_ENABLE MW(141:141) ++#define NVC3C0_QMDV02_02_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE 0x00000000 ++#define NVC3C0_QMDV02_02_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE 0x00000001 ++#define NVC3C0_QMDV02_02_DEPENDENT_QMD_TYPE 
MW(142:142) ++#define NVC3C0_QMDV02_02_DEPENDENT_QMD_TYPE_QUEUE 0x00000000 ++#define NVC3C0_QMDV02_02_DEPENDENT_QMD_TYPE_GRID 0x00000001 ++#define NVC3C0_QMDV02_02_DEPENDENT_QMD_FIELD_COPY MW(143:143) ++#define NVC3C0_QMDV02_02_DEPENDENT_QMD_FIELD_COPY_FALSE 0x00000000 ++#define NVC3C0_QMDV02_02_DEPENDENT_QMD_FIELD_COPY_TRUE 0x00000001 ++#define NVC3C0_QMDV02_02_QMD_RESERVED_B MW(159:144) ++#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_SIZE MW(184:160) ++#define NVC3C0_QMDV02_02_QMD_RESERVED_C MW(185:185) ++#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_HEADER_CACHE MW(186:186) ++#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE 0x00000000 ++#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE 0x00000001 ++#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_SAMPLER_CACHE MW(187:187) ++#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE 0x00000000 ++#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE 0x00000001 ++#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_DATA_CACHE MW(188:188) ++#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_DATA_CACHE_FALSE 0x00000000 ++#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_DATA_CACHE_TRUE 0x00000001 ++#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_DATA_CACHE MW(189:189) ++#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_DATA_CACHE_FALSE 0x00000000 ++#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_DATA_CACHE_TRUE 0x00000001 ++#define NVC3C0_QMDV02_02_INVALIDATE_INSTRUCTION_CACHE MW(190:190) ++#define NVC3C0_QMDV02_02_INVALIDATE_INSTRUCTION_CACHE_FALSE 0x00000000 ++#define NVC3C0_QMDV02_02_INVALIDATE_INSTRUCTION_CACHE_TRUE 0x00000001 ++#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_CONSTANT_CACHE MW(191:191) ++#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE 0x00000000 ++#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE 0x00000001 ++#define NVC3C0_QMDV02_02_CTA_RASTER_WIDTH_RESUME MW(223:192) ++#define NVC3C0_QMDV02_02_CTA_RASTER_HEIGHT_RESUME MW(239:224) ++#define NVC3C0_QMDV02_02_CTA_RASTER_DEPTH_RESUME 
MW(255:240) ++#define NVC3C0_QMDV02_02_PROGRAM_OFFSET MW(287:256) ++#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_ADDR_LOWER MW(319:288) ++#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_ADDR_UPPER MW(327:320) ++#define NVC3C0_QMDV02_02_QMD_RESERVED_D MW(335:328) ++#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_ENTRY_SIZE MW(351:336) ++#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_ID MW(357:352) ++#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE MW(365:358) ++#define NVC3C0_QMDV02_02_RELEASE_MEMBAR_TYPE MW(366:366) ++#define NVC3C0_QMDV02_02_RELEASE_MEMBAR_TYPE_FE_NONE 0x00000000 ++#define NVC3C0_QMDV02_02_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR 0x00000001 ++#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_INCR_ENABLE MW(367:367) ++#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE 0x00000000 ++#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE 0x00000001 ++#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE MW(369:368) ++#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE_L1_NONE 0x00000000 ++#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE_L1_SYSMEMBAR 0x00000001 ++#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE_L1_MEMBAR 0x00000003 ++#define NVC3C0_QMDV02_02_SEQUENTIALLY_RUN_CTAS MW(370:370) ++#define NVC3C0_QMDV02_02_SEQUENTIALLY_RUN_CTAS_FALSE 0x00000000 ++#define NVC3C0_QMDV02_02_SEQUENTIALLY_RUN_CTAS_TRUE 0x00000001 ++#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DECR_ENABLE MW(371:371) ++#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE 0x00000000 ++#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE 0x00000001 ++#define NVC3C0_QMDV02_02_API_VISIBLE_CALL_LIMIT MW(378:378) ++#define NVC3C0_QMDV02_02_API_VISIBLE_CALL_LIMIT__32 0x00000000 ++#define NVC3C0_QMDV02_02_API_VISIBLE_CALL_LIMIT_NO_CHECK 0x00000001 ++#define NVC3C0_QMDV02_02_SAMPLER_INDEX MW(382:382) ++#define NVC3C0_QMDV02_02_SAMPLER_INDEX_INDEPENDENTLY 0x00000000 ++#define NVC3C0_QMDV02_02_SAMPLER_INDEX_VIA_HEADER_INDEX 0x00000001 ++#define NVC3C0_QMDV02_02_CTA_RASTER_WIDTH MW(415:384) ++#define 
NVC3C0_QMDV02_02_CTA_RASTER_HEIGHT MW(431:416) ++#define NVC3C0_QMDV02_02_QMD_RESERVED13A MW(447:432) ++#define NVC3C0_QMDV02_02_CTA_RASTER_DEPTH MW(463:448) ++#define NVC3C0_QMDV02_02_QMD_RESERVED14A MW(479:464) ++#define NVC3C0_QMDV02_02_DEPENDENT_QMD_POINTER MW(511:480) ++#define NVC3C0_QMDV02_02_QUEUE_ENTRIES_PER_CTA_MINUS_ONE MW(518:512) ++#define NVC3C0_QMDV02_02_COALESCE_WAITING_PERIOD MW(529:522) ++#define NVC3C0_QMDV02_02_SHARED_MEMORY_SIZE MW(561:544) ++#define NVC3C0_QMDV02_02_MIN_SM_CONFIG_SHARED_MEM_SIZE MW(568:562) ++#define NVC3C0_QMDV02_02_MAX_SM_CONFIG_SHARED_MEM_SIZE MW(575:569) ++#define NVC3C0_QMDV02_02_QMD_VERSION MW(579:576) ++#define NVC3C0_QMDV02_02_QMD_MAJOR_VERSION MW(583:580) ++#define NVC3C0_QMDV02_02_QMD_RESERVED_H MW(591:584) ++#define NVC3C0_QMDV02_02_CTA_THREAD_DIMENSION0 MW(607:592) ++#define NVC3C0_QMDV02_02_CTA_THREAD_DIMENSION1 MW(623:608) ++#define NVC3C0_QMDV02_02_CTA_THREAD_DIMENSION2 MW(639:624) ++#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_VALID(i) MW((640+(i)*1):(640+(i)*1)) ++#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_VALID_FALSE 0x00000000 ++#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_VALID_TRUE 0x00000001 ++#define NVC3C0_QMDV02_02_REGISTER_COUNT_V MW(656:648) ++#define NVC3C0_QMDV02_02_TARGET_SM_CONFIG_SHARED_MEM_SIZE MW(663:657) ++#define NVC3C0_QMDV02_02_FREE_CTA_SLOTS_EMPTY_SM MW(671:664) ++#define NVC3C0_QMDV02_02_SM_DISABLE_MASK_LOWER MW(703:672) ++#define NVC3C0_QMDV02_02_SM_DISABLE_MASK_UPPER MW(735:704) ++#define NVC3C0_QMDV02_02_RELEASE0_ADDRESS_LOWER MW(767:736) ++#define NVC3C0_QMDV02_02_RELEASE0_ADDRESS_UPPER MW(775:768) ++#define NVC3C0_QMDV02_02_QMD_RESERVED_J MW(783:776) ++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP MW(790:788) ++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_ADD 0x00000000 ++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_MIN 0x00000001 ++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_MAX 0x00000002 ++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_INC 0x00000003 ++#define 
NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_DEC 0x00000004 ++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_AND 0x00000005 ++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_OR 0x00000006 ++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_XOR 0x00000007 ++#define NVC3C0_QMDV02_02_QMD_RESERVED_K MW(791:791) ++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_FORMAT MW(793:792) ++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 ++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_FORMAT_SIGNED_32 0x00000001 ++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_ENABLE MW(794:794) ++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_ENABLE_FALSE 0x00000000 ++#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_ENABLE_TRUE 0x00000001 ++#define NVC3C0_QMDV02_02_RELEASE0_STRUCTURE_SIZE MW(799:799) ++#define NVC3C0_QMDV02_02_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 ++#define NVC3C0_QMDV02_02_RELEASE0_STRUCTURE_SIZE_ONE_WORD 0x00000001 ++#define NVC3C0_QMDV02_02_RELEASE0_PAYLOAD MW(831:800) ++#define NVC3C0_QMDV02_02_RELEASE1_ADDRESS_LOWER MW(863:832) ++#define NVC3C0_QMDV02_02_RELEASE1_ADDRESS_UPPER MW(871:864) ++#define NVC3C0_QMDV02_02_QMD_RESERVED_L MW(879:872) ++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP MW(886:884) ++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_ADD 0x00000000 ++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_MIN 0x00000001 ++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_MAX 0x00000002 ++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_INC 0x00000003 ++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_DEC 0x00000004 ++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_AND 0x00000005 ++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_OR 0x00000006 ++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_XOR 0x00000007 ++#define NVC3C0_QMDV02_02_QMD_RESERVED_M MW(887:887) ++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_FORMAT MW(889:888) ++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32 0x00000000 ++#define 
NVC3C0_QMDV02_02_RELEASE1_REDUCTION_FORMAT_SIGNED_32 0x00000001 ++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_ENABLE MW(890:890) ++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_ENABLE_FALSE 0x00000000 ++#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_ENABLE_TRUE 0x00000001 ++#define NVC3C0_QMDV02_02_RELEASE1_STRUCTURE_SIZE MW(895:895) ++#define NVC3C0_QMDV02_02_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS 0x00000000 ++#define NVC3C0_QMDV02_02_RELEASE1_STRUCTURE_SIZE_ONE_WORD 0x00000001 ++#define NVC3C0_QMDV02_02_RELEASE1_PAYLOAD MW(927:896) ++#define NVC3C0_QMDV02_02_SHADER_LOCAL_MEMORY_LOW_SIZE MW(951:928) ++#define NVC3C0_QMDV02_02_QMD_RESERVED_N MW(954:952) ++#define NVC3C0_QMDV02_02_BARRIER_COUNT MW(959:955) ++#define NVC3C0_QMDV02_02_SHADER_LOCAL_MEMORY_HIGH_SIZE MW(983:960) ++#define NVC3C0_QMDV02_02_REGISTER_COUNT MW(991:984) ++#define NVC3C0_QMDV02_02_SHADER_LOCAL_MEMORY_CRS_SIZE MW(1015:992) ++#define NVC3C0_QMDV02_02_SASS_VERSION MW(1023:1016) ++#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_ADDR_LOWER(i) MW((1055+(i)*64):(1024+(i)*64)) ++#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_ADDR_UPPER(i) MW((1072+(i)*64):(1056+(i)*64)) ++#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_RESERVED_ADDR(i) MW((1073+(i)*64):(1073+(i)*64)) ++#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_INVALIDATE(i) MW((1074+(i)*64):(1074+(i)*64)) ++#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_INVALIDATE_FALSE 0x00000000 ++#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_INVALIDATE_TRUE 0x00000001 ++#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_SIZE_SHIFTED4(i) MW((1087+(i)*64):(1075+(i)*64)) ++#define NVC3C0_QMDV02_02_PROGRAM_ADDRESS_LOWER MW(1567:1536) ++#define NVC3C0_QMDV02_02_PROGRAM_ADDRESS_UPPER MW(1584:1568) ++#define NVC3C0_QMDV02_02_QMD_RESERVED_S MW(1599:1585) ++#define NVC3C0_QMDV02_02_HW_ONLY_INNER_GET MW(1630:1600) ++#define NVC3C0_QMDV02_02_HW_ONLY_REQUIRE_SCHEDULING_PCAS MW(1631:1631) ++#define NVC3C0_QMDV02_02_HW_ONLY_INNER_PUT MW(1662:1632) ++#define NVC3C0_QMDV02_02_HW_ONLY_SCG_TYPE MW(1663:1663) ++#define 
NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX MW(1693:1664) ++#define NVC3C0_QMDV02_02_QMD_RESERVED_Q MW(1694:1694) ++#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID MW(1695:1695) ++#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE 0x00000000 ++#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE 0x00000001 ++#define NVC3C0_QMDV02_02_HW_ONLY_SKED_NEXT_QMD_POINTER MW(1727:1696) ++#define NVC3C0_QMDV02_02_QMD_SPARE_G MW(1759:1728) ++#define NVC3C0_QMDV02_02_QMD_SPARE_H MW(1791:1760) ++#define NVC3C0_QMDV02_02_QMD_SPARE_I MW(1823:1792) ++#define NVC3C0_QMDV02_02_QMD_SPARE_J MW(1855:1824) ++#define NVC3C0_QMDV02_02_QMD_SPARE_K MW(1887:1856) ++#define NVC3C0_QMDV02_02_QMD_SPARE_L MW(1919:1888) ++#define NVC3C0_QMDV02_02_QMD_SPARE_M MW(1951:1920) ++#define NVC3C0_QMDV02_02_QMD_SPARE_N MW(1983:1952) ++#define NVC3C0_QMDV02_02_DEBUG_ID_UPPER MW(2015:1984) ++#define NVC3C0_QMDV02_02_DEBUG_ID_LOWER MW(2047:2016) ++ ++ ++ ++#endif // #ifndef __CLC3C0QMD_H__ +diff --git a/src/gallium/drivers/nouveau/nvc0/drf.h b/src/gallium/drivers/nouveau/nvc0/drf.h +new file mode 100644 +index 00000000000..bf95c8c3185 +--- /dev/null ++++ b/src/gallium/drivers/nouveau/nvc0/drf.h +@@ -0,0 +1,119 @@ ++/* ++ * Copyright 2019 Red Hat Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. 
++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ */ ++#ifndef __NVHW_DRF_H__ ++#define __NVHW_DRF_H__ ++ ++/* Helpers common to all DRF accessors. */ ++#define DRF_LO(drf) (0 ? drf) ++#define DRF_HI(drf) (1 ? drf) ++#define DRF_BITS(drf) (DRF_HI(drf) - DRF_LO(drf) + 1) ++#define DRF_MASK(drf) (~0ULL >> (64 - DRF_BITS(drf))) ++#define DRF_SMASK(drf) (DRF_MASK(drf) << DRF_LO(drf)) ++ ++/* Helpers for DRF-MW accessors. */ ++#define DRF_MX_MW(drf) drf ++#define DRF_MX(drf) DRF_MX_##drf ++#define DRF_MW(drf) DRF_MX(drf) ++#define DRF_MW_SPANS(o,drf) (DRF_LW_IDX((o),drf) != DRF_HW_IDX((o),drf)) ++#define DRF_MW_SIZE(o) (sizeof((o)[0]) * 8) ++ ++#define DRF_LW_IDX(o,drf) (DRF_LO(DRF_MW(drf)) / DRF_MW_SIZE(o)) ++#define DRF_LW_LO(o,drf) (DRF_LO(DRF_MW(drf)) % DRF_MW_SIZE(o)) ++#define DRF_LW_HI(o,drf) (DRF_MW_SPANS((o),drf) ? 
(DRF_MW_SIZE(o) - 1) : DRF_HW_HI((o),drf)) ++#define DRF_LW_BITS(o,drf) (DRF_LW_HI((o),drf) - DRF_LW_LO((o),drf) + 1) ++#define DRF_LW_MASK(o,drf) (~0ULL >> (64 - DRF_LW_BITS((o),drf))) ++#define DRF_LW_SMASK(o,drf) (DRF_LW_MASK((o),drf) << DRF_LW_LO((o),drf)) ++#define DRF_LW_GET(o,drf) (((o)[DRF_LW_IDX((o),drf)] >> DRF_LW_LO((o),drf)) & DRF_LW_MASK((o),drf)) ++#define DRF_LW_VAL(o,drf,v) (((v) & DRF_LW_MASK((o),drf)) << DRF_LW_LO((o),drf)) ++#define DRF_LW_CLR(o,drf) ((o)[DRF_LW_IDX((o),drf)] & ~DRF_LW_SMASK((o),drf)) ++#define DRF_LW_SET(o,drf,v) (DRF_LW_CLR((o),drf) | DRF_LW_VAL((o),drf,(v))) ++ ++#define DRF_HW_IDX(o,drf) (DRF_HI(DRF_MW(drf)) / DRF_MW_SIZE(o)) ++#define DRF_HW_LO(o,drf) 0 ++#define DRF_HW_HI(o,drf) (DRF_HI(DRF_MW(drf)) % DRF_MW_SIZE(o)) ++#define DRF_HW_BITS(o,drf) (DRF_HW_HI((o),drf) - DRF_HW_LO((o),drf) + 1) ++#define DRF_HW_MASK(o,drf) (~0ULL >> (64 - DRF_HW_BITS((o),drf))) ++#define DRF_HW_SMASK(o,drf) (DRF_HW_MASK((o),drf) << DRF_HW_LO((o),drf)) ++#define DRF_HW_GET(o,drf) ((o)[DRF_HW_IDX(o,drf)] & DRF_HW_SMASK((o),drf)) ++#define DRF_HW_VAL(o,drf,v) (((long long)(v) >> DRF_LW_BITS((o),drf)) & DRF_HW_SMASK((o),drf)) ++#define DRF_HW_CLR(o,drf) ((o)[DRF_HW_IDX((o),drf)] & ~DRF_HW_SMASK((o),drf)) ++#define DRF_HW_SET(o,drf,v) (DRF_HW_CLR((o),drf) | DRF_HW_VAL((o),drf,(v))) ++ ++/* DRF accessors. */ ++#define NVVAL_X(drf,v) (((v) & DRF_MASK(drf)) << DRF_LO(drf)) ++#define NVVAL_N(X,d,r,f, v) NVVAL_X(d##_##r##_##f, (v)) ++#define NVVAL_I(X,d,r,f,i,v) NVVAL_X(d##_##r##_##f(i), (v)) ++#define NVVAL_(X,_1,_2,_3,_4,_5,IMPL,...) IMPL ++#define NVVAL(A...) NVVAL_(X, ##A, NVVAL_I, NVVAL_N)(X, ##A) ++ ++#define NVDEF_N(X,d,r,f, v) NVVAL_X(d##_##r##_##f, d##_##r##_##f##_##v) ++#define NVDEF_I(X,d,r,f,i,v) NVVAL_X(d##_##r##_##f(i), d##_##r##_##f##_##v) ++#define NVDEF_(X,_1,_2,_3,_4,_5,IMPL,...) IMPL ++#define NVDEF(A...) 
NVDEF_(X, ##A, NVDEF_I, NVDEF_N)(X, ##A) ++ ++#define NVVAL_GET_X(o,drf) (((o) >> DRF_LO(drf)) & DRF_MASK(drf)) ++#define NVVAL_GET_N(X,o,d,r,f ) NVVAL_GET_X(o, d##_##r##_##f) ++#define NVVAL_GET_I(X,o,d,r,f,i) NVVAL_GET_X(o, d##_##r##_##f(i)) ++#define NVVAL_GET_(X,_1,_2,_3,_4,_5,IMPL,...) IMPL ++#define NVVAL_GET(A...) NVVAL_GET_(X, ##A, NVVAL_GET_I, NVVAL_GET_N)(X, ##A) ++ ++#define NVVAL_SET_X(o,drf,v) (((o) & ~DRF_SMASK(drf)) | NVVAL_X(drf, (v))) ++#define NVVAL_SET_N(X,o,d,r,f, v) NVVAL_SET_X(o, d##_##r##_##f, (v)) ++#define NVVAL_SET_I(X,o,d,r,f,i,v) NVVAL_SET_X(o, d##_##r##_##f(i), (v)) ++#define NVVAL_SET_(X,_1,_2,_3,_4,_5,_6,IMPL,...) IMPL ++#define NVVAL_SET(A...) NVVAL_SET_(X, ##A, NVVAL_SET_I, NVVAL_SET_N)(X, ##A) ++ ++#define NVDEF_SET_N(X,o,d,r,f, v) \ ++ NVVAL_SET_X(o, d##_##r##_##f, d##_##r##_##f##_##v) ++#define NVDEF_SET_I(X,o,d,r,f,i,v) \ ++ NVVAL_SET_X(o, d##_##r##_##f(i), d##_##r##_##f##_##v) ++#define NVDEF_SET_(X,_1,_2,_3,_4,_5,_6,IMPL,...) IMPL ++#define NVDEF_SET(A...) NVDEF_SET_(X, ##A, NVDEF_SET_I, NVDEF_SET_N)(X, ##A) ++ ++/* DRF-MW accessors. */ ++#define NVVAL_MW_GET_X(o,drf) \ ++ ((DRF_MW_SPANS((o),drf) ? \ ++ (DRF_HW_GET((o),drf) << DRF_LW_BITS((o),drf)) : 0) | DRF_LW_GET((o),drf)) ++#define NVVAL_MW_GET_N(X,o,d,r,f ) NVVAL_MW_GET_X((o), d##_##r##_##f) ++#define NVVAL_MW_GET_I(X,o,d,r,f,i) NVVAL_MW_GET_X((o), d##_##r##_##f(i)) ++#define NVVAL_MW_GET_(X,_1,_2,_3,_4,_5,IMPL,...) IMPL ++#define NVVAL_MW_GET(A...) NVVAL_MW_GET_(X, ##A, NVVAL_MW_GET_I, NVVAL_MW_GET_N)(X, ##A) ++ ++#define NVVAL_MW_SET_X(o,drf,v) do { \ ++ (o)[DRF_LW_IDX((o),drf)] = DRF_LW_SET((o),drf,(v)); \ ++ if (DRF_MW_SPANS((o),drf)) \ ++ (o)[DRF_HW_IDX((o),drf)] = DRF_HW_SET((o),drf,(v)); \ ++} while(0) ++#define NVVAL_MW_SET_N(X,o,d,r,f, v) NVVAL_MW_SET_X((o), d##_##r##_##f, (v)) ++#define NVVAL_MW_SET_I(X,o,d,r,f,i,v) NVVAL_MW_SET_X((o), d##_##r##_##f(i), (v)) ++#define NVVAL_MW_SET_(X,_1,_2,_3,_4,_5,_6,IMPL,...) IMPL ++#define NVVAL_MW_SET(A...) 
\ ++ NVVAL_MW_SET_(X, ##A, NVVAL_MW_SET_I, NVVAL_MW_SET_N)(X, ##A) ++ ++#define NVDEF_MW_SET_N(X,o,d,r,f, v) \ ++ NVVAL_MW_SET_X(o, d##_##r##_##f, d##_##r##_##f##_##v) ++#define NVDEF_MW_SET_I(X,o,d,r,f,i,v) \ ++ NVVAL_MW_SET_X(o, d##_##r##_##f(i), d##_##r##_##f##_##v) ++#define NVDEF_MW_SET_(X,_1,_2,_3,_4,_5,_6,IMPL,...) IMPL ++#define NVDEF_MW_SET(A...) \ ++ NVDEF_MW_SET_(X, ##A, NVDEF_MW_SET_I, NVDEF_MW_SET_N)(X, ##A) ++#endif +diff --git a/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h +new file mode 100644 +index 00000000000..390741cbd04 +--- /dev/null ++++ b/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h +@@ -0,0 +1,904 @@ ++#define NV_MME_PRED_MODE_UUUU 0 ++#define NV_MME_PRED_MODE_TTTT 1 ++#define NV_MME_PRED_MODE_FFFF 2 ++#define NV_MME_PRED_MODE_TTUU 3 ++#define NV_MME_PRED_MODE_FFUU 4 ++#define NV_MME_PRED_MODE_TFUU 5 ++#define NV_MME_PRED_MODE_TUUU 6 ++#define NV_MME_PRED_MODE_FUUU 7 ++#define NV_MME_PRED_MODE_UUTT 8 ++#define NV_MME_PRED_MODE_UUTF 9 ++#define NV_MME_PRED_MODE_UUTU 10 ++#define NV_MME_PRED_MODE_UUFT 11 ++#define NV_MME_PRED_MODE_UUFF 12 ++#define NV_MME_PRED_MODE_UUFU 13 ++#define NV_MME_PRED_MODE_UUUT 14 ++#define NV_MME_PRED_MODE_UUUF 15 ++ ++#define NV_MME_REG_R0 0 ++#define NV_MME_REG_R1 1 ++#define NV_MME_REG_R2 2 ++#define NV_MME_REG_R3 3 ++#define NV_MME_REG_R4 4 ++#define NV_MME_REG_R5 5 ++#define NV_MME_REG_R6 6 ++#define NV_MME_REG_R7 7 ++#define NV_MME_REG_R8 8 ++#define NV_MME_REG_R9 9 ++#define NV_MME_REG_R10 10 ++#define NV_MME_REG_R11 11 ++#define NV_MME_REG_R12 12 ++#define NV_MME_REG_R13 13 ++#define NV_MME_REG_R14 14 ++#define NV_MME_REG_R15 15 ++#define NV_MME_REG_R16 16 ++#define NV_MME_REG_R17 17 ++#define NV_MME_REG_R18 18 ++#define NV_MME_REG_R19 19 ++#define NV_MME_REG_R20 20 ++#define NV_MME_REG_R21 21 ++#define NV_MME_REG_R22 22 ++#define NV_MME_REG_R23 23 ++#define NV_MME_REG_ZERO 24 ++#define NV_MME_REG_IMMED 25 ++#define NV_MME_REG_IMMEDPAIR 26 
++#define NV_MME_REG_IMMED32 27 ++#define NV_MME_REG_LOAD0 28 ++#define NV_MME_REG_LOAD1 29 ++ ++#define NV_MME_ALU_ADD 0 ++#define NV_MME_ALU_ADDC 1 ++#define NV_MME_ALU_SUB 2 ++#define NV_MME_ALU_SUBB 3 ++#define NV_MME_ALU_MUL 4 ++#define NV_MME_ALU_MULH 5 ++#define NV_MME_ALU_MULU 6 ++#define NV_MME_ALU_EXTENDED 7 ++#define NV_MME_ALU_CLZ 8 ++#define NV_MME_ALU_SLL 9 ++#define NV_MME_ALU_SRL 10 ++#define NV_MME_ALU_SRA 11 ++#define NV_MME_ALU_AND 12 ++#define NV_MME_ALU_NAND 13 ++#define NV_MME_ALU_OR 14 ++#define NV_MME_ALU_XOR 15 ++#define NV_MME_ALU_MERGE 16 ++#define NV_MME_ALU_SLT 17 ++#define NV_MME_ALU_SLTU 18 ++#define NV_MME_ALU_SLE 19 ++#define NV_MME_ALU_SLEU 20 ++#define NV_MME_ALU_SEQ 21 ++#define NV_MME_ALU_STATE 22 ++#define NV_MME_ALU_LOOP 23 ++#define NV_MME_ALU_JAL 24 ++#define NV_MME_ALU_BLT 25 ++#define NV_MME_ALU_BLTU 26 ++#define NV_MME_ALU_BLE 27 ++#define NV_MME_ALU_BLEU 28 ++#define NV_MME_ALU_BEQ 29 ++#define NV_MME_ALU_DREAD 30 ++#define NV_MME_ALU_DWRITE 31 ++ ++#define NV_MME_OUT_NONE 0 ++#define NV_MME_OUT_ALU0 1 ++#define NV_MME_OUT_ALU1 2 ++#define NV_MME_OUT_LOAD0 3 ++#define NV_MME_OUT_LOAD1 4 ++#define NV_MME_OUT_IMMED0 5 ++#define NV_MME_OUT_IMMED1 6 ++#define NV_MME_OUT_RESERVED 7 ++#define NV_MME_OUT_IMMEDHIGH0 8 ++#define NV_MME_OUT_IMMEDHIGH1 9 ++#define NV_MME_OUT_IMMED32_0 10 ++/* MME_BITS packs one instruction into three comma-separated 32-bit words, emitted high to low (bits 95:64, 63:32, 31:0); b1 and i0 straddle a word boundary. Every field is cast to uint32_t before shifting so that shifts reaching bit 31 are well defined instead of overflowing a signed int. Fields are not range-masked here; callers pass non-negative in-range values. */ ++#define MME_BITS(en,pm,pr,o0,d0,a0,b0,i0,o1,d1,a1,b1,i1,m0,e0,m1,e1) \ ++ ((uint32_t)(e1) << (92 - 64) | (uint32_t)(m1) << (89 - 64) | \ ++ (uint32_t)(e0) << (85 - 64) | (uint32_t)(m0) << (82 - 64) | \ ++ (uint32_t)(i1) << (66 - 64) | (uint32_t)(b1) >> (64 - 61)), \ ++ (((uint32_t)(b1) & 7) << (61 - 32) | (uint32_t)(a1) << (56 - 32) | \ ++ (uint32_t)(d1) << (51 - 32) | (uint32_t)(o1) << (46 - 32) | \ ++ (uint32_t)(i0) >> (32 - 30)), \ ++ (((uint32_t)(i0) & 3) << 30 | (uint32_t)(b0) << 25 | (uint32_t)(a0) << 20 | (uint32_t)(d0) << 15 | (uint32_t)(o0) << 10 | \ ++ (uint32_t)(pr) << 5 | (uint32_t)(pm) << 1 | (uint32_t)(en)) ++/* MME_INSN wraps MME_BITS: pm is fixed to NV_MME_PRED_MODE_UUUU and pr to NV_MME_REG_ZERO, and the op/register/output arguments are token-pasted onto the NV_MME_ALU_, NV_MME_REG_ and NV_MME_OUT_ prefixes. */ ++#define MME_INSN(en,o0,d0,a0,b0,i0,m0,e0,o1,d1,a1,b1,i1,m1,e1) \ ++ MME_BITS((en), NV_MME_PRED_MODE_UUUU, NV_MME_REG_ZERO, \ ++ NV_MME_ALU_##o0, NV_MME_REG_##d0, \ ++ NV_MME_REG_##a0, NV_MME_REG_##b0, (i0), 
\ ++ NV_MME_ALU_##o1, NV_MME_REG_##d1, \ ++ NV_MME_REG_##a1, NV_MME_REG_##b1, (i1), \ ++ NV_MME_OUT_##m0, NV_MME_OUT_##e0, \ ++ NV_MME_OUT_##m1, NV_MME_OUT_##e1) ++ ++uint32_t mmec597_per_instance_bf[] = { ++// r1 = load(); // count ++// r3 = load(); // mask ++// mthd(0x1880, 1); // VERTEX_ARRAY_PER_INSTANCE[0] ++ MME_INSN(0, ADD, R1, LOAD0, ZERO, (1<<12)|0x1880/4, IMMED0, NONE, ++ ADD, R3, LOAD1, ZERO, 0, NONE, NONE), ++// while (HW_LOOP_COUNT < r1) { ++// send(r3 & 1); ++// r3 >>= 1; ++// } ++ MME_INSN(0, LOOP, ZERO, R1, ZERO, 0x0003, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, AND, ZERO, R3, IMMED, 1, NONE, ALU0, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, SRL, R3, R3, IMMED, 1, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(1, ADD, ZERO, ZERO, ZERO, 0, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++}; ++ ++uint32_t mmec597_vertex_array_select[] = { ++// r1 = load(); // array ++// r2 = load(); // limit hi ++// r3 = load(); // limit lo ++// r4 = load(); // start hi ++// r5 = load(); // start lo ++// r6 = (r1 & 0x1f) << 2; ++// r7 = (r1 & 0x1f) << 1; ++// mthd(0x1c04 + r6, 1); // VERTEX_ARRAY_START_HIGH[] ++// send(r4); ++// send(r5); ++// mthd(0x0600 + r7, 1); // VERTEX_ARRAY_LIMIT_HIGH[] ++// send(r2); ++// send(r3); ++ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE, ++ ADD, R2, LOAD1, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, R3, LOAD0, ZERO, 0, NONE, NONE, ++ ADD, R4, LOAD1, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, R5, LOAD0, ZERO, 0, NONE, NONE, ++ MERGE, R6, ZERO, R1, (2<<10)|(5<<5)|0, NONE, NONE), ++ MME_INSN(0, MERGE, R7, ZERO, R1, (1<<10)|(5<<5)|0, ALU1, NONE, ++ ADD, ZERO, R6, IMMED, (1<<12)|0x1c04/4, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, R4, ZERO, 0, NONE, ALU0, ++ ADD, ZERO, R5, ZERO, 0, NONE, ALU1), ++ MME_INSN(1, ADD, ZERO, R7, IMMED, (1<<12)|0x0600/4, ALU0, ALU1, ++ ADD, ZERO, R2, ZERO, 
0, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, R3, ZERO, 0, NONE, ALU0, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++}; ++ ++uint32_t mmec597_blend_enables[] = { ++// r1 = load(); // enable mask ++// mthd(0x1360, 1); // NVC0_3D_BLEND_ENABLE[] ++// send((r1 >> 0) & 1); ++// send((r1 >> 1) & 1); ++// send((r1 >> 2) & 1); ++// send((r1 >> 3) & 1); ++// send((r1 >> 4) & 1); ++// send((r1 >> 5) & 1); ++// send((r1 >> 6) & 1); ++// send((r1 >> 7) & 1); ++ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, IMMED1, NONE, ++ ADD, ZERO, ZERO, ZERO, (1<<12)|0x1360/4, NONE, NONE), ++ MME_INSN(0, MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|0, NONE, ALU0, ++ MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|1, NONE, ALU1), ++ MME_INSN(0, MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|2, NONE, ALU0, ++ MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|3, NONE, ALU1), ++ MME_INSN(1, MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|4, NONE, ALU0, ++ MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|5, NONE, ALU1), ++ MME_INSN(0, MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|6, NONE, ALU0, ++ MERGE, ZERO, ZERO, R1, (0<<10)|(1<<5)|7, NONE, ALU1), ++}; ++ ++uint32_t mmec597_poly_mode_front[] = { ++// r1 = load(); ++// mthd(0x0dac,0); // POLYGON_MODE_FRONT ++// send(r1); ++// r2 = read(0x0db0); // POLYGON_MODE_BACK ++// r3 = read(0x20c0); // SP_SELECT[3] ++// r7 = r1 | r2; ++// r4 = read(0x2100); // SP_SELECT[4] ++// r6 = 0x60; ++// r7 = r7 & 1; ++// if (r7 != 0) ++ MME_INSN(0, ADD, R1, LOAD0, ZERO, (0<<12)|0x0dac/4, IMMED0, ALU0, ++ STATE, R2, IMMED, ZERO, 0x0db0/4, NONE, NONE), ++ MME_INSN(0, STATE, R3, IMMED, ZERO, 0x20c0/4, NONE, NONE, ++ OR, R7, R1, R2, 0, NONE, NONE), ++ MME_INSN(0, STATE, R4, IMMED, ZERO, 0x2100/4, NONE, NONE, ++ ADD, R6, IMMED, ZERO, 0x60, NONE, NONE), ++ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// r6 = 0x200; ++ MME_INSN(0, ADD, R6, IMMED, ZERO, 0x200, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, 
NONE, NONE), ++// r7 = r3 | r4; ++// r7 = r7 & 1; ++// if (r7 != 0) ++ MME_INSN(0, OR, R7, R3, R4, 0, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// r6 = 0; ++ MME_INSN(0, ADD, R6, ZERO, ZERO, 0, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// mthd(0x02ec, 0); ++// send(r6); ++ MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x02ec/4, IMMED0, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, R6, ZERO, 0, NONE, ALU0, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++}; ++ ++uint32_t mmec597_poly_mode_back[] = { ++// r1 = load(); ++// mthd(0x0db0,0); // POLYGON_MODE_BACK ++// send(r1); ++// r2 = read(0x0dac); // POLYGON_MODE_FRONT ++// r3 = read(0x20c0); // SP_SELECT[3] ++// r7 = r1 | r2; ++// r4 = read(0x2100); // SP_SELECT[4] ++// r6 = 0x60; ++// r7 = r7 & 1; ++// if (r7 != 0) ++ MME_INSN(0, ADD, R1, LOAD0, ZERO, (0<<12)|0x0db0/4, IMMED0, ALU0, ++ STATE, R2, IMMED, ZERO, 0x0dac/4, NONE, NONE), ++ MME_INSN(0, STATE, R3, IMMED, ZERO, 0x20c0/4, NONE, NONE, ++ OR, R7, R1, R2, 0, NONE, NONE), ++ MME_INSN(0, STATE, R4, IMMED, ZERO, 0x2100/4, NONE, NONE, ++ ADD, R6, IMMED, ZERO, 0x60, NONE, NONE), ++ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// r6 = 0x200; ++ MME_INSN(0, ADD, R6, IMMED, ZERO, 0x200, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// r7 = r3 | r4; ++// r7 = r7 & 1; ++// if (r7 != 0) ++ MME_INSN(0, OR, R7, R3, R4, 0, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), 
++// r6 = 0; ++ MME_INSN(0, ADD, R6, ZERO, ZERO, 0, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// mthd(0x02ec, 0); ++// send(r6); ++ MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x02ec/4, IMMED0, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, R6, ZERO, 0, NONE, ALU0, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++}; ++ ++uint32_t mmec597_gp_select[] = { ++// r1 = load(); ++// mthd(0x2100,0); // SP_SELECT[4] ++// send(r1); ++// r2 = read(0x0dac); // POLYGON_MODE_FRONT ++// r3 = read(0x0db0); // POLYGON_MODE_BACK ++// r7 = r2 | r3; ++// r4 = read(0x20c0); // SP_SELECT[3] ++// r6 = 0x60; ++// r7 = r7 & 1; ++// if (r7 != 0) ++ MME_INSN(0, ADD, R1, LOAD0, ZERO, (0<<12)|0x2100/4, IMMED0, ALU0, ++ STATE, R2, IMMED, ZERO, 0x0dac/4, NONE, NONE), ++ MME_INSN(0, STATE, R3, IMMED, ZERO, 0x0db0/4, NONE, NONE, ++ OR, R7, R2, R3, 0, NONE, NONE), ++ MME_INSN(0, STATE, R4, IMMED, ZERO, 0x20c0/4, NONE, NONE, ++ ADD, R6, IMMED, ZERO, 0x60, NONE, NONE), ++ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// r6 = 0x200; ++ MME_INSN(0, ADD, R6, IMMED, ZERO, 0x200, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// r7 = r1 | r4; ++// r7 = r7 & 1; ++// if (r7 != 0) ++ MME_INSN(0, OR, R7, R1, R4, 0, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// r6 = 0; ++ MME_INSN(0, ADD, R6, ZERO, ZERO, 0, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// mthd(0x02ec, 0); ++// send(r6); ++ MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x02ec/4, IMMED0, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, R6, ZERO, 0, NONE, ALU0, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++}; ++ 
++uint32_t mmec597_tep_select[] = { ++// r1 = load(); ++// mthd(0x20c0,0); // SP_SELECT[3] ++// send(r1); ++// r2 = read(0x0dac); // POLYGON_MODE_FRONT ++// r3 = read(0x0db0); // POLYGON_MODE_BACK ++// r7 = r2 | r3; ++// r4 = read(0x2100); // SP_SELECT[4] ++// r6 = 0x60; ++// r7 = r7 & 1; ++// if (r7 != 0) ++ MME_INSN(0, ADD, R1, LOAD0, ZERO, (0<<12)|0x20c0/4, IMMED0, ALU0, ++ STATE, R2, IMMED, ZERO, 0x0dac/4, NONE, NONE), ++ MME_INSN(0, STATE, R3, IMMED, ZERO, 0x0db0/4, NONE, NONE, ++ OR, R7, R2, R3, 0, NONE, NONE), ++ MME_INSN(0, STATE, R4, IMMED, ZERO, 0x2100/4, NONE, NONE, ++ ADD, R6, IMMED, ZERO, 0x60, NONE, NONE), ++ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// r6 = 0x200; ++ MME_INSN(0, ADD, R6, IMMED, ZERO, 0x200, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// r7 = r1 | r4; ++// r7 = r7 & 1; ++// if (r7 != 0) ++ MME_INSN(0, OR, R7, R1, R4, 0, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, AND, R7, R7, IMMED, 1, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, BEQ, ZERO, R7, ZERO, (2<<14)|0x0002, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// r6 = 0; ++ MME_INSN(0, ADD, R6, ZERO, ZERO, 0, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// mthd(0x02ec, 0); ++// send(r6); ++ MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x02ec/4, IMMED0, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, R6, ZERO, 0, NONE, ALU0, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++}; ++ ++uint32_t mmec597_draw_arrays_indirect[] = { ++// r1 = load(); // mode ++// r5 = read(0x1438); // VB_INSTANCE_BASE ++// r6 = load(); // start_drawid ++// r7 = load(); // numparams ++ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE, ++ ADD, R6, LOAD1, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, R7, LOAD0, ZERO, 0, NONE, NONE, ++ STATE, R5, IMMED, ZERO, 
0x1438/4, NONE, NONE), ++// while (HW_LOOP_COUNT < r7) { ++// r2 = load(); // count ++// r3 = load(); // instance_count ++// mthd(0x0d74, 0); // VERTEX_BUFFER_FIRST ++// send(load()); // start ++// r4 = load(); // start_instance ++// if (r3) { ++ MME_INSN(0, LOOP, ZERO, R7, ZERO, 0x000c, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, R2, LOAD0, ZERO, 0x0d74/4, IMMED0, NONE, ++ ADD, R3, LOAD1, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, ALU0, ++ ADD, R4, LOAD1, ZERO, 0, NONE, NONE), ++ MME_INSN(0, BEQ, ZERO, R3, ZERO, (2<<14)|0x0008, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// mthd(0x238c, 1); // CB_POS ++// send(256 + 160); ++// send(0); // base_vertex ++// send(r4); // start_instance ++// send(r6); // draw id ++// mthd(0x1438, 0); // VB_INSTANCE_BASE ++// send(r4); ++// r1 = r1 & ~(1<<26); // clear INSTANCE_NEXT ++ MME_INSN(0, ADD, ZERO, ZERO, ZERO, (1<<12)|0x238c/4, IMMED0, IMMED1, ++ ADD, ZERO, ZERO, ZERO, 256 + 160, NONE, ALU0), ++ MME_INSN(0, ADD, ZERO, R4, ZERO, 0, NONE, ALU0, ++ ADD, ZERO, R6, ZERO, 0, NONE, ALU1), ++ MME_INSN(0, ADD, ZERO, R4, ZERO, 0x1438/4, IMMED0, ALU0, ++ MERGE, R1, R1, ZERO, (26<<10)|(1<<5)|0, NONE, NONE), ++// do { ++// mthd(0x1618, 0); // VERTEX_BEGIN_GL ++// send(r1); // mode ++// mthd(0x0d78, 0); // VERTEX_BUFFER_COUNT ++// send(r2); // count ++// mthd(0x1614, 0); // VERTEX_END_GL ++// send(0); ++// r1 |= (1<<26); // set INSTANCE_NEXT ++// } while(--r3); ++// } ++ MME_INSN(0, ADD, ZERO, R1, ZERO, 0x1618/4, IMMED0, ALU0, ++ ADD, ZERO, R2, ZERO, 0x0d78/4, IMMED1, ALU1), ++ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1614/4, IMMED0, ALU0, ++ ADD, R4, IMMED, ZERO, 1, NONE, NONE), ++ MME_INSN(0, MERGE, R1, R1, R4, (26<<10)|(1<<5)|0, NONE, NONE, ++ SUB, R3, R3, IMMED, 1, NONE, NONE), ++ MME_INSN(0, BEQ, ZERO, R3, ZERO, (1<<14)|0x3ffd, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// r6 = r6 + 1; ++// }; ++ MME_INSN(0, ADD, R6, R6, IMMED, 1, NONE, NONE, ++ ADD, ZERO, 
ZERO, ZERO, 0, NONE, NONE), ++// mthd(0x1438, 0); // restore VB_INSTANCE_BASE ++// send(r5); ++ MME_INSN(1, ADD, ZERO, ZERO, ZERO, 0x1438/4, IMMED0, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, R5, ZERO, 0, NONE, ALU0, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++}; ++ ++uint32_t mmec597_draw_elts_indirect[] = { ++// r1 = load(); // mode ++// r8 = read(0x1434); // VB_ELEMENT_BASE ++// r9 = read(0x1438); // VB_INSTANCE_BASE ++// r6 = load(); // start_drawid ++// r7 = load(); // numparams ++ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE, ++ STATE, R8, IMMED, ZERO, 0x1434/4, NONE, NONE), ++ MME_INSN(0, STATE, R9, IMMED, ZERO, 0x1438/4, NONE, NONE, ++ ADD, R6, LOAD0, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, R7, LOAD0, ZERO, 0, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// while (HW_LOOP_COUNT < r7) { ++// r3 = load(); // count ++// r2 = load(); // instance_count ++// mthd(0x17dc, 0); // INDEX_BATCH_FIRST ++// send(load()); // start ++// r4 = load(); // index_bias ++// mthd(0x238c, 1); // CB_POS ++// send(256 + 160); ++// send(r4); // index_bias ++// r5 = load(); // start_instance ++// if (r2) { ++ MME_INSN(0, LOOP, ZERO, R7, ZERO, 0x000d, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, R3, LOAD0, ZERO, 0x17dc/4, IMMED0, NONE, ++ ADD, R2, LOAD1, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, ALU0, ++ ADD, R4, LOAD1, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, ZERO, ZERO, (1<<12)|0x238c/4, IMMED0, IMMED1, ++ ADD, ZERO, R4, ZERO, 256 + 160, NONE, ALU1), ++ MME_INSN(0, BEQ, ZERO, R2, ZERO, (2<<14)|0x0008, NONE, NONE, ++ ADD, R5, LOAD0, ZERO, 0, NONE, NONE), ++// send(r5); // start_instance ++// send(r6); // draw_id ++// mthd(0x1434, 1); // VB_ELEMENT_BASE ++// send(r4); // index_bias ++// send(r5); // start_instance ++// mthd(0x1118, 0); // VERTEX_ID_BASE ++// send(r4); // index_bias ++// r1 &= ~(1 << 26); // clear INSTANCE_NEXT ++ MME_INSN(0, ADD, ZERO, R5, ZERO, 0, NONE, 
ALU0, ++ ADD, ZERO, R6, ZERO, 0, NONE, ALU1), ++ MME_INSN(0, ADD, ZERO, R4, ZERO, (1<<12)|0x1434/4, IMMED0, ALU0, ++ ADD, ZERO, R5, ZERO, 0, NONE, ALU1), ++ MME_INSN(0, ADD, ZERO, R4, ZERO, 0x1118/4, IMMED0, ALU0, ++ MERGE, R1, R1, ZERO, (26<<10)|(1<<5)|0, NONE, NONE), ++// do { ++// mthd(0x1618, 0); // VERTEX_BEGIN_GL ++// send(r1); // mode ++// mthd(0x17e0, 0); // INDEX_BATCH_COUNT ++// send(r3); // count ++// mthd(0x1614, 0); // VERTEX_END_GL ++// send(0); ++// r1 |= (1 << 26); // set INSTANCE_NEXT ++// } while (--r2); ++// } ++ MME_INSN(0, ADD, ZERO, R1, ZERO, 0x1618/4, IMMED0, ALU0, ++ ADD, ZERO, R3, ZERO, 0x17e0/4, IMMED1, ALU1), ++ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1614/4, IMMED0, ALU0, ++ ADD, R4, IMMED, ZERO, 1, NONE, NONE), ++ MME_INSN(0, MERGE, R1, R1, R4, (26<<10)|(1<<5)|0, NONE, NONE, ++ SUB, R2, R2, IMMED, 1, NONE, NONE), ++ MME_INSN(0, BEQ, ZERO, R2, ZERO, (1<<14)|0x3ffd, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// r6 = r6 + 1; ++// }; ++ MME_INSN(0, ADD, R6, R6, IMMED, 1, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// mthd(0x1434, 1); ++// send(r8); // restore VB_ELEMENT_BASE ++// send(r9); // restore VB_INSTANCE_BASE ++// mthd(0x1118, 0); ++// send(r8); // restore VERTEX_ID_BASE ++ MME_INSN(1, ADD, ZERO, R8, ZERO, (1<<12)|0x1434/4, IMMED0, ALU0, ++ ADD, ZERO, R9, ZERO, 0, NONE, ALU1), ++ MME_INSN(0, ADD, ZERO, R8, ZERO, 0x1118/4, IMMED0, ALU0, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++}; ++ ++uint32_t mmec597_draw_arrays_indirect_count[] = { ++// r1 = load(); // mode ++// r6 = load(); // start_drawid ++// r7 = load(); // numparams ++// r5 = load(); // totaldraws ++// r8 = read(0x1438); // VB_INSTANCE_BASE ++// r5 = r5 - r6; // remaining draws ++// if (r5 > r7) ++ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE, ++ ADD, R6, LOAD1, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, R7, LOAD0, ZERO, 0, NONE, NONE, ++ ADD, R5, LOAD1, ZERO, 0, NONE, NONE), ++ MME_INSN(0, STATE, R8, IMMED, ZERO, 0x1438/4, NONE, NONE, ++ SUB, R5, 
R5, R6, 0, NONE, NONE), ++ MME_INSN(0, BLE, ZERO, R5, R7, (2<<14)|0x0002, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// r5 = r7; ++ MME_INSN(0, ADD, R5, R7, ZERO, 0, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// if (r5 >= 0) { ++ MME_INSN(0, BLT, ZERO, R5, ZERO, (2<<14)|0x000e, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// while (HW_LOOP_COUNT < r5) { ++// r2 = load(); // count ++// r3 = load(); // instance_count ++// mthd(0x0d74, 0); // VERTEX_BUFFER_FIRST ++// send(load()); // start ++// r4 = load(); // start_instance ++// if (r3) { ++ MME_INSN(0, LOOP, ZERO, R5, ZERO, 0x000c, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, R2, LOAD0, ZERO, 0x0d74/4, IMMED0, NONE, ++ ADD, R3, LOAD1, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, ALU0, ++ ADD, R4, LOAD1, ZERO, 0, NONE, NONE), ++ MME_INSN(0, BEQ, ZERO, R3, ZERO, (2<<14)|0x0008, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// mthd(0x238c, 1); // CB_POS ++// send(256 + 160); ++// send(0); // base_vertex ++// send(r4); // start_instance ++// send(r6); // draw_id ++// mthd(0x1438, 0); // VB_INSTANCE_BASE ++// send(r4); ++// r1 &= ~(1 << 26); // clear INSTANCE_NEXT ++ MME_INSN(0, ADD, ZERO, ZERO, ZERO, (1<<12)|0x238c/4, IMMED0, IMMED1, ++ ADD, ZERO, ZERO, ZERO, 256+160, NONE, ALU0), ++ MME_INSN(0, ADD, ZERO, R4, ZERO, 0, NONE, ALU0, ++ ADD, ZERO, R6, ZERO, 0, NONE, ALU1), ++ MME_INSN(0, ADD, ZERO, R4, ZERO, 0x1438/4, IMMED0, ALU0, ++ MERGE, R1, R1, ZERO, (26<<10)|(1<<5)|0, NONE, NONE), ++// do { ++// mthd(0x1618, 0); // VERTEX_BEGIN_GL ++// send(r1); // mode ++// mthd(0x0d78, 0); // VERTEX_BUFFER_COUNT ++// send(r2); ++// mthd(0x1614, 0); // VERTEX_END_GL ++// send(0); ++// r1 |= (1 << 26); // set INSTANCE_NEXT ++// } while (--r3); ++// } ++ MME_INSN(0, ADD, ZERO, R1, ZERO, 0x1618/4, IMMED0, ALU0, ++ ADD, ZERO, R2, ZERO, 0x0d78/4, IMMED1, ALU1), ++ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1614/4, IMMED0, ALU0, ++ ADD, R4, 
IMMED, ZERO, 1, NONE, NONE), ++ MME_INSN(0, MERGE, R1, R1, R4, (26<<10)|(1<<5)|0, NONE, NONE, ++ SUB, R3, R3, IMMED, 1, NONE, NONE), ++ MME_INSN(0, BEQ, ZERO, R3, ZERO, (1<<14)|0x3ffd, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// r6 = r6 + 1; // draw_id++ ++// } ++ MME_INSN(0, ADD, R6, R6, IMMED, 1, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// r7 = r7 - r5; // unneeded params ++// } ++ MME_INSN(0, SUB, R7, R7, R5, 0, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// while (HW_LOOP_COUNT < r7) { ++// load(); ++// load(); ++// load(); ++// load(); ++// } ++ MME_INSN(0, LOOP, ZERO, R7, ZERO, 0x0003, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE, ++ ADD, ZERO, LOAD1, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE, ++ ADD, ZERO, LOAD1, ZERO, 0, NONE, NONE), ++// exit mthd(0x1438, 0); // VB_INSTANCE_BASE ++// send(r8); ++ MME_INSN(1, ADD, ZERO, ZERO, ZERO, 0x1438/4, IMMED0, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, R8, ZERO, 0, NONE, ALU0, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++}; ++ ++uint32_t mmec597_draw_elts_indirect_count[] = { ++// r8 = read(0x1434); ++// r1 = load(); ++// r9 = read(0x1438); ++// r6 = load(); ++// r7 = load(); ++// r5 = load(); ++// r5 = r5 - r6; ++// if (r5 > r7) ++ MME_INSN(0, STATE, R8, IMMED, ZERO, 0x1434/4, NONE, NONE, ++ ADD, R1, LOAD0, ZERO, 0, NONE, NONE), ++ MME_INSN(0, STATE, R9, IMMED, ZERO, 0x1438/4, NONE, NONE, ++ ADD, R6, LOAD0, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, R7, LOAD0, ZERO, 0, NONE, NONE, ++ ADD, R5, LOAD1, ZERO, 0, NONE, NONE), ++ MME_INSN(0, SUB, R5, R5, R6, 0, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, BLE, ZERO, R5, R7, (2<<14)|0x0002, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// r5 = r7; ++ MME_INSN(0, ADD, R5, R7, ZERO, 0, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// if (r5 >= 0) { ++ MME_INSN(0, BLT, 
ZERO, R5, ZERO, (2<<14)|0x000f, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// while (HW_LOOP_COUNT < r5) { ++// r3 = load(); ++// r2 = load(); ++// mthd(0x17dc, 0); ++// send(load()); ++// r4 = load(); ++// mthd(0x238c, 1); ++// send(256 + 160); ++// send(r4); ++// r10 = load(); ++// if (r2) { ++ MME_INSN(0, LOOP, ZERO, R5, ZERO, 0x000d, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, R3, LOAD0, ZERO, (0<<12)|0x17dc/4, IMMED0, NONE, ++ ADD, R2, LOAD1, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, (1<<12)|0x238c/4, NONE, ALU0, ++ ADD, R4, LOAD1, ZERO, 256 + 160, IMMED0, IMMED1), ++ MME_INSN(0, ADD, ZERO, R4, ZERO, 0, NONE, ALU0, ++ ADD, R10, LOAD0, ZERO, 0, NONE, NONE), ++ MME_INSN(0, BEQ, ZERO, R2, ZERO, (2<<14)|0x0008, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// send(r10); ++// send(r6); ++// mthd(0x1434, 1); ++// send(r4); ++// send(r10); ++// mthd(0x1118, 0); ++// send(r4); ++// r1 &= ~(1 << 26); ++ MME_INSN(0, ADD, ZERO, R10, ZERO, 0, NONE, ALU0, ++ ADD, ZERO, R6, ZERO, 0, NONE, ALU1), ++ MME_INSN(0, ADD, ZERO, R4, ZERO, (1<<12)|0x1434/4, IMMED0, ALU0, ++ ADD, ZERO, R10, ZERO, 0, NONE, ALU1), ++ MME_INSN(0, ADD, ZERO, R4, ZERO, (0<<12)|0x1118/4, IMMED0, ALU0, ++ MERGE, R1, R1, ZERO, (26<<10)|(1<<5)|0, NONE, NONE), ++// do { ++// mthd(0x1618, 0); ++// send(r1); ++// mthd(0x17e0, 0); ++// send(r3); ++// mthd(0x1614, 0); ++// send(0); ++// r1 |= (1 << 26); ++// } while (--r2); ++// } ++ MME_INSN(0, ADD, ZERO, R1, ZERO, 0x1618/4, IMMED0, ALU0, ++ ADD, ZERO, R3, ZERO, 0x17e0/4, IMMED1, ALU1), ++ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1614/4, IMMED0, ALU0, ++ ADD, R4, IMMED, ZERO, 1, NONE, NONE), ++ MME_INSN(0, MERGE, R1, R1, R4, (26<<10)|(1<<5)|0, NONE, NONE, ++ SUB, R2, R2, IMMED, 1, NONE, NONE), ++ MME_INSN(0, BEQ, ZERO, R2, ZERO, (1<<14)|0x3ffd, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// r6 = r6 + 1; ++// } ++ MME_INSN(0, ADD, R6, R6, IMMED, 1, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 
0, NONE, NONE), ++// r7 = r7 - r5; // unneeded params ++// } ++ MME_INSN(0, SUB, R7, R7, R5, 0, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// while (HW_LOOP_COUNT < r7) { ++// r2 = load(); ++// r2 = load(); ++// r2 = load(); ++// r2 = load(); ++// r2 = load(); ++// } ++ MME_INSN(0, LOOP, ZERO, R7, ZERO, 0x0004, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE, ++ ADD, ZERO, LOAD1, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE, ++ ADD, ZERO, LOAD1, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, LOAD0, ZERO, 0, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// mthd(0x1434, 1); ++// send(r8); ++// send(r9); ++// exit mthd(0x1118, 0); ++// send(r8); ++ MME_INSN(1, ADD, ZERO, R8, ZERO, (1<<12)|0x1434/4, IMMED0, ALU0, ++ ADD, ZERO, R9, ZERO, 0, NONE, ALU1), ++ MME_INSN(0, ADD, ZERO, R8, ZERO, (0<<12)|0x1118/4, IMMED0, ALU0, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++}; ++ ++uint32_t mmec597_query_buffer_write[] = { ++// r1 = load(); // clamp value ++// r2 = load(); // end value (lo) ++// r3 = load(); // end value (hi) ++// r4 = load(); // start value (lo) ++// r5 = load(); // start value (hi) ++// r8 = load(); // desired sequence ++// r9 = load(); // actual sequence ++// r7 = load(); // query address (hi) ++// r6 = load(); // query address (lo) ++// if (r9 >= r8) { ++ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE, ++ ADD, R2, LOAD1, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, R3, LOAD0, ZERO, 0, NONE, NONE, ++ ADD, R4, LOAD1, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, R5, LOAD0, ZERO, 0, NONE, NONE, ++ ADD, R8, LOAD1, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, R9, LOAD0, ZERO, 0, NONE, NONE, ++ ADD, R7, LOAD1, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, R6, LOAD0, ZERO, 0, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, BLT, ZERO, R9, R8, (2<<14)|0x000e, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// [r3,r2] = [r3,r2] - [r5,r4]; ++// 
if (r1) { ++ MME_INSN(0, SUB, R2, R2, R4, 0, NONE, NONE, ++ SUBB, R3, R3, R5, 0, NONE, NONE), ++ MME_INSN(0, BEQ, ZERO, R1, ZERO, (2<<14)|0x0004, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// if (r3 != 0 || r1 < r2) ++// r2 = r1; ++// } ++ MME_INSN(0, BEQ, ZERO, R3, ZERO, (1<<14)|0x0002, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, BLTU, ZERO, R1, R2, (1<<14)|0x0002, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, R2, R1, ZERO, 0, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// mthd(0x1b00, 1); ++// send(r7); ++// send(r6); ++// send(r2) ++// send(0x10000000); ++// if (!r1) { ++ MME_INSN(0, ADD, ZERO, R7, ZERO, (1<<12)|0x1b00/4, IMMED0, ALU0, ++ ADD, ZERO, R6, ZERO, 0, NONE, ALU1), ++ MME_INSN(0, ADD, ZERO, R2, ZERO, 0, NONE, ALU0, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1000, NONE, IMMED32_0, ++ ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE), ++ MME_INSN(0, BEQ, ZERO, R1, ZERO, (1<<14)|0x0004, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// [r7,r6] = [r7,r6] + 4; ++// mthd(0x1b00, 1); ++// send(r7); ++// send(r6); ++// send(r3); ++// send(0x10000000); ++// } ++ MME_INSN(0, ADD, ZERO, R6, IMMED, 4, IMMED1, ALU1, ++ ADDC, ZERO, R7, ZERO, (1<<12)|0x1b00/4, NONE, ALU0), ++ MME_INSN(0, ADD, ZERO, R3, ZERO, 0, NONE, ALU0, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1000, NONE, IMMED32_0, ++ ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE), ++// mthd(0x0110, 0); ++// send(0); ++ MME_INSN(0, ADD, ZERO, ZERO, ZERO, (0<<12)|0x0110/4, IMMED0, ALU0, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// } ++ MME_INSN(1, ADD, ZERO, ZERO, ZERO, 0, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++}; ++ ++uint32_t mmec597_conservative_raster_state[] = { ++// r1 = load(); ++// mthd(0x3400, 1); ++// send(0); ++// send(((r1 >> 8) & 7) << 
23); ++// send(0x03800000); ++// mthd(0x2310, 1); ++// send(0x00418800); ++// r2 = r1 & 0xf; ++// r3 = 16; ++// r2 = r2 | (((r1 >> 4) & 0xf) << 8); ++// mthd(0x0a1c, 8); ++ MME_INSN(0, ADD, R1, LOAD0, ZERO, (1<<12)|0x3400/4, IMMED0, IMMED1, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, MERGE, ZERO, ZERO, R1, (23<<10)|(3<<5)|8, NONE, ALU0, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x0380, NONE, IMMED32_0, ++ ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, ZERO, ZERO, (1<<12)|0x2310/4, IMMED0, NONE, ++ ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x0041, NONE, IMMED32_0, ++ ADD, ZERO, ZERO, ZERO, 0x8800, NONE, NONE), ++ MME_INSN(0, AND, R2, R1, IMMED, 0xf, NONE, NONE, ++ ADD, R3, ZERO, IMMED, 16, NONE, NONE), ++ MME_INSN(0, MERGE, R2, R2, R1, (8<<10)|(4<<5)|4, IMMED1, NONE, ++ ADD, ZERO, ZERO, ZERO, (8<<12)|0x0a1c/4, NONE, NONE), ++// while (HW_LOOP_COUNT < r3) ++// send(r2); ++ MME_INSN(0, LOOP, ZERO, R3, ZERO, 0x0002, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, R2, ZERO, 0, NONE, ALU0, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++// mthd(0x1148, 0); ++// send(1); ++ MME_INSN(1, ADD, ZERO, ZERO, ZERO, (0<<12)|0x1148/4, IMMED0, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 1, NONE, IMMED1, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++}; ++ ++uint32_t mmec597_compute_counter[] = { ++// r0 = load(); ++// r1 = 1; ++// r2 = 0; ++// while (HW_LOOP_COUNT < r2) { ++ MME_INSN(0, ADD, R0, LOAD0, ZERO, 0, NONE, NONE, ++ ADD, R1, IMMED, ZERO, 1, NONE, NONE), ++ MME_INSN(0, LOOP, ZERO, R0, ZERO, 0x0003, NONE, NONE, ++ ADD, R2, ZERO, ZERO, 0, NONE, NONE), ++// r3 = load(); ++// [r1,r0] *= r3; ++// } ++ MME_INSN(0, ADD, R3, LOAD0, ZERO, 0, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, MULU, R1, R1, R3, 0, NONE, NONE, ++ MULH, R2, ZERO, ZERO, 0, NONE, NONE), ++// r3 = 
read(0x3410); ++// r4 = read(0x3414); ++// [r4,r3] += [r2,r1]; ++// mthd(0x3410, 1); ++// send(r3); ++// send(r4); ++ MME_INSN(0, STATE, ZERO, ZERO, ZERO, 0x3410/4, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(1, STATE, ZERO, ZERO, ZERO, 0x3414/4, NONE, NONE, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, R3, R3, R1, (1<<12)|0x3410/4, IMMED0, ALU0, ++ ADDC, R4, R4, R2, 0, NONE, ALU1), ++}; ++ ++uint32_t mmec597_compute_counter_to_query[] = { ++// r1 = load(); ++// r3 = read(0x3410); ++// r2 = load(); ++// r4 = read(0x3414); ++// [r2,r1] = [r2,r1] + [r4,r3]; ++// mthd(0x1b00, 1); ++// r3 = load(); ++// send(r3); ++// r4 = load(); ++// send(r4); ++// send(r1); ++// send(0x10000000); ++ MME_INSN(0, ADD, R1, LOAD0, ZERO, 0, NONE, NONE, ++ STATE, R3, IMMED, ZERO, 0x3410/4, NONE, NONE), ++ MME_INSN(0, ADD, R2, LOAD0, ZERO, 0, NONE, NONE, ++ STATE, R4, IMMED, ZERO, 0x3414/4, NONE, NONE), ++ MME_INSN(0, ADD, R1, R1, R3, (1<<12)|0x1b00/4, IMMED0, NONE, ++ ADDC, R2, R2, R4, 0, NONE, NONE), ++ MME_INSN(0, ADD, R3, LOAD0, ZERO, 0, NONE, ALU0, ++ ADD, R4, LOAD1, ZERO, 0, NONE, ALU1), ++ MME_INSN(0, ADD, ZERO, R1, ZERO, 0, NONE, ALU0, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1000, NONE, IMMED32_0, ++ ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE), ++// [r3,r4] = [r3,r4] + 4; ++// mthd(0x1b00, 1); ++// send(r3); ++// send(r4); ++// send(r2); ++// send(0x10000000); ++ MME_INSN(0, ADD, ZERO, R4, IMMED, 4, IMMED1, ALU1, ++ ADDC, ZERO, R3, ZERO, (1<<12)|0x1b00/4, NONE, ALU0), ++ MME_INSN(1, ADD, ZERO, R2, ZERO, 0, NONE, ALU0, ++ ADD, ZERO, ZERO, ZERO, 0, NONE, NONE), ++ MME_INSN(0, ADD, ZERO, ZERO, ZERO, 0x1000, NONE, IMMED32_0, ++ ADD, ZERO, ZERO, ZERO, 0x0000, NONE, NONE), ++}; +diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h +index 221bab3105b..539bdc75022 100644 +--- a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h ++++ 
b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h +@@ -157,6 +157,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #define NVC0_3D_UNK0220__ESIZE 0x00000004 + #define NVC0_3D_UNK0220__LEN 0x00000028 + ++#define TU102_3D_INDEX_ARRAY_LIMIT_HIGH 0x00000238 ++ ++#define TU102_3D_INDEX_ARRAY_LIMIT_LOW 0x0000023c ++ ++#define TU102_3D_SET_COLOR_RENDER_TO_ZETA_SURFACE 0x000002b8 ++ + #define NVC0_3D_UNK02C0 0x000002c0 + + #define NVC0_3D_UNK02C4 0x000002c4 +@@ -278,6 +284,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + #define NVC0_3D_UNK0400__ESIZE 0x00000004 + #define NVC0_3D_UNK0400__LEN 0x000000c0 + ++#define TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(i0) (0x00000600 + 0x8*(i0)) ++#define TU102_3D_VERTEX_ARRAY_LIMIT_LOW(i0) (0x00000604 + 0x8*(i0)) ++ + #define NVC0_3D_TFB_STREAM(i0) (0x00000700 + 0x10*(i0)) + #define NVC0_3D_TFB_STREAM__ESIZE 0x00000010 + #define NVC0_3D_TFB_STREAM__LEN 0x00000004 +@@ -1787,6 +1796,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ #define NVC0_3D_SP_UNK14__ESIZE 0x00000004 + #define NVC0_3D_SP_UNK14__LEN 0x00000004 + ++#define GV100_3D_SP_ADDRESS_HIGH(i0) (0x00002014 + 0x40*(i0)) ++#define GV100_3D_SP_ADDRESS_LOW(i0) (0x00002018 + 0x40*(i0)) ++ + #define NVC0_3D_TEX_LIMITS(i0) (0x00002200 + 0x10*(i0)) + #define NVC0_3D_TEX_LIMITS__ESIZE 0x00000010 + #define NVC0_3D_TEX_LIMITS__LEN 0x00000005 +diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c +index c897e4e8b97..69131fa22d3 100644 +--- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c ++++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c +@@ -37,6 +37,55 @@ nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, bool is_3d) + return nv50_tex_choose_tile_dims_helper(nx, ny, nz, is_3d); + } + ++static uint32_t ++tu102_mt_choose_storage_type(struct nv50_miptree *mt, bool compressed) ++{ ++ uint32_t kind; ++ ++ if (unlikely(mt->base.base.bind & PIPE_BIND_CURSOR)) ++ return 0; ++ if (unlikely(mt->base.base.flags & NOUVEAU_RESOURCE_FLAG_LINEAR)) ++ return 0; ++ ++ switch (mt->base.base.format) { ++ case PIPE_FORMAT_Z16_UNORM: ++ if (compressed) ++ kind = 0x0b; // NV_MMU_PTE_KIND_Z16_COMPRESSIBLE_DISABLE_PLC ++ else ++ kind = 0x01; // NV_MMU_PTE_KIND_Z16 ++ break; ++ case PIPE_FORMAT_X8Z24_UNORM: ++ case PIPE_FORMAT_S8X24_UINT: ++ case PIPE_FORMAT_S8_UINT_Z24_UNORM: ++ if (compressed) ++ kind = 0x0e; // NV_MMU_PTE_KIND_Z24S8_COMPRESSIBLE_DISABLE_PLC ++ else ++ kind = 0x05; // NV_MMU_PTE_KIND_Z24S8 ++ break; ++ case PIPE_FORMAT_X24S8_UINT: ++ case PIPE_FORMAT_Z24X8_UNORM: ++ case PIPE_FORMAT_Z24_UNORM_S8_UINT: ++ if (compressed) ++ kind = 0x0c; // NV_MMU_PTE_KIND_S8Z24_COMPRESSIBLE_DISABLE_PLC ++ else ++ kind = 0x03; // NV_MMU_PTE_KIND_S8Z24 ++ break; ++ case PIPE_FORMAT_X32_S8X24_UINT: ++ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: ++ if (compressed) ++ kind = 0x0d; // NV_MMU_PTE_KIND_ZF32_X24S8_COMPRESSIBLE_DISABLE_PLC ++ else ++ kind = 0x04; // NV_MMU_PTE_KIND_ZF32_X24S8 ++ break; 
++ case PIPE_FORMAT_Z32_FLOAT: ++ default: ++ kind = 0x06; ++ break; ++ } ++ ++ return kind; ++} ++ + static uint32_t + nvc0_mt_choose_storage_type(struct nv50_miptree *mt, bool compressed) + { +@@ -357,7 +406,10 @@ nvc0_miptree_create(struct pipe_screen *pscreen, + if (pt->bind & PIPE_BIND_LINEAR) + pt->flags |= NOUVEAU_RESOURCE_FLAG_LINEAR; + +- bo_config.nvc0.memtype = nvc0_mt_choose_storage_type(mt, compressed); ++ if (dev->chipset < 0x160) ++ bo_config.nvc0.memtype = nvc0_mt_choose_storage_type(mt, compressed); ++ else ++ bo_config.nvc0.memtype = tu102_mt_choose_storage_type(mt, compressed); + + if (!nvc0_miptree_init_ms_mode(mt)) { + FREE(mt); +diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +index 32aa82d168c..d2b2de47c8d 100644 +--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c ++++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c +@@ -645,7 +645,10 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset, + prog->code_size = info->bin.codeSize; + prog->relocs = info->bin.relocData; + prog->fixups = info->bin.fixupData; +- prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1)); ++ if (info->target >= NVISA_GV100_CHIPSET) ++ prog->num_gprs = MIN2(info->bin.maxGPR + 5, 256); //XXX: why? ++ else ++ prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1)); + prog->cp.smem_size = info->bin.smemSize; + prog->num_barriers = info->numBarriers; + +@@ -734,7 +737,14 @@ nvc0_program_alloc_code(struct nvc0_context *nvc0, struct nvc0_program *prog) + struct nvc0_screen *screen = nvc0->screen; + const bool is_cp = prog->type == PIPE_SHADER_COMPUTE; + int ret; +- uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE); ++ uint32_t size = prog->code_size; ++ ++ if (!is_cp) { ++ if (screen->eng3d->oclass < TU102_3D_CLASS) ++ size += GF100_SHADER_HEADER_SIZE; ++ else ++ size += TU102_SHADER_HEADER_SIZE; ++ } + + /* On Fermi, SP_START_ID must be aligned to 0x40. 
+ * On Kepler, the first instruction must be aligned to 0x80 because +@@ -750,7 +760,8 @@ nvc0_program_alloc_code(struct nvc0_context *nvc0, struct nvc0_program *prog) + prog->code_base = prog->mem->start; + + if (!is_cp) { +- if (screen->base.class_3d >= NVE4_3D_CLASS) { ++ if (screen->base.class_3d >= NVE4_3D_CLASS && ++ screen->base.class_3d < TU102_3D_CLASS) { + switch (prog->mem->start & 0xff) { + case 0x40: prog->code_base += 0x70; break; + case 0x80: prog->code_base += 0x30; break; +@@ -777,7 +788,16 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) + { + struct nvc0_screen *screen = nvc0->screen; + const bool is_cp = prog->type == PIPE_SHADER_COMPUTE; +- uint32_t code_pos = prog->code_base + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE); ++ uint32_t code_pos = prog->code_base; ++ uint32_t size_sph = 0; ++ ++ if (!is_cp) { ++ if (screen->eng3d->oclass < TU102_3D_CLASS) ++ size_sph = GF100_SHADER_HEADER_SIZE; ++ else ++ size_sph = TU102_SHADER_HEADER_SIZE; ++ } ++ code_pos += size_sph; + + if (prog->relocs) + nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, +@@ -803,8 +823,7 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) + + if (!is_cp) + nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base, +- NV_VRAM_DOMAIN(&screen->base), +- NVC0_SHADER_HEADER_SIZE, prog->hdr); ++ NV_VRAM_DOMAIN(&screen->base), size_sph, prog->hdr); + + nvc0->base.push_data(&nvc0->base, screen->text, code_pos, + NV_VRAM_DOMAIN(&screen->base), prog->code_size, +@@ -817,7 +836,14 @@ nvc0_program_upload(struct nvc0_context *nvc0, struct nvc0_program *prog) + struct nvc0_screen *screen = nvc0->screen; + const bool is_cp = prog->type == PIPE_SHADER_COMPUTE; + int ret; +- uint32_t size = prog->code_size + (is_cp ? 
0 : NVC0_SHADER_HEADER_SIZE); ++ uint32_t size = prog->code_size; ++ ++ if (!is_cp) { ++ if (screen->eng3d->oclass < TU102_3D_CLASS) ++ size += GF100_SHADER_HEADER_SIZE; ++ else ++ size += TU102_SHADER_HEADER_SIZE; ++ } + + ret = nvc0_program_alloc_code(nvc0, prog); + if (ret) { +@@ -874,8 +900,7 @@ nvc0_program_upload(struct nvc0_context *nvc0, struct nvc0_program *prog) + BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(FLUSH), 1); + PUSH_DATA (nvc0->base.pushbuf, NVC0_COMPUTE_FLUSH_CODE); + } else { +- BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(SP_START_ID(i)), 1); +- PUSH_DATA (nvc0->base.pushbuf, progs[i]->code_base); ++ nvc0_program_sp_start_id(nvc0, i, progs[i]); + } + } + } +@@ -953,7 +978,7 @@ nvc0_program_symbol_offset(const struct nvc0_program *prog, uint32_t label) + unsigned base = 0; + unsigned i; + if (prog->type != PIPE_SHADER_COMPUTE) +- base = NVC0_SHADER_HEADER_SIZE; ++ base = GF100_SHADER_HEADER_SIZE; + for (i = 0; i < prog->cp.num_syms; ++i) + if (syms[i].label == label) + return prog->code_base + base + syms[i].offset; +diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h +index 5684207aa54..2c465b342e9 100644 +--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h ++++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h +@@ -15,7 +15,9 @@ struct nvc0_transform_feedback_state { + }; + + +-#define NVC0_SHADER_HEADER_SIZE (20 * 4) ++#define GF100_SHADER_HEADER_SIZE (20 * 4) ++#define TU102_SHADER_HEADER_SIZE (32 * 4) ++#define NVC0_MAX_SHADER_HEADER_SIZE TU102_SHADER_HEADER_SIZE + + struct nvc0_program { + struct pipe_shader_state pipe; +@@ -30,7 +32,7 @@ struct nvc0_program { + unsigned code_size; + unsigned parm_size; /* size of non-bindable uniforms (c0[]) */ + +- uint32_t hdr[20]; ++ uint32_t hdr[NVC0_MAX_SHADER_HEADER_SIZE/4]; + uint32_t flags[2]; + + struct { +@@ -72,4 +74,6 @@ struct nvc0_program { + struct nouveau_heap *mem; + }; + ++void ++nvc0_program_sp_start_id(struct nvc0_context *, int, 
struct nvc0_program *); + #endif +diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +index 7abbf762af2..07d74ddd50c 100644 +--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c ++++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c +@@ -27,15 +27,17 @@ + #include "util/format/u_format_s3tc.h" + #include "util/u_screen.h" + #include "pipe/p_screen.h" +-#include "compiler/nir/nir.h" + + #include "nouveau_vp3_video.h" + ++#include "codegen/nv50_ir_driver.h" ++ + #include "nvc0/nvc0_context.h" + #include "nvc0/nvc0_screen.h" + + #include "nvc0/mme/com9097.mme.h" + #include "nvc0/mme/com90c0.mme.h" ++#include "nvc0/mme/comc597.mme.h" + + #include "nv50/g80_texture.xml.h" + +@@ -443,8 +445,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, + case PIPE_SHADER_CAP_PREFERRED_IR: + return screen->prefer_nir ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI; + case PIPE_SHADER_CAP_SUPPORTED_IRS: { +- uint32_t irs = 1 << PIPE_SHADER_IR_TGSI | +- 1 << PIPE_SHADER_IR_NIR; ++ uint32_t irs = 1 << PIPE_SHADER_IR_NIR | ++ ((class_3d >= GV100_3D_CLASS) ? 0 : 1 << PIPE_SHADER_IR_TGSI); + if (screen->force_enable_cl) + irs |= 1 << PIPE_SHADER_IR_NIR_SERIALIZED; + return irs; +@@ -467,6 +469,14 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, + case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR: + return shader != PIPE_SHADER_FRAGMENT; + case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR: ++ /* HW doesn't support indirect addressing of fragment program inputs ++ * on Volta. The binary driver generates a function to handle every ++ * possible indirection, and indirectly calls the function to handle ++ * this instead. 
++ */ ++ if (class_3d >= GV100_3D_CLASS) ++ return shader != PIPE_SHADER_FRAGMENT; ++ return 1; + case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR: + case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR: + return 1; +@@ -717,6 +727,26 @@ nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos, + return pos + size; + } + ++static int ++tu102_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos, ++ unsigned size, const uint32_t *data) ++{ ++ struct nouveau_pushbuf *push = screen->base.pushbuf; ++ ++ size /= 4; ++ ++ assert((pos + size) <= 0x800); ++ ++ BEGIN_NVC0(push, SUBC_3D(NVC0_GRAPH_MACRO_ID), 2); ++ PUSH_DATA (push, (m - 0x3800) / 8); ++ PUSH_DATA (push, pos); ++ BEGIN_1IC0(push, SUBC_3D(NVC0_GRAPH_MACRO_UPLOAD_POS), size + 1); ++ PUSH_DATA (push, pos); ++ PUSH_DATAp(push, data, size); ++ ++ return pos + (size / 3); ++} ++ + static void + nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class) + { +@@ -728,8 +758,10 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class) + BEGIN_NVC0(push, SUBC_3D(0x10ec), 2); + PUSH_DATA (push, 0xff); + PUSH_DATA (push, 0xff); +- BEGIN_NVC0(push, SUBC_3D(0x074c), 1); +- PUSH_DATA (push, 0x3f); ++ if (obj_class < GV100_3D_CLASS) { ++ BEGIN_NVC0(push, SUBC_3D(0x074c), 1); ++ PUSH_DATA (push, 0x3f); ++ } + + BEGIN_NVC0(push, SUBC_3D(0x16a8), 1); + PUSH_DATA (push, (3 << 16) | 3); +@@ -761,8 +793,10 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class) + BEGIN_NVC0(push, SUBC_3D(0x0300), 1); + PUSH_DATA (push, 3); + +- BEGIN_NVC0(push, SUBC_3D(0x02d0), 1); +- PUSH_DATA (push, 0x3fffff); ++ if (obj_class < GV100_3D_CLASS) { ++ BEGIN_NVC0(push, SUBC_3D(0x02d0), 1); ++ PUSH_DATA (push, 0x3fffff); ++ } + BEGIN_NVC0(push, SUBC_3D(0x0fdc), 1); + PUSH_DATA (push, 1); + BEGIN_NVC0(push, SUBC_3D(0x19c0), 1); +@@ -822,6 +856,8 @@ nvc0_screen_init_compute(struct nvc0_screen *screen) + case 0x110: + case 0x120: + case 0x130: ++ case 0x140: ++ case 0x160: + return 
nve4_screen_compute_setup(screen, screen->base.pushbuf); + default: + return -1; +@@ -893,13 +929,15 @@ nvc0_screen_resize_text_area(struct nvc0_screen *screen, uint64_t size) + nouveau_heap_init(&screen->text_heap, 0, size - 0x100); + + /* update the code segment setup */ +- BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2); +- PUSH_DATAh(push, screen->text->offset); +- PUSH_DATA (push, screen->text->offset); +- if (screen->compute) { +- BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2); ++ if (screen->eng3d->oclass < GV100_3D_CLASS) { ++ BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2); + PUSH_DATAh(push, screen->text->offset); + PUSH_DATA (push, screen->text->offset); ++ if (screen->compute) { ++ BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2); ++ PUSH_DATAh(push, screen->text->offset); ++ PUSH_DATA (push, screen->text->offset); ++ } + } + + return 0; +@@ -939,74 +977,14 @@ nvc0_screen_bind_cb_3d(struct nvc0_screen *screen, bool *can_serialize, + IMMED_NVC0(push, NVC0_3D(CB_BIND(stage)), (index << 4) | (size >= 0)); + } + +-static const nir_shader_compiler_options nir_options = { +- .lower_fdiv = false, +- .lower_ffma = false, +- .fuse_ffma = false, /* nir doesn't track mad vs fma */ +- .lower_flrp32 = true, +- .lower_flrp64 = true, +- .lower_fpow = false, +- .lower_fsat = false, +- .lower_fsqrt = false, // TODO: only before gm200 +- .lower_fmod = true, +- .lower_bitfield_extract = false, +- .lower_bitfield_extract_to_shifts = false, +- .lower_bitfield_insert = false, +- .lower_bitfield_insert_to_shifts = false, +- .lower_bitfield_reverse = false, +- .lower_bit_count = false, +- .lower_ifind_msb = false, +- .lower_find_lsb = false, +- .lower_uadd_carry = true, // TODO +- .lower_usub_borrow = true, // TODO +- .lower_mul_high = false, +- .lower_negate = false, +- .lower_sub = true, +- .lower_scmp = true, // TODO: not implemented yet +- .lower_idiv = true, +- .lower_isign = false, // TODO +- .fdot_replicates = false, // TODO +- .lower_ffloor = false, // TODO +- 
.lower_ffract = true, +- .lower_fceil = false, // TODO +- .lower_ldexp = true, +- .lower_pack_half_2x16 = true, +- .lower_pack_unorm_2x16 = true, +- .lower_pack_snorm_2x16 = true, +- .lower_pack_unorm_4x8 = true, +- .lower_pack_snorm_4x8 = true, +- .lower_unpack_half_2x16 = true, +- .lower_unpack_unorm_2x16 = true, +- .lower_unpack_snorm_2x16 = true, +- .lower_unpack_unorm_4x8 = true, +- .lower_unpack_snorm_4x8 = true, +- .lower_extract_byte = true, +- .lower_extract_word = true, +- .lower_all_io_to_temps = false, +- .vertex_id_zero_based = false, +- .lower_base_vertex = false, +- .lower_helper_invocation = false, +- .lower_cs_local_index_from_id = true, +- .lower_cs_local_id_from_index = false, +- .lower_device_index_to_zero = false, // TODO +- .lower_wpos_pntc = false, // TODO +- .lower_hadd = true, // TODO +- .lower_add_sat = true, // TODO +- .use_interpolated_input_intrinsics = true, +- .lower_mul_2x32_64 = true, // TODO +- .max_unroll_iterations = 32, +- .lower_int64_options = nir_lower_ufind_msb64|nir_lower_divmod64, // TODO +- .lower_doubles_options = nir_lower_dmod, // TODO +- .lower_to_scalar = true, +-}; +- + static const void * + nvc0_screen_get_compiler_options(struct pipe_screen *pscreen, + enum pipe_shader_ir ir, + enum pipe_shader_type shader) + { ++ struct nvc0_screen *screen = nvc0_screen(pscreen); + if (ir == PIPE_SHADER_IR_NIR) +- return &nir_options; ++ return nv50_ir_nir_shader_compiler_options(screen->base.device->chipset); + return NULL; + } + +@@ -1038,6 +1016,8 @@ nvc0_screen_create(struct nouveau_device *dev) + case 0x110: + case 0x120: + case 0x130: ++ case 0x140: ++ case 0x160: + break; + default: + return NULL; +@@ -1104,16 +1084,19 @@ nvc0_screen_create(struct nouveau_device *dev) + screen->base.fence.emit = nvc0_screen_fence_emit; + screen->base.fence.update = nvc0_screen_fence_update; + ++ if (dev->chipset < 0x140) { ++ ret = nouveau_object_new(chan, (dev->chipset < 0xe0) ? 
0x1f906e : 0x906e, ++ NVIF_CLASS_SW_GF100, NULL, 0, &screen->nvsw); ++ if (ret) ++ FAIL_SCREEN_INIT("Error creating SW object: %d\n", ret); + +- ret = nouveau_object_new(chan, (dev->chipset < 0xe0) ? 0x1f906e : 0x906e, +- NVIF_CLASS_SW_GF100, NULL, 0, &screen->nvsw); +- if (ret) +- FAIL_SCREEN_INIT("Error creating SW object: %d\n", ret); +- +- BEGIN_NVC0(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1); +- PUSH_DATA (push, screen->nvsw->handle); ++ BEGIN_NVC0(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1); ++ PUSH_DATA (push, screen->nvsw->handle); ++ } + + switch (dev->chipset & ~0xf) { ++ case 0x160: ++ case 0x140: + case 0x130: + case 0x120: + case 0x110: +@@ -1167,6 +1150,12 @@ nvc0_screen_create(struct nouveau_device *dev) + PUSH_DATA (push, screen->fence.bo->offset + 16); + + switch (dev->chipset & ~0xf) { ++ case 0x160: ++ obj_class = TU102_3D_CLASS; ++ break; ++ case 0x140: ++ obj_class = GV100_3D_CLASS; ++ break; + case 0x130: + switch (dev->chipset) { + case 0x130: +@@ -1414,25 +1403,47 @@ nvc0_screen_create(struct nouveau_device *dev) + PUSH_DATA (push, 16384 << 16); + } + ++ if (screen->eng3d->oclass < TU102_3D_CLASS) { + #define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n); + +- i = 0; +- MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mme9097_per_instance_bf); +- MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mme9097_blend_enables); +- MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mme9097_vertex_array_select); +- MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mme9097_tep_select); +- MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mme9097_gp_select); +- MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mme9097_poly_mode_front); +- MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mme9097_poly_mode_back); +- MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mme9097_draw_arrays_indirect); +- MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect); +- MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count); +- 
MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count); +- MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write); +- MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mme9097_conservative_raster_state); +- MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mme9097_compute_counter); +- MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mme9097_compute_counter_to_query); +- MK_MACRO(NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect); ++ i = 0; ++ MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mme9097_per_instance_bf); ++ MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mme9097_blend_enables); ++ MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mme9097_vertex_array_select); ++ MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mme9097_tep_select); ++ MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mme9097_gp_select); ++ MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mme9097_poly_mode_front); ++ MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mme9097_poly_mode_back); ++ MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mme9097_draw_arrays_indirect); ++ MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect); ++ MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count); ++ MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count); ++ MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write); ++ MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mme9097_conservative_raster_state); ++ MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mme9097_compute_counter); ++ MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mme9097_compute_counter_to_query); ++ MK_MACRO(NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect); ++ } else { ++#undef MK_MACRO ++#define MK_MACRO(m, n) i = tu102_graph_set_macro(screen, m, i, sizeof(n), n); ++ ++ i = 0; ++ MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mmec597_per_instance_bf); ++ MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mmec597_blend_enables); ++ 
MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mmec597_vertex_array_select); ++ MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mmec597_tep_select); ++ MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mmec597_gp_select); ++ MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mmec597_poly_mode_front); ++ MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mmec597_poly_mode_back); ++ MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mmec597_draw_arrays_indirect); ++ MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mmec597_draw_elts_indirect); ++ MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mmec597_draw_arrays_indirect_count); ++ MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mmec597_draw_elts_indirect_count); ++ MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mmec597_query_buffer_write); ++ MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mmec597_conservative_raster_state); ++ MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mmec597_compute_counter); ++ MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mmec597_compute_counter_to_query); ++ } + + BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1); + PUSH_DATA (push, 1); +diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c +index b7e0c8a930f..490026b2c00 100644 +--- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c ++++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c +@@ -64,6 +64,22 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog) + return true; /* stream output info only */ + } + ++void ++nvc0_program_sp_start_id(struct nvc0_context *nvc0, int stage, ++ struct nvc0_program *prog) ++{ ++ struct nouveau_pushbuf *push = nvc0->base.pushbuf; ++ ++ if (nvc0->screen->eng3d->oclass < GV100_3D_CLASS) { ++ BEGIN_NVC0(push, NVC0_3D(SP_START_ID(stage)), 1); ++ PUSH_DATA (push, prog->code_base); ++ } else { ++ BEGIN_NVC0(push, SUBC_3D(GV100_3D_SP_ADDRESS_HIGH(stage)), 2); ++ PUSH_DATAh(push, nvc0->screen->text->offset + prog->code_base); ++ PUSH_DATA (push, 
nvc0->screen->text->offset + prog->code_base); ++ } ++} ++ + void + nvc0_vertprog_validate(struct nvc0_context *nvc0) + { +@@ -74,9 +90,9 @@ nvc0_vertprog_validate(struct nvc0_context *nvc0) + return; + nvc0_program_update_context_state(nvc0, vp, 0); + +- BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 2); ++ BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 1); + PUSH_DATA (push, 0x11); +- PUSH_DATA (push, vp->code_base); ++ nvc0_program_sp_start_id(nvc0, 1, vp); + BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(1)), 1); + PUSH_DATA (push, vp->num_gprs); + +@@ -152,9 +168,9 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0) + fp->fp.post_depth_coverage); + } + +- BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 2); ++ BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 1); + PUSH_DATA (push, 0x51); +- PUSH_DATA (push, fp->code_base); ++ nvc0_program_sp_start_id(nvc0, 5, fp); + BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(5)), 1); + PUSH_DATA (push, fp->num_gprs); + +@@ -176,9 +192,9 @@ nvc0_tctlprog_validate(struct nvc0_context *nvc0) + BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1); + PUSH_DATA (push, tp->tp.tess_mode); + } +- BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 2); ++ BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1); + PUSH_DATA (push, 0x21); +- PUSH_DATA (push, tp->code_base); ++ nvc0_program_sp_start_id(nvc0, 2, tp); + BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1); + PUSH_DATA (push, tp->num_gprs); + } else { +@@ -186,9 +202,9 @@ nvc0_tctlprog_validate(struct nvc0_context *nvc0) + /* not a whole lot we can do to handle this failure */ + if (!nvc0_program_validate(nvc0, tp)) + assert(!"unable to validate empty tcp"); +- BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 2); ++ BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1); + PUSH_DATA (push, 0x20); +- PUSH_DATA (push, tp->code_base); ++ nvc0_program_sp_start_id(nvc0, 2, tp); + } + nvc0_program_update_context_state(nvc0, tp, 1); + } +@@ -206,8 +222,7 @@ nvc0_tevlprog_validate(struct nvc0_context *nvc0) + } + BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1); + PUSH_DATA (push, 0x31); +- 
BEGIN_NVC0(push, NVC0_3D(SP_START_ID(3)), 1); +- PUSH_DATA (push, tp->code_base); ++ nvc0_program_sp_start_id(nvc0, 3, tp); + BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(3)), 1); + PUSH_DATA (push, tp->num_gprs); + } else { +@@ -227,8 +242,7 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0) + if (gp && nvc0_program_validate(nvc0, gp) && gp->code_size) { + BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1); + PUSH_DATA (push, 0x41); +- BEGIN_NVC0(push, NVC0_3D(SP_START_ID(4)), 1); +- PUSH_DATA (push, gp->code_base); ++ nvc0_program_sp_start_id(nvc0, 4, gp); + BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(4)), 1); + PUSH_DATA (push, gp->num_gprs); + } else { +diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +index 538effdb531..731b0b5dbf8 100644 +--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c ++++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c +@@ -29,6 +29,8 @@ + #include "util/format/u_format.h" + #include "util/u_surface.h" + ++#include "tgsi/tgsi_ureg.h" ++ + #include "os/os_thread.h" + + #include "nvc0/nvc0_context.h" +@@ -138,6 +140,11 @@ nvc0_2d_texture_set(struct nouveau_pushbuf *push, bool dst, + PUSH_DATA (push, bo->offset + offset); + } + ++ if (dst) { ++ IMMED_NVC0(push, SUBC_2D(NVC0_2D_SET_DST_COLOR_RENDER_TO_ZETA_SURFACE), ++ util_format_is_depth_or_stencil(pformat)); ++ } ++ + #if 0 + if (dst) { + BEGIN_NVC0(push, SUBC_2D(NVC0_2D_CLIP_X), 4); +@@ -772,7 +779,7 @@ gm200_evaluate_depth_buffer(struct pipe_context *pipe) + struct nvc0_blitter + { + struct nvc0_program *fp[NV50_BLIT_MAX_TEXTURE_TYPES][NV50_BLIT_MODES]; +- struct nvc0_program vp; ++ struct nvc0_program *vp; + + struct nv50_tsc_entry sampler[2]; /* nearest, bilinear */ + +@@ -785,6 +792,7 @@ struct nvc0_blitctx + { + struct nvc0_context *nvc0; + struct nvc0_program *fp; ++ struct nvc0_program *vp; + uint8_t mode; + uint16_t color_mask; + uint8_t filter; +@@ -809,78 +817,27 @@ struct nvc0_blitctx + struct nvc0_rasterizer_stateobj rast; + 
}; + +-static void +-nvc0_blitter_make_vp(struct nvc0_blitter *blit) ++static void * ++nvc0_blitter_make_vp(struct pipe_context *pipe) + { +- static const uint32_t code_nvc0[] = +- { +- 0xfff11c26, 0x06000080, /* vfetch b64 $r4:$r5 a[0x80] */ +- 0xfff01c46, 0x06000090, /* vfetch b96 $r0:$r1:$r2 a[0x90] */ +- 0x13f01c26, 0x0a7e0070, /* export b64 o[0x70] $r4:$r5 */ +- 0x03f01c46, 0x0a7e0080, /* export b96 o[0x80] $r0:$r1:$r2 */ +- 0x00001de7, 0x80000000, /* exit */ +- }; +- static const uint32_t code_nve4[] = +- { +- 0x00000007, 0x20000000, /* sched */ +- 0xfff11c26, 0x06000080, /* vfetch b64 $r4:$r5 a[0x80] */ +- 0xfff01c46, 0x06000090, /* vfetch b96 $r0:$r1:$r2 a[0x90] */ +- 0x13f01c26, 0x0a7e0070, /* export b64 o[0x70] $r4:$r5 */ +- 0x03f01c46, 0x0a7e0080, /* export b96 o[0x80] $r0:$r1:$r2 */ +- 0x00001de7, 0x80000000, /* exit */ +- }; +- static const uint32_t code_gk110[] = +- { +- 0x00000000, 0x08000000, /* sched */ +- 0x401ffc12, 0x7ec7fc00, /* ld b64 $r4d a[0x80] 0x0 0x0 */ +- 0x481ffc02, 0x7ecbfc00, /* ld b96 $r0t a[0x90] 0x0 0x0 */ +- 0x381ffc12, 0x7f07fc00, /* st b64 a[0x70] $r4d 0x0 0x0 */ +- 0x401ffc02, 0x7f0bfc00, /* st b96 a[0x80] $r0t 0x0 0x0 */ +- 0x001c003c, 0x18000000, /* exit */ +- }; +- static const uint32_t code_gm107[] = +- { +- 0xe4200701, 0x001d0400, /* sched (st 0x1 wr 0x0) (st 0x1 wr 0x1) (st 0x1 wr 0x2) */ +- 0x0807ff00, 0xefd87f80, /* ld b32 $r0 a[0x80] 0x0 */ +- 0x0847ff01, 0xefd87f80, /* ld b32 $r1 a[0x84] 0x0 */ +- 0x0907ff02, 0xefd87f80, /* ld b32 $r2 a[0x90] 0x0 */ +- 0xf0200761, 0x003f8400, /* sched (st 0x1 wr 0x3) (st 0x1 wr 0x4) (st 0x1 wt 0x1) */ +- 0x0947ff03, 0xefd87f80, /* ld b32 $r3 a[0x94] 0x0 */ +- 0x0987ff04, 0xefd87f80, /* ld b32 $r4 a[0x98] 0x0 */ +- 0x0707ff00, 0xeff07f80, /* st b32 a[0x70] $r0 0x0 */ +- 0xfc2017e1, 0x011f8404, /* sched (st 0x1 wt 0x2) (st 0x1 wt 0x4) (st 0x1 wt 0x8) */ +- 0x0747ff01, 0xeff07f80, /* st b32 a[0x74] $r1 0x0 */ +- 0x0807ff02, 0xeff07f80, /* st b32 a[0x80] $r2 0x0 */ +- 0x0847ff03, 
0xeff07f80, /* st b32 a[0x84] $r3 0x0 */ +- 0xfde087e1, 0x001f8000, /* sched (st 0x1 wt 0x10) (st 0xf) (st 0x0) */ +- 0x0887ff04, 0xeff07f80, /* st b32 a[0x88] $r4 0x0 */ +- 0x0007000f, 0xe3000000, /* exit */ +- }; +- +- blit->vp.type = PIPE_SHADER_VERTEX; +- blit->vp.translated = true; +- if (blit->screen->base.class_3d >= GM107_3D_CLASS) { +- blit->vp.code = (uint32_t *)code_gm107; /* const_cast */ +- blit->vp.code_size = sizeof(code_gm107); +- } else +- if (blit->screen->base.class_3d >= NVF0_3D_CLASS) { +- blit->vp.code = (uint32_t *)code_gk110; /* const_cast */ +- blit->vp.code_size = sizeof(code_gk110); +- } else +- if (blit->screen->base.class_3d >= NVE4_3D_CLASS) { +- blit->vp.code = (uint32_t *)code_nve4; /* const_cast */ +- blit->vp.code_size = sizeof(code_nve4); +- } else { +- blit->vp.code = (uint32_t *)code_nvc0; /* const_cast */ +- blit->vp.code_size = sizeof(code_nvc0); +- } +- blit->vp.num_gprs = 6; +- blit->vp.vp.edgeflag = PIPE_MAX_ATTRIBS; ++ struct ureg_program *ureg; ++ struct ureg_src ipos, itex; ++ struct ureg_dst opos, otex; ++ ++ ureg = ureg_create(PIPE_SHADER_VERTEX); ++ if (!ureg) ++ return NULL; ++ ++ opos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); ++ ipos = ureg_DECL_vs_input(ureg, 0); ++ otex = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 0); ++ itex = ureg_DECL_vs_input(ureg, 1); ++ ++ ureg_MOV(ureg, ureg_writemask(opos, TGSI_WRITEMASK_XY ), ipos); ++ ureg_MOV(ureg, ureg_writemask(otex, TGSI_WRITEMASK_XYZ), itex); ++ ureg_END(ureg); + +- blit->vp.hdr[0] = 0x00020461; /* vertprog magic */ +- blit->vp.hdr[4] = 0x000ff000; /* no outputs read */ +- blit->vp.hdr[6] = 0x00000073; /* a[0x80].xy, a[0x90].xyz */ +- blit->vp.hdr[13] = 0x00073000; /* o[0x70].xy, o[0x80].xyz */ ++ return ureg_create_shader_and_destroy(ureg, pipe); + } + + static void +@@ -910,6 +867,20 @@ nvc0_blitter_make_sampler(struct nvc0_blitter *blit) + G80_TSC_1_MIP_FILTER_NONE; + } + ++static void ++nvc0_blit_select_vp(struct nvc0_blitctx *ctx) ++{ ++ struct 
nvc0_blitter *blitter = ctx->nvc0->screen->blitter; ++ ++ if (!blitter->vp) { ++ mtx_lock(&blitter->mutex); ++ if (!blitter->vp) ++ blitter->vp = nvc0_blitter_make_vp(&ctx->nvc0->base.pipe); ++ mtx_unlock(&blitter->mutex); ++ } ++ ctx->vp = blitter->vp; ++} ++ + static void + nvc0_blit_select_fp(struct nvc0_blitctx *ctx, const struct pipe_blit_info *info) + { +@@ -1082,7 +1053,7 @@ nvc0_blitctx_pre_blit(struct nvc0_blitctx *ctx, + + nvc0->rast = &ctx->rast; + +- nvc0->vertprog = &blitter->vp; ++ nvc0->vertprog = ctx->vp; + nvc0->tctlprog = NULL; + nvc0->tevlprog = NULL; + nvc0->gmtyprog = NULL; +@@ -1221,6 +1192,7 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) + blit->filter = nv50_blit_get_filter(info); + blit->render_condition_enable = info->render_condition_enable; + ++ nvc0_blit_select_vp(blit); + nvc0_blit_select_fp(blit, info); + nvc0_blitctx_pre_blit(blit, info); + +@@ -1266,6 +1238,11 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) + } + } + ++ if (screen->eng3d->oclass >= TU102_3D_CLASS) { ++ IMMED_NVC0(push, SUBC_3D(TU102_3D_SET_COLOR_RENDER_TO_ZETA_SURFACE), ++ util_format_is_depth_or_stencil(info->dst.format)); ++ } ++ + IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 0); + IMMED_NVC0(push, NVC0_3D(VIEW_VOLUME_CLIP_CTRL), 0x2 | + NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_RANGE_0_1); +@@ -1326,7 +1303,10 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) + PUSH_DATAh(push, vtxbuf); + PUSH_DATA (push, vtxbuf); + PUSH_DATA (push, 0); +- BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2); ++ if (screen->eng3d->oclass < TU102_3D_CLASS) ++ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2); ++ else ++ BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(0)), 2); + PUSH_DATAh(push, vtxbuf + length - 1); + PUSH_DATA (push, vtxbuf + length - 1); + +@@ -1403,6 +1383,8 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info) + + /* restore 
viewport transform */ + IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 1); ++ if (screen->eng3d->oclass >= TU102_3D_CLASS) ++ IMMED_NVC0(push, SUBC_3D(TU102_3D_SET_COLOR_RENDER_TO_ZETA_SURFACE), 0); + } + + static void +@@ -1697,7 +1679,6 @@ nvc0_blitter_create(struct nvc0_screen *screen) + + (void) mtx_init(&screen->blitter->mutex, mtx_plain); + +- nvc0_blitter_make_vp(screen->blitter); + nvc0_blitter_make_sampler(screen->blitter); + + return true; +diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c +index 92bd7eb5b8e..8287d8431b1 100644 +--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c ++++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c +@@ -360,7 +360,11 @@ nvc0_validate_vertex_buffers(struct nvc0_context *nvc0) + PUSH_DATAh(push, res->address + offset); + PUSH_DATA (push, res->address + offset); + } +- BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2); ++ ++ if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS) ++ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2); ++ else ++ BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(i)), 2); + PUSH_DATAh(push, res->address + limit); + PUSH_DATA (push, res->address + limit); + +@@ -406,7 +410,11 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0) + PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride); + PUSH_DATAh(push, buf->address + offset); + PUSH_DATA (push, buf->address + offset); +- BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(b)), 2); ++ ++ if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS) ++ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(b)), 2); ++ else ++ BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(b)), 2); + PUSH_DATAh(push, buf->address + limit); + PUSH_DATA (push, buf->address + limit); + +@@ -961,12 +969,23 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) + assert(nouveau_resource_mapped_by_gpu(&buf->base)); + + PUSH_SPACE(push, 6); +- BEGIN_NVC0(push, 
NVC0_3D(INDEX_ARRAY_START_HIGH), 5); +- PUSH_DATAh(push, buf->address); +- PUSH_DATA (push, buf->address); +- PUSH_DATAh(push, buf->address + buf->base.width0 - 1); +- PUSH_DATA (push, buf->address + buf->base.width0 - 1); +- PUSH_DATA (push, info->index_size >> 1); ++ if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS) { ++ BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 5); ++ PUSH_DATAh(push, buf->address); ++ PUSH_DATA (push, buf->address); ++ PUSH_DATAh(push, buf->address + buf->base.width0 - 1); ++ PUSH_DATA (push, buf->address + buf->base.width0 - 1); ++ PUSH_DATA (push, info->index_size >> 1); ++ } else { ++ BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 2); ++ PUSH_DATAh(push, buf->address); ++ PUSH_DATA (push, buf->address); ++ BEGIN_NVC0(push, SUBC_3D(TU102_3D_INDEX_ARRAY_LIMIT_HIGH), 2); ++ PUSH_DATAh(push, buf->address + buf->base.width0 - 1); ++ PUSH_DATA (push, buf->address + buf->base.width0 - 1); ++ BEGIN_NVC0(push, NVC0_3D(INDEX_FORMAT), 1); ++ PUSH_DATA (push, info->index_size >> 1); ++ } + + BCTX_REFN(nvc0->bufctx_3d, 3D_IDX, buf, RD); + } +diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c +index 8aa7088dfec..d49a5dfd2cf 100644 +--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c ++++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c +@@ -228,7 +228,11 @@ nvc0_push_setup_vertex_array(struct nvc0_context *nvc0, const unsigned count) + BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_START_HIGH(0)), 2); + PUSH_DATAh(push, va); + PUSH_DATA (push, va); +- BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2); ++ ++ if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS) ++ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2); ++ else ++ BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(0)), 2); + PUSH_DATAh(push, va + size - 1); + PUSH_DATA (push, va + size - 1); + +@@ -771,7 +775,11 @@ nvc0_push_upload_vertex_ids(struct push_context *ctx, + PUSH_DATA (push, 
NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | index_size); + PUSH_DATAh(push, va); + PUSH_DATA (push, va); +- BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(1)), 2); ++ ++ if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS) ++ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(1)), 2); ++ else ++ BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(1)), 2); + PUSH_DATAh(push, va + info->count * index_size - 1); + PUSH_DATA (push, va + info->count * index_size - 1); + +diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c +index 146eeb35f85..ebbc410184b 100644 +--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c ++++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c +@@ -27,11 +27,18 @@ + + #include "codegen/nv50_ir_driver.h" + +-#ifndef NDEBUG +-static void nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *); +-static void gp100_compute_dump_launch_desc(const struct gp100_cp_launch_desc *); +-#endif +- ++#include "drf.h" ++#include "qmd.h" ++#include "cla0c0qmd.h" ++#include "clc0c0qmd.h" ++#include "clc3c0qmd.h" ++ ++#define NVA0C0_QMDV00_06_VAL_SET(p,a...) NVVAL_MW_SET((p), NVA0C0, QMDV00_06, ##a) ++#define NVA0C0_QMDV00_06_DEF_SET(p,a...) NVDEF_MW_SET((p), NVA0C0, QMDV00_06, ##a) ++#define NVC0C0_QMDV02_01_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC0C0, QMDV02_01, ##a) ++#define NVC0C0_QMDV02_01_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC0C0, QMDV02_01, ##a) ++#define NVC3C0_QMDV02_02_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC3C0, QMDV02_02, ##a) ++#define NVC3C0_QMDV02_02_DEF_SET(p,a...) 
NVDEF_MW_SET((p), NVC3C0, QMDV02_02, ##a) + + int + nve4_screen_compute_setup(struct nvc0_screen *screen, +@@ -45,6 +52,12 @@ nve4_screen_compute_setup(struct nvc0_screen *screen, + uint64_t address; + + switch (dev->chipset & ~0xf) { ++ case 0x160: ++ obj_class = TU102_COMPUTE_CLASS; ++ break; ++ case 0x140: ++ obj_class = GV100_COMPUTE_CLASS; ++ break; + case 0x100: + case 0xf0: + obj_class = NVF0_COMPUTE_CLASS; /* GK110 */ +@@ -88,24 +101,35 @@ nve4_screen_compute_setup(struct nvc0_screen *screen, + PUSH_DATAh(push, screen->tls->size / screen->mp_count); + PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff); + PUSH_DATA (push, 0xff); +- BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(1)), 3); +- PUSH_DATAh(push, screen->tls->size / screen->mp_count); +- PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff); +- PUSH_DATA (push, 0xff); ++ if (obj_class < GV100_COMPUTE_CLASS) { ++ BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(1)), 3); ++ PUSH_DATAh(push, screen->tls->size / screen->mp_count); ++ PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff); ++ PUSH_DATA (push, 0xff); ++ } + + /* Unified address space ? Who needs that ? Certainly not OpenCL. + * + * FATAL: Buffers with addresses inside [0x1000000, 0x3000000] will NOT be + * accessible. We cannot prevent that at the moment, so expect failure. 
+ */ +- BEGIN_NVC0(push, NVE4_CP(LOCAL_BASE), 1); +- PUSH_DATA (push, 0xff << 24); +- BEGIN_NVC0(push, NVE4_CP(SHARED_BASE), 1); +- PUSH_DATA (push, 0xfe << 24); +- +- BEGIN_NVC0(push, NVE4_CP(CODE_ADDRESS_HIGH), 2); +- PUSH_DATAh(push, screen->text->offset); +- PUSH_DATA (push, screen->text->offset); ++ if (obj_class < GV100_COMPUTE_CLASS) { ++ BEGIN_NVC0(push, NVE4_CP(LOCAL_BASE), 1); ++ PUSH_DATA (push, 0xff << 24); ++ BEGIN_NVC0(push, NVE4_CP(SHARED_BASE), 1); ++ PUSH_DATA (push, 0xfe << 24); ++ ++ BEGIN_NVC0(push, NVE4_CP(CODE_ADDRESS_HIGH), 2); ++ PUSH_DATAh(push, screen->text->offset); ++ PUSH_DATA (push, screen->text->offset); ++ } else { ++ BEGIN_NVC0(push, SUBC_CP(0x2a0), 2); ++ PUSH_DATAh(push, 0xfeULL << 24); ++ PUSH_DATA (push, 0xfeULL << 24); ++ BEGIN_NVC0(push, SUBC_CP(0x7b0), 2); ++ PUSH_DATAh(push, 0xffULL << 24); ++ PUSH_DATA (push, 0xffULL << 24); ++ } + + BEGIN_NVC0(push, SUBC_CP(0x0310), 1); + PUSH_DATA (push, (obj_class >= NVF0_COMPUTE_CLASS) ? 0x400 : 0x300); +@@ -542,14 +566,35 @@ nve4_compute_upload_input(struct nvc0_context *nvc0, + PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB); + } + +-static inline uint8_t +-nve4_compute_derive_cache_split(struct nvc0_context *nvc0, uint32_t shared_size) ++static inline void ++gp100_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index, ++ struct nouveau_bo *bo, uint32_t base, uint32_t size) ++{ ++ uint64_t address = bo->offset + base; ++ ++ assert(index < 8); ++ assert(!(base & 0xff)); ++ ++ NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_LOWER, index, address); ++ NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_UPPER, index, address >> 32); ++ NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_SIZE_SHIFTED4, index, ++ DIV_ROUND_UP(size, 16)); ++ NVC0C0_QMDV02_01_DEF_SET(qmd, CONSTANT_BUFFER_VALID, index, TRUE); ++} ++ ++static inline void ++nve4_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index, struct nouveau_bo *bo, ++ uint32_t base, uint32_t size) + { +- if (shared_size > (32 << 10)) +- return 
NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1; +- if (shared_size > (16 << 10)) +- return NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1; +- return NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1; ++ uint64_t address = bo->offset + base; ++ ++ assert(index < 8); ++ assert(!(base & 0xff)); ++ ++ NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_LOWER, index, address); ++ NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_UPPER, index, address >> 32); ++ NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_SIZE, index, size); ++ NVA0C0_QMDV00_06_DEF_SET(qmd, CONSTANT_BUFFER_VALID, index, TRUE); + } + + static void +@@ -577,92 +622,186 @@ nve4_compute_setup_buf_cb(struct nvc0_context *nvc0, bool gp100, void *desc) + } + + static void +-nve4_compute_setup_launch_desc(struct nvc0_context *nvc0, +- struct nve4_cp_launch_desc *desc, ++nve4_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd, + const struct pipe_grid_info *info) + { + const struct nvc0_screen *screen = nvc0->screen; + const struct nvc0_program *cp = nvc0->compprog; + +- nve4_cp_launch_desc_init_default(desc); +- +- desc->entry = nvc0_program_symbol_offset(cp, info->pc); +- +- desc->griddim_x = info->grid[0]; +- desc->griddim_y = info->grid[1]; +- desc->griddim_z = info->grid[2]; +- desc->blockdim_x = info->block[0]; +- desc->blockdim_y = info->block[1]; +- desc->blockdim_z = info->block[2]; +- +- desc->shared_size = align(cp->cp.smem_size, 0x100); +- desc->local_size_p = (cp->hdr[1] & 0xfffff0) + align(cp->cp.lmem_size, 0x10); +- desc->local_size_n = 0; +- desc->cstack_size = 0x800; +- desc->cache_split = nve4_compute_derive_cache_split(nvc0, cp->cp.smem_size); ++ NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, TRUE); ++ NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, TRUE); ++ NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_DATA_CACHE, TRUE); ++ NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_SHADER_DATA_CACHE, TRUE); ++ NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, TRUE); ++ 
NVA0C0_QMDV00_06_DEF_SET(qmd, RELEASE_MEMBAR_TYPE, FE_SYSMEMBAR); ++ NVA0C0_QMDV00_06_DEF_SET(qmd, CWD_MEMBAR_TYPE, L1_SYSMEMBAR); ++ NVA0C0_QMDV00_06_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK); ++ NVA0C0_QMDV00_06_VAL_SET(qmd, SASS_VERSION, 0x30); ++ ++ NVA0C0_QMDV00_06_VAL_SET(qmd, PROGRAM_OFFSET, ++ nvc0_program_symbol_offset(cp, info->pc)); ++ ++ NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]); ++ NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]); ++ NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_DEPTH, info->grid[2]); ++ NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]); ++ NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]); ++ NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]); ++ ++ NVA0C0_QMDV00_06_VAL_SET(qmd, SHARED_MEMORY_SIZE, ++ align(cp->cp.smem_size, 0x100)); ++ NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, ++ (cp->hdr[1] & 0xfffff0) + ++ align(cp->cp.lmem_size, 0x10)); ++ NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0); ++ NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, 0x800); ++ ++ if (cp->cp.smem_size > (32 << 10)) ++ NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, ++ DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB); ++ else ++ if (cp->cp.smem_size > (16 << 10)) ++ NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, ++ DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB); ++ else ++ NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION, ++ DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB); + +- desc->gpr_alloc = cp->num_gprs; +- desc->bar_alloc = cp->num_barriers; ++ NVA0C0_QMDV00_06_VAL_SET(qmd, REGISTER_COUNT, cp->num_gprs); ++ NVA0C0_QMDV00_06_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers); + + // Only bind user uniforms and the driver constant buffer through the + // launch descriptor because UBOs are sticked to the driver cb to avoid the + // limitation of 8 CBs. 
+ if (nvc0->constbuf[5][0].user || cp->parm_size) { +- nve4_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo, ++ nve4_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo, + NVC0_CB_USR_INFO(5), 1 << 16); + + // Later logic will attempt to bind a real buffer at position 0. That + // should not happen if we've bound a user buffer. + assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf); + } +- nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo, ++ nve4_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo, + NVC0_CB_AUX_INFO(5), 1 << 11); + +- nve4_compute_setup_buf_cb(nvc0, false, desc); ++ nve4_compute_setup_buf_cb(nvc0, false, qmd); + } + + static void +-gp100_compute_setup_launch_desc(struct nvc0_context *nvc0, +- struct gp100_cp_launch_desc *desc, ++gp100_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd, + const struct pipe_grid_info *info) + { + const struct nvc0_screen *screen = nvc0->screen; + const struct nvc0_program *cp = nvc0->compprog; + +- gp100_cp_launch_desc_init_default(desc); ++ NVC0C0_QMDV02_01_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1); ++ NVC0C0_QMDV02_01_DEF_SET(qmd, RELEASE_MEMBAR_TYPE, FE_SYSMEMBAR); ++ NVC0C0_QMDV02_01_DEF_SET(qmd, CWD_MEMBAR_TYPE, L1_SYSMEMBAR); ++ NVC0C0_QMDV02_01_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK); ++ ++ NVC0C0_QMDV02_01_VAL_SET(qmd, PROGRAM_OFFSET, ++ nvc0_program_symbol_offset(cp, info->pc)); ++ ++ NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]); ++ NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]); ++ NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_DEPTH, info->grid[2]); ++ NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]); ++ NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]); ++ NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]); ++ ++ NVC0C0_QMDV02_01_VAL_SET(qmd, SHARED_MEMORY_SIZE, ++ align(cp->cp.smem_size, 0x100)); ++ NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, ++ (cp->hdr[1] & 
0xfffff0) + ++ align(cp->cp.lmem_size, 0x10)); ++ NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0); ++ NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, 0x800); + +- desc->entry = nvc0_program_symbol_offset(cp, info->pc); ++ NVC0C0_QMDV02_01_VAL_SET(qmd, REGISTER_COUNT, cp->num_gprs); ++ NVC0C0_QMDV02_01_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers); + +- desc->griddim_x = info->grid[0]; +- desc->griddim_y = info->grid[1]; +- desc->griddim_z = info->grid[2]; +- desc->blockdim_x = info->block[0]; +- desc->blockdim_y = info->block[1]; +- desc->blockdim_z = info->block[2]; ++ // Only bind user uniforms and the driver constant buffer through the ++ // launch descriptor because UBOs are sticked to the driver cb to avoid the ++ // limitation of 8 CBs. ++ if (nvc0->constbuf[5][0].user || cp->parm_size) { ++ gp100_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo, ++ NVC0_CB_USR_INFO(5), 1 << 16); + +- desc->shared_size = align(cp->cp.smem_size, 0x100); +- desc->local_size_p = (cp->hdr[1] & 0xfffff0) + align(cp->cp.lmem_size, 0x10); +- desc->local_size_n = 0; +- desc->cstack_size = 0x800; ++ // Later logic will attempt to bind a real buffer at position 0. That ++ // should not happen if we've bound a user buffer. 
++ assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf); ++ } ++ gp100_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo, ++ NVC0_CB_AUX_INFO(5), 1 << 11); ++ ++ nve4_compute_setup_buf_cb(nvc0, true, qmd); ++} ++ ++static int ++gv100_sm_config_smem_size(u32 size) ++{ ++ if (size > 64 * 1024) size = 96 * 1024; ++ else if (size > 32 * 1024) size = 64 * 1024; ++ else if (size > 16 * 1024) size = 32 * 1024; ++ else if (size > 8 * 1024) size = 16 * 1024; ++ else size = 8 * 1024; ++ return (size / 4096) + 1; ++} + +- desc->gpr_alloc = cp->num_gprs; +- desc->bar_alloc = cp->num_barriers; ++static void ++gv100_compute_setup_launch_desc(struct nvc0_context *nvc0, u32 *qmd, ++ const struct pipe_grid_info *info) ++{ ++ struct nvc0_program *cp = nvc0->compprog; ++ struct nvc0_screen *screen = nvc0->screen; ++ uint64_t entry = ++ screen->text->offset + nvc0_program_symbol_offset(cp, info->pc); ++ ++ NVC3C0_QMDV02_02_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1); ++ NVC3C0_QMDV02_02_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK); ++ NVC3C0_QMDV02_02_DEF_SET(qmd, SAMPLER_INDEX, INDEPENDENTLY); ++ NVC3C0_QMDV02_02_VAL_SET(qmd, SHARED_MEMORY_SIZE, ++ align(cp->cp.smem_size, 0x100)); ++ NVC3C0_QMDV02_02_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, ++ (cp->hdr[1] & 0xfffff0) + ++ align(cp->cp.lmem_size, 0x10)); ++ NVC3C0_QMDV02_02_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0); ++ NVC3C0_QMDV02_02_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE, ++ gv100_sm_config_smem_size(8 * 1024)); ++ NVC3C0_QMDV02_02_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE, ++ gv100_sm_config_smem_size(96 * 1024)); ++ NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_VERSION, 2); ++ NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_MAJOR_VERSION, 2); ++ NVC3C0_QMDV02_02_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE, ++ gv100_sm_config_smem_size(cp->cp.smem_size)); ++ ++ NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]); ++ NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]); ++ NVC3C0_QMDV02_02_VAL_SET(qmd, 
CTA_RASTER_DEPTH, info->grid[2]); ++ NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]); ++ NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]); ++ NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]); ++ NVC3C0_QMDV02_02_VAL_SET(qmd, REGISTER_COUNT_V, cp->num_gprs); ++ NVC3C0_QMDV02_02_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers); + + // Only bind user uniforms and the driver constant buffer through the + // launch descriptor because UBOs are sticked to the driver cb to avoid the + // limitation of 8 CBs. + if (nvc0->constbuf[5][0].user || cp->parm_size) { +- gp100_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo, ++ gp100_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo, + NVC0_CB_USR_INFO(5), 1 << 16); + + // Later logic will attempt to bind a real buffer at position 0. That + // should not happen if we've bound a user buffer. + assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf); + } +- gp100_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo, ++ gp100_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo, + NVC0_CB_AUX_INFO(5), 1 << 11); + +- nve4_compute_setup_buf_cb(nvc0, true, desc); ++ nve4_compute_setup_buf_cb(nvc0, true, qmd); ++ ++ NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, entry & 0xffffffff); ++ NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_UPPER, entry >> 32); + } + + static inline void * +@@ -677,6 +816,7 @@ nve4_compute_alloc_launch_desc(struct nouveau_context *nv, + ptr += adj; + *pgpuaddr += adj; + } ++ memset(ptr, 0x00, 256); + return ptr; + } + +@@ -734,6 +874,9 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) + if (ret) + goto out; + ++ if (nvc0->screen->compute->oclass >= GV100_COMPUTE_CLASS) ++ gv100_compute_setup_launch_desc(nvc0, desc, info); ++ else + if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS) + gp100_compute_setup_launch_desc(nvc0, desc, info); + else +@@ -743,10 +886,14 @@ nve4_launch_grid(struct pipe_context *pipe, 
const struct pipe_grid_info *info) + + #ifndef NDEBUG + if (debug_get_num_option("NV50_PROG_DEBUG", 0)) { ++ debug_printf("Queue Meta Data:\n"); ++ if (nvc0->screen->compute->oclass >= GV100_COMPUTE_CLASS) ++ NVC3C0QmdDump_V02_02(desc); ++ else + if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS) +- gp100_compute_dump_launch_desc(desc); ++ NVC0C0QmdDump_V02_01(desc); + else +- nve4_compute_dump_launch_desc(desc); ++ NVA0C0QmdDump_V00_06(desc); + } + #endif + +@@ -877,115 +1024,6 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0) + nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES; + } + +- +-#ifndef NDEBUG +-static const char *nve4_cache_split_name(unsigned value) +-{ +- switch (value) { +- case NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1: return "16K_SHARED_48K_L1"; +- case NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1: return "32K_SHARED_32K_L1"; +- case NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1: return "48K_SHARED_16K_L1"; +- default: +- return "(invalid)"; +- } +-} +- +-static void +-nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *desc) +-{ +- const uint32_t *data = (const uint32_t *)desc; +- unsigned i; +- bool zero = false; +- +- debug_printf("COMPUTE LAUNCH DESCRIPTOR:\n"); +- +- for (i = 0; i < sizeof(*desc); i += 4) { +- if (data[i / 4]) { +- debug_printf("[%x]: 0x%08x\n", i, data[i / 4]); +- zero = false; +- } else +- if (!zero) { +- debug_printf("...\n"); +- zero = true; +- } +- } +- +- debug_printf("entry = 0x%x\n", desc->entry); +- debug_printf("grid dimensions = %ux%ux%u\n", +- desc->griddim_x, desc->griddim_y, desc->griddim_z); +- debug_printf("block dimensions = %ux%ux%u\n", +- desc->blockdim_x, desc->blockdim_y, desc->blockdim_z); +- debug_printf("s[] size: 0x%x\n", desc->shared_size); +- debug_printf("l[] size: -0x%x / +0x%x\n", +- desc->local_size_n, desc->local_size_p); +- debug_printf("stack size: 0x%x\n", desc->cstack_size); +- debug_printf("barrier count: %u\n", desc->bar_alloc); +- debug_printf("$r count: %u\n", desc->gpr_alloc); +- 
debug_printf("cache split: %s\n", nve4_cache_split_name(desc->cache_split)); +- debug_printf("linked tsc: %d\n", desc->linked_tsc); +- +- for (i = 0; i < 8; ++i) { +- uint64_t address; +- uint32_t size = desc->cb[i].size; +- bool valid = !!(desc->cb_mask & (1 << i)); +- +- address = ((uint64_t)desc->cb[i].address_h << 32) | desc->cb[i].address_l; +- +- if (!valid && !address && !size) +- continue; +- debug_printf("CB[%u]: address = 0x%"PRIx64", size 0x%x%s\n", +- i, address, size, valid ? "" : " (invalid)"); +- } +-} +- +-static void +-gp100_compute_dump_launch_desc(const struct gp100_cp_launch_desc *desc) +-{ +- const uint32_t *data = (const uint32_t *)desc; +- unsigned i; +- bool zero = false; +- +- debug_printf("COMPUTE LAUNCH DESCRIPTOR:\n"); +- +- for (i = 0; i < sizeof(*desc); i += 4) { +- if (data[i / 4]) { +- debug_printf("[%x]: 0x%08x\n", i, data[i / 4]); +- zero = false; +- } else +- if (!zero) { +- debug_printf("...\n"); +- zero = true; +- } +- } +- +- debug_printf("entry = 0x%x\n", desc->entry); +- debug_printf("grid dimensions = %ux%ux%u\n", +- desc->griddim_x, desc->griddim_y, desc->griddim_z); +- debug_printf("block dimensions = %ux%ux%u\n", +- desc->blockdim_x, desc->blockdim_y, desc->blockdim_z); +- debug_printf("s[] size: 0x%x\n", desc->shared_size); +- debug_printf("l[] size: -0x%x / +0x%x\n", +- desc->local_size_n, desc->local_size_p); +- debug_printf("stack size: 0x%x\n", desc->cstack_size); +- debug_printf("barrier count: %u\n", desc->bar_alloc); +- debug_printf("$r count: %u\n", desc->gpr_alloc); +- debug_printf("linked tsc: %d\n", desc->linked_tsc); +- +- for (i = 0; i < 8; ++i) { +- uint64_t address; +- uint32_t size = desc->cb[i].size_sh4 << 4; +- bool valid = !!(desc->cb_mask & (1 << i)); +- +- address = ((uint64_t)desc->cb[i].address_h << 32) | desc->cb[i].address_l; +- +- if (!valid && !address && !size) +- continue; +- debug_printf("CB[%u]: address = 0x%"PRIx64", size 0x%x%s\n", +- i, address, size, valid ? 
"" : " (invalid)"); +- } +-} +-#endif +- + #ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER + static void + nve4_compute_trap_info(struct nvc0_context *nvc0) +diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h +index 7ff6935cc3d..d2599f7a71d 100644 +--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h ++++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h +@@ -4,142 +4,6 @@ + + #include "nvc0/nve4_compute.xml.h" + +-struct nve4_cp_launch_desc +-{ +- u32 unk0[8]; +- u32 entry; +- u32 unk9[2]; +- u32 unk11_0 : 30; +- u32 linked_tsc : 1; +- u32 unk11_31 : 1; +- u32 griddim_x : 31; +- u32 unk12 : 1; +- u16 griddim_y; +- u16 griddim_z; +- u32 unk14[3]; +- u16 shared_size; /* must be aligned to 0x100 */ +- u16 unk17; +- u16 unk18; +- u16 blockdim_x; +- u16 blockdim_y; +- u16 blockdim_z; +- u32 cb_mask : 8; +- u32 unk20_8 : 21; +- u32 cache_split : 2; +- u32 unk20_31 : 1; +- u32 unk21[8]; +- struct { +- u32 address_l; +- u32 address_h : 8; +- u32 reserved : 7; +- u32 size : 17; +- } cb[8]; +- u32 local_size_p : 20; +- u32 unk45_20 : 7; +- u32 bar_alloc : 5; +- u32 local_size_n : 20; +- u32 unk46_20 : 4; +- u32 gpr_alloc : 8; +- u32 cstack_size : 20; +- u32 unk47_20 : 12; +- u32 unk48[16]; +-}; +- +-struct gp100_cp_launch_desc +-{ +- u32 unk0[8]; +- u32 entry; +- u32 unk9[2]; +- u32 unk11_0 : 30; +- u32 linked_tsc : 1; +- u32 unk11_31 : 1; +- u32 griddim_x : 31; +- u32 unk12 : 1; +- u16 griddim_y; +- u16 unk13; +- u16 griddim_z; +- u16 unk14; +- u32 unk15[2]; +- u32 shared_size : 18; +- u32 unk17 : 14; +- u16 unk18; +- u16 blockdim_x; +- u16 blockdim_y; +- u16 blockdim_z; +- u32 cb_mask : 8; +- u32 unk20 : 24; +- u32 unk21[8]; +- u32 local_size_p : 24; +- u32 unk29 : 3; +- u32 bar_alloc : 5; +- u32 local_size_n : 24; +- u32 gpr_alloc : 8; +- u32 cstack_size : 24; +- u32 unk31 : 8; +- struct { +- u32 address_l; +- u32 address_h : 17; +- u32 reserved : 2; +- u32 size_sh4 : 13; +- } cb[8]; +- u32 unk48[16]; +-}; +- +-static 
inline void +-nve4_cp_launch_desc_init_default(struct nve4_cp_launch_desc *desc) +-{ +- memset(desc, 0, sizeof(*desc)); +- +- desc->unk0[7] = 0xbc000000; +- desc->unk11_0 = 0x04014000; +- desc->unk47_20 = 0x300; +-} +- +-static inline void +-nve4_cp_launch_desc_set_cb(struct nve4_cp_launch_desc *desc, +- unsigned index, +- struct nouveau_bo *bo, +- uint32_t base, uint32_t size) +-{ +- uint64_t address = bo->offset + base; +- +- assert(index < 8); +- assert(!(base & 0xff)); +- +- desc->cb[index].address_l = address; +- desc->cb[index].address_h = address >> 32; +- desc->cb[index].size = size; +- +- desc->cb_mask |= 1 << index; +-} +- +-static inline void +-gp100_cp_launch_desc_init_default(struct gp100_cp_launch_desc *desc) +-{ +- memset(desc, 0, sizeof(*desc)); +- +- desc->unk0[4] = 0x40; +- desc->unk11_0 = 0x04014000; +-} +- +-static inline void +-gp100_cp_launch_desc_set_cb(struct gp100_cp_launch_desc *desc, +- unsigned index, +- struct nouveau_bo *bo, +- uint32_t base, uint32_t size) +-{ +- uint64_t address = bo->offset + base; +- +- assert(index < 8); +- assert(!(base & 0xff)); +- +- desc->cb[index].address_l = address; +- desc->cb[index].address_h = address >> 32; +- desc->cb[index].size_sh4 = DIV_ROUND_UP(size, 16); +- +- desc->cb_mask |= 1 << index; +-} +- + struct nve4_mp_trap_info { + u32 lock; + u32 pc; +diff --git a/src/gallium/drivers/nouveau/nvc0/qmd.h b/src/gallium/drivers/nouveau/nvc0/qmd.h +new file mode 100644 +index 00000000000..86c290fe836 +--- /dev/null ++++ b/src/gallium/drivers/nouveau/nvc0/qmd.h +@@ -0,0 +1,68 @@ ++#ifndef __NVHW_QMD_H__ ++#define __NVHW_QMD_H__ ++#include <stdio.h> ++#include <stdint.h> ++#include "util/u_debug.h" ++#include "drf.h" ++ ++#define NVQMD_ENUM_1(X,drf,v0) \ ++ [drf##_##v0] = #v0 ++#define NVQMD_ENUM_2(X,drf,v0,v1) \ ++ [drf##_##v0] = #v0, \ ++ [drf##_##v1] = #v1 ++#define NVQMD_ENUM_3(X,drf,v0,v1,v2) \ ++ [drf##_##v0] = #v0, \ ++ [drf##_##v1] = #v1, \ ++ [drf##_##v2] = #v2 ++#define NVQMD_ENUM_8(X,drf,v0,v1,v2,v3,v4,v5,v6,v7) \ ++
[drf##_##v0] = #v0, \ ++ [drf##_##v1] = #v1, \ ++ [drf##_##v2] = #v2, \ ++ [drf##_##v3] = #v3, \ ++ [drf##_##v4] = #v4, \ ++ [drf##_##v5] = #v5, \ ++ [drf##_##v6] = #v6, \ ++ [drf##_##v7] = #v7 ++ ++#define NVQMD_ENUM_(X,_1,_2,_3,_4,_5,_6,_7,_8,_9,IMPL,...) IMPL ++#define NVQMD_ENUM(A...) NVQMD_ENUM_(X, ##A, NVQMD_ENUM_8, NVQMD_ENUM_7, \ ++ NVQMD_ENUM_6, NVQMD_ENUM_5, \ ++ NVQMD_ENUM_4, NVQMD_ENUM_3, \ ++ NVQMD_ENUM_2, NVQMD_ENUM_1)(X, ##A) ++ ++#define NVQMD_VAL_N(X,d,r,p,f,o) do { \ ++ uint32_t val = NVVAL_MW_GET_X((p), d##_##r##_##f); \ ++ debug_printf(" %-36s: "o"\n", #f, val); \ ++} while(0) ++#define NVQMD_VAL_I(X,d,r,p,f,i,o) do { \ ++ uint32_t val = NVVAL_MW_GET_X((p), d##_##r##_##f(i)); \ ++ char name[80]; \ ++ snprintf(name, sizeof(name), "%s(%d)", #f, i); \ ++ debug_printf(" %-36s: "o"\n", name, val); \ ++} while(0) ++#define NVQMD_VAL_(X,_1,_2,_3,_4,_5,_6,IMPL,...) IMPL ++#define NVQMD_VAL(A...) NVQMD_VAL_(X, ##A, NVQMD_VAL_I, NVQMD_VAL_N)(X, ##A) ++ ++#define NVQMD_DEF(d,r,p,f,e...) do { \ ++ static const char *ev[] = { NVQMD_ENUM(d##_##r##_##f,##e) }; \ ++ uint32_t val = NVVAL_MW_GET((p), d, r, f); \ ++ if (val < ARRAY_SIZE(ev) && ev[val]) \ ++ debug_printf(" %-36s: %s\n", #f, ev[val]); \ ++ else \ ++ debug_printf(" %-36s: UNKNOWN 0x%x\n", #f, val); \ ++} while(0) ++#define NVQMD_IDX(d,r,p,f,i,e...) 
do { \ ++ static const char *ev[] = { NVQMD_ENUM(d##_##r##_##f,##e) }; \ ++ char name[80]; \ ++ snprintf(name, sizeof(name), "%s(%d)", #f, i); \ ++ uint32_t val = NVVAL_MW_GET((p), d, r, f, i); \ ++ if (val < ARRAY_SIZE(ev) && ev[val]) \ ++ debug_printf(" %-36s: %s\n", name, ev[val]); \ ++ else \ ++ debug_printf(" %-36s: UNKNOWN 0x%x\n", name, val); \ ++} while(0) ++ ++void NVA0C0QmdDump_V00_06(uint32_t *); ++void NVC0C0QmdDump_V02_01(uint32_t *); ++void NVC3C0QmdDump_V02_02(uint32_t *); ++#endif +diff --git a/src/gallium/drivers/nouveau/nvc0/qmda0c0.c b/src/gallium/drivers/nouveau/nvc0/qmda0c0.c +new file mode 100644 +index 00000000000..7103a893af5 +--- /dev/null ++++ b/src/gallium/drivers/nouveau/nvc0/qmda0c0.c +@@ -0,0 +1,166 @@ ++/* ++ * Copyright 2020 Red Hat Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ */ ++#include "qmd.h" ++#include "cla0c0qmd.h" ++ ++#define NVA0C0_QMDV00_06_VAL(a...) 
NVQMD_VAL(NVA0C0, QMDV00_06, ##a) ++#define NVA0C0_QMDV00_06_DEF(a...) NVQMD_DEF(NVA0C0, QMDV00_06, ##a) ++#define NVA0C0_QMDV00_06_IDX(a...) NVQMD_IDX(NVA0C0, QMDV00_06, ##a) ++ ++void ++NVA0C0QmdDump_V00_06(uint32_t *qmd) ++{ ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_A, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_B, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_C, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_D, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_E, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_F, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_G, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_H, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_A_A, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_I, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_J, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_A, "0x%x"); ++ NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_K, FALSE, TRUE); ++ NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_L, FALSE, TRUE); ++ NVA0C0_QMDV00_06_DEF(qmd, SEMAPHORE_RELEASE_ENABLE0, FALSE, TRUE); ++ NVA0C0_QMDV00_06_DEF(qmd, SEMAPHORE_RELEASE_ENABLE1, FALSE, TRUE); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_B, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_M, "0x%x"); ++ NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_N, FALSE, TRUE); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_O, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_C, "0x%x"); ++ NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, FALSE, TRUE); ++ NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, FALSE, TRUE); ++ NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_TEXTURE_DATA_CACHE, FALSE, TRUE); ++ NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_SHADER_DATA_CACHE, FALSE, TRUE); ++ NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_INSTRUCTION_CACHE, FALSE, TRUE); ++ NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, FALSE, TRUE); ++ NVA0C0_QMDV00_06_VAL(qmd, PROGRAM_OFFSET, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, 
QMD_RESERVED_V1_P, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_Q, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_D, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_R, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_S, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_E, "0x%x"); ++ NVA0C0_QMDV00_06_DEF(qmd, RELEASE_MEMBAR_TYPE, FE_NONE, FE_SYSMEMBAR); ++ NVA0C0_QMDV00_06_DEF(qmd, CWD_MEMBAR_TYPE, L1_NONE, L1_SYSMEMBAR, L1_MEMBAR); ++ NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_T, FALSE, TRUE); ++ NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_U, FALSE, TRUE); ++ NVA0C0_QMDV00_06_DEF(qmd, THROTTLED, FALSE, TRUE); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_E2_A, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_E2_B, "0x%x"); ++ NVA0C0_QMDV00_06_DEF(qmd, API_VISIBLE_CALL_LIMIT, _32, NO_CHECK); ++ NVA0C0_QMDV00_06_DEF(qmd, SHARED_MEMORY_BANK_MAPPING, FOUR_BYTES_PER_BANK, ++ EIGHT_BYTES_PER_BANK); ++ NVA0C0_QMDV00_06_DEF(qmd, SAMPLER_INDEX, INDEPENDENTLY, VIA_HEADER_INDEX); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_E3_A, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_WIDTH, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_HEIGHT, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_DEPTH, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_WIDTH_RESUME, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_HEIGHT_RESUME, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_DEPTH_RESUME, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_V, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_F, "0x%x"); ++ NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_W, FALSE, TRUE); ++ NVA0C0_QMDV00_06_VAL(qmd, SHARED_MEMORY_SIZE, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_G, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_VERSION, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_MAJOR_VERSION, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_H, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, CTA_THREAD_DIMENSION0, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, CTA_THREAD_DIMENSION1, "0x%x"); ++ 
NVA0C0_QMDV00_06_VAL(qmd, CTA_THREAD_DIMENSION2, "0x%x"); ++ for (int i = 0; i < 8; i++) ++ NVA0C0_QMDV00_06_IDX(qmd, CONSTANT_BUFFER_VALID, i, FALSE, TRUE); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_I, "0x%x"); ++ NVA0C0_QMDV00_06_DEF(qmd, L1_CONFIGURATION, ++ DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB, ++ DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB, ++ DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_X, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_Y, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, RELEASE0_ADDRESS_LOWER, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, RELEASE0_ADDRESS_UPPER, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_J, "0x%x"); ++ NVA0C0_QMDV00_06_DEF(qmd, RELEASE0_REDUCTION_OP, RED_ADD, ++ RED_MIN, ++ RED_MAX, ++ RED_INC, ++ RED_DEC, ++ RED_AND, ++ RED_OR, ++ RED_XOR); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_K, "0x%x"); ++ NVA0C0_QMDV00_06_DEF(qmd, RELEASE0_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32); ++ NVA0C0_QMDV00_06_DEF(qmd, RELEASE0_REDUCTION_ENABLE, FALSE, TRUE); ++ NVA0C0_QMDV00_06_DEF(qmd, RELEASE0_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD); ++ NVA0C0_QMDV00_06_VAL(qmd, RELEASE0_PAYLOAD, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, RELEASE1_ADDRESS_LOWER, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, RELEASE1_ADDRESS_UPPER, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_L, "0x%x"); ++ NVA0C0_QMDV00_06_DEF(qmd, RELEASE1_REDUCTION_OP, RED_ADD, ++ RED_MIN, ++ RED_MAX, ++ RED_INC, ++ RED_DEC, ++ RED_AND, ++ RED_OR, ++ RED_XOR); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_M, "0x%x"); ++ NVA0C0_QMDV00_06_DEF(qmd, RELEASE1_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32); ++ NVA0C0_QMDV00_06_DEF(qmd, RELEASE1_REDUCTION_ENABLE, FALSE, TRUE); ++ NVA0C0_QMDV00_06_DEF(qmd, RELEASE1_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD); ++ NVA0C0_QMDV00_06_VAL(qmd, RELEASE1_PAYLOAD, "0x%x"); ++ for (int i = 0; i < 8; i++) { ++ NVA0C0_QMDV00_06_VAL(qmd, CONSTANT_BUFFER_ADDR_LOWER, i, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, CONSTANT_BUFFER_ADDR_UPPER, i, "0x%x"); ++ 
NVA0C0_QMDV00_06_VAL(qmd, CONSTANT_BUFFER_RESERVED_ADDR, i, "0x%x"); ++ NVA0C0_QMDV00_06_IDX(qmd, CONSTANT_BUFFER_INVALIDATE, i, FALSE, TRUE); ++ NVA0C0_QMDV00_06_VAL(qmd, CONSTANT_BUFFER_SIZE, i, "0x%x"); ++ } ++ NVA0C0_QMDV00_06_VAL(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_N, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, BARRIER_COUNT, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, REGISTER_COUNT, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, SASS_VERSION, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_A, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_B, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_C, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_D, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_E, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_F, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_G, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_H, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_I, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_J, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_K, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_L, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_M, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_N, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, DEBUG_ID_UPPER, "0x%x"); ++ NVA0C0_QMDV00_06_VAL(qmd, DEBUG_ID_LOWER, "0x%x"); ++} +diff --git a/src/gallium/drivers/nouveau/nvc0/qmdc0c0.c b/src/gallium/drivers/nouveau/nvc0/qmdc0c0.c +new file mode 100644 +index 00000000000..945439ee0c8 +--- /dev/null ++++ b/src/gallium/drivers/nouveau/nvc0/qmdc0c0.c +@@ -0,0 +1,165 @@ ++/* ++ * Copyright 2020 Red Hat Inc. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ */ ++#include "qmd.h" ++#include "clc0c0qmd.h" ++ ++#define NVC0C0_QMDV02_01_VAL(a...) NVQMD_VAL(NVC0C0, QMDV02_01, ##a) ++#define NVC0C0_QMDV02_01_DEF(a...) NVQMD_DEF(NVC0C0, QMDV02_01, ##a) ++#define NVC0C0_QMDV02_01_IDX(a...) 
NVQMD_IDX(NVC0C0, QMDV02_01, ##a) ++ ++void ++NVC0C0QmdDump_V02_01(uint32_t *qmd) ++{ ++ NVC0C0_QMDV02_01_VAL(qmd, OUTER_PUT, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, OUTER_OVERFLOW, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, OUTER_GET, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, OUTER_STICKY_OVERFLOW, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, INNER_GET, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, INNER_OVERFLOW, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, INNER_PUT, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, INNER_STICKY_OVERFLOW, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_GROUP_ID, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, SM_GLOBAL_CACHING_ENABLE, "0x%x"); ++ NVC0C0_QMDV02_01_DEF(qmd, RUN_CTA_IN_ONE_SM_PARTITION, FALSE, TRUE); ++ NVC0C0_QMDV02_01_DEF(qmd, IS_QUEUE, FALSE, TRUE); ++ NVC0C0_QMDV02_01_DEF(qmd, ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST, FALSE, TRUE); ++ NVC0C0_QMDV02_01_DEF(qmd, SEMAPHORE_RELEASE_ENABLE0, FALSE, TRUE); ++ NVC0C0_QMDV02_01_DEF(qmd, SEMAPHORE_RELEASE_ENABLE1, FALSE, TRUE); ++ NVC0C0_QMDV02_01_DEF(qmd, REQUIRE_SCHEDULING_PCAS, FALSE, TRUE); ++ NVC0C0_QMDV02_01_DEF(qmd, DEPENDENT_QMD_SCHEDULE_ENABLE, FALSE, TRUE); ++ NVC0C0_QMDV02_01_DEF(qmd, DEPENDENT_QMD_TYPE, QUEUE, GRID); ++ NVC0C0_QMDV02_01_DEF(qmd, DEPENDENT_QMD_FIELD_COPY, FALSE, TRUE); ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_B, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, CIRCULAR_QUEUE_SIZE, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_C, "0x%x"); ++ NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, FALSE, TRUE); ++ NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, FALSE, TRUE); ++ NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_TEXTURE_DATA_CACHE, FALSE, TRUE); ++ NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_SHADER_DATA_CACHE, FALSE, TRUE); ++ NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_INSTRUCTION_CACHE, FALSE, TRUE); ++ NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, FALSE, TRUE); ++ NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_WIDTH_RESUME, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_HEIGHT_RESUME, "0x%x"); ++ 
NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_DEPTH_RESUME, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, PROGRAM_OFFSET, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, CIRCULAR_QUEUE_ADDR_LOWER, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, CIRCULAR_QUEUE_ADDR_UPPER, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_D, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, CIRCULAR_QUEUE_ENTRY_SIZE, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, CWD_REFERENCE_COUNT_ID, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, CWD_REFERENCE_COUNT_DELTA_MINUS_ONE, "0x%x"); ++ NVC0C0_QMDV02_01_DEF(qmd, RELEASE_MEMBAR_TYPE, FE_NONE, FE_SYSMEMBAR); ++ NVC0C0_QMDV02_01_DEF(qmd, CWD_REFERENCE_COUNT_INCR_ENABLE, FALSE, TRUE); ++ NVC0C0_QMDV02_01_DEF(qmd, CWD_MEMBAR_TYPE, L1_NONE, L1_SYSMEMBAR, L1_MEMBAR); ++ NVC0C0_QMDV02_01_DEF(qmd, SEQUENTIALLY_RUN_CTAS, FALSE, TRUE); ++ NVC0C0_QMDV02_01_DEF(qmd, CWD_REFERENCE_COUNT_DECR_ENABLE, FALSE, TRUE); ++ NVC0C0_QMDV02_01_DEF(qmd, THROTTLED, FALSE, TRUE); ++ NVC0C0_QMDV02_01_DEF(qmd, API_VISIBLE_CALL_LIMIT, _32, NO_CHECK); ++ NVC0C0_QMDV02_01_DEF(qmd, SAMPLER_INDEX, INDEPENDENTLY, VIA_HEADER_INDEX); ++ NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_WIDTH, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_HEIGHT, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED13A, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_DEPTH, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED14A, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, DEPENDENT_QMD_POINTER, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, QUEUE_ENTRIES_PER_CTA_MINUS_ONE, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, COALESCE_WAITING_PERIOD, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, SHARED_MEMORY_SIZE, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_G, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_VERSION, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_MAJOR_VERSION, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_H, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, CTA_THREAD_DIMENSION0, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, CTA_THREAD_DIMENSION1, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, CTA_THREAD_DIMENSION2, 
"0x%x"); ++ for (int i = 0; i < 8; i++) ++ NVC0C0_QMDV02_01_IDX(qmd, CONSTANT_BUFFER_VALID, i, FALSE, TRUE); ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_I, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, SM_DISABLE_MASK_LOWER, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, SM_DISABLE_MASK_UPPER, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, RELEASE0_ADDRESS_LOWER, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, RELEASE0_ADDRESS_UPPER, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_J, "0x%x"); ++ NVC0C0_QMDV02_01_DEF(qmd, RELEASE0_REDUCTION_OP, RED_ADD, ++ RED_MIN, ++ RED_MAX, ++ RED_INC, ++ RED_DEC, ++ RED_AND, ++ RED_OR, ++ RED_XOR); ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_K, "0x%x"); ++ NVC0C0_QMDV02_01_DEF(qmd, RELEASE0_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32); ++ NVC0C0_QMDV02_01_DEF(qmd, RELEASE0_REDUCTION_ENABLE, FALSE, TRUE); ++ NVC0C0_QMDV02_01_DEF(qmd, RELEASE0_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD); ++ NVC0C0_QMDV02_01_VAL(qmd, RELEASE0_PAYLOAD, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, RELEASE1_ADDRESS_LOWER, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, RELEASE1_ADDRESS_UPPER, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_L, "0x%x"); ++ NVC0C0_QMDV02_01_DEF(qmd, RELEASE1_REDUCTION_OP, RED_ADD, ++ RED_MIN, ++ RED_MAX, ++ RED_INC, ++ RED_DEC, ++ RED_AND, ++ RED_OR, ++ RED_XOR); ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_M, "0x%x"); ++ NVC0C0_QMDV02_01_DEF(qmd, RELEASE1_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32); ++ NVC0C0_QMDV02_01_DEF(qmd, RELEASE1_REDUCTION_ENABLE, FALSE, TRUE); ++ NVC0C0_QMDV02_01_DEF(qmd, RELEASE1_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD); ++ NVC0C0_QMDV02_01_VAL(qmd, RELEASE1_PAYLOAD, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_N, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, BARRIER_COUNT, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, REGISTER_COUNT, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, "0x%x"); ++ 
NVC0C0_QMDV02_01_VAL(qmd, SASS_VERSION, "0x%x"); ++ for (int i = 0; i < 8; i++) { ++ NVC0C0_QMDV02_01_VAL(qmd, CONSTANT_BUFFER_ADDR_LOWER, i, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, CONSTANT_BUFFER_ADDR_UPPER, i, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, CONSTANT_BUFFER_RESERVED_ADDR, i, "0x%x"); ++ NVC0C0_QMDV02_01_IDX(qmd, CONSTANT_BUFFER_INVALIDATE, i, FALSE, TRUE); ++ NVC0C0_QMDV02_01_VAL(qmd, CONSTANT_BUFFER_SIZE_SHIFTED4, i, "0x%x"); ++ } ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_R, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_S, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_INNER_GET, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_REQUIRE_SCHEDULING_PCAS, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_INNER_PUT, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_SCG_TYPE, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_SPAN_LIST_HEAD_INDEX, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_Q, "0x%x"); ++ NVC0C0_QMDV02_01_DEF(qmd, HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID, FALSE, TRUE); ++ NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_SKED_NEXT_QMD_POINTER, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_G, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_H, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_I, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_J, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_K, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_L, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_M, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_N, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, DEBUG_ID_UPPER, "0x%x"); ++ NVC0C0_QMDV02_01_VAL(qmd, DEBUG_ID_LOWER, "0x%x"); ++} +diff --git a/src/gallium/drivers/nouveau/nvc0/qmdc3c0.c b/src/gallium/drivers/nouveau/nvc0/qmdc3c0.c +new file mode 100644 +index 00000000000..c9bd8966114 +--- /dev/null ++++ b/src/gallium/drivers/nouveau/nvc0/qmdc3c0.c +@@ -0,0 +1,168 @@ ++/* ++ * Copyright 2020 Red Hat Inc. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ */ ++#include "qmd.h" ++#include "clc3c0qmd.h" ++ ++#define NVC3C0_QMDV02_02_VAL(a...) NVQMD_VAL(NVC3C0, QMDV02_02, ##a) ++#define NVC3C0_QMDV02_02_DEF(a...) NVQMD_DEF(NVC3C0, QMDV02_02, ##a) ++#define NVC3C0_QMDV02_02_IDX(a...) 
NVQMD_IDX(NVC3C0, QMDV02_02, ##a) ++ ++void ++NVC3C0QmdDump_V02_02(uint32_t *qmd) ++{ ++ NVC3C0_QMDV02_02_VAL(qmd, OUTER_PUT, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, OUTER_OVERFLOW, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, OUTER_GET, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, OUTER_STICKY_OVERFLOW, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, INNER_GET, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, INNER_OVERFLOW, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, INNER_PUT, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, INNER_STICKY_OVERFLOW, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, QMD_GROUP_ID, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, SM_GLOBAL_CACHING_ENABLE, "0x%x"); ++ NVC3C0_QMDV02_02_DEF(qmd, RUN_CTA_IN_ONE_SM_PARTITION, FALSE, TRUE); ++ NVC3C0_QMDV02_02_DEF(qmd, IS_QUEUE, FALSE, TRUE); ++ NVC3C0_QMDV02_02_DEF(qmd, ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST, FALSE, TRUE); ++ NVC3C0_QMDV02_02_DEF(qmd, SEMAPHORE_RELEASE_ENABLE0, FALSE, TRUE); ++ NVC3C0_QMDV02_02_DEF(qmd, SEMAPHORE_RELEASE_ENABLE1, FALSE, TRUE); ++ NVC3C0_QMDV02_02_DEF(qmd, REQUIRE_SCHEDULING_PCAS, FALSE, TRUE); ++ NVC3C0_QMDV02_02_DEF(qmd, DEPENDENT_QMD_SCHEDULE_ENABLE, FALSE, TRUE); ++ NVC3C0_QMDV02_02_DEF(qmd, DEPENDENT_QMD_TYPE, QUEUE, GRID); ++ NVC3C0_QMDV02_02_DEF(qmd, DEPENDENT_QMD_FIELD_COPY, FALSE, TRUE); ++ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_B, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_SIZE, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_C, "0x%x"); ++ NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, FALSE, TRUE); ++ NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, FALSE, TRUE); ++ NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_TEXTURE_DATA_CACHE, FALSE, TRUE); ++ NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_SHADER_DATA_CACHE, FALSE, TRUE); ++ NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_INSTRUCTION_CACHE, FALSE, TRUE); ++ NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, FALSE, TRUE); ++ NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_WIDTH_RESUME, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_HEIGHT_RESUME, "0x%x"); ++ 
NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_DEPTH_RESUME, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, PROGRAM_OFFSET, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_ADDR_LOWER, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_ADDR_UPPER, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_D, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_ENTRY_SIZE, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, CWD_REFERENCE_COUNT_ID, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, CWD_REFERENCE_COUNT_DELTA_MINUS_ONE, "0x%x"); ++ NVC3C0_QMDV02_02_DEF(qmd, RELEASE_MEMBAR_TYPE, FE_NONE, FE_SYSMEMBAR); ++ NVC3C0_QMDV02_02_DEF(qmd, CWD_REFERENCE_COUNT_INCR_ENABLE, FALSE, TRUE); ++ NVC3C0_QMDV02_02_DEF(qmd, CWD_MEMBAR_TYPE, L1_NONE, L1_SYSMEMBAR, L1_MEMBAR); ++ NVC3C0_QMDV02_02_DEF(qmd, SEQUENTIALLY_RUN_CTAS, FALSE, TRUE); ++ NVC3C0_QMDV02_02_DEF(qmd, CWD_REFERENCE_COUNT_DECR_ENABLE, FALSE, TRUE); ++ NVC3C0_QMDV02_02_DEF(qmd, API_VISIBLE_CALL_LIMIT, _32, NO_CHECK); ++ NVC3C0_QMDV02_02_DEF(qmd, SAMPLER_INDEX, INDEPENDENTLY, VIA_HEADER_INDEX); ++ NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_WIDTH, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_HEIGHT, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED13A, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_DEPTH, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED14A, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, DEPENDENT_QMD_POINTER, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, QUEUE_ENTRIES_PER_CTA_MINUS_ONE, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, COALESCE_WAITING_PERIOD, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, SHARED_MEMORY_SIZE, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, QMD_VERSION, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, QMD_MAJOR_VERSION, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_H, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, CTA_THREAD_DIMENSION0, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, CTA_THREAD_DIMENSION1, "0x%x"); ++ 
NVC3C0_QMDV02_02_VAL(qmd, CTA_THREAD_DIMENSION2, "0x%x"); ++ for (int i = 0; i < 8; i++) ++ NVC3C0_QMDV02_02_IDX(qmd, CONSTANT_BUFFER_VALID, i, FALSE, TRUE); ++ NVC3C0_QMDV02_02_VAL(qmd, REGISTER_COUNT_V, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, FREE_CTA_SLOTS_EMPTY_SM, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, SM_DISABLE_MASK_LOWER, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, SM_DISABLE_MASK_UPPER, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, RELEASE0_ADDRESS_LOWER, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, RELEASE0_ADDRESS_UPPER, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_J, "0x%x"); ++ NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_REDUCTION_OP, RED_ADD, ++ RED_MIN, ++ RED_MAX, ++ RED_INC, ++ RED_DEC, ++ RED_AND, ++ RED_OR, ++ RED_XOR); ++ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_K, "0x%x"); ++ NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32); ++ NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_REDUCTION_ENABLE, FALSE, TRUE); ++ NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD); ++ NVC3C0_QMDV02_02_VAL(qmd, RELEASE0_PAYLOAD, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, RELEASE1_ADDRESS_LOWER, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, RELEASE1_ADDRESS_UPPER, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_L, "0x%x"); ++ NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_REDUCTION_OP, RED_ADD, ++ RED_MIN, ++ RED_MAX, ++ RED_INC, ++ RED_DEC, ++ RED_AND, ++ RED_OR, ++ RED_XOR); ++ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_M, "0x%x"); ++ NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32); ++ NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_REDUCTION_ENABLE, FALSE, TRUE); ++ NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD); ++ NVC3C0_QMDV02_02_VAL(qmd, RELEASE1_PAYLOAD, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_N, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, BARRIER_COUNT, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, 
SHADER_LOCAL_MEMORY_HIGH_SIZE, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, REGISTER_COUNT, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, SASS_VERSION, "0x%x"); ++ for (int i = 0; i < 8; i++) { ++ NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_ADDR_LOWER, i, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_ADDR_UPPER, i, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_RESERVED_ADDR, i, "0x%x"); ++ NVC3C0_QMDV02_02_IDX(qmd, CONSTANT_BUFFER_INVALIDATE, i, FALSE, TRUE); ++ NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_SIZE_SHIFTED4, i, "0x%x"); ++ } ++ NVC3C0_QMDV02_02_VAL(qmd, PROGRAM_ADDRESS_LOWER, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, PROGRAM_ADDRESS_UPPER, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_S, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_INNER_GET, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_REQUIRE_SCHEDULING_PCAS, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_INNER_PUT, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_SCG_TYPE, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_SPAN_LIST_HEAD_INDEX, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_Q, "0x%x"); ++ NVC3C0_QMDV02_02_DEF(qmd, HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID, FALSE, TRUE); ++ NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_SKED_NEXT_QMD_POINTER, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_G, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_H, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_I, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_J, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_K, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_L, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_M, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_N, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, DEBUG_ID_UPPER, "0x%x"); ++ NVC3C0_QMDV02_02_VAL(qmd, DEBUG_ID_LOWER, "0x%x"); ++} +diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c +index 5c43518afcb..d123c8a1c17 100644 +--- 
a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c ++++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c +@@ -104,6 +104,8 @@ nouveau_drm_screen_create(int fd) + case 0x110: + case 0x120: + case 0x130: ++ case 0x140: ++ case 0x160: + init = nvc0_screen_create; + break; + default: diff --git a/SPECS/mesa.spec b/SPECS/mesa.spec index 0952e53..177ac45 100644 --- a/SPECS/mesa.spec +++ b/SPECS/mesa.spec @@ -12,6 +12,7 @@ %define platform_drivers ,i965 %define with_vmware 1 %define with_xa 1 +%define with_iris 1 %endif %ifarch %{ix86} x86_64 @@ -40,8 +41,8 @@ Name: mesa Summary: Mesa graphics libraries -Version: 19.3.4 -Release: 2%{?rctag:.%{rctag}}%{?dist} +Version: 20.1.4 +Release: 1%{?rctag:.%{rctag}}%{?dist} License: MIT URL: http://www.mesa3d.org @@ -55,11 +56,9 @@ Source3: Makefile # Fedora opts to ignore the optional part of clause 2 and treat that code as 2 clause BSD. Source4: Mesa-MLAA-License-Clarification-Email.txt -# fix for shm black images with Xephyr (#1798702) -# upstream in 19.3.5 most likely -Patch0: 0001-dri-add-another-get-shm-variant.patch -Patch1: 0002-glx-add-getImageShm2-path.patch -Patch2: dri-shm-fix-put-image.patch +# Add support for TU11x nvidia +Patch10: 0001-nir-use-bitfield_insert-instead-of-bfi-in-nir_lower_.patch +Patch11: nouveau-tu1xx-support.patch BuildRequires: gcc BuildRequires: gcc-c++ @@ -328,7 +327,7 @@ export ASFLAGS="--generate-missing-build-notes=yes" -Ddri3=true \ -Ddri-drivers=%{?dri_drivers} \ %if 0%{?with_hardware} - -Dgallium-drivers=swrast,virgl,nouveau%{?with_vmware:,svga},radeonsi,r600%{?with_freedreno:,freedreno}%{?with_etnaviv:,etnaviv}%{?with_tegra:,tegra}%{?with_vc4:,vc4}%{?with_kmsro:,kmsro} \ + -Dgallium-drivers=swrast%{?with_iris:,iris},virgl,nouveau%{?with_vmware:,svga},radeonsi,r600%{?with_freedreno:,freedreno}%{?with_etnaviv:,etnaviv}%{?with_tegra:,tegra}%{?with_vc4:,vc4}%{?with_kmsro:,kmsro} \ %else -Dgallium-drivers=swrast,virgl \ %endif @@ -355,6 +354,7 @@ export 
ASFLAGS="--generate-missing-build-notes=yes" -Dbuild-tests=false \ -Dselinux=true \ -Dosmesa=gallium \ + -Dvulkan-device-select-layer=true \ %{nil} %meson_build @@ -489,6 +489,7 @@ done %{_libdir}/dri/radeonsi_dri.so %ifarch %{ix86} x86_64 %{_libdir}/dri/i965_dri.so +%{_libdir}/dri/iris_dri.so %endif %if 0%{?with_vc4} %{_libdir}/dri/vc4_dri.so @@ -537,12 +538,38 @@ done %{_datadir}/vulkan/icd.d/intel_icd.i686.json %{_datadir}/vulkan/icd.d/radeon_icd.i686.json %endif +%{_libdir}/libVkLayer_MESA_device_select.so +%{_datadir}/vulkan/implicit_layer.d/VkLayer_MESA_device_select.json %files vulkan-devel %{_includedir}/vulkan/ %endif %changelog +* Wed Aug 05 2020 Dave Airlie - 20.1.4-1 +- Update to 20.1.4 +- Update nouveau tu1xx support patch (Karol) + +* Mon Jun 29 2020 Dave Airlie - 20.1.2-3 +- a fix on top of the big-endian fix (#1847064) + +* Mon Jun 29 2020 Dave Airlie - 20.1.2-2 +- add another fix for big-endian llvmpipe (#1847064) + +* Mon Jun 29 2020 Dave Airlie - 20.1.2-1 +- Update to 20.1.2 +- add fix for big-endian llvmpipe (#1847064) + +* Thu Jun 11 2020 Dave Airlie - 20.1.1-1 +- Update to 20.1.1 +- Add support for turing + +* Thu May 28 2020 Dave Airlie - 20.1.0-1 +- Update to 20.1.0 final + +* Mon May 25 2020 Dave Airlie - 20.1.0-0.1.rc4 +- Update to 20.1.0-rc4 + * Thu Feb 20 2020 Dave Airlie - 19.3.4-2 - Fix put image shm fallback path.