From 2cac32371b454f69d5db150a0405971379b86dbd Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Aug 06 2020 18:30:59 +0000 Subject: import mesa-20.1.4-1.el8 --- diff --git a/.gitignore b/.gitignore index f1fe27b..94c9bc6 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/mesa-20.1.2.tar.xz +SOURCES/mesa-20.1.4.tar.xz diff --git a/.mesa.metadata b/.mesa.metadata index 87f9f7f..ca12abc 100644 --- a/.mesa.metadata +++ b/.mesa.metadata @@ -1 +1 @@ -b90fe9ca8c3bdad043e86cd1af93bcf83e1da3fb SOURCES/mesa-20.1.2.tar.xz +78243cd7152a8ba759f8f2bdfcf0a877b455e351 SOURCES/mesa-20.1.4.tar.xz diff --git a/SOURCES/0001-gallivm-nir-fix-big-endian-64-bit-splitting-merging.patch b/SOURCES/0001-gallivm-nir-fix-big-endian-64-bit-splitting-merging.patch deleted file mode 100644 index 30fc63d..0000000 --- a/SOURCES/0001-gallivm-nir-fix-big-endian-64-bit-splitting-merging.patch +++ /dev/null @@ -1,45 +0,0 @@ -From fcf3f45728a22250ad15db7e230545147fc28c2e Mon Sep 17 00:00:00 2001 -From: Dave Airlie -Date: Mon, 29 Jun 2020 14:59:20 +1000 -Subject: [PATCH] gallivm/nir: fix big-endian 64-bit splitting/merging. - -The shuffles need to be swapped to do this properly on big-endian ---- - src/gallium/auxiliary/gallivm/lp_bld_nir.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/src/gallium/auxiliary/gallivm/lp_bld_nir.c -index f14475e839d..2c4135ccc05 100644 ---- a/src/gallium/auxiliary/gallivm/lp_bld_nir.c -+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.c -@@ -353,8 +353,13 @@ static LLVMValueRef split_64bit(struct lp_build_nir_context *bld_base, - LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32]; - int len = bld_base->base.type.length * 2; - for (unsigned i = 0; i < bld_base->base.type.length; i++) { -+#if UTIL_ARCH_LITTLE_ENDIAN - shuffles[i] = lp_build_const_int32(gallivm, i * 2); - shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1); -+#else -+ shuffles[i] = lp_build_const_int32(gallivm, (i * 2) + 1); -+ shuffles2[i] = lp_build_const_int32(gallivm, (i * 2)); -+#endif - } - - src = LLVMBuildBitCast(gallivm->builder, src, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), len), ""); -@@ -378,8 +383,13 @@ merge_64bit(struct lp_build_nir_context *bld_base, - assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32))); - - for (i = 0; i < bld_base->base.type.length * 2; i+=2) { -+#if UTIL_ARCH_LITTLE_ENDIAN - shuffles[i] = lp_build_const_int32(gallivm, i / 2); - shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length); -+#else -+ shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length); -+ shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2); -+#endif - } - return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), ""); - } --- -2.26.2 - diff --git a/SOURCES/0001-gallivm-nir-fix-const-loading-on-big-endian-systems.patch b/SOURCES/0001-gallivm-nir-fix-const-loading-on-big-endian-systems.patch deleted file mode 100644 index 33c573f..0000000 --- a/SOURCES/0001-gallivm-nir-fix-const-loading-on-big-endian-systems.patch +++ /dev/null @@ -1,33 +0,0 @@ -From ea7bf3941eeef8320c711a6f66b5e73077cc6e6b Mon Sep 17 00:00:00 2001 -From: Dave Airlie -Date: Mon, 29 Jun 2020 07:40:13 +1000 -Subject: [PATCH] gallivm/nir: fix const loading on big endian systems - -The code was expecting the lower 32-bits of the 64-bit to be -what it wanted, don't be implicit, pull the value from the union. - -This should fix rendering on big endian systems since NIR was -introduced. - -Fixes: 44a6b0107b37 ("gallivm: add nir->llvm translation (v2)") -Reviewed-by: Timothy Arceri ---- - src/gallium/auxiliary/gallivm/lp_bld_nir.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/src/gallium/auxiliary/gallivm/lp_bld_nir.c -index 9aa582a0e8a..f14475e839d 100644 ---- a/src/gallium/auxiliary/gallivm/lp_bld_nir.c -+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.c -@@ -865,7 +865,7 @@ static void visit_load_const(struct lp_build_nir_context *bld_base, - LLVMValueRef result[NIR_MAX_VEC_COMPONENTS]; - struct lp_build_context *int_bld = get_int_bld(bld_base, true, instr->def.bit_size); - for (unsigned i = 0; i < instr->def.num_components; i++) -- result[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type, instr->value[i].u64); -+ result[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type, instr->def.bit_size == 32 ? instr->value[i].u32 : instr->value[i].u64); - assign_ssa_dest(bld_base, &instr->def, result); - } - --- -2.26.2 - diff --git a/SOURCES/0001-glsl-fix-constant-packing-for-64-bit-big-endian.patch b/SOURCES/0001-glsl-fix-constant-packing-for-64-bit-big-endian.patch deleted file mode 100644 index 4e37ce3..0000000 --- a/SOURCES/0001-glsl-fix-constant-packing-for-64-bit-big-endian.patch +++ /dev/null @@ -1,81 +0,0 @@ -From 5fc0b580cecb1529659d5d3719412fb7cbffac0d Mon Sep 17 00:00:00 2001 -From: Dave Airlie -Date: Mon, 29 Jun 2020 13:26:56 +1000 -Subject: [PATCH] glsl: fix constant packing for 64-bit big endian. - -In a piglit run on s390 a lot of double tests fail, explicitly -packing/shifting things rather than using memcpy seems to help ---- - src/compiler/glsl/ir_constant_expression.cpp | 15 +++++++++++++++ - src/compiler/glsl/ir_expression_operation.py | 20 ++++++++++---------- - 2 files changed, 25 insertions(+), 10 deletions(-) - -diff --git a/src/compiler/glsl/ir_constant_expression.cpp b/src/compiler/glsl/ir_constant_expression.cpp -index 636196886b3..595cc821797 100644 ---- a/src/compiler/glsl/ir_constant_expression.cpp -+++ b/src/compiler/glsl/ir_constant_expression.cpp -@@ -452,6 +452,21 @@ isub64_saturate(int64_t a, int64_t b) - return a - b; - } - -+static uint64_t -+pack_2x32(uint32_t a, uint32_t b) -+{ -+ uint64_t v = a; -+ v |= (uint64_t)b << 32; -+ return v; -+} -+ -+static void -+unpack_2x32(uint64_t p, uint32_t *a, uint32_t *b) -+{ -+ *a = p & 0xffffffff; -+ *b = (p >> 32); -+} -+ - /** - * Get the constant that is ultimately referenced by an r-value, in a constant - * expression evaluation context. -diff --git a/src/compiler/glsl/ir_expression_operation.py b/src/compiler/glsl/ir_expression_operation.py -index d2c4d41024f..1c4e6b358e1 100644 ---- a/src/compiler/glsl/ir_expression_operation.py -+++ b/src/compiler/glsl/ir_expression_operation.py -@@ -560,14 +560,14 @@ ir_expression_operation = [ - operation("saturate", 1, printable_name="sat", source_types=(float_type,), c_expression="CLAMP({src0}, 0.0f, 1.0f)"), - - # Double packing, part of ARB_gpu_shader_fp64. -- operation("pack_double_2x32", 1, printable_name="packDouble2x32", source_types=(uint_type,), dest_type=double_type, c_expression="memcpy(&data.d[0], &op[0]->value.u[0], sizeof(double))", flags=frozenset((horizontal_operation, non_assign_operation))), -- operation("unpack_double_2x32", 1, printable_name="unpackDouble2x32", source_types=(double_type,), dest_type=uint_type, c_expression="memcpy(&data.u[0], &op[0]->value.d[0], sizeof(double))", flags=frozenset((horizontal_operation, non_assign_operation))), -+ operation("pack_double_2x32", 1, printable_name="packDouble2x32", source_types=(uint_type,), dest_type=double_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))), -+ operation("unpack_double_2x32", 1, printable_name="unpackDouble2x32", source_types=(double_type,), dest_type=uint_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))), - - # Sampler/Image packing, part of ARB_bindless_texture. -- operation("pack_sampler_2x32", 1, printable_name="packSampler2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="memcpy(&data.u64[0], &op[0]->value.u[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))), -- operation("pack_image_2x32", 1, printable_name="packImage2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="memcpy(&data.u64[0], &op[0]->value.u[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))), -- operation("unpack_sampler_2x32", 1, printable_name="unpackSampler2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="memcpy(&data.u[0], &op[0]->value.u64[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))), -- operation("unpack_image_2x32", 1, printable_name="unpackImage2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="memcpy(&data.u[0], &op[0]->value.u64[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))), -+ operation("pack_sampler_2x32", 1, printable_name="packSampler2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))), -+ operation("pack_image_2x32", 1, printable_name="packImage2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))), -+ operation("unpack_sampler_2x32", 1, printable_name="unpackSampler2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))), -+ operation("unpack_image_2x32", 1, printable_name="unpackImage2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))), - - operation("frexp_sig", 1), - operation("frexp_exp", 1), -@@ -592,10 +592,10 @@ ir_expression_operation = [ - operation("ssbo_unsized_array_length", 1), - - # 64-bit integer packing ops. -- operation("pack_int_2x32", 1, printable_name="packInt2x32", source_types=(int_type,), dest_type=int64_type, c_expression="memcpy(&data.i64[0], &op[0]->value.i[0], sizeof(int64_t))", flags=frozenset((horizontal_operation, non_assign_operation))), -- operation("pack_uint_2x32", 1, printable_name="packUint2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="memcpy(&data.u64[0], &op[0]->value.u[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))), -- operation("unpack_int_2x32", 1, printable_name="unpackInt2x32", source_types=(int64_type,), dest_type=int_type, c_expression="memcpy(&data.i[0], &op[0]->value.i64[0], sizeof(int64_t))", flags=frozenset((horizontal_operation, non_assign_operation))), -- operation("unpack_uint_2x32", 1, printable_name="unpackUint2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="memcpy(&data.u[0], &op[0]->value.u64[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))), -+ operation("pack_int_2x32", 1, printable_name="packInt2x32", source_types=(int_type,), dest_type=int64_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))), -+ operation("pack_uint_2x32", 1, printable_name="packUint2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))), -+ operation("unpack_int_2x32", 1, printable_name="unpackInt2x32", source_types=(int64_type,), dest_type=int_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))), -+ operation("unpack_uint_2x32", 1, printable_name="unpackUint2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))), - - operation("add", 2, printable_name="+", source_types=numeric_types, c_expression="{src0} + {src1}", flags=vector_scalar_operation), - operation("sub", 2, printable_name="-", source_types=numeric_types, c_expression="{src0} - {src1}", flags=vector_scalar_operation), --- -2.26.2 - diff --git a/SOURCES/Makefile b/SOURCES/Makefile index c431c49..8396596 100644 --- a/SOURCES/Makefile +++ b/SOURCES/Makefile @@ -1,4 +1,4 @@ -VERSION ?= 20.1.2 +VERSION ?= 20.1.4 SANITIZE ?= 1 DIRNAME = mesa-${VERSION} diff --git a/SOURCES/nouveau-tu1xx-support.patch b/SOURCES/nouveau-tu1xx-support.patch index 1134f43..3254466 100644 --- a/SOURCES/nouveau-tu1xx-support.patch +++ b/SOURCES/nouveau-tu1xx-support.patch @@ -357,10 +357,10 @@ index e244bd0d610..dd8e1ab86c4 100644 { diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp new file mode 100644 -index 00000000000..0fbd47ccf88 +index 00000000000..ef33743e610 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp -@@ -0,0 +1,2011 @@ +@@ -0,0 +1,2052 @@ +/* + * Copyright 2020 Red Hat Inc. + * @@ -1221,6 +1221,7 @@ index 00000000000..0fbd47ccf88 + break; + } + emitField(73, 3, dType); ++ emitGPR (64, insn->src(2)); + } + + emitPRED (81); @@ -1272,6 +1273,40 @@ index 00000000000..0fbd47ccf88 + emitGPR (16, insn->def(0)); +} + ++static void ++interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data) ++{ ++ int ipa = entry->ipa; ++ int loc = entry->loc; ++ ++ if (data.force_persample_interp && ++ (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT && ++ (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) { ++ ipa |= NV50_IR_INTERP_CENTROID; ++ } ++ ++ int sample; ++ switch (ipa & NV50_IR_INTERP_SAMPLE_MASK) { ++ case NV50_IR_INTERP_DEFAULT : sample = 0; break; ++ case NV50_IR_INTERP_CENTROID: sample = 1; break; ++ case NV50_IR_INTERP_OFFSET : sample = 2; break; ++ default: assert(!"invalid sample mode"); ++ } ++ ++ int interp; ++ switch (ipa & NV50_IR_INTERP_MODE_MASK) { ++ case NV50_IR_INTERP_LINEAR : ++ case NV50_IR_INTERP_PERSPECTIVE: interp = 0; break; ++ case NV50_IR_INTERP_FLAT : interp = 1; break; ++ case NV50_IR_INTERP_SC : interp = 2; break; ++ default: assert(!"invalid ipa mode"); ++ } ++ ++ code[loc + 2] &= ~(0xf << 12); ++ code[loc + 2] |= sample << 12; ++ code[loc + 2] |= interp << 14; ++} ++ +void +CodeEmitterGV100::emitIPA() +{ @@ -1288,17 +1323,21 @@ index 00000000000..0fbd47ccf88 + break; + } + ++ switch (insn->getSampleMode()) { ++ case NV50_IR_INTERP_DEFAULT : emitField(76, 2, 0); break; ++ case NV50_IR_INTERP_CENTROID: emitField(76, 2, 1); break; ++ case NV50_IR_INTERP_OFFSET : emitField(76, 2, 2); break; ++ default: ++ assert(!"invalid sample mode"); ++ break; ++ } ++ + if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET) { -+ switch (insn->getSampleMode()) { -+ case NV50_IR_INTERP_DEFAULT : emitField(76, 2, 0); break; -+ case NV50_IR_INTERP_CENTROID: emitField(76, 2, 1); break; -+ default: -+ break; -+ } + emitGPR (32); ++ addInterp(insn->ipa, 0xff, interpApply); + } else { -+ emitField(76, 2, 2); + emitGPR (32, insn->src(1)); ++ addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, interpApply); + } + + assert(!insn->src(0).isIndirect(0)); @@ -1315,21 +1354,22 @@ index 00000000000..0fbd47ccf88 +} + +void -+CodeEmitterGV100::emitLDSTc(int pos) ++CodeEmitterGV100::emitLDSTc(int posm, int poso) +{ + int mode = 0; ++ int order = 1; + + switch (insn->cache) { -+ case CACHE_CA: mode = 0; break; -+ case CACHE_CG: mode = 1; break; -+ case CACHE_CS: mode = 2; break; -+ case CACHE_CV: mode = 3; break; ++ case CACHE_CA: mode = 0; order = 1; break; ++ case CACHE_CG: mode = 2; order = 2; break; ++ case CACHE_CV: mode = 3; order = 2; break; + default: + assert(!"invalid caching mode"); + break; + } + -+ emitField(pos, 2, mode); ++ emitField(poso, 2, order); ++ emitField(posm, 2, mode); +} + +void @@ -1552,6 +1592,14 @@ index 00000000000..0fbd47ccf88 +{ + const TexInstruction *insn = this->insn->asTex(); + ++ int offsets = 0; ++ switch (insn->tex.useOffsets) { ++ case 4: offsets = 2; break; ++ case 1: offsets = 1; break; ++ case 0: offsets = 0; break; ++ default: assert(!"invalid offsets count"); break; ++ } ++ + if (insn->tex.rIndirectSrc < 0) { + emitInsn (0xb63); + emitField(54, 5, prog->driver->io.auxCBSlot); @@ -1565,8 +1613,7 @@ index 00000000000..0fbd47ccf88 + emitField(84, 1, 1); // !.EF + emitPRED (81); + emitField(78, 1, insn->tex.target.isShadow()); -+ emitField(77, 2, insn->tex.useOffsets == 4); -+ emitField(76, 2, insn->tex.useOffsets == 1); ++ emitField(76, 2, offsets); + emitField(72, 4, insn->tex.mask); + emitGPR (64, insn->def(1)); + emitField(63, 1, insn->tex.target.isArray()); @@ -1776,7 +1823,6 @@ index 00000000000..0fbd47ccf88 + assert(0); + break; + } -+ // emitLDSTc(0x18); + emitField(73, 3, type); + } else { + emitInsn(0x998); @@ -1785,7 +1831,7 @@ index 00000000000..0fbd47ccf88 + } + + emitPRED (81); -+ emitField(79, 2, 1); ++ emitLDSTc(77, 79); + + emitGPR (16, insn->def(0)); + emitGPR (24, insn->src(0)); @@ -1805,12 +1851,7 @@ index 00000000000..0fbd47ccf88 +#endif + emitSUTarget(); + -+ -+#if 0 -+ emitLDSTc(0x18); -+#endif -+ -+ emitField(79, 2, 1); ++ emitLDSTc(77, 79); + emitField(72, 4, 0xf); // rgba + emitGPR(32, insn->src(1)); + emitGPR(24, insn->src(0)); @@ -2374,7 +2415,7 @@ index 00000000000..0fbd47ccf88 +}; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.h new file mode 100644 -index 00000000000..e97bf6580a1 +index 00000000000..15ab717e460 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.h @@ -0,0 +1,403 @@ @@ -2744,7 +2785,7 @@ index 00000000000..e97bf6580a1 + void emitATOMS(); + void emitIPA(); + void emitISBERD(); -+ void emitLDSTc(int); ++ void emitLDSTc(int, int); + void emitLDSTs(int, DataType); + void emitLD(); + void emitLDC(); @@ -2782,10 +2823,26 @@ index 00000000000..e97bf6580a1 +}; +#endif diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp -index bd78b76f384..69f9cfad0d6 100644 +index bd78b76f384..eee9aa67256 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp -@@ -571,6 +571,10 @@ Converter::getSubOp(nir_op op) +@@ -170,6 +170,7 @@ private: + NirArrayLMemOffsets regToLmemOffset; + NirBlockMap blocks; + unsigned int curLoopDepth; ++ unsigned int curIfDepth; + + BasicBlock *exit; + Value *zero; +@@ -188,6 +189,7 @@ Converter::Converter(Program *prog, nir_shader *nir, nv50_ir_prog_info *info) + : ConverterCommon(prog, info), + nir(nir), + curLoopDepth(0), ++ curIfDepth(0), + clipVertexOutput(-1) + { + zero = mkImm((uint32_t)0); +@@ -571,6 +573,10 @@ Converter::getSubOp(nir_op op) case nir_op_imul_high: case nir_op_umul_high: return NV50_IR_SUBOP_MUL_HIGH; @@ -2796,7 +2853,120 @@ index bd78b76f384..69f9cfad0d6 100644 default: return 0; } -@@ -1067,7 +1071,11 @@ bool Converter::assignSlots() { +@@ -909,7 +915,7 @@ calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info, + uint16_t slots; + switch (stage) { + case Program::TYPE_GEOMETRY: +- slots = type->uniform_locations(); ++ slots = type->count_attribute_slots(false); + if (input) + slots /= info.gs.vertices_in; + break; +@@ -917,9 +923,9 @@ calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info, + case Program::TYPE_TESSELLATION_EVAL: + // remove first dimension + if (var->data.patch || (!input && stage == Program::TYPE_TESSELLATION_EVAL)) +- slots = type->uniform_locations(); ++ slots = type->count_attribute_slots(false); + else +- slots = type->fields.array->uniform_locations(); ++ slots = type->fields.array->count_attribute_slots(false); + break; + default: + slots = type->count_attribute_slots(false); +@@ -929,6 +935,24 @@ calcSlots(const glsl_type *type, Program::Type stage, const shader_info &info, + return slots; + } + ++static uint8_t ++getMaskForType(const glsl_type *type, uint8_t slot) { ++ uint16_t comp = type->without_array()->components(); ++ comp = comp ? comp : 4; ++ ++ if (glsl_base_type_is_64bit(type->without_array()->base_type)) { ++ comp *= 2; ++ if (comp > 4) { ++ if (slot % 2) ++ comp -= 4; ++ else ++ comp = 4; ++ } ++ } ++ ++ return (1 << comp) - 1; ++} ++ + bool Converter::assignSlots() { + unsigned name; + unsigned index; +@@ -981,16 +1005,8 @@ bool Converter::assignSlots() { + const glsl_type *type = var->type; + int slot = var->data.location; + uint16_t slots = calcSlots(type, prog->getType(), nir->info, true, var); +- uint32_t comp = type->is_array() ? type->without_array()->component_slots() +- : type->component_slots(); +- uint32_t frac = var->data.location_frac; + uint32_t vary = var->data.driver_location; + +- if (glsl_base_type_is_64bit(type->without_array()->base_type)) { +- if (comp > 2) +- slots *= 2; +- } +- + assert(vary + slots <= PIPE_MAX_SHADER_INPUTS); + + switch(prog->getType()) { +@@ -1014,6 +1030,8 @@ bool Converter::assignSlots() { + info->numPatchConstants = MAX2(info->numPatchConstants, index + slots); + break; + case Program::TYPE_VERTEX: ++ if (slot >= VERT_ATTRIB_GENERIC0) ++ slot = VERT_ATTRIB_GENERIC0 + vary; + vert_attrib_to_tgsi_semantic((gl_vert_attrib)slot, &name, &index); + switch (name) { + case TGSI_SEMANTIC_EDGEFLAG: +@@ -1029,17 +1047,12 @@ bool Converter::assignSlots() { + } + + for (uint16_t i = 0u; i < slots; ++i, ++vary) { +- info->in[vary].id = vary; +- info->in[vary].patch = var->data.patch; +- info->in[vary].sn = name; +- info->in[vary].si = index + i; +- if (glsl_base_type_is_64bit(type->without_array()->base_type)) +- if (i & 0x1) +- info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4); +- else +- info->in[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf); +- else +- info->in[vary].mask |= ((1 << comp) - 1) << frac; ++ nv50_ir_varying *v = &info->in[vary]; ++ ++ v->patch = var->data.patch; ++ v->sn = name; ++ v->si = index + i; ++ v->mask |= getMaskForType(type, i) << var->data.location_frac; + } + info->numInputs = std::max(info->numInputs, vary); + } +@@ -1048,16 +1061,8 @@ bool Converter::assignSlots() { + const glsl_type *type = var->type; + int slot = var->data.location; + uint16_t slots = calcSlots(type, prog->getType(), nir->info, false, var); +- uint32_t comp = type->is_array() ? type->without_array()->component_slots() +- : type->component_slots(); +- uint32_t frac = var->data.location_frac; + uint32_t vary = var->data.driver_location; + +- if (glsl_base_type_is_64bit(type->without_array()->base_type)) { +- if (comp > 2) +- slots *= 2; +- } +- + assert(vary < PIPE_MAX_SHADER_OUTPUTS); + + switch(prog->getType()) { +@@ -1067,7 +1072,11 @@ bool Converter::assignSlots() { case TGSI_SEMANTIC_COLOR: if (!var->data.fb_fetch_output) info->prop.fp.numColourResults++; @@ -2809,7 +2979,185 @@ index bd78b76f384..69f9cfad0d6 100644 // sometimes we get FRAG_RESULT_DATAX with data.index 0 // sometimes we get FRAG_RESULT_DATA0 with data.index X index = index == 0 ? var->data.index : index; -@@ -1617,6 +1625,7 @@ Converter::visit(nir_intrinsic_instr *insn) +@@ -1118,20 +1127,14 @@ bool Converter::assignSlots() { + } + + for (uint16_t i = 0u; i < slots; ++i, ++vary) { +- info->out[vary].id = vary; +- info->out[vary].patch = var->data.patch; +- info->out[vary].sn = name; +- info->out[vary].si = index + i; +- if (glsl_base_type_is_64bit(type->without_array()->base_type)) +- if (i & 0x1) +- info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) >> 0x4); +- else +- info->out[vary].mask |= (((1 << (comp * 2)) - 1) << (frac * 2) & 0xf); +- else +- info->out[vary].mask |= ((1 << comp) - 1) << frac; ++ nv50_ir_varying *v = &info->out[vary]; ++ v->patch = var->data.patch; ++ v->sn = name; ++ v->si = index + i; ++ v->mask |= getMaskForType(type, i) << var->data.location_frac; + + if (nir->info.outputs_read & 1ull << slot) +- info->out[vary].oread = 1; ++ v->oread = 1; + } + info->numOutputs = std::max(info->numOutputs, vary); + } +@@ -1275,6 +1278,7 @@ Converter::parseNIR() + info->bin.tlsSpace = 0; + info->io.clipDistances = nir->info.clip_distance_array_size; + info->io.cullDistances = nir->info.cull_distance_array_size; ++ info->io.layer_viewport_relative = nir->info.layer_viewport_relative; + + switch(prog->getType()) { + case Program::TYPE_COMPUTE: +@@ -1291,7 +1295,7 @@ Converter::parseNIR() + info->prop.fp.postDepthCoverage = nir->info.fs.post_depth_coverage; + info->prop.fp.readsSampleLocations = + (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_POS); +- info->prop.fp.usesDiscard = nir->info.fs.uses_discard; ++ info->prop.fp.usesDiscard = nir->info.fs.uses_discard || nir->info.fs.uses_demote; + info->prop.fp.usesSampleMaskIn = + !!(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN); + break; +@@ -1426,64 +1430,69 @@ Converter::visit(nir_block *block) + bool + Converter::visit(nir_if *nif) + { ++ curIfDepth++; ++ + DataType sType = getSType(nif->condition, false, false); + Value *src = getSrc(&nif->condition, 0); + + nir_block *lastThen = nir_if_last_then_block(nif); + nir_block *lastElse = nir_if_last_else_block(nif); + +- assert(!lastThen->successors[1]); +- assert(!lastElse->successors[1]); +- ++ BasicBlock *headBB = bb; + BasicBlock *ifBB = convert(nir_if_first_then_block(nif)); + BasicBlock *elseBB = convert(nir_if_first_else_block(nif)); + + bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE); + bb->cfg.attach(&elseBB->cfg, Graph::Edge::TREE); + +- // we only insert joinats, if both nodes end up at the end of the if again. +- // the reason for this to not happens are breaks/continues/ret/... which +- // have their own handling +- if (lastThen->successors[0] == lastElse->successors[0]) +- bb->joinAt = mkFlow(OP_JOINAT, convert(lastThen->successors[0]), +- CC_ALWAYS, NULL); +- ++ bool insertJoins = lastThen->successors[0] == lastElse->successors[0]; + mkFlow(OP_BRA, elseBB, CC_EQ, src)->setType(sType); + + foreach_list_typed(nir_cf_node, node, node, &nif->then_list) { + if (!visit(node)) + return false; + } ++ + setPosition(convert(lastThen), true); +- if (!bb->getExit() || +- !bb->getExit()->asFlow() || +- bb->getExit()->asFlow()->op == OP_JOIN) { ++ if (!bb->isTerminated()) { + BasicBlock *tailBB = convert(lastThen->successors[0]); + mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL); + bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD); ++ } else { ++ insertJoins = insertJoins && bb->getExit()->op == OP_BRA; + } + + foreach_list_typed(nir_cf_node, node, node, &nif->else_list) { + if (!visit(node)) + return false; + } ++ + setPosition(convert(lastElse), true); +- if (!bb->getExit() || +- !bb->getExit()->asFlow() || +- bb->getExit()->asFlow()->op == OP_JOIN) { ++ if (!bb->isTerminated()) { + BasicBlock *tailBB = convert(lastElse->successors[0]); + mkFlow(OP_BRA, tailBB, CC_ALWAYS, NULL); + bb->cfg.attach(&tailBB->cfg, Graph::Edge::FORWARD); ++ } else { ++ insertJoins = insertJoins && bb->getExit()->op == OP_BRA; + } + +- if (lastThen->successors[0] == lastElse->successors[0]) { +- setPosition(convert(lastThen->successors[0]), true); ++ /* only insert joins for the most outer if */ ++ if (--curIfDepth) ++ insertJoins = false; ++ ++ /* we made sure that all threads would converge at the same block */ ++ if (insertJoins) { ++ BasicBlock *conv = convert(lastThen->successors[0]); ++ setPosition(headBB->getExit(), false); ++ headBB->joinAt = mkFlow(OP_JOINAT, conv, CC_ALWAYS, NULL); ++ setPosition(conv, false); + mkFlow(OP_JOIN, NULL, CC_ALWAYS, NULL)->fixed = 1; + } + + return true; + } + ++// TODO: add convergency + bool + Converter::visit(nir_loop *loop) + { +@@ -1491,8 +1500,8 @@ Converter::visit(nir_loop *loop) + func->loopNestingBound = std::max(func->loopNestingBound, curLoopDepth); + + BasicBlock *loopBB = convert(nir_loop_first_block(loop)); +- BasicBlock *tailBB = +- convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node))); ++ BasicBlock *tailBB = convert(nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node))); ++ + bb->cfg.attach(&loopBB->cfg, Graph::Edge::TREE); + + mkFlow(OP_PREBREAK, tailBB, CC_ALWAYS, NULL); +@@ -1503,19 +1512,15 @@ Converter::visit(nir_loop *loop) + if (!visit(node)) + return false; + } +- Instruction *insn = bb->getExit(); +- if (bb->cfg.incidentCount() != 0) { +- if (!insn || !insn->asFlow()) { +- mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL); +- bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK); +- } else if (insn && insn->op == OP_BRA && !insn->getPredicate() && +- tailBB->cfg.incidentCount() == 0) { +- // RA doesn't like having blocks around with no incident edge, +- // so we create a fake one to make it happy +- bb->cfg.attach(&tailBB->cfg, Graph::Edge::TREE); +- } ++ ++ if (!bb->isTerminated()) { ++ mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL); ++ bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK); + } + ++ if (tailBB->cfg.incidentCount() == 0) ++ loopBB->cfg.attach(&tailBB->cfg, Graph::Edge::TREE); ++ + curLoopDepth -= 1; + + return true; +@@ -1560,6 +1565,7 @@ Converter::convert(nir_intrinsic_op intr) + return SV_DRAWID; + case nir_intrinsic_load_front_face: + return SV_FACE; ++ case nir_intrinsic_is_helper_invocation: + case nir_intrinsic_load_helper_invocation: + return SV_THREAD_KILL; + case nir_intrinsic_load_instance_id: +@@ -1617,6 +1623,7 @@ Converter::visit(nir_intrinsic_instr *insn) { nir_intrinsic_op op = insn->intrinsic; const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op]; @@ -2817,7 +3165,7 @@ index bd78b76f384..69f9cfad0d6 100644 switch (op) { case nir_intrinsic_load_uniform: { -@@ -1624,7 +1633,7 @@ Converter::visit(nir_intrinsic_instr *insn) +@@ -1624,7 +1631,7 @@ Converter::visit(nir_intrinsic_instr *insn) const DataType dType = getDType(insn); Value *indirect; uint32_t coffset = getIndirect(insn, 0, 0, indirect); @@ -2826,7 +3174,7 @@ index bd78b76f384..69f9cfad0d6 100644 loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect); } break; -@@ -1635,7 +1644,7 @@ Converter::visit(nir_intrinsic_instr *insn) +@@ -1635,7 +1642,7 @@ Converter::visit(nir_intrinsic_instr *insn) DataType dType = getSType(insn->src[0], false, false); uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect); @@ -2835,7 +3183,15 @@ index bd78b76f384..69f9cfad0d6 100644 if (!((1u << i) & nir_intrinsic_write_mask(insn))) continue; -@@ -1688,7 +1697,7 @@ Converter::visit(nir_intrinsic_instr *insn) +@@ -1652,6 +1659,7 @@ Converter::visit(nir_intrinsic_instr *insn) + break; + } + case Program::TYPE_GEOMETRY: ++ case Program::TYPE_TESSELLATION_EVAL: + case Program::TYPE_VERTEX: { + if (info->io.genUserClip > 0 && idx == (uint32_t)clipVertexOutput) { + mkMov(clipVtx[i], src); +@@ -1688,7 +1696,7 @@ Converter::visit(nir_intrinsic_instr *insn) srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0))); srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0))); @@ -2844,7 +3200,27 @@ index bd78b76f384..69f9cfad0d6 100644 defs.push_back(newDefs[i]); mask |= 1 << i; } -@@ -1723,7 +1732,7 @@ Converter::visit(nir_intrinsic_instr *insn) +@@ -1715,15 +1723,25 @@ Converter::visit(nir_intrinsic_instr *insn) + + // see load_barycentric_* handling + if (prog->getType() == Program::TYPE_FRAGMENT) { +- mode = translateInterpMode(&vary, nvirOp); + if (op == nir_intrinsic_load_interpolated_input) { + ImmediateValue immMode; + if (getSrc(&insn->src[0], 1)->getUniqueInsn()->src(0).getImmediate(immMode)) +- mode |= immMode.reg.data.u32; ++ mode = immMode.reg.data.u32; ++ } ++ if (mode == NV50_IR_INTERP_DEFAULT) ++ mode |= translateInterpMode(&vary, nvirOp); ++ else { ++ if (vary.linear) { ++ nvirOp = OP_LINTERP; ++ mode |= NV50_IR_INTERP_LINEAR; ++ } else { ++ nvirOp = OP_PINTERP; ++ mode |= NV50_IR_INTERP_PERSPECTIVE; ++ } } } @@ -2853,7 +3229,27 @@ index bd78b76f384..69f9cfad0d6 100644 uint32_t address = getSlotAddress(insn, idx, i); Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address); if (prog->getType() == Program::TYPE_FRAGMENT) { -@@ -1858,7 +1867,7 @@ Converter::visit(nir_intrinsic_instr *insn) +@@ -1814,9 +1832,11 @@ Converter::visit(nir_intrinsic_instr *insn) + loadImm(newDefs[1], mode); + break; + } ++ case nir_intrinsic_demote: + case nir_intrinsic_discard: + mkOp(OP_DISCARD, TYPE_NONE, NULL); + break; ++ case nir_intrinsic_demote_if: + case nir_intrinsic_discard_if: { + Value *pred = getSSA(1, FILE_PREDICATE); + if (insn->num_components > 1) { +@@ -1832,6 +1852,7 @@ Converter::visit(nir_intrinsic_instr *insn) + case nir_intrinsic_load_base_instance: + case nir_intrinsic_load_draw_id: + case nir_intrinsic_load_front_face: ++ case nir_intrinsic_is_helper_invocation: + case nir_intrinsic_load_helper_invocation: + case nir_intrinsic_load_instance_id: + case nir_intrinsic_load_invocation_id: +@@ -1858,7 +1879,7 @@ Converter::visit(nir_intrinsic_instr *insn) SVSemantic sv = convert(op); LValues &newDefs = convert(&insn->dest); @@ -2862,7 +3258,7 @@ index bd78b76f384..69f9cfad0d6 100644 Value *def; if (typeSizeof(dType) == 8) def = getSSA(); -@@ -1910,12 +1919,12 @@ Converter::visit(nir_intrinsic_instr *insn) +@@ -1910,12 +1931,12 @@ Converter::visit(nir_intrinsic_instr *insn) if (op == nir_intrinsic_read_first_invocation) { mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY; @@ -2877,7 +3273,7 @@ index bd78b76f384..69f9cfad0d6 100644 mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f)) ->subOp = NV50_IR_SUBOP_SHFL_IDX; } -@@ -1931,7 +1940,7 @@ Converter::visit(nir_intrinsic_instr *insn) +@@ -1931,7 +1952,7 @@ Converter::visit(nir_intrinsic_instr *insn) Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS), mkImm(baseVertex), indirectVertex); @@ -2886,7 +3282,7 @@ index bd78b76f384..69f9cfad0d6 100644 uint32_t address = getSlotAddress(insn, idx, i); loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0, indirectOffset, vtxBase, info->in[idx].patch); -@@ -1954,7 +1963,7 @@ Converter::visit(nir_intrinsic_instr *insn) +@@ -1954,19 +1975,24 @@ Converter::visit(nir_intrinsic_instr *insn) vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase); @@ -2895,7 +3291,26 @@ index bd78b76f384..69f9cfad0d6 100644 uint32_t address = getSlotAddress(insn, idx, i); loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0, indirectOffset, vtxBase, info->in[idx].patch); -@@ -1978,7 +1987,7 @@ Converter::visit(nir_intrinsic_instr *insn) + } + break; + } +- case nir_intrinsic_emit_vertex: ++ case nir_intrinsic_emit_vertex: { + if (info->io.genUserClip > 0) + handleUserClipPlanes(); +- // fallthrough ++ uint32_t idx = nir_intrinsic_stream_id(insn); ++ mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1; ++ break; ++ } + case nir_intrinsic_end_primitive: { + uint32_t idx = nir_intrinsic_stream_id(insn); ++ if (idx) ++ break; + mkOp1(getOperation(op), TYPE_U32, NULL, mkImm(idx))->fixed = 1; + break; + } +@@ -1978,7 +2004,7 @@ Converter::visit(nir_intrinsic_instr *insn) uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1; uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset); @@ -2904,7 +3319,7 @@ index bd78b76f384..69f9cfad0d6 100644 loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i, indirectOffset, indirectIndex); } -@@ -2001,7 +2010,7 @@ Converter::visit(nir_intrinsic_instr *insn) +@@ -2001,7 +2027,7 @@ Converter::visit(nir_intrinsic_instr *insn) uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer); uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset); @@ -2913,7 +3328,7 @@ index bd78b76f384..69f9cfad0d6 100644 if (!((1u << i) & nir_intrinsic_write_mask(insn))) continue; Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType, -@@ -2020,7 +2029,7 @@ Converter::visit(nir_intrinsic_instr *insn) +@@ -2020,7 +2046,7 @@ Converter::visit(nir_intrinsic_instr *insn) uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer); uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset); @@ -2922,7 +3337,7 @@ index bd78b76f384..69f9cfad0d6 100644 loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i, indirectOffset, indirectBuffer); -@@ -2314,7 +2323,7 @@ Converter::visit(nir_intrinsic_instr *insn) +@@ -2314,7 +2340,7 @@ Converter::visit(nir_intrinsic_instr *insn) Value *indirectOffset; uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset); @@ -2931,7 +3346,7 @@ index bd78b76f384..69f9cfad0d6 100644 if (!((1u << i) & nir_intrinsic_write_mask(insn))) continue; Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, sType, offset + i * typeSizeof(sType)); -@@ -2328,7 +2337,7 @@ Converter::visit(nir_intrinsic_instr *insn) +@@ -2328,7 +2354,7 @@ Converter::visit(nir_intrinsic_instr *insn) Value *indirectOffset; uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset); @@ -2940,7 +3355,7 @@ index bd78b76f384..69f9cfad0d6 100644 loadFrom(FILE_MEMORY_SHARED, 0, dType, newDefs[i], offset, i, indirectOffset); break; -@@ -2367,7 +2376,7 @@ Converter::visit(nir_intrinsic_instr *insn) +@@ -2367,7 +2393,7 @@ Converter::visit(nir_intrinsic_instr *insn) Value *indirectOffset; uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset); @@ -2949,7 +3364,7 @@ index bd78b76f384..69f9cfad0d6 100644 loadFrom(FILE_MEMORY_GLOBAL, 0, dType, newDefs[i], offset, i, indirectOffset); info->io.globalAccess |= 0x1; -@@ -2376,7 +2385,7 @@ Converter::visit(nir_intrinsic_instr *insn) +@@ -2376,7 +2402,7 @@ Converter::visit(nir_intrinsic_instr *insn) case nir_intrinsic_store_global: { DataType sType = getSType(insn->src[0], false, false); @@ -2958,7 +3373,15 @@ index bd78b76f384..69f9cfad0d6 100644 if (!((1u << i) & nir_intrinsic_write_mask(insn))) continue; if (typeSizeof(sType) == 8) { -@@ -2774,7 +2783,7 @@ Converter::visit(nir_alu_instr *insn) +@@ -2418,7 +2444,6 @@ Converter::visit(nir_jump_instr *insn) + case nir_jump_continue: { + bool isBreak = insn->type == nir_jump_break; + nir_block *block = insn->instr.block; +- assert(!block->successors[1]); + BasicBlock *target = convert(block->successors[0]); + mkFlow(isBreak ? OP_BREAK : OP_CONT, target, CC_ALWAYS, NULL); + bb->cfg.attach(&target->cfg, isBreak ? Graph::Edge::CROSS : Graph::Edge::BACK); +@@ -2774,7 +2799,7 @@ Converter::visit(nir_alu_instr *insn) case nir_op_bfm: { DEFAULT_CHECKS; LValues &newDefs = convert(&insn->dest); @@ -2967,7 +3390,7 @@ index bd78b76f384..69f9cfad0d6 100644 break; } case nir_op_bitfield_insert: { -@@ -2794,17 +2803,69 @@ Converter::visit(nir_alu_instr *insn) +@@ -2794,17 +2819,69 @@ Converter::visit(nir_alu_instr *insn) case nir_op_bitfield_reverse: { DEFAULT_CHECKS; LValues &newDefs = convert(&insn->dest); @@ -3039,7 +3462,25 @@ index bd78b76f384..69f9cfad0d6 100644 // boolean conversions case nir_op_b2f32: { DEFAULT_CHECKS; -@@ -3224,6 +3285,11 @@ Converter::run() +@@ -2990,14 +3067,11 @@ Converter::handleDeref(nir_deref_instr *deref, Value * &indirect, const nir_vari + CacheMode + Converter::convert(enum gl_access_qualifier access) + { +- switch (access) { +- case ACCESS_VOLATILE: ++ if (access & ACCESS_VOLATILE) + return CACHE_CV; +- case ACCESS_COHERENT: ++ if (access & ACCESS_COHERENT) + return CACHE_CG; +- default: +- return CACHE_CA; +- } ++ return CACHE_CA; + } + + CacheMode +@@ -3224,6 +3298,11 @@ Converter::run() NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL); NIR_PASS_V(nir, nir_lower_phis_to_scalar); @@ -3051,7 +3492,7 @@ index bd78b76f384..69f9cfad0d6 100644 do { progress = false; NIR_PASS(progress, nir, nir_copy_prop); -@@ -3285,3 +3351,125 @@ Program::makeFromNIR(struct nv50_ir_prog_info *info) +@@ -3285,3 +3364,125 @@ Program::makeFromNIR(struct nv50_ir_prog_info *info) } } // namespace nv50_ir @@ -3059,106 +3500,106 @@ index bd78b76f384..69f9cfad0d6 100644 +static nir_shader_compiler_options +nvir_nir_shader_compiler_options(int chipset) +{ -+ return { -+ .lower_fdiv = (chipset >= NVISA_GV100_CHIPSET), -+ .lower_ffma = false, -+ .fuse_ffma = false, /* nir doesn't track mad vs fma */ -+ .lower_flrp16 = (chipset >= NVISA_GV100_CHIPSET), -+ .lower_flrp32 = true, -+ .lower_flrp64 = true, -+ .lower_fpow = false, // TODO: nir's lowering is broken, or we could use it -+ .lower_fsat = false, -+ .lower_fsqrt = false, // TODO: only before gm200 -+ .lower_sincos = false, -+ .lower_fmod = true, -+ .lower_bitfield_extract = false, -+ .lower_bitfield_extract_to_shifts = (chipset >= NVISA_GV100_CHIPSET), -+ .lower_bitfield_insert = false, -+ .lower_bitfield_insert_to_shifts = (chipset >= NVISA_GV100_CHIPSET), -+ .lower_bitfield_insert_to_bitfield_select = false, -+ .lower_bitfield_reverse = false, -+ .lower_bit_count = false, -+ .lower_ifind_msb = false, -+ .lower_find_lsb = false, -+ .lower_uadd_carry = true, // TODO -+ .lower_usub_borrow = true, // TODO -+ .lower_mul_high = false, -+ .lower_negate = false, -+ .lower_sub = true, -+ .lower_scmp = true, // TODO: not implemented yet -+ .lower_vector_cmp = false, -+ .lower_idiv = true, -+ .lower_bitops = false, -+ .lower_isign = (chipset >= NVISA_GV100_CHIPSET), -+ .lower_fsign = (chipset >= NVISA_GV100_CHIPSET), -+ .lower_fdph = false, -+ .lower_fdot = false, -+ .fdot_replicates = false, // TODO -+ .lower_ffloor = false, // TODO -+ .lower_ffract = true, -+ .lower_fceil = false, // TODO -+ .lower_ftrunc = false, -+ .lower_ldexp = true, -+ .lower_pack_half_2x16 = true, -+ .lower_pack_unorm_2x16 = true, -+ .lower_pack_snorm_2x16 = true, -+ .lower_pack_unorm_4x8 = true, -+ .lower_pack_snorm_4x8 = true, -+ .lower_unpack_half_2x16 = true, -+ .lower_unpack_unorm_2x16 = true, -+ .lower_unpack_snorm_2x16 = true, -+ .lower_unpack_unorm_4x8 = true, -+ .lower_unpack_snorm_4x8 = true, -+ .lower_pack_split = false, -+ .lower_extract_byte = (chipset < NVISA_GM107_CHIPSET), -+ .lower_extract_word = (chipset < NVISA_GM107_CHIPSET), -+ .lower_all_io_to_temps = false, -+ .lower_all_io_to_elements = false, -+ .vertex_id_zero_based = false, -+ .lower_base_vertex = false, -+ .lower_helper_invocation = false, -+ .optimize_sample_mask_in = false, -+ .lower_cs_local_index_from_id = true, -+ .lower_cs_local_id_from_index = false, -+ .lower_device_index_to_zero = false, // TODO -+ .lower_wpos_pntc = false, // TODO -+ .lower_hadd = true, // TODO -+ .lower_add_sat = true, // TODO -+ .vectorize_io = false, -+ .lower_to_scalar = true, -+ .unify_interfaces = false, -+ .use_interpolated_input_intrinsics = true, -+ .lower_mul_2x32_64 = true, // TODO -+ .lower_rotate = (chipset < NVISA_GV100_CHIPSET), -+ .has_imul24 = false, -+ .intel_vec4 = false, -+ .max_unroll_iterations = 32, -+ .lower_int64_options = (nir_lower_int64_options) ( -+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul64 : 0) | -+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_isign64 : 0) | -+ nir_lower_divmod64 | -+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_high64 : 0) | -+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_mov64 : 0) | -+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_icmp64 : 0) | -+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_iabs64 : 0) | -+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ineg64 : 0) | -+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_logic64 : 0) | -+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_minmax64 : 0) | -+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_shift64 : 0) | -+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_2x32_64 : 0) | -+ ((chipset >= NVISA_GM107_CHIPSET) ? nir_lower_extract64 : 0) | -+ nir_lower_ufind_msb64 -+ ), -+ .lower_doubles_options = (nir_lower_doubles_options) ( -+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drcp : 0) | -+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsqrt : 0) | -+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drsq : 0) | -+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dfract : 0) | -+ nir_lower_dmod | -+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsub : 0) | -+ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ddiv : 0) -+ ) -+ }; ++ nir_shader_compiler_options op = {}; ++ op.lower_fdiv = (chipset >= NVISA_GV100_CHIPSET); ++ op.lower_ffma = false; ++ op.fuse_ffma = false; /* nir doesn't track mad vs fma */ ++ op.lower_flrp16 = (chipset >= NVISA_GV100_CHIPSET); ++ op.lower_flrp32 = true; ++ op.lower_flrp64 = true; ++ op.lower_fpow = false; // TODO: nir's lowering is broken, or we could use it ++ op.lower_fsat = false; ++ op.lower_fsqrt = false; // TODO: only before gm200 ++ op.lower_sincos = false; ++ op.lower_fmod = true; ++ op.lower_bitfield_extract = false; ++ op.lower_bitfield_extract_to_shifts = (chipset >= NVISA_GV100_CHIPSET); ++ op.lower_bitfield_insert = false; ++ op.lower_bitfield_insert_to_shifts = (chipset >= NVISA_GV100_CHIPSET); ++ op.lower_bitfield_insert_to_bitfield_select = false; ++ op.lower_bitfield_reverse = false; ++ op.lower_bit_count = false; ++ op.lower_ifind_msb = false; ++ op.lower_find_lsb = false; ++ op.lower_uadd_carry = true; // TODO ++ op.lower_usub_borrow = true; // TODO ++ op.lower_mul_high = false; ++ op.lower_negate = false; ++ op.lower_sub = true; ++ op.lower_scmp = true; // TODO: not implemented yet ++ op.lower_vector_cmp = false; ++ op.lower_idiv = true; ++ op.lower_bitops = false; ++ op.lower_isign = (chipset >= NVISA_GV100_CHIPSET); ++ op.lower_fsign = (chipset >= NVISA_GV100_CHIPSET); ++ op.lower_fdph = false; ++ op.lower_fdot = false; ++ op.fdot_replicates = false; // TODO ++ op.lower_ffloor = false; // TODO ++ op.lower_ffract = true; ++ op.lower_fceil = false; // TODO ++ op.lower_ftrunc = false; ++ op.lower_ldexp = true; ++ op.lower_pack_half_2x16 = true; ++ op.lower_pack_unorm_2x16 = true; ++ op.lower_pack_snorm_2x16 = true; ++ op.lower_pack_unorm_4x8 = true; ++ op.lower_pack_snorm_4x8 = true; ++ op.lower_unpack_half_2x16 = true; ++ op.lower_unpack_unorm_2x16 = true; ++ op.lower_unpack_snorm_2x16 = true; ++ op.lower_unpack_unorm_4x8 = true; ++ op.lower_unpack_snorm_4x8 = true; ++ op.lower_pack_split = false; ++ op.lower_extract_byte = (chipset < NVISA_GM107_CHIPSET); ++ op.lower_extract_word = (chipset < NVISA_GM107_CHIPSET); ++ op.lower_all_io_to_temps = false; ++ op.lower_all_io_to_elements = false; ++ op.vertex_id_zero_based = false; ++ op.lower_base_vertex = false; ++ op.lower_helper_invocation = false; ++ op.optimize_sample_mask_in = false; ++ op.lower_cs_local_index_from_id = true; ++ op.lower_cs_local_id_from_index = false; ++ op.lower_device_index_to_zero = false; // TODO ++ op.lower_wpos_pntc = false; // TODO ++ op.lower_hadd = true; // TODO ++ op.lower_add_sat = true; // TODO ++ op.vectorize_io = false; ++ op.lower_to_scalar = false; ++ op.unify_interfaces = false; ++ op.use_interpolated_input_intrinsics = true; ++ op.lower_mul_2x32_64 = true; // TODO ++ op.lower_rotate = (chipset < NVISA_GV100_CHIPSET); ++ op.has_imul24 = false; ++ op.intel_vec4 = false; ++ op.max_unroll_iterations = 32; ++ op.lower_int64_options = (nir_lower_int64_options) ( ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul64 : 0) | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_isign64 : 0) | ++ nir_lower_divmod64 | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_high64 : 0) | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_mov64 : 0) | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_icmp64 : 0) | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_iabs64 : 0) | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ineg64 : 0) | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_logic64 : 0) | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_minmax64 : 0) | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_shift64 : 0) | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_2x32_64 : 0) | ++ ((chipset >= NVISA_GM107_CHIPSET) ? nir_lower_extract64 : 0) | ++ nir_lower_ufind_msb64 ++ ); ++ op.lower_doubles_options = (nir_lower_doubles_options) ( ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drcp : 0) | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsqrt : 0) | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drsq : 0) | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dfract : 0) | ++ nir_lower_dmod | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsub : 0) | ++ ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ddiv : 0) ++ ); ++ return op; +} + +static const nir_shader_compiler_options gf100_nir_shader_compiler_options = @@ -3241,10 +3682,10 @@ index 71e5ea6417a..dfa1d035dac 100644 }; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp new file mode 100644 -index 00000000000..4b6df0db588 +index 00000000000..644d4928327 --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp -@@ -0,0 +1,477 @@ +@@ -0,0 +1,481 @@ +/* + * Copyright 2020 Red Hat Inc. + * @@ -3282,7 +3723,7 @@ index 00000000000..4b6df0db588 + Value *pred = bld.getSSA(1, FILE_PREDICATE); + + bld.mkCmp(OP_SET, reverseCondCode(i->asCmp()->setCond), TYPE_U8, pred, -+ i->sType, bld.mkImm(0), i->getSrc(2)); ++ i->sType, bld.mkImm(0), i->getSrc(2))->ftz = i->ftz; + bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), i->getSrc(0), i->getSrc(1), pred); + return true; +} @@ -3436,6 +3877,7 @@ index 00000000000..4b6df0db588 + xsetp->src(0).mod = i->src(0).mod; + xsetp->src(1).mod = i->src(1).mod; + xsetp->setSrc(2, src2); ++ xsetp->ftz = i->ftz; + + i = bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), bld.mkImm(0), met, pred); + i->src(2).mod = Modifier(NV50_IR_MOD_NOT); @@ -3453,24 +3895,25 @@ index 00000000000..4b6df0db588 +} + +bool -+GV100LegalizeSSA::handleSHL(Instruction *i) ++GV100LegalizeSSA::handleShift(Instruction *i) +{ -+ if (i->src(0).getFile() != FILE_GPR) { -+ bld.mkOp3(OP_SHF, i->dType, i->getDef(0), bld.mkImm(0), i->getSrc(1), -+ i->getSrc(0))->subOp = NV50_IR_SUBOP_SHF_L | -+ NV50_IR_SUBOP_SHF_HI; ++ Value *zero = bld.mkImm(0); ++ Value *src1 = i->getSrc(1); ++ Value *src0, *src2; ++ uint8_t subOp = i->op == OP_SHL ? NV50_IR_SUBOP_SHF_L : NV50_IR_SUBOP_SHF_R; ++ ++ if (i->op == OP_SHL && i->src(0).getFile() == FILE_GPR) { ++ src0 = i->getSrc(0); ++ src2 = zero; + } else { -+ bld.mkOp3(OP_SHF, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1), -+ bld.mkImm(0))->subOp = NV50_IR_SUBOP_SHF_L; ++ src0 = zero; ++ src2 = i->getSrc(0); ++ subOp |= NV50_IR_SUBOP_SHF_HI; + } -+ return true; -+} ++ if (i->subOp & NV50_IR_SUBOP_SHIFT_WRAP) ++ subOp |= NV50_IR_SUBOP_SHF_W; + -+bool -+GV100LegalizeSSA::handleSHR(Instruction *i) -+{ -+ bld.mkOp3(OP_SHF, i->dType, i->getDef(0), bld.mkImm(0), i->getSrc(1), -+ i->getSrc(0))->subOp = NV50_IR_SUBOP_SHF_R | NV50_IR_SUBOP_SHF_HI; ++ bld.mkOp3(OP_SHF, i->dType, i->getDef(0), src0, src1, src2)->subOp = subOp; + return true; +} + @@ -3481,6 +3924,7 @@ index 00000000000..4b6df0db588 + bld.mkOp2(OP_ADD, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1)); + xadd->src(0).mod = i->src(0).mod; + xadd->src(1).mod = i->src(1).mod ^ Modifier(NV50_IR_MOD_NEG); ++ xadd->ftz = i->ftz; + return true; +} + @@ -3490,6 +3934,9 @@ index 00000000000..4b6df0db588 + bool lowered = false; + + bld.setPosition(i, false); ++ if (i->sType == TYPE_F32 && i->dType != TYPE_F16 && ++ prog->getType() != Program::TYPE_COMPUTE) ++ handleFTZ(i); + + switch (i->op) { + case OP_AND: @@ -3502,10 +3949,8 @@ index 00000000000..4b6df0db588 + lowered = handleNOT(i); + break; + case OP_SHL: -+ lowered = handleSHL(i); -+ break; + case OP_SHR: -+ lowered = handleSHR(i); ++ lowered = handleShift(i); + break; + case OP_SET: + case OP_SET_AND: @@ -3724,10 +4169,10 @@ index 00000000000..4b6df0db588 +} // namespace nv50_ir diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.h new file mode 100644 -index 00000000000..92fdb938244 +index 00000000000..d918c6e83eb --- /dev/null +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.h -@@ -0,0 +1,79 @@ +@@ -0,0 +1,78 @@ +/* + * Copyright 2020 Red Hat Inc. + * @@ -3801,14 +4246,13 @@ index 00000000000..92fdb938244 + bool handleQUADPOP(Instruction *); + bool handleSET(Instruction *); + bool handleSHFL(Instruction *); -+ bool handleSHL(Instruction *); -+ bool handleSHR(Instruction *); ++ bool handleShift(Instruction *); + bool handleSUB(Instruction *); +}; +} +#endif diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp -index a60881000fe..f100445e9d0 100644 +index a60881000fe..067f9abaca8 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -310,6 +310,14 @@ NVC0LegalizeSSA::handleSET(CmpInstruction *cmp) @@ -3860,18 +4304,36 @@ index a60881000fe..f100445e9d0 100644 bld.mkMovToReg(0, gpEmitAddress); } } +@@ -1714,7 +1727,8 @@ NVC0LoweringPass::handleCasExch(Instruction *cas, bool needCctl) + cctl->setPredicate(cas->cc, cas->getPredicate()); + } + +- if (cas->subOp == NV50_IR_SUBOP_ATOM_CAS) { ++ if (cas->subOp == NV50_IR_SUBOP_ATOM_CAS && ++ targ->getChipset() < NVISA_GV100_CHIPSET) { + // CAS is crazy. It's 2nd source is a double reg, and the 3rd source + // should be set to the high part of the double reg or bad things will + // happen elsewhere in the universe. diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h -index b4c405a9ea5..a4925013ee4 100644 +index b4c405a9ea5..8c99427d3c0 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h -@@ -68,6 +68,7 @@ private: +@@ -64,12 +64,14 @@ private: + void handleDIV(Instruction *); // integer division, modulus + void handleRCPRSQLib(Instruction *, Value *[]); + void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt +- void handleFTZ(Instruction *); void handleSET(CmpInstruction *); void handleTEXLOD(TexInstruction *); void handleShift(Instruction *); + void handleBREV(Instruction *); protected: ++ void handleFTZ(Instruction *); ++ BuildUtil bld; + }; + diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 2f46b0e886a..3a4ec3ca561 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -8661,7 +9123,7 @@ index 8aa7088dfec..d49a5dfd2cf 100644 PUSH_DATA (push, va + info->count * index_size - 1); diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c -index 146eeb35f85..d4687b652ba 100644 +index 146eeb35f85..ebbc410184b 100644 --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c @@ -27,11 +27,18 @@ @@ -8792,7 +9254,7 @@ index 146eeb35f85..d4687b652ba 100644 } static void -@@ -577,92 +622,182 @@ nve4_compute_setup_buf_cb(struct nvc0_context *nvc0, bool gp100, void *desc) +@@ -577,92 +622,186 @@ nve4_compute_setup_buf_cb(struct nvc0_context *nvc0, bool gp100, void *desc) } static void @@ -8972,9 +9434,13 @@ index 146eeb35f85..d4687b652ba 100644 + + NVC3C0_QMDV02_02_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1); + NVC3C0_QMDV02_02_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK); -+ NVC3C0_QMDV02_02_DEF_SET(qmd, SAMPLER_INDEX, VIA_HEADER_INDEX); ++ NVC3C0_QMDV02_02_DEF_SET(qmd, SAMPLER_INDEX, INDEPENDENTLY); + NVC3C0_QMDV02_02_VAL_SET(qmd, SHARED_MEMORY_SIZE, + align(cp->cp.smem_size, 0x100)); ++ NVC3C0_QMDV02_02_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, ++ (cp->hdr[1] & 0xfffff0) + ++ align(cp->cp.lmem_size, 0x10)); ++ NVC3C0_QMDV02_02_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0); + NVC3C0_QMDV02_02_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE, + gv100_sm_config_smem_size(8 * 1024)); + NVC3C0_QMDV02_02_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE, @@ -9017,7 +9483,7 @@ index 146eeb35f85..d4687b652ba 100644 } static inline void * -@@ -677,6 +812,7 @@ nve4_compute_alloc_launch_desc(struct nouveau_context *nv, +@@ -677,6 +816,7 @@ nve4_compute_alloc_launch_desc(struct nouveau_context *nv, ptr += adj; *pgpuaddr += adj; } @@ -9025,7 +9491,7 @@ index 146eeb35f85..d4687b652ba 100644 return ptr; } -@@ -734,6 +870,9 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) +@@ -734,6 +874,9 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) if (ret) goto out; @@ -9035,7 +9501,7 @@ index 146eeb35f85..d4687b652ba 100644 if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS) gp100_compute_setup_launch_desc(nvc0, desc, info); else -@@ -743,10 +882,14 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) +@@ -743,10 +886,14 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info) #ifndef NDEBUG if (debug_get_num_option("NV50_PROG_DEBUG", 0)) { @@ -9052,7 +9518,7 @@ index 146eeb35f85..d4687b652ba 100644 } #endif -@@ -877,115 +1020,6 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0) +@@ -877,115 +1024,6 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0) nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES; } diff --git a/SPECS/mesa.spec b/SPECS/mesa.spec index 4e6c79d..177ac45 100644 --- a/SPECS/mesa.spec +++ b/SPECS/mesa.spec @@ -41,8 +41,8 @@ Name: mesa Summary: Mesa graphics libraries -Version: 20.1.2 -Release: 3%{?rctag:.%{rctag}}%{?dist} +Version: 20.1.4 +Release: 1%{?rctag:.%{rctag}}%{?dist} License: MIT URL: http://www.mesa3d.org @@ -56,11 +56,6 @@ Source3: Makefile # Fedora opts to ignore the optional part of clause 2 and treat that code as 2 clause BSD. Source4: Mesa-MLAA-License-Clarification-Email.txt -# fix llvmpipe big-endian (#1847064) -Patch1: 0001-gallivm-nir-fix-const-loading-on-big-endian-systems.patch -Patch2: 0001-glsl-fix-constant-packing-for-64-bit-big-endian.patch -Patch3: 0001-gallivm-nir-fix-big-endian-64-bit-splitting-merging.patch - # Add support for TU11x nvidia Patch10: 0001-nir-use-bitfield_insert-instead-of-bfi-in-nir_lower_.patch Patch11: nouveau-tu1xx-support.patch @@ -551,6 +546,10 @@ done %endif %changelog +* Wed Aug 05 2020 Dave Airlie - 20.1.4-1 +- Update to 20.1.4 +- Update nouveau tu1xx support patch (Karol) + * Mon Jun 29 2020 Dave Airlie - 20.1.2-3 - a fix on top of the big-endian fix (#1847064)