diff --git a/.gitignore b/.gitignore index eed2164..0fcb908 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/mesa-20170307.tar.xz +SOURCES/mesa-20171019.tar.xz diff --git a/.mesa.metadata b/.mesa.metadata index c0393a2..22afd08 100644 --- a/.mesa.metadata +++ b/.mesa.metadata @@ -1 +1 @@ -42c01da9375d21e1b1814503ef5730c7260e8d62 SOURCES/mesa-20170307.tar.xz +7ec79a2e38091d819744f3fbf40d5fcb42d792fb SOURCES/mesa-20171019.tar.xz diff --git a/SOURCES/0001-Revert-draw-use-SoA-fetch-not-AoS-one.patch b/SOURCES/0001-Revert-draw-use-SoA-fetch-not-AoS-one.patch deleted file mode 100644 index 3245007..0000000 --- a/SOURCES/0001-Revert-draw-use-SoA-fetch-not-AoS-one.patch +++ /dev/null @@ -1,123 +0,0 @@ -From e4e52b06a9dec7d076ceeb4469bb2ca8b37c6cd5 Mon Sep 17 00:00:00 2001 -From: Lyude -Date: Tue, 2 May 2017 17:05:50 -0400 -Subject: [PATCH] Revert "draw: use SoA fetch, not AoS one" - -This reverts commit e827d9175675aaa6cfc0b981e2a80685fb7b3a74. - -Signed-off-by: Lyude ---- - src/gallium/auxiliary/draw/draw_llvm.c | 71 +++++++++++++++++++++++----------- - 1 file changed, 48 insertions(+), 23 deletions(-) - -diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c -index 8952dc8..19b75a5 100644 ---- a/src/gallium/auxiliary/draw/draw_llvm.c -+++ b/src/gallium/auxiliary/draw/draw_llvm.c -@@ -713,6 +713,39 @@ fetch_instanced(struct gallivm_state *gallivm, - - - static void -+convert_to_soa(struct gallivm_state *gallivm, -+ LLVMValueRef src_aos[LP_MAX_VECTOR_WIDTH / 32], -+ LLVMValueRef dst_soa[TGSI_NUM_CHANNELS], -+ const struct lp_type soa_type) -+{ -+ unsigned j, k; -+ struct lp_type aos_channel_type = soa_type; -+ -+ LLVMValueRef aos_channels[TGSI_NUM_CHANNELS]; -+ unsigned pixels_per_channel = soa_type.length / TGSI_NUM_CHANNELS; -+ -+ debug_assert(TGSI_NUM_CHANNELS == 4); -+ debug_assert((soa_type.length % TGSI_NUM_CHANNELS) == 0); -+ -+ aos_channel_type.length >>= 1; -+ -+ for (j = 0; j < TGSI_NUM_CHANNELS; ++j) { -+ 
LLVMValueRef channel[LP_MAX_VECTOR_LENGTH] = { 0 }; -+ -+ assert(pixels_per_channel <= LP_MAX_VECTOR_LENGTH); -+ -+ for (k = 0; k < pixels_per_channel; ++k) { -+ channel[k] = src_aos[j + TGSI_NUM_CHANNELS * k]; -+ } -+ -+ aos_channels[j] = lp_build_concat(gallivm, channel, aos_channel_type, pixels_per_channel); -+ } -+ -+ lp_build_transpose_aos(gallivm, soa_type, aos_channels, dst_soa); -+} -+ -+ -+static void - fetch_vector(struct gallivm_state *gallivm, - const struct util_format_description *format_desc, - struct lp_type vs_type, -@@ -722,10 +755,11 @@ fetch_vector(struct gallivm_state *gallivm, - LLVMValueRef *inputs, - LLVMValueRef indices) - { -+ LLVMValueRef zero = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)); - LLVMBuilderRef builder = gallivm->builder; - struct lp_build_context blduivec; -- struct lp_type fetch_type = vs_type; - LLVMValueRef offset, valid_mask; -+ LLVMValueRef aos_fetch[LP_MAX_VECTOR_WIDTH / 32]; - unsigned i; - - lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type)); -@@ -749,37 +783,28 @@ fetch_vector(struct gallivm_state *gallivm, - } - - /* -- * Unlike fetch_instanced, use SoA fetch instead of multiple AoS fetches. -- * This should always produce better code. -+ * Note: we probably really want to use SoA fetch, not AoS one (albeit -+ * for most formats it will amount to the same as this isn't very -+ * optimized). But looks dangerous since it assumes alignment. - */ -+ for (i = 0; i < vs_type.length; i++) { -+ LLVMValueRef offset1, elem; -+ elem = lp_build_const_int32(gallivm, i); -+ offset1 = LLVMBuildExtractElement(builder, offset, elem, ""); - -- /* The type handling is annoying here... 
*/ -- if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB && -- format_desc->channel[0].pure_integer) { -- if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { -- fetch_type = lp_type_int_vec(vs_type.width, vs_type.width * vs_type.length); -- } -- else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) { -- fetch_type = lp_type_uint_vec(vs_type.width, vs_type.width * vs_type.length); -- } -- } -- -- lp_build_fetch_rgba_soa(gallivm, format_desc, -- fetch_type, FALSE, map_ptr, offset, -- blduivec.zero, blduivec.zero, -- NULL, inputs); -- -- for (i = 0; i < TGSI_NUM_CHANNELS; i++) { -- inputs[i] = LLVMBuildBitCast(builder, inputs[i], -- lp_build_vec_type(gallivm, vs_type), ""); -+ aos_fetch[i] = lp_build_fetch_rgba_aos(gallivm, format_desc, -+ lp_float32_vec4_type(), -+ FALSE, map_ptr, offset1, -+ zero, zero, NULL); - } -+ convert_to_soa(gallivm, aos_fetch, inputs, vs_type); - -- /* out-of-bound fetches return all zeros */ - for (i = 0; i < TGSI_NUM_CHANNELS; i++) { - inputs[i] = LLVMBuildBitCast(builder, inputs[i], blduivec.vec_type, ""); - inputs[i] = LLVMBuildAnd(builder, inputs[i], valid_mask, ""); - inputs[i] = LLVMBuildBitCast(builder, inputs[i], - lp_build_vec_type(gallivm, vs_type), ""); -+ - } - } - --- -2.9.3 - diff --git a/SOURCES/0001-gallivm-Make-sure-module-has-the-correct-data-layout.patch b/SOURCES/0001-gallivm-Make-sure-module-has-the-correct-data-layout.patch deleted file mode 100644 index dd27ba0..0000000 --- a/SOURCES/0001-gallivm-Make-sure-module-has-the-correct-data-layout.patch +++ /dev/null @@ -1,88 +0,0 @@ -From 9aca538a8ae017222aded41bc530fef150db351c Mon Sep 17 00:00:00 2001 -From: Tom Stellard -Date: Fri, 5 May 2017 01:07:00 +0000 -Subject: [PATCH] gallivm: Make sure module has the correct data layout when - pass manager runs - -The datalayout for modules was purposely not being set in order to work around -the fact that the ExecutionEngine requires that the module's datalayout -matches the datalayout of the 
TargetMachine that the ExecutionEngine is -using. - -When the pass manager runs on a module with no datalayout, it uses -the default datalayout which is little-endian. This causes problems -on big-endian targets, because some optimizations that are legal on -little-endian or illegal on big-endian. - -To resolve this, we set the datalayout prior to running the pass -manager, and then clear it before creating the ExectionEngine. - -This patch fixes a lot of piglit tests on big-endian ppc64. - -Cc: mesa-stable@lists.freedesktop.org ---- - src/gallium/auxiliary/gallivm/lp_bld_init.c | 34 +++++++++++++++-------------- - 1 file changed, 18 insertions(+), 16 deletions(-) - -diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c -index ef2580e..9f1ade6 100644 ---- a/src/gallium/auxiliary/gallivm/lp_bld_init.c -+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c -@@ -125,19 +125,6 @@ create_pass_manager(struct gallivm_state *gallivm) - LLVMAddTargetData(gallivm->target, gallivm->passmgr); - #endif - -- /* Setting the module's DataLayout to an empty string will cause the -- * ExecutionEngine to copy to the DataLayout string from its target -- * machine to the module. As of LLVM 3.8 the module and the execution -- * engine are required to have the same DataLayout. -- * -- * TODO: This is just a temporary work-around. The correct solution is -- * for gallivm_init_state() to create a TargetMachine and pull the -- * DataLayout from there. Currently, the TargetMachine used by llvmpipe -- * is being implicitly created by the EngineBuilder in -- * lp_build_create_jit_compiler_for_module() -- */ -- --#if HAVE_LLVM < 0x0308 - { - char *td_str; - // New ones from the Module. 
-@@ -145,9 +132,6 @@ create_pass_manager(struct gallivm_state *gallivm) - LLVMSetDataLayout(gallivm->module, td_str); - free(td_str); - } --#else -- LLVMSetDataLayout(gallivm->module, ""); --#endif - - if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) { - /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, -@@ -628,6 +612,24 @@ gallivm_compile_module(struct gallivm_state *gallivm) - } - - if (use_mcjit) { -+ /* Setting the module's DataLayout to an empty string will cause the -+ * ExecutionEngine to copy to the DataLayout string from its target -+ * machine to the module. As of LLVM 3.8 the module and the execution -+ * engine are required to have the same DataLayout. -+ * -+ * We must make sure we do this after running the optimization passes, -+ * because those passes need a correct datalayout string. For example, -+ * if those optimization passes see an empty datalayout, they will assume -+ * this is a little endian target and will do optimizations that break big -+ * endian machines. -+ * -+ * TODO: This is just a temporary work-around. The correct solution is -+ * for gallivm_init_state() to create a TargetMachine and pull the -+ * DataLayout from there. 
Currently, the TargetMachine used by llvmpipe -+ * is being implicitly created by the EngineBuilder in -+ * lp_build_create_jit_compiler_for_module() -+ */ -+ LLVMSetDataLayout(gallivm->module, ""); - assert(!gallivm->engine); - if (!init_gallivm_engine(gallivm)) { - assert(0); --- -2.9.3 - diff --git a/SOURCES/0001-glsl-Allow-compatibility-shaders-with-MESA_GL_VERSIO.patch b/SOURCES/0001-glsl-Allow-compatibility-shaders-with-MESA_GL_VERSIO.patch deleted file mode 100644 index 13a45ee..0000000 --- a/SOURCES/0001-glsl-Allow-compatibility-shaders-with-MESA_GL_VERSIO.patch +++ /dev/null @@ -1,114 +0,0 @@ -From d7a0486a9e4e71d98c694872815909b8f8c0d3ac Mon Sep 17 00:00:00 2001 -From: Matt Turner -Date: Tue, 31 Jan 2017 15:41:52 -0800 -Subject: [PATCH] glsl: Allow compatibility shaders with - MESA_GL_VERSION_OVERRIDE=... - -Previously if you used MESA_GL_VERSION_OVERRIDE=3.3COMPAT, Mesa exposed -an OpenGL 3.3 compatibility profile context (with various unimplemented -features and bugs), but still refused to compile shaders with - - #version 330 compatibility - -This patch simply adds a small bit of plumbing to let that through. - -Of course the same caveats apply: compatibility profile is still not -supported (and will not be supported), so there are no guarantees that -anything will work. - -Tested-by: Dylan Baker -Reviewed-by: Anuj Phogat -Reviewed-by: Ian Romanick ---- - src/compiler/glsl/builtin_types.cpp | 2 +- - src/compiler/glsl/builtin_variables.cpp | 2 +- - src/compiler/glsl/glsl_parser_extras.cpp | 13 +++++++++++-- - src/compiler/glsl/glsl_parser_extras.h | 1 + - 4 files changed, 14 insertions(+), 4 deletions(-) - -diff --git a/src/compiler/glsl/builtin_types.cpp b/src/compiler/glsl/builtin_types.cpp -index a63d736..cae972b 100644 ---- a/src/compiler/glsl/builtin_types.cpp -+++ b/src/compiler/glsl/builtin_types.cpp -@@ -288,7 +288,7 @@ _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state) - /* Add deprecated structure types. 
While these were deprecated in 1.30, - * they're still present. We've removed them in 1.40+ (OpenGL 3.1+). - */ -- if (!state->es_shader && state->language_version < 140) { -+ if (state->compat_shader) { - for (unsigned i = 0; i < ARRAY_SIZE(deprecated_types); i++) { - add_type(symbols, deprecated_types[i]); - } -diff --git a/src/compiler/glsl/builtin_variables.cpp b/src/compiler/glsl/builtin_variables.cpp -index 4eb275e..be593e9 100644 ---- a/src/compiler/glsl/builtin_variables.cpp -+++ b/src/compiler/glsl/builtin_variables.cpp -@@ -444,7 +444,7 @@ private: - builtin_variable_generator::builtin_variable_generator( - exec_list *instructions, struct _mesa_glsl_parse_state *state) - : instructions(instructions), state(state), symtab(state->symbols), -- compatibility(!state->is_version(140, 100)), -+ compatibility(state->compat_shader || !state->is_version(140, 100)), - bool_t(glsl_type::bool_type), int_t(glsl_type::int_type), - uint_t(glsl_type::uint_type), - float_t(glsl_type::float_type), vec2_t(glsl_type::vec2_type), -diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp -index 6fe1dd9..c4da79a 100644 ---- a/src/compiler/glsl/glsl_parser_extras.cpp -+++ b/src/compiler/glsl/glsl_parser_extras.cpp -@@ -83,6 +83,7 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, - this->forced_language_version = ctx->Const.ForceGLSLVersion; - this->zero_init = ctx->Const.GLSLZeroInit; - this->gl_version = 20; -+ this->compat_shader = true; - this->es_shader = false; - this->ARB_texture_rectangle_enable = true; - -@@ -370,6 +371,7 @@ _mesa_glsl_parse_state::process_version_directive(YYLTYPE *locp, int version, - const char *ident) - { - bool es_token_present = false; -+ bool compat_token_present = false; - if (ident) { - if (strcmp(ident, "es") == 0) { - es_token_present = true; -@@ -379,8 +381,12 @@ _mesa_glsl_parse_state::process_version_directive(YYLTYPE *locp, int version, - * a core profile shader since that's 
the only profile we support. - */ - } else if (strcmp(ident, "compatibility") == 0) { -- _mesa_glsl_error(locp, this, -- "the compatibility profile is not supported"); -+ compat_token_present = true; -+ -+ if (this->ctx->API != API_OPENGL_COMPAT) { -+ _mesa_glsl_error(locp, this, -+ "the compatibility profile is not supported"); -+ } - } else { - _mesa_glsl_error(locp, this, - "\"%s\" is not a valid shading language profile; " -@@ -412,6 +418,9 @@ _mesa_glsl_parse_state::process_version_directive(YYLTYPE *locp, int version, - else - this->language_version = version; - -+ this->compat_shader = compat_token_present || -+ (!this->es_shader && this->language_version < 140); -+ - bool supported = false; - for (unsigned i = 0; i < this->num_supported_versions; i++) { - if (this->supported_versions[i].ver == this->language_version -diff --git a/src/compiler/glsl/glsl_parser_extras.h b/src/compiler/glsl/glsl_parser_extras.h -index 424cab5..66a954f 100644 ---- a/src/compiler/glsl/glsl_parser_extras.h -+++ b/src/compiler/glsl/glsl_parser_extras.h -@@ -348,6 +348,7 @@ struct _mesa_glsl_parse_state { - } supported_versions[16]; - - bool es_shader; -+ bool compat_shader; - unsigned language_version; - unsigned forced_language_version; - bool zero_init; --- -2.9.3 - diff --git a/SOURCES/0001-intel-Add-more-Coffee-Lake-PCI-IDs.patch b/SOURCES/0001-intel-Add-more-Coffee-Lake-PCI-IDs.patch new file mode 100644 index 0000000..e2d25df --- /dev/null +++ b/SOURCES/0001-intel-Add-more-Coffee-Lake-PCI-IDs.patch @@ -0,0 +1,45 @@ +From fe668b5c155aee4443dde0748065241e09293302 Mon Sep 17 00:00:00 2001 +From: Anuj Phogat +Date: Fri, 5 Jan 2018 09:17:36 -0800 +Subject: [PATCH] intel: Add more Coffee Lake PCI IDs + +More Coffee Lake PCI IDs have been added to the spec. 
+ +Signed-off-by: Anuj Phogat +Reviewed-by: Rodrigo Vivi +--- + include/pci_ids/i965_pci_ids.h | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/include/pci_ids/i965_pci_ids.h b/include/pci_ids/i965_pci_ids.h +index 0dd01a4..9616f7d 100644 +--- a/include/pci_ids/i965_pci_ids.h ++++ b/include/pci_ids/i965_pci_ids.h +@@ -167,15 +167,23 @@ CHIPSET(0x3184, glk, "Intel(R) HD Graphics (Geminilake)") + CHIPSET(0x3185, glk_2x6, "Intel(R) HD Graphics (Geminilake 2x6)") + CHIPSET(0x3E90, cfl_gt1, "Intel(R) HD Graphics (Coffeelake 2x6 GT1)") + CHIPSET(0x3E93, cfl_gt1, "Intel(R) HD Graphics (Coffeelake 2x6 GT1)") ++CHIPSET(0x3E99, cfl_gt1, "Intel(R) HD Graphics (Coffeelake 2x6 GT1)") ++CHIPSET(0x3EA1, cfl_gt1, "Intel(R) HD Graphics (Coffeelake 2x6 GT1)") ++CHIPSET(0x3EA4, cfl_gt1, "Intel(R) HD Graphics (Coffeelake 2x6 GT1)") + CHIPSET(0x3E91, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 3x8 GT2)") + CHIPSET(0x3E92, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 3x8 GT2)") + CHIPSET(0x3E96, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 3x8 GT2)") ++CHIPSET(0x3E9A, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 3x8 GT2)") + CHIPSET(0x3E9B, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 3x8 GT2)") + CHIPSET(0x3E94, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 3x8 GT2)") ++CHIPSET(0x3EA0, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 3x8 GT2)") ++CHIPSET(0x3EA3, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 3x8 GT2)") ++CHIPSET(0x3EA9, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 3x8 GT2)") ++CHIPSET(0x3EA2, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 3x8 GT3)") ++CHIPSET(0x3EA5, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 3x8 GT3)") + CHIPSET(0x3EA6, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 3x8 GT3)") + CHIPSET(0x3EA7, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 3x8 GT3)") + CHIPSET(0x3EA8, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 3x8 GT3)") +-CHIPSET(0x3EA5, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 3x8 GT3)") + CHIPSET(0x5A49, cnl_2x8, "Intel(R) HD Graphics (Cannonlake 
2x8 GT0.5)") + CHIPSET(0x5A4A, cnl_2x8, "Intel(R) HD Graphics (Cannonlake 2x8 GT0.5)") + CHIPSET(0x5A41, cnl_3x8, "Intel(R) HD Graphics (Cannonlake 3x8 GT1)") +-- +2.9.5 + diff --git a/SOURCES/0001-intel-blorp-Use-mocs.tex-for-depth-stencil.patch b/SOURCES/0001-intel-blorp-Use-mocs.tex-for-depth-stencil.patch new file mode 100644 index 0000000..69eee59 --- /dev/null +++ b/SOURCES/0001-intel-blorp-Use-mocs.tex-for-depth-stencil.patch @@ -0,0 +1,33 @@ +From d284bd93e387019b34796b6d8e7a985d60590157 Mon Sep 17 00:00:00 2001 +From: Jason Ekstrand +Date: Fri, 3 Nov 2017 14:31:51 -0700 +Subject: [PATCH 1/5] intel/blorp: Use mocs.tex for depth stencil + +Cc: "17.3" +Tested-by: Lyude Paul +Reviewed-by: Kenneth Graunke +Signed-off-by: Lyude +--- + src/intel/blorp/blorp_genX_exec.h | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) + +diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h +index 93534169ef..565acca929 100644 +--- a/src/intel/blorp/blorp_genX_exec.h ++++ b/src/intel/blorp/blorp_genX_exec.h +@@ -1364,11 +1364,7 @@ blorp_emit_depth_stencil_config(struct blorp_batch *batch, + return; + + struct isl_depth_stencil_hiz_emit_info info = { +-#if GEN_GEN >= 7 +- .mocs = 1, /* GEN7_MOCS_L3 */ +-#else +- .mocs = 0, +-#endif ++ .mocs = batch->blorp->mocs.tex, + }; + + if (params->depth.enabled) { +-- +2.14.3 + diff --git a/SOURCES/0001-mesa-Squash-merge-of-S3TC-support.patch b/SOURCES/0001-mesa-Squash-merge-of-S3TC-support.patch new file mode 100644 index 0000000..1b4433b --- /dev/null +++ b/SOURCES/0001-mesa-Squash-merge-of-S3TC-support.patch @@ -0,0 +1,2492 @@ +From aff9c0e3d29983f90438dd4006e6f2654c878fd4 Mon Sep 17 00:00:00 2001 +From: Matt Turner +Date: Wed, 27 Sep 2017 19:58:28 -0700 +Subject: [PATCH] mesa: Squash merge of S3TC support + +(cherry picked from commit 04396a134f003aece573df593acfa1ab4418ffe8) +(cherry picked from commit f7daa737d17df6d0a847e5c61e48d25e598671f4) +(cherry picked from commit 
7ce9999166f24996d24d56d3effcae181d401111) +(cherry picked from commit 82c54c4fdc8495d7522c782141c080314459690a) +(cherry picked from commit fb5338c4b77db70ea26b0745cff1cb3e330a5139) +(cherry picked from commit 34cf3c43beed2fafb4512e921f39c818478f86d7) +(cherry picked from commit 78c6221f18ab451f2e57bc61852595a60f82e3cb) +(cherry picked from commit c5d508028454f42923aee1ea1ab876f01f680ae7) +(cherry picked from commit f6c56e07fc5a8e81fd90688c9fee239f18c3480e) +(cherry picked from commit 3a8a5e77e8f992aaa3539e060885138c2fcddad1) +(cherry picked from commit dc546a7bb3fae1d597e5a22d9527540ec4f072c8) +(cherry picked from commit c17c47207b96172ca9c85a16f7fb7f1d3ea959d8) +(cherry picked from commit 8d02abd0feaaef28a35d89903bd534a7f27c38d7) +(cherry picked from commit 74b5568978968f580b9809135198db1015bc55b7) +--- + configure.ac | 4 +- + src/gallium/auxiliary/util/u_format.c | 2 +- + src/gallium/auxiliary/util/u_format_s3tc.c | 133 +-- + src/gallium/auxiliary/util/u_format_s3tc.h | 6 - + src/gallium/drivers/freedreno/freedreno_screen.c | 2 - + src/gallium/drivers/i915/i915_screen.c | 2 - + src/gallium/drivers/llvmpipe/lp_screen.c | 6 - + src/gallium/drivers/llvmpipe/lp_test_format.c | 7 - + src/gallium/drivers/nouveau/nouveau_screen.c | 2 - + src/gallium/drivers/r300/r300_screen.c | 3 +- + src/gallium/drivers/r300/r300_texture.c | 4 - + src/gallium/drivers/r600/r600_state_common.c | 4 - + src/gallium/drivers/radeon/r600_pipe_common.c | 1 - + src/gallium/drivers/radeonsi/si_state.c | 4 - + src/gallium/drivers/softpipe/sp_screen.c | 6 - + src/gallium/drivers/swr/swr_screen.cpp | 6 - + src/gallium/drivers/virgl/virgl_screen.c | 5 +- + src/gallium/include/state_tracker/st_api.h | 1 - + src/gallium/state_trackers/dri/dri_screen.c | 16 - + src/gallium/state_trackers/osmesa/osmesa.c | 1 - + src/gallium/tests/unit/u_format_test.c | 7 - + src/mesa/Makefile.sources | 1 - + src/mesa/SConscript | 4 - + src/mesa/drivers/dri/common/xmlpool/t_options.h | 5 - + 
src/mesa/drivers/dri/i915/intel_extensions.c | 5 +- + src/mesa/drivers/dri/i915/intel_screen.c | 1 - + src/mesa/drivers/dri/i965/intel_extensions.c | 4 +- + src/mesa/drivers/dri/i965/intel_screen.c | 2 - + src/mesa/drivers/dri/nouveau/nv10_context.c | 6 +- + src/mesa/drivers/dri/nouveau/nv20_context.c | 6 +- + src/mesa/drivers/dri/r200/r200_context.c | 10 +- + src/mesa/drivers/dri/radeon/radeon_context.c | 10 +- + src/mesa/drivers/dri/radeon/radeon_screen.c | 2 - + src/mesa/main/context.c | 2 - + src/mesa/main/dlopen.h | 97 --- + src/mesa/main/extensions.c | 6 +- + src/mesa/main/mtypes.h | 3 - + src/mesa/main/texcompress_s3tc.c | 269 ++---- + src/mesa/main/texcompress_s3tc.h | 3 - + src/mesa/main/texcompress_s3tc_tmp.h | 989 +++++++++++++++++++++++ + src/mesa/main/texformat.c | 14 +- + src/mesa/main/teximage.c | 32 - + src/mesa/state_tracker/st_context.c | 2 +- + src/mesa/state_tracker/st_extensions.c | 8 +- + src/mesa/state_tracker/st_extensions.h | 3 +- + src/mesa/state_tracker/st_format.c | 4 +- + src/mesa/state_tracker/st_manager.c | 2 +- + 47 files changed, 1083 insertions(+), 629 deletions(-) + delete mode 100644 src/mesa/main/dlopen.h + create mode 100644 src/mesa/main/texcompress_s3tc_tmp.h + +diff --git a/configure.ac b/configure.ac +index 49dd002502..cdce1ea8b1 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -862,9 +862,9 @@ AC_LINK_IFELSE([AC_LANG_SOURCE([[ + + dnl Check to see if dlopen is in default libraries (like Solaris, which + dnl has it in libc), or if libdl is needed to get it. 
+-AC_CHECK_FUNC([dlopen], [DEFINES="$DEFINES -DHAVE_DLOPEN"], ++AC_CHECK_FUNC([dlopen], [], + [AC_CHECK_LIB([dl], [dlopen], +- [DEFINES="$DEFINES -DHAVE_DLOPEN"; DLOPEN_LIBS="-ldl"])]) ++ [DLOPEN_LIBS="-ldl"])]) + AC_SUBST([DLOPEN_LIBS]) + + dnl Check if that library also has dladdr +diff --git a/src/gallium/auxiliary/util/u_format.c b/src/gallium/auxiliary/util/u_format.c +index 3d281905ce..0fc3231654 100644 +--- a/src/gallium/auxiliary/util/u_format.c ++++ b/src/gallium/auxiliary/util/u_format.c +@@ -238,7 +238,7 @@ util_format_is_subsampled_422(enum pipe_format format) + boolean + util_format_is_supported(enum pipe_format format, unsigned bind) + { +- if (util_format_is_s3tc(format) && !util_format_s3tc_enabled) { ++ if (format >= PIPE_FORMAT_COUNT) { + return FALSE; + } + +diff --git a/src/gallium/auxiliary/util/u_format_s3tc.c b/src/gallium/auxiliary/util/u_format_s3tc.c +index 8c4f2150be..3f755e5363 100644 +--- a/src/gallium/auxiliary/util/u_format_s3tc.c ++++ b/src/gallium/auxiliary/util/u_format_s3tc.c +@@ -28,136 +28,15 @@ + #include "u_format.h" + #include "u_format_s3tc.h" + #include "util/format_srgb.h" ++#include "../../../mesa/main/texcompress_s3tc_tmp.h" + + +-#if defined(_WIN32) || defined(WIN32) +-#define DXTN_LIBNAME "dxtn.dll" +-#elif defined(__CYGWIN__) +-#define DXTN_LIBNAME "cygtxc_dxtn.dll" +-#elif defined(__APPLE__) +-#define DXTN_LIBNAME "libtxc_dxtn.dylib" +-#else +-#define DXTN_LIBNAME "libtxc_dxtn.so" +-#endif ++util_format_dxtn_fetch_t util_format_dxt1_rgb_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgb_dxt1; ++util_format_dxtn_fetch_t util_format_dxt1_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt1; ++util_format_dxtn_fetch_t util_format_dxt3_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt3; ++util_format_dxtn_fetch_t util_format_dxt5_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt5; + +- +-static void +-util_format_dxt1_rgb_fetch_stub(int src_stride, +- const uint8_t *src, +- int col, 
int row, +- uint8_t *dst) +-{ +- assert(0); +-} +- +- +-static void +-util_format_dxt1_rgba_fetch_stub(int src_stride, +- const uint8_t *src, +- int col, int row, +- uint8_t *dst ) +-{ +- assert(0); +-} +- +- +-static void +-util_format_dxt3_rgba_fetch_stub(int src_stride, +- const uint8_t *src, +- int col, int row, +- uint8_t *dst ) +-{ +- assert(0); +-} +- +- +-static void +-util_format_dxt5_rgba_fetch_stub(int src_stride, +- const uint8_t *src, +- int col, int row, +- uint8_t *dst ) +-{ +- assert(0); +-} +- +- +-static void +-util_format_dxtn_pack_stub(int src_comps, +- int width, int height, +- const uint8_t *src, +- enum util_format_dxtn dst_format, +- uint8_t *dst, +- int dst_stride) +-{ +- assert(0); +-} +- +- +-boolean util_format_s3tc_enabled = FALSE; +- +-util_format_dxtn_fetch_t util_format_dxt1_rgb_fetch = util_format_dxt1_rgb_fetch_stub; +-util_format_dxtn_fetch_t util_format_dxt1_rgba_fetch = util_format_dxt1_rgba_fetch_stub; +-util_format_dxtn_fetch_t util_format_dxt3_rgba_fetch = util_format_dxt3_rgba_fetch_stub; +-util_format_dxtn_fetch_t util_format_dxt5_rgba_fetch = util_format_dxt5_rgba_fetch_stub; +- +-util_format_dxtn_pack_t util_format_dxtn_pack = util_format_dxtn_pack_stub; +- +- +-void +-util_format_s3tc_init(void) +-{ +- static boolean first_time = TRUE; +- struct util_dl_library *library = NULL; +- util_dl_proc fetch_2d_texel_rgb_dxt1; +- util_dl_proc fetch_2d_texel_rgba_dxt1; +- util_dl_proc fetch_2d_texel_rgba_dxt3; +- util_dl_proc fetch_2d_texel_rgba_dxt5; +- util_dl_proc tx_compress_dxtn; +- +- if (!first_time) +- return; +- first_time = FALSE; +- +- if (util_format_s3tc_enabled) +- return; +- +- library = util_dl_open(DXTN_LIBNAME); +- if (!library) { +- debug_printf("couldn't open " DXTN_LIBNAME ", software DXTn " +- "compression/decompression unavailable\n"); +- return; +- } +- +- fetch_2d_texel_rgb_dxt1 = +- util_dl_get_proc_address(library, "fetch_2d_texel_rgb_dxt1"); +- fetch_2d_texel_rgba_dxt1 = +- 
util_dl_get_proc_address(library, "fetch_2d_texel_rgba_dxt1"); +- fetch_2d_texel_rgba_dxt3 = +- util_dl_get_proc_address(library, "fetch_2d_texel_rgba_dxt3"); +- fetch_2d_texel_rgba_dxt5 = +- util_dl_get_proc_address(library, "fetch_2d_texel_rgba_dxt5"); +- tx_compress_dxtn = +- util_dl_get_proc_address(library, "tx_compress_dxtn"); +- +- if (!util_format_dxt1_rgb_fetch || +- !util_format_dxt1_rgba_fetch || +- !util_format_dxt3_rgba_fetch || +- !util_format_dxt5_rgba_fetch || +- !util_format_dxtn_pack) { +- debug_printf("couldn't reference all symbols in " DXTN_LIBNAME +- ", software DXTn compression/decompression " +- "unavailable\n"); +- util_dl_close(library); +- return; +- } +- +- util_format_dxt1_rgb_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgb_dxt1; +- util_format_dxt1_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt1; +- util_format_dxt3_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt3; +- util_format_dxt5_rgba_fetch = (util_format_dxtn_fetch_t)fetch_2d_texel_rgba_dxt5; +- util_format_dxtn_pack = (util_format_dxtn_pack_t)tx_compress_dxtn; +- util_format_s3tc_enabled = TRUE; +-} ++util_format_dxtn_pack_t util_format_dxtn_pack = (util_format_dxtn_pack_t)tx_compress_dxtn; + + + /* +diff --git a/src/gallium/auxiliary/util/u_format_s3tc.h b/src/gallium/auxiliary/util/u_format_s3tc.h +index ae20010cdf..6f188c67f9 100644 +--- a/src/gallium/auxiliary/util/u_format_s3tc.h ++++ b/src/gallium/auxiliary/util/u_format_s3tc.h +@@ -58,8 +58,6 @@ typedef void + uint8_t *dst, + int dst_stride); + +-extern boolean util_format_s3tc_enabled; +- + extern util_format_dxtn_fetch_t util_format_dxt1_rgb_fetch; + extern util_format_dxtn_fetch_t util_format_dxt1_rgba_fetch; + extern util_format_dxtn_fetch_t util_format_dxt3_rgba_fetch; +@@ -69,10 +67,6 @@ extern util_format_dxtn_pack_t util_format_dxtn_pack; + + + void +-util_format_s3tc_init(void); +- +- +-void + util_format_dxt1_rgb_unpack_rgba_8unorm(uint8_t *dst_row, unsigned dst_stride, const 
uint8_t *src_row, unsigned src_stride, unsigned width, unsigned height); + + void +diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c +index a915d65ee0..59402ef3f6 100644 +--- a/src/gallium/drivers/freedreno/freedreno_screen.c ++++ b/src/gallium/drivers/freedreno/freedreno_screen.c +@@ -875,8 +875,6 @@ fd_screen_create(struct fd_device *dev) + + slab_create_parent(&screen->transfer_pool, sizeof(struct fd_transfer), 16); + +- util_format_s3tc_init(); +- + return pscreen; + + fail: +diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c +index 4ad98e2a08..b5675e417c 100644 +--- a/src/gallium/drivers/i915/i915_screen.c ++++ b/src/gallium/drivers/i915/i915_screen.c +@@ -621,7 +621,5 @@ i915_screen_create(struct i915_winsys *iws) + + i915_debug_init(is); + +- util_format_s3tc_init(); +- + return &is->base; + } +diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c +index e98e30d50a..3ec68a5771 100644 +--- a/src/gallium/drivers/llvmpipe/lp_screen.c ++++ b/src/gallium/drivers/llvmpipe/lp_screen.c +@@ -528,10 +528,6 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen, + format != PIPE_FORMAT_ETC1_RGB8) + return FALSE; + +- if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { +- return util_format_s3tc_enabled; +- } +- + /* + * Everything can be supported by u_format + * (those without fetch_rgba_float might be not but shouldn't hit that) +@@ -682,7 +678,5 @@ llvmpipe_create_screen(struct sw_winsys *winsys) + } + (void) mtx_init(&screen->rast_mutex, mtx_plain); + +- util_format_s3tc_init(); +- + return &screen->base; + } +diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c +index 9b16162131..e9a6e01fdc 100644 +--- a/src/gallium/drivers/llvmpipe/lp_test_format.c ++++ b/src/gallium/drivers/llvmpipe/lp_test_format.c +@@ -357,8 +357,6 @@ test_all(unsigned verbose, FILE *fp) 
+ enum pipe_format format; + boolean success = TRUE; + +- util_format_s3tc_init(); +- + #if USE_TEXTURE_CACHE + cache_ptr = align_malloc(sizeof(struct lp_build_format_cache), 16); + #endif +@@ -383,11 +381,6 @@ test_all(unsigned verbose, FILE *fp) + if (util_format_is_pure_integer(format)) + continue; + +- if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC && +- !util_format_s3tc_enabled) { +- continue; +- } +- + /* only have util fetch func for etc1 */ + if (format_desc->layout == UTIL_FORMAT_LAYOUT_ETC && + format != PIPE_FORMAT_ETC1_RGB8) { +diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c +index 13b76d7681..ea68809c6e 100644 +--- a/src/gallium/drivers/nouveau/nouveau_screen.c ++++ b/src/gallium/drivers/nouveau/nouveau_screen.c +@@ -242,8 +242,6 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev) + + nouveau_disk_cache_create(screen); + +- util_format_s3tc_init(); +- + screen->lowmem_bindings = PIPE_BIND_GLOBAL; /* gallium limit */ + screen->vidmem_bindings = + PIPE_BIND_RENDER_TARGET | PIPE_BIND_DEPTH_STENCIL | +diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c +index 5cdb24871c..82d7183b9e 100644 +--- a/src/gallium/drivers/r300/r300_screen.c ++++ b/src/gallium/drivers/r300/r300_screen.c +@@ -127,7 +127,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) + + /* r300 cannot do swizzling of compressed textures. Supported otherwise. */ + case PIPE_CAP_TEXTURE_SWIZZLE: +- return util_format_s3tc_enabled ? r300screen->caps.dxtc_swizzle : 1; ++ return r300screen->caps.dxtc_swizzle; + + /* We don't support color clamping on r500, so that we can use color + * intepolators for generic varyings. 
*/ +@@ -762,7 +762,6 @@ struct pipe_screen* r300_screen_create(struct radeon_winsys *rws, unsigned flags + + slab_create_parent(&r300screen->pool_transfers, sizeof(struct pipe_transfer), 64); + +- util_format_s3tc_init(); + (void) mtx_init(&r300screen->cmask_mutex, mtx_plain); + + return &r300screen->screen; +diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c +index cdf9ccb973..87632fcbb4 100644 +--- a/src/gallium/drivers/r300/r300_texture.c ++++ b/src/gallium/drivers/r300/r300_texture.c +@@ -251,10 +251,6 @@ uint32_t r300_translate_texformat(enum pipe_format format, + + /* S3TC formats. */ + if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { +- if (!util_format_s3tc_enabled) { +- return ~0; /* Unsupported. */ +- } +- + switch (format) { + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: +diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c +index 4c97efa73b..306b3c0abb 100644 +--- a/src/gallium/drivers/r600/r600_state_common.c ++++ b/src/gallium/drivers/r600/r600_state_common.c +@@ -2403,10 +2403,6 @@ uint32_t r600_translate_texformat(struct pipe_screen *screen, + } + + if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { +- if (!util_format_s3tc_enabled) { +- goto out_unknown; +- } +- + switch (format) { + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: +diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c +index b02203c3c1..a3c6ed0a22 100644 +--- a/src/gallium/drivers/radeon/r600_pipe_common.c ++++ b/src/gallium/drivers/radeon/r600_pipe_common.c +@@ -1408,7 +1408,6 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, + 1 << util_logbase2(rscreen->force_aniso)); + } + +- util_format_s3tc_init(); + (void) mtx_init(&rscreen->aux_context_lock, mtx_plain); + (void) mtx_init(&rscreen->gpu_load_mutex, mtx_plain); + +diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c +index c610103032..931c326b49 100644 +--- a/src/gallium/drivers/radeonsi/si_state.c ++++ b/src/gallium/drivers/radeonsi/si_state.c +@@ -1525,10 +1525,6 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen, + if (!enable_compressed_formats) + goto out_unknown; + +- if (!util_format_s3tc_enabled) { +- goto out_unknown; +- } +- + switch (format) { + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: +diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c +index 5c96a14c80..6aa6beb1ad 100644 +--- a/src/gallium/drivers/softpipe/sp_screen.c ++++ b/src/gallium/drivers/softpipe/sp_screen.c +@@ -455,10 +455,6 @@ softpipe_is_format_supported( struct pipe_screen *screen, + * All other operations (sampling, transfer, etc). + */ + +- if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { +- return util_format_s3tc_enabled; +- } +- + /* + * Everything else should be supported by u_format. + */ +@@ -585,8 +581,6 @@ softpipe_create_screen(struct sw_winsys *winsys) + screen->base.get_compute_param = softpipe_get_compute_param; + screen->use_llvm = debug_get_option_use_llvm(); + +- util_format_s3tc_init(); +- + softpipe_init_screen_texture_funcs(&screen->base); + softpipe_init_screen_fence_funcs(&screen->base); + +diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp +index 952ae0c77a..d4f4ee8da1 100644 +--- a/src/gallium/drivers/swr/swr_screen.cpp ++++ b/src/gallium/drivers/swr/swr_screen.cpp +@@ -147,10 +147,6 @@ swr_is_format_supported(struct pipe_screen *_screen, + return FALSE; + } + +- if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { +- return util_format_s3tc_enabled; +- } +- + return TRUE; + } + +@@ -1140,8 +1136,6 @@ swr_create_screen_internal(struct sw_winsys *winsys) + + swr_fence_init(&screen->base); + +- util_format_s3tc_init(); +- + swr_validate_env_options(screen); + + return &screen->base; +diff --git 
a/src/gallium/drivers/virgl/virgl_screen.c b/src/gallium/drivers/virgl/virgl_screen.c +index 5df08407d7..c8c29d0812 100644 +--- a/src/gallium/drivers/virgl/virgl_screen.c ++++ b/src/gallium/drivers/virgl/virgl_screen.c +@@ -480,9 +480,7 @@ virgl_is_format_supported( struct pipe_screen *screen, + */ + + if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { +- if (util_format_s3tc_enabled) +- goto out_lookup; +- return FALSE; ++ goto out_lookup; + } + if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { + goto out_lookup; +@@ -606,6 +604,5 @@ virgl_create_screen(struct virgl_winsys *vws) + + slab_create_parent(&screen->texture_transfer_pool, sizeof(struct virgl_transfer), 16); + +- util_format_s3tc_init(); + return &screen->base; + } +diff --git a/src/gallium/include/state_tracker/st_api.h b/src/gallium/include/state_tracker/st_api.h +index bc62a69da3..fe9fb1816a 100644 +--- a/src/gallium/include/state_tracker/st_api.h ++++ b/src/gallium/include/state_tracker/st_api.h +@@ -246,7 +246,6 @@ struct st_config_options + boolean disable_shader_bit_encoding; + boolean force_glsl_extensions_warn; + unsigned force_glsl_version; +- boolean force_s3tc_enable; + boolean allow_glsl_extension_directive_midshader; + boolean allow_glsl_builtin_variable_redeclaration; + boolean allow_higher_compat_version; +diff --git a/src/gallium/state_trackers/dri/dri_screen.c b/src/gallium/state_trackers/dri/dri_screen.c +index 6bd479074f..09af6a6234 100644 +--- a/src/gallium/state_trackers/dri/dri_screen.c ++++ b/src/gallium/state_trackers/dri/dri_screen.c +@@ -62,7 +62,6 @@ const __DRIconfigOptionsExtension gallium_config_options = { + DRI_CONF_SECTION_END + + DRI_CONF_SECTION_QUALITY +- DRI_CONF_FORCE_S3TC_ENABLE("false") + DRI_CONF_PP_CELSHADE(0) + DRI_CONF_PP_NORED(0) + DRI_CONF_PP_NOGREEN(0) +@@ -109,8 +108,6 @@ dri_fill_st_options(struct dri_screen *screen) + driQueryOptionb(optionCache, "force_glsl_extensions_warn"); + options->force_glsl_version = + driQueryOptioni(optionCache, 
"force_glsl_version"); +- options->force_s3tc_enable = +- driQueryOptionb(optionCache, "force_s3tc_enable"); + options->allow_glsl_extension_directive_midshader = + driQueryOptionb(optionCache, "allow_glsl_extension_directive_midshader"); + options->allow_glsl_builtin_variable_redeclaration = +@@ -564,19 +561,6 @@ dri_init_screen_helper(struct dri_screen *screen, + else + screen->target = PIPE_TEXTURE_RECT; + +- /* Handle force_s3tc_enable. */ +- if (!util_format_s3tc_enabled && screen->options.force_s3tc_enable) { +- /* Ensure libtxc_dxtn has been loaded if available. +- * Forcing S3TC on before calling this would prevent loading +- * the library. +- * This is just a precaution, the driver should have called it +- * already. +- */ +- util_format_s3tc_init(); +- +- util_format_s3tc_enabled = TRUE; +- } +- + dri_postprocessing_init(screen); + + screen->st_api->query_versions(screen->st_api, &screen->base, +diff --git a/src/gallium/state_trackers/osmesa/osmesa.c b/src/gallium/state_trackers/osmesa/osmesa.c +index 751d255c54..2f9558db31 100644 +--- a/src/gallium/state_trackers/osmesa/osmesa.c ++++ b/src/gallium/state_trackers/osmesa/osmesa.c +@@ -688,7 +688,6 @@ OSMesaCreateContextAttribs(const int *attribList, OSMesaContext sharelist) + attribs.options.disable_blend_func_extended = FALSE; + attribs.options.disable_glsl_line_continuations = FALSE; + attribs.options.disable_shader_bit_encoding = FALSE; +- attribs.options.force_s3tc_enable = FALSE; + attribs.options.force_glsl_version = 0; + + osmesa_init_st_visual(&attribs.visual, +diff --git a/src/gallium/tests/unit/u_format_test.c b/src/gallium/tests/unit/u_format_test.c +index 69d6c7dd3a..6de581fd04 100644 +--- a/src/gallium/tests/unit/u_format_test.c ++++ b/src/gallium/tests/unit/u_format_test.c +@@ -722,11 +722,6 @@ test_all(void) + assert(format_desc->block.height <= UTIL_FORMAT_MAX_UNPACKED_HEIGHT); + assert(format_desc->block.width <= UTIL_FORMAT_MAX_UNPACKED_WIDTH); + +- if (format_desc->layout == 
UTIL_FORMAT_LAYOUT_S3TC && +- !util_format_s3tc_enabled) { +- continue; +- } +- + # define TEST_ONE_FUNC(name) \ + if (format_desc->name) { \ + if (!test_one_func(format_desc, &test_format_##name, #name)) { \ +@@ -758,8 +753,6 @@ int main(int argc, char **argv) + { + boolean success; + +- util_format_s3tc_init(); +- + success = test_all(); + + return success ? 0 : 1; +diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources +index 86fbf3974e..bc93ded3db 100644 +--- a/src/mesa/Makefile.sources ++++ b/src/mesa/Makefile.sources +@@ -67,7 +67,6 @@ MAIN_FILES = \ + main/depth.h \ + main/dlist.c \ + main/dlist.h \ +- main/dlopen.h \ + main/drawpix.c \ + main/drawpix.h \ + main/drawtex.c \ +diff --git a/src/mesa/SConscript b/src/mesa/SConscript +index b63e15a3f0..ba98ad4323 100644 +--- a/src/mesa/SConscript ++++ b/src/mesa/SConscript +@@ -31,10 +31,6 @@ if env['platform'] == 'windows': + if not env['gles']: + # prevent _glapi_* from being declared __declspec(dllimport) + env.Append(CPPDEFINES = ['_GLAPI_NO_EXPORTS']) +-else: +- env.Append(CPPDEFINES = [ +- ('HAVE_DLOPEN', '1'), +- ]) + + # parse Makefile.sources + source_lists = env.ParseSourceList('Makefile.sources') +diff --git a/src/mesa/drivers/dri/common/xmlpool/t_options.h b/src/mesa/drivers/dri/common/xmlpool/t_options.h +index e308839aa7..afe342df07 100644 +--- a/src/mesa/drivers/dri/common/xmlpool/t_options.h ++++ b/src/mesa/drivers/dri/common/xmlpool/t_options.h +@@ -172,11 +172,6 @@ DRI_CONF_OPT_BEGIN_B(no_neg_lod_bias, def) \ + DRI_CONF_DESC(en,gettext("Forbid negative texture LOD bias")) \ + DRI_CONF_OPT_END + +-#define DRI_CONF_FORCE_S3TC_ENABLE(def) \ +-DRI_CONF_OPT_BEGIN_B(force_s3tc_enable, def) \ +- DRI_CONF_DESC(en,gettext("Enable S3TC texture compression even if software support is not available")) \ +-DRI_CONF_OPT_END +- + #define DRI_CONF_PRECISE_TRIG(def) \ + DRI_CONF_OPT_BEGIN_B(precise_trig, def) \ + DRI_CONF_DESC(en,gettext("Prefer accuracy over performance in trig functions")) \ +diff 
--git a/src/mesa/drivers/dri/i915/intel_extensions.c b/src/mesa/drivers/dri/i915/intel_extensions.c +index 4f2c6fa34e..c85bd787fe 100644 +--- a/src/mesa/drivers/dri/i915/intel_extensions.c ++++ b/src/mesa/drivers/dri/i915/intel_extensions.c +@@ -100,9 +100,6 @@ intelInitExtensions(struct gl_context *ctx) + ctx->Extensions.ARB_occlusion_query = true; + } + +- if (intel->ctx.Mesa_DXTn +- || driQueryOptionb(&intel->optionCache, "force_s3tc_enable")) +- ctx->Extensions.EXT_texture_compression_s3tc = true; +- ++ ctx->Extensions.EXT_texture_compression_s3tc = true; + ctx->Extensions.ANGLE_texture_compression_dxt = true; + } +diff --git a/src/mesa/drivers/dri/i915/intel_screen.c b/src/mesa/drivers/dri/i915/intel_screen.c +index 863f6ef7ec..c223133363 100644 +--- a/src/mesa/drivers/dri/i915/intel_screen.c ++++ b/src/mesa/drivers/dri/i915/intel_screen.c +@@ -67,7 +67,6 @@ DRI_CONF_BEGIN + + DRI_CONF_SECTION_END + DRI_CONF_SECTION_QUALITY +- DRI_CONF_FORCE_S3TC_ENABLE("false") + DRI_CONF_SECTION_END + DRI_CONF_SECTION_DEBUG + DRI_CONF_NO_RAST("false") +diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c +index b91bbdc8d9..4fe97a0ce7 100644 +--- a/src/mesa/drivers/dri/i965/intel_extensions.c ++++ b/src/mesa/drivers/dri/i965/intel_extensions.c +@@ -297,8 +297,6 @@ intelInitExtensions(struct gl_context *ctx) + if (ctx->API != API_OPENGL_CORE) + ctx->Extensions.ARB_color_buffer_float = true; + +- if (ctx->Mesa_DXTn || driQueryOptionb(&brw->optionCache, "force_s3tc_enable")) +- ctx->Extensions.EXT_texture_compression_s3tc = true; +- ++ ctx->Extensions.EXT_texture_compression_s3tc = true; + ctx->Extensions.ANGLE_texture_compression_dxt = true; + } +diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c +index 5adb8ef1f6..e60f9fb10a 100644 +--- a/src/mesa/drivers/dri/i965/intel_screen.c ++++ b/src/mesa/drivers/dri/i965/intel_screen.c +@@ -69,8 +69,6 @@ DRI_CONF_BEGIN + 
DRI_CONF_SECTION_END + + DRI_CONF_SECTION_QUALITY +- DRI_CONF_FORCE_S3TC_ENABLE("false") +- + DRI_CONF_PRECISE_TRIG("false") + + DRI_CONF_OPT_BEGIN(clamp_max_samples, int, -1) +diff --git a/src/mesa/drivers/dri/nouveau/nv10_context.c b/src/mesa/drivers/dri/nouveau/nv10_context.c +index 7a86ba2358..be2178fb79 100644 +--- a/src/mesa/drivers/dri/nouveau/nv10_context.c ++++ b/src/mesa/drivers/dri/nouveau/nv10_context.c +@@ -451,10 +451,8 @@ nv10_context_create(struct nouveau_screen *screen, gl_api api, + ctx->Extensions.EXT_texture_env_dot3 = true; + ctx->Extensions.NV_fog_distance = true; + ctx->Extensions.NV_texture_rectangle = true; +- if (ctx->Mesa_DXTn) { +- ctx->Extensions.EXT_texture_compression_s3tc = true; +- ctx->Extensions.ANGLE_texture_compression_dxt = true; +- } ++ ctx->Extensions.EXT_texture_compression_s3tc = true; ++ ctx->Extensions.ANGLE_texture_compression_dxt = true; + + /* GL constants. */ + ctx->Const.MaxTextureLevels = 12; +diff --git a/src/mesa/drivers/dri/nouveau/nv20_context.c b/src/mesa/drivers/dri/nouveau/nv20_context.c +index ec638c036b..0ab2db0b08 100644 +--- a/src/mesa/drivers/dri/nouveau/nv20_context.c ++++ b/src/mesa/drivers/dri/nouveau/nv20_context.c +@@ -462,10 +462,8 @@ nv20_context_create(struct nouveau_screen *screen, gl_api api, + ctx->Extensions.EXT_texture_env_dot3 = true; + ctx->Extensions.NV_fog_distance = true; + ctx->Extensions.NV_texture_rectangle = true; +- if (ctx->Mesa_DXTn) { +- ctx->Extensions.EXT_texture_compression_s3tc = true; +- ctx->Extensions.ANGLE_texture_compression_dxt = true; +- } ++ ctx->Extensions.EXT_texture_compression_s3tc = true; ++ ctx->Extensions.ANGLE_texture_compression_dxt = true; + + /* GL constants. 
*/ + ctx->Const.MaxTextureCoordUnits = NV20_TEXTURE_UNITS; +diff --git a/src/mesa/drivers/dri/r200/r200_context.c b/src/mesa/drivers/dri/r200/r200_context.c +index 5a7f33499b..de15d520fe 100644 +--- a/src/mesa/drivers/dri/r200/r200_context.c ++++ b/src/mesa/drivers/dri/r200/r200_context.c +@@ -362,14 +362,8 @@ GLboolean r200CreateContext( gl_api api, + others get the bit ordering right but don't actually do YUV-RGB conversion */ + ctx->Extensions.MESA_ycbcr_texture = true; + } +- if (rmesa->radeon.glCtx.Mesa_DXTn) { +- ctx->Extensions.EXT_texture_compression_s3tc = true; +- ctx->Extensions.ANGLE_texture_compression_dxt = true; +- } +- else if (driQueryOptionb (&rmesa->radeon.optionCache, "force_s3tc_enable")) { +- ctx->Extensions.EXT_texture_compression_s3tc = true; +- ctx->Extensions.ANGLE_texture_compression_dxt = true; +- } ++ ctx->Extensions.EXT_texture_compression_s3tc = true; ++ ctx->Extensions.ANGLE_texture_compression_dxt = true; + + #if 0 + r200InitDriverFuncs( ctx ); +diff --git a/src/mesa/drivers/dri/radeon/radeon_context.c b/src/mesa/drivers/dri/radeon/radeon_context.c +index 5ef3467ac1..e84b544c19 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_context.c ++++ b/src/mesa/drivers/dri/radeon/radeon_context.c +@@ -310,14 +310,8 @@ r100CreateContext( gl_api api, + ctx->Extensions.NV_texture_rectangle = true; + ctx->Extensions.OES_EGL_image = true; + +- if (rmesa->radeon.glCtx.Mesa_DXTn) { +- ctx->Extensions.EXT_texture_compression_s3tc = true; +- ctx->Extensions.ANGLE_texture_compression_dxt = true; +- } +- else if (driQueryOptionb (&rmesa->radeon.optionCache, "force_s3tc_enable")) { +- ctx->Extensions.EXT_texture_compression_s3tc = true; +- ctx->Extensions.ANGLE_texture_compression_dxt = true; +- } ++ ctx->Extensions.EXT_texture_compression_s3tc = true; ++ ctx->Extensions.ANGLE_texture_compression_dxt = true; + + /* XXX these should really go right after _mesa_init_driver_functions() */ + radeon_fbo_init(&rmesa->radeon); +diff --git 
a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c +index 0f072aff20..4192283fee 100644 +--- a/src/mesa/drivers/dri/radeon/radeon_screen.c ++++ b/src/mesa/drivers/dri/radeon/radeon_screen.c +@@ -86,7 +86,6 @@ DRI_CONF_BEGIN + DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB) + DRI_CONF_DEF_MAX_ANISOTROPY(1.0,"1.0,2.0,4.0,8.0,16.0") + DRI_CONF_NO_NEG_LOD_BIAS("false") +- DRI_CONF_FORCE_S3TC_ENABLE("false") + DRI_CONF_COLOR_REDUCTION(DRI_CONF_COLOR_REDUCTION_DITHER) + DRI_CONF_ROUND_MODE(DRI_CONF_ROUND_TRUNC) + DRI_CONF_DITHER_MODE(DRI_CONF_DITHER_XERRORDIFF) +@@ -113,7 +112,6 @@ DRI_CONF_BEGIN + DRI_CONF_TEXTURE_DEPTH(DRI_CONF_TEXTURE_DEPTH_FB) + DRI_CONF_DEF_MAX_ANISOTROPY(1.0,"1.0,2.0,4.0,8.0,16.0") + DRI_CONF_NO_NEG_LOD_BIAS("false") +- DRI_CONF_FORCE_S3TC_ENABLE("false") + DRI_CONF_COLOR_REDUCTION(DRI_CONF_COLOR_REDUCTION_DITHER) + DRI_CONF_ROUND_MODE(DRI_CONF_ROUND_TRUNC) + DRI_CONF_DITHER_MODE(DRI_CONF_DITHER_XERRORDIFF) +diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c +index 3aabdc92bb..1a15016e7a 100644 +--- a/src/mesa/main/context.c ++++ b/src/mesa/main/context.c +@@ -861,8 +861,6 @@ init_attrib_groups(struct gl_context *ctx) + if (!_mesa_init_texture( ctx )) + return GL_FALSE; + +- _mesa_init_texture_s3tc( ctx ); +- + /* Miscellaneous */ + ctx->NewState = _NEW_ALL; + ctx->NewDriverState = ~0; +diff --git a/src/mesa/main/dlopen.h b/src/mesa/main/dlopen.h +deleted file mode 100644 +index 4d20ff2c7c..0000000000 +--- a/src/mesa/main/dlopen.h ++++ /dev/null +@@ -1,97 +0,0 @@ +-/* +- * Mesa 3-D graphics library +- * +- * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. 
+- * +- * Permission is hereby granted, free of charge, to any person obtaining a +- * copy of this software and associated documentation files (the "Software"), +- * to deal in the Software without restriction, including without limitation +- * the rights to use, copy, modify, merge, publish, distribute, sublicense, +- * and/or sell copies of the Software, and to permit persons to whom the +- * Software is furnished to do so, subject to the following conditions: +- * +- * The above copyright notice and this permission notice shall be included +- * in all copies or substantial portions of the Software. +- * +- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +- * OTHER DEALINGS IN THE SOFTWARE. +- */ +- +- +-#ifndef DLOPEN_H +-#define DLOPEN_H +- +-/** +- * Wrapper functions for dlopen(), dlsym(), dlclose(). +- * Note that the #ifdef tests for various environments should be expanded. +- */ +- +-#if defined(HAVE_DLOPEN) +-#include +-#endif +-#if defined(_WIN32) +-#include +-#endif +- +-typedef void (*GenericFunc)(void); +- +-/** +- * Wrapper for dlopen(). +- * Note that 'flags' isn't used at this time. +- */ +-static inline void * +-_mesa_dlopen(const char *libname, int flags) +-{ +-#if defined(HAVE_DLOPEN) +- flags = RTLD_LAZY | RTLD_GLOBAL; /* Overriding flags at this time */ +- return dlopen(libname, flags); +-#elif defined(_WIN32) +- return LoadLibraryA(libname); +-#else +- return NULL; +-#endif +-} +- +-/** +- * Wrapper for dlsym() that does a cast to a generic function type, +- * rather than a void *. This reduces the number of warnings that are +- * generated. 
+- */ +-static inline GenericFunc +-_mesa_dlsym(void *handle, const char *fname) +-{ +- union { +- void *v; +- GenericFunc f; +- } u; +-#if defined(HAVE_DLOPEN) +- u.v = dlsym(handle, fname); +-#elif defined(_WIN32) +- u.v = (void *) GetProcAddress(handle, fname); +-#else +- u.v = NULL; +-#endif +- return u.f; +-} +- +-/** +- * Wrapper for dlclose(). +- */ +-static inline void +-_mesa_dlclose(void *handle) +-{ +-#if defined(HAVE_DLOPEN) +- dlclose(handle); +-#elif defined(_WIN32) +- FreeLibrary(handle); +-#else +- (void) handle; +-#endif +-} +- +-#endif +diff --git a/src/mesa/main/extensions.c b/src/mesa/main/extensions.c +index 62a731675d..46083001d2 100644 +--- a/src/mesa/main/extensions.c ++++ b/src/mesa/main/extensions.c +@@ -176,10 +176,8 @@ _mesa_enable_sw_extensions(struct gl_context *ctx) + ctx->Extensions.EXT_gpu_program_parameters = GL_TRUE; + ctx->Extensions.OES_standard_derivatives = GL_TRUE; + ctx->Extensions.TDFX_texture_compression_FXT1 = GL_TRUE; +- if (ctx->Mesa_DXTn) { +- ctx->Extensions.ANGLE_texture_compression_dxt = GL_TRUE; +- ctx->Extensions.EXT_texture_compression_s3tc = GL_TRUE; +- } ++ ctx->Extensions.ANGLE_texture_compression_dxt = GL_TRUE; ++ ctx->Extensions.EXT_texture_compression_s3tc = GL_TRUE; + } + + /** +diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h +index 0cb002469b..f4641b9c68 100644 +--- a/src/mesa/main/mtypes.h ++++ b/src/mesa/main/mtypes.h +@@ -4958,9 +4958,6 @@ struct gl_context + */ + GLboolean HasConfig; + +- /** software compression/decompression supported or not */ +- GLboolean Mesa_DXTn; +- + GLboolean TextureFormatSupported[MESA_FORMAT_COUNT]; + + GLboolean RasterDiscard; /**< GL_RASTERIZER_DISCARD */ +diff --git a/src/mesa/main/texcompress_s3tc.c b/src/mesa/main/texcompress_s3tc.c +index 992ad058bf..1c6cbba892 100644 +--- a/src/mesa/main/texcompress_s3tc.c ++++ b/src/mesa/main/texcompress_s3tc.c +@@ -31,91 +31,17 @@ + + #include "glheader.h" + #include "imports.h" +-#include "dlopen.h" + #include 
"image.h" + #include "macros.h" + #include "mtypes.h" + #include "texcompress.h" + #include "texcompress_s3tc.h" ++#include "texcompress_s3tc_tmp.h" + #include "texstore.h" + #include "format_unpack.h" + #include "util/format_srgb.h" + + +-#if defined(_WIN32) || defined(WIN32) +-#define DXTN_LIBNAME "dxtn.dll" +-#define RTLD_LAZY 0 +-#define RTLD_GLOBAL 0 +-#elif defined(__CYGWIN__) +-#define DXTN_LIBNAME "cygtxc_dxtn.dll" +-#else +-#define DXTN_LIBNAME "libtxc_dxtn.so" +-#endif +- +-typedef void (*dxtFetchTexelFuncExt)( GLint srcRowstride, const GLubyte *pixdata, GLint col, GLint row, GLvoid *texelOut ); +- +-static dxtFetchTexelFuncExt fetch_ext_rgb_dxt1 = NULL; +-static dxtFetchTexelFuncExt fetch_ext_rgba_dxt1 = NULL; +-static dxtFetchTexelFuncExt fetch_ext_rgba_dxt3 = NULL; +-static dxtFetchTexelFuncExt fetch_ext_rgba_dxt5 = NULL; +- +-typedef void (*dxtCompressTexFuncExt)(GLint srccomps, GLint width, +- GLint height, const GLubyte *srcPixData, +- GLenum destformat, GLubyte *dest, +- GLint dstRowStride); +- +-static dxtCompressTexFuncExt ext_tx_compress_dxtn = NULL; +- +-static void *dxtlibhandle = NULL; +- +- +-void +-_mesa_init_texture_s3tc( struct gl_context *ctx ) +-{ +- /* called during context initialization */ +- ctx->Mesa_DXTn = GL_FALSE; +- if (!dxtlibhandle) { +- dxtlibhandle = _mesa_dlopen(DXTN_LIBNAME, 0); +- if (!dxtlibhandle) { +- _mesa_warning(ctx, "couldn't open " DXTN_LIBNAME ", software DXTn " +- "compression/decompression unavailable"); +- } +- else { +- /* the fetch functions are not per context! Might be problematic... 
*/ +- fetch_ext_rgb_dxt1 = (dxtFetchTexelFuncExt) +- _mesa_dlsym(dxtlibhandle, "fetch_2d_texel_rgb_dxt1"); +- fetch_ext_rgba_dxt1 = (dxtFetchTexelFuncExt) +- _mesa_dlsym(dxtlibhandle, "fetch_2d_texel_rgba_dxt1"); +- fetch_ext_rgba_dxt3 = (dxtFetchTexelFuncExt) +- _mesa_dlsym(dxtlibhandle, "fetch_2d_texel_rgba_dxt3"); +- fetch_ext_rgba_dxt5 = (dxtFetchTexelFuncExt) +- _mesa_dlsym(dxtlibhandle, "fetch_2d_texel_rgba_dxt5"); +- ext_tx_compress_dxtn = (dxtCompressTexFuncExt) +- _mesa_dlsym(dxtlibhandle, "tx_compress_dxtn"); +- +- if (!fetch_ext_rgb_dxt1 || +- !fetch_ext_rgba_dxt1 || +- !fetch_ext_rgba_dxt3 || +- !fetch_ext_rgba_dxt5 || +- !ext_tx_compress_dxtn) { +- _mesa_warning(ctx, "couldn't reference all symbols in " +- DXTN_LIBNAME ", software DXTn compression/decompression " +- "unavailable"); +- fetch_ext_rgb_dxt1 = NULL; +- fetch_ext_rgba_dxt1 = NULL; +- fetch_ext_rgba_dxt3 = NULL; +- fetch_ext_rgba_dxt5 = NULL; +- ext_tx_compress_dxtn = NULL; +- _mesa_dlclose(dxtlibhandle); +- dxtlibhandle = NULL; +- } +- } +- } +- if (dxtlibhandle) { +- ctx->Mesa_DXTn = GL_TRUE; +- } +-} +- + /** + * Store user's image in rgb_dxt1 format. 
+ */ +@@ -158,14 +84,9 @@ _mesa_texstore_rgb_dxt1(TEXSTORE_PARAMS) + + dst = dstSlices[0]; + +- if (ext_tx_compress_dxtn) { +- (*ext_tx_compress_dxtn)(3, srcWidth, srcHeight, pixels, +- GL_COMPRESSED_RGB_S3TC_DXT1_EXT, +- dst, dstRowStride); +- } +- else { +- _mesa_warning(ctx, "external dxt library not available: texstore_rgb_dxt1"); +- } ++ tx_compress_dxtn(3, srcWidth, srcHeight, pixels, ++ GL_COMPRESSED_RGB_S3TC_DXT1_EXT, ++ dst, dstRowStride); + + free((void *) tempImage); + +@@ -216,14 +137,9 @@ _mesa_texstore_rgba_dxt1(TEXSTORE_PARAMS) + + dst = dstSlices[0]; + +- if (ext_tx_compress_dxtn) { +- (*ext_tx_compress_dxtn)(4, srcWidth, srcHeight, pixels, +- GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, +- dst, dstRowStride); +- } +- else { +- _mesa_warning(ctx, "external dxt library not available: texstore_rgba_dxt1"); +- } ++ tx_compress_dxtn(4, srcWidth, srcHeight, pixels, ++ GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, ++ dst, dstRowStride); + + free((void*) tempImage); + +@@ -273,14 +189,9 @@ _mesa_texstore_rgba_dxt3(TEXSTORE_PARAMS) + + dst = dstSlices[0]; + +- if (ext_tx_compress_dxtn) { +- (*ext_tx_compress_dxtn)(4, srcWidth, srcHeight, pixels, +- GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, +- dst, dstRowStride); +- } +- else { +- _mesa_warning(ctx, "external dxt library not available: texstore_rgba_dxt3"); +- } ++ tx_compress_dxtn(4, srcWidth, srcHeight, pixels, ++ GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, ++ dst, dstRowStride); + + free((void *) tempImage); + +@@ -330,14 +241,9 @@ _mesa_texstore_rgba_dxt5(TEXSTORE_PARAMS) + + dst = dstSlices[0]; + +- if (ext_tx_compress_dxtn) { +- (*ext_tx_compress_dxtn)(4, srcWidth, srcHeight, pixels, +- GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, +- dst, dstRowStride); +- } +- else { +- _mesa_warning(ctx, "external dxt library not available: texstore_rgba_dxt5"); +- } ++ tx_compress_dxtn(4, srcWidth, srcHeight, pixels, ++ GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, ++ dst, dstRowStride); + + free((void *) tempImage); + +@@ -345,85 +251,52 @@ 
_mesa_texstore_rgba_dxt5(TEXSTORE_PARAMS) + } + + +-/** Report problem with dxt texture decompression, once */ +-static void +-problem(const char *func) +-{ +- static GLboolean warned = GL_FALSE; +- if (!warned) { +- _mesa_debug(NULL, "attempted to decode DXT texture without " +- "library available: %s\n", func); +- warned = GL_TRUE; +- } +-} +- +- + static void + fetch_rgb_dxt1(const GLubyte *map, + GLint rowStride, GLint i, GLint j, GLfloat *texel) + { +- if (fetch_ext_rgb_dxt1) { +- GLubyte tex[4]; +- fetch_ext_rgb_dxt1(rowStride, map, i, j, tex); +- texel[RCOMP] = UBYTE_TO_FLOAT(tex[RCOMP]); +- texel[GCOMP] = UBYTE_TO_FLOAT(tex[GCOMP]); +- texel[BCOMP] = UBYTE_TO_FLOAT(tex[BCOMP]); +- texel[ACOMP] = UBYTE_TO_FLOAT(tex[ACOMP]); +- } +- else { +- problem("rgb_dxt1"); +- } ++ GLubyte tex[4]; ++ fetch_2d_texel_rgb_dxt1(rowStride, map, i, j, tex); ++ texel[RCOMP] = UBYTE_TO_FLOAT(tex[RCOMP]); ++ texel[GCOMP] = UBYTE_TO_FLOAT(tex[GCOMP]); ++ texel[BCOMP] = UBYTE_TO_FLOAT(tex[BCOMP]); ++ texel[ACOMP] = UBYTE_TO_FLOAT(tex[ACOMP]); + } + + static void + fetch_rgba_dxt1(const GLubyte *map, + GLint rowStride, GLint i, GLint j, GLfloat *texel) + { +- if (fetch_ext_rgba_dxt1) { +- GLubyte tex[4]; +- fetch_ext_rgba_dxt1(rowStride, map, i, j, tex); +- texel[RCOMP] = UBYTE_TO_FLOAT(tex[RCOMP]); +- texel[GCOMP] = UBYTE_TO_FLOAT(tex[GCOMP]); +- texel[BCOMP] = UBYTE_TO_FLOAT(tex[BCOMP]); +- texel[ACOMP] = UBYTE_TO_FLOAT(tex[ACOMP]); +- } +- else { +- problem("rgba_dxt1"); +- } ++ GLubyte tex[4]; ++ fetch_2d_texel_rgba_dxt1(rowStride, map, i, j, tex); ++ texel[RCOMP] = UBYTE_TO_FLOAT(tex[RCOMP]); ++ texel[GCOMP] = UBYTE_TO_FLOAT(tex[GCOMP]); ++ texel[BCOMP] = UBYTE_TO_FLOAT(tex[BCOMP]); ++ texel[ACOMP] = UBYTE_TO_FLOAT(tex[ACOMP]); + } + + static void + fetch_rgba_dxt3(const GLubyte *map, + GLint rowStride, GLint i, GLint j, GLfloat *texel) + { +- if (fetch_ext_rgba_dxt3) { +- GLubyte tex[4]; +- fetch_ext_rgba_dxt3(rowStride, map, i, j, tex); +- texel[RCOMP] = 
UBYTE_TO_FLOAT(tex[RCOMP]); +- texel[GCOMP] = UBYTE_TO_FLOAT(tex[GCOMP]); +- texel[BCOMP] = UBYTE_TO_FLOAT(tex[BCOMP]); +- texel[ACOMP] = UBYTE_TO_FLOAT(tex[ACOMP]); +- } +- else { +- problem("rgba_dxt3"); +- } ++ GLubyte tex[4]; ++ fetch_2d_texel_rgba_dxt3(rowStride, map, i, j, tex); ++ texel[RCOMP] = UBYTE_TO_FLOAT(tex[RCOMP]); ++ texel[GCOMP] = UBYTE_TO_FLOAT(tex[GCOMP]); ++ texel[BCOMP] = UBYTE_TO_FLOAT(tex[BCOMP]); ++ texel[ACOMP] = UBYTE_TO_FLOAT(tex[ACOMP]); + } + + static void + fetch_rgba_dxt5(const GLubyte *map, + GLint rowStride, GLint i, GLint j, GLfloat *texel) + { +- if (fetch_ext_rgba_dxt5) { +- GLubyte tex[4]; +- fetch_ext_rgba_dxt5(rowStride, map, i, j, tex); +- texel[RCOMP] = UBYTE_TO_FLOAT(tex[RCOMP]); +- texel[GCOMP] = UBYTE_TO_FLOAT(tex[GCOMP]); +- texel[BCOMP] = UBYTE_TO_FLOAT(tex[BCOMP]); +- texel[ACOMP] = UBYTE_TO_FLOAT(tex[ACOMP]); +- } +- else { +- problem("rgba_dxt5"); +- } ++ GLubyte tex[4]; ++ fetch_2d_texel_rgba_dxt5(rowStride, map, i, j, tex); ++ texel[RCOMP] = UBYTE_TO_FLOAT(tex[RCOMP]); ++ texel[GCOMP] = UBYTE_TO_FLOAT(tex[GCOMP]); ++ texel[BCOMP] = UBYTE_TO_FLOAT(tex[BCOMP]); ++ texel[ACOMP] = UBYTE_TO_FLOAT(tex[ACOMP]); + } + + +@@ -431,68 +304,48 @@ static void + fetch_srgb_dxt1(const GLubyte *map, + GLint rowStride, GLint i, GLint j, GLfloat *texel) + { +- if (fetch_ext_rgb_dxt1) { +- GLubyte tex[4]; +- fetch_ext_rgb_dxt1(rowStride, map, i, j, tex); +- texel[RCOMP] = util_format_srgb_8unorm_to_linear_float(tex[RCOMP]); +- texel[GCOMP] = util_format_srgb_8unorm_to_linear_float(tex[GCOMP]); +- texel[BCOMP] = util_format_srgb_8unorm_to_linear_float(tex[BCOMP]); +- texel[ACOMP] = UBYTE_TO_FLOAT(tex[ACOMP]); +- } +- else { +- problem("srgb_dxt1"); +- } ++ GLubyte tex[4]; ++ fetch_2d_texel_rgb_dxt1(rowStride, map, i, j, tex); ++ texel[RCOMP] = util_format_srgb_8unorm_to_linear_float(tex[RCOMP]); ++ texel[GCOMP] = util_format_srgb_8unorm_to_linear_float(tex[GCOMP]); ++ texel[BCOMP] = util_format_srgb_8unorm_to_linear_float(tex[BCOMP]); 
++ texel[ACOMP] = UBYTE_TO_FLOAT(tex[ACOMP]); + } + + static void + fetch_srgba_dxt1(const GLubyte *map, + GLint rowStride, GLint i, GLint j, GLfloat *texel) + { +- if (fetch_ext_rgba_dxt1) { +- GLubyte tex[4]; +- fetch_ext_rgba_dxt1(rowStride, map, i, j, tex); +- texel[RCOMP] = util_format_srgb_8unorm_to_linear_float(tex[RCOMP]); +- texel[GCOMP] = util_format_srgb_8unorm_to_linear_float(tex[GCOMP]); +- texel[BCOMP] = util_format_srgb_8unorm_to_linear_float(tex[BCOMP]); +- texel[ACOMP] = UBYTE_TO_FLOAT(tex[ACOMP]); +- } +- else { +- problem("srgba_dxt1"); +- } ++ GLubyte tex[4]; ++ fetch_2d_texel_rgba_dxt1(rowStride, map, i, j, tex); ++ texel[RCOMP] = util_format_srgb_8unorm_to_linear_float(tex[RCOMP]); ++ texel[GCOMP] = util_format_srgb_8unorm_to_linear_float(tex[GCOMP]); ++ texel[BCOMP] = util_format_srgb_8unorm_to_linear_float(tex[BCOMP]); ++ texel[ACOMP] = UBYTE_TO_FLOAT(tex[ACOMP]); + } + + static void + fetch_srgba_dxt3(const GLubyte *map, + GLint rowStride, GLint i, GLint j, GLfloat *texel) + { +- if (fetch_ext_rgba_dxt3) { +- GLubyte tex[4]; +- fetch_ext_rgba_dxt3(rowStride, map, i, j, tex); +- texel[RCOMP] = util_format_srgb_8unorm_to_linear_float(tex[RCOMP]); +- texel[GCOMP] = util_format_srgb_8unorm_to_linear_float(tex[GCOMP]); +- texel[BCOMP] = util_format_srgb_8unorm_to_linear_float(tex[BCOMP]); +- texel[ACOMP] = UBYTE_TO_FLOAT(tex[ACOMP]); +- } +- else { +- problem("srgba_dxt3"); +- } ++ GLubyte tex[4]; ++ fetch_2d_texel_rgba_dxt3(rowStride, map, i, j, tex); ++ texel[RCOMP] = util_format_srgb_8unorm_to_linear_float(tex[RCOMP]); ++ texel[GCOMP] = util_format_srgb_8unorm_to_linear_float(tex[GCOMP]); ++ texel[BCOMP] = util_format_srgb_8unorm_to_linear_float(tex[BCOMP]); ++ texel[ACOMP] = UBYTE_TO_FLOAT(tex[ACOMP]); + } + + static void + fetch_srgba_dxt5(const GLubyte *map, + GLint rowStride, GLint i, GLint j, GLfloat *texel) + { +- if (fetch_ext_rgba_dxt5) { +- GLubyte tex[4]; +- fetch_ext_rgba_dxt5(rowStride, map, i, j, tex); +- texel[RCOMP] = 
util_format_srgb_8unorm_to_linear_float(tex[RCOMP]); +- texel[GCOMP] = util_format_srgb_8unorm_to_linear_float(tex[GCOMP]); +- texel[BCOMP] = util_format_srgb_8unorm_to_linear_float(tex[BCOMP]); +- texel[ACOMP] = UBYTE_TO_FLOAT(tex[ACOMP]); +- } +- else { +- problem("srgba_dxt5"); +- } ++ GLubyte tex[4]; ++ fetch_2d_texel_rgba_dxt5(rowStride, map, i, j, tex); ++ texel[RCOMP] = util_format_srgb_8unorm_to_linear_float(tex[RCOMP]); ++ texel[GCOMP] = util_format_srgb_8unorm_to_linear_float(tex[GCOMP]); ++ texel[BCOMP] = util_format_srgb_8unorm_to_linear_float(tex[BCOMP]); ++ texel[ACOMP] = UBYTE_TO_FLOAT(tex[ACOMP]); + } + + +diff --git a/src/mesa/main/texcompress_s3tc.h b/src/mesa/main/texcompress_s3tc.h +index 438b71fe33..0dbb5fc537 100644 +--- a/src/mesa/main/texcompress_s3tc.h ++++ b/src/mesa/main/texcompress_s3tc.h +@@ -44,9 +44,6 @@ extern GLboolean + _mesa_texstore_rgba_dxt5(TEXSTORE_PARAMS); + + +-extern void +-_mesa_init_texture_s3tc(struct gl_context *ctx); +- + extern compressed_fetch_func + _mesa_get_dxt_fetch_func(mesa_format format); + +diff --git a/src/mesa/main/texcompress_s3tc_tmp.h b/src/mesa/main/texcompress_s3tc_tmp.h +new file mode 100644 +index 0000000000..61630f2475 +--- /dev/null ++++ b/src/mesa/main/texcompress_s3tc_tmp.h +@@ -0,0 +1,989 @@ ++/* ++ * libtxc_dxtn ++ * Version: 1.0 ++ * ++ * Copyright (C) 2004 Roland Scheidegger All Rights Reserved. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included ++ * in all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS ++ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN ++ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN ++ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++#ifdef __APPLE__ ++#include <OpenGL/gl.h> ++#else ++#include <GL/gl.h> ++#endif ++ ++typedef GLubyte GLchan; ++#define UBYTE_TO_CHAN(b) (b) ++#define CHAN_MAX 255 ++#define RCOMP 0 ++#define GCOMP 1 ++#define BCOMP 2 ++#define ACOMP 3 ++ ++#define EXP5TO8R(packedcol) \ ++ ((((packedcol) >> 8) & 0xf8) | (((packedcol) >> 13) & 0x7)) ++ ++#define EXP6TO8G(packedcol) \ ++ ((((packedcol) >> 3) & 0xfc) | (((packedcol) >> 9) & 0x3)) ++ ++#define EXP5TO8B(packedcol) \ ++ ((((packedcol) << 3) & 0xf8) | (((packedcol) >> 2) & 0x7)) ++ ++#define EXP4TO8(col) \ ++ ((col) | ((col) << 4)) ++ ++/* inefficient.
To be efficient, it would be necessary to decode 16 pixels at once */ ++ ++static void dxt135_decode_imageblock ( const GLubyte *img_block_src, ++ GLint i, GLint j, GLuint dxt_type, GLvoid *texel ) { ++ GLchan *rgba = (GLchan *) texel; ++ const GLushort color0 = img_block_src[0] | (img_block_src[1] << 8); ++ const GLushort color1 = img_block_src[2] | (img_block_src[3] << 8); ++ const GLuint bits = img_block_src[4] | (img_block_src[5] << 8) | ++ (img_block_src[6] << 16) | (img_block_src[7] << 24); ++ /* What about big/little endian? */ ++ GLubyte bit_pos = 2 * (j * 4 + i) ; ++ GLubyte code = (GLubyte) ((bits >> bit_pos) & 3); ++ ++ rgba[ACOMP] = CHAN_MAX; ++ switch (code) { ++ case 0: ++ rgba[RCOMP] = UBYTE_TO_CHAN( EXP5TO8R(color0) ); ++ rgba[GCOMP] = UBYTE_TO_CHAN( EXP6TO8G(color0) ); ++ rgba[BCOMP] = UBYTE_TO_CHAN( EXP5TO8B(color0) ); ++ break; ++ case 1: ++ rgba[RCOMP] = UBYTE_TO_CHAN( EXP5TO8R(color1) ); ++ rgba[GCOMP] = UBYTE_TO_CHAN( EXP6TO8G(color1) ); ++ rgba[BCOMP] = UBYTE_TO_CHAN( EXP5TO8B(color1) ); ++ break; ++ case 2: ++ if ((dxt_type > 1) || (color0 > color1)) { ++ rgba[RCOMP] = UBYTE_TO_CHAN( ((EXP5TO8R(color0) * 2 + EXP5TO8R(color1)) / 3) ); ++ rgba[GCOMP] = UBYTE_TO_CHAN( ((EXP6TO8G(color0) * 2 + EXP6TO8G(color1)) / 3) ); ++ rgba[BCOMP] = UBYTE_TO_CHAN( ((EXP5TO8B(color0) * 2 + EXP5TO8B(color1)) / 3) ); ++ } ++ else { ++ rgba[RCOMP] = UBYTE_TO_CHAN( ((EXP5TO8R(color0) + EXP5TO8R(color1)) / 2) ); ++ rgba[GCOMP] = UBYTE_TO_CHAN( ((EXP6TO8G(color0) + EXP6TO8G(color1)) / 2) ); ++ rgba[BCOMP] = UBYTE_TO_CHAN( ((EXP5TO8B(color0) + EXP5TO8B(color1)) / 2) ); ++ } ++ break; ++ case 3: ++ if ((dxt_type > 1) || (color0 > color1)) { ++ rgba[RCOMP] = UBYTE_TO_CHAN( ((EXP5TO8R(color0) + EXP5TO8R(color1) * 2) / 3) ); ++ rgba[GCOMP] = UBYTE_TO_CHAN( ((EXP6TO8G(color0) + EXP6TO8G(color1) * 2) / 3) ); ++ rgba[BCOMP] = UBYTE_TO_CHAN( ((EXP5TO8B(color0) + EXP5TO8B(color1) * 2) / 3) ); ++ } ++ else { ++ rgba[RCOMP] = 0; ++ rgba[GCOMP] = 0; ++ rgba[BCOMP] = 0; ++ if 
(dxt_type == 1) rgba[ACOMP] = UBYTE_TO_CHAN(0); ++ } ++ break; ++ default: ++ /* CANNOT happen (I hope) */ ++ break; ++ } ++} ++ ++ ++static void fetch_2d_texel_rgb_dxt1(GLint srcRowStride, const GLubyte *pixdata, ++ GLint i, GLint j, GLvoid *texel) ++{ ++ /* Extract the (i,j) pixel from pixdata and return it ++ * in texel[RCOMP], texel[GCOMP], texel[BCOMP], texel[ACOMP]. ++ */ ++ ++ const GLubyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 8); ++ dxt135_decode_imageblock(blksrc, (i&3), (j&3), 0, texel); ++} ++ ++ ++static void fetch_2d_texel_rgba_dxt1(GLint srcRowStride, const GLubyte *pixdata, ++ GLint i, GLint j, GLvoid *texel) ++{ ++ /* Extract the (i,j) pixel from pixdata and return it ++ * in texel[RCOMP], texel[GCOMP], texel[BCOMP], texel[ACOMP]. ++ */ ++ ++ const GLubyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 8); ++ dxt135_decode_imageblock(blksrc, (i&3), (j&3), 1, texel); ++} ++ ++static void fetch_2d_texel_rgba_dxt3(GLint srcRowStride, const GLubyte *pixdata, ++ GLint i, GLint j, GLvoid *texel) { ++ ++ /* Extract the (i,j) pixel from pixdata and return it ++ * in texel[RCOMP], texel[GCOMP], texel[BCOMP], texel[ACOMP]. ++ */ ++ ++ GLchan *rgba = (GLchan *) texel; ++ const GLubyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 16); ++ const GLubyte anibble = (blksrc[((j&3) * 4 + (i&3)) / 2] >> (4 * (i&1))) & 0xf; ++ dxt135_decode_imageblock(blksrc + 8, (i&3), (j&3), 2, texel); ++ rgba[ACOMP] = UBYTE_TO_CHAN( (GLubyte)(EXP4TO8(anibble)) ); ++} ++ ++static void fetch_2d_texel_rgba_dxt5(GLint srcRowStride, const GLubyte *pixdata, ++ GLint i, GLint j, GLvoid *texel) { ++ ++ /* Extract the (i,j) pixel from pixdata and return it ++ * in texel[RCOMP], texel[GCOMP], texel[BCOMP], texel[ACOMP]. 
++ */ ++ ++ GLchan *rgba = (GLchan *) texel; ++ const GLubyte *blksrc = (pixdata + ((srcRowStride + 3) / 4 * (j / 4) + (i / 4)) * 16); ++ const GLubyte alpha0 = blksrc[0]; ++ const GLubyte alpha1 = blksrc[1]; ++ const GLubyte bit_pos = ((j&3) * 4 + (i&3)) * 3; ++ const GLubyte acodelow = blksrc[2 + bit_pos / 8]; ++ const GLubyte acodehigh = blksrc[3 + bit_pos / 8]; ++ const GLubyte code = (acodelow >> (bit_pos & 0x7) | ++ (acodehigh << (8 - (bit_pos & 0x7)))) & 0x7; ++ dxt135_decode_imageblock(blksrc + 8, (i&3), (j&3), 2, texel); ++ if (code == 0) ++ rgba[ACOMP] = UBYTE_TO_CHAN( alpha0 ); ++ else if (code == 1) ++ rgba[ACOMP] = UBYTE_TO_CHAN( alpha1 ); ++ else if (alpha0 > alpha1) ++ rgba[ACOMP] = UBYTE_TO_CHAN( ((alpha0 * (8 - code) + (alpha1 * (code - 1))) / 7) ); ++ else if (code < 6) ++ rgba[ACOMP] = UBYTE_TO_CHAN( ((alpha0 * (6 - code) + (alpha1 * (code - 1))) / 5) ); ++ else if (code == 6) ++ rgba[ACOMP] = 0; ++ else ++ rgba[ACOMP] = CHAN_MAX; ++} ++ ++ ++/* weights used for error function, basically weights (unsquared 2/4/1) according to rgb->luminance conversion ++ not sure if this really reflects visual perception */ ++#define REDWEIGHT 4 ++#define GREENWEIGHT 16 ++#define BLUEWEIGHT 1 ++ ++#define ALPHACUT 127 ++ ++static void fancybasecolorsearch( GLubyte *blkaddr, GLubyte srccolors[4][4][4], GLubyte *bestcolor[2], ++ GLint numxpixels, GLint numypixels, GLint type, GLboolean haveAlpha) ++{ ++ /* use same luminance-weighted distance metric to determine encoding as for finding the base colors */ ++ ++ /* TODO could also try to find a better encoding for the 3-color-encoding type, this really should be done ++ if it's rgba_dxt1 and we have alpha in the block, currently even values which will be mapped to black ++ due to their alpha value will influence the result */ ++ GLint i, j, colors, z; ++ GLuint pixerror, pixerrorred, pixerrorgreen, pixerrorblue, pixerrorbest; ++ GLint colordist, blockerrlin[2][3]; ++ GLubyte nrcolor[2]; ++ GLint pixerrorcolorbest[3]; 
++ GLubyte enc = 0; ++ GLubyte cv[4][4]; ++ GLubyte testcolor[2][3]; ++ ++/* fprintf(stderr, "color begin 0 r/g/b %d/%d/%d, 1 r/g/b %d/%d/%d\n", ++ bestcolor[0][0], bestcolor[0][1], bestcolor[0][2], bestcolor[1][0], bestcolor[1][1], bestcolor[1][2]);*/ ++ if (((bestcolor[0][0] & 0xf8) << 8 | (bestcolor[0][1] & 0xfc) << 3 | bestcolor[0][2] >> 3) < ++ ((bestcolor[1][0] & 0xf8) << 8 | (bestcolor[1][1] & 0xfc) << 3 | bestcolor[1][2] >> 3)) { ++ testcolor[0][0] = bestcolor[0][0]; ++ testcolor[0][1] = bestcolor[0][1]; ++ testcolor[0][2] = bestcolor[0][2]; ++ testcolor[1][0] = bestcolor[1][0]; ++ testcolor[1][1] = bestcolor[1][1]; ++ testcolor[1][2] = bestcolor[1][2]; ++ } ++ else { ++ testcolor[1][0] = bestcolor[0][0]; ++ testcolor[1][1] = bestcolor[0][1]; ++ testcolor[1][2] = bestcolor[0][2]; ++ testcolor[0][0] = bestcolor[1][0]; ++ testcolor[0][1] = bestcolor[1][1]; ++ testcolor[0][2] = bestcolor[1][2]; ++ } ++ ++ for (i = 0; i < 3; i ++) { ++ cv[0][i] = testcolor[0][i]; ++ cv[1][i] = testcolor[1][i]; ++ cv[2][i] = (testcolor[0][i] * 2 + testcolor[1][i]) / 3; ++ cv[3][i] = (testcolor[0][i] + testcolor[1][i] * 2) / 3; ++ } ++ ++ blockerrlin[0][0] = 0; ++ blockerrlin[0][1] = 0; ++ blockerrlin[0][2] = 0; ++ blockerrlin[1][0] = 0; ++ blockerrlin[1][1] = 0; ++ blockerrlin[1][2] = 0; ++ ++ nrcolor[0] = 0; ++ nrcolor[1] = 0; ++ ++ for (j = 0; j < numypixels; j++) { ++ for (i = 0; i < numxpixels; i++) { ++ pixerrorbest = 0xffffffff; ++ for (colors = 0; colors < 4; colors++) { ++ colordist = srccolors[j][i][0] - (cv[colors][0]); ++ pixerror = colordist * colordist * REDWEIGHT; ++ pixerrorred = colordist; ++ colordist = srccolors[j][i][1] - (cv[colors][1]); ++ pixerror += colordist * colordist * GREENWEIGHT; ++ pixerrorgreen = colordist; ++ colordist = srccolors[j][i][2] - (cv[colors][2]); ++ pixerror += colordist * colordist * BLUEWEIGHT; ++ pixerrorblue = colordist; ++ if (pixerror < pixerrorbest) { ++ enc = colors; ++ pixerrorbest = pixerror; ++ pixerrorcolorbest[0] = 
pixerrorred; ++ pixerrorcolorbest[1] = pixerrorgreen; ++ pixerrorcolorbest[2] = pixerrorblue; ++ } ++ } ++ if (enc == 0) { ++ for (z = 0; z < 3; z++) { ++ blockerrlin[0][z] += 3 * pixerrorcolorbest[z]; ++ } ++ nrcolor[0] += 3; ++ } ++ else if (enc == 2) { ++ for (z = 0; z < 3; z++) { ++ blockerrlin[0][z] += 2 * pixerrorcolorbest[z]; ++ } ++ nrcolor[0] += 2; ++ for (z = 0; z < 3; z++) { ++ blockerrlin[1][z] += 1 * pixerrorcolorbest[z]; ++ } ++ nrcolor[1] += 1; ++ } ++ else if (enc == 3) { ++ for (z = 0; z < 3; z++) { ++ blockerrlin[0][z] += 1 * pixerrorcolorbest[z]; ++ } ++ nrcolor[0] += 1; ++ for (z = 0; z < 3; z++) { ++ blockerrlin[1][z] += 2 * pixerrorcolorbest[z]; ++ } ++ nrcolor[1] += 2; ++ } ++ else if (enc == 1) { ++ for (z = 0; z < 3; z++) { ++ blockerrlin[1][z] += 3 * pixerrorcolorbest[z]; ++ } ++ nrcolor[1] += 3; ++ } ++ } ++ } ++ if (nrcolor[0] == 0) nrcolor[0] = 1; ++ if (nrcolor[1] == 0) nrcolor[1] = 1; ++ for (j = 0; j < 2; j++) { ++ for (i = 0; i < 3; i++) { ++ GLint newvalue = testcolor[j][i] + blockerrlin[j][i] / nrcolor[j]; ++ if (newvalue <= 0) ++ testcolor[j][i] = 0; ++ else if (newvalue >= 255) ++ testcolor[j][i] = 255; ++ else testcolor[j][i] = newvalue; ++ } ++ } ++ ++ if ((abs(testcolor[0][0] - testcolor[1][0]) < 8) && ++ (abs(testcolor[0][1] - testcolor[1][1]) < 4) && ++ (abs(testcolor[0][2] - testcolor[1][2]) < 8)) { ++ /* both colors are so close they might get encoded as the same 16bit values */ ++ GLubyte coldiffred, coldiffgreen, coldiffblue, coldiffmax, factor, ind0, ind1; ++ ++ coldiffred = abs(testcolor[0][0] - testcolor[1][0]); ++ coldiffgreen = 2 * abs(testcolor[0][1] - testcolor[1][1]); ++ coldiffblue = abs(testcolor[0][2] - testcolor[1][2]); ++ coldiffmax = coldiffred; ++ if (coldiffmax < coldiffgreen) coldiffmax = coldiffgreen; ++ if (coldiffmax < coldiffblue) coldiffmax = coldiffblue; ++ if (coldiffmax > 0) { ++ if (coldiffmax > 4) factor = 2; ++ else if (coldiffmax > 2) factor = 3; ++ else factor = 4; ++ /* Won't do much if 
the color value is near 255... */ ++ /* argh so many ifs */ ++ if (testcolor[1][1] >= testcolor[0][1]) { ++ ind1 = 1; ind0 = 0; ++ } ++ else { ++ ind1 = 0; ind0 = 1; ++ } ++ if ((testcolor[ind1][1] + factor * coldiffgreen) <= 255) ++ testcolor[ind1][1] += factor * coldiffgreen; ++ else testcolor[ind1][1] = 255; ++ if ((testcolor[ind1][0] - testcolor[ind0][1]) > 0) { ++ if ((testcolor[ind1][0] + factor * coldiffred) <= 255) ++ testcolor[ind1][0] += factor * coldiffred; ++ else testcolor[ind1][0] = 255; ++ } ++ else { ++ if ((testcolor[ind0][0] + factor * coldiffred) <= 255) ++ testcolor[ind0][0] += factor * coldiffred; ++ else testcolor[ind0][0] = 255; ++ } ++ if ((testcolor[ind1][2] - testcolor[ind0][2]) > 0) { ++ if ((testcolor[ind1][2] + factor * coldiffblue) <= 255) ++ testcolor[ind1][2] += factor * coldiffblue; ++ else testcolor[ind1][2] = 255; ++ } ++ else { ++ if ((testcolor[ind0][2] + factor * coldiffblue) <= 255) ++ testcolor[ind0][2] += factor * coldiffblue; ++ else testcolor[ind0][2] = 255; ++ } ++ } ++ } ++ ++ if (((testcolor[0][0] & 0xf8) << 8 | (testcolor[0][1] & 0xfc) << 3 | testcolor[0][2] >> 3) < ++ ((testcolor[1][0] & 0xf8) << 8 | (testcolor[1][1] & 0xfc) << 3 | testcolor[1][2]) >> 3) { ++ for (i = 0; i < 3; i++) { ++ bestcolor[0][i] = testcolor[0][i]; ++ bestcolor[1][i] = testcolor[1][i]; ++ } ++ } ++ else { ++ for (i = 0; i < 3; i++) { ++ bestcolor[0][i] = testcolor[1][i]; ++ bestcolor[1][i] = testcolor[0][i]; ++ } ++ } ++ ++/* fprintf(stderr, "color end 0 r/g/b %d/%d/%d, 1 r/g/b %d/%d/%d\n", ++ bestcolor[0][0], bestcolor[0][1], bestcolor[0][2], bestcolor[1][0], bestcolor[1][1], bestcolor[1][2]);*/ ++} ++ ++ ++ ++static void storedxtencodedblock( GLubyte *blkaddr, GLubyte srccolors[4][4][4], GLubyte *bestcolor[2], ++ GLint numxpixels, GLint numypixels, GLuint type, GLboolean haveAlpha) ++{ ++ /* use same luminance-weighted distance metric to determine encoding as for finding the base colors */ ++ ++ GLint i, j, colors; ++ GLuint testerror, 
testerror2, pixerror, pixerrorbest; ++ GLint colordist; ++ GLushort color0, color1, tempcolor; ++ GLuint bits = 0, bits2 = 0; ++ GLubyte *colorptr; ++ GLubyte enc = 0; ++ GLubyte cv[4][4]; ++ ++ bestcolor[0][0] = bestcolor[0][0] & 0xf8; ++ bestcolor[0][1] = bestcolor[0][1] & 0xfc; ++ bestcolor[0][2] = bestcolor[0][2] & 0xf8; ++ bestcolor[1][0] = bestcolor[1][0] & 0xf8; ++ bestcolor[1][1] = bestcolor[1][1] & 0xfc; ++ bestcolor[1][2] = bestcolor[1][2] & 0xf8; ++ ++ color0 = bestcolor[0][0] << 8 | bestcolor[0][1] << 3 | bestcolor[0][2] >> 3; ++ color1 = bestcolor[1][0] << 8 | bestcolor[1][1] << 3 | bestcolor[1][2] >> 3; ++ if (color0 < color1) { ++ tempcolor = color0; color0 = color1; color1 = tempcolor; ++ colorptr = bestcolor[0]; bestcolor[0] = bestcolor[1]; bestcolor[1] = colorptr; ++ } ++ ++ ++ for (i = 0; i < 3; i++) { ++ cv[0][i] = bestcolor[0][i]; ++ cv[1][i] = bestcolor[1][i]; ++ cv[2][i] = (bestcolor[0][i] * 2 + bestcolor[1][i]) / 3; ++ cv[3][i] = (bestcolor[0][i] + bestcolor[1][i] * 2) / 3; ++ } ++ ++ testerror = 0; ++ for (j = 0; j < numypixels; j++) { ++ for (i = 0; i < numxpixels; i++) { ++ pixerrorbest = 0xffffffff; ++ for (colors = 0; colors < 4; colors++) { ++ colordist = srccolors[j][i][0] - cv[colors][0]; ++ pixerror = colordist * colordist * REDWEIGHT; ++ colordist = srccolors[j][i][1] - cv[colors][1]; ++ pixerror += colordist * colordist * GREENWEIGHT; ++ colordist = srccolors[j][i][2] - cv[colors][2]; ++ pixerror += colordist * colordist * BLUEWEIGHT; ++ if (pixerror < pixerrorbest) { ++ pixerrorbest = pixerror; ++ enc = colors; ++ } ++ } ++ testerror += pixerrorbest; ++ bits |= enc << (2 * (j * 4 + i)); ++ } ++ } ++ /* some hw might disagree but actually decoding should always use 4-color encoding ++ for non-dxt1 formats */ ++ if (type == GL_COMPRESSED_RGB_S3TC_DXT1_EXT || type == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) { ++ for (i = 0; i < 3; i++) { ++ cv[2][i] = (bestcolor[0][i] + bestcolor[1][i]) / 2; ++ /* this isn't used. 
Looks like the black color constant can only be used ++ with RGB_DXT1 if I read the spec correctly (note though that the radeon gpu disagrees, ++ it will decode 3 to black even with DXT3/5), and due to how the color searching works ++ it won't get used even then */ ++ cv[3][i] = 0; ++ } ++ testerror2 = 0; ++ for (j = 0; j < numypixels; j++) { ++ for (i = 0; i < numxpixels; i++) { ++ pixerrorbest = 0xffffffff; ++ if ((type == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) && (srccolors[j][i][3] <= ALPHACUT)) { ++ enc = 3; ++ pixerrorbest = 0; /* don't calculate error */ ++ } ++ else { ++ /* we're calculating the same what we have done already for colors 0-1 above... */ ++ for (colors = 0; colors < 3; colors++) { ++ colordist = srccolors[j][i][0] - cv[colors][0]; ++ pixerror = colordist * colordist * REDWEIGHT; ++ colordist = srccolors[j][i][1] - cv[colors][1]; ++ pixerror += colordist * colordist * GREENWEIGHT; ++ colordist = srccolors[j][i][2] - cv[colors][2]; ++ pixerror += colordist * colordist * BLUEWEIGHT; ++ if (pixerror < pixerrorbest) { ++ pixerrorbest = pixerror; ++ /* need to exchange colors later */ ++ if (colors > 1) enc = colors; ++ else enc = colors ^ 1; ++ } ++ } ++ } ++ testerror2 += pixerrorbest; ++ bits2 |= enc << (2 * (j * 4 + i)); ++ } ++ } ++ } else { ++ testerror2 = 0xffffffff; ++ } ++ ++ /* finally we're finished, write back colors and bits */ ++ if ((testerror > testerror2) || (haveAlpha)) { ++ *blkaddr++ = color1 & 0xff; ++ *blkaddr++ = color1 >> 8; ++ *blkaddr++ = color0 & 0xff; ++ *blkaddr++ = color0 >> 8; ++ *blkaddr++ = bits2 & 0xff; ++ *blkaddr++ = ( bits2 >> 8) & 0xff; ++ *blkaddr++ = ( bits2 >> 16) & 0xff; ++ *blkaddr = bits2 >> 24; ++ } ++ else { ++ *blkaddr++ = color0 & 0xff; ++ *blkaddr++ = color0 >> 8; ++ *blkaddr++ = color1 & 0xff; ++ *blkaddr++ = color1 >> 8; ++ *blkaddr++ = bits & 0xff; ++ *blkaddr++ = ( bits >> 8) & 0xff; ++ *blkaddr++ = ( bits >> 16) & 0xff; ++ *blkaddr = bits >> 24; ++ } ++} ++ ++static void encodedxtcolorblockfaster( 
GLubyte *blkaddr, GLubyte srccolors[4][4][4], ++ GLint numxpixels, GLint numypixels, GLuint type ) ++{ ++/* simplistic approach. We need two base colors, simply use the "highest" and the "lowest" color ++ present in the picture as base colors */ ++ ++ /* define lowest and highest color as shortest and longest vector to 0/0/0, though the ++ vectors are weighted similar to their importance in rgb-luminance conversion ++ doesn't work too well though... ++ This seems to be a rather difficult problem */ ++ ++ GLubyte *bestcolor[2]; ++ GLubyte basecolors[2][3]; ++ GLubyte i, j; ++ GLuint lowcv, highcv, testcv; ++ GLboolean haveAlpha = GL_FALSE; ++ ++ lowcv = highcv = srccolors[0][0][0] * srccolors[0][0][0] * REDWEIGHT + ++ srccolors[0][0][1] * srccolors[0][0][1] * GREENWEIGHT + ++ srccolors[0][0][2] * srccolors[0][0][2] * BLUEWEIGHT; ++ bestcolor[0] = bestcolor[1] = srccolors[0][0]; ++ for (j = 0; j < numypixels; j++) { ++ for (i = 0; i < numxpixels; i++) { ++ /* don't use this as a base color if the pixel will get black/transparent anyway */ ++ if ((type != GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) || (srccolors[j][i][3] > ALPHACUT)) { ++ testcv = srccolors[j][i][0] * srccolors[j][i][0] * REDWEIGHT + ++ srccolors[j][i][1] * srccolors[j][i][1] * GREENWEIGHT + ++ srccolors[j][i][2] * srccolors[j][i][2] * BLUEWEIGHT; ++ if (testcv > highcv) { ++ highcv = testcv; ++ bestcolor[1] = srccolors[j][i]; ++ } ++ else if (testcv < lowcv) { ++ lowcv = testcv; ++ bestcolor[0] = srccolors[j][i]; ++ } ++ } ++ else haveAlpha = GL_TRUE; ++ } ++ } ++ /* make sure the original color values won't get touched... 
*/ ++ for (j = 0; j < 2; j++) { ++ for (i = 0; i < 3; i++) { ++ basecolors[j][i] = bestcolor[j][i]; ++ } ++ } ++ bestcolor[0] = basecolors[0]; ++ bestcolor[1] = basecolors[1]; ++ ++ /* try to find better base colors */ ++ fancybasecolorsearch(blkaddr, srccolors, bestcolor, numxpixels, numypixels, type, haveAlpha); ++ /* find the best encoding for these colors, and store the result */ ++ storedxtencodedblock(blkaddr, srccolors, bestcolor, numxpixels, numypixels, type, haveAlpha); ++} ++ ++static void writedxt5encodedalphablock( GLubyte *blkaddr, GLubyte alphabase1, GLubyte alphabase2, ++ GLubyte alphaenc[16]) ++{ ++ *blkaddr++ = alphabase1; ++ *blkaddr++ = alphabase2; ++ *blkaddr++ = alphaenc[0] | (alphaenc[1] << 3) | ((alphaenc[2] & 3) << 6); ++ *blkaddr++ = (alphaenc[2] >> 2) | (alphaenc[3] << 1) | (alphaenc[4] << 4) | ((alphaenc[5] & 1) << 7); ++ *blkaddr++ = (alphaenc[5] >> 1) | (alphaenc[6] << 2) | (alphaenc[7] << 5); ++ *blkaddr++ = alphaenc[8] | (alphaenc[9] << 3) | ((alphaenc[10] & 3) << 6); ++ *blkaddr++ = (alphaenc[10] >> 2) | (alphaenc[11] << 1) | (alphaenc[12] << 4) | ((alphaenc[13] & 1) << 7); ++ *blkaddr++ = (alphaenc[13] >> 1) | (alphaenc[14] << 2) | (alphaenc[15] << 5); ++} ++ ++static void encodedxt5alpha(GLubyte *blkaddr, GLubyte srccolors[4][4][4], ++ GLint numxpixels, GLint numypixels) ++{ ++ GLubyte alphabase[2], alphause[2]; ++ GLshort alphatest[2]; ++ GLuint alphablockerror1, alphablockerror2, alphablockerror3; ++ GLubyte i, j, aindex, acutValues[7]; ++ GLubyte alphaenc1[16], alphaenc2[16], alphaenc3[16]; ++ GLboolean alphaabsmin = GL_FALSE; ++ GLboolean alphaabsmax = GL_FALSE; ++ GLshort alphadist; ++ ++ /* find lowest and highest alpha value in block, alphabase[0] lowest, alphabase[1] highest */ ++ alphabase[0] = 0xff; alphabase[1] = 0x0; ++ for (j = 0; j < numypixels; j++) { ++ for (i = 0; i < numxpixels; i++) { ++ if (srccolors[j][i][3] == 0) ++ alphaabsmin = GL_TRUE; ++ else if (srccolors[j][i][3] == 255) ++ alphaabsmax = GL_TRUE; ++ else 
{ ++ if (srccolors[j][i][3] > alphabase[1]) ++ alphabase[1] = srccolors[j][i][3]; ++ if (srccolors[j][i][3] < alphabase[0]) ++ alphabase[0] = srccolors[j][i][3]; ++ } ++ } ++ } ++ ++ ++ if ((alphabase[0] > alphabase[1]) && !(alphaabsmin && alphaabsmax)) { /* one color, either max or min */ ++ /* shortcut here since it is a very common case (and also avoids later problems) */ ++ /* || (alphabase[0] == alphabase[1] && !alphaabsmin && !alphaabsmax) */ ++ /* could also test for alpha0 == alpha1 (and not min/max), but probably not common, so don't bother */ ++ ++ *blkaddr++ = srccolors[0][0][3]; ++ blkaddr++; ++ *blkaddr++ = 0; ++ *blkaddr++ = 0; ++ *blkaddr++ = 0; ++ *blkaddr++ = 0; ++ *blkaddr++ = 0; ++ *blkaddr++ = 0; ++/* fprintf(stderr, "enc0 used\n");*/ ++ return; ++ } ++ ++ /* find best encoding for alpha0 > alpha1 */ ++ /* it's possible this encoding is better even if both alphaabsmin and alphaabsmax are true */ ++ alphablockerror1 = 0x0; ++ alphablockerror2 = 0xffffffff; ++ alphablockerror3 = 0xffffffff; ++ if (alphaabsmin) alphause[0] = 0; ++ else alphause[0] = alphabase[0]; ++ if (alphaabsmax) alphause[1] = 255; ++ else alphause[1] = alphabase[1]; ++ /* calculate the 7 cut values, just the middle between 2 of the computed alpha values */ ++ for (aindex = 0; aindex < 7; aindex++) { ++ /* don't forget here is always rounded down */ ++ acutValues[aindex] = (alphause[0] * (2*aindex + 1) + alphause[1] * (14 - (2*aindex + 1))) / 14; ++ } ++ ++ for (j = 0; j < numypixels; j++) { ++ for (i = 0; i < numxpixels; i++) { ++ /* maybe it's overkill to have the most complicated calculation just for the error ++ calculation which we only need to figure out if encoding1 or encoding2 is better...
*/ ++ if (srccolors[j][i][3] > acutValues[0]) { ++ alphaenc1[4*j + i] = 0; ++ alphadist = srccolors[j][i][3] - alphause[1]; ++ } ++ else if (srccolors[j][i][3] > acutValues[1]) { ++ alphaenc1[4*j + i] = 2; ++ alphadist = srccolors[j][i][3] - (alphause[1] * 6 + alphause[0] * 1) / 7; ++ } ++ else if (srccolors[j][i][3] > acutValues[2]) { ++ alphaenc1[4*j + i] = 3; ++ alphadist = srccolors[j][i][3] - (alphause[1] * 5 + alphause[0] * 2) / 7; ++ } ++ else if (srccolors[j][i][3] > acutValues[3]) { ++ alphaenc1[4*j + i] = 4; ++ alphadist = srccolors[j][i][3] - (alphause[1] * 4 + alphause[0] * 3) / 7; ++ } ++ else if (srccolors[j][i][3] > acutValues[4]) { ++ alphaenc1[4*j + i] = 5; ++ alphadist = srccolors[j][i][3] - (alphause[1] * 3 + alphause[0] * 4) / 7; ++ } ++ else if (srccolors[j][i][3] > acutValues[5]) { ++ alphaenc1[4*j + i] = 6; ++ alphadist = srccolors[j][i][3] - (alphause[1] * 2 + alphause[0] * 5) / 7; ++ } ++ else if (srccolors[j][i][3] > acutValues[6]) { ++ alphaenc1[4*j + i] = 7; ++ alphadist = srccolors[j][i][3] - (alphause[1] * 1 + alphause[0] * 6) / 7; ++ } ++ else { ++ alphaenc1[4*j + i] = 1; ++ alphadist = srccolors[j][i][3] - alphause[0]; ++ } ++ alphablockerror1 += alphadist * alphadist; ++ } ++ } ++/* for (i = 0; i < 16; i++) { ++ fprintf(stderr, "%d ", alphaenc1[i]); ++ } ++ fprintf(stderr, "cutVals "); ++ for (i = 0; i < 8; i++) { ++ fprintf(stderr, "%d ", acutValues[i]); ++ } ++ fprintf(stderr, "srcVals "); ++ for (j = 0; j < numypixels; j++) ++ for (i = 0; i < numxpixels; i++) { ++ fprintf(stderr, "%d ", srccolors[j][i][3]); ++ } ++ ++ fprintf(stderr, "\n"); ++ }*/ ++ /* it's not very likely this encoding is better if both alphaabsmin and alphaabsmax ++ are false but try it anyway */ ++ if (alphablockerror1 >= 32) { ++ ++ /* don't bother if encoding is already very good, this condition should also imply ++ we have valid alphabase colors which we absolutely need (alphabase[0] <= alphabase[1]) */ ++ alphablockerror2 = 0; ++ for (aindex = 0; aindex < 
5; aindex++) { ++ /* don't forget here is always rounded down */ ++ acutValues[aindex] = (alphabase[0] * (10 - (2*aindex + 1)) + alphabase[1] * (2*aindex + 1)) / 10; ++ } ++ for (j = 0; j < numypixels; j++) { ++ for (i = 0; i < numxpixels; i++) { ++ /* maybe it's overkill to have the most complicated calculation just for the error ++ calculation which we only need to figure out if encoding1 or encoding2 is better... */ ++ if (srccolors[j][i][3] == 0) { ++ alphaenc2[4*j + i] = 6; ++ alphadist = 0; ++ } ++ else if (srccolors[j][i][3] == 255) { ++ alphaenc2[4*j + i] = 7; ++ alphadist = 0; ++ } ++ else if (srccolors[j][i][3] <= acutValues[0]) { ++ alphaenc2[4*j + i] = 0; ++ alphadist = srccolors[j][i][3] - alphabase[0]; ++ } ++ else if (srccolors[j][i][3] <= acutValues[1]) { ++ alphaenc2[4*j + i] = 2; ++ alphadist = srccolors[j][i][3] - (alphabase[0] * 4 + alphabase[1] * 1) / 5; ++ } ++ else if (srccolors[j][i][3] <= acutValues[2]) { ++ alphaenc2[4*j + i] = 3; ++ alphadist = srccolors[j][i][3] - (alphabase[0] * 3 + alphabase[1] * 2) / 5; ++ } ++ else if (srccolors[j][i][3] <= acutValues[3]) { ++ alphaenc2[4*j + i] = 4; ++ alphadist = srccolors[j][i][3] - (alphabase[0] * 2 + alphabase[1] * 3) / 5; ++ } ++ else if (srccolors[j][i][3] <= acutValues[4]) { ++ alphaenc2[4*j + i] = 5; ++ alphadist = srccolors[j][i][3] - (alphabase[0] * 1 + alphabase[1] * 4) / 5; ++ } ++ else { ++ alphaenc2[4*j + i] = 1; ++ alphadist = srccolors[j][i][3] - alphabase[1]; ++ } ++ alphablockerror2 += alphadist * alphadist; ++ } ++ } ++ ++ ++ /* skip this if the error is already very small ++ this encoding is MUCH better on average than #2 though, but expensive! 
*/ ++ if ((alphablockerror2 > 96) && (alphablockerror1 > 96)) { ++ GLshort blockerrlin1 = 0; ++ GLshort blockerrlin2 = 0; ++ GLubyte nralphainrangelow = 0; ++ GLubyte nralphainrangehigh = 0; ++ alphatest[0] = 0xff; ++ alphatest[1] = 0x0; ++ /* if we have large range it's likely there are values close to 0/255, try to map them to 0/255 */ ++ for (j = 0; j < numypixels; j++) { ++ for (i = 0; i < numxpixels; i++) { ++ if ((srccolors[j][i][3] > alphatest[1]) && (srccolors[j][i][3] < (255 -(alphabase[1] - alphabase[0]) / 28))) ++ alphatest[1] = srccolors[j][i][3]; ++ if ((srccolors[j][i][3] < alphatest[0]) && (srccolors[j][i][3] > (alphabase[1] - alphabase[0]) / 28)) ++ alphatest[0] = srccolors[j][i][3]; ++ } ++ } ++ /* shouldn't happen too often, don't really care about those degenerated cases */ ++ if (alphatest[1] <= alphatest[0]) { ++ alphatest[0] = 1; ++ alphatest[1] = 254; ++/* fprintf(stderr, "only 1 or 0 colors for encoding!\n");*/ ++ } ++ for (aindex = 0; aindex < 5; aindex++) { ++ /* don't forget here is always rounded down */ ++ acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10; ++ } ++ ++ /* find the "average" difference between the alpha values and the next encoded value. ++ This is then used to calculate new base values. ++ Should there be some weighting, i.e. those values closer to alphatest[x] have more weight, ++ since they will see more improvement, and also because the values in the middle are somewhat ++ likely to get no improvement at all (because the base values might move in different directions)? 
++ OTOH it would mean the values in the middle are even less likely to get an improvement ++ */ ++ for (j = 0; j < numypixels; j++) { ++ for (i = 0; i < numxpixels; i++) { ++ if (srccolors[j][i][3] <= alphatest[0] / 2) { ++ } ++ else if (srccolors[j][i][3] > ((255 + alphatest[1]) / 2)) { ++ } ++ else if (srccolors[j][i][3] <= acutValues[0]) { ++ blockerrlin1 += (srccolors[j][i][3] - alphatest[0]); ++ nralphainrangelow += 1; ++ } ++ else if (srccolors[j][i][3] <= acutValues[1]) { ++ blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5); ++ blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5); ++ nralphainrangelow += 1; ++ nralphainrangehigh += 1; ++ } ++ else if (srccolors[j][i][3] <= acutValues[2]) { ++ blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5); ++ blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5); ++ nralphainrangelow += 1; ++ nralphainrangehigh += 1; ++ } ++ else if (srccolors[j][i][3] <= acutValues[3]) { ++ blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5); ++ blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5); ++ nralphainrangelow += 1; ++ nralphainrangehigh += 1; ++ } ++ else if (srccolors[j][i][3] <= acutValues[4]) { ++ blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5); ++ blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5); ++ nralphainrangelow += 1; ++ nralphainrangehigh += 1; ++ } ++ else { ++ blockerrlin2 += (srccolors[j][i][3] - alphatest[1]); ++ nralphainrangehigh += 1; ++ } ++ } ++ } ++ /* shouldn't happen often, needed to avoid div by zero */ ++ if (nralphainrangelow == 0) nralphainrangelow = 1; ++ if (nralphainrangehigh == 0) nralphainrangehigh = 1; ++ alphatest[0] = alphatest[0] + (blockerrlin1 / nralphainrangelow); ++/* fprintf(stderr, "block err lin low %d, nr %d\n", blockerrlin1, nralphainrangelow); ++ 
fprintf(stderr, "block err lin high %d, nr %d\n", blockerrlin2, nralphainrangehigh);*/ ++ /* again shouldn't really happen often... */ ++ if (alphatest[0] < 0) { ++ alphatest[0] = 0; ++/* fprintf(stderr, "adj alpha base val to 0\n");*/ ++ } ++ alphatest[1] = alphatest[1] + (blockerrlin2 / nralphainrangehigh); ++ if (alphatest[1] > 255) { ++ alphatest[1] = 255; ++/* fprintf(stderr, "adj alpha base val to 255\n");*/ ++ } ++ ++ alphablockerror3 = 0; ++ for (aindex = 0; aindex < 5; aindex++) { ++ /* don't forget here is always rounded down */ ++ acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10; ++ } ++ for (j = 0; j < numypixels; j++) { ++ for (i = 0; i < numxpixels; i++) { ++ /* maybe it's overkill to have the most complicated calculation just for the error ++ calculation which we only need to figure out if encoding1 or encoding2 is better... */ ++ if (srccolors[j][i][3] <= alphatest[0] / 2) { ++ alphaenc3[4*j + i] = 6; ++ alphadist = srccolors[j][i][3]; ++ } ++ else if (srccolors[j][i][3] > ((255 + alphatest[1]) / 2)) { ++ alphaenc3[4*j + i] = 7; ++ alphadist = 255 - srccolors[j][i][3]; ++ } ++ else if (srccolors[j][i][3] <= acutValues[0]) { ++ alphaenc3[4*j + i] = 0; ++ alphadist = srccolors[j][i][3] - alphatest[0]; ++ } ++ else if (srccolors[j][i][3] <= acutValues[1]) { ++ alphaenc3[4*j + i] = 2; ++ alphadist = srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5; ++ } ++ else if (srccolors[j][i][3] <= acutValues[2]) { ++ alphaenc3[4*j + i] = 3; ++ alphadist = srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5; ++ } ++ else if (srccolors[j][i][3] <= acutValues[3]) { ++ alphaenc3[4*j + i] = 4; ++ alphadist = srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5; ++ } ++ else if (srccolors[j][i][3] <= acutValues[4]) { ++ alphaenc3[4*j + i] = 5; ++ alphadist = srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5; ++ } ++ else { ++ alphaenc3[4*j + i] = 1; ++ alphadist = 
srccolors[j][i][3] - alphatest[1];
++               }
++               alphablockerror3 += alphadist * alphadist;
++            }
++         }
++      }
++   }
++  /* write the alpha values and encoding back. */
++   if ((alphablockerror1 <= alphablockerror2) && (alphablockerror1 <= alphablockerror3)) {
++/* if (alphablockerror1 > 96) fprintf(stderr, "enc1 used, error %d\n", alphablockerror1);*/
++      writedxt5encodedalphablock( blkaddr, alphause[1], alphause[0], alphaenc1 );
++   }
++   else if (alphablockerror2 <= alphablockerror3) {
++/* if (alphablockerror2 > 96) fprintf(stderr, "enc2 used, error %d\n", alphablockerror2);*/
++      writedxt5encodedalphablock( blkaddr, alphabase[0], alphabase[1], alphaenc2 );
++   }
++   else {
++/* fprintf(stderr, "enc3 used, error %d\n", alphablockerror3);*/
++      writedxt5encodedalphablock( blkaddr, (GLubyte)alphatest[0], (GLubyte)alphatest[1], alphaenc3 );
++   }
++}
++/* Copy a numxpixels x numypixels tile of comps-channel pixels from srcaddr into srcpixels[row][column][channel]; entries outside the tile are left untouched. */
++static void extractsrccolors( GLubyte srcpixels[4][4][4], const GLchan *srcaddr,
++                         GLint srcRowStride, GLint numxpixels, GLint numypixels, GLint comps)
++{
++   GLubyte i, j, c;
++   const GLchan *curaddr;
++   for (j = 0; j < numypixels; j++) {
++      curaddr = srcaddr + j * srcRowStride * comps; /* srcRowStride is counted in pixels, hence the multiplication by comps */
++      for (i = 0; i < numxpixels; i++) {
++         for (c = 0; c < comps; c++) {
++            srcpixels[j][i][c] = *curaddr++ / (CHAN_MAX / 255); /* presumably rescales a GLchan value to 0..255; CHAN_MAX is defined elsewhere - TODO confirm */
++         }
++      }
++   }
++}
++/* Compress a width x height image of srccomps-channel pixels (srcPixData) into destFormat blocks written to dest, one block per 4x4 tile, walking tiles left to right and top to bottom. */
++/* DXT1 advances dest by 8 bytes per block; DXT3 and DXT5 advance by 16, with the color block at byte offset 8. Any other destFormat hits assert(false) and returns without writing. */
++static void tx_compress_dxtn(GLint srccomps, GLint width, GLint height, const GLubyte *srcPixData,
++                      GLenum destFormat, GLubyte *dest, GLint dstRowStride)
++{
++   GLubyte *blkaddr = dest;
++   GLubyte srcpixels[4][4][4]; /* scratch tile indexed [row][column][channel]; not initialized here */
++   const GLchan *srcaddr = srcPixData;
++   GLint numxpixels, numypixels;
++   GLint i, j;
++   GLint dstRowDiff;
++   /* dstRowDiff is the number of bytes skipped in dest after each row of blocks; it is 0 when dstRowStride is below the threshold tested in each case. */
++   switch (destFormat) {
++   case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
++   case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
++      /* hmm we used to get called without dstRowStride... */
++      dstRowDiff = dstRowStride >= (width * 2) ? dstRowStride - (((width + 3) & ~3) * 2) : 0;
++/* fprintf(stderr, "dxt1 tex width %d tex height %d dstRowStride %d\n",
++ width, height, dstRowStride); */
++      for (j = 0; j < height; j += 4) {
++         if (height > j + 3) numypixels = 4; /* tiles on the bottom and right edges may cover fewer than 4 rows or columns */
++         else numypixels = height - j;
++         srcaddr = srcPixData + j * width * srccomps; /* source rows are assumed tightly packed: width * srccomps elements per row */
++         for (i = 0; i < width; i += 4) {
++            if (width > i + 3) numxpixels = 4;
++            else numxpixels = width - i;
++            extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);
++            encodedxtcolorblockfaster(blkaddr, srcpixels, numxpixels, numypixels, destFormat);
++            srcaddr += srccomps * numxpixels;
++            blkaddr += 8;
++         }
++         blkaddr += dstRowDiff;
++      }
++      break;
++   case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
++      dstRowDiff = dstRowStride >= (width * 4) ? dstRowStride - (((width + 3) & ~3) * 4) : 0;
++/* fprintf(stderr, "dxt3 tex width %d tex height %d dstRowStride %d\n",
++ width, height, dstRowStride); */
++      for (j = 0; j < height; j += 4) {
++         if (height > j + 3) numypixels = 4;
++         else numypixels = height - j;
++         srcaddr = srcPixData + j * width * srccomps;
++         for (i = 0; i < width; i += 4) {
++            if (width > i + 3) numxpixels = 4;
++            else numxpixels = width - i;
++            extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps); /* NOTE(review): only the numxpixels x numypixels tile and srccomps channels are refreshed, yet all 16 alpha entries are read below - for partial tiles or srccomps below 4 they are stale or uninitialized; confirm callers */
++            *blkaddr++ = (srcpixels[0][0][3] >> 4) | (srcpixels[0][1][3] & 0xf0); /* explicit alpha: top 4 bits of each pixel, two pixels per byte, even column in the low nibble */
++            *blkaddr++ = (srcpixels[0][2][3] >> 4) | (srcpixels[0][3][3] & 0xf0);
++            *blkaddr++ = (srcpixels[1][0][3] >> 4) | (srcpixels[1][1][3] & 0xf0);
++            *blkaddr++ = (srcpixels[1][2][3] >> 4) | (srcpixels[1][3][3] & 0xf0);
++            *blkaddr++ = (srcpixels[2][0][3] >> 4) | (srcpixels[2][1][3] & 0xf0);
++            *blkaddr++ = (srcpixels[2][2][3] >> 4) | (srcpixels[2][3][3] & 0xf0);
++            *blkaddr++ = (srcpixels[3][0][3] >> 4) | (srcpixels[3][1][3] & 0xf0);
++            *blkaddr++ = (srcpixels[3][2][3] >> 4) | (srcpixels[3][3][3] & 0xf0);
++            encodedxtcolorblockfaster(blkaddr, srcpixels, numxpixels, numypixels, destFormat); /* blkaddr already points past the 8 alpha bytes */
++            srcaddr += srccomps * numxpixels;
++            blkaddr += 8;
++         }
++         blkaddr += dstRowDiff;
++      }
++      break;
++   case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
++      dstRowDiff = dstRowStride >= (width * 4) ? dstRowStride - (((width + 3) & ~3) * 4) : 0;
++/* fprintf(stderr, "dxt5 tex width %d tex height %d dstRowStride %d\n",
++ width, height, dstRowStride); */
++      for (j = 0; j < height; j += 4) {
++         if (height > j + 3) numypixels = 4;
++         else numypixels = height - j;
++         srcaddr = srcPixData + j * width * srccomps;
++         for (i = 0; i < width; i += 4) {
++            if (width > i + 3) numxpixels = 4;
++            else numxpixels = width - i;
++            extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);
++            encodedxt5alpha(blkaddr, srcpixels, numxpixels, numypixels); /* alpha data goes at the start of the block */
++            encodedxtcolorblockfaster(blkaddr + 8, srcpixels, numxpixels, numypixels, destFormat); /* color block at byte offset 8 */
++            srcaddr += srccomps * numxpixels;
++            blkaddr += 16;
++         }
++         blkaddr += dstRowDiff;
++      }
++      break;
++   default:
++      assert(false);
++      return;
++   }
++}
+diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c
+index baa3988f0a..3f8e7a49a2 100644
+--- a/src/mesa/main/texformat.c
++++ b/src/mesa/main/texformat.c
+@@ -249,9 +249,7 @@ _mesa_choose_tex_format(struct gl_context *ctx, GLenum target,
+           * 1D ARRAY textures in S3TC format.
+           */
+          if (target != GL_TEXTURE_1D && target != GL_TEXTURE_1D_ARRAY) {
+-            if (ctx->Mesa_DXTn)
+-               RETURN_IF_SUPPORTED(MESA_FORMAT_RGB_DXT1);
+-            RETURN_IF_SUPPORTED(MESA_FORMAT_RGB_FXT1);
++            RETURN_IF_SUPPORTED(MESA_FORMAT_RGB_DXT1);
+          }
+          RETURN_IF_SUPPORTED(MESA_FORMAT_BGR_UNORM8);
+          RETURN_IF_SUPPORTED(MESA_FORMAT_B8G8R8X8_UNORM);
+@@ -260,9 +258,7 @@ _mesa_choose_tex_format(struct gl_context *ctx, GLenum target,
+       case GL_COMPRESSED_RGBA_ARB:
+          /* We don't use texture compression for 1D and 1D array textures.
*/ + if (target != GL_TEXTURE_1D && target != GL_TEXTURE_1D_ARRAY) { +- if (ctx->Mesa_DXTn) +- RETURN_IF_SUPPORTED(MESA_FORMAT_RGBA_DXT3); /* Not rgba_dxt1, see spec */ +- RETURN_IF_SUPPORTED(MESA_FORMAT_RGBA_FXT1); ++ RETURN_IF_SUPPORTED(MESA_FORMAT_RGBA_DXT3); /* Not rgba_dxt1, see spec */ + } + RETURN_IF_SUPPORTED(MESA_FORMAT_A8B8G8R8_UNORM); + RETURN_IF_SUPPORTED(MESA_FORMAT_B8G8R8A8_UNORM); +@@ -502,15 +498,13 @@ _mesa_choose_tex_format(struct gl_context *ctx, GLenum target, + RETURN_IF_SUPPORTED(MESA_FORMAT_A8R8G8B8_SRGB); + break; + case GL_COMPRESSED_SRGB_EXT: +- if (ctx->Mesa_DXTn) +- RETURN_IF_SUPPORTED(MESA_FORMAT_SRGB_DXT1); ++ RETURN_IF_SUPPORTED(MESA_FORMAT_SRGB_DXT1); + RETURN_IF_SUPPORTED(MESA_FORMAT_BGR_SRGB8); + RETURN_IF_SUPPORTED(MESA_FORMAT_B8G8R8A8_SRGB); + RETURN_IF_SUPPORTED(MESA_FORMAT_A8R8G8B8_SRGB); + break; + case GL_COMPRESSED_SRGB_ALPHA_EXT: +- if (ctx->Mesa_DXTn) +- RETURN_IF_SUPPORTED(MESA_FORMAT_SRGBA_DXT3); /* Not srgba_dxt1, see spec */ ++ RETURN_IF_SUPPORTED(MESA_FORMAT_SRGBA_DXT3); /* Not srgba_dxt1, see spec */ + RETURN_IF_SUPPORTED(MESA_FORMAT_A8B8G8R8_SRGB); + RETURN_IF_SUPPORTED(MESA_FORMAT_B8G8R8A8_SRGB); + RETURN_IF_SUPPORTED(MESA_FORMAT_A8R8G8B8_SRGB); +diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c +index 5509d808cd..1d6da393c2 100644 +--- a/src/mesa/main/teximage.c ++++ b/src/mesa/main/teximage.c +@@ -2765,38 +2765,6 @@ _mesa_choose_texture_format(struct gl_context *ctx, + } + } + +- /* If the application requested compression to an S3TC format but we don't +- * have the DXTn library, force a generic compressed format instead. 
+- */ +- if (internalFormat != format && format != GL_NONE) { +- const GLenum before = internalFormat; +- +- switch (internalFormat) { +- case GL_COMPRESSED_RGB_S3TC_DXT1_EXT: +- if (!ctx->Mesa_DXTn) +- internalFormat = GL_COMPRESSED_RGB; +- break; +- case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: +- case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: +- case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: +- if (!ctx->Mesa_DXTn) +- internalFormat = GL_COMPRESSED_RGBA; +- break; +- default: +- break; +- } +- +- if (before != internalFormat) { +- _mesa_warning(ctx, +- "DXT compression requested (%s), " +- "but libtxc_dxtn library not installed. Using %s " +- "instead.", +- _mesa_enum_to_string(before), +- _mesa_enum_to_string(internalFormat)); +- } +- } +- +- /* choose format from scratch */ + f = ctx->Driver.ChooseTextureFormat(ctx, target, internalFormat, + format, type); + assert(f != MESA_FORMAT_NONE); +diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c +index 381ff9dae0..b96313e2aa 100644 +--- a/src/mesa/state_tracker/st_context.c ++++ b/src/mesa/state_tracker/st_context.c +@@ -405,7 +405,7 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe, + /* GL limits and extensions */ + st_init_limits(pipe->screen, &ctx->Const, &ctx->Extensions); + st_init_extensions(pipe->screen, &ctx->Const, +- &ctx->Extensions, &st->options, ctx->Mesa_DXTn); ++ &ctx->Extensions, &st->options); + + if (st_have_perfmon(st)) { + ctx->Extensions.AMD_performance_monitor = GL_TRUE; +diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c +index 74193cc492..65b6bd34aa 100644 +--- a/src/mesa/state_tracker/st_extensions.c ++++ b/src/mesa/state_tracker/st_extensions.c +@@ -569,8 +569,7 @@ get_max_samples_for_formats(struct pipe_screen *screen, + void st_init_extensions(struct pipe_screen *screen, + struct gl_constants *consts, + struct gl_extensions *extensions, +- struct st_config_options *options, +- boolean has_lib_dxtc) ++ 
struct st_config_options *options) + { + unsigned i; + GLboolean *extension_table = (GLboolean *) extensions; +@@ -960,11 +959,6 @@ void st_init_extensions(struct pipe_screen *screen, + + /* Below are the cases which cannot be moved into tables easily. */ + +- if (!has_lib_dxtc && !options->force_s3tc_enable) { +- extensions->EXT_texture_compression_s3tc = GL_FALSE; +- extensions->ANGLE_texture_compression_dxt = GL_FALSE; +- } +- + if (screen->get_shader_param(screen, PIPE_SHADER_TESS_CTRL, + PIPE_SHADER_CAP_MAX_INSTRUCTIONS) > 0) { + extensions->ARB_tessellation_shader = GL_TRUE; +diff --git a/src/mesa/state_tracker/st_extensions.h b/src/mesa/state_tracker/st_extensions.h +index faff11fd5d..951185caa3 100644 +--- a/src/mesa/state_tracker/st_extensions.h ++++ b/src/mesa/state_tracker/st_extensions.h +@@ -40,8 +40,7 @@ extern void st_init_limits(struct pipe_screen *screen, + extern void st_init_extensions(struct pipe_screen *screen, + struct gl_constants *consts, + struct gl_extensions *extensions, +- struct st_config_options *options, +- boolean has_lib_dxtc); ++ struct st_config_options *options); + + + #endif /* ST_EXTENSIONS_H */ +diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c +index 012f1a4015..4d0b4265b7 100644 +--- a/src/mesa/state_tracker/st_format.c ++++ b/src/mesa/state_tracker/st_format.c +@@ -2275,13 +2275,13 @@ st_ChooseTextureFormat(struct gl_context *ctx, GLenum target, + } + + pFormat = st_choose_format(st, internalFormat, format, type, +- pTarget, 0, bindings, ctx->Mesa_DXTn); ++ pTarget, 0, bindings, GL_TRUE); + + if (pFormat == PIPE_FORMAT_NONE && !is_renderbuffer) { + /* try choosing format again, this time without render target bindings */ + pFormat = st_choose_format(st, internalFormat, format, type, + pTarget, 0, PIPE_BIND_SAMPLER_VIEW, +- ctx->Mesa_DXTn); ++ GL_TRUE); + } + + if (pFormat == PIPE_FORMAT_NONE) { +diff --git a/src/mesa/state_tracker/st_manager.c b/src/mesa/state_tracker/st_manager.c +index 
5ad0816184..999e1230cb 100644 +--- a/src/mesa/state_tracker/st_manager.c ++++ b/src/mesa/state_tracker/st_manager.c +@@ -1137,7 +1137,7 @@ get_version(struct pipe_screen *screen, + _mesa_init_extensions(&extensions); + + st_init_limits(screen, &consts, &extensions); +- st_init_extensions(screen, &consts, &extensions, options, GL_TRUE); ++ st_init_extensions(screen, &consts, &extensions, options); + + return _mesa_get_version(&extensions, &consts, api); + } +-- +2.13.5 + diff --git a/SOURCES/0002-anv-blorp-Add-a-device-parameter-to-blorp_surf_for_a.patch b/SOURCES/0002-anv-blorp-Add-a-device-parameter-to-blorp_surf_for_a.patch new file mode 100644 index 0000000..013e866 --- /dev/null +++ b/SOURCES/0002-anv-blorp-Add-a-device-parameter-to-blorp_surf_for_a.patch @@ -0,0 +1,182 @@ +From ee170635c5be54cf644ef5c8d4574f30764e244f Mon Sep 17 00:00:00 2001 +From: Jason Ekstrand +Date: Fri, 3 Nov 2017 15:18:45 -0700 +Subject: [PATCH 2/5] anv/blorp: Add a device parameter to + blorp_surf_for_anv_image + +Cc: "17.3" +Tested-by: Lyude Paul +Reviewed-by: Kenneth Graunke +Signed-off-by: Lyude +--- + src/intel/vulkan/anv_blorp.c | 54 ++++++++++++++++++++++++++++---------------- + 1 file changed, 34 insertions(+), 20 deletions(-) + +diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c +index 79f5234c55..c00d38b52c 100644 +--- a/src/intel/vulkan/anv_blorp.c ++++ b/src/intel/vulkan/anv_blorp.c +@@ -176,7 +176,8 @@ get_blorp_surf_for_anv_buffer(struct anv_device *device, + } + + static void +-get_blorp_surf_for_anv_image(const struct anv_image *image, ++get_blorp_surf_for_anv_image(const struct anv_device *device, ++ const struct anv_image *image, + VkImageAspectFlags aspect, + enum isl_aux_usage aux_usage, + struct blorp_surf *blorp_surf) +@@ -257,9 +258,11 @@ void anv_CmdCopyImage( + VkImageAspectFlagBits aspect = (1 << a); + + struct blorp_surf src_surf, dst_surf; +- get_blorp_surf_for_anv_image(src_image, aspect, src_image->aux_usage, ++ 
get_blorp_surf_for_anv_image(cmd_buffer->device, ++ src_image, aspect, src_image->aux_usage, + &src_surf); +- get_blorp_surf_for_anv_image(dst_image, aspect, dst_image->aux_usage, ++ get_blorp_surf_for_anv_image(cmd_buffer->device, ++ dst_image, aspect, dst_image->aux_usage, + &dst_surf); + + for (unsigned i = 0; i < layer_count; i++) { +@@ -308,8 +311,8 @@ copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer, + for (unsigned r = 0; r < regionCount; r++) { + const VkImageAspectFlags aspect = pRegions[r].imageSubresource.aspectMask; + +- get_blorp_surf_for_anv_image(anv_image, aspect, anv_image->aux_usage, +- &image.surf); ++ get_blorp_surf_for_anv_image(cmd_buffer->device, anv_image, aspect, ++ anv_image->aux_usage, &image.surf); + image.offset = + anv_sanitize_image_offset(anv_image->type, pRegions[r].imageOffset); + image.level = pRegions[r].imageSubresource.mipLevel; +@@ -454,9 +457,11 @@ void anv_CmdBlitImage( + const VkImageSubresourceLayers *src_res = &pRegions[r].srcSubresource; + const VkImageSubresourceLayers *dst_res = &pRegions[r].dstSubresource; + +- get_blorp_surf_for_anv_image(src_image, src_res->aspectMask, ++ get_blorp_surf_for_anv_image(cmd_buffer->device, ++ src_image, src_res->aspectMask, + src_image->aux_usage, &src); +- get_blorp_surf_for_anv_image(dst_image, dst_res->aspectMask, ++ get_blorp_surf_for_anv_image(cmd_buffer->device, ++ dst_image, dst_res->aspectMask, + dst_image->aux_usage, &dst); + + struct anv_format src_format = +@@ -832,7 +837,8 @@ void anv_CmdClearColorImage( + blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); + + struct blorp_surf surf; +- get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT, ++ get_blorp_surf_for_anv_image(cmd_buffer->device, ++ image, VK_IMAGE_ASPECT_COLOR_BIT, + image->aux_usage, &surf); + + for (unsigned r = 0; r < rangeCount; r++) { +@@ -885,14 +891,16 @@ void anv_CmdClearDepthStencilImage( + + struct blorp_surf depth, stencil; + if (image->aspects & 
VK_IMAGE_ASPECT_DEPTH_BIT) { +- get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_DEPTH_BIT, ++ get_blorp_surf_for_anv_image(cmd_buffer->device, ++ image, VK_IMAGE_ASPECT_DEPTH_BIT, + ISL_AUX_USAGE_NONE, &depth); + } else { + memset(&depth, 0, sizeof(depth)); + } + + if (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { +- get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_STENCIL_BIT, ++ get_blorp_surf_for_anv_image(cmd_buffer->device, ++ image, VK_IMAGE_ASPECT_STENCIL_BIT, + ISL_AUX_USAGE_NONE, &stencil); + } else { + memset(&stencil, 0, sizeof(stencil)); +@@ -1212,7 +1220,8 @@ anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer) + struct anv_image_view *iview = fb->attachments[a]; + const struct anv_image *image = iview->image; + struct blorp_surf surf; +- get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT, ++ get_blorp_surf_for_anv_image(cmd_buffer->device, image, ++ VK_IMAGE_ASPECT_COLOR_BIT, + att_state->aux_usage, &surf); + + if (att_state->fast_clear) { +@@ -1359,7 +1368,8 @@ anv_cmd_buffer_clear_subpass(struct anv_cmd_buffer *cmd_buffer) + } + + static void +-resolve_image(struct blorp_batch *batch, ++resolve_image(struct anv_device *device, ++ struct blorp_batch *batch, + const struct anv_image *src_image, + enum isl_aux_usage src_aux_usage, + uint32_t src_level, uint32_t src_layer, +@@ -1380,9 +1390,9 @@ resolve_image(struct blorp_batch *batch, + VkImageAspectFlagBits aspect = 1 << a; + + struct blorp_surf src_surf, dst_surf; +- get_blorp_surf_for_anv_image(src_image, aspect, ++ get_blorp_surf_for_anv_image(device, src_image, aspect, + src_aux_usage, &src_surf); +- get_blorp_surf_for_anv_image(dst_image, aspect, ++ get_blorp_surf_for_anv_image(device, dst_image, aspect, + dst_aux_usage, &dst_surf); + + blorp_blit(batch, +@@ -1422,7 +1432,7 @@ void anv_CmdResolveImage( + anv_get_layerCount(dst_image, &pRegions[r].dstSubresource); + + for (uint32_t layer = 0; layer < layer_count; layer++) { +- resolve_image(&batch, ++ 
resolve_image(cmd_buffer->device, &batch, + src_image, src_image->aux_usage, + pRegions[r].srcSubresource.mipLevel, + pRegions[r].srcSubresource.baseArrayLayer + layer, +@@ -1456,7 +1466,8 @@ anv_image_fast_clear(struct anv_cmd_buffer *cmd_buffer, + blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); + + struct blorp_surf surf; +- get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT, ++ get_blorp_surf_for_anv_image(cmd_buffer->device, image, ++ VK_IMAGE_ASPECT_COLOR_BIT, + image->aux_usage == ISL_AUX_USAGE_NONE ? + ISL_AUX_USAGE_CCS_D : image->aux_usage, + &surf); +@@ -1553,7 +1564,8 @@ anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer) + + assert(src_iview->aspect_mask == dst_iview->aspect_mask); + +- resolve_image(&batch, src_iview->image, src_aux_usage, ++ resolve_image(cmd_buffer->device, &batch, ++ src_iview->image, src_aux_usage, + src_iview->isl.base_level, + src_iview->isl.base_array_layer, + dst_iview->image, dst_aux_usage, +@@ -1590,8 +1602,9 @@ anv_gen8_hiz_op_resolve(struct anv_cmd_buffer *cmd_buffer, + blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); + + struct blorp_surf surf; +- get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_DEPTH_BIT, +- ISL_AUX_USAGE_NONE, &surf); ++ get_blorp_surf_for_anv_image(cmd_buffer->device, image, ++ VK_IMAGE_ASPECT_DEPTH_BIT, ISL_AUX_USAGE_NONE, ++ &surf); + + /* Manually add the aux HiZ surf */ + surf.aux_surf = &image->aux_surface.isl, +@@ -1634,7 +1647,8 @@ anv_ccs_resolve(struct anv_cmd_buffer * const cmd_buffer, + BLORP_BATCH_PREDICATE_ENABLE); + + struct blorp_surf surf; +- get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT, ++ get_blorp_surf_for_anv_image(cmd_buffer->device, image, ++ VK_IMAGE_ASPECT_COLOR_BIT, + image->aux_usage == ISL_AUX_USAGE_CCS_E ? 
+ ISL_AUX_USAGE_CCS_E : ISL_AUX_USAGE_CCS_D, + &surf); +-- +2.14.3 + diff --git a/SOURCES/0003-blorp-Turn-anv_CmdCopyBuffer-into-a-blorp_buffer_cop.patch b/SOURCES/0003-blorp-Turn-anv_CmdCopyBuffer-into-a-blorp_buffer_cop.patch new file mode 100644 index 0000000..f38b276 --- /dev/null +++ b/SOURCES/0003-blorp-Turn-anv_CmdCopyBuffer-into-a-blorp_buffer_cop.patch @@ -0,0 +1,312 @@ +From d9266ae66c9db12a9c2578a33bbd5ebd131b489f Mon Sep 17 00:00:00 2001 +From: Kenneth Graunke +Date: Mon, 28 Aug 2017 15:57:20 -0700 +Subject: [PATCH 3/5] blorp: Turn anv_CmdCopyBuffer into a blorp_buffer_copy() + helper. + +I want to be able to copy between buffer objects using BLORP in the i965 +driver. Anvil already had code to do this, in a reasonably efficient +manner - first using large bpp copies, then smaller bpp copies. + +This patch moves that logic into BLORP as blorp_buffer_copy(), so we +can use it in both drivers. + +Reviewed-by: Jason Ekstrand +Signed-off-by: Lyude +--- + src/intel/blorp/blorp.h | 6 +++ + src/intel/blorp/blorp_blit.c | 119 +++++++++++++++++++++++++++++++++++++++++++ + src/intel/vulkan/anv_blorp.c | 117 +++++++----------------------------------- + 3 files changed, 143 insertions(+), 99 deletions(-) + +diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h +index d19920e87f..e712b4fbb3 100644 +--- a/src/intel/blorp/blorp.h ++++ b/src/intel/blorp/blorp.h +@@ -133,6 +133,12 @@ blorp_copy(struct blorp_batch *batch, + uint32_t dst_x, uint32_t dst_y, + uint32_t src_width, uint32_t src_height); + ++void ++blorp_buffer_copy(struct blorp_batch *batch, ++ struct blorp_address src, ++ struct blorp_address dst, ++ uint64_t size); ++ + void + blorp_fast_clear(struct blorp_batch *batch, + const struct blorp_surf *surf, enum isl_format format, +diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c +index 35008cbbb0..b012a0a0b3 100644 +--- a/src/intel/blorp/blorp_blit.c ++++ b/src/intel/blorp/blorp_blit.c +@@ -2513,3 +2513,122 @@ blorp_copy(struct 
blorp_batch *batch, + + do_blorp_blit(batch, &params, &wm_prog_key, &coords); + } ++ ++static enum isl_format ++isl_format_for_size(unsigned size_B) ++{ ++ switch (size_B) { ++ case 1: return ISL_FORMAT_R8_UINT; ++ case 2: return ISL_FORMAT_R8G8_UINT; ++ case 4: return ISL_FORMAT_R8G8B8A8_UINT; ++ case 8: return ISL_FORMAT_R16G16B16A16_UINT; ++ case 16: return ISL_FORMAT_R32G32B32A32_UINT; ++ default: ++ unreachable("Not a power-of-two format size"); ++ } ++} ++ ++/** ++ * Returns the greatest common divisor of a and b that is a power of two. ++ */ ++static uint64_t ++gcd_pow2_u64(uint64_t a, uint64_t b) ++{ ++ assert(a > 0 || b > 0); ++ ++ unsigned a_log2 = ffsll(a) - 1; ++ unsigned b_log2 = ffsll(b) - 1; ++ ++ /* If either a or b is 0, then a_log2 or b_log2 will be UINT_MAX in which ++ * case, the MIN2() will take the other one. If both are 0 then we will ++ * hit the assert above. ++ */ ++ return 1 << MIN2(a_log2, b_log2); ++} ++ ++static void ++do_buffer_copy(struct blorp_batch *batch, ++ struct blorp_address *src, ++ struct blorp_address *dst, ++ int width, int height, int block_size) ++{ ++ /* The actual format we pick doesn't matter as blorp will throw it away. ++ * The only thing that actually matters is the size.
++ */ ++ enum isl_format format = isl_format_for_size(block_size); ++ ++ UNUSED bool ok; ++ struct isl_surf surf; ++ ok = isl_surf_init(batch->blorp->isl_dev, &surf, ++ .dim = ISL_SURF_DIM_2D, ++ .format = format, ++ .width = width, ++ .height = height, ++ .depth = 1, ++ .levels = 1, ++ .array_len = 1, ++ .samples = 1, ++ .row_pitch = width * block_size, ++ .usage = ISL_SURF_USAGE_TEXTURE_BIT | ++ ISL_SURF_USAGE_RENDER_TARGET_BIT, ++ .tiling_flags = ISL_TILING_LINEAR_BIT); ++ assert(ok); ++ ++ struct blorp_surf src_blorp_surf = { ++ .surf = &surf, ++ .addr = *src, ++ }; ++ ++ struct blorp_surf dst_blorp_surf = { ++ .surf = &surf, ++ .addr = *dst, ++ }; ++ ++ blorp_copy(batch, &src_blorp_surf, 0, 0, &dst_blorp_surf, 0, 0, ++ 0, 0, 0, 0, width, height); ++} ++ ++/* This is maximum possible width/height our HW can handle */ ++#define MAX_SURFACE_DIM (1ull << 14) ++ ++void ++blorp_buffer_copy(struct blorp_batch *batch, ++ struct blorp_address src, ++ struct blorp_address dst, ++ uint64_t size) ++{ ++ uint64_t copy_size = size; ++ ++ /* First, we compute the biggest format that can be used with the ++ * given offsets and size. 
++ */ ++ int bs = 16; ++ bs = gcd_pow2_u64(bs, src.offset); ++ bs = gcd_pow2_u64(bs, dst.offset); ++ bs = gcd_pow2_u64(bs, size); ++ ++ /* First, we make a bunch of max-sized copies */ ++ uint64_t max_copy_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs; ++ while (copy_size >= max_copy_size) { ++ do_buffer_copy(batch, &src, &dst, MAX_SURFACE_DIM, MAX_SURFACE_DIM, bs); ++ copy_size -= max_copy_size; ++ src.offset += max_copy_size; ++ dst.offset += max_copy_size; ++ } ++ ++ /* Now make a max-width copy */ ++ uint64_t height = copy_size / (MAX_SURFACE_DIM * bs); ++ assert(height < MAX_SURFACE_DIM); ++ if (height != 0) { ++ uint64_t rect_copy_size = height * MAX_SURFACE_DIM * bs; ++ do_buffer_copy(batch, &src, &dst, MAX_SURFACE_DIM, height, bs); ++ copy_size -= rect_copy_size; ++ src.offset += rect_copy_size; ++ dst.offset += rect_copy_size; ++ } ++ ++ /* Finally, make a small copy to finish it off */ ++ if (copy_size != 0) { ++ do_buffer_copy(batch, &src, &dst, copy_size / bs, 1, bs); ++ } ++} +diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c +index c00d38b52c..3a64b60178 100644 +--- a/src/intel/vulkan/anv_blorp.c ++++ b/src/intel/vulkan/anv_blorp.c +@@ -546,56 +546,6 @@ isl_format_for_size(unsigned size_B) + } + } + +-static void +-do_buffer_copy(struct blorp_batch *batch, +- struct anv_bo *src, uint64_t src_offset, +- struct anv_bo *dst, uint64_t dst_offset, +- int width, int height, int block_size) +-{ +- struct anv_device *device = batch->blorp->driver_ctx; +- +- /* The actual format we pick doesn't matter as blorp will throw it away. +- * The only thing that actually matters is the size. 
+- */ +- enum isl_format format = isl_format_for_size(block_size); +- +- UNUSED bool ok; +- struct isl_surf surf; +- ok = isl_surf_init(&device->isl_dev, &surf, +- .dim = ISL_SURF_DIM_2D, +- .format = format, +- .width = width, +- .height = height, +- .depth = 1, +- .levels = 1, +- .array_len = 1, +- .samples = 1, +- .row_pitch = width * block_size, +- .usage = ISL_SURF_USAGE_TEXTURE_BIT | +- ISL_SURF_USAGE_RENDER_TARGET_BIT, +- .tiling_flags = ISL_TILING_LINEAR_BIT); +- assert(ok); +- +- struct blorp_surf src_blorp_surf = { +- .surf = &surf, +- .addr = { +- .buffer = src, +- .offset = src_offset, +- }, +- }; +- +- struct blorp_surf dst_blorp_surf = { +- .surf = &surf, +- .addr = { +- .buffer = dst, +- .offset = dst_offset, +- }, +- }; +- +- blorp_copy(batch, &src_blorp_surf, 0, 0, &dst_blorp_surf, 0, 0, +- 0, 0, 0, 0, width, height); +-} +- + /** + * Returns the greatest common divisor of a and b that is a power of two. + */ +@@ -632,48 +582,16 @@ void anv_CmdCopyBuffer( + blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0); + + for (unsigned r = 0; r < regionCount; r++) { +- uint64_t src_offset = src_buffer->offset + pRegions[r].srcOffset; +- uint64_t dst_offset = dst_buffer->offset + pRegions[r].dstOffset; +- uint64_t copy_size = pRegions[r].size; +- +- /* First, we compute the biggest format that can be used with the +- * given offsets and size. 
+- */ +- int bs = 16; +- bs = gcd_pow2_u64(bs, src_offset); +- bs = gcd_pow2_u64(bs, dst_offset); +- bs = gcd_pow2_u64(bs, pRegions[r].size); +- +- /* First, we make a bunch of max-sized copies */ +- uint64_t max_copy_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs; +- while (copy_size >= max_copy_size) { +- do_buffer_copy(&batch, src_buffer->bo, src_offset, +- dst_buffer->bo, dst_offset, +- MAX_SURFACE_DIM, MAX_SURFACE_DIM, bs); +- copy_size -= max_copy_size; +- src_offset += max_copy_size; +- dst_offset += max_copy_size; +- } +- +- /* Now make a max-width copy */ +- uint64_t height = copy_size / (MAX_SURFACE_DIM * bs); +- assert(height < MAX_SURFACE_DIM); +- if (height != 0) { +- uint64_t rect_copy_size = height * MAX_SURFACE_DIM * bs; +- do_buffer_copy(&batch, src_buffer->bo, src_offset, +- dst_buffer->bo, dst_offset, +- MAX_SURFACE_DIM, height, bs); +- copy_size -= rect_copy_size; +- src_offset += rect_copy_size; +- dst_offset += rect_copy_size; +- } ++ struct blorp_address src = { ++ .buffer = src_buffer->bo, ++ .offset = src_buffer->offset + pRegions[r].srcOffset, ++ }; ++ struct blorp_address dst = { ++ .buffer = dst_buffer->bo, ++ .offset = dst_buffer->offset + pRegions[r].dstOffset, ++ }; + +- /* Finally, make a small copy to finish it off */ +- if (copy_size != 0) { +- do_buffer_copy(&batch, src_buffer->bo, src_offset, +- dst_buffer->bo, dst_offset, +- copy_size / bs, 1, bs); +- } ++ blorp_buffer_copy(&batch, src, dst, pRegions[r].size); + } + + blorp_batch_finish(&batch); +@@ -715,15 +633,16 @@ void anv_CmdUpdateBuffer( + + anv_state_flush(cmd_buffer->device, tmp_data); + +- int bs = 16; +- bs = gcd_pow2_u64(bs, dstOffset); +- bs = gcd_pow2_u64(bs, copy_size); ++ struct blorp_address src = { ++ .buffer = &cmd_buffer->device->dynamic_state_pool.block_pool.bo, ++ .offset = tmp_data.offset, ++ }; ++ struct blorp_address dst = { ++ .buffer = dst_buffer->bo, ++ .offset = dst_buffer->offset + dstOffset, ++ }; + +- do_buffer_copy(&batch, +- 
&cmd_buffer->device->dynamic_state_pool.block_pool.bo, +- tmp_data.offset, +- dst_buffer->bo, dst_buffer->offset + dstOffset, +- copy_size / bs, 1, bs); ++ blorp_buffer_copy(&batch, src, dst, copy_size); + + dataSize -= copy_size; + dstOffset += copy_size; +-- +2.14.3 + diff --git a/SOURCES/0004-intel-blorp-Make-the-MOCS-setting-part-of-blorp_addr.patch b/SOURCES/0004-intel-blorp-Make-the-MOCS-setting-part-of-blorp_addr.patch new file mode 100644 index 0000000..97073a2 --- /dev/null +++ b/SOURCES/0004-intel-blorp-Make-the-MOCS-setting-part-of-blorp_addr.patch @@ -0,0 +1,288 @@ +From f70d7f3f4600febac0a6d1f62e14230eace8a67b Mon Sep 17 00:00:00 2001 +From: Jason Ekstrand +Date: Fri, 3 Nov 2017 15:20:08 -0700 +Subject: [PATCH 4/5] intel/blorp: Make the MOCS setting part of blorp_address + +This makes our MOCS settings significantly more flexible. + +Cc: "17.3" +Tested-by: Lyude Paul +Reviewed-by: Kenneth Graunke +Signed-off-by: Lyude +--- + src/intel/blorp/blorp.h | 7 +------ + src/intel/blorp/blorp_genX_exec.h | 16 +++++++-------- + src/intel/vulkan/anv_blorp.c | 11 +++++++--- + src/intel/vulkan/genX_blorp_exec.c | 1 + + src/mesa/drivers/dri/i965/brw_blorp.c | 31 +++++++++++++++-------------- + src/mesa/drivers/dri/i965/genX_blorp_exec.c | 10 ++++++++++ + 6 files changed, 43 insertions(+), 33 deletions(-) + +diff --git a/src/intel/blorp/blorp.h b/src/intel/blorp/blorp.h +index e712b4fbb3..ac45828a42 100644 +--- a/src/intel/blorp/blorp.h ++++ b/src/intel/blorp/blorp.h +@@ -45,12 +45,6 @@ struct blorp_context { + + const struct brw_compiler *compiler; + +- struct { +- uint32_t tex; +- uint32_t rb; +- uint32_t vb; +- } mocs; +- + bool (*lookup_shader)(struct blorp_context *blorp, + const void *key, uint32_t key_size, + uint32_t *kernel_out, void *prog_data_out); +@@ -95,6 +89,7 @@ struct blorp_address { + uint32_t read_domains; + uint32_t write_domain; + uint32_t offset; ++ uint32_t mocs; + }; + + struct blorp_surf +diff --git a/src/intel/blorp/blorp_genX_exec.h 
b/src/intel/blorp/blorp_genX_exec.h +index 565acca929..d0f0299d17 100644 +--- a/src/intel/blorp/blorp_genX_exec.h ++++ b/src/intel/blorp/blorp_genX_exec.h +@@ -269,7 +269,7 @@ blorp_emit_vertex_buffers(struct blorp_batch *batch, + vb[0].VertexBufferIndex = 0; + vb[0].BufferPitch = 3 * sizeof(float); + #if GEN_GEN >= 6 +- vb[0].VertexBufferMOCS = batch->blorp->mocs.vb; ++ vb[0].VertexBufferMOCS = vb[0].BufferStartingAddress.mocs; + #endif + #if GEN_GEN >= 7 + vb[0].AddressModifyEnable = true; +@@ -290,7 +290,7 @@ blorp_emit_vertex_buffers(struct blorp_batch *batch, + vb[1].VertexBufferIndex = 1; + vb[1].BufferPitch = 0; + #if GEN_GEN >= 6 +- vb[1].VertexBufferMOCS = batch->blorp->mocs.vb; ++ vb[1].VertexBufferMOCS = vb[1].BufferStartingAddress.mocs; + #endif + #if GEN_GEN >= 7 + vb[1].AddressModifyEnable = true; +@@ -1235,13 +1235,11 @@ blorp_emit_surface_state(struct blorp_batch *batch, + write_disable_mask |= ISL_CHANNEL_ALPHA_BIT; + } + +- const uint32_t mocs = +- is_render_target ? batch->blorp->mocs.rb : batch->blorp->mocs.tex; +- + isl_surf_fill_state(batch->blorp->isl_dev, state, + .surf = &surf, .view = &surface->view, + .aux_surf = &surface->aux_surf, .aux_usage = aux_usage, +- .mocs = mocs, .clear_color = surface->clear_color, ++ .mocs = surface->addr.mocs, ++ .clear_color = surface->clear_color, + .write_disables = write_disable_mask); + + blorp_surface_reloc(batch, state_offset + isl_dev->ss.addr_offset, +@@ -1363,14 +1361,14 @@ blorp_emit_depth_stencil_config(struct blorp_batch *batch, + if (dw == NULL) + return; + +- struct isl_depth_stencil_hiz_emit_info info = { +- .mocs = batch->blorp->mocs.tex, +- }; ++ struct isl_depth_stencil_hiz_emit_info info = { }; + + if (params->depth.enabled) { + info.view = &params->depth.view; ++ info.mocs = params->depth.addr.mocs; + } else if (params->stencil.enabled) { + info.view = &params->stencil.view; ++ info.mocs = params->stencil.addr.mocs; + } + + if (params->depth.enabled) { +diff --git a/src/intel/vulkan/anv_blorp.c
b/src/intel/vulkan/anv_blorp.c +index 3a64b60178..b7e9524a24 100644 +--- a/src/intel/vulkan/anv_blorp.c ++++ b/src/intel/vulkan/anv_blorp.c +@@ -92,9 +92,6 @@ anv_device_init_blorp(struct anv_device *device) + anv_pipeline_cache_init(&device->blorp_shader_cache, device, true); + blorp_init(&device->blorp, device, &device->isl_dev); + device->blorp.compiler = device->instance->physicalDevice.compiler; +- device->blorp.mocs.tex = device->default_mocs; +- device->blorp.mocs.rb = device->default_mocs; +- device->blorp.mocs.vb = device->default_mocs; + device->blorp.lookup_shader = lookup_blorp_shader; + device->blorp.upload_shader = upload_blorp_shader; + switch (device->info.gen) { +@@ -156,6 +153,7 @@ get_blorp_surf_for_anv_buffer(struct anv_device *device, + .addr = { + .buffer = buffer->bo, + .offset = buffer->offset + offset, ++ .mocs = device->default_mocs, + }, + }; + +@@ -194,6 +192,7 @@ get_blorp_surf_for_anv_image(const struct anv_device *device, + .addr = { + .buffer = image->bo, + .offset = image->offset + surface->offset, ++ .mocs = device->default_mocs, + }, + }; + +@@ -202,6 +201,7 @@ get_blorp_surf_for_anv_image(const struct anv_device *device, + blorp_surf->aux_addr = (struct blorp_address) { + .buffer = image->bo, + .offset = image->offset + image->aux_surface.offset, ++ .mocs = device->default_mocs, + }; + blorp_surf->aux_usage = aux_usage; + } +@@ -585,10 +585,12 @@ void anv_CmdCopyBuffer( + struct blorp_address src = { + .buffer = src_buffer->bo, + .offset = src_buffer->offset + pRegions[r].srcOffset, ++ .mocs = cmd_buffer->device->default_mocs, + }; + struct blorp_address dst = { + .buffer = dst_buffer->bo, + .offset = dst_buffer->offset + pRegions[r].dstOffset, ++ .mocs = cmd_buffer->device->default_mocs, + }; + + blorp_buffer_copy(&batch, src, dst, pRegions[r].size); +@@ -636,10 +638,12 @@ void anv_CmdUpdateBuffer( + struct blorp_address src = { + .buffer = &cmd_buffer->device->dynamic_state_pool.block_pool.bo, + .offset = tmp_data.offset, ++ 
.mocs = cmd_buffer->device->default_mocs, + }; + struct blorp_address dst = { + .buffer = dst_buffer->bo, + .offset = dst_buffer->offset + dstOffset, ++ .mocs = cmd_buffer->device->default_mocs, + }; + + blorp_buffer_copy(&batch, src, dst, copy_size); +@@ -1530,6 +1534,7 @@ anv_gen8_hiz_op_resolve(struct anv_cmd_buffer *cmd_buffer, + surf.aux_addr = (struct blorp_address) { + .buffer = image->bo, + .offset = image->offset + image->aux_surface.offset, ++ .mocs = cmd_buffer->device->default_mocs, + }; + surf.aux_usage = ISL_AUX_USAGE_HIZ; + +diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c +index f041fc71b5..b4b05c7022 100644 +--- a/src/intel/vulkan/genX_blorp_exec.c ++++ b/src/intel/vulkan/genX_blorp_exec.c +@@ -134,6 +134,7 @@ blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size, + *addr = (struct blorp_address) { + .buffer = &cmd_buffer->device->dynamic_state_pool.block_pool.bo, + .offset = vb_state.offset, ++ .mocs = cmd_buffer->device->default_mocs, + }; + + return vb_state.map; +diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c +index eb08de438d..2b7d960f0c 100644 +--- a/src/mesa/drivers/dri/i965/brw_blorp.c ++++ b/src/mesa/drivers/dri/i965/brw_blorp.c +@@ -82,15 +82,9 @@ brw_blorp_init(struct brw_context *brw) + brw->blorp.exec = gen5_blorp_exec; + break; + case 6: +- brw->blorp.mocs.tex = 0; +- brw->blorp.mocs.rb = 0; +- brw->blorp.mocs.vb = 0; + brw->blorp.exec = gen6_blorp_exec; + break; + case 7: +- brw->blorp.mocs.tex = GEN7_MOCS_L3; +- brw->blorp.mocs.rb = GEN7_MOCS_L3; +- brw->blorp.mocs.vb = GEN7_MOCS_L3; + if (brw->is_haswell) { + brw->blorp.exec = gen75_blorp_exec; + } else { +@@ -98,21 +92,12 @@ brw_blorp_init(struct brw_context *brw) + } + break; + case 8: +- brw->blorp.mocs.tex = BDW_MOCS_WB; +- brw->blorp.mocs.rb = BDW_MOCS_PTE; +- brw->blorp.mocs.vb = BDW_MOCS_WB; + brw->blorp.exec = gen8_blorp_exec; + break; + case 9: +- brw->blorp.mocs.tex = SKL_MOCS_WB; 
+- brw->blorp.mocs.rb = SKL_MOCS_PTE; +- brw->blorp.mocs.vb = SKL_MOCS_WB; + brw->blorp.exec = gen9_blorp_exec; + break; + case 10: +- brw->blorp.mocs.tex = CNL_MOCS_WB; +- brw->blorp.mocs.rb = CNL_MOCS_PTE; +- brw->blorp.mocs.vb = CNL_MOCS_WB; + brw->blorp.exec = gen10_blorp_exec; + break; + default: +@@ -123,6 +108,20 @@ brw_blorp_init(struct brw_context *brw) + brw->blorp.upload_shader = brw_blorp_upload_shader; + } + ++static uint32_t tex_mocs[] = { ++ [7] = GEN7_MOCS_L3, ++ [8] = BDW_MOCS_WB, ++ [9] = SKL_MOCS_WB, ++ [10] = CNL_MOCS_WB, ++}; ++ ++static uint32_t rb_mocs[] = { ++ [7] = GEN7_MOCS_L3, ++ [8] = BDW_MOCS_PTE, ++ [9] = SKL_MOCS_PTE, ++ [10] = CNL_MOCS_PTE, ++}; ++ + static void + blorp_surf_for_miptree(struct brw_context *brw, + struct blorp_surf *surf, +@@ -155,6 +154,7 @@ blorp_surf_for_miptree(struct brw_context *brw, + .read_domains = is_render_target ? I915_GEM_DOMAIN_RENDER : + I915_GEM_DOMAIN_SAMPLER, + .write_domain = is_render_target ? I915_GEM_DOMAIN_RENDER : 0, ++ .mocs = is_render_target ? rb_mocs[devinfo->gen] : tex_mocs[devinfo->gen], + }; + + surf->aux_usage = aux_usage; +@@ -184,6 +184,7 @@ blorp_surf_for_miptree(struct brw_context *brw, + .read_domains = is_render_target ? I915_GEM_DOMAIN_RENDER : + I915_GEM_DOMAIN_SAMPLER, + .write_domain = is_render_target ? 
I915_GEM_DOMAIN_RENDER : 0, ++ .mocs = surf->addr.mocs, + }; + + if (mt->mcs_buf) { +diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c +index 62d5c4a792..74c1add281 100644 +--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c ++++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c +@@ -145,6 +145,16 @@ blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size, + .read_domains = I915_GEM_DOMAIN_VERTEX, + .write_domain = 0, + .offset = offset, ++ ++#if GEN_GEN == 10 ++ .mocs = CNL_MOCS_WB, ++#elif GEN_GEN == 9 ++ .mocs = SKL_MOCS_WB, ++#elif GEN_GEN == 8 ++ .mocs = BDW_MOCS_WB, ++#elif GEN_GEN == 7 ++ .mocs = GEN7_MOCS_L3, ++#endif + }; + + return data; +-- +2.14.3 + diff --git a/SOURCES/0005-i965-Use-PTE-MOCS-for-all-external-buffers.patch b/SOURCES/0005-i965-Use-PTE-MOCS-for-all-external-buffers.patch new file mode 100644 index 0000000..367e04a --- /dev/null +++ b/SOURCES/0005-i965-Use-PTE-MOCS-for-all-external-buffers.patch @@ -0,0 +1,136 @@ +From 066ea39ce113d8fe1992a6892f7094a6dfae6242 Mon Sep 17 00:00:00 2001 +From: Jason Ekstrand +Date: Fri, 3 Nov 2017 15:26:17 -0700 +Subject: [PATCH 5/5] i965: Use PTE MOCS for all external buffers + +We were already using PTE for all render targets in case one happened to +get scanned out. However, this still wasn't 100% correct because there +are still possibly cases where we may want to texture from an external +buffer even though we don't know the caching mode. This can happen, for +instance, on buffers imported from another GPU via prime. 
+ +Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101691 +Cc: "17.3" +Tested-by: Lyude Paul +Reviewed-by: Kenneth Graunke +Signed-off-by: Lyude +--- + src/mesa/drivers/dri/i965/brw_blorp.c | 7 ++++--- + src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 23 ++++++++++++++++------- + 2 files changed, 20 insertions(+), 10 deletions(-) + +diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c +index 2b7d960f0c..48b3da7375 100644 +--- a/src/mesa/drivers/dri/i965/brw_blorp.c ++++ b/src/mesa/drivers/dri/i965/brw_blorp.c +@@ -108,14 +108,14 @@ brw_blorp_init(struct brw_context *brw) + brw->blorp.upload_shader = brw_blorp_upload_shader; + } + +-static uint32_t tex_mocs[] = { ++static uint32_t wb_mocs[] = { + [7] = GEN7_MOCS_L3, + [8] = BDW_MOCS_WB, + [9] = SKL_MOCS_WB, + [10] = CNL_MOCS_WB, + }; + +-static uint32_t rb_mocs[] = { ++static uint32_t pte_mocs[] = { + [7] = GEN7_MOCS_L3, + [8] = BDW_MOCS_PTE, + [9] = SKL_MOCS_PTE, +@@ -154,7 +154,8 @@ blorp_surf_for_miptree(struct brw_context *brw, + .read_domains = is_render_target ? I915_GEM_DOMAIN_RENDER : + I915_GEM_DOMAIN_SAMPLER, + .write_domain = is_render_target ? I915_GEM_DOMAIN_RENDER : 0, +- .mocs = is_render_target ? rb_mocs[devinfo->gen] : tex_mocs[devinfo->gen], ++ .mocs = (is_render_target || mt->bo->external) ? 
pte_mocs[devinfo->gen] : ++ wb_mocs[devinfo->gen], + }; + + surf->aux_usage = aux_usage; +diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +index 17e760c329..87f1aa379d 100644 +--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c ++++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +@@ -60,20 +60,28 @@ enum { + INTEL_AUX_BUFFER_DISABLED = 1 << 1, + }; + +-uint32_t tex_mocs[] = { ++uint32_t wb_mocs[] = { + [7] = GEN7_MOCS_L3, + [8] = BDW_MOCS_WB, + [9] = SKL_MOCS_WB, + [10] = CNL_MOCS_WB, + }; + +-uint32_t rb_mocs[] = { ++uint32_t pte_mocs[] = { + [7] = GEN7_MOCS_L3, + [8] = BDW_MOCS_PTE, + [9] = SKL_MOCS_PTE, + [10] = CNL_MOCS_PTE, + }; + ++static uint32_t ++get_tex_mocs(const struct brw_context *brw, struct brw_bo *bo) ++{ ++ const struct gen_device_info *devinfo = &brw->screen->devinfo; ++ ++ return (bo && bo->external ? pte_mocs : wb_mocs)[devinfo->gen]; ++} ++ + static void + get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt, + GLenum target, struct isl_view *view, +@@ -244,7 +252,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, + + uint32_t offset; + brw_emit_surface_state(brw, mt, mt->target, view, aux_usage, +- rb_mocs[brw->gen], ++ pte_mocs[brw->gen], + &offset, surf_index, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER); +@@ -589,7 +597,7 @@ brw_update_texture_surface(struct gl_context *ctx, + aux_usage = ISL_AUX_USAGE_NONE; + + brw_emit_surface_state(brw, mt, mt->target, view, aux_usage, +- tex_mocs[brw->gen], ++ get_tex_mocs(brw, mt->bo), + surf_offset, surf_index, + I915_GEM_DOMAIN_SAMPLER, 0); + } +@@ -615,7 +623,7 @@ brw_emit_buffer_surface_state(struct brw_context *brw, + .size = buffer_size, + .format = surface_format, + .stride = pitch, +- .mocs = tex_mocs[brw->gen]); ++ .mocs = get_tex_mocs(brw, bo)); + + if (bo) { + brw_emit_reloc(&brw->batch, *out_offset + brw->isl_dev.ss.addr_offset, +@@ -1164,7 +1172,7 @@ 
update_renderbuffer_read_surfaces(struct brw_context *brw) + aux_usage = ISL_AUX_USAGE_NONE; + + brw_emit_surface_state(brw, irb->mt, target, view, aux_usage, +- tex_mocs[brw->gen], ++ get_tex_mocs(brw, irb->mt->bo), + surf_offset, surf_index, + I915_GEM_DOMAIN_SAMPLER, 0); + +@@ -1657,7 +1665,8 @@ update_image_surface(struct brw_context *brw, + view.base_array_layer, + view.array_len)); + brw_emit_surface_state(brw, mt, mt->target, view, +- ISL_AUX_USAGE_NONE, tex_mocs[brw->gen], ++ ISL_AUX_USAGE_NONE, ++ get_tex_mocs(brw, mt->bo), + surf_offset, surf_index, + I915_GEM_DOMAIN_SAMPLER, + access == GL_READ_ONLY ? 0 : +-- +2.14.3 + diff --git a/SOURCES/make-git-snapshot.sh b/SOURCES/make-git-snapshot.sh index 318d45c..0d774f8 100755 --- a/SOURCES/make-git-snapshot.sh +++ b/SOURCES/make-git-snapshot.sh @@ -15,11 +15,11 @@ DIRNAME=mesa-$( date +%Y%m%d ) echo REF ${REF:+--reference $REF} echo DIRNAME $DIRNAME -echo HEAD ${1:-17.0} +echo HEAD ${1:-17.2} rm -rf $DIRNAME -git clone --depth 1 ${REF:+--reference $REF} --branch 17.0 \ +git clone --depth 1 ${REF:+--reference $REF} --branch 17.2 \ git://git.freedesktop.org/git/mesa/mesa $DIRNAME GIT_DIR=$DIRNAME/.git git archive --format=tar --prefix=$DIRNAME/ ${1:-HEAD} \ diff --git a/SOURCES/mesa-17.3-final.patch b/SOURCES/mesa-17.3-final.patch new file mode 100644 index 0000000..b1e75b5 --- /dev/null +++ b/SOURCES/mesa-17.3-final.patch @@ -0,0 +1,194 @@ +diff --git a/VERSION b/VERSION +index d3b2c4f..afbe633 100644 +--- a/VERSION ++++ b/VERSION +@@ -1 +1 @@ +-17.2.2 ++17.2.3 +diff --git a/docs/relnotes/17.2.3.html b/docs/relnotes/17.2.3.html +new file mode 100644 +index 0000000..6e2aea6 +--- /dev/null ++++ b/docs/relnotes/17.2.3.html +@@ -0,0 +1,181 @@ ++ ++ ++ ++ ++ Mesa Release Notes ++ ++ ++ ++ ++
++

The Mesa 3D Graphics Library

++
++ ++ ++
++ ++

Mesa 17.2.3 Release Notes / October 19, 2017

++ ++

++Mesa 17.2.3 is a bug fix release which fixes bugs found since the 17.2.2 release. ++

++

++Mesa 17.2.3 implements the OpenGL 4.5 API, but the version reported by ++glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / ++glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. ++Some drivers don't support all the features required in OpenGL 4.5. OpenGL ++4.5 is only available if requested at context creation ++because compatibility contexts are not supported. ++

++ ++ ++

SHA256 checksums

++
++fb305eecfeec1fd771fdc96fff973c51871f7bd35fd2bd56cacc27b4b8823220  mesa-17.2.3.tar.gz
++a0b0ec8f7b24dd044d7ab30a8c7e6d3767521e245f88d4ed5dd93315dc56f837  mesa-17.2.3.tar.xz
++
++ ++ ++

New features

++

None

++ ++ ++

Bug fixes

++ ++
    ++ ++
  • Bug 101832 - [PATCH][regression][bisect] Xorg fails to start after f50aa21456d82c8cb6fbaa565835f1acc1720a5d
  • ++ ++
  • Bug 102852 - Scons: Support the new Scons 3.0.0
  • ++ ++
  • Bug 102940 - Regression: Vulkan KMS rendering crashes since 17.2
  • ++ ++
++ ++ ++

Changes

++ ++

Alex Smith (1):

++
    ++
  • radv: Add R16G16B16A16_SNORM fast clear support
  • ++
++ ++

Bas Nieuwenhuizen (2):

++
    ++
  • nir/spirv: Allow loop breaks in a switch body.
  • ++
  • radv: Only set the MTYPE flags on GFX9+.
  • ++
++ ++

Ben Crocker (4):

++
    ++
  • gallivm: fix typo in debug_printf message
  • ++
  • gallivm: allow additional llc options
  • ++
  • gallivm/ppc64le: adjust VSX code generation control.
  • ++
  • gallivm/ppc64le: allow environmental control of Altivec code generation
  • ++
++ ++

Daniel Stone (2):

++
    ++
  • egl/wayland: Check queryImage return for wl_buffer
  • ++
  • egl/wayland: Don't use dmabuf with no modifiers
  • ++
++ ++

Dave Airlie (2):

++
    ++
  • radv: emit fmuladd instead of fma to llvm.
  • ++
  • radv: lower ffma in nir.
  • ++
++ ++

Emil Velikov (6):

++
    ++
  • cherry-ignore: add "anv: Remove unreachable cases from isl_format_for_size"
  • ++
  • cherry-ignore: add "anv/wsi: Allocate enough memory for the entire image"
  • ++
  • swr/rast: do not crash on NULL strings returned by getenv
  • ++
  • wayland-drm: use a copy of the wayland_drm_callbacks struct
  • ++
  • eglmesaext: add forward declaration for struct wl_buffers
  • ++
  • Update version to 17.2.3
  • ++
++ ++

Eric Engestrom (1):

++
    ++
  • scons: use python3-compatible print()
  • ++
++ ++

Ilia Mirkin (2):

++
    ++
  • nv50/ir: fix 64-bit integer shifts
  • ++
  • nv50,nvc0: fix push hint logic in presence of a start offset
  • ++
++ ++

Jason Ekstrand (6):

++
    ++
  • intel/compiler: Don't cmod propagate into a saturated operation
  • ++
  • intel/compiler: Don't propagate cmod into integer multiplies
  • ++
  • glsl/blob: Return false from ensure_can_read on overrun
  • ++
  • glsl/blob: Return false from grow_to_fit if we've ever failed
  • ++
  • nir/opcodes: Fix constant-folding of ufind_msb
  • ++
  • nir: Get rid of the variable on vote intrinsics
  • ++
++ ++

Juan A. Suarez Romero (1):

++
    ++
  • docs: add sha256 checksums for 17.2.2
  • ++
++ ++

Józef Kucia (3):

++
    ++
  • anv: Fix vkCmdFillBuffer()
  • ++
  • spirv: Fix SpvOpAtomicISub
  • ++
  • anv: Do not assert() on VK_ATTACHMENT_UNUSED
  • ++
++ ++

Leo Liu (3):

++
    ++
  • st/va: use pipe transfer_map to map upload buffer
  • ++
  • st/vdpau: don't re-allocate interlaced buffer with packed YUV format
  • ++
  • st/va: don't re-allocate interlaced buffer with pakced format
  • ++
++ ++

Lionel Landwerlin (4):

++
    ++
  • intel: compiler: vec4: add missing default 0 lod
  • ++
  • anv/cmd_buffer: fix push descriptors with set > 0
  • ++
  • anv/cmd_buffer: Reset state in cmd_buffer_destroy
  • ++
  • anv: bo_cache: allow importing a BO larger than needed
  • ++
++ ++

Marek Olšák (3):

++
    ++
  • mesa: fix texture updates for ATI_fragment_shader
  • ++
  • st/mesa: don't use pipe_surface for passing information about EGLImage
  • ++
  • glsl_to_tgsi: fix instruction order for bindless textures
  • ++
++ ++

Nicolai Hähnle (14):

++
    ++
  • st/glsl_to_tgsi: fix conditional assignments to packed shader outputs
  • ++
  • amd/common: fix build_cube_select
  • ++
  • radeonsi/gfx9: fix geometry shaders without output vertices
  • ++
  • util/queue: fix a race condition in the fence code
  • ++
  • glsl/lower_instruction: handle denorms and overflow in ldexp correctly
  • ++
  • radeonsi: move current_rast_prim to r600_common_context
  • ++
  • radeonsi: don't discard points and lines
  • ++
  • radeonsi: deduce rast_prim correctly for tessellation point mode
  • ++
  • radeonsi: fix maximum advertised point size / line width
  • ++
  • st/mesa: don't clobber glGetInternalformat* buffer for GL_NUM_SAMPLE_COUNTS
  • ++
  • st/glsl_to_tgsi: fix indirect access to 64-bit integer
  • ++
  • st/glsl_to_tgsi: fix a use-after-free in merge_two_dsts
  • ++
  • radeonsi: clamp depth comparison value only for fixed point formats
  • ++
  • radeonsi: clamp border colors for upgraded depth textures
  • ++
++ ++

Rob Clark (2):

++
    ++
  • freedreno/a5xx: align height to GMEM
  • ++
  • freedreno/a5xx: fix missing restore state
  • ++
++ ++ ++
++ ++ diff --git a/SPECS/mesa.spec b/SPECS/mesa.spec index 5046980..e676192 100644 --- a/SPECS/mesa.spec +++ b/SPECS/mesa.spec @@ -1,17 +1,17 @@ %if 0%{?rhel} %define with_private_llvm 1 -%define with_vdpau 1 %else %define with_private_llvm 0 +%endif + %define with_vdpau 1 %define with_wayland 1 -%endif %ifnarch ppc %define with_radeonsi 1 %endif -%ifarch %{arm} +%ifarch %{arm} aarch64 %define with_freedreno 1 %endif @@ -22,7 +22,7 @@ %define with_llvm 1 %endif -%ifarch s390 s390x aarch64 +%ifarch s390 s390x %define with_hardware 0 %ifarch s390 %define base_drivers swrast @@ -55,13 +55,13 @@ %define _default_patch_fuzz 2 -%define gitdate 20170307 +%define gitdate 20171019 #% define snapshot Summary: Mesa graphics libraries Name: mesa -Version: 17.0.1 -Release: 6.%{gitdate}%{?dist} +Version: 17.2.3 +Release: 8.%{gitdate}%{?dist} License: MIT Group: System Environment/Libraries URL: http://www.mesa3d.org @@ -77,24 +77,31 @@ Source3: make-git-snapshot.sh # Fedora opts to ignore the optional part of clause 2 and treat that code as 2 clause BSD. 
Source4: Mesa-MLAA-License-Clarification-Email.txt +Patch0: mesa-17.3-final.patch Patch1: nv50-fix-build.patch +Patch2: 0001-mesa-Squash-merge-of-S3TC-support.patch Patch9: mesa-8.0-llvmpipe-shmget.patch Patch12: mesa-8.0.1-fix-16bpp.patch Patch15: mesa-9.2-hardware-float.patch Patch20: mesa-10.2-evergreen-big-endian.patch -Patch30: 0001-glsl-Allow-compatibility-shaders-with-MESA_GL_VERSIO.patch - -Patch40: 0001-Revert-draw-use-SoA-fetch-not-AoS-one.patch +# For bz1503861, fix visual artifacts on DRI PRIME offloading +# Feel free to drop these patches during the next mesa rebase (>17.2.3) +Patch30: 0001-intel-blorp-Use-mocs.tex-for-depth-stencil.patch +Patch31: 0002-anv-blorp-Add-a-device-parameter-to-blorp_surf_for_a.patch +Patch32: 0003-blorp-Turn-anv_CmdCopyBuffer-into-a-blorp_buffer_cop.patch +Patch33: 0004-intel-blorp-Make-the-MOCS-setting-part-of-blorp_addr.patch +Patch34: 0005-i965-Use-PTE-MOCS-for-all-external-buffers.patch -Patch50: 0001-gallivm-Make-sure-module-has-the-correct-data-layout.patch +Patch40: 0001-intel-Add-more-Coffee-Lake-PCI-IDs.patch BuildRequires: pkgconfig autoconf automake libtool %if %{with_hardware} BuildRequires: kernel-headers BuildRequires: xorg-x11-server-devel %endif -BuildRequires: libdrm-devel >= 2.4.60 +BuildRequires: libatomic +BuildRequires: libdrm-devel >= 2.4.83 BuildRequires: libXxf86vm-devel BuildRequires: expat-devel BuildRequires: xorg-x11-proto-devel @@ -112,7 +119,7 @@ BuildRequires: python-mako BuildRequires: gettext %if 0%{?with_llvm} %if 0%{?with_private_llvm} -BuildRequires: mesa-private-llvm-devel >= 3.9 +BuildRequires: llvm-private-devel >= 5.0 %else BuildRequires: llvm-devel >= 3.0 %endif @@ -121,9 +128,10 @@ BuildRequires: elfutils-libelf-devel BuildRequires: libxml2-python BuildRequires: libudev-devel BuildRequires: bison flex -%if !0%{?rhel} -BuildRequires: pkgconfig(wayland-client) >= %{min_wayland_version} -BuildRequires: pkgconfig(wayland-server) >= %{min_wayland_version} +%if %{with wayland} 
+BuildRequires: pkgconfig(wayland-client) >= 1.11 +BuildRequires: pkgconfig(wayland-server) >= 1.11 +BuildRequires: pkgconfig(wayland-protocols) >= 1.8.0 %endif BuildRequires: mesa-libGL-devel %if 0%{?with_vdpau} @@ -139,6 +147,7 @@ Summary: Mesa libGL runtime libraries and DRI drivers Group: System Environment/Libraries Provides: libGL Requires: mesa-libglapi = %{version}-%{release} +Requires: libdrm >= 2.4.83 %description libGL Mesa libGL runtime library. @@ -171,6 +180,7 @@ Mesa driver filesystem Summary: Mesa-based DRI drivers Group: User Interface/X Hardware Support Requires: mesa-filesystem%{?_isa} +Requires: libdrm >= 2.4.83 Obsoletes: mesa-dri1-drivers < 7.12 Obsoletes: mesa-dri-llvmcore <= 7.12 %description dri-drivers @@ -237,6 +247,7 @@ Mesa offscreen rendering development package Summary: Mesa gbm library Group: System Environment/Libraries Provides: libgbm +Requires: libdrm >= 2.4.83 Requires: mesa-libglapi = %{version}-%{release} %description libgbm @@ -253,7 +264,7 @@ Provides: libgbm-devel Mesa libgbm development package -%if !0%{?rhel} +%if %{with wayland} %package libwayland-egl Summary: Mesa libwayland-egl library Group: System Environment/Libraries @@ -314,7 +325,9 @@ The drivers with support for the Vulkan API. 
%setup -q -n mesa-%{gitdate} # make sure you run sanitize-tarball.sh on mesa source tarball or next line will exit grep -q ^/ src/gallium/auxiliary/vl/vl_decoder.c && exit 1 +%patch0 -p1 -b .mesa17.2.3 %patch1 -p1 -b .nv50rtti +%patch2 -p1 -b .s3tc # this fastpath is: # - broken with swrast classic @@ -330,15 +343,17 @@ grep -q ^/ src/gallium/auxiliary/vl/vl_decoder.c && exit 1 %patch15 -p1 -b .hwfloat #patch20 -p1 -b .egbe -%patch30 -p1 -b .glslfix - -%patch40 -p1 -b .bigendian-fix +%patch30 -p1 -b .bz1503861_patch1 +%patch31 -p1 -b .bz1503861_patch2 +%patch32 -p1 -b .bz1503861_patch3 +%patch33 -p1 -b .bz1503861_patch4 +%patch34 -p1 -b .bz1503861_patch5 -%patch50 -p1 -b .gallivm-datalayout-fix +%patch40 -p1 -b .cfl_ids %if 0%{with_private_llvm} -sed -i 's/\[llvm-config\]/\[mesa-private-llvm-config-%{__isa_bits}\]/g' configure.ac -sed -i 's/`$LLVM_CONFIG --version`/$LLVM_VERSION_MAJOR.$LLVM_VERSION_MINOR-mesa/' configure.ac +sed -i 's/\[llvm-config\]/\[llvm-private-config-%{__isa_bits}\]/g' configure.ac +sed -i 's/`$LLVM_CONFIG --version`/$LLVM_VERSION_MAJOR.$LLVM_VERSION_MINOR-rhel/' configure.ac %endif # need to use libdrm_nouveau2 on F17 @@ -389,7 +404,7 @@ export CXXFLAGS="$RPM_OPT_FLAGS -fno-rtti -fno-exceptions" --enable-dri \ %if %{with_hardware} %{?with_vmware:--enable-xa} \ - --with-gallium-drivers=%{?with_vmware:svga,}%{?with_radeonsi:radeonsi,}%{?with_llvm:swrast,r600,r300}%{?with_freedreno:freedreno,},nouveau,virgl \ + --with-gallium-drivers=%{?with_vmware:svga,}%{?with_radeonsi:radeonsi,}%{?with_llvm:swrast,r600,r300,}%{?with_freedreno:freedreno,}nouveau,virgl \ %else --with-gallium-drivers=%{?with_llvm:swrast} \ %endif @@ -513,6 +528,7 @@ rm -rf $RPM_BUILD_ROOT %endif %if 0%{?with_freedreno} %{_libdir}/dri/kgsl_dri.so +%{_libdir}/dri/msm_dri.so %endif %{_libdir}/dri/nouveau_dri.so %{_libdir}/dri/virtio_gpu_dri.so @@ -607,7 +623,7 @@ rm -rf $RPM_BUILD_ROOT %{_includedir}/gbm.h %{_libdir}/pkgconfig/gbm.pc -%if !0%{?rhel} +%if %{with wayland} %files 
libwayland-egl %defattr(-,root,root,-) %{_libdir}/libwayland-egl.so.1 @@ -654,6 +670,42 @@ rm -rf $RPM_BUILD_ROOT %endif %changelog +* Mon Jan 15 2018 Dave Airlie - 17.2.3-8.20171019 +- Add missing Intel CFL ids. + +* Thu Nov 30 2017 Lyude Paul - 17.2.3-7.20171019 +- Add patches to fix cache lines with DRI_PRIME + amdgpu (#1503861) + +* Fri Nov 17 2017 Dave Airlie - 17.2.3-6.20171019 +- fix libgbm/dri-drivers requires on libdrm + +* Wed Oct 25 2017 Yaakov Selkowitz - 17.2.3-5.20171019 +- Enable hardware drivers on aarch64 (#1358444) + +* Tue Oct 24 2017 Dave Airlie - 17.2.3-4.20171019 +- Update gitdate and clean out sources. + +* Tue Oct 24 2017 Dave Airlie - 17.2.3-3.20171019 +- Add final 17.2.3 patch. + +* Thu Oct 19 2017 Tom Stellard - 17.2.3-2.20171019 +- Switch to llvm-private + +* Thu Oct 19 2017 Dave Airlie - 17.2.3-1.20171019 +- rebase to 17.2.3 + +* Thu Oct 05 2017 Dave Airlie - 17.2.2-1.20171005 +- rebase to 17.2.2 final release + s3tc support + +* Thu Sep 28 2017 Olivier Fourdan - 17.2.0-2.20170911 +- Enable wayland-egl, add dependencies on wayland-protocols (#1481412) + +* Mon Sep 11 2017 Dave Airlie - 17.2.0-1.20170911 +- rebase to 17.2.0 final release + +* Tue Aug 15 2017 Dave Airlie - 17.2.0-0.1.20170815 +- rebase to 17.2-rc4 + * Thu May 11 2017 Dave Airlie - 17.0.1-6.20170307 - enable VDPAU drivers (#1297276)