Blame SOURCES/gnutls-3.7.6-cpuid-fixes.patch

cd0318
From 8ff391fa011e02c88b0d099061ca62e88ab68011 Mon Sep 17 00:00:00 2001
cd0318
From: Daiki Ueno <ueno@gnu.org>
cd0318
Date: Mon, 15 Aug 2022 09:39:18 +0900
cd0318
Subject: [PATCH] accelerated: clear AVX bits if it cannot be queried through
cd0318
 XSAVE
cd0318
MIME-Version: 1.0
cd0318
Content-Type: text/plain; charset=UTF-8
cd0318
Content-Transfer-Encoding: 8bit
cd0318
cd0318
The algorithm to detect AVX is described in 14.3 of "Intel® 64 and IA-32
cd0318
Architectures Software Developer’s Manual".
cd0318
cd0318
GnuTLS previously only followed that algorithm when registering the
cd0318
crypto backend, while the CRYPTOGAMS-derived SHA assembly code expects
cd0318
that the extension bits are propagated to _gnutls_x86_cpuid_s.
cd0318
cd0318
Signed-off-by: Daiki Ueno <ueno@gnu.org>
cd0318
---
cd0318
 lib/accelerated/x86/x86-common.c | 49 +++++++++++++++++++++++++-------
cd0318
 1 file changed, 38 insertions(+), 11 deletions(-)
cd0318
cd0318
diff --git a/lib/accelerated/x86/x86-common.c b/lib/accelerated/x86/x86-common.c
cd0318
index 7ddaa594e6..b7a88ddeca 100644
cd0318
--- a/lib/accelerated/x86/x86-common.c
cd0318
+++ b/lib/accelerated/x86/x86-common.c
cd0318
@@ -81,6 +81,26 @@ unsigned int _gnutls_x86_cpuid_s[4];
cd0318
 # define bit_AVX 0x10000000
cd0318
 #endif
cd0318
 
cd0318
+#ifndef bit_AVX2
cd0318
+# define bit_AVX2 0x00000020
cd0318
+#endif
cd0318
+
cd0318
+#ifndef bit_AVX512F
cd0318
+# define bit_AVX512F 0x00010000
cd0318
+#endif
cd0318
+
cd0318
+#ifndef bit_AVX512IFMA
cd0318
+# define bit_AVX512IFMA 0x00200000
cd0318
+#endif
cd0318
+
cd0318
+#ifndef bit_AVX512BW
cd0318
+# define bit_AVX512BW 0x40000000
cd0318
+#endif
cd0318
+
cd0318
+#ifndef bit_AVX512VL
cd0318
+# define bit_AVX512VL 0x80000000
cd0318
+#endif
cd0318
+
cd0318
 #ifndef bit_OSXSAVE
cd0318
 # define bit_OSXSAVE 0x8000000
cd0318
 #endif
cd0318
@@ -89,10 +109,6 @@ unsigned int _gnutls_x86_cpuid_s[4];
cd0318
 # define bit_MOVBE 0x00400000
cd0318
 #endif
cd0318
 
cd0318
-#ifndef OSXSAVE_MASK
cd0318
-# define OSXSAVE_MASK (bit_OSXSAVE|bit_MOVBE)
cd0318
-#endif
cd0318
-
cd0318
 #define bit_PADLOCK (0x3 << 6)
cd0318
 #define bit_PADLOCK_PHE (0x3 << 10)
cd0318
 #define bit_PADLOCK_PHE_SHA512 (0x3 << 25)
cd0318
@@ -148,7 +164,7 @@ static unsigned check_4th_gen_intel_features(unsigned ecx)
cd0318
 {
cd0318
 	uint32_t xcr0;
cd0318
 
cd0318
-	if ((ecx & OSXSAVE_MASK) != OSXSAVE_MASK)
cd0318
+	if ((ecx & bit_OSXSAVE) != bit_OSXSAVE)
cd0318
 		return 0;
cd0318
 
cd0318
 #if defined(_MSC_VER) && !defined(__clang__)
cd0318
@@ -190,8 +206,9 @@ static void capabilities_to_intel_cpuid(unsigned capabilities)
cd0318
 	}
cd0318
 
cd0318
 	if (capabilities & INTEL_AVX) {
cd0318
-		if ((a[1] & bit_AVX) && check_4th_gen_intel_features(a[1])) {
cd0318
-			_gnutls_x86_cpuid_s[1] |= bit_AVX|OSXSAVE_MASK;
cd0318
+		if ((a[1] & bit_AVX) && (a[1] & bit_MOVBE) &&
cd0318
+		    check_4th_gen_intel_features(a[1])) {
cd0318
+			_gnutls_x86_cpuid_s[1] |= bit_AVX|bit_MOVBE;
cd0318
 		} else {
cd0318
 			_gnutls_debug_log
cd0318
 			    ("AVX acceleration requested but not available\n");
cd0318
@@ -236,10 +253,7 @@ static unsigned check_sha(void)
cd0318
 #ifdef ASM_X86_64
cd0318
 static unsigned check_avx_movbe(void)
cd0318
 {
cd0318
-	if (check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1]) == 0)
cd0318
-		return 0;
cd0318
-
cd0318
-	return ((_gnutls_x86_cpuid_s[1] & bit_AVX));
cd0318
+	return (_gnutls_x86_cpuid_s[1] & (bit_AVX|bit_MOVBE)) == (bit_AVX|bit_MOVBE);
cd0318
 }
cd0318
 
cd0318
 static unsigned check_pclmul(void)
cd0318
@@ -884,6 +898,19 @@ void register_x86_intel_crypto(unsigned capabilities)
cd0318
 	if (capabilities == 0) {
cd0318
 		if (!read_cpuid_vals(_gnutls_x86_cpuid_s))
cd0318
 			return;
cd0318
+		if (!check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1])) {
cd0318
+			_gnutls_x86_cpuid_s[1] &= ~bit_AVX;
cd0318
+
cd0318
+			/* Clear AVX2 bits as well, according to what
cd0318
+			 * OpenSSL does.  Should we clear
cd0318
+			 * bit_AVX512DQ, bit_AVX512PF, bit_AVX512ER,
cd0318
+			 * and bit_AVX512CD? */
cd0318
+			_gnutls_x86_cpuid_s[2] &= ~(bit_AVX2|
cd0318
+						    bit_AVX512F|
cd0318
+						    bit_AVX512IFMA|
cd0318
+						    bit_AVX512BW|
cd0318
+						    bit_AVX512VL);
cd0318
+		}
cd0318
 	} else {
cd0318
 		capabilities_to_intel_cpuid(capabilities);
cd0318
 	}
cd0318
-- 
cd0318
2.37.2
cd0318