0a1913
From 300c6315d2e644ae81b43fa2dd7bbf68b3afb5b2 Mon Sep 17 00:00:00 2001
0a1913
From: Daiki Ueno <ueno@gnu.org>
0a1913
Date: Thu, 18 Nov 2021 19:02:03 +0100
0a1913
Subject: [PATCH 1/2] accelerated: fix CPU feature detection for Intel CPUs
0a1913
0a1913
This fixes read_cpuid_vals to correctly read the CPUID quadruple, as
0a1913
well as to set the bit the upstream CRYPTOGAMS uses to identify Intel
0a1913
CPUs.
0a1913
0a1913
Suggested by Rafael Gieschke in:
0a1913
https://gitlab.com/gnutls/gnutls/-/issues/1282
0a1913
0a1913
Signed-off-by: Daiki Ueno <ueno@gnu.org>
0a1913
---
0a1913
 lib/accelerated/x86/x86-common.c | 91 +++++++++++++++++++++++++-------
0a1913
 1 file changed, 71 insertions(+), 20 deletions(-)
0a1913
0a1913
diff --git a/lib/accelerated/x86/x86-common.c b/lib/accelerated/x86/x86-common.c
0a1913
index 3845c6b4c9..cf615ef24f 100644
0a1913
--- a/lib/accelerated/x86/x86-common.c
0a1913
+++ b/lib/accelerated/x86/x86-common.c
0a1913
@@ -81,15 +81,38 @@ unsigned int _gnutls_x86_cpuid_s[4];
0a1913
 # define bit_AVX 0x10000000
0a1913
 #endif
0a1913
 
0a1913
-#ifndef OSXSAVE_MASK
0a1913
-/* OSXSAVE|FMA|MOVBE */
0a1913
-# define OSXSAVE_MASK (0x8000000|0x1000|0x400000)
0a1913
+#ifndef bit_AVX2
0a1913
+# define bit_AVX2 0x00000020
0a1913
+#endif
0a1913
+
0a1913
+#ifndef bit_AVX512F
0a1913
+# define bit_AVX512F 0x00010000
0a1913
+#endif
0a1913
+
0a1913
+#ifndef bit_AVX512IFMA
0a1913
+# define bit_AVX512IFMA 0x00200000
0a1913
+#endif
0a1913
+
0a1913
+#ifndef bit_AVX512BW
0a1913
+# define bit_AVX512BW 0x40000000
0a1913
+#endif
0a1913
+
0a1913
+#ifndef bit_AVX512VL
0a1913
+# define bit_AVX512VL 0x80000000
0a1913
+#endif
0a1913
+
0a1913
+#ifndef bit_OSXSAVE
0a1913
+# define bit_OSXSAVE 0x8000000
0a1913
 #endif
0a1913
 
0a1913
 #ifndef bit_MOVBE
0a1913
 # define bit_MOVBE 0x00400000
0a1913
 #endif
0a1913
 
0a1913
+#ifndef OSXSAVE_MASK
0a1913
+# define OSXSAVE_MASK (bit_OSXSAVE|bit_MOVBE)
0a1913
+#endif
0a1913
+
0a1913
 #define via_bit_PADLOCK (0x3 << 6)
0a1913
 #define via_bit_PADLOCK_PHE (0x3 << 10)
0a1913
 #define via_bit_PADLOCK_PHE_SHA512 (0x3 << 25)
0a1913
@@ -127,7 +150,7 @@ static unsigned read_cpuid_vals(unsigned int vals[4])
0a1913
 	unsigned t1, t2, t3;
0a1913
 	vals[0] = vals[1] = vals[2] = vals[3] = 0;
0a1913
 
0a1913
-	if (!__get_cpuid(1, &t1, &vals[0], &vals[1], &t2))
0a1913
+	if (!__get_cpuid(1, &t1, &t2, &vals[1], &vals[0]))
0a1913
 		return 0;
0a1913
 	/* suppress AVX512; it works conditionally on certain CPUs on the original code */
0a1913
 	vals[1] &= 0xfffff7ff;
0a1913
@@ -145,7 +168,7 @@ static unsigned check_4th_gen_intel_features(unsigned ecx)
0a1913
 {
0a1913
 	uint32_t xcr0;
0a1913
 
0a1913
-	if ((ecx & OSXSAVE_MASK) != OSXSAVE_MASK)
0a1913
+	if ((ecx & bit_OSXSAVE) != bit_OSXSAVE)
0a1913
 		return 0;
0a1913
 
0a1913
 #if defined(_MSC_VER) && !defined(__clang__)
0a1913
@@ -233,10 +256,7 @@ static unsigned check_sha(void)
0a1913
 #ifdef ASM_X86_64
0a1913
 static unsigned check_avx_movbe(void)
0a1913
 {
0a1913
-	if (check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1]) == 0)
0a1913
-		return 0;
0a1913
-
0a1913
-	return ((_gnutls_x86_cpuid_s[1] & bit_AVX));
0a1913
+	return (_gnutls_x86_cpuid_s[1] & bit_AVX);
0a1913
 }
0a1913
 
0a1913
 static unsigned check_pclmul(void)
0a1913
@@ -514,33 +534,47 @@ void register_x86_padlock_crypto(unsigned capabilities)
0a1913
 }
0a1913
 #endif
0a1913
 
0a1913
-static unsigned check_intel_or_amd(void)
0a1913
+enum x86_cpu_vendor {
0a1913
+	X86_CPU_VENDOR_OTHER,
0a1913
+	X86_CPU_VENDOR_INTEL,
0a1913
+	X86_CPU_VENDOR_AMD,
0a1913
+};
0a1913
+
0a1913
+static enum x86_cpu_vendor check_x86_cpu_vendor(void)
0a1913
 {
0a1913
 	unsigned int a, b, c, d;
0a1913
 
0a1913
-	if (!__get_cpuid(0, &a, &b, &c, &d))
0a1913
-		return 0;
0a1913
+	if (!__get_cpuid(0, &a, &b, &c, &d)) {
0a1913
+		return X86_CPU_VENDOR_OTHER;
0a1913
+	}
0a1913
 
0a1913
-	if ((memcmp(&b, "Genu", 4) == 0 &&
0a1913
-	     memcmp(&d, "ineI", 4) == 0 &&
0a1913
-	     memcmp(&c, "ntel", 4) == 0) ||
0a1913
-	    (memcmp(&b, "Auth", 4) == 0 &&
0a1913
-	     memcmp(&d, "enti", 4) == 0 && memcmp(&c, "cAMD", 4) == 0)) {
0a1913
-		return 1;
0a1913
+	if (memcmp(&b, "Genu", 4) == 0 &&
0a1913
+	    memcmp(&d, "ineI", 4) == 0 &&
0a1913
+	    memcmp(&c, "ntel", 4) == 0) {
0a1913
+		return X86_CPU_VENDOR_INTEL;
0a1913
 	}
0a1913
 
0a1913
-	return 0;
0a1913
+	if (memcmp(&b, "Auth", 4) == 0 &&
0a1913
+	    memcmp(&d, "enti", 4) == 0 &&
0a1913
+	    memcmp(&c, "cAMD", 4) == 0) {
0a1913
+		return X86_CPU_VENDOR_AMD;
0a1913
+	}
0a1913
+
0a1913
+	return X86_CPU_VENDOR_OTHER;
0a1913
 }
0a1913
 
0a1913
 static
0a1913
 void register_x86_intel_crypto(unsigned capabilities)
0a1913
 {
0a1913
 	int ret;
0a1913
+	enum x86_cpu_vendor vendor;
0a1913
 
0a1913
 	memset(_gnutls_x86_cpuid_s, 0, sizeof(_gnutls_x86_cpuid_s));
0a1913
 
0a1913
-	if (check_intel_or_amd() == 0)
0a1913
+	vendor = check_x86_cpu_vendor();
0a1913
+	if (vendor == X86_CPU_VENDOR_OTHER) {
0a1913
 		return;
0a1913
+	}
0a1913
 
0a1913
 	if (capabilities == 0) {
0a1913
 		if (!read_cpuid_vals(_gnutls_x86_cpuid_s))
0a1913
@@ -549,6 +583,23 @@ void register_x86_intel_crypto(unsigned capabilities)
0a1913
 		capabilities_to_intel_cpuid(capabilities);
0a1913
 	}
0a1913
 
0a1913
+	/* CRYPTOGAMS uses the (1 << 30) bit as an indicator of Intel CPUs */
0a1913
+	if (vendor == X86_CPU_VENDOR_INTEL) {
0a1913
+		_gnutls_x86_cpuid_s[0] |= 1 << 30;
0a1913
+	} else {
0a1913
+		_gnutls_x86_cpuid_s[0] &= ~(1 << 30);
0a1913
+	}
0a1913
+
0a1913
+	if (!check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1])) {
0a1913
+		_gnutls_x86_cpuid_s[1] &= ~bit_AVX;
0a1913
+
0a1913
+		/* Clear AVX2 bits as well, according to what OpenSSL does.
0a1913
+		 * Should we clear bit_AVX512DQ, bit_AVX512PF, bit_AVX512ER, and
0a1913
+		 * bit_AVX512CD? */
0a1913
+		_gnutls_x86_cpuid_s[2] &= ~(bit_AVX2|bit_AVX512F|bit_AVX512IFMA|
0a1913
+					    bit_AVX512BW|bit_AVX512BW);
0a1913
+	}
0a1913
+
0a1913
 	if (check_ssse3()) {
0a1913
 		_gnutls_debug_log("Intel SSSE3 was detected\n");
0a1913
 
0a1913
-- 
0a1913
2.37.3
0a1913
0a1913
0a1913
From cd509dac9e6d1bf76fd12c72c1fd61f1708c254a Mon Sep 17 00:00:00 2001
0a1913
From: Daiki Ueno <ueno@gnu.org>
0a1913
Date: Mon, 15 Aug 2022 09:39:18 +0900
0a1913
Subject: [PATCH 2/2] accelerated: clear AVX bits if it cannot be queried
0a1913
 through XSAVE
0a1913
MIME-Version: 1.0
0a1913
Content-Type: text/plain; charset=UTF-8
0a1913
Content-Transfer-Encoding: 8bit
0a1913
0a1913
The algorithm to detect AVX is described in 14.3 of "Intel® 64 and IA-32
0a1913
Architectures Software Developer’s Manual".
0a1913
0a1913
GnuTLS previously only followed that algorithm when registering the
0a1913
crypto backend, while the CRYPTOGAMS-derived SHA assembly code expects
0a1913
that the extension bits are propagated to _gnutls_x86_cpuid_s.
0a1913
0a1913
Signed-off-by: Daiki Ueno <ueno@gnu.org>
0a1913
---
0a1913
 lib/accelerated/x86/x86-common.c | 18 ++++++++++++++++--
0a1913
 1 file changed, 16 insertions(+), 2 deletions(-)
0a1913
0a1913
diff --git a/lib/accelerated/x86/x86-common.c b/lib/accelerated/x86/x86-common.c
0a1913
index cf615ef24f..655d0c65f2 100644
0a1913
--- a/lib/accelerated/x86/x86-common.c
0a1913
+++ b/lib/accelerated/x86/x86-common.c
0a1913
@@ -210,7 +210,8 @@ static void capabilities_to_intel_cpuid(unsigned capabilities)
0a1913
 	}
0a1913
 
0a1913
 	if (capabilities & INTEL_AVX) {
0a1913
-		if ((a[1] & bit_AVX) && check_4th_gen_intel_features(a[1])) {
0a1913
+		if ((a[1] & bit_AVX) && (a[1] & bit_MOVBE) &&
0a1913
+		    check_4th_gen_intel_features(a[1])) {
0a1913
 			_gnutls_x86_cpuid_s[1] |= bit_AVX|bit_MOVBE;
0a1913
 		} else {
0a1913
 			_gnutls_debug_log
0a1913
@@ -256,7 +257,7 @@ static unsigned check_sha(void)
0a1913
 #ifdef ASM_X86_64
0a1913
 static unsigned check_avx_movbe(void)
0a1913
 {
0a1913
-	return (_gnutls_x86_cpuid_s[1] & bit_AVX);
0a1913
+	return (_gnutls_x86_cpuid_s[1] & (bit_AVX|bit_MOVBE)) == (bit_AVX|bit_MOVBE);
0a1913
 }
0a1913
 
0a1913
 static unsigned check_pclmul(void)
0a1913
@@ -579,6 +580,19 @@ void register_x86_intel_crypto(unsigned capabilities)
0a1913
 	if (capabilities == 0) {
0a1913
 		if (!read_cpuid_vals(_gnutls_x86_cpuid_s))
0a1913
 			return;
0a1913
+		if (!check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1])) {
0a1913
+			_gnutls_x86_cpuid_s[1] &= ~bit_AVX;
0a1913
+
0a1913
+			/* Clear AVX2 bits as well, according to what
0a1913
+			 * OpenSSL does.  Should we clear
0a1913
+			 * bit_AVX512DQ, bit_AVX512PF, bit_AVX512ER,
0a1913
+			 * and bit_AVX512CD? */
0a1913
+			_gnutls_x86_cpuid_s[2] &= ~(bit_AVX2|
0a1913
+						    bit_AVX512F|
0a1913
+						    bit_AVX512IFMA|
0a1913
+						    bit_AVX512BW|
0a1913
+						    bit_AVX512BW);
0a1913
+		}
0a1913
 	} else {
0a1913
 		capabilities_to_intel_cpuid(capabilities);
0a1913
 	}
0a1913
-- 
0a1913
2.37.3
0a1913