From 8ff391fa011e02c88b0d099061ca62e88ab68011 Mon Sep 17 00:00:00 2001 From: Daiki Ueno Date: Mon, 15 Aug 2022 09:39:18 +0900 Subject: [PATCH] accelerated: clear AVX bits if it cannot be queried through XSAVE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The algorithm to detect AVX is described in Section 14.3 of "Intel® 64 and IA-32 Architectures Software Developer’s Manual". GnuTLS previously only followed that algorithm when registering the crypto backend, while the CRYPTOGAMS-derived SHA assembly code expects the extension bits to be propagated to _gnutls_x86_cpuid_s. Signed-off-by: Daiki Ueno --- lib/accelerated/x86/x86-common.c | 49 +++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/lib/accelerated/x86/x86-common.c b/lib/accelerated/x86/x86-common.c index 7ddaa594e6..b7a88ddeca 100644 --- a/lib/accelerated/x86/x86-common.c +++ b/lib/accelerated/x86/x86-common.c @@ -81,6 +81,26 @@ unsigned int _gnutls_x86_cpuid_s[4]; # define bit_AVX 0x10000000 #endif +#ifndef bit_AVX2 +# define bit_AVX2 0x00000020 +#endif + +#ifndef bit_AVX512F +# define bit_AVX512F 0x00010000 +#endif + +#ifndef bit_AVX512IFMA +# define bit_AVX512IFMA 0x00200000 +#endif + +#ifndef bit_AVX512BW +# define bit_AVX512BW 0x40000000 +#endif + +#ifndef bit_AVX512VL +# define bit_AVX512VL 0x80000000 +#endif + #ifndef bit_OSXSAVE # define bit_OSXSAVE 0x8000000 #endif @@ -89,10 +109,6 @@ unsigned int _gnutls_x86_cpuid_s[4]; # define bit_MOVBE 0x00400000 #endif -#ifndef OSXSAVE_MASK -# define OSXSAVE_MASK (bit_OSXSAVE|bit_MOVBE) -#endif - #define bit_PADLOCK (0x3 << 6) #define bit_PADLOCK_PHE (0x3 << 10) #define bit_PADLOCK_PHE_SHA512 (0x3 << 25) @@ -148,7 +164,7 @@ static unsigned check_4th_gen_intel_features(unsigned ecx) { uint32_t xcr0; - if ((ecx & OSXSAVE_MASK) != OSXSAVE_MASK) + if ((ecx & bit_OSXSAVE) != bit_OSXSAVE) return 0; #if defined(_MSC_VER) && !defined(__clang__) @@ -190,8 +206,9 @@ static void 
capabilities_to_intel_cpuid(unsigned capabilities) } if (capabilities & INTEL_AVX) { - if ((a[1] & bit_AVX) && check_4th_gen_intel_features(a[1])) { - _gnutls_x86_cpuid_s[1] |= bit_AVX|OSXSAVE_MASK; + if ((a[1] & bit_AVX) && (a[1] & bit_MOVBE) && + check_4th_gen_intel_features(a[1])) { + _gnutls_x86_cpuid_s[1] |= bit_AVX|bit_MOVBE; } else { _gnutls_debug_log ("AVX acceleration requested but not available\n"); @@ -236,10 +253,7 @@ static unsigned check_sha(void) #ifdef ASM_X86_64 static unsigned check_avx_movbe(void) { - if (check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1]) == 0) - return 0; - - return ((_gnutls_x86_cpuid_s[1] & bit_AVX)); + return (_gnutls_x86_cpuid_s[1] & (bit_AVX|bit_MOVBE)) == (bit_AVX|bit_MOVBE); } static unsigned check_pclmul(void) @@ -884,6 +898,19 @@ void register_x86_intel_crypto(unsigned capabilities) if (capabilities == 0) { if (!read_cpuid_vals(_gnutls_x86_cpuid_s)) return; + if (!check_4th_gen_intel_features(_gnutls_x86_cpuid_s[1])) { + _gnutls_x86_cpuid_s[1] &= ~bit_AVX; + + /* Clear AVX2 bits as well, according to what + * OpenSSL does. Should we clear + * bit_AVX512DQ, bit_AVX512PF, bit_AVX512ER, + * and bit_AVX512CD? */ + _gnutls_x86_cpuid_s[2] &= ~(bit_AVX2| + bit_AVX512F| + bit_AVX512IFMA| + bit_AVX512BW| + bit_AVX512VL); + } } else { capabilities_to_intel_cpuid(capabilities); } -- 2.37.2