Blob Blame History Raw
commit c22e4c2a1431c5e77bf4288d35bf7629f2f093aa
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Mon Dec 3 05:54:43 2018 -0800

    x86: Extend CPUID support in struct cpu_features
    
    Extend CPUID support for all feature bits from CPUID.  Add a new macro,
    CPU_FEATURE_USABLE, which can be used to check if a feature is usable at
    run-time, instead of HAS_CPU_FEATURE and HAS_ARCH_FEATURE.
    
    Add COMMON_CPUID_INDEX_D_ECX_1, COMMON_CPUID_INDEX_80000007 and
    COMMON_CPUID_INDEX_80000008 to check CPU feature bits in them.
    
    Tested on i686 and x86-64 as well as using build-many-glibcs.py with
    x86 targets.
    
            * sysdeps/x86/cacheinfo.c (intel_check_word): Updated for
            cpu_features_basic.
            (__cache_sysconf): Likewise.
            (init_cacheinfo): Likewise.
            * sysdeps/x86/cpu-features.c (get_extended_indices): Also
            populate COMMON_CPUID_INDEX_80000007 and
            COMMON_CPUID_INDEX_80000008.
            (get_common_indices): Also populate COMMON_CPUID_INDEX_D_ECX_1.
            Use CPU_FEATURES_CPU_P (cpu_features, XSAVEC) to check if
            XSAVEC is available.  Set the bit_arch_XXX_Usable bits.
            (init_cpu_features): Use _Static_assert on
            index_arch_Fast_Unaligned_Load.
            __get_cpuid_registers and __get_arch_feature.  Updated for
            cpu_features_basic.  Set stepping in cpu_features.
            * sysdeps/x86/cpu-features.h: (FEATURE_INDEX_1): Changed to enum.
            (FEATURE_INDEX_2): New.
            (FEATURE_INDEX_MAX): Changed to enum.
            (COMMON_CPUID_INDEX_D_ECX_1): New.
            (COMMON_CPUID_INDEX_80000007): Likewise.
            (COMMON_CPUID_INDEX_80000008): Likewise.
            (cpuid_registers): Likewise.
            (cpu_features_basic): Likewise.
            (CPU_FEATURE_USABLE): Likewise.
            (bit_arch_XXX_Usable): Likewise.
            (cpu_features): Use cpuid_registers and cpu_features_basic.
            (bit_arch_XXX): Rewritten.
            (bit_cpu_XXX): Likewise.
            (index_cpu_XXX): Likewise.
            (reg_XXX): Likewise.
            * sysdeps/x86/tst-get-cpu-features.c: Include <stdio.h> and
            <support/check.h>.
            (CHECK_CPU_FEATURE): New.
            (CHECK_CPU_FEATURE_USABLE): Likewise.
            (cpu_kinds): Likewise.
            (do_test): Print vendor, family, model and stepping.  Check
            HAS_CPU_FEATURE and CPU_FEATURE_USABLE.
            (TEST_FUNCTION): Removed.
            Include <support/test-driver.c> instead of
            "../../test-skeleton.c".
            * sysdeps/x86_64/multiarch/sched_cpucount.c (__sched_cpucount):
            Check POPCNT instead of POPCOUNT.
            * sysdeps/x86_64/multiarch/test-multiarch.c (do_test): Likewise.

Backport difference: Adjustments to previous cache sizing
backports (which happened later upstream).

diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c
index 57c36d030a76c8b2..f1125f30223f5ca3 100644
--- a/sysdeps/x86/cacheinfo.c
+++ b/sysdeps/x86/cacheinfo.c
@@ -205,8 +205,8 @@ intel_check_word (int name, unsigned int value, bool *has_level_2,
 	      /* Intel reused this value.  For family 15, model 6 it
 		 specifies the 3rd level cache.  Otherwise the 2nd
 		 level cache.  */
-	      unsigned int family = cpu_features->family;
-	      unsigned int model = cpu_features->model;
+	      unsigned int family = cpu_features->basic.family;
+	      unsigned int model = cpu_features->basic.model;
 
 	      if (family == 15 && model == 6)
 		{
@@ -258,7 +258,7 @@ intel_check_word (int name, unsigned int value, bool *has_level_2,
 static long int __attribute__ ((noinline))
 handle_intel (int name, const struct cpu_features *cpu_features)
 {
-  unsigned int maxidx = cpu_features->max_cpuid;
+  unsigned int maxidx = cpu_features->basic.max_cpuid;
 
   /* Return -1 for older CPUs.  */
   if (maxidx < 2)
@@ -443,10 +443,10 @@ __cache_sysconf (int name)
 {
   const struct cpu_features *cpu_features = __get_cpu_features ();
 
-  if (cpu_features->kind == arch_kind_intel)
+  if (cpu_features->basic.kind == arch_kind_intel)
     return handle_intel (name, cpu_features);
 
-  if (cpu_features->kind == arch_kind_amd)
+  if (cpu_features->basic.kind == arch_kind_amd)
     return handle_amd (name);
 
   // XXX Fill in more vendors.
@@ -497,9 +497,9 @@ init_cacheinfo (void)
   unsigned int level;
   unsigned int threads = 0;
   const struct cpu_features *cpu_features = __get_cpu_features ();
-  int max_cpuid = cpu_features->max_cpuid;
+  int max_cpuid = cpu_features->basic.max_cpuid;
 
-  if (cpu_features->kind == arch_kind_intel)
+  if (cpu_features->basic.kind == arch_kind_intel)
     {
       data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, cpu_features);
 
@@ -538,8 +538,8 @@ init_cacheinfo (void)
 	     highest cache level.  */
 	  if (max_cpuid >= 4)
 	    {
-	      unsigned int family = cpu_features->family;
-	      unsigned int model = cpu_features->model;
+	      unsigned int family = cpu_features->basic.family;
+	      unsigned int model = cpu_features->basic.model;
 
 	      int i = 0;
 
@@ -700,7 +700,7 @@ intel_bug_no_cache_info:
 	  shared += core;
 	}
     }
-  else if (cpu_features->kind == arch_kind_amd)
+  else if (cpu_features->basic.kind == arch_kind_amd)
     {
       data   = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
       long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
@@ -722,7 +722,7 @@ intel_bug_no_cache_info:
 	      threads = 1 << ((ecx >> 12) & 0x0f);
 	    }
 
-	  if (threads == 0 || cpu_features->family >= 0x17)
+	  if (threads == 0 || cpu_features->basic.family >= 0x17)
 	    {
 	      /* If APIC ID width is not available, use logical
 		 processor count.  */
@@ -738,7 +738,7 @@ intel_bug_no_cache_info:
 	    shared /= threads;
 
 	  /* Get shared cache per ccx for Zen architectures.  */
-	  if (cpu_features->family >= 0x17)
+	  if (cpu_features->basic.family >= 0x17)
 	    {
 	      unsigned int eax;
 
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 3b268efbce627e6c..3a02a9c7d08f9603 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -52,7 +52,18 @@ get_extended_indices (struct cpu_features *cpu_features)
 	     cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ebx,
 	     cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].ecx,
 	     cpu_features->cpuid[COMMON_CPUID_INDEX_80000001].edx);
-
+  if (eax >= 0x80000007)
+    __cpuid (0x80000007,
+	     cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].eax,
+	     cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].ebx,
+	     cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].ecx,
+	     cpu_features->cpuid[COMMON_CPUID_INDEX_80000007].edx);
+  if (eax >= 0x80000008)
+    __cpuid (0x80000008,
+	     cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].eax,
+	     cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].ebx,
+	     cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].ecx,
+	     cpu_features->cpuid[COMMON_CPUID_INDEX_80000008].edx);
 }
 
 static void
@@ -78,13 +89,20 @@ get_common_indices (struct cpu_features *cpu_features,
 	}
     }
 
-  if (cpu_features->max_cpuid >= 7)
+  if (cpu_features->basic.max_cpuid >= 7)
     __cpuid_count (7, 0,
 		   cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax,
 		   cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx,
 		   cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx,
 		   cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx);
 
+  if (cpu_features->basic.max_cpuid >= 0xd)
+    __cpuid_count (0xd, 1,
+		   cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].eax,
+		   cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].ebx,
+		   cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].ecx,
+		   cpu_features->cpuid[COMMON_CPUID_INDEX_D_ECX_1].edx);
+
   /* Can we call xgetbv?  */
   if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
     {
@@ -116,6 +134,18 @@ get_common_indices (struct cpu_features *cpu_features,
 	      if (CPU_FEATURES_CPU_P (cpu_features, FMA))
 		cpu_features->feature[index_arch_FMA_Usable]
 		  |= bit_arch_FMA_Usable;
+	      /* Determine if VAES is usable.  */
+	      if (CPU_FEATURES_CPU_P (cpu_features, VAES))
+		cpu_features->feature[index_arch_VAES_Usable]
+		  |= bit_arch_VAES_Usable;
+	      /* Determine if VPCLMULQDQ is usable.  */
+	      if (CPU_FEATURES_CPU_P (cpu_features, VPCLMULQDQ))
+		cpu_features->feature[index_arch_VPCLMULQDQ_Usable]
+		  |= bit_arch_VPCLMULQDQ_Usable;
+	      /* Determine if XOP is usable.  */
+	      if (CPU_FEATURES_CPU_P (cpu_features, XOP))
+		cpu_features->feature[index_arch_XOP_Usable]
+		  |= bit_arch_XOP_Usable;
 	    }
 
 	  /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
@@ -129,17 +159,69 @@ get_common_indices (struct cpu_features *cpu_features,
 		{
 		  cpu_features->feature[index_arch_AVX512F_Usable]
 		    |= bit_arch_AVX512F_Usable;
+		  /* Determine if AVX512CD is usable.  */
+		  if (CPU_FEATURES_CPU_P (cpu_features, AVX512CD))
+		    cpu_features->feature[index_arch_AVX512CD_Usable]
+		      |= bit_arch_AVX512CD_Usable;
+		  /* Determine if AVX512ER is usable.  */
+		  if (CPU_FEATURES_CPU_P (cpu_features, AVX512ER))
+		    cpu_features->feature[index_arch_AVX512ER_Usable]
+		      |= bit_arch_AVX512ER_Usable;
+		  /* Determine if AVX512PF is usable.  */
+		  if (CPU_FEATURES_CPU_P (cpu_features, AVX512PF))
+		    cpu_features->feature[index_arch_AVX512PF_Usable]
+		      |= bit_arch_AVX512PF_Usable;
+		  /* Determine if AVX512VL is usable.  */
+		  if (CPU_FEATURES_CPU_P (cpu_features, AVX512VL))
+		    cpu_features->feature[index_arch_AVX512VL_Usable]
+		      |= bit_arch_AVX512VL_Usable;
 		  /* Determine if AVX512DQ is usable.  */
 		  if (CPU_FEATURES_CPU_P (cpu_features, AVX512DQ))
 		    cpu_features->feature[index_arch_AVX512DQ_Usable]
 		      |= bit_arch_AVX512DQ_Usable;
+		  /* Determine if AVX512BW is usable.  */
+		  if (CPU_FEATURES_CPU_P (cpu_features, AVX512BW))
+		    cpu_features->feature[index_arch_AVX512BW_Usable]
+		      |= bit_arch_AVX512BW_Usable;
+		  /* Determine if AVX512_4FMAPS is usable.  */
+		  if (CPU_FEATURES_CPU_P (cpu_features, AVX512_4FMAPS))
+		    cpu_features->feature[index_arch_AVX512_4FMAPS_Usable]
+		      |= bit_arch_AVX512_4FMAPS_Usable;
+		  /* Determine if AVX512_4VNNIW is usable.  */
+		  if (CPU_FEATURES_CPU_P (cpu_features, AVX512_4VNNIW))
+		    cpu_features->feature[index_arch_AVX512_4VNNIW_Usable]
+		      |= bit_arch_AVX512_4VNNIW_Usable;
+		  /* Determine if AVX512_BITALG is usable.  */
+		  if (CPU_FEATURES_CPU_P (cpu_features, AVX512_BITALG))
+		    cpu_features->feature[index_arch_AVX512_BITALG_Usable]
+		      |= bit_arch_AVX512_BITALG_Usable;
+		  /* Determine if AVX512_IFMA is usable.  */
+		  if (CPU_FEATURES_CPU_P (cpu_features, AVX512_IFMA))
+		    cpu_features->feature[index_arch_AVX512_IFMA_Usable]
+		      |= bit_arch_AVX512_IFMA_Usable;
+		  /* Determine if AVX512_VBMI is usable.  */
+		  if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VBMI))
+		    cpu_features->feature[index_arch_AVX512_VBMI_Usable]
+		      |= bit_arch_AVX512_VBMI_Usable;
+		  /* Determine if AVX512_VBMI2 is usable.  */
+		  if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VBMI2))
+		    cpu_features->feature[index_arch_AVX512_VBMI2_Usable]
+		      |= bit_arch_AVX512_VBMI2_Usable;
+		  /* Determine if is AVX512_VNNI usable.  */
+		  if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VNNI))
+		    cpu_features->feature[index_arch_AVX512_VNNI_Usable]
+		      |= bit_arch_AVX512_VNNI_Usable;
+		  /* Determine if AVX512_VPOPCNTDQ is usable.  */
+		  if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VPOPCNTDQ))
+		    cpu_features->feature[index_arch_AVX512_VPOPCNTDQ_Usable]
+		      |= bit_arch_AVX512_VPOPCNTDQ_Usable;
 		}
 	    }
 	}
 
       /* For _dl_runtime_resolve, set xsave_state_size to xsave area
 	 size + integer register save size and align it to 64 bytes.  */
-      if (cpu_features->max_cpuid >= 0xd)
+      if (cpu_features->basic.max_cpuid >= 0xd)
 	{
 	  unsigned int eax, ebx, ecx, edx;
 
@@ -154,10 +236,8 @@ get_common_indices (struct cpu_features *cpu_features,
 	      cpu_features->xsave_state_full_size
 		= xsave_state_full_size;
 
-	      __cpuid_count (0xd, 1, eax, ebx, ecx, edx);
-
 	      /* Check if XSAVEC is available.  */
-	      if ((eax & (1 << 1)) != 0)
+	      if (CPU_FEATURES_CPU_P (cpu_features, XSAVEC))
 		{
 		  unsigned int xstate_comp_offsets[32];
 		  unsigned int xstate_comp_sizes[32];
@@ -209,12 +289,25 @@ get_common_indices (struct cpu_features *cpu_features,
     }
 }
 
+_Static_assert (((index_arch_Fast_Unaligned_Load
+		  == index_arch_Fast_Unaligned_Copy)
+		 && (index_arch_Fast_Unaligned_Load
+		     == index_arch_Prefer_PMINUB_for_stringop)
+		 && (index_arch_Fast_Unaligned_Load
+		     == index_arch_Slow_SSE4_2)
+		 && (index_arch_Fast_Unaligned_Load
+		     == index_arch_Fast_Rep_String)
+		 && (index_arch_Fast_Unaligned_Load
+		     == index_arch_Fast_Copy_Backward)),
+		"Incorrect index_arch_Fast_Unaligned_Load");
+
 static inline void
 init_cpu_features (struct cpu_features *cpu_features)
 {
   unsigned int ebx, ecx, edx;
   unsigned int family = 0;
   unsigned int model = 0;
+  unsigned int stepping = 0;
   enum cpu_features_kind kind;
 
 #if !HAS_CPUID
@@ -225,12 +318,12 @@ init_cpu_features (struct cpu_features *cpu_features)
     }
 #endif
 
-  __cpuid (0, cpu_features->max_cpuid, ebx, ecx, edx);
+  __cpuid (0, cpu_features->basic.max_cpuid, ebx, ecx, edx);
 
   /* This spells out "GenuineIntel".  */
   if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
     {
-      unsigned int extended_model, stepping;
+      unsigned int extended_model;
 
       kind = arch_kind_intel;
 
@@ -269,15 +362,6 @@ init_cpu_features (struct cpu_features *cpu_features)
 	    case 0x5d:
 	      /* Unaligned load versions are faster than SSSE3
 		 on Silvermont.  */
-#if index_arch_Fast_Unaligned_Load != index_arch_Prefer_PMINUB_for_stringop
-# error index_arch_Fast_Unaligned_Load != index_arch_Prefer_PMINUB_for_stringop
-#endif
-#if index_arch_Fast_Unaligned_Load != index_arch_Slow_SSE4_2
-# error index_arch_Fast_Unaligned_Load != index_arch_Slow_SSE4_2
-#endif
-#if index_arch_Fast_Unaligned_Load != index_arch_Fast_Unaligned_Copy
-# error index_arch_Fast_Unaligned_Load != index_arch_Fast_Unaligned_Copy
-#endif
 	      cpu_features->feature[index_arch_Fast_Unaligned_Load]
 		|= (bit_arch_Fast_Unaligned_Load
 		    | bit_arch_Fast_Unaligned_Copy
@@ -300,15 +384,6 @@ init_cpu_features (struct cpu_features *cpu_features)
 	    case 0x2f:
 	      /* Rep string instructions, unaligned load, unaligned copy,
 		 and pminub are fast on Intel Core i3, i5 and i7.  */
-#if index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Load
-# error index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Load
-#endif
-#if index_arch_Fast_Rep_String != index_arch_Prefer_PMINUB_for_stringop
-# error index_arch_Fast_Rep_String != index_arch_Prefer_PMINUB_for_stringop
-#endif
-#if index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Copy
-# error index_arch_Fast_Rep_String != index_arch_Fast_Unaligned_Copy
-#endif
 	      cpu_features->feature[index_arch_Fast_Rep_String]
 		|= (bit_arch_Fast_Rep_String
 		    | bit_arch_Fast_Unaligned_Load
@@ -352,7 +427,7 @@ init_cpu_features (struct cpu_features *cpu_features)
   /* This spells out "AuthenticAMD".  */
   else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
     {
-      unsigned int extended_model, stepping;
+      unsigned int extended_model;
 
       kind = arch_kind_amd;
 
@@ -374,9 +449,6 @@ init_cpu_features (struct cpu_features *cpu_features)
 
       if (family == 0x15)
 	{
-#if index_arch_Fast_Unaligned_Load != index_arch_Fast_Copy_Backward
-# error index_arch_Fast_Unaligned_Load != index_arch_Fast_Copy_Backward
-#endif
 	  /* "Excavator"   */
 	  if (model >= 0x60 && model <= 0x7f)
 	  {
@@ -408,9 +480,10 @@ init_cpu_features (struct cpu_features *cpu_features)
 no_cpuid:
 #endif
 
-  cpu_features->family = family;
-  cpu_features->model = model;
-  cpu_features->kind = kind;
+  cpu_features->basic.kind = kind;
+  cpu_features->basic.family = family;
+  cpu_features->basic.model = model;
+  cpu_features->basic.stepping = stepping;
 
 #if HAVE_TUNABLES
   TUNABLE_GET (hwcaps, tunable_val_t *, TUNABLE_CALLBACK (set_hwcaps));
@@ -431,7 +504,7 @@ no_cpuid:
 
 #ifdef __x86_64__
   GLRO(dl_hwcap) = HWCAP_X86_64;
-  if (cpu_features->kind == arch_kind_intel)
+  if (cpu_features->basic.kind == arch_kind_intel)
     {
       const char *platform = NULL;
 
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
index fb22d7b9d6226a92..4917182e99a8ee90 100644
--- a/sysdeps/x86/cpu-features.h
+++ b/sysdeps/x86/cpu-features.h
@@ -18,108 +18,58 @@
 #ifndef cpu_features_h
 #define cpu_features_h
 
-#define bit_arch_Fast_Rep_String		(1 << 0)
-#define bit_arch_Fast_Copy_Backward		(1 << 1)
-#define bit_arch_Slow_BSF			(1 << 2)
-#define bit_arch_Fast_Unaligned_Load		(1 << 4)
-#define bit_arch_Prefer_PMINUB_for_stringop	(1 << 5)
-#define bit_arch_AVX_Usable			(1 << 6)
-#define bit_arch_FMA_Usable			(1 << 7)
-#define bit_arch_FMA4_Usable			(1 << 8)
-#define bit_arch_Slow_SSE4_2			(1 << 9)
-#define bit_arch_AVX2_Usable			(1 << 10)
-#define bit_arch_AVX_Fast_Unaligned_Load	(1 << 11)
-#define bit_arch_AVX512F_Usable			(1 << 12)
-#define bit_arch_AVX512DQ_Usable		(1 << 13)
-#define bit_arch_I586				(1 << 14)
-#define bit_arch_I686				(1 << 15)
-#define bit_arch_Prefer_MAP_32BIT_EXEC		(1 << 16)
-#define bit_arch_Prefer_No_VZEROUPPER		(1 << 17)
-#define bit_arch_Fast_Unaligned_Copy		(1 << 18)
-#define bit_arch_Prefer_ERMS			(1 << 19)
-#define bit_arch_Prefer_No_AVX512		(1 << 20)
-#define bit_arch_MathVec_Prefer_No_AVX512	(1 << 21)
-#define bit_arch_XSAVEC_Usable			(1 << 22)
-#define bit_arch_Prefer_FSRM			(1 << 23)
-
-/* CPUID Feature flags.  */
-
-/* COMMON_CPUID_INDEX_1.  */
-#define bit_cpu_CX8		(1 << 8)
-#define bit_cpu_CMOV		(1 << 15)
-#define bit_cpu_SSE		(1 << 25)
-#define bit_cpu_SSE2		(1 << 26)
-#define bit_cpu_SSSE3		(1 << 9)
-#define bit_cpu_SSE4_1		(1 << 19)
-#define bit_cpu_SSE4_2		(1 << 20)
-#define bit_cpu_OSXSAVE		(1 << 27)
-#define bit_cpu_AVX		(1 << 28)
-#define bit_cpu_POPCOUNT	(1 << 23)
-#define bit_cpu_FMA		(1 << 12)
-#define bit_cpu_FMA4		(1 << 16)
-#define bit_cpu_HTT		(1 << 28)
-#define bit_cpu_LZCNT		(1 << 5)
-#define bit_cpu_MOVBE		(1 << 22)
-#define bit_cpu_POPCNT		(1 << 23)
-
-/* COMMON_CPUID_INDEX_7.  */
-#define bit_cpu_BMI1		(1 << 3)
-#define bit_cpu_BMI2		(1 << 8)
-#define bit_cpu_ERMS		(1 << 9)
-#define bit_cpu_RTM		(1 << 11)
-#define bit_cpu_AVX2		(1 << 5)
-#define bit_cpu_AVX512F		(1 << 16)
-#define bit_cpu_AVX512DQ	(1 << 17)
-#define bit_cpu_AVX512PF	(1 << 26)
-#define bit_cpu_AVX512ER	(1 << 27)
-#define bit_cpu_AVX512CD	(1 << 28)
-#define bit_cpu_AVX512BW	(1 << 30)
-#define bit_cpu_AVX512VL	(1u << 31)
-#define bit_cpu_IBT		(1u << 20)
-#define bit_cpu_SHSTK		(1u << 7)
-#define bit_cpu_FSRM		(1 << 4)
-
-/* XCR0 Feature flags.  */
-#define bit_XMM_state		(1 << 1)
-#define bit_YMM_state		(1 << 2)
-#define bit_Opmask_state	(1 << 5)
-#define bit_ZMM0_15_state	(1 << 6)
-#define bit_ZMM16_31_state	(1 << 7)
+enum
+{
+  /* The integer bit array index for the first set of internal feature
+     bits.  */
+  FEATURE_INDEX_1 = 0,
+  FEATURE_INDEX_2,
+  /* The current maximum size of the feature integer bit array.  */
+  FEATURE_INDEX_MAX
+};
 
-/* The integer bit array index for the first set of internal feature bits.  */
-#define FEATURE_INDEX_1 0
+enum
+{
+  COMMON_CPUID_INDEX_1 = 0,
+  COMMON_CPUID_INDEX_7,
+  COMMON_CPUID_INDEX_80000001,
+  COMMON_CPUID_INDEX_D_ECX_1,
+  COMMON_CPUID_INDEX_80000007,
+  COMMON_CPUID_INDEX_80000008,
+  /* Keep the following line at the end.  */
+  COMMON_CPUID_INDEX_MAX
+};
 
-/* The current maximum size of the feature integer bit array.  */
-#define FEATURE_INDEX_MAX 1
+struct cpuid_registers
+{
+  unsigned int eax;
+  unsigned int ebx;
+  unsigned int ecx;
+  unsigned int edx;
+};
 
-enum
-  {
-    COMMON_CPUID_INDEX_1 = 0,
-    COMMON_CPUID_INDEX_7,
-    COMMON_CPUID_INDEX_80000001,
-    /* Keep the following line at the end.  */
-    COMMON_CPUID_INDEX_MAX
-  };
+enum cpu_features_kind
+{
+  arch_kind_unknown = 0,
+  arch_kind_intel,
+  arch_kind_amd,
+  arch_kind_other
+};
 
-struct cpu_features
+struct cpu_features_basic
 {
-  enum cpu_features_kind
-    {
-      arch_kind_unknown = 0,
-      arch_kind_intel,
-      arch_kind_amd,
-      arch_kind_other
-    } kind;
+  enum cpu_features_kind kind;
   int max_cpuid;
-  struct cpuid_registers
-  {
-    unsigned int eax;
-    unsigned int ebx;
-    unsigned int ecx;
-    unsigned int edx;
-  } cpuid[COMMON_CPUID_INDEX_MAX];
   unsigned int family;
   unsigned int model;
+  unsigned int stepping;
+};
+
+struct cpu_features
+{
+  struct cpuid_registers cpuid[COMMON_CPUID_INDEX_MAX];
+  unsigned int feature[FEATURE_INDEX_MAX];
+  struct cpu_features_basic basic;
   /* The state size for XSAVEC or XSAVE.  The type must be unsigned long
      int so that we use
 
@@ -132,7 +82,6 @@ struct cpu_features
      GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC_Usable
    */
   unsigned int xsave_state_full_size;
-  unsigned int feature[FEATURE_INDEX_MAX];
   /* Data cache size for use in memory and string routines, typically
      L1 size.  */
   unsigned long int data_cache_size;
@@ -148,112 +97,838 @@ struct cpu_features
 extern const struct cpu_features *__get_cpu_features (void)
      __attribute__ ((const));
 
-# if defined (_LIBC) && !IS_IN (nonlib)
-/* Unused for x86.  */
-#  define INIT_ARCH()
-#  define __get_cpu_features()	(&GLRO(dl_x86_cpu_features))
-# endif
-
-
 /* Only used directly in cpu-features.c.  */
 # define CPU_FEATURES_CPU_P(ptr, name) \
   ((ptr->cpuid[index_cpu_##name].reg_##name & (bit_cpu_##name)) != 0)
 # define CPU_FEATURES_ARCH_P(ptr, name) \
   ((ptr->feature[index_arch_##name] & (bit_arch_##name)) != 0)
 
-/* HAS_* evaluates to true if we may use the feature at runtime.  */
-# define HAS_CPU_FEATURE(name) \
-   CPU_FEATURES_CPU_P (__get_cpu_features (), name)
+/* HAS_CPU_FEATURE evaluates to true if CPU supports the feature.  */
+#define HAS_CPU_FEATURE(name) \
+  CPU_FEATURES_CPU_P (__get_cpu_features (), name)
+/* HAS_ARCH_FEATURE evaluates to true if we may use the feature at
+   runtime.  */
 # define HAS_ARCH_FEATURE(name) \
-   CPU_FEATURES_ARCH_P (__get_cpu_features (), name)
-
-# define index_cpu_CX8		COMMON_CPUID_INDEX_1
-# define index_cpu_CMOV		COMMON_CPUID_INDEX_1
-# define index_cpu_SSE		COMMON_CPUID_INDEX_1
-# define index_cpu_SSE2		COMMON_CPUID_INDEX_1
-# define index_cpu_SSSE3	COMMON_CPUID_INDEX_1
-# define index_cpu_SSE4_1	COMMON_CPUID_INDEX_1
-# define index_cpu_SSE4_2	COMMON_CPUID_INDEX_1
-# define index_cpu_AVX		COMMON_CPUID_INDEX_1
-# define index_cpu_AVX2		COMMON_CPUID_INDEX_7
-# define index_cpu_AVX512F	COMMON_CPUID_INDEX_7
-# define index_cpu_AVX512DQ	COMMON_CPUID_INDEX_7
-# define index_cpu_AVX512PF	COMMON_CPUID_INDEX_7
-# define index_cpu_AVX512ER	COMMON_CPUID_INDEX_7
-# define index_cpu_AVX512CD	COMMON_CPUID_INDEX_7
-# define index_cpu_AVX512BW	COMMON_CPUID_INDEX_7
-# define index_cpu_AVX512VL	COMMON_CPUID_INDEX_7
-# define index_cpu_ERMS		COMMON_CPUID_INDEX_7
-# define index_cpu_RTM		COMMON_CPUID_INDEX_7
-# define index_cpu_FMA		COMMON_CPUID_INDEX_1
-# define index_cpu_FMA4		COMMON_CPUID_INDEX_80000001
-# define index_cpu_POPCOUNT	COMMON_CPUID_INDEX_1
-# define index_cpu_OSXSAVE	COMMON_CPUID_INDEX_1
-# define index_cpu_HTT		COMMON_CPUID_INDEX_1
-# define index_cpu_BMI1		COMMON_CPUID_INDEX_7
-# define index_cpu_BMI2		COMMON_CPUID_INDEX_7
-# define index_cpu_LZCNT	COMMON_CPUID_INDEX_80000001
-# define index_cpu_MOVBE	COMMON_CPUID_INDEX_1
-# define index_cpu_POPCNT	COMMON_CPUID_INDEX_1
-# define index_cpu_IBT		COMMON_CPUID_INDEX_7
-# define index_cpu_SHSTK	COMMON_CPUID_INDEX_7
-# define index_cpu_FSRM		COMMON_CPUID_INDEX_7
-
-# define reg_CX8		edx
-# define reg_CMOV		edx
-# define reg_SSE		edx
-# define reg_SSE2		edx
-# define reg_SSSE3		ecx
-# define reg_SSE4_1		ecx
-# define reg_SSE4_2		ecx
-# define reg_AVX		ecx
-# define reg_AVX2		ebx
-# define reg_AVX512F		ebx
-# define reg_AVX512DQ		ebx
-# define reg_AVX512PF		ebx
-# define reg_AVX512ER		ebx
-# define reg_AVX512CD		ebx
-# define reg_AVX512BW		ebx
-# define reg_AVX512VL		ebx
-# define reg_ERMS		ebx
-# define reg_RTM		ebx
-# define reg_FMA		ecx
-# define reg_FMA4		ecx
-# define reg_POPCOUNT		ecx
-# define reg_OSXSAVE		ecx
-# define reg_HTT		edx
-# define reg_BMI1		ebx
-# define reg_BMI2		ebx
-# define reg_LZCNT		ecx
-# define reg_MOVBE		ecx
-# define reg_POPCNT		ecx
-# define reg_IBT		edx
-# define reg_SHSTK		ecx
-# define reg_FSRM		edx
-
-# define index_arch_Fast_Rep_String	FEATURE_INDEX_1
-# define index_arch_Fast_Copy_Backward	FEATURE_INDEX_1
-# define index_arch_Slow_BSF		FEATURE_INDEX_1
-# define index_arch_Fast_Unaligned_Load	FEATURE_INDEX_1
-# define index_arch_Prefer_PMINUB_for_stringop FEATURE_INDEX_1
-# define index_arch_AVX_Usable		FEATURE_INDEX_1
-# define index_arch_FMA_Usable		FEATURE_INDEX_1
-# define index_arch_FMA4_Usable		FEATURE_INDEX_1
-# define index_arch_Slow_SSE4_2		FEATURE_INDEX_1
-# define index_arch_AVX2_Usable		FEATURE_INDEX_1
-# define index_arch_AVX_Fast_Unaligned_Load FEATURE_INDEX_1
-# define index_arch_AVX512F_Usable	FEATURE_INDEX_1
-# define index_arch_AVX512DQ_Usable	FEATURE_INDEX_1
-# define index_arch_I586		FEATURE_INDEX_1
-# define index_arch_I686		FEATURE_INDEX_1
-# define index_arch_Prefer_MAP_32BIT_EXEC FEATURE_INDEX_1
-# define index_arch_Prefer_No_VZEROUPPER FEATURE_INDEX_1
-# define index_arch_Fast_Unaligned_Copy	FEATURE_INDEX_1
-# define index_arch_Prefer_ERMS		FEATURE_INDEX_1
-# define index_arch_Prefer_No_AVX512	FEATURE_INDEX_1
-# define index_arch_MathVec_Prefer_No_AVX512 FEATURE_INDEX_1
-# define index_arch_XSAVEC_Usable	FEATURE_INDEX_1
-# define index_arch_Prefer_FSRM		FEATURE_INDEX_1
+  CPU_FEATURES_ARCH_P (__get_cpu_features (), name)
+/* CPU_FEATURE_USABLE evaluates to true if the feature is usable.  */
+#define CPU_FEATURE_USABLE(name)				\
+  ((need_arch_feature_##name && HAS_ARCH_FEATURE (name##_Usable))	\
+   || (!need_arch_feature_##name && HAS_CPU_FEATURE(name)))
+
+/* Architecture features.  */
+
+/* FEATURE_INDEX_1.  */
+#define bit_arch_AVX_Usable			(1u << 0)
+#define bit_arch_AVX2_Usable			(1u << 1)
+#define bit_arch_AVX512F_Usable			(1u << 2)
+#define bit_arch_AVX512CD_Usable		(1u << 3)
+#define bit_arch_AVX512ER_Usable		(1u << 4)
+#define bit_arch_AVX512PF_Usable		(1u << 5)
+#define bit_arch_AVX512VL_Usable		(1u << 6)
+#define bit_arch_AVX512DQ_Usable		(1u << 7)
+#define bit_arch_AVX512BW_Usable		(1u << 8)
+#define bit_arch_AVX512_4FMAPS_Usable		(1u << 9)
+#define bit_arch_AVX512_4VNNIW_Usable		(1u << 10)
+#define bit_arch_AVX512_BITALG_Usable		(1u << 11)
+#define bit_arch_AVX512_IFMA_Usable		(1u << 12)
+#define bit_arch_AVX512_VBMI_Usable		(1u << 13)
+#define bit_arch_AVX512_VBMI2_Usable		(1u << 14)
+#define bit_arch_AVX512_VNNI_Usable		(1u << 15)
+#define bit_arch_AVX512_VPOPCNTDQ_Usable	(1u << 16)
+#define bit_arch_FMA_Usable			(1u << 17)
+#define bit_arch_FMA4_Usable			(1u << 18)
+#define bit_arch_VAES_Usable			(1u << 19)
+#define bit_arch_VPCLMULQDQ_Usable		(1u << 20)
+#define bit_arch_XOP_Usable			(1u << 21)
+#define bit_arch_XSAVEC_Usable			(1u << 22)
+
+#define index_arch_AVX_Usable			FEATURE_INDEX_1
+#define index_arch_AVX2_Usable			FEATURE_INDEX_1
+#define index_arch_AVX512F_Usable		FEATURE_INDEX_1
+#define index_arch_AVX512CD_Usable		FEATURE_INDEX_1
+#define index_arch_AVX512ER_Usable		FEATURE_INDEX_1
+#define index_arch_AVX512PF_Usable		FEATURE_INDEX_1
+#define index_arch_AVX512VL_Usable		FEATURE_INDEX_1
+#define index_arch_AVX512BW_Usable		FEATURE_INDEX_1
+#define index_arch_AVX512DQ_Usable		FEATURE_INDEX_1
+#define index_arch_AVX512_4FMAPS_Usable		FEATURE_INDEX_1
+#define index_arch_AVX512_4VNNIW_Usable		FEATURE_INDEX_1
+#define index_arch_AVX512_BITALG_Usable		FEATURE_INDEX_1
+#define index_arch_AVX512_IFMA_Usable		FEATURE_INDEX_1
+#define index_arch_AVX512_VBMI_Usable		FEATURE_INDEX_1
+#define index_arch_AVX512_VBMI2_Usable		FEATURE_INDEX_1
+#define index_arch_AVX512_VNNI_Usable		FEATURE_INDEX_1
+#define index_arch_AVX512_VPOPCNTDQ_Usable	FEATURE_INDEX_1
+#define index_arch_FMA_Usable			FEATURE_INDEX_1
+#define index_arch_FMA4_Usable			FEATURE_INDEX_1
+#define index_arch_VAES_Usable			FEATURE_INDEX_1
+#define index_arch_VPCLMULQDQ_Usable		FEATURE_INDEX_1
+#define index_arch_XOP_Usable			FEATURE_INDEX_1
+#define index_arch_XSAVEC_Usable		FEATURE_INDEX_1
+
+/* Unused.  Compiler will optimize them out.  */
+#define bit_arch_SSE3_Usable			(1u << 0)
+#define bit_arch_PCLMULQDQ_Usable		(1u << 0)
+#define bit_arch_SSSE3_Usable			(1u << 0)
+#define bit_arch_CMPXCHG16B_Usable		(1u << 0)
+#define bit_arch_SSE4_1_Usable			(1u << 0)
+#define bit_arch_SSE4_2_Usable			(1u << 0)
+#define bit_arch_MOVBE_Usable			(1u << 0)
+#define bit_arch_POPCNT_Usable			(1u << 0)
+#define bit_arch_AES_Usable			(1u << 0)
+#define bit_arch_XSAVE_Usable			(1u << 0)
+#define bit_arch_OSXSAVE_Usable			(1u << 0)
+#define bit_arch_F16C_Usable			(1u << 0)
+#define bit_arch_RDRAND_Usable			(1u << 0)
+#define bit_arch_FPU_Usable			(1u << 0)
+#define bit_arch_TSC_Usable			(1u << 0)
+#define bit_arch_MSR_Usable			(1u << 0)
+#define bit_arch_CX8_Usable			(1u << 0)
+#define bit_arch_SEP_Usable			(1u << 0)
+#define bit_arch_CMOV_Usable			(1u << 0)
+#define bit_arch_CLFSH_Usable			(1u << 0)
+#define bit_arch_MMX_Usable			(1u << 0)
+#define bit_arch_FXSR_Usable			(1u << 0)
+#define bit_arch_SSE_Usable			(1u << 0)
+#define bit_arch_SSE2_Usable			(1u << 0)
+#define bit_arch_FSGSBASE_Usable		(1u << 0)
+#define bit_arch_BMI1_Usable			(1u << 0)
+#define bit_arch_HLE_Usable			(1u << 0)
+#define bit_arch_BMI2_Usable			(1u << 0)
+#define bit_arch_ERMS_Usable			(1u << 0)
+#define bit_arch_RTM_Usable			(1u << 0)
+#define bit_arch_RDSEED_Usable			(1u << 0)
+#define bit_arch_ADX_Usable			(1u << 0)
+#define bit_arch_CLFLUSHOPT_Usable		(1u << 0)
+#define bit_arch_CLWB_Usable			(1u << 0)
+#define bit_arch_SHA_Usable			(1u << 0)
+#define bit_arch_PREFETCHWT1_Usable		(1u << 0)
+#define bit_arch_GFNI_Usable			(1u << 0)
+#define bit_arch_RDPID_Usable			(1u << 0)
+#define bit_arch_CLDEMOTE_Usable		(1u << 0)
+#define bit_arch_MOVDIRI_Usable			(1u << 0)
+#define bit_arch_MOVDIR64B_Usable		(1u << 0)
+#define bit_arch_FSRM_Usable			(1u << 0)
+#define bit_arch_LAHF64_SAHF64_Usable		(1u << 0)
+#define bit_arch_SVM_Usable			(1u << 0)
+#define bit_arch_LZCNT_Usable			(1u << 0)
+#define bit_arch_SSE4A_Usable			(1u << 0)
+#define bit_arch_PREFETCHW_Usable		(1u << 0)
+#define bit_arch_TBM_Usable			(1u << 0)
+#define bit_arch_SYSCALL_SYSRET_Usable		(1u << 0)
+#define bit_arch_RDTSCP_Usable			(1u << 0)
+#define bit_arch_XSAVEOPT_Usable		(1u << 0)
+#define bit_arch_XGETBV_ECX_1_Usable		(1u << 0)
+#define bit_arch_XSAVES_Usable			(1u << 0)
+#define bit_arch_INVARIANT_TSC_Usable		(1u << 0)
+#define bit_arch_WBNOINVD_Usable		(1u << 0)
+
+/* Unused.  Compiler will optimize them out.  */
+#define index_arch_SSE3_Usable			FEATURE_INDEX_1
+#define index_arch_PCLMULQDQ_Usable		FEATURE_INDEX_1
+#define index_arch_SSSE3_Usable			FEATURE_INDEX_1
+#define index_arch_CMPXCHG16B_Usable		FEATURE_INDEX_1
+#define index_arch_SSE4_1_Usable		FEATURE_INDEX_1
+#define index_arch_SSE4_2_Usable		FEATURE_INDEX_1
+#define index_arch_MOVBE_Usable			FEATURE_INDEX_1
+#define index_arch_POPCNT_Usable		FEATURE_INDEX_1
+#define index_arch_AES_Usable			FEATURE_INDEX_1
+#define index_arch_XSAVE_Usable			FEATURE_INDEX_1
+#define index_arch_OSXSAVE_Usable		FEATURE_INDEX_1
+#define index_arch_F16C_Usable			FEATURE_INDEX_1
+#define index_arch_RDRAND_Usable		FEATURE_INDEX_1
+#define index_arch_FPU_Usable			FEATURE_INDEX_1
+#define index_arch_TSC_Usable			FEATURE_INDEX_1
+#define index_arch_MSR_Usable			FEATURE_INDEX_1
+#define index_arch_CX8_Usable			FEATURE_INDEX_1
+#define index_arch_SEP_Usable			FEATURE_INDEX_1
+#define index_arch_CMOV_Usable			FEATURE_INDEX_1
+#define index_arch_CLFSH_Usable			FEATURE_INDEX_1
+#define index_arch_MMX_Usable			FEATURE_INDEX_1
+#define index_arch_FXSR_Usable			FEATURE_INDEX_1
+#define index_arch_SSE_Usable			FEATURE_INDEX_1
+#define index_arch_SSE2_Usable			FEATURE_INDEX_1
+#define index_arch_FSGSBASE_Usable		FEATURE_INDEX_1
+#define index_arch_BMI1_Usable			FEATURE_INDEX_1
+#define index_arch_HLE_Usable			FEATURE_INDEX_1
+#define index_arch_BMI2_Usable			FEATURE_INDEX_1
+#define index_arch_ERMS_Usable			FEATURE_INDEX_1
+#define index_arch_RTM_Usable			FEATURE_INDEX_1
+#define index_arch_RDSEED_Usable		FEATURE_INDEX_1
+#define index_arch_ADX_Usable			FEATURE_INDEX_1
+#define index_arch_CLFLUSHOPT_Usable		FEATURE_INDEX_1
+#define index_arch_CLWB_Usable			FEATURE_INDEX_1
+#define index_arch_SHA_Usable			FEATURE_INDEX_1
+#define index_arch_PREFETCHWT1_Usable		FEATURE_INDEX_1
+#define index_arch_GFNI_Usable			FEATURE_INDEX_1
+#define index_arch_RDPID_Usable			FEATURE_INDEX_1
+#define index_arch_CLDEMOTE_Usable		FEATURE_INDEX_1
+#define index_arch_MOVDIRI_Usable		FEATURE_INDEX_1
+#define index_arch_MOVDIR64B_Usable		FEATURE_INDEX_1
+#define index_arch_FSRM_Usable			FEATURE_INDEX_1
+#define index_arch_LAHF64_SAHF64_Usable		FEATURE_INDEX_1
+#define index_arch_LZCNT_Usable			FEATURE_INDEX_1
+#define index_arch_SSE4A_Usable			FEATURE_INDEX_1
+#define index_arch_PREFETCHW_Usable		FEATURE_INDEX_1
+#define index_arch_TBM_Usable			FEATURE_INDEX_1
+#define index_arch_SYSCALL_SYSRET_Usable	FEATURE_INDEX_1
+#define index_arch_RDTSCP_Usable		FEATURE_INDEX_1
+#define index_arch_XSAVEOPT_Usable		FEATURE_INDEX_1
+#define index_arch_XGETBV_ECX_1_Usable		FEATURE_INDEX_1
+#define index_arch_XSAVES_Usable		FEATURE_INDEX_1
+#define index_arch_INVARIANT_TSC_Usable		FEATURE_INDEX_1
+#define index_arch_WBNOINVD_Usable		FEATURE_INDEX_1
+
+/* COMMON_CPUID_INDEX_1.  */
+
+/* ECX.  */
+#define	need_arch_feature_SSE3			0
+#define	need_arch_feature_PCLMULQDQ		0
+#define need_arch_feature_SSSE3			0
+#define need_arch_feature_FMA			1
+#define need_arch_feature_CMPXCHG16B		0
+#define need_arch_feature_SSE4_1		0
+#define need_arch_feature_SSE4_2		0
+#define need_arch_feature_MOVBE			0
+#define need_arch_feature_POPCNT		0
+#define need_arch_feature_AES			0
+#define need_arch_feature_XSAVE			0
+#define need_arch_feature_OSXSAVE		0
+#define need_arch_feature_AVX			1
+#define need_arch_feature_F16C			0
+#define need_arch_feature_RDRAND		0
+
+/* EDX.  */
+#define need_arch_feature_FPU			0
+#define need_arch_feature_TSC			0
+#define need_arch_feature_MSR			0
+#define need_arch_feature_CX8			0
+#define need_arch_feature_SEP			0
+#define need_arch_feature_CMOV			0
+#define need_arch_feature_CLFSH			0
+#define need_arch_feature_MMX			0
+#define need_arch_feature_FXSR			0
+#define need_arch_feature_SSE			0
+#define need_arch_feature_SSE2			0
+
+/* COMMON_CPUID_INDEX_7.  */
+
+/* EBX.  */
+#define need_arch_feature_FSGSBASE		0
+#define need_arch_feature_BMI1			0
+#define need_arch_feature_HLE			0
+#define need_arch_feature_AVX2			1
+#define need_arch_feature_BMI2			0
+#define need_arch_feature_ERMS			0
+#define need_arch_feature_RTM			0
+#define need_arch_feature_AVX512F		1
+#define need_arch_feature_AVX512DQ		1
+#define need_arch_feature_RDSEED		0
+#define need_arch_feature_ADX			0
+#define need_arch_feature_AVX512_IFMA		1
+#define need_arch_feature_CLFLUSHOPT		0
+#define need_arch_feature_CLWB			0
+#define need_arch_feature_AVX512PF		1
+#define need_arch_feature_AVX512ER		1
+#define need_arch_feature_AVX512CD		1
+#define need_arch_feature_SHA			0
+#define need_arch_feature_AVX512BW		1
+#define need_arch_feature_AVX512VL		1
+
+/* ECX.  */
+#define need_arch_feature_PREFETCHWT1		0
+#define need_arch_feature_AVX512_VBMI		1
+#define need_arch_feature_AVX512_VBMI2		1
+#define need_arch_feature_GFNI			0
+#define need_arch_feature_VAES			1
+#define need_arch_feature_VPCLMULQDQ		1
+#define need_arch_feature_AVX512_VNNI		1
+#define need_arch_feature_AVX512_BITALG		1
+#define need_arch_feature_AVX512_VPOPCNTDQ	1
+#define need_arch_feature_RDPID			0
+#define need_arch_feature_CLDEMOTE		0
+#define need_arch_feature_MOVDIRI		0
+#define need_arch_feature_MOVDIR64B		0
+
+/* EDX.  */
+#define need_arch_feature_AVX512_4VNNIW		1
+#define need_arch_feature_AVX512_4FMAPS		1
+#define need_arch_feature_FSRM			0
+
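+/* COMMON_CPUID_INDEX_80000001 (ECX, then EDX), followed by
+   COMMON_CPUID_INDEX_D_ECX_1 (EAX), COMMON_CPUID_INDEX_80000007 (EDX)
+   and COMMON_CPUID_INDEX_80000008 (EBX).  */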
+#define need_arch_feature_LAHF64_SAHF64		0
+#define need_arch_feature_LZCNT			0
+#define need_arch_feature_SSE4A			0
+#define need_arch_feature_PREFETCHW		0
+#define need_arch_feature_XOP			1
+#define need_arch_feature_FMA4			1
+#define need_arch_feature_TBM			0
+#define need_arch_feature_SYSCALL_SYSRET	0
+#define need_arch_feature_RDTSCP		0
+#define need_arch_feature_XSAVEOPT		0
+#define need_arch_feature_XSAVEC		1
+#define need_arch_feature_XGETBV_ECX_1		0
+#define need_arch_feature_XSAVES		0
+#define need_arch_feature_INVARIANT_TSC		0
+#define need_arch_feature_WBNOINVD		0
+
+/* CPU features.  */
+
+/* COMMON_CPUID_INDEX_1.  */
+
+/* ECX.  */
+#define bit_cpu_SSE3		(1u << 0)
+#define bit_cpu_PCLMULQDQ	(1u << 1)
+#define bit_cpu_DTES64		(1u << 2)
+#define bit_cpu_MONITOR		(1u << 3)
+#define bit_cpu_DS_CPL		(1u << 4)
+#define bit_cpu_VMX		(1u << 5)
+#define bit_cpu_SMX		(1u << 6)
+#define bit_cpu_EST		(1u << 7)
+#define bit_cpu_TM2		(1u << 8)
+#define bit_cpu_SSSE3		(1u << 9)
+#define bit_cpu_CNXT_ID		(1u << 10)
+#define bit_cpu_SDBG		(1u << 11)
+#define bit_cpu_FMA		(1u << 12)
+#define bit_cpu_CMPXCHG16B	(1u << 13)
+#define bit_cpu_XTPRUPDCTRL	(1u << 14)
+#define bit_cpu_PDCM		(1u << 15)
+#define bit_cpu_PCID		(1u << 17)
+#define bit_cpu_DCA		(1u << 18)
+#define bit_cpu_SSE4_1		(1u << 19)
+#define bit_cpu_SSE4_2		(1u << 20)
+#define bit_cpu_X2APIC		(1u << 21)
+#define bit_cpu_MOVBE		(1u << 22)
+#define bit_cpu_POPCNT		(1u << 23)
+#define bit_cpu_TSC_DEADLINE	(1u << 24)
+#define bit_cpu_AES		(1u << 25)
+#define bit_cpu_XSAVE		(1u << 26)
+#define bit_cpu_OSXSAVE		(1u << 27)
+#define bit_cpu_AVX		(1u << 28)
+#define bit_cpu_F16C		(1u << 29)
+#define bit_cpu_RDRAND		(1u << 30)
+
+/* EDX.  */
+#define bit_cpu_FPU		(1u << 0)
+#define bit_cpu_VME		(1u << 1)
+#define bit_cpu_DE		(1u << 2)
+#define bit_cpu_PSE		(1u << 3)
+#define bit_cpu_TSC		(1u << 4)
+#define bit_cpu_MSR		(1u << 5)
+#define bit_cpu_PAE		(1u << 6)
+#define bit_cpu_MCE		(1u << 7)
+#define bit_cpu_CX8		(1u << 8)
+#define bit_cpu_APIC		(1u << 9)
+#define bit_cpu_SEP		(1u << 11)
+#define bit_cpu_MTRR		(1u << 12)
+#define bit_cpu_PGE		(1u << 13)
+#define bit_cpu_MCA		(1u << 14)
+#define bit_cpu_CMOV		(1u << 15)
+#define bit_cpu_PAT		(1u << 16)
+#define bit_cpu_PSE_36		(1u << 17)
+#define bit_cpu_PSN		(1u << 18)
+#define bit_cpu_CLFSH		(1u << 19)	/* CPUID.01H:EDX[19].  */
+#define bit_cpu_DS		(1u << 21)
+#define bit_cpu_ACPI		(1u << 22)
+#define bit_cpu_MMX		(1u << 23)
+#define bit_cpu_FXSR		(1u << 24)
+#define bit_cpu_SSE		(1u << 25)
+#define bit_cpu_SSE2		(1u << 26)
+#define bit_cpu_SS		(1u << 27)
+#define bit_cpu_HTT		(1u << 28)
+#define bit_cpu_TM		(1u << 29)
+#define bit_cpu_PBE		(1u << 31)
+
+/* COMMON_CPUID_INDEX_7.  */
+
+/* EBX.  */
+#define bit_cpu_FSGSBASE	(1u << 0)
+#define bit_cpu_TSC_ADJUST	(1u << 1)
+#define bit_cpu_SGX		(1u << 2)
+#define bit_cpu_BMI1		(1u << 3)
+#define bit_cpu_HLE		(1u << 4)
+#define bit_cpu_AVX2		(1u << 5)
+#define bit_cpu_SMEP		(1u << 7)
+#define bit_cpu_BMI2		(1u << 8)
+#define bit_cpu_ERMS		(1u << 9)
+#define bit_cpu_INVPCID		(1u << 10)
+#define bit_cpu_RTM		(1u << 11)
+#define bit_cpu_PQM		(1u << 12)
+#define bit_cpu_MPX		(1u << 14)
+#define bit_cpu_PQE		(1u << 15)
+#define bit_cpu_AVX512F		(1u << 16)
+#define bit_cpu_AVX512DQ	(1u << 17)
+#define bit_cpu_RDSEED		(1u << 18)
+#define bit_cpu_ADX		(1u << 19)
+#define bit_cpu_SMAP		(1u << 20)
+#define bit_cpu_AVX512_IFMA	(1u << 21)
+#define bit_cpu_CLFLUSHOPT	(1u << 23)	/* CPUID.07H:EBX[23].  */
+#define bit_cpu_CLWB		(1u << 24)
+#define bit_cpu_TRACE		(1u << 25)
+#define bit_cpu_AVX512PF	(1u << 26)
+#define bit_cpu_AVX512ER	(1u << 27)
+#define bit_cpu_AVX512CD	(1u << 28)
+#define bit_cpu_SHA		(1u << 29)
+#define bit_cpu_AVX512BW	(1u << 30)
+#define bit_cpu_AVX512VL	(1u << 31)
+
+/* ECX.  */
+#define bit_cpu_PREFETCHWT1	(1u << 0)
+#define bit_cpu_AVX512_VBMI	(1u << 1)
+#define bit_cpu_UMIP		(1u << 2)
+#define bit_cpu_PKU		(1u << 3)
+#define bit_cpu_OSPKE		(1u << 4)
+#define bit_cpu_WAITPKG		(1u << 5)
+#define bit_cpu_AVX512_VBMI2	(1u << 6)
+#define bit_cpu_SHSTK		(1u << 7)
+#define bit_cpu_GFNI		(1u << 8)
+#define bit_cpu_VAES		(1u << 9)
+#define bit_cpu_VPCLMULQDQ	(1u << 10)
+#define bit_cpu_AVX512_VNNI	(1u << 11)
+#define bit_cpu_AVX512_BITALG	(1u << 12)
+#define bit_cpu_AVX512_VPOPCNTDQ (1u << 14)
+#define bit_cpu_RDPID		(1u << 22)
+#define bit_cpu_CLDEMOTE	(1u << 25)
+#define bit_cpu_MOVDIRI		(1u << 27)
+#define bit_cpu_MOVDIR64B	(1u << 28)
+#define bit_cpu_SGX_LC		(1u << 30)
+
+/* EDX.  */
+#define bit_cpu_AVX512_4VNNIW	(1u << 2)
+#define bit_cpu_AVX512_4FMAPS	(1u << 3)
+#define bit_cpu_FSRM		(1u << 4)
+#define bit_cpu_PCONFIG		(1u << 18)
+#define bit_cpu_IBT		(1u << 20)
+#define bit_cpu_IBRS_IBPB	(1u << 26)
+#define bit_cpu_STIBP		(1u << 27)
+#define bit_cpu_CAPABILITIES	(1u << 29)
+#define bit_cpu_SSBD		(1u << 31)
+
+/* COMMON_CPUID_INDEX_80000001.  */
+
+/* ECX.  */
+#define bit_cpu_LAHF64_SAHF64	(1u << 0)
+#define bit_cpu_SVM		(1u << 2)
+#define bit_cpu_LZCNT		(1u << 5)
+#define bit_cpu_SSE4A		(1u << 6)
+#define bit_cpu_PREFETCHW	(1u << 8)
+#define bit_cpu_XOP		(1u << 11)
+#define bit_cpu_LWP		(1u << 15)
+#define bit_cpu_FMA4		(1u << 16)
+#define bit_cpu_TBM		(1u << 21)
+
+/* EDX.  */
+#define bit_cpu_SYSCALL_SYSRET	(1u << 11)
+#define bit_cpu_NX		(1u << 20)
+#define bit_cpu_PAGE1GB		(1u << 26)
+#define bit_cpu_RDTSCP		(1u << 27)
+#define bit_cpu_LM		(1u << 29)
+
+/* COMMON_CPUID_INDEX_D_ECX_1.  */
+
+/* EAX.  */
+#define bit_cpu_XSAVEOPT	(1u << 0)
+#define bit_cpu_XSAVEC		(1u << 1)
+#define bit_cpu_XGETBV_ECX_1	(1u << 2)
+#define bit_cpu_XSAVES		(1u << 3)
+
+/* COMMON_CPUID_INDEX_80000007.  */
+
+/* EDX.  */
+#define bit_cpu_INVARIANT_TSC	(1u << 8)
+
+/* COMMON_CPUID_INDEX_80000008.  */
+
+/* EBX.  */
+#define bit_cpu_WBNOINVD	(1u << 9)
+
+/* COMMON_CPUID_INDEX_1.  */
+
+/* ECX.  */
+#define index_cpu_SSE3		COMMON_CPUID_INDEX_1
+#define index_cpu_PCLMULQDQ	COMMON_CPUID_INDEX_1
+#define index_cpu_DTES64	COMMON_CPUID_INDEX_1
+#define index_cpu_MONITOR	COMMON_CPUID_INDEX_1
+#define index_cpu_DS_CPL	COMMON_CPUID_INDEX_1
+#define index_cpu_VMX		COMMON_CPUID_INDEX_1
+#define index_cpu_SMX		COMMON_CPUID_INDEX_1
+#define index_cpu_EST		COMMON_CPUID_INDEX_1
+#define index_cpu_TM2		COMMON_CPUID_INDEX_1
+#define index_cpu_SSSE3		COMMON_CPUID_INDEX_1
+#define index_cpu_CNXT_ID	COMMON_CPUID_INDEX_1
+#define index_cpu_SDBG		COMMON_CPUID_INDEX_1
+#define index_cpu_FMA		COMMON_CPUID_INDEX_1
+#define index_cpu_CMPXCHG16B	COMMON_CPUID_INDEX_1
+#define index_cpu_XTPRUPDCTRL	COMMON_CPUID_INDEX_1
+#define index_cpu_PDCM		COMMON_CPUID_INDEX_1
+#define index_cpu_PCID		COMMON_CPUID_INDEX_1
+#define index_cpu_DCA		COMMON_CPUID_INDEX_1
+#define index_cpu_SSE4_1	COMMON_CPUID_INDEX_1
+#define index_cpu_SSE4_2	COMMON_CPUID_INDEX_1
+#define index_cpu_X2APIC	COMMON_CPUID_INDEX_1
+#define index_cpu_MOVBE		COMMON_CPUID_INDEX_1
+#define index_cpu_POPCNT	COMMON_CPUID_INDEX_1
+#define index_cpu_TSC_DEADLINE	COMMON_CPUID_INDEX_1
+#define index_cpu_AES		COMMON_CPUID_INDEX_1
+#define index_cpu_XSAVE		COMMON_CPUID_INDEX_1
+#define index_cpu_OSXSAVE	COMMON_CPUID_INDEX_1
+#define index_cpu_AVX		COMMON_CPUID_INDEX_1
+#define index_cpu_F16C		COMMON_CPUID_INDEX_1
+#define index_cpu_RDRAND	COMMON_CPUID_INDEX_1
+
+/* EDX.  */
+#define index_cpu_FPU		COMMON_CPUID_INDEX_1
+#define index_cpu_VME		COMMON_CPUID_INDEX_1
+#define index_cpu_DE		COMMON_CPUID_INDEX_1
+#define index_cpu_PSE		COMMON_CPUID_INDEX_1
+#define index_cpu_TSC		COMMON_CPUID_INDEX_1
+#define index_cpu_MSR		COMMON_CPUID_INDEX_1
+#define index_cpu_PAE		COMMON_CPUID_INDEX_1
+#define index_cpu_MCE		COMMON_CPUID_INDEX_1
+#define index_cpu_CX8		COMMON_CPUID_INDEX_1
+#define index_cpu_APIC		COMMON_CPUID_INDEX_1
+#define index_cpu_SEP		COMMON_CPUID_INDEX_1
+#define index_cpu_MTRR		COMMON_CPUID_INDEX_1
+#define index_cpu_PGE		COMMON_CPUID_INDEX_1
+#define index_cpu_MCA		COMMON_CPUID_INDEX_1
+#define index_cpu_CMOV		COMMON_CPUID_INDEX_1
+#define index_cpu_PAT		COMMON_CPUID_INDEX_1
+#define index_cpu_PSE_36	COMMON_CPUID_INDEX_1
+#define index_cpu_PSN		COMMON_CPUID_INDEX_1
+#define index_cpu_CLFSH		COMMON_CPUID_INDEX_1
+#define index_cpu_DS		COMMON_CPUID_INDEX_1
+#define index_cpu_ACPI		COMMON_CPUID_INDEX_1
+#define index_cpu_MMX		COMMON_CPUID_INDEX_1
+#define index_cpu_FXSR		COMMON_CPUID_INDEX_1
+#define index_cpu_SSE		COMMON_CPUID_INDEX_1
+#define index_cpu_SSE2		COMMON_CPUID_INDEX_1
+#define index_cpu_SS		COMMON_CPUID_INDEX_1
+#define index_cpu_HTT		COMMON_CPUID_INDEX_1
+#define index_cpu_TM		COMMON_CPUID_INDEX_1
+#define index_cpu_PBE		COMMON_CPUID_INDEX_1
+
+/* COMMON_CPUID_INDEX_7.  */
+
+/* EBX.  */
+#define index_cpu_FSGSBASE	COMMON_CPUID_INDEX_7
+#define index_cpu_TSC_ADJUST	COMMON_CPUID_INDEX_7
+#define index_cpu_SGX		COMMON_CPUID_INDEX_7
+#define index_cpu_BMI1		COMMON_CPUID_INDEX_7
+#define index_cpu_HLE		COMMON_CPUID_INDEX_7
+#define index_cpu_AVX2		COMMON_CPUID_INDEX_7
+#define index_cpu_SMEP		COMMON_CPUID_INDEX_7
+#define index_cpu_BMI2		COMMON_CPUID_INDEX_7
+#define index_cpu_ERMS		COMMON_CPUID_INDEX_7
+#define index_cpu_INVPCID	COMMON_CPUID_INDEX_7
+#define index_cpu_RTM		COMMON_CPUID_INDEX_7
+#define index_cpu_PQM		COMMON_CPUID_INDEX_7
+#define index_cpu_MPX		COMMON_CPUID_INDEX_7
+#define index_cpu_PQE		COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512F	COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512DQ	COMMON_CPUID_INDEX_7
+#define index_cpu_RDSEED	COMMON_CPUID_INDEX_7
+#define index_cpu_ADX		COMMON_CPUID_INDEX_7
+#define index_cpu_SMAP		COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512_IFMA	COMMON_CPUID_INDEX_7
+#define index_cpu_CLFLUSHOPT	COMMON_CPUID_INDEX_7
+#define index_cpu_CLWB		COMMON_CPUID_INDEX_7
+#define index_cpu_TRACE		COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512PF	COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512ER	COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512CD	COMMON_CPUID_INDEX_7
+#define index_cpu_SHA		COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512BW	COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512VL	COMMON_CPUID_INDEX_7
+
+/* ECX.  */
+#define index_cpu_PREFETCHWT1	COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512_VBMI	COMMON_CPUID_INDEX_7
+#define index_cpu_UMIP		COMMON_CPUID_INDEX_7
+#define index_cpu_PKU		COMMON_CPUID_INDEX_7
+#define index_cpu_OSPKE		COMMON_CPUID_INDEX_7
+#define index_cpu_WAITPKG	COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512_VBMI2	COMMON_CPUID_INDEX_7
+#define index_cpu_SHSTK		COMMON_CPUID_INDEX_7
+#define index_cpu_GFNI		COMMON_CPUID_INDEX_7
+#define index_cpu_VAES		COMMON_CPUID_INDEX_7
+#define index_cpu_VPCLMULQDQ	COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512_VNNI	COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512_BITALG COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512_VPOPCNTDQ COMMON_CPUID_INDEX_7
+#define index_cpu_RDPID		COMMON_CPUID_INDEX_7
+#define index_cpu_CLDEMOTE	COMMON_CPUID_INDEX_7
+#define index_cpu_MOVDIRI	COMMON_CPUID_INDEX_7
+#define index_cpu_MOVDIR64B	COMMON_CPUID_INDEX_7
+#define index_cpu_SGX_LC	COMMON_CPUID_INDEX_7
+
+/* EDX.  */
+#define index_cpu_AVX512_4VNNIW COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512_4FMAPS	COMMON_CPUID_INDEX_7
+#define index_cpu_FSRM		COMMON_CPUID_INDEX_7
+#define index_cpu_PCONFIG	COMMON_CPUID_INDEX_7
+#define index_cpu_IBT		COMMON_CPUID_INDEX_7
+#define index_cpu_IBRS_IBPB	COMMON_CPUID_INDEX_7
+#define index_cpu_STIBP		COMMON_CPUID_INDEX_7
+#define index_cpu_CAPABILITIES	COMMON_CPUID_INDEX_7
+#define index_cpu_SSBD		COMMON_CPUID_INDEX_7
+
+/* COMMON_CPUID_INDEX_80000001.  */
+
+/* ECX.  */
+#define index_cpu_LAHF64_SAHF64 COMMON_CPUID_INDEX_80000001
+#define index_cpu_SVM		COMMON_CPUID_INDEX_80000001
+#define index_cpu_LZCNT		COMMON_CPUID_INDEX_80000001
+#define index_cpu_SSE4A		COMMON_CPUID_INDEX_80000001
+#define index_cpu_PREFETCHW	COMMON_CPUID_INDEX_80000001
+#define index_cpu_XOP		COMMON_CPUID_INDEX_80000001
+#define index_cpu_LWP		COMMON_CPUID_INDEX_80000001
+#define index_cpu_FMA4		COMMON_CPUID_INDEX_80000001
+#define index_cpu_TBM		COMMON_CPUID_INDEX_80000001
+
+/* EDX.  */
+#define index_cpu_SYSCALL_SYSRET COMMON_CPUID_INDEX_80000001
+#define index_cpu_NX		COMMON_CPUID_INDEX_80000001
+#define index_cpu_PAGE1GB	COMMON_CPUID_INDEX_80000001
+#define index_cpu_RDTSCP	COMMON_CPUID_INDEX_80000001
+#define index_cpu_LM		COMMON_CPUID_INDEX_80000001
+
+/* COMMON_CPUID_INDEX_D_ECX_1.  */
+
+/* EAX.  */
+#define index_cpu_XSAVEOPT	COMMON_CPUID_INDEX_D_ECX_1
+#define index_cpu_XSAVEC	COMMON_CPUID_INDEX_D_ECX_1
+#define index_cpu_XGETBV_ECX_1	COMMON_CPUID_INDEX_D_ECX_1
+#define index_cpu_XSAVES	COMMON_CPUID_INDEX_D_ECX_1
+
+/* COMMON_CPUID_INDEX_80000007.  */
+
+/* EDX.  */
+#define index_cpu_INVARIANT_TSC	COMMON_CPUID_INDEX_80000007
+
+/* COMMON_CPUID_INDEX_80000008.  */
+
+/* EBX.  */
+#define index_cpu_WBNOINVD	COMMON_CPUID_INDEX_80000008
+
+/* COMMON_CPUID_INDEX_1.  */
+
+/* ECX.  */
+#define reg_SSE3		ecx
+#define reg_PCLMULQDQ		ecx
+#define reg_DTES64		ecx
+#define reg_MONITOR		ecx
+#define reg_DS_CPL		ecx
+#define reg_VMX			ecx
+#define reg_SMX			ecx
+#define reg_EST			ecx
+#define reg_TM2			ecx
+#define reg_SSSE3		ecx
+#define reg_CNXT_ID		ecx
+#define reg_SDBG		ecx
+#define reg_FMA			ecx
+#define reg_CMPXCHG16B		ecx
+#define reg_XTPRUPDCTRL		ecx
+#define reg_PDCM		ecx
+#define reg_PCID		ecx
+#define reg_DCA			ecx
+#define reg_SSE4_1		ecx
+#define reg_SSE4_2		ecx
+#define reg_X2APIC		ecx
+#define reg_MOVBE		ecx
+#define reg_POPCNT		ecx
+#define reg_TSC_DEADLINE	ecx
+#define reg_AES			ecx
+#define reg_XSAVE		ecx
+#define reg_OSXSAVE		ecx
+#define reg_AVX			ecx
+#define reg_F16C		ecx
+#define reg_RDRAND		ecx
+
+/* EDX.  */
+#define reg_FPU			edx
+#define reg_VME			edx
+#define reg_DE			edx
+#define reg_PSE			edx
+#define reg_TSC			edx
+#define reg_MSR			edx
+#define reg_PAE			edx
+#define reg_MCE			edx
+#define reg_CX8			edx
+#define reg_APIC		edx
+#define reg_SEP			edx
+#define reg_MTRR		edx
+#define reg_PGE			edx
+#define reg_MCA			edx
+#define reg_CMOV		edx
+#define reg_PAT			edx
+#define reg_PSE_36		edx
+#define reg_PSN			edx
+#define reg_CLFSH		edx
+#define reg_DS			edx
+#define reg_ACPI		edx
+#define reg_MMX			edx
+#define reg_FXSR		edx
+#define reg_SSE			edx
+#define reg_SSE2		edx
+#define reg_SS			edx
+#define reg_HTT			edx
+#define reg_TM			edx
+#define reg_PBE			edx
+
+/* COMMON_CPUID_INDEX_7.  */
+
+/* EBX.  */
+#define reg_FSGSBASE		ebx
+#define reg_TSC_ADJUST		ebx
+#define reg_SGX			ebx
+#define reg_BMI1		ebx
+#define reg_HLE			ebx
+#define reg_AVX2		ebx
+#define reg_SMEP		ebx
+#define reg_BMI2		ebx
+#define reg_ERMS		ebx
+#define reg_INVPCID		ebx
+#define reg_RTM			ebx
+#define reg_PQM			ebx
+#define reg_MPX			ebx
+#define reg_PQE			ebx
+#define reg_AVX512F		ebx
+#define reg_AVX512DQ		ebx
+#define reg_RDSEED		ebx
+#define reg_ADX			ebx
+#define reg_SMAP		ebx
+#define reg_AVX512_IFMA		ebx
+#define reg_CLFLUSHOPT		ebx
+#define reg_CLWB		ebx
+#define reg_TRACE		ebx
+#define reg_AVX512PF		ebx
+#define reg_AVX512ER		ebx
+#define reg_AVX512CD		ebx
+#define reg_SHA			ebx
+#define reg_AVX512BW		ebx
+#define reg_AVX512VL		ebx
+
+/* ECX.  */
+#define reg_PREFETCHWT1		ecx
+#define reg_AVX512_VBMI		ecx
+#define reg_UMIP		ecx
+#define reg_PKU			ecx
+#define reg_OSPKE		ecx
+#define reg_WAITPKG		ecx
+#define reg_AVX512_VBMI2	ecx
+#define reg_SHSTK		ecx
+#define reg_GFNI		ecx
+#define reg_VAES		ecx
+#define reg_VPCLMULQDQ		ecx
+#define reg_AVX512_VNNI		ecx
+#define reg_AVX512_BITALG	ecx
+#define reg_AVX512_VPOPCNTDQ	ecx
+#define reg_RDPID		ecx
+#define reg_CLDEMOTE		ecx
+#define reg_MOVDIRI		ecx
+#define reg_MOVDIR64B		ecx
+#define reg_SGX_LC		ecx
+
+/* EDX.  */
+#define reg_AVX512_4VNNIW	edx
+#define reg_AVX512_4FMAPS	edx
+#define reg_FSRM		edx
+#define reg_PCONFIG		edx
+#define reg_IBT			edx
+#define reg_IBRS_IBPB		edx
+#define reg_STIBP		edx
+#define reg_CAPABILITIES	edx
+#define reg_SSBD		edx
+
+/* COMMON_CPUID_INDEX_80000001.  */
+
+/* ECX.  */
+#define reg_LAHF64_SAHF64	ecx
+#define reg_SVM			ecx
+#define reg_LZCNT		ecx
+#define reg_SSE4A		ecx
+#define reg_PREFETCHW		ecx
+#define reg_XOP			ecx
+#define reg_LWP			ecx
+#define reg_FMA4		ecx
+#define reg_TBM			ecx
+
+/* EDX.  */
+#define reg_SYSCALL_SYSRET	edx
+#define reg_NX			edx
+#define reg_PAGE1GB		edx
+#define reg_RDTSCP		edx
+#define reg_LM			edx
+
+/* COMMON_CPUID_INDEX_D_ECX_1.  */
+
+/* EAX.  */
+#define reg_XSAVEOPT		eax
+#define reg_XSAVEC		eax
+#define reg_XGETBV_ECX_1	eax
+#define reg_XSAVES		eax
+
+/* COMMON_CPUID_INDEX_80000007.  */
+
+/* EDX.  */
+#define reg_INVARIANT_TSC	edx
+
+/* COMMON_CPUID_INDEX_80000008.  */
+
+/* EBX.  */
+#define reg_WBNOINVD		ebx
+
+/* FEATURE_INDEX_2.  */
+#define bit_arch_I586				(1u << 0)
+#define bit_arch_I686				(1u << 1)
+#define bit_arch_Fast_Rep_String		(1u << 2)
+#define bit_arch_Fast_Copy_Backward		(1u << 3)
+#define bit_arch_Fast_Unaligned_Load		(1u << 4)
+#define bit_arch_Fast_Unaligned_Copy		(1u << 5)
+#define bit_arch_Slow_BSF			(1u << 6)
+#define bit_arch_Slow_SSE4_2			(1u << 7)
+#define bit_arch_AVX_Fast_Unaligned_Load	(1u << 8)
+#define bit_arch_Prefer_MAP_32BIT_EXEC		(1u << 9)
+#define bit_arch_Prefer_PMINUB_for_stringop	(1u << 10)
+#define bit_arch_Prefer_No_VZEROUPPER		(1u << 11)
+#define bit_arch_Prefer_ERMS			(1u << 12)
+#define bit_arch_Prefer_FSRM			(1u << 13)
+#define bit_arch_Prefer_No_AVX512		(1u << 14)
+#define bit_arch_MathVec_Prefer_No_AVX512	(1u << 15)
+
+#define index_arch_Fast_Rep_String		FEATURE_INDEX_2
+#define index_arch_Fast_Copy_Backward		FEATURE_INDEX_2
+#define index_arch_Slow_BSF			FEATURE_INDEX_2
+#define index_arch_Fast_Unaligned_Load		FEATURE_INDEX_2
+#define index_arch_Prefer_PMINUB_for_stringop 	FEATURE_INDEX_2
+#define index_arch_Fast_Unaligned_Copy		FEATURE_INDEX_2
+#define index_arch_I586				FEATURE_INDEX_2
+#define index_arch_I686				FEATURE_INDEX_2
+#define index_arch_Slow_SSE4_2			FEATURE_INDEX_2
+#define index_arch_AVX_Fast_Unaligned_Load	FEATURE_INDEX_2
+#define index_arch_Prefer_MAP_32BIT_EXEC	FEATURE_INDEX_2
+#define index_arch_Prefer_No_VZEROUPPER		FEATURE_INDEX_2
+#define index_arch_Prefer_ERMS			FEATURE_INDEX_2
+#define index_arch_Prefer_No_AVX512		FEATURE_INDEX_2
+#define index_arch_MathVec_Prefer_No_AVX512	FEATURE_INDEX_2
+#define index_arch_Prefer_FSRM			FEATURE_INDEX_2
+
+/* XCR0 Feature flags.  */
+#define bit_XMM_state		(1u << 1)
+#define bit_YMM_state		(1u << 2)
+#define bit_Opmask_state	(1u << 5)
+#define bit_ZMM0_15_state	(1u << 6)
+#define bit_ZMM16_31_state	(1u << 7)
+
+# if defined (_LIBC) && !IS_IN (nonlib)
+/* Unused for x86.  */
+#  define INIT_ARCH()
+#  define __get_cpu_features()	(&GLRO(dl_x86_cpu_features))
+#  define x86_get_cpuid_registers(i) \
+       (&(GLRO(dl_x86_cpu_features).cpuid[i]))
+# endif
 
 #ifdef __x86_64__
 # define HAS_CPUID 1
diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c
index b2fac197dac7708e..64a7fd6157242bdd 100644
--- a/sysdeps/x86/tst-get-cpu-features.c
+++ b/sysdeps/x86/tst-get-cpu-features.c
@@ -17,15 +17,271 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <stdlib.h>
+#include <stdio.h>
 #include <cpu-features.h>
+#include <support/check.h>
+
+#define CHECK_CPU_FEATURE(name)		\
+  do {	/* Expands to exactly one statement.  */ \
+    if (HAS_CPU_FEATURE (name))		\
+      printf ("  " #name "\n");		\
+  } while (0)
+
+#define CHECK_CPU_FEATURE_USABLE(name)	\
+  do {	/* Expands to exactly one statement.  */ \
+    if (CPU_FEATURE_USABLE (name))	\
+      printf ("  " #name "\n");		\
+  } while (0)
+
+static const char * const cpu_kinds[] =
+{
+  "Unknown",
+  "Intel",
+  "AMD",
+  "Other",
+};
 
 static int
 do_test (void)
 {
-  if (__get_cpu_features ()->kind == arch_kind_unknown)
-    abort ();
+  const struct cpu_features *cpu_features = __get_cpu_features ();
+
+  switch (cpu_features->basic.kind)
+    {
+    case arch_kind_intel:
+    case arch_kind_amd:
+    case arch_kind_other:
+      printf ("Vendor: %s\n", cpu_kinds[cpu_features->basic.kind]);
+      printf ("Family: 0x%x\n", cpu_features->basic.family);
+      printf ("Model: 0x%x\n", cpu_features->basic.model);
+      printf ("Stepping: 0x%x\n", cpu_features->basic.stepping);
+      break;
+
+    default:
+      abort ();
+    }
+
+#ifdef __SSE2__
+  TEST_VERIFY_EXIT (HAS_CPU_FEATURE (SSE2));
+#endif
+
+  printf ("CPU features:\n");
+  CHECK_CPU_FEATURE (SSE3);
+  CHECK_CPU_FEATURE (PCLMULQDQ);
+  CHECK_CPU_FEATURE (DTES64);
+  CHECK_CPU_FEATURE (MONITOR);
+  CHECK_CPU_FEATURE (DS_CPL);
+  CHECK_CPU_FEATURE (VMX);
+  CHECK_CPU_FEATURE (SMX);
+  CHECK_CPU_FEATURE (EST);
+  CHECK_CPU_FEATURE (TM2);
+  CHECK_CPU_FEATURE (SSSE3);
+  CHECK_CPU_FEATURE (CNXT_ID);
+  CHECK_CPU_FEATURE (SDBG);
+  CHECK_CPU_FEATURE (FMA);
+  CHECK_CPU_FEATURE (CMPXCHG16B);
+  CHECK_CPU_FEATURE (XTPRUPDCTRL);
+  CHECK_CPU_FEATURE (PDCM);
+  CHECK_CPU_FEATURE (PCID);
+  CHECK_CPU_FEATURE (DCA);
+  CHECK_CPU_FEATURE (SSE4_1);
+  CHECK_CPU_FEATURE (SSE4_2);
+  CHECK_CPU_FEATURE (X2APIC);
+  CHECK_CPU_FEATURE (MOVBE);
+  CHECK_CPU_FEATURE (POPCNT);
+  CHECK_CPU_FEATURE (TSC_DEADLINE);
+  CHECK_CPU_FEATURE (AES);
+  CHECK_CPU_FEATURE (XSAVE);
+  CHECK_CPU_FEATURE (OSXSAVE);
+  CHECK_CPU_FEATURE (AVX);
+  CHECK_CPU_FEATURE (F16C);
+  CHECK_CPU_FEATURE (RDRAND);
+  CHECK_CPU_FEATURE (FPU);
+  CHECK_CPU_FEATURE (VME);
+  CHECK_CPU_FEATURE (DE);
+  CHECK_CPU_FEATURE (PSE);
+  CHECK_CPU_FEATURE (TSC);
+  CHECK_CPU_FEATURE (MSR);
+  CHECK_CPU_FEATURE (PAE);
+  CHECK_CPU_FEATURE (MCE);
+  CHECK_CPU_FEATURE (CX8);
+  CHECK_CPU_FEATURE (APIC);
+  CHECK_CPU_FEATURE (SEP);
+  CHECK_CPU_FEATURE (MTRR);
+  CHECK_CPU_FEATURE (PGE);
+  CHECK_CPU_FEATURE (MCA);
+  CHECK_CPU_FEATURE (CMOV);
+  CHECK_CPU_FEATURE (PAT);
+  CHECK_CPU_FEATURE (PSE_36);
+  CHECK_CPU_FEATURE (PSN);
+  CHECK_CPU_FEATURE (CLFSH);
+  CHECK_CPU_FEATURE (DS);
+  CHECK_CPU_FEATURE (ACPI);
+  CHECK_CPU_FEATURE (MMX);
+  CHECK_CPU_FEATURE (FXSR);
+  CHECK_CPU_FEATURE (SSE);
+  CHECK_CPU_FEATURE (SSE2);
+  CHECK_CPU_FEATURE (SS);
+  CHECK_CPU_FEATURE (HTT);
+  CHECK_CPU_FEATURE (TM);
+  CHECK_CPU_FEATURE (PBE);
+  CHECK_CPU_FEATURE (FSGSBASE);
+  CHECK_CPU_FEATURE (TSC_ADJUST);
+  CHECK_CPU_FEATURE (SGX);
+  CHECK_CPU_FEATURE (BMI1);
+  CHECK_CPU_FEATURE (HLE);
+  CHECK_CPU_FEATURE (AVX2);
+  CHECK_CPU_FEATURE (SMEP);
+  CHECK_CPU_FEATURE (BMI2);
+  CHECK_CPU_FEATURE (ERMS);
+  CHECK_CPU_FEATURE (INVPCID);
+  CHECK_CPU_FEATURE (RTM);
+  CHECK_CPU_FEATURE (PQM);
+  CHECK_CPU_FEATURE (MPX);
+  CHECK_CPU_FEATURE (PQE);
+  CHECK_CPU_FEATURE (AVX512F);
+  CHECK_CPU_FEATURE (AVX512DQ);
+  CHECK_CPU_FEATURE (RDSEED);
+  CHECK_CPU_FEATURE (ADX);
+  CHECK_CPU_FEATURE (SMAP);
+  CHECK_CPU_FEATURE (AVX512_IFMA);
+  CHECK_CPU_FEATURE (CLFLUSHOPT);
+  CHECK_CPU_FEATURE (CLWB);
+  CHECK_CPU_FEATURE (TRACE);
+  CHECK_CPU_FEATURE (AVX512PF);
+  CHECK_CPU_FEATURE (AVX512ER);
+  CHECK_CPU_FEATURE (AVX512CD);
+  CHECK_CPU_FEATURE (SHA);
+  CHECK_CPU_FEATURE (AVX512BW);
+  CHECK_CPU_FEATURE (AVX512VL);
+  CHECK_CPU_FEATURE (PREFETCHWT1);
+  CHECK_CPU_FEATURE (AVX512_VBMI);
+  CHECK_CPU_FEATURE (UMIP);
+  CHECK_CPU_FEATURE (PKU);
+  CHECK_CPU_FEATURE (OSPKE);
+  CHECK_CPU_FEATURE (WAITPKG);
+  CHECK_CPU_FEATURE (AVX512_VBMI2);
+  CHECK_CPU_FEATURE (SHSTK);
+  CHECK_CPU_FEATURE (GFNI);
+  CHECK_CPU_FEATURE (VAES);
+  CHECK_CPU_FEATURE (VPCLMULQDQ);
+  CHECK_CPU_FEATURE (AVX512_VNNI);
+  CHECK_CPU_FEATURE (AVX512_BITALG);
+  CHECK_CPU_FEATURE (AVX512_VPOPCNTDQ);
+  CHECK_CPU_FEATURE (RDPID);
+  CHECK_CPU_FEATURE (CLDEMOTE);
+  CHECK_CPU_FEATURE (MOVDIRI);
+  CHECK_CPU_FEATURE (MOVDIR64B);
+  CHECK_CPU_FEATURE (SGX_LC);
+  CHECK_CPU_FEATURE (AVX512_4VNNIW);
+  CHECK_CPU_FEATURE (AVX512_4FMAPS);
+  CHECK_CPU_FEATURE (FSRM);
+  CHECK_CPU_FEATURE (PCONFIG);
+  CHECK_CPU_FEATURE (IBT);
+  CHECK_CPU_FEATURE (IBRS_IBPB);
+  CHECK_CPU_FEATURE (STIBP);
+  CHECK_CPU_FEATURE (CAPABILITIES);
+  CHECK_CPU_FEATURE (SSBD);
+  CHECK_CPU_FEATURE (LAHF64_SAHF64);
+  CHECK_CPU_FEATURE (SVM);
+  CHECK_CPU_FEATURE (LZCNT);
+  CHECK_CPU_FEATURE (SSE4A);
+  CHECK_CPU_FEATURE (PREFETCHW);
+  CHECK_CPU_FEATURE (XOP);
+  CHECK_CPU_FEATURE (LWP);
+  CHECK_CPU_FEATURE (FMA4);
+  CHECK_CPU_FEATURE (TBM);
+  CHECK_CPU_FEATURE (SYSCALL_SYSRET);
+  CHECK_CPU_FEATURE (NX);
+  CHECK_CPU_FEATURE (PAGE1GB);
+  CHECK_CPU_FEATURE (RDTSCP);
+  CHECK_CPU_FEATURE (LM);
+  CHECK_CPU_FEATURE (XSAVEOPT);
+  CHECK_CPU_FEATURE (XSAVEC);
+  CHECK_CPU_FEATURE (XGETBV_ECX_1);
+  CHECK_CPU_FEATURE (XSAVES);
+  CHECK_CPU_FEATURE (INVARIANT_TSC);
+  CHECK_CPU_FEATURE (WBNOINVD);
+
+  printf ("Usable CPU features:\n");
+  CHECK_CPU_FEATURE_USABLE (SSE3);
+  CHECK_CPU_FEATURE_USABLE (PCLMULQDQ);
+  CHECK_CPU_FEATURE_USABLE (SSSE3);
+  CHECK_CPU_FEATURE_USABLE (FMA);
+  CHECK_CPU_FEATURE_USABLE (CMPXCHG16B);
+  CHECK_CPU_FEATURE_USABLE (SSE4_1);
+  CHECK_CPU_FEATURE_USABLE (SSE4_2);
+  CHECK_CPU_FEATURE_USABLE (MOVBE);
+  CHECK_CPU_FEATURE_USABLE (POPCNT);
+  CHECK_CPU_FEATURE_USABLE (AES);
+  CHECK_CPU_FEATURE_USABLE (XSAVE);
+  CHECK_CPU_FEATURE_USABLE (OSXSAVE);
+  CHECK_CPU_FEATURE_USABLE (AVX);
+  CHECK_CPU_FEATURE_USABLE (F16C);
+  CHECK_CPU_FEATURE_USABLE (RDRAND);
+  CHECK_CPU_FEATURE_USABLE (FPU);
+  CHECK_CPU_FEATURE_USABLE (TSC);
+  CHECK_CPU_FEATURE_USABLE (MSR);
+  CHECK_CPU_FEATURE_USABLE (CX8);
+  CHECK_CPU_FEATURE_USABLE (SEP);
+  CHECK_CPU_FEATURE_USABLE (CMOV);
+  CHECK_CPU_FEATURE_USABLE (CLFSH);
+  CHECK_CPU_FEATURE_USABLE (MMX);
+  CHECK_CPU_FEATURE_USABLE (FXSR);
+  CHECK_CPU_FEATURE_USABLE (SSE);
+  CHECK_CPU_FEATURE_USABLE (SSE2);
+  CHECK_CPU_FEATURE_USABLE (FSGSBASE);
+  CHECK_CPU_FEATURE_USABLE (BMI1);
+  CHECK_CPU_FEATURE_USABLE (HLE);
+  CHECK_CPU_FEATURE_USABLE (AVX2);
+  CHECK_CPU_FEATURE_USABLE (BMI2);
+  CHECK_CPU_FEATURE_USABLE (ERMS);
+  CHECK_CPU_FEATURE_USABLE (AVX512F);
+  CHECK_CPU_FEATURE_USABLE (AVX512DQ);
+  CHECK_CPU_FEATURE_USABLE (RDSEED);
+  CHECK_CPU_FEATURE_USABLE (ADX);
+  CHECK_CPU_FEATURE_USABLE (AVX512_IFMA);
+  CHECK_CPU_FEATURE_USABLE (CLFLUSHOPT);
+  CHECK_CPU_FEATURE_USABLE (CLWB);
+  CHECK_CPU_FEATURE_USABLE (AVX512PF);
+  CHECK_CPU_FEATURE_USABLE (AVX512ER);
+  CHECK_CPU_FEATURE_USABLE (AVX512CD);
+  CHECK_CPU_FEATURE_USABLE (SHA);
+  CHECK_CPU_FEATURE_USABLE (AVX512BW);
+  CHECK_CPU_FEATURE_USABLE (AVX512VL);
+  CHECK_CPU_FEATURE_USABLE (PREFETCHWT1);
+  CHECK_CPU_FEATURE_USABLE (AVX512_VBMI);
+  CHECK_CPU_FEATURE_USABLE (AVX512_VBMI2);
+  CHECK_CPU_FEATURE_USABLE (GFNI);
+  CHECK_CPU_FEATURE_USABLE (VAES);
+  CHECK_CPU_FEATURE_USABLE (VPCLMULQDQ);
+  CHECK_CPU_FEATURE_USABLE (AVX512_VNNI);
+  CHECK_CPU_FEATURE_USABLE (AVX512_BITALG);
+  CHECK_CPU_FEATURE_USABLE (AVX512_VPOPCNTDQ);
+  CHECK_CPU_FEATURE_USABLE (RDPID);
+  CHECK_CPU_FEATURE_USABLE (CLDEMOTE);
+  CHECK_CPU_FEATURE_USABLE (MOVDIRI);
+  CHECK_CPU_FEATURE_USABLE (MOVDIR64B);
+  CHECK_CPU_FEATURE_USABLE (AVX512_4VNNIW);
+  CHECK_CPU_FEATURE_USABLE (AVX512_4FMAPS);
+  CHECK_CPU_FEATURE_USABLE (FSRM);
+  CHECK_CPU_FEATURE_USABLE (LAHF64_SAHF64);
+  CHECK_CPU_FEATURE_USABLE (LZCNT);
+  CHECK_CPU_FEATURE_USABLE (SSE4A);
+  CHECK_CPU_FEATURE_USABLE (PREFETCHW);
+  CHECK_CPU_FEATURE_USABLE (XOP);
+  CHECK_CPU_FEATURE_USABLE (FMA4);
+  CHECK_CPU_FEATURE_USABLE (TBM);
+  CHECK_CPU_FEATURE_USABLE (SYSCALL_SYSRET);
+  CHECK_CPU_FEATURE_USABLE (RDTSCP);
+  CHECK_CPU_FEATURE_USABLE (XSAVEOPT);
+  CHECK_CPU_FEATURE_USABLE (XSAVEC);
+  CHECK_CPU_FEATURE_USABLE (XGETBV_ECX_1);
+  CHECK_CPU_FEATURE_USABLE (XSAVES);
+  CHECK_CPU_FEATURE_USABLE (INVARIANT_TSC);
+  CHECK_CPU_FEATURE_USABLE (WBNOINVD);
+
   return 0;
 }
 
-#define TEST_FUNCTION do_test ()
-#include "../../test-skeleton.c"
+#include <support/test-driver.c>
diff --git a/sysdeps/x86_64/multiarch/sched_cpucount.c b/sysdeps/x86_64/multiarch/sched_cpucount.c
index d10d74ae21e05d47..7949119dcdb5a94b 100644
--- a/sysdeps/x86_64/multiarch/sched_cpucount.c
+++ b/sysdeps/x86_64/multiarch/sched_cpucount.c
@@ -33,4 +33,4 @@
 #undef __sched_cpucount
 
 libc_ifunc (__sched_cpucount,
-	    HAS_CPU_FEATURE (POPCOUNT) ? popcount_cpucount : generic_cpucount);
+	    HAS_CPU_FEATURE (POPCNT) ? popcount_cpucount : generic_cpucount);
diff --git a/sysdeps/x86_64/multiarch/test-multiarch.c b/sysdeps/x86_64/multiarch/test-multiarch.c
index aa872f27dbe7ea2f..417147c3d5f325a5 100644
--- a/sysdeps/x86_64/multiarch/test-multiarch.c
+++ b/sysdeps/x86_64/multiarch/test-multiarch.c
@@ -85,8 +85,8 @@ do_test (int argc, char **argv)
 		       , "HAS_CPU_FEATURE (SSE4_1)");
   fails += check_proc ("ssse3", HAS_CPU_FEATURE (SSSE3),
 		       "HAS_CPU_FEATURE (SSSE3)");
-  fails += check_proc ("popcnt", HAS_CPU_FEATURE (POPCOUNT),
-		       "HAS_CPU_FEATURE (POPCOUNT)");
+  fails += check_proc ("popcnt", HAS_CPU_FEATURE (POPCNT),
+		       "HAS_CPU_FEATURE (POPCNT)");
 
   printf ("%d differences between /proc/cpuinfo and glibc code.\n", fails);