b1dca6
commit 4fdd4d41a17dda26c854ed935658154a17d4b906
b1dca6
Author: H.J. Lu <hjl.tools@gmail.com>
b1dca6
Date:   Thu Jun 25 15:12:57 2020 -0700
b1dca6
b1dca6
    x86: Detect Intel Advanced Matrix Extensions
b1dca6
    
b1dca6
    Intel Advanced Matrix Extensions (Intel AMX) is a new programming
b1dca6
    paradigm consisting of two components: a set of 2-dimensional registers
b1dca6
    (tiles) representing sub-arrays from a larger 2-dimensional memory image,
b1dca6
    and accelerators able to operate on tiles.  Intel AMX is an extensible
b1dca6
    architecture.  New accelerators can be added and the existing accelerator
b1dca6
    may be enhanced to provide higher performance.  The initial features are
b1dca6
    AMX-BF16, AMX-TILE and AMX-INT8, which are usable only if the operating
b1dca6
    system enables both XTILECFG state and XTILEDATA state in XCR0.
b1dca6
    
b1dca6
    Add AMX-BF16, AMX-TILE and AMX-INT8 support to HAS_CPU_FEATURE and
b1dca6
    CPU_FEATURE_USABLE.
b1dca6
b1dca6
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
b1dca6
index 7b2a5bc3ed27ec39..21565474839efffc 100644
b1dca6
--- a/sysdeps/x86/cpu-features.c
b1dca6
+++ b/sysdeps/x86/cpu-features.c
b1dca6
@@ -239,6 +239,24 @@ get_common_indices (struct cpu_features *cpu_features,
b1dca6
 	    }
b1dca6
 	}
b1dca6
 
b1dca6
+      /* Are XTILECFG and XTILEDATA states usable?  */
b1dca6
+      if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
b1dca6
+	  == (bit_XTILECFG_state | bit_XTILEDATA_state))
b1dca6
+	{
b1dca6
+	  /* Determine if AMX_BF16 is usable.  */
b1dca6
+	  if (CPU_FEATURES_CPU_P (cpu_features, AMX_BF16))
b1dca6
+	    cpu_features->usable[index_arch_AMX_BF16_Usable]
b1dca6
+	      |= bit_arch_AMX_BF16_Usable;
b1dca6
+	  /* Determine if AMX_TILE is usable.  */
b1dca6
+	  if (CPU_FEATURES_CPU_P (cpu_features, AMX_TILE))
b1dca6
+	    cpu_features->usable[index_arch_AMX_TILE_Usable]
b1dca6
+	      |= bit_arch_AMX_TILE_Usable;
b1dca6
+	  /* Determine if AMX_INT8 is usable.  */
b1dca6
+	  if (CPU_FEATURES_CPU_P (cpu_features, AMX_INT8))
b1dca6
+	    cpu_features->usable[index_arch_AMX_INT8_Usable]
b1dca6
+	      |= bit_arch_AMX_INT8_Usable;
b1dca6
+	}
b1dca6
+
b1dca6
       /* For _dl_runtime_resolve, set xsave_state_size to xsave area
b1dca6
 	 size + integer register save size and align it to 64 bytes.  */
b1dca6
       if (cpu_features->basic.max_cpuid >= 0xd)
b1dca6
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
b1dca6
index 41c3855e94d16b49..7c46242aad69d427 100644
b1dca6
--- a/sysdeps/x86/cpu-features.h
b1dca6
+++ b/sysdeps/x86/cpu-features.h
b1dca6
@@ -156,6 +156,9 @@ extern const struct cpu_features *__get_cpu_features (void)
b1dca6
 #define bit_arch_AVX512_VP2INTERSECT_Usable	(1u << 24)
b1dca6
 #define bit_arch_AVX512_BF16_Usable		(1u << 25)
b1dca6
 #define bit_arch_PKU_Usable			(1u << 26)
b1dca6
+#define bit_arch_AMX_BF16_Usable		(1u << 27)
b1dca6
+#define bit_arch_AMX_TILE_Usable		(1u << 28)
b1dca6
+#define bit_arch_AMX_INT8_Usable		(1u << 29)
b1dca6
 
b1dca6
 #define index_arch_AVX_Usable			USABLE_FEATURE_INDEX_1
b1dca6
 #define index_arch_AVX2_Usable			USABLE_FEATURE_INDEX_1
b1dca6
@@ -184,6 +187,9 @@ extern const struct cpu_features *__get_cpu_features (void)
b1dca6
 #define index_arch_AVX512_VP2INTERSECT_Usable	USABLE_FEATURE_INDEX_1
b1dca6
 #define index_arch_AVX512_BF16_Usable		USABLE_FEATURE_INDEX_1
b1dca6
 #define index_arch_PKU_Usable			USABLE_FEATURE_INDEX_1
b1dca6
+#define index_arch_AMX_BF16_Usable		USABLE_FEATURE_INDEX_1
b1dca6
+#define index_arch_AMX_TILE_Usable		USABLE_FEATURE_INDEX_1
b1dca6
+#define index_arch_AMX_INT8_Usable		USABLE_FEATURE_INDEX_1
b1dca6
 
b1dca6
 #define feature_AVX_Usable			usable
b1dca6
 #define feature_AVX2_Usable			usable
b1dca6
@@ -212,6 +218,9 @@ extern const struct cpu_features *__get_cpu_features (void)
b1dca6
 #define feature_AVX512_VP2INTERSECT_Usable	usable
b1dca6
 #define feature_AVX512_BF16_Usable		usable
b1dca6
 #define feature_PKU_Usable			usable
b1dca6
+#define feature_AMX_BF16_Usable			usable
b1dca6
+#define feature_AMX_TILE_Usable			usable
b1dca6
+#define feature_AMX_INT8_Usable			usable
b1dca6
 
b1dca6
 /* CPU features.  */
b1dca6
 
b1dca6
@@ -347,6 +356,9 @@ extern const struct cpu_features *__get_cpu_features (void)
b1dca6
 #define bit_cpu_TSXLDTRK	(1u << 16)
b1dca6
 #define bit_cpu_PCONFIG		(1u << 18)
b1dca6
 #define bit_cpu_IBT		(1u << 20)
b1dca6
+#define bit_cpu_AMX_BF16	(1u << 22)
b1dca6
+#define bit_cpu_AMX_TILE	(1u << 24)
b1dca6
+#define bit_cpu_AMX_INT8	(1u << 25)
b1dca6
 #define bit_cpu_IBRS_IBPB	(1u << 26)
b1dca6
 #define bit_cpu_STIBP		(1u << 27)
b1dca6
 #define bit_cpu_L1D_FLUSH	(1u << 28)
b1dca6
@@ -529,6 +541,9 @@ extern const struct cpu_features *__get_cpu_features (void)
b1dca6
 #define index_cpu_TSXLDTRK	COMMON_CPUID_INDEX_7
b1dca6
 #define index_cpu_PCONFIG	COMMON_CPUID_INDEX_7
b1dca6
 #define index_cpu_IBT		COMMON_CPUID_INDEX_7
b1dca6
+#define index_cpu_AMX_BF16	COMMON_CPUID_INDEX_7
b1dca6
+#define index_cpu_AMX_TILE	COMMON_CPUID_INDEX_7
b1dca6
+#define index_cpu_AMX_INT8	COMMON_CPUID_INDEX_7
b1dca6
 #define index_cpu_IBRS_IBPB	COMMON_CPUID_INDEX_7
b1dca6
 #define index_cpu_STIBP		COMMON_CPUID_INDEX_7
b1dca6
 #define index_cpu_L1D_FLUSH	COMMON_CPUID_INDEX_7
b1dca6
@@ -711,6 +726,9 @@ extern const struct cpu_features *__get_cpu_features (void)
b1dca6
 #define reg_TSXLDTRK		edx
b1dca6
 #define reg_PCONFIG		edx
b1dca6
 #define reg_IBT			edx
b1dca6
+#define reg_AMX_BF16		edx
b1dca6
+#define reg_AMX_TILE		edx
b1dca6
+#define reg_AMX_INT8		edx
b1dca6
 #define reg_IBRS_IBPB		edx
b1dca6
 #define reg_STIBP		edx
b1dca6
 #define reg_L1D_FLUSH		edx
b1dca6
@@ -819,6 +837,8 @@ extern const struct cpu_features *__get_cpu_features (void)
b1dca6
 #define bit_Opmask_state	(1u << 5)
b1dca6
 #define bit_ZMM0_15_state	(1u << 6)
b1dca6
 #define bit_ZMM16_31_state	(1u << 7)
b1dca6
+#define bit_XTILECFG_state	(1u << 17)
b1dca6
+#define bit_XTILEDATA_state	(1u << 18)
b1dca6
 
b1dca6
 # if defined (_LIBC) && !IS_IN (nonlib)
b1dca6
 /* Unused for x86.  */
b1dca6
diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c
b1dca6
index 08688ace2a0ae35e..c4d91be3a48de886 100644
b1dca6
--- a/sysdeps/x86/tst-get-cpu-features.c
b1dca6
+++ b/sysdeps/x86/tst-get-cpu-features.c
b1dca6
@@ -185,6 +185,9 @@ do_test (void)
b1dca6
   CHECK_CPU_FEATURE (TSXLDTRK);
b1dca6
   CHECK_CPU_FEATURE (PCONFIG);
b1dca6
   CHECK_CPU_FEATURE (IBT);
b1dca6
+  CHECK_CPU_FEATURE (AMX_BF16);
b1dca6
+  CHECK_CPU_FEATURE (AMX_TILE);
b1dca6
+  CHECK_CPU_FEATURE (AMX_INT8);
b1dca6
   CHECK_CPU_FEATURE (IBRS_IBPB);
b1dca6
   CHECK_CPU_FEATURE (STIBP);
b1dca6
   CHECK_CPU_FEATURE (L1D_FLUSH);
b1dca6
@@ -237,6 +240,9 @@ do_test (void)
b1dca6
   CHECK_CPU_FEATURE_USABLE (AVX512_4VNNIW);
b1dca6
   CHECK_CPU_FEATURE_USABLE (AVX512_4FMAPS);
b1dca6
   CHECK_CPU_FEATURE_USABLE (AVX512_VP2INTERSECT);
b1dca6
+  CHECK_CPU_FEATURE_USABLE (AMX_BF16);
b1dca6
+  CHECK_CPU_FEATURE_USABLE (AMX_TILE);
b1dca6
+  CHECK_CPU_FEATURE_USABLE (AMX_INT8);
b1dca6
   CHECK_CPU_FEATURE_USABLE (XOP);
b1dca6
   CHECK_CPU_FEATURE_USABLE (FMA4);
b1dca6
   CHECK_CPU_FEATURE_USABLE (XSAVEC);