|
|
e354a5 |
commit 4fdd4d41a17dda26c854ed935658154a17d4b906
|
|
|
e354a5 |
Author: H.J. Lu <hjl.tools@gmail.com>
|
|
|
e354a5 |
Date: Thu Jun 25 15:12:57 2020 -0700
|
|
|
e354a5 |
|
|
|
e354a5 |
x86: Detect Intel Advanced Matrix Extensions
|
|
|
e354a5 |
|
|
|
e354a5 |
Intel Advanced Matrix Extensions (Intel AMX) is a new programming
|
|
|
e354a5 |
paradigm consisting of two components: a set of 2-dimensional registers
|
|
|
e354a5 |
(tiles) representing sub-arrays from a larger 2-dimensional memory image,
|
|
|
e354a5 |
and accelerators able to operate on tiles. Intel AMX is an extensible
|
|
|
e354a5 |
architecture. New accelerators can be added and the existing accelerator
|
|
|
e354a5 |
may be enhanced to provide higher performance. The initial features are
|
|
|
e354a5 |
AMX-BF16, AMX-TILE and AMX-INT8, which are usable only if the operating
|
|
|
e354a5 |
system supports both XTILECFG state and XTILEDATA state.
|
|
|
e354a5 |
|
|
|
e354a5 |
Add AMX-BF16, AMX-TILE and AMX-INT8 support to HAS_CPU_FEATURE and
|
|
|
e354a5 |
CPU_FEATURE_USABLE.
|
|
|
e354a5 |
|
|
|
e354a5 |
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
|
|
|
e354a5 |
index 7b2a5bc3ed27ec39..21565474839efffc 100644
|
|
|
e354a5 |
--- a/sysdeps/x86/cpu-features.c
|
|
|
e354a5 |
+++ b/sysdeps/x86/cpu-features.c
|
|
|
e354a5 |
@@ -239,6 +239,24 @@ get_common_indices (struct cpu_features *cpu_features,
|
|
|
e354a5 |
}
|
|
|
e354a5 |
}
|
|
|
e354a5 |
|
|
|
e354a5 |
+ /* Are XTILECFG and XTILEDATA states usable? */
|
|
|
e354a5 |
+ if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
|
|
|
e354a5 |
+ == (bit_XTILECFG_state | bit_XTILEDATA_state))
|
|
|
e354a5 |
+ {
|
|
|
e354a5 |
+ /* Determine if AMX_BF16 is usable. */
|
|
|
e354a5 |
+ if (CPU_FEATURES_CPU_P (cpu_features, AMX_BF16))
|
|
|
e354a5 |
+ cpu_features->usable[index_arch_AMX_BF16_Usable]
|
|
|
e354a5 |
+ |= bit_arch_AMX_BF16_Usable;
|
|
|
e354a5 |
+ /* Determine if AMX_TILE is usable. */
|
|
|
e354a5 |
+ if (CPU_FEATURES_CPU_P (cpu_features, AMX_TILE))
|
|
|
e354a5 |
+ cpu_features->usable[index_arch_AMX_TILE_Usable]
|
|
|
e354a5 |
+ |= bit_arch_AMX_TILE_Usable;
|
|
|
e354a5 |
+ /* Determine if AMX_INT8 is usable. */
|
|
|
e354a5 |
+ if (CPU_FEATURES_CPU_P (cpu_features, AMX_INT8))
|
|
|
e354a5 |
+ cpu_features->usable[index_arch_AMX_INT8_Usable]
|
|
|
e354a5 |
+ |= bit_arch_AMX_INT8_Usable;
|
|
|
e354a5 |
+ }
|
|
|
e354a5 |
+
|
|
|
e354a5 |
/* For _dl_runtime_resolve, set xsave_state_size to xsave area
|
|
|
e354a5 |
size + integer register save size and align it to 64 bytes. */
|
|
|
e354a5 |
if (cpu_features->basic.max_cpuid >= 0xd)
|
|
|
e354a5 |
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
|
|
|
e354a5 |
index 41c3855e94d16b49..7c46242aad69d427 100644
|
|
|
e354a5 |
--- a/sysdeps/x86/cpu-features.h
|
|
|
e354a5 |
+++ b/sysdeps/x86/cpu-features.h
|
|
|
e354a5 |
@@ -156,6 +156,9 @@ extern const struct cpu_features *__get_cpu_features (void)
|
|
|
e354a5 |
#define bit_arch_AVX512_VP2INTERSECT_Usable (1u << 24)
|
|
|
e354a5 |
#define bit_arch_AVX512_BF16_Usable (1u << 25)
|
|
|
e354a5 |
#define bit_arch_PKU_Usable (1u << 26)
|
|
|
e354a5 |
+#define bit_arch_AMX_BF16_Usable (1u << 27)
|
|
|
e354a5 |
+#define bit_arch_AMX_TILE_Usable (1u << 28)
|
|
|
e354a5 |
+#define bit_arch_AMX_INT8_Usable (1u << 29)
|
|
|
e354a5 |
|
|
|
e354a5 |
#define index_arch_AVX_Usable USABLE_FEATURE_INDEX_1
|
|
|
e354a5 |
#define index_arch_AVX2_Usable USABLE_FEATURE_INDEX_1
|
|
|
e354a5 |
@@ -184,6 +187,9 @@ extern const struct cpu_features *__get_cpu_features (void)
|
|
|
e354a5 |
#define index_arch_AVX512_VP2INTERSECT_Usable USABLE_FEATURE_INDEX_1
|
|
|
e354a5 |
#define index_arch_AVX512_BF16_Usable USABLE_FEATURE_INDEX_1
|
|
|
e354a5 |
#define index_arch_PKU_Usable USABLE_FEATURE_INDEX_1
|
|
|
e354a5 |
+#define index_arch_AMX_BF16_Usable USABLE_FEATURE_INDEX_1
|
|
|
e354a5 |
+#define index_arch_AMX_TILE_Usable USABLE_FEATURE_INDEX_1
|
|
|
e354a5 |
+#define index_arch_AMX_INT8_Usable USABLE_FEATURE_INDEX_1
|
|
|
e354a5 |
|
|
|
e354a5 |
#define feature_AVX_Usable usable
|
|
|
e354a5 |
#define feature_AVX2_Usable usable
|
|
|
e354a5 |
@@ -212,6 +218,9 @@ extern const struct cpu_features *__get_cpu_features (void)
|
|
|
e354a5 |
#define feature_AVX512_VP2INTERSECT_Usable usable
|
|
|
e354a5 |
#define feature_AVX512_BF16_Usable usable
|
|
|
e354a5 |
#define feature_PKU_Usable usable
|
|
|
e354a5 |
+#define feature_AMX_BF16_Usable usable
|
|
|
e354a5 |
+#define feature_AMX_TILE_Usable usable
|
|
|
e354a5 |
+#define feature_AMX_INT8_Usable usable
|
|
|
e354a5 |
|
|
|
e354a5 |
/* CPU features. */
|
|
|
e354a5 |
|
|
|
e354a5 |
@@ -347,6 +356,9 @@ extern const struct cpu_features *__get_cpu_features (void)
|
|
|
e354a5 |
#define bit_cpu_TSXLDTRK (1u << 16)
|
|
|
e354a5 |
#define bit_cpu_PCONFIG (1u << 18)
|
|
|
e354a5 |
#define bit_cpu_IBT (1u << 20)
|
|
|
e354a5 |
+#define bit_cpu_AMX_BF16 (1u << 22)
|
|
|
e354a5 |
+#define bit_cpu_AMX_TILE (1u << 24)
|
|
|
e354a5 |
+#define bit_cpu_AMX_INT8 (1u << 25)
|
|
|
e354a5 |
#define bit_cpu_IBRS_IBPB (1u << 26)
|
|
|
e354a5 |
#define bit_cpu_STIBP (1u << 27)
|
|
|
e354a5 |
#define bit_cpu_L1D_FLUSH (1u << 28)
|
|
|
e354a5 |
@@ -529,6 +541,9 @@ extern const struct cpu_features *__get_cpu_features (void)
|
|
|
e354a5 |
#define index_cpu_TSXLDTRK COMMON_CPUID_INDEX_7
|
|
|
e354a5 |
#define index_cpu_PCONFIG COMMON_CPUID_INDEX_7
|
|
|
e354a5 |
#define index_cpu_IBT COMMON_CPUID_INDEX_7
|
|
|
e354a5 |
+#define index_cpu_AMX_BF16 COMMON_CPUID_INDEX_7
|
|
|
e354a5 |
+#define index_cpu_AMX_TILE COMMON_CPUID_INDEX_7
|
|
|
e354a5 |
+#define index_cpu_AMX_INT8 COMMON_CPUID_INDEX_7
|
|
|
e354a5 |
#define index_cpu_IBRS_IBPB COMMON_CPUID_INDEX_7
|
|
|
e354a5 |
#define index_cpu_STIBP COMMON_CPUID_INDEX_7
|
|
|
e354a5 |
#define index_cpu_L1D_FLUSH COMMON_CPUID_INDEX_7
|
|
|
e354a5 |
@@ -711,6 +726,9 @@ extern const struct cpu_features *__get_cpu_features (void)
|
|
|
e354a5 |
#define reg_TSXLDTRK edx
|
|
|
e354a5 |
#define reg_PCONFIG edx
|
|
|
e354a5 |
#define reg_IBT edx
|
|
|
e354a5 |
+#define reg_AMX_BF16 edx
|
|
|
e354a5 |
+#define reg_AMX_TILE edx
|
|
|
e354a5 |
+#define reg_AMX_INT8 edx
|
|
|
e354a5 |
#define reg_IBRS_IBPB edx
|
|
|
e354a5 |
#define reg_STIBP edx
|
|
|
e354a5 |
#define reg_L1D_FLUSH edx
|
|
|
e354a5 |
@@ -819,6 +837,8 @@ extern const struct cpu_features *__get_cpu_features (void)
|
|
|
e354a5 |
#define bit_Opmask_state (1u << 5)
|
|
|
e354a5 |
#define bit_ZMM0_15_state (1u << 6)
|
|
|
e354a5 |
#define bit_ZMM16_31_state (1u << 7)
|
|
|
e354a5 |
+#define bit_XTILECFG_state (1u << 17)
|
|
|
e354a5 |
+#define bit_XTILEDATA_state (1u << 18)
|
|
|
e354a5 |
|
|
|
e354a5 |
# if defined (_LIBC) && !IS_IN (nonlib)
|
|
|
e354a5 |
/* Unused for x86. */
|
|
|
e354a5 |
diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c
|
|
|
e354a5 |
index 08688ace2a0ae35e..c4d91be3a48de886 100644
|
|
|
e354a5 |
--- a/sysdeps/x86/tst-get-cpu-features.c
|
|
|
e354a5 |
+++ b/sysdeps/x86/tst-get-cpu-features.c
|
|
|
e354a5 |
@@ -185,6 +185,9 @@ do_test (void)
|
|
|
e354a5 |
CHECK_CPU_FEATURE (TSXLDTRK);
|
|
|
e354a5 |
CHECK_CPU_FEATURE (PCONFIG);
|
|
|
e354a5 |
CHECK_CPU_FEATURE (IBT);
|
|
|
e354a5 |
+ CHECK_CPU_FEATURE (AMX_BF16);
|
|
|
e354a5 |
+ CHECK_CPU_FEATURE (AMX_TILE);
|
|
|
e354a5 |
+ CHECK_CPU_FEATURE (AMX_INT8);
|
|
|
e354a5 |
CHECK_CPU_FEATURE (IBRS_IBPB);
|
|
|
e354a5 |
CHECK_CPU_FEATURE (STIBP);
|
|
|
e354a5 |
CHECK_CPU_FEATURE (L1D_FLUSH);
|
|
|
e354a5 |
@@ -237,6 +240,9 @@ do_test (void)
|
|
|
e354a5 |
CHECK_CPU_FEATURE_USABLE (AVX512_4VNNIW);
|
|
|
e354a5 |
CHECK_CPU_FEATURE_USABLE (AVX512_4FMAPS);
|
|
|
e354a5 |
CHECK_CPU_FEATURE_USABLE (AVX512_VP2INTERSECT);
|
|
|
e354a5 |
+ CHECK_CPU_FEATURE_USABLE (AMX_BF16);
|
|
|
e354a5 |
+ CHECK_CPU_FEATURE_USABLE (AMX_TILE);
|
|
|
e354a5 |
+ CHECK_CPU_FEATURE_USABLE (AMX_INT8);
|
|
|
e354a5 |
CHECK_CPU_FEATURE_USABLE (XOP);
|
|
|
e354a5 |
CHECK_CPU_FEATURE_USABLE (FMA4);
|
|
|
e354a5 |
CHECK_CPU_FEATURE_USABLE (XSAVEC);
|