446cf2
commit 8813b2682e4094e43b0cf1634e99619f1b8b2c62
446cf2
Author: Sajan Karumanchi <sajan.karumanchi@amd.com>
446cf2
Date:   Wed Oct 28 13:05:33 2020 +0530
446cf2
446cf2
    x86: Optimizing memcpy for AMD Zen architecture.
446cf2
446cf2
    Modify the shareable cache size '__x86_shared_cache_size', which is a
446cf2
    factor in computing the non-temporal threshold parameter
446cf2
    '__x86_shared_non_temporal_threshold', to optimize memcpy for AMD Zen
446cf2
    architectures.
446cf2
    In the existing implementation, the shareable cache is computed as 'L3
446cf2
    per thread, L2 per core'. Recomputing this shareable cache as 'L3 per
446cf2
    CCX (Core-Complex)' has brought performance gains.
446cf2
    As per the large bench variant results, this patch also addresses the
446cf2
    regression problem on AMD Zen architectures.
446cf2
446cf2
    Backport of commit 59803e81f96b479c17f583b31eac44b57591a1bf upstream,
446cf2
    with the fix from cb3a749a22a55645dc6a52659eea765300623f98 ("x86:
446cf2
    Restore processing of cache size tunables in init_cacheinfo") applied.
446cf2
446cf2
    Reviewed-by: Premachandra Mallappa <premachandra.mallappa@amd.com>
446cf2
    Co-Authored-by: Florian Weimer <fweimer@redhat.com>
446cf2
446cf2
Backport is off the release/2.32/master branch upstream, to minimize
446cf2
conflicts.  Adjusted for missing "basic" member in struct cpu_features.
446cf2
446cf2
diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c
446cf2
index 42b468d0c4885bad..57c36d030a76c8b2 100644
446cf2
--- a/sysdeps/x86/cacheinfo.c
446cf2
+++ b/sysdeps/x86/cacheinfo.c
446cf2
@@ -722,7 +722,7 @@ intel_bug_no_cache_info:
446cf2
 	      threads = 1 << ((ecx >> 12) & 0x0f);
446cf2
 	    }
446cf2
 
446cf2
-	  if (threads == 0)
446cf2
+	  if (threads == 0 || cpu_features->family >= 0x17)
446cf2
 	    {
446cf2
 	      /* If APIC ID width is not available, use logical
446cf2
 		 processor count.  */
446cf2
@@ -737,8 +737,22 @@ intel_bug_no_cache_info:
446cf2
 	  if (threads > 0)
446cf2
 	    shared /= threads;
446cf2
 
446cf2
-	  /* Account for exclusive L2 and L3 caches.  */
446cf2
-	  shared += core;
446cf2
+	  /* Get shared cache per ccx for Zen architectures.  */
446cf2
+	  if (cpu_features->family >= 0x17)
446cf2
+	    {
446cf2
+	      unsigned int eax;
446cf2
+
446cf2
+	      /* Get number of threads share the L3 cache in CCX.  */
446cf2
+	      __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx);
446cf2
+
446cf2
+	      unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1;
446cf2
+	      shared *= threads_per_ccx;
446cf2
+	    }
446cf2
+	  else
446cf2
+	    {
446cf2
+	      /* Account for exclusive L2 and L3 caches.  */
446cf2
+	      shared += core;
446cf2
+            }
446cf2
 	}
446cf2
 
446cf2
 #ifndef DISABLE_PREFETCHW