08c3a6
commit 6e008c884dad5a25f91085c68d044bb5e2d63761
08c3a6
Author: Noah Goldstein <goldstein.w.n@gmail.com>
08c3a6
Date:   Tue Jun 14 13:50:11 2022 -0700
08c3a6
08c3a6
    x86: Fix misordered logic for setting `rep_movsb_stop_threshold`
08c3a6
    
08c3a6
    Move the setting of `rep_movsb_stop_threshold` to after the tunables
08c3a6
    have been collected so that the `rep_movsb_stop_threshold` (which
08c3a6
    is used to redirect control flow to the non_temporal case) will
08c3a6
    use any user value for `non_temporal_threshold` (set using
08c3a6
    glibc.cpu.x86_non_temporal_threshold).
08c3a6
    
08c3a6
    (cherry picked from commit 035591551400cfc810b07244a015c9411e8bff7c)
08c3a6
08c3a6
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
08c3a6
index 2e43e67e4f4037d3..560bf260e8fbd7bf 100644
08c3a6
--- a/sysdeps/x86/dl-cacheinfo.h
08c3a6
+++ b/sysdeps/x86/dl-cacheinfo.h
08c3a6
@@ -898,18 +898,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
08c3a6
   if (CPU_FEATURE_USABLE_P (cpu_features, FSRM))
08c3a6
     rep_movsb_threshold = 2112;
08c3a6
 
08c3a6
-  unsigned long int rep_movsb_stop_threshold;
08c3a6
-  /* ERMS feature is implemented from AMD Zen3 architecture and it is
08c3a6
-     performing poorly for data above L2 cache size. Henceforth, adding
08c3a6
-     an upper bound threshold parameter to limit the usage of Enhanced
08c3a6
-     REP MOVSB operations and setting its value to L2 cache size.  */
08c3a6
-  if (cpu_features->basic.kind == arch_kind_amd)
08c3a6
-    rep_movsb_stop_threshold = core;
08c3a6
-  /* Setting the upper bound of ERMS to the computed value of
08c3a6
-     non-temporal threshold for architectures other than AMD.  */
08c3a6
-  else
08c3a6
-    rep_movsb_stop_threshold = non_temporal_threshold;
08c3a6
-
08c3a6
   /* The default threshold to use Enhanced REP STOSB.  */
08c3a6
   unsigned long int rep_stosb_threshold = 2048;
08c3a6
 
08c3a6
@@ -951,6 +939,18 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
08c3a6
 			   SIZE_MAX);
08c3a6
 #endif
08c3a6
 
08c3a6
+  unsigned long int rep_movsb_stop_threshold;
08c3a6
+  /* ERMS feature is implemented from AMD Zen3 architecture and it is
08c3a6
+     performing poorly for data above L2 cache size. Henceforth, adding
08c3a6
+     an upper bound threshold parameter to limit the usage of Enhanced
08c3a6
+     REP MOVSB operations and setting its value to L2 cache size.  */
08c3a6
+  if (cpu_features->basic.kind == arch_kind_amd)
08c3a6
+    rep_movsb_stop_threshold = core;
08c3a6
+  /* Setting the upper bound of ERMS to the computed value of
08c3a6
+     non-temporal threshold for architectures other than AMD.  */
08c3a6
+  else
08c3a6
+    rep_movsb_stop_threshold = non_temporal_threshold;
08c3a6
+
08c3a6
   cpu_features->data_cache_size = data;
08c3a6
   cpu_features->shared_cache_size = shared;
08c3a6
   cpu_features->non_temporal_threshold = non_temporal_threshold;