6ca6e8
commit ea69248445fb9b80da02ee0c7261cba4b1a5532e
6ca6e8
Author: Wangyang Guo <wangyang.guo@intel.com>
6ca6e8
Date:   Fri May 6 01:50:10 2022 +0000
6ca6e8
6ca6e8
    nptl: Add backoff mechanism to spinlock loop
6ca6e8
    
6ca6e8
    When multiple threads wait for lock at the same time, once lock owner
6ca6e8
    releases the lock, waiters will see lock available and all try to lock,
6ca6e8
    which may cause an expensive CAS storm.
6ca6e8
    
6ca6e8
    Binary exponential backoff with random jitter is introduced. As try-lock
6ca6e8
    attempt increases, it is more likely that a larger number of threads
6ca6e8
    compete for adaptive mutex lock, so increase wait time exponentially.
6ca6e8
    A random jitter is also added to avoid synchronous try-lock from other
6ca6e8
    threads.
6ca6e8
    
6ca6e8
    v2: Remove read-check before try-lock for performance.
6ca6e8
    
6ca6e8
    v3:
6ca6e8
    1. Restore read-check since it works well on some platforms.
6ca6e8
    2. Make backoff arch dependent, and enable it for x86_64.
6ca6e8
    3. Limit max backoff to reduce latency in large critical section.
6ca6e8
    
6ca6e8
    v4: Fix strict-prototypes error in sysdeps/nptl/pthread_mutex_backoff.h
6ca6e8
    
6ca6e8
    v5: Commit log updated for regression in large critical section.
6ca6e8
    
6ca6e8
    Result of pthread-mutex-locks bench
6ca6e8
    
6ca6e8
    Test Platform: Xeon 8280L (2 socket, 112 CPUs in total)
6ca6e8
    First Row: thread number
6ca6e8
    First Col: critical section length
6ca6e8
    Values: backoff vs upstream, time based, low is better
6ca6e8
    
6ca6e8
    non-critical-length: 1
6ca6e8
            1       2       4       8       16      32      64      112     140
6ca6e8
    0       0.99    0.58    0.52    0.49    0.43    0.44    0.46    0.52    0.54
6ca6e8
    1       0.98    0.43    0.56    0.50    0.44    0.45    0.50    0.56    0.57
6ca6e8
    2       0.99    0.41    0.57    0.51    0.45    0.47    0.48    0.60    0.61
6ca6e8
    4       0.99    0.45    0.59    0.53    0.48    0.49    0.52    0.64    0.65
6ca6e8
    8       1.00    0.66    0.71    0.63    0.56    0.59    0.66    0.72    0.71
6ca6e8
    16      0.97    0.78    0.91    0.73    0.67    0.70    0.79    0.80    0.80
6ca6e8
    32      0.95    1.17    0.98    0.87    0.82    0.86    0.89    0.90    0.90
6ca6e8
    64      0.96    0.95    1.01    1.01    0.98    1.00    1.03    0.99    0.99
6ca6e8
    128     0.99    1.01    1.01    1.17    1.08    1.12    1.02    0.97    1.02
6ca6e8
    
6ca6e8
    non-critical-length: 32
6ca6e8
            1       2       4       8       16      32      64      112     140
6ca6e8
    0       1.03    0.97    0.75    0.65    0.58    0.58    0.56    0.70    0.70
6ca6e8
    1       0.94    0.95    0.76    0.65    0.58    0.58    0.61    0.71    0.72
6ca6e8
    2       0.97    0.96    0.77    0.66    0.58    0.59    0.62    0.74    0.74
6ca6e8
    4       0.99    0.96    0.78    0.66    0.60    0.61    0.66    0.76    0.77
6ca6e8
    8       0.99    0.99    0.84    0.70    0.64    0.66    0.71    0.80    0.80
6ca6e8
    16      0.98    0.97    0.95    0.76    0.70    0.73    0.81    0.85    0.84
6ca6e8
    32      1.04    1.12    1.04    0.89    0.82    0.86    0.93    0.91    0.91
6ca6e8
    64      0.99    1.15    1.07    1.00    0.99    1.01    1.05    0.99    0.99
6ca6e8
    128     1.00    1.21    1.20    1.22    1.25    1.31    1.12    1.10    0.99
6ca6e8
    
6ca6e8
    non-critical-length: 128
6ca6e8
            1       2       4       8       16      32      64      112     140
6ca6e8
    0       1.02    1.00    0.99    0.67    0.61    0.61    0.61    0.74    0.73
6ca6e8
    1       0.95    0.99    1.00    0.68    0.61    0.60    0.60    0.74    0.74
6ca6e8
    2       1.00    1.04    1.00    0.68    0.59    0.61    0.65    0.76    0.76
6ca6e8
    4       1.00    0.96    0.98    0.70    0.63    0.63    0.67    0.78    0.77
6ca6e8
    8       1.01    1.02    0.89    0.73    0.65    0.67    0.71    0.81    0.80
6ca6e8
    16      0.99    0.96    0.96    0.79    0.71    0.73    0.80    0.84    0.84
6ca6e8
    32      0.99    0.95    1.05    0.89    0.84    0.85    0.94    0.92    0.91
6ca6e8
    64      1.00    0.99    1.16    1.04    1.00    1.02    1.06    0.99    0.99
6ca6e8
    128     1.00    1.06    0.98    1.14    1.39    1.26    1.08    1.02    0.98
6ca6e8
    
6ca6e8
    There is a regression in large critical sections. But adaptive mutex is
6ca6e8
    aimed for "quick" locks. Small critical section is more common when
6ca6e8
    users choose to use adaptive pthread_mutex.
6ca6e8
    
6ca6e8
    Signed-off-by: Wangyang Guo <wangyang.guo@intel.com>
6ca6e8
    Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
6ca6e8
    (cherry picked from commit 8162147872491bb5b48e91543b19c49a29ae6b6d)
6ca6e8
6ca6e8
diff --git a/nptl/pthread_mutex_lock.c b/nptl/pthread_mutex_lock.c
6ca6e8
index bbe754a272b97d91..8f3f687f2a151d16 100644
6ca6e8
--- a/nptl/pthread_mutex_lock.c
6ca6e8
+++ b/nptl/pthread_mutex_lock.c
6ca6e8
@@ -139,14 +139,26 @@ PTHREAD_MUTEX_LOCK (pthread_mutex_t *mutex)
6ca6e8
 	  int cnt = 0;
6ca6e8
 	  int max_cnt = MIN (max_adaptive_count (),
6ca6e8
 			     mutex->__data.__spins * 2 + 10);
6ca6e8
+	  int spin_count, exp_backoff = 1;
6ca6e8
+	  unsigned int jitter = get_jitter ();
6ca6e8
 	  do
6ca6e8
 	    {
6ca6e8
-	      if (cnt++ >= max_cnt)
6ca6e8
+	      /* In each loop, spin count is exponential backoff plus
6ca6e8
+		 random jitter, random range is [0, exp_backoff-1].  */
6ca6e8
+	      spin_count = exp_backoff + (jitter & (exp_backoff - 1));
6ca6e8
+	      cnt += spin_count;
6ca6e8
+	      if (cnt >= max_cnt)
6ca6e8
 		{
6ca6e8
+		  /* If cnt exceeds max spin count, just go to wait
6ca6e8
+		     queue.  */
6ca6e8
 		  LLL_MUTEX_LOCK (mutex);
6ca6e8
 		  break;
6ca6e8
 		}
6ca6e8
-	      atomic_spin_nop ();
6ca6e8
+	      do
6ca6e8
+		atomic_spin_nop ();
6ca6e8
+	      while (--spin_count > 0);
6ca6e8
+	      /* Prepare for next loop.  */
6ca6e8
+	      exp_backoff = get_next_backoff (exp_backoff);
6ca6e8
 	    }
6ca6e8
 	  while (LLL_MUTEX_READ_LOCK (mutex) != 0
6ca6e8
 		 || LLL_MUTEX_TRYLOCK (mutex) != 0);
6ca6e8
diff --git a/sysdeps/nptl/pthreadP.h b/sysdeps/nptl/pthreadP.h
6ca6e8
index b968afc4c6b61b92..ed186ce3df1fde0c 100644
6ca6e8
--- a/sysdeps/nptl/pthreadP.h
6ca6e8
+++ b/sysdeps/nptl/pthreadP.h
6ca6e8
@@ -34,6 +34,7 @@
6ca6e8
 #include <kernel-features.h>
6ca6e8
 #include <errno.h>
6ca6e8
 #include <internal-signals.h>
6ca6e8
+#include <pthread_mutex_backoff.h>
6ca6e8
 #include "pthread_mutex_conf.h"
6ca6e8
 
6ca6e8
 
6ca6e8
diff --git a/sysdeps/nptl/pthread_mutex_backoff.h b/sysdeps/nptl/pthread_mutex_backoff.h
6ca6e8
new file mode 100644
6ca6e8
index 0000000000000000..5b26c22ac789f54f
6ca6e8
--- /dev/null
6ca6e8
+++ b/sysdeps/nptl/pthread_mutex_backoff.h
6ca6e8
@@ -0,0 +1,35 @@
6ca6e8
+/* Pthread mutex backoff configuration.
6ca6e8
+   Copyright (C) 2022 Free Software Foundation, Inc.
6ca6e8
+   This file is part of the GNU C Library.
6ca6e8
+
6ca6e8
+   The GNU C Library is free software; you can redistribute it and/or
6ca6e8
+   modify it under the terms of the GNU Lesser General Public
6ca6e8
+   License as published by the Free Software Foundation; either
6ca6e8
+   version 2.1 of the License, or (at your option) any later version.
6ca6e8
+
6ca6e8
+   The GNU C Library is distributed in the hope that it will be useful,
6ca6e8
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
6ca6e8
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
6ca6e8
+   Lesser General Public License for more details.
6ca6e8
+
6ca6e8
+   You should have received a copy of the GNU Lesser General Public
6ca6e8
+   License along with the GNU C Library; if not, see
6ca6e8
+   <https://www.gnu.org/licenses/>.  */
6ca6e8
+#ifndef _PTHREAD_MUTEX_BACKOFF_H
6ca6e8
+#define _PTHREAD_MUTEX_BACKOFF_H 1
6ca6e8
+
6ca6e8
+static inline unsigned int
6ca6e8
+get_jitter (void)
6ca6e8
+{
6ca6e8
+  /* Arch dependent random jitter, return 0 disables random.  */
6ca6e8
+  return 0;
6ca6e8
+}
6ca6e8
+
6ca6e8
+static inline int
6ca6e8
+get_next_backoff (int backoff)
6ca6e8
+{
6ca6e8
+  /* Next backoff, return 1 disables mutex backoff.  */
6ca6e8
+  return 1;
6ca6e8
+}
6ca6e8
+
6ca6e8
+#endif
6ca6e8
diff --git a/sysdeps/x86_64/nptl/pthread_mutex_backoff.h b/sysdeps/x86_64/nptl/pthread_mutex_backoff.h
6ca6e8
new file mode 100644
6ca6e8
index 0000000000000000..ec74c3d9db61864e
6ca6e8
--- /dev/null
6ca6e8
+++ b/sysdeps/x86_64/nptl/pthread_mutex_backoff.h
6ca6e8
@@ -0,0 +1,39 @@
6ca6e8
+/* Pthread mutex backoff configuration.
6ca6e8
+   Copyright (C) 2022 Free Software Foundation, Inc.
6ca6e8
+   This file is part of the GNU C Library.
6ca6e8
+
6ca6e8
+   The GNU C Library is free software; you can redistribute it and/or
6ca6e8
+   modify it under the terms of the GNU Lesser General Public
6ca6e8
+   License as published by the Free Software Foundation; either
6ca6e8
+   version 2.1 of the License, or (at your option) any later version.
6ca6e8
+
6ca6e8
+   The GNU C Library is distributed in the hope that it will be useful,
6ca6e8
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
6ca6e8
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
6ca6e8
+   Lesser General Public License for more details.
6ca6e8
+
6ca6e8
+   You should have received a copy of the GNU Lesser General Public
6ca6e8
+   License along with the GNU C Library; if not, see
6ca6e8
+   <https://www.gnu.org/licenses/>.  */
6ca6e8
+#ifndef _PTHREAD_MUTEX_BACKOFF_H
6ca6e8
+#define _PTHREAD_MUTEX_BACKOFF_H 1
6ca6e8
+
6ca6e8
+#include <fast-jitter.h>
6ca6e8
+
6ca6e8
+static inline unsigned int
6ca6e8
+get_jitter (void)
6ca6e8
+{
6ca6e8
+  return get_fast_jitter ();
6ca6e8
+}
6ca6e8
+
6ca6e8
+#define MAX_BACKOFF 16
6ca6e8
+
6ca6e8
+static inline int
6ca6e8
+get_next_backoff (int backoff)
6ca6e8
+{
6ca6e8
+  /* Binary exponential backoff. Limiting max backoff
6ca6e8
+     can reduce latency in large critical section.  */
6ca6e8
+  return (backoff < MAX_BACKOFF) ? backoff << 1 : backoff;
6ca6e8
+}
6ca6e8
+
6ca6e8
+#endif