1feee8
commit ea69248445fb9b80da02ee0c7261cba4b1a5532e
1feee8
Author: Wangyang Guo <wangyang.guo@intel.com>
1feee8
Date:   Fri May 6 01:50:10 2022 +0000
1feee8
1feee8
    nptl: Add backoff mechanism to spinlock loop
1feee8
    
1feee8
    When multiple threads are waiting for a lock at the same time, once lock owner
1feee8
    releases the lock, waiters will see lock available and all try to lock,
1feee8
    which may cause an expensive CAS storm.
1feee8
    
1feee8
    Binary exponential backoff with random jitter is introduced. As try-lock
1feee8
    attempts increase, it is more likely that a larger number of threads
1feee8
    compete for the adaptive mutex lock, so increase wait time exponentially.
1feee8
    A random jitter is also added to avoid synchronous try-lock from other
1feee8
    threads.
1feee8
    
1feee8
    v2: Remove read-check before try-lock for performance.
1feee8
    
1feee8
    v3:
1feee8
    1. Restore read-check since it works well on some platforms.
1feee8
    2. Make backoff arch dependent, and enable it for x86_64.
1feee8
    3. Limit max backoff to reduce latency in large critical section.
1feee8
    
1feee8
    v4: Fix strict-prototypes error in sysdeps/nptl/pthread_mutex_backoff.h
1feee8
    
1feee8
    v5: Commit log updated for regression in large critical section.
1feee8
    
1feee8
    Result of pthread-mutex-locks bench
1feee8
    
1feee8
    Test Platform: Xeon 8280L (2 socket, 112 CPUs in total)
1feee8
    First Row: thread number
1feee8
    First Col: critical section length
1feee8
    Values: backoff vs upstream, time based, lower is better
1feee8
    
1feee8
    non-critical-length: 1
1feee8
            1       2       4       8       16      32      64      112     140
1feee8
    0       0.99    0.58    0.52    0.49    0.43    0.44    0.46    0.52    0.54
1feee8
    1       0.98    0.43    0.56    0.50    0.44    0.45    0.50    0.56    0.57
1feee8
    2       0.99    0.41    0.57    0.51    0.45    0.47    0.48    0.60    0.61
1feee8
    4       0.99    0.45    0.59    0.53    0.48    0.49    0.52    0.64    0.65
1feee8
    8       1.00    0.66    0.71    0.63    0.56    0.59    0.66    0.72    0.71
1feee8
    16      0.97    0.78    0.91    0.73    0.67    0.70    0.79    0.80    0.80
1feee8
    32      0.95    1.17    0.98    0.87    0.82    0.86    0.89    0.90    0.90
1feee8
    64      0.96    0.95    1.01    1.01    0.98    1.00    1.03    0.99    0.99
1feee8
    128     0.99    1.01    1.01    1.17    1.08    1.12    1.02    0.97    1.02
1feee8
    
1feee8
    non-critical-length: 32
1feee8
            1       2       4       8       16      32      64      112     140
1feee8
    0       1.03    0.97    0.75    0.65    0.58    0.58    0.56    0.70    0.70
1feee8
    1       0.94    0.95    0.76    0.65    0.58    0.58    0.61    0.71    0.72
1feee8
    2       0.97    0.96    0.77    0.66    0.58    0.59    0.62    0.74    0.74
1feee8
    4       0.99    0.96    0.78    0.66    0.60    0.61    0.66    0.76    0.77
1feee8
    8       0.99    0.99    0.84    0.70    0.64    0.66    0.71    0.80    0.80
1feee8
    16      0.98    0.97    0.95    0.76    0.70    0.73    0.81    0.85    0.84
1feee8
    32      1.04    1.12    1.04    0.89    0.82    0.86    0.93    0.91    0.91
1feee8
    64      0.99    1.15    1.07    1.00    0.99    1.01    1.05    0.99    0.99
1feee8
    128     1.00    1.21    1.20    1.22    1.25    1.31    1.12    1.10    0.99
1feee8
    
1feee8
    non-critical-length: 128
1feee8
            1       2       4       8       16      32      64      112     140
1feee8
    0       1.02    1.00    0.99    0.67    0.61    0.61    0.61    0.74    0.73
1feee8
    1       0.95    0.99    1.00    0.68    0.61    0.60    0.60    0.74    0.74
1feee8
    2       1.00    1.04    1.00    0.68    0.59    0.61    0.65    0.76    0.76
1feee8
    4       1.00    0.96    0.98    0.70    0.63    0.63    0.67    0.78    0.77
1feee8
    8       1.01    1.02    0.89    0.73    0.65    0.67    0.71    0.81    0.80
1feee8
    16      0.99    0.96    0.96    0.79    0.71    0.73    0.80    0.84    0.84
1feee8
    32      0.99    0.95    1.05    0.89    0.84    0.85    0.94    0.92    0.91
1feee8
    64      1.00    0.99    1.16    1.04    1.00    1.02    1.06    0.99    0.99
1feee8
    128     1.00    1.06    0.98    1.14    1.39    1.26    1.08    1.02    0.98
1feee8
    
1feee8
    There is a regression for large critical sections. But adaptive mutex is
1feee8
    aimed at "quick" locks. Small critical sections are more common when
1feee8
    users choose to use adaptive pthread_mutex.
1feee8
    
1feee8
    Signed-off-by: Wangyang Guo <wangyang.guo@intel.com>
1feee8
    Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
1feee8
    (cherry picked from commit 8162147872491bb5b48e91543b19c49a29ae6b6d)
1feee8
1feee8
diff --git a/nptl/pthread_mutex_lock.c b/nptl/pthread_mutex_lock.c
1feee8
index bbe754a272b97d91..8f3f687f2a151d16 100644
1feee8
--- a/nptl/pthread_mutex_lock.c
1feee8
+++ b/nptl/pthread_mutex_lock.c
1feee8
@@ -139,14 +139,26 @@ PTHREAD_MUTEX_LOCK (pthread_mutex_t *mutex)
1feee8
 	  int cnt = 0;
1feee8
 	  int max_cnt = MIN (max_adaptive_count (),
1feee8
 			     mutex->__data.__spins * 2 + 10);
1feee8
+	  int spin_count, exp_backoff = 1;
1feee8
+	  unsigned int jitter = get_jitter ();
1feee8
 	  do
1feee8
 	    {
1feee8
-	      if (cnt++ >= max_cnt)
1feee8
+	      /* In each loop, spin count is exponential backoff plus
1feee8
+		 random jitter, random range is [0, exp_backoff-1].  */
1feee8
+	      spin_count = exp_backoff + (jitter & (exp_backoff - 1));
1feee8
+	      cnt += spin_count;
1feee8
+	      if (cnt >= max_cnt)
1feee8
 		{
1feee8
+		  /* If cnt exceeds max spin count, just go to wait
1feee8
+		     queue.  */
1feee8
 		  LLL_MUTEX_LOCK (mutex);
1feee8
 		  break;
1feee8
 		}
1feee8
-	      atomic_spin_nop ();
1feee8
+	      do
1feee8
+		atomic_spin_nop ();
1feee8
+	      while (--spin_count > 0);
1feee8
+	      /* Prepare for next loop.  */
1feee8
+	      exp_backoff = get_next_backoff (exp_backoff);
1feee8
 	    }
1feee8
 	  while (LLL_MUTEX_READ_LOCK (mutex) != 0
1feee8
 		 || LLL_MUTEX_TRYLOCK (mutex) != 0);
1feee8
diff --git a/sysdeps/nptl/pthreadP.h b/sysdeps/nptl/pthreadP.h
1feee8
index b968afc4c6b61b92..ed186ce3df1fde0c 100644
1feee8
--- a/sysdeps/nptl/pthreadP.h
1feee8
+++ b/sysdeps/nptl/pthreadP.h
1feee8
@@ -34,6 +34,7 @@
1feee8
 #include <kernel-features.h>
1feee8
 #include <errno.h>
1feee8
 #include <internal-signals.h>
1feee8
+#include <pthread_mutex_backoff.h>
1feee8
 #include "pthread_mutex_conf.h"
1feee8
 
1feee8
 
1feee8
diff --git a/sysdeps/nptl/pthread_mutex_backoff.h b/sysdeps/nptl/pthread_mutex_backoff.h
1feee8
new file mode 100644
1feee8
index 0000000000000000..5b26c22ac789f54f
1feee8
--- /dev/null
1feee8
+++ b/sysdeps/nptl/pthread_mutex_backoff.h
1feee8
@@ -0,0 +1,35 @@
1feee8
+/* Pthread mutex backoff configuration.
1feee8
+   Copyright (C) 2022 Free Software Foundation, Inc.
1feee8
+   This file is part of the GNU C Library.
1feee8
+
1feee8
+   The GNU C Library is free software; you can redistribute it and/or
1feee8
+   modify it under the terms of the GNU Lesser General Public
1feee8
+   License as published by the Free Software Foundation; either
1feee8
+   version 2.1 of the License, or (at your option) any later version.
1feee8
+
1feee8
+   The GNU C Library is distributed in the hope that it will be useful,
1feee8
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
1feee8
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
1feee8
+   Lesser General Public License for more details.
1feee8
+
1feee8
+   You should have received a copy of the GNU Lesser General Public
1feee8
+   License along with the GNU C Library; if not, see
1feee8
+   <https://www.gnu.org/licenses/>.  */
1feee8
+#ifndef _PTHREAD_MUTEX_BACKOFF_H
1feee8
+#define _PTHREAD_MUTEX_BACKOFF_H 1
1feee8
+
1feee8
+static inline unsigned int
1feee8
+get_jitter (void)
1feee8
+{
1feee8
+  /* Arch dependent random jitter, return 0 disables random.  */
1feee8
+  return 0;
1feee8
+}
1feee8
+
1feee8
+static inline int
1feee8
+get_next_backoff (int backoff)
1feee8
+{
1feee8
+  /* Next backoff, return 1 disables mutex backoff.  */
1feee8
+  return 1;
1feee8
+}
1feee8
+
1feee8
+#endif
1feee8
diff --git a/sysdeps/x86_64/nptl/pthread_mutex_backoff.h b/sysdeps/x86_64/nptl/pthread_mutex_backoff.h
1feee8
new file mode 100644
1feee8
index 0000000000000000..ec74c3d9db61864e
1feee8
--- /dev/null
1feee8
+++ b/sysdeps/x86_64/nptl/pthread_mutex_backoff.h
1feee8
@@ -0,0 +1,39 @@
1feee8
+/* Pthread mutex backoff configuration.
1feee8
+   Copyright (C) 2022 Free Software Foundation, Inc.
1feee8
+   This file is part of the GNU C Library.
1feee8
+
1feee8
+   The GNU C Library is free software; you can redistribute it and/or
1feee8
+   modify it under the terms of the GNU Lesser General Public
1feee8
+   License as published by the Free Software Foundation; either
1feee8
+   version 2.1 of the License, or (at your option) any later version.
1feee8
+
1feee8
+   The GNU C Library is distributed in the hope that it will be useful,
1feee8
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
1feee8
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
1feee8
+   Lesser General Public License for more details.
1feee8
+
1feee8
+   You should have received a copy of the GNU Lesser General Public
1feee8
+   License along with the GNU C Library; if not, see
1feee8
+   <https://www.gnu.org/licenses/>.  */
1feee8
+#ifndef _PTHREAD_MUTEX_BACKOFF_H
1feee8
+#define _PTHREAD_MUTEX_BACKOFF_H 1
1feee8
+
1feee8
+#include <fast-jitter.h>
1feee8
+
1feee8
+static inline unsigned int
1feee8
+get_jitter (void)
1feee8
+{
1feee8
+  return get_fast_jitter ();
1feee8
+}
1feee8
+
1feee8
+#define MAX_BACKOFF 16
1feee8
+
1feee8
+static inline int
1feee8
+get_next_backoff (int backoff)
1feee8
+{
1feee8
+  /* Binary exponential backoff. Limiting max backoff
1feee8
+     can reduce latency in large critical section.  */
1feee8
+  return (backoff < MAX_BACKOFF) ? backoff << 1 : backoff;
1feee8
+}
1feee8
+
1feee8
+#endif