513694
From d4b1ecdf48cfe0e711ec201533811b7d823d1b7d Mon Sep 17 00:00:00 2001
513694
From: Wangyang Guo <wangyang.guo@intel.com>
513694
Date: Fri, 6 May 2022 01:50:10 +0000
513694
Subject: [PATCH] nptl: Add backoff mechanism to spinlock loop
513694
513694
When multiple threads are waiting for a lock at the same time, once the lock owner
513694
releases the lock, waiters will see lock available and all try to lock,
513694
which may cause an expensive CAS storm.
513694
513694
Binary exponential backoff with random jitter is introduced. As try-lock
513694
attempts increase, it is more likely that a larger number of threads
513694
compete for the adaptive mutex lock, so the wait time is increased exponentially.
513694
A random jitter is also added to avoid synchronous try-lock from other
513694
threads.
513694
513694
v2: Remove read-check before try-lock for performance.
513694
513694
v3:
513694
1. Restore read-check since it works well on some platforms.
513694
2. Make backoff arch dependent, and enable it for x86_64.
513694
3. Limit max backoff to reduce latency in large critical section.
513694
513694
v4: Fix strict-prototypes error in sysdeps/nptl/pthread_mutex_backoff.h
513694
513694
v5: Commit log updated for regression in large critical section.
513694
513694
Result of pthread-mutex-locks bench
513694
513694
Test Platform: Xeon 8280L (2 socket, 112 CPUs in total)
513694
First Row: thread number
513694
First Col: critical section length
513694
Values: backoff vs upstream, time based, lower is better
513694
513694
non-critical-length: 1
513694
	1	2	4	8	16	32	64	112	140
513694
0	0.99	0.58	0.52	0.49	0.43	0.44	0.46	0.52	0.54
513694
1	0.98	0.43	0.56	0.50	0.44	0.45	0.50	0.56	0.57
513694
2	0.99	0.41	0.57	0.51	0.45	0.47	0.48	0.60	0.61
513694
4	0.99	0.45	0.59	0.53	0.48	0.49	0.52	0.64	0.65
513694
8	1.00	0.66	0.71	0.63	0.56	0.59	0.66	0.72	0.71
513694
16	0.97	0.78	0.91	0.73	0.67	0.70	0.79	0.80	0.80
513694
32	0.95	1.17	0.98	0.87	0.82	0.86	0.89	0.90	0.90
513694
64	0.96	0.95	1.01	1.01	0.98	1.00	1.03	0.99	0.99
513694
128	0.99	1.01	1.01	1.17	1.08	1.12	1.02	0.97	1.02
513694
513694
non-critical-length: 32
513694
	1	2	4	8	16	32	64	112	140
513694
0	1.03	0.97	0.75	0.65	0.58	0.58	0.56	0.70	0.70
513694
1	0.94	0.95	0.76	0.65	0.58	0.58	0.61	0.71	0.72
513694
2	0.97	0.96	0.77	0.66	0.58	0.59	0.62	0.74	0.74
513694
4	0.99	0.96	0.78	0.66	0.60	0.61	0.66	0.76	0.77
513694
8	0.99	0.99	0.84	0.70	0.64	0.66	0.71	0.80	0.80
513694
16	0.98	0.97	0.95	0.76	0.70	0.73	0.81	0.85	0.84
513694
32	1.04	1.12	1.04	0.89	0.82	0.86	0.93	0.91	0.91
513694
64	0.99	1.15	1.07	1.00	0.99	1.01	1.05	0.99	0.99
513694
128	1.00	1.21	1.20	1.22	1.25	1.31	1.12	1.10	0.99
513694
513694
non-critical-length: 128
513694
	1	2	4	8	16	32	64	112	140
513694
0	1.02	1.00	0.99	0.67	0.61	0.61	0.61	0.74	0.73
513694
1	0.95	0.99	1.00	0.68	0.61	0.60	0.60	0.74	0.74
513694
2	1.00	1.04	1.00	0.68	0.59	0.61	0.65	0.76	0.76
513694
4	1.00	0.96	0.98	0.70	0.63	0.63	0.67	0.78	0.77
513694
8	1.01	1.02	0.89	0.73	0.65	0.67	0.71	0.81	0.80
513694
16	0.99	0.96	0.96	0.79	0.71	0.73	0.80	0.84	0.84
513694
32	0.99	0.95	1.05	0.89	0.84	0.85	0.94	0.92	0.91
513694
64	1.00	0.99	1.16	1.04	1.00	1.02	1.06	0.99	0.99
513694
128	1.00	1.06	0.98	1.14	1.39	1.26	1.08	1.02	0.98
513694
513694
There is a regression in large critical sections. But the adaptive mutex is
513694
aimed at "quick" locks. Small critical sections are more common when
513694
users choose to use adaptive pthread_mutex.
513694
513694
Signed-off-by: Wangyang Guo <wangyang.guo@intel.com>
513694
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
513694
(cherry picked from commit 8162147872491bb5b48e91543b19c49a29ae6b6d)
513694
---
513694
 nptl/pthreadP.h                             |  1 +
513694
 nptl/pthread_mutex_lock.c                   | 16 +++++++--
513694
 sysdeps/nptl/pthread_mutex_backoff.h        | 35 ++++++++++++++++++
513694
 sysdeps/x86_64/nptl/pthread_mutex_backoff.h | 39 +++++++++++++++++++++
513694
 4 files changed, 89 insertions(+), 2 deletions(-)
513694
 create mode 100644 sysdeps/nptl/pthread_mutex_backoff.h
513694
 create mode 100644 sysdeps/x86_64/nptl/pthread_mutex_backoff.h
513694
513694
diff --git a/nptl/pthreadP.h b/nptl/pthreadP.h
513694
index 7ddc166c..1550e3b6 100644
513694
--- a/nptl/pthreadP.h
513694
+++ b/nptl/pthreadP.h
513694
@@ -33,6 +33,7 @@
513694
 #include <kernel-features.h>
513694
 #include <errno.h>
513694
 #include <internal-signals.h>
513694
+#include <pthread_mutex_backoff.h>
513694
 
513694
 
513694
 /* Atomic operations on TLS memory.  */
513694
diff --git a/nptl/pthread_mutex_lock.c b/nptl/pthread_mutex_lock.c
513694
index d96a9933..c7770fc9 100644
513694
--- a/nptl/pthread_mutex_lock.c
513694
+++ b/nptl/pthread_mutex_lock.c
513694
@@ -133,14 +133,26 @@ __pthread_mutex_lock (pthread_mutex_t *mutex)
513694
 	  int cnt = 0;
513694
 	  int max_cnt = MIN (MAX_ADAPTIVE_COUNT,
513694
 			     mutex->__data.__spins * 2 + 10);
513694
+	  int spin_count, exp_backoff = 1;
513694
+	  unsigned int jitter = get_jitter ();
513694
 	  do
513694
 	    {
513694
-	      if (cnt++ >= max_cnt)
513694
+	      /* In each loop, spin count is exponential backoff plus
513694
+		 random jitter, random range is [0, exp_backoff-1].  */
513694
+	      spin_count = exp_backoff + (jitter & (exp_backoff - 1));
513694
+	      cnt += spin_count;
513694
+	      if (cnt >= max_cnt)
513694
 		{
513694
+		  /* If cnt exceeds max spin count, just go to wait
513694
+		     queue.  */
513694
 		  LLL_MUTEX_LOCK (mutex);
513694
 		  break;
513694
 		}
513694
-	      atomic_spin_nop ();
513694
+	      do
513694
+		atomic_spin_nop ();
513694
+	      while (--spin_count > 0);
513694
+	      /* Prepare for next loop.  */
513694
+	      exp_backoff = get_next_backoff (exp_backoff);
513694
 	    }
513694
 	  while (LLL_MUTEX_READ_LOCK (mutex) != 0
513694
 		 || LLL_MUTEX_TRYLOCK (mutex) != 0);
513694
diff --git a/sysdeps/nptl/pthread_mutex_backoff.h b/sysdeps/nptl/pthread_mutex_backoff.h
513694
new file mode 100644
513694
index 00000000..5b26c22a
513694
--- /dev/null
513694
+++ b/sysdeps/nptl/pthread_mutex_backoff.h
513694
@@ -0,0 +1,35 @@
513694
+/* Pthread mutex backoff configuration.
513694
+   Copyright (C) 2022 Free Software Foundation, Inc.
513694
+   This file is part of the GNU C Library.
513694
+
513694
+   The GNU C Library is free software; you can redistribute it and/or
513694
+   modify it under the terms of the GNU Lesser General Public
513694
+   License as published by the Free Software Foundation; either
513694
+   version 2.1 of the License, or (at your option) any later version.
513694
+
513694
+   The GNU C Library is distributed in the hope that it will be useful,
513694
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
513694
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
513694
+   Lesser General Public License for more details.
513694
+
513694
+   You should have received a copy of the GNU Lesser General Public
513694
+   License along with the GNU C Library; if not, see
513694
+   <https://www.gnu.org/licenses/>.  */
513694
+#ifndef _PTHREAD_MUTEX_BACKOFF_H
513694
+#define _PTHREAD_MUTEX_BACKOFF_H 1
513694
+
513694
+static inline unsigned int
513694
+get_jitter (void)
513694
+{
513694
+  /* Arch dependent random jitter, return 0 disables random.  */
513694
+  return 0;
513694
+}
513694
+
513694
+static inline int
513694
+get_next_backoff (int backoff)
513694
+{
513694
+  /* Next backoff, return 1 disables mutex backoff.  */
513694
+  return 1;
513694
+}
513694
+
513694
+#endif
513694
diff --git a/sysdeps/x86_64/nptl/pthread_mutex_backoff.h b/sysdeps/x86_64/nptl/pthread_mutex_backoff.h
513694
new file mode 100644
513694
index 00000000..ec74c3d9
513694
--- /dev/null
513694
+++ b/sysdeps/x86_64/nptl/pthread_mutex_backoff.h
513694
@@ -0,0 +1,39 @@
513694
+/* Pthread mutex backoff configuration.
513694
+   Copyright (C) 2022 Free Software Foundation, Inc.
513694
+   This file is part of the GNU C Library.
513694
+
513694
+   The GNU C Library is free software; you can redistribute it and/or
513694
+   modify it under the terms of the GNU Lesser General Public
513694
+   License as published by the Free Software Foundation; either
513694
+   version 2.1 of the License, or (at your option) any later version.
513694
+
513694
+   The GNU C Library is distributed in the hope that it will be useful,
513694
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
513694
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
513694
+   Lesser General Public License for more details.
513694
+
513694
+   You should have received a copy of the GNU Lesser General Public
513694
+   License along with the GNU C Library; if not, see
513694
+   <https://www.gnu.org/licenses/>.  */
513694
+#ifndef _PTHREAD_MUTEX_BACKOFF_H
513694
+#define _PTHREAD_MUTEX_BACKOFF_H 1
513694
+
513694
+#include <fast-jitter.h>
513694
+
513694
+static inline unsigned int
513694
+get_jitter (void)
513694
+{
513694
+  return get_fast_jitter ();
513694
+}
513694
+
513694
+#define MAX_BACKOFF 16
513694
+
513694
+static inline int
513694
+get_next_backoff (int backoff)
513694
+{
513694
+  /* Binary exponential backoff. Limiting max backoff
513694
+     can reduce latency in large critical section.  */
513694
+  return (backoff < MAX_BACKOFF) ? backoff << 1 : backoff;
513694
+}
513694
+
513694
+#endif
513694
-- 
513694
GitLab
513694