|
|
00db10 |
Partial backport:
|
|
|
00db10 |
|
|
|
00db10 |
Skipped the sysdeps/x86/elide.h changes since rw-locks are not backported.
|
|
|
00db10 |
|
|
|
00db10 |
commit ca6e601a9d4a72b3699cca15bad12ac1716bf49a
|
|
|
00db10 |
Author: Torvald Riegel <triegel@redhat.com>
|
|
|
00db10 |
Date: Wed Nov 30 17:53:11 2016 +0100
|
|
|
00db10 |
|
|
|
00db10 |
Use C11-like atomics instead of plain memory accesses in x86 lock elision.
|
|
|
00db10 |
|
|
|
00db10 |
This uses atomic operations to access lock elision metadata that is accessed
|
|
|
00db10 |
concurrently (i.e., adapt_count fields). The size of the data is less than a
|
|
|
00db10 |
word but accessed only with atomic loads and stores; therefore, we add
|
|
|
00db10 |
support for shorter-size atomic loads and stores too.
|
|
|
00db10 |
|
|
|
00db10 |
* include/atomic.h (__atomic_check_size_ls): New.
|
|
|
00db10 |
(atomic_load_relaxed, atomic_load_acquire, atomic_store_relaxed,
|
|
|
00db10 |
atomic_store_release): Use it.
|
|
|
00db10 |
* sysdeps/x86/elide.h (ACCESS_ONCE): Remove.
|
|
|
00db10 |
(elision_adapt, ELIDE_LOCK): Use atomics.
|
|
|
00db10 |
* sysdeps/unix/sysv/linux/x86/elision-lock.c (__lll_lock_elision): Use
|
|
|
00db10 |
atomics and improve code comments.
|
|
|
00db10 |
* sysdeps/unix/sysv/linux/x86/elision-trylock.c
|
|
|
00db10 |
(__lll_trylock_elision): Likewise.
|
|
|
00db10 |
|
|
|
00db10 |
Index: glibc-2.17-c758a686/include/atomic.h
|
|
|
00db10 |
===================================================================
|
|
|
00db10 |
--- glibc-2.17-c758a686.orig/include/atomic.h
|
|
|
00db10 |
+++ glibc-2.17-c758a686/include/atomic.h
|
|
|
00db10 |
@@ -567,6 +567,20 @@ void __atomic_link_error (void);
|
|
|
00db10 |
if (sizeof (*mem) != 4) \
|
|
|
00db10 |
__atomic_link_error ();
|
|
|
00db10 |
# endif
|
|
|
00db10 |
+/* We additionally provide 8b and 16b atomic loads and stores; we do not yet
|
|
|
00db10 |
+ need other atomic operations of such sizes, and restricting the support to
|
|
|
00db10 |
+ loads and stores makes this easier for archs that do not have native
|
|
|
00db10 |
+ support for atomic operations to less-than-word-sized data. */
|
|
|
00db10 |
+# if __HAVE_64B_ATOMICS == 1
|
|
|
00db10 |
+# define __atomic_check_size_ls(mem) \
|
|
|
00db10 |
+ if ((sizeof (*mem) != 1) && (sizeof (*mem) != 2) && (sizeof (*mem) != 4) \
|
|
|
00db10 |
+ && (sizeof (*mem) != 8)) \
|
|
|
00db10 |
+ __atomic_link_error ();
|
|
|
00db10 |
+# else
|
|
|
00db10 |
+# define __atomic_check_size_ls(mem) \
|
|
|
00db10 |
+ if ((sizeof (*mem) != 1) && (sizeof (*mem) != 2) && sizeof (*mem) != 4) \
|
|
|
00db10 |
+ __atomic_link_error ();
|
|
|
00db10 |
+# endif
|
|
|
00db10 |
|
|
|
00db10 |
# define atomic_thread_fence_acquire() \
|
|
|
00db10 |
__atomic_thread_fence (__ATOMIC_ACQUIRE)
|
|
|
00db10 |
@@ -576,18 +590,20 @@ void __atomic_link_error (void);
|
|
|
00db10 |
__atomic_thread_fence (__ATOMIC_SEQ_CST)
|
|
|
00db10 |
|
|
|
00db10 |
# define atomic_load_relaxed(mem) \
|
|
|
00db10 |
- ({ __atomic_check_size((mem)); __atomic_load_n ((mem), __ATOMIC_RELAXED); })
|
|
|
00db10 |
+ ({ __atomic_check_size_ls((mem)); \
|
|
|
00db10 |
+ __atomic_load_n ((mem), __ATOMIC_RELAXED); })
|
|
|
00db10 |
# define atomic_load_acquire(mem) \
|
|
|
00db10 |
- ({ __atomic_check_size((mem)); __atomic_load_n ((mem), __ATOMIC_ACQUIRE); })
|
|
|
00db10 |
+ ({ __atomic_check_size_ls((mem)); \
|
|
|
00db10 |
+ __atomic_load_n ((mem), __ATOMIC_ACQUIRE); })
|
|
|
00db10 |
|
|
|
00db10 |
# define atomic_store_relaxed(mem, val) \
|
|
|
00db10 |
do { \
|
|
|
00db10 |
- __atomic_check_size((mem)); \
|
|
|
00db10 |
+ __atomic_check_size_ls((mem)); \
|
|
|
00db10 |
__atomic_store_n ((mem), (val), __ATOMIC_RELAXED); \
|
|
|
00db10 |
} while (0)
|
|
|
00db10 |
# define atomic_store_release(mem, val) \
|
|
|
00db10 |
do { \
|
|
|
00db10 |
- __atomic_check_size((mem)); \
|
|
|
00db10 |
+ __atomic_check_size_ls((mem)); \
|
|
|
00db10 |
__atomic_store_n ((mem), (val), __ATOMIC_RELEASE); \
|
|
|
00db10 |
} while (0)
|
|
|
00db10 |
|
|
|
00db10 |
Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-lock.c
|
|
|
00db10 |
===================================================================
|
|
|
00db10 |
--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/elision-lock.c
|
|
|
00db10 |
+++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-lock.c
|
|
|
00db10 |
@@ -44,7 +44,13 @@
|
|
|
00db10 |
int
|
|
|
00db10 |
__lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
|
|
|
00db10 |
{
|
|
|
00db10 |
- if (*adapt_count <= 0)
|
|
|
00db10 |
+ /* adapt_count can be accessed concurrently; these accesses can be both
|
|
|
00db10 |
+ inside of transactions (if critical sections are nested and the outer
|
|
|
00db10 |
+ critical section uses lock elision) and outside of transactions. Thus,
|
|
|
00db10 |
+ we need to use atomic accesses to avoid data races. However, the
|
|
|
00db10 |
+ value of adapt_count is just a hint, so relaxed MO accesses are
|
|
|
00db10 |
+ sufficient. */
|
|
|
00db10 |
+ if (atomic_load_relaxed (adapt_count) <= 0)
|
|
|
00db10 |
{
|
|
|
00db10 |
unsigned status;
|
|
|
00db10 |
int try_xbegin;
|
|
|
00db10 |
@@ -70,15 +76,20 @@ __lll_lock_elision (int *futex, short *a
|
|
|
00db10 |
&& _XABORT_CODE (status) == _ABORT_LOCK_BUSY)
|
|
|
00db10 |
{
|
|
|
00db10 |
/* Right now we skip here. Better would be to wait a bit
|
|
|
00db10 |
- and retry. This likely needs some spinning. */
|
|
|
00db10 |
- if (*adapt_count != aconf.skip_lock_busy)
|
|
|
00db10 |
- *adapt_count = aconf.skip_lock_busy;
|
|
|
00db10 |
+ and retry. This likely needs some spinning. See
|
|
|
00db10 |
+ above for why relaxed MO is sufficient. */
|
|
|
00db10 |
+ if (atomic_load_relaxed (adapt_count)
|
|
|
00db10 |
+ != aconf.skip_lock_busy)
|
|
|
00db10 |
+ atomic_store_relaxed (adapt_count, aconf.skip_lock_busy);
|
|
|
00db10 |
}
|
|
|
00db10 |
/* Internal abort. There is no chance for retry.
|
|
|
00db10 |
Use the normal locking and next time use lock.
|
|
|
00db10 |
- Be careful to avoid writing to the lock. */
|
|
|
00db10 |
- else if (*adapt_count != aconf.skip_lock_internal_abort)
|
|
|
00db10 |
- *adapt_count = aconf.skip_lock_internal_abort;
|
|
|
00db10 |
+ Be careful to avoid writing to the lock. See above for why
|
|
|
00db10 |
+ relaxed MO is sufficient. */
|
|
|
00db10 |
+ else if (atomic_load_relaxed (adapt_count)
|
|
|
00db10 |
+ != aconf.skip_lock_internal_abort)
|
|
|
00db10 |
+ atomic_store_relaxed (adapt_count,
|
|
|
00db10 |
+ aconf.skip_lock_internal_abort);
|
|
|
00db10 |
break;
|
|
|
00db10 |
}
|
|
|
00db10 |
}
|
|
|
00db10 |
@@ -87,7 +98,8 @@ __lll_lock_elision (int *futex, short *a
|
|
|
00db10 |
{
|
|
|
00db10 |
/* Use a normal lock until the threshold counter runs out.
|
|
|
00db10 |
Lost updates possible. */
|
|
|
00db10 |
- (*adapt_count)--;
|
|
|
00db10 |
+ atomic_store_relaxed (adapt_count,
|
|
|
00db10 |
+ atomic_load_relaxed (adapt_count) - 1);
|
|
|
00db10 |
}
|
|
|
00db10 |
|
|
|
00db10 |
/* Use a normal lock here. */
|
|
|
00db10 |
Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-trylock.c
|
|
|
00db10 |
===================================================================
|
|
|
00db10 |
--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/elision-trylock.c
|
|
|
00db10 |
+++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-trylock.c
|
|
|
00db10 |
@@ -36,8 +36,10 @@ __lll_trylock_elision (int *futex, short
|
|
|
00db10 |
return an error. */
|
|
|
00db10 |
_xabort (_ABORT_NESTED_TRYLOCK);
|
|
|
00db10 |
|
|
|
00db10 |
- /* Only try a transaction if it's worth it. */
|
|
|
00db10 |
- if (*adapt_count <= 0)
|
|
|
00db10 |
+ /* Only try a transaction if it's worth it. See __lll_lock_elision for
|
|
|
00db10 |
+ why we need atomic accesses. Relaxed MO is sufficient because this is
|
|
|
00db10 |
+ just a hint. */
|
|
|
00db10 |
+ if (atomic_load_relaxed (adapt_count) <= 0)
|
|
|
00db10 |
{
|
|
|
00db10 |
unsigned status;
|
|
|
00db10 |
|
|
|
00db10 |
@@ -55,16 +57,18 @@ __lll_trylock_elision (int *futex, short
|
|
|
00db10 |
if (!(status & _XABORT_RETRY))
|
|
|
00db10 |
{
|
|
|
00db10 |
/* Internal abort. No chance for retry. For future
|
|
|
00db10 |
- locks don't try speculation for some time. */
|
|
|
00db10 |
- if (*adapt_count != aconf.skip_trylock_internal_abort)
|
|
|
00db10 |
- *adapt_count = aconf.skip_trylock_internal_abort;
|
|
|
00db10 |
+ locks don't try speculation for some time. See above for MO. */
|
|
|
00db10 |
+ if (atomic_load_relaxed (adapt_count)
|
|
|
00db10 |
+ != aconf.skip_lock_internal_abort)
|
|
|
00db10 |
+ atomic_store_relaxed (adapt_count, aconf.skip_lock_internal_abort);
|
|
|
00db10 |
}
|
|
|
00db10 |
/* Could do some retries here. */
|
|
|
00db10 |
}
|
|
|
00db10 |
else
|
|
|
00db10 |
{
|
|
|
00db10 |
- /* Lost updates are possible, but harmless. */
|
|
|
00db10 |
- (*adapt_count)--;
|
|
|
00db10 |
+ /* Lost updates are possible but harmless (see above). */
|
|
|
00db10 |
+ atomic_store_relaxed (adapt_count,
|
|
|
00db10 |
+ atomic_load_relaxed (adapt_count) - 1);
|
|
|
00db10 |
}
|
|
|
00db10 |
|
|
|
00db10 |
return lll_trylock (*futex);
|