ce426f
Partial backport:
ce426f
ce426f
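Skipped the sysdeps/x86/elide.h changes since rw-locks are not backported.
Note: the elision-trylock.c hunk also replaces aconf.skip_trylock_internal_abort with aconf.skip_lock_internal_abort.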
ce426f
ce426f
commit ca6e601a9d4a72b3699cca15bad12ac1716bf49a
ce426f
Author: Torvald Riegel <triegel@redhat.com>
ce426f
Date:   Wed Nov 30 17:53:11 2016 +0100
ce426f
ce426f
    Use C11-like atomics instead of plain memory accesses in x86 lock elision.
ce426f
    
ce426f
    This uses atomic operations to access lock elision metadata that is accessed
ce426f
    concurrently (i.e., adapt_count fields).  The size of the data is less than a
ce426f
    word but accessed only with atomic loads and stores; therefore, we add
ce426f
    support for shorter-size atomic loads and stores too.
ce426f
    
ce426f
            * include/atomic.h (__atomic_check_size_ls): New.
ce426f
            (atomic_load_relaxed, atomic_load_acquire, atomic_store_relaxed,
ce426f
            atomic_store_release): Use it.
ce426f
            * sysdeps/x86/elide.h (ACCESS_ONCE): Remove.
ce426f
            (elision_adapt, ELIDE_LOCK): Use atomics.
ce426f
            * sysdeps/unix/sysv/linux/x86/elision-lock.c (__lll_lock_elision): Use
ce426f
            atomics and improve code comments.
ce426f
            * sysdeps/unix/sysv/linux/x86/elision-trylock.c
ce426f
            (__lll_trylock_elision): Likewise.
ce426f
ce426f
Index: glibc-2.17-c758a686/include/atomic.h
ce426f
===================================================================
ce426f
--- glibc-2.17-c758a686.orig/include/atomic.h
ce426f
+++ glibc-2.17-c758a686/include/atomic.h
ce426f
@@ -567,6 +567,20 @@ void __atomic_link_error (void);
ce426f
    if (sizeof (*mem) != 4)						      \
ce426f
      __atomic_link_error ();
ce426f
 # endif
ce426f
+/* We additionally provide 8b and 16b atomic loads and stores; we do not yet
ce426f
+   need other atomic operations of such sizes, and restricting the support to
ce426f
+   loads and stores makes this easier for archs that do not have native
ce426f
+   support for atomic operations to less-than-word-sized data.  */
ce426f
+# if __HAVE_64B_ATOMICS == 1
ce426f
+#  define __atomic_check_size_ls(mem) \
ce426f
+   if ((sizeof (*mem) != 1) && (sizeof (*mem) != 2) && (sizeof (*mem) != 4)   \
ce426f
+       && (sizeof (*mem) != 8))						      \
ce426f
+     __atomic_link_error ();
ce426f
+# else
ce426f
+#  define __atomic_check_size_ls(mem) \
ce426f
+   if ((sizeof (*mem) != 1) && (sizeof (*mem) != 2) && sizeof (*mem) != 4)    \
ce426f
+     __atomic_link_error ();
ce426f
+# endif
ce426f
 
ce426f
 # define atomic_thread_fence_acquire() \
ce426f
   __atomic_thread_fence (__ATOMIC_ACQUIRE)
ce426f
@@ -576,18 +590,20 @@ void __atomic_link_error (void);
ce426f
   __atomic_thread_fence (__ATOMIC_SEQ_CST)
ce426f
 
ce426f
 # define atomic_load_relaxed(mem) \
ce426f
-  ({ __atomic_check_size((mem)); __atomic_load_n ((mem), __ATOMIC_RELAXED); })
ce426f
+  ({ __atomic_check_size_ls((mem));					      \
ce426f
+     __atomic_load_n ((mem), __ATOMIC_RELAXED); })
ce426f
 # define atomic_load_acquire(mem) \
ce426f
-  ({ __atomic_check_size((mem)); __atomic_load_n ((mem), __ATOMIC_ACQUIRE); })
ce426f
+  ({ __atomic_check_size_ls((mem));					      \
ce426f
+     __atomic_load_n ((mem), __ATOMIC_ACQUIRE); })
ce426f
 
ce426f
 # define atomic_store_relaxed(mem, val) \
ce426f
   do {									      \
ce426f
-    __atomic_check_size((mem));						      \
ce426f
+    __atomic_check_size_ls((mem));					      \
ce426f
     __atomic_store_n ((mem), (val), __ATOMIC_RELAXED);			      \
ce426f
   } while (0)
ce426f
 # define atomic_store_release(mem, val) \
ce426f
   do {									      \
ce426f
-    __atomic_check_size((mem));						      \
ce426f
+    __atomic_check_size_ls((mem));					      \
ce426f
     __atomic_store_n ((mem), (val), __ATOMIC_RELEASE);			      \
ce426f
   } while (0)
ce426f
 
ce426f
Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-lock.c
ce426f
===================================================================
ce426f
--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/elision-lock.c
ce426f
+++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-lock.c
ce426f
@@ -44,7 +44,13 @@
ce426f
 int
ce426f
 __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
ce426f
 {
ce426f
-  if (*adapt_count <= 0)
ce426f
+  /* adapt_count can be accessed concurrently; these accesses can be both
ce426f
+     inside of transactions (if critical sections are nested and the outer
ce426f
+     critical section uses lock elision) and outside of transactions.  Thus,
ce426f
+     we need to use atomic accesses to avoid data races.  However, the
ce426f
+     value of adapt_count is just a hint, so relaxed MO accesses are
ce426f
+     sufficient.  */
ce426f
+  if (atomic_load_relaxed (adapt_count) <= 0)
ce426f
     {
ce426f
       unsigned status;
ce426f
       int try_xbegin;
ce426f
@@ -70,15 +76,20 @@ __lll_lock_elision (int *futex, short *a
ce426f
 			&& _XABORT_CODE (status) == _ABORT_LOCK_BUSY)
ce426f
 	        {
ce426f
 		  /* Right now we skip here.  Better would be to wait a bit
ce426f
-		     and retry.  This likely needs some spinning.  */
ce426f
-		  if (*adapt_count != aconf.skip_lock_busy)
ce426f
-		    *adapt_count = aconf.skip_lock_busy;
ce426f
+		     and retry.  This likely needs some spinning.  See
ce426f
+		     above for why relaxed MO is sufficient.  */
ce426f
+		  if (atomic_load_relaxed (adapt_count)
ce426f
+		      != aconf.skip_lock_busy)
ce426f
+		    atomic_store_relaxed (adapt_count, aconf.skip_lock_busy);
ce426f
 		}
ce426f
 	      /* Internal abort.  There is no chance for retry.
ce426f
 		 Use the normal locking and next time use lock.
ce426f
-		 Be careful to avoid writing to the lock.  */
ce426f
-	      else if (*adapt_count != aconf.skip_lock_internal_abort)
ce426f
-		*adapt_count = aconf.skip_lock_internal_abort;
ce426f
+		 Be careful to avoid writing to the lock.  See above for why
ce426f
+		 relaxed MO is sufficient.  */
ce426f
+	      else if (atomic_load_relaxed (adapt_count)
ce426f
+		  != aconf.skip_lock_internal_abort)
ce426f
+		atomic_store_relaxed (adapt_count,
ce426f
+		    aconf.skip_lock_internal_abort);
ce426f
 	      break;
ce426f
 	    }
ce426f
 	}
ce426f
@@ -87,7 +98,8 @@ __lll_lock_elision (int *futex, short *a
ce426f
     {
ce426f
       /* Use a normal lock until the threshold counter runs out.
ce426f
 	 Lost updates possible.  */
ce426f
-      (*adapt_count)--;
ce426f
+      atomic_store_relaxed (adapt_count,
ce426f
+	  atomic_load_relaxed (adapt_count) - 1);
ce426f
     }
ce426f
 
ce426f
   /* Use a normal lock here.  */
ce426f
Index: glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-trylock.c
ce426f
===================================================================
ce426f
--- glibc-2.17-c758a686.orig/nptl/sysdeps/unix/sysv/linux/x86/elision-trylock.c
ce426f
+++ glibc-2.17-c758a686/nptl/sysdeps/unix/sysv/linux/x86/elision-trylock.c
ce426f
@@ -36,8 +36,10 @@ __lll_trylock_elision (int *futex, short
ce426f
      return an error.  */
ce426f
   _xabort (_ABORT_NESTED_TRYLOCK);
ce426f
 
ce426f
-  /* Only try a transaction if it's worth it.  */
ce426f
-  if (*adapt_count <= 0)
ce426f
+  /* Only try a transaction if it's worth it.  See __lll_lock_elision for
ce426f
+     why we need atomic accesses.  Relaxed MO is sufficient because this is
ce426f
+     just a hint.  */
ce426f
+  if (atomic_load_relaxed (adapt_count) <= 0)
ce426f
     {
ce426f
       unsigned status;
ce426f
 
ce426f
@@ -55,16 +57,18 @@ __lll_trylock_elision (int *futex, short
ce426f
       if (!(status & _XABORT_RETRY))
ce426f
         {
ce426f
           /* Internal abort.  No chance for retry.  For future
ce426f
-             locks don't try speculation for some time.  */
ce426f
-          if (*adapt_count != aconf.skip_trylock_internal_abort)
ce426f
-            *adapt_count = aconf.skip_trylock_internal_abort;
ce426f
+             locks don't try speculation for some time.  See above for MO.  */
ce426f
+          if (atomic_load_relaxed (adapt_count)
ce426f
+              != aconf.skip_lock_internal_abort)
ce426f
+            atomic_store_relaxed (adapt_count, aconf.skip_lock_internal_abort);
ce426f
         }
ce426f
       /* Could do some retries here.  */
ce426f
     }
ce426f
   else
ce426f
     {
ce426f
-      /* Lost updates are possible, but harmless.  */
ce426f
-      (*adapt_count)--;
ce426f
+      /* Lost updates are possible but harmless (see above).  */
ce426f
+      atomic_store_relaxed (adapt_count,
ce426f
+	  atomic_load_relaxed (adapt_count) - 1);
ce426f
     }
ce426f
 
ce426f
   return lll_trylock (*futex);