commit 8f9450a0b7a9e78267e8ae1ab1000ebca08e473e
Author: Torvald Riegel <triegel@redhat.com>
Date:   Sat Dec 24 00:40:46 2016 +0100

    Add compiler barriers around modifications of the robust mutex list.

    Changes to the per-thread list of currently acquired robust mutexes, and
    to the pending-operations entry, are not simply sequential code: they run
    effectively concurrently with the actions the kernel takes when it cleans
    up after a crash.  This is not quite like multi-thread concurrency but
    more like signal-handler concurrency.
    This patch fixes latent bugs by adding compiler barriers where necessary,
    ensuring that the kernel's crash handling always sees consistent data.
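
    To illustrate the ordering these barriers enforce, here is a minimal,
    compilable sketch of the acquire path.  The struct and function names are
    simplified stand-ins, not glibc's real ones; only the barrier placement
    mirrors this patch:

        /* sketch.c -- illustration only; all names are hypothetical.  */
        struct mutex { struct mutex *next; int lock; };
        struct thread { struct mutex *list; struct mutex *op_pending; };

        static void
        robust_acquire (struct thread *self, struct mutex *m)
        {
          /* Announce the pending operation before doing anything the
             kernel could observe after a crash.  */
          self->op_pending = m;
          __asm ("" ::: "memory");
          m->lock = 1;              /* Stand-in for actually acquiring.  */
          /* We must not enqueue the mutex before we own it.  */
          __asm ("" ::: "memory");
          m->next = self->list;     /* Prepare the new entry fully...  */
          __asm ("" ::: "memory");
          self->list = m;           /* ...before publishing it.  */
          /* Clear op_pending only after the mutex is enqueued.  */
          __asm ("" ::: "memory");
          self->op_pending = NULL;
        }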

    This is meant to be easy to backport, so we do not use C11-style signal
    fences yet.
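
    (For reference, the C11-style replacement for each of these barriers
    would presumably be a signal fence, which is likewise compiler-only and
    emits no machine instruction; a hypothetical helper:

        #include <stdatomic.h>

        static inline void
        robust_list_compiler_barrier (void)
        {
          /* Equivalent of __asm ("" ::: "memory") for this purpose.  */
          atomic_signal_fence (memory_order_seq_cst);
        }

    combined with relaxed-MO atomic accesses to the list words, as the FIXME
    added to descr.h notes.)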
        * nptl/descr.h (ENQUEUE_MUTEX_BOTH, DEQUEUE_MUTEX): Add compiler
        barriers and comments.
        * nptl/pthread_mutex_lock.c (__pthread_mutex_lock_full): Likewise.
        * nptl/pthread_mutex_timedlock.c (pthread_mutex_timedlock): Likewise.
        * nptl/pthread_mutex_unlock.c (__pthread_mutex_unlock_full): Likewise.

Index: glibc-2.17-c758a686/nptl/descr.h
===================================================================
--- glibc-2.17-c758a686.orig/nptl/descr.h
+++ glibc-2.17-c758a686/nptl/descr.h
@@ -180,7 +180,16 @@ struct pthread
      but the pointer to the next/previous element of the list points
      in the middle of the object, the __next element.  Whenever
      casting to __pthread_list_t we need to adjust the pointer
-     first.  */
+     first.
+     These operations are effectively concurrent code in that the thread
+     can get killed at any point in time and the kernel takes over.  Thus,
+     the __next elements are a kind of concurrent list and we need to
+     enforce using compiler barriers that the individual operations happen
+     in such a way that the kernel always sees a consistent list.  The
+     backward links (ie, the __prev elements) are not used by the kernel.
+     FIXME We should use relaxed MO atomic operations here and signal fences
+     because this kind of concurrency is similar to synchronizing with a
+     signal handler.  */
 # define QUEUE_PTR_ADJUST (offsetof (__pthread_list_t, __next))
 
 # define ENQUEUE_MUTEX_BOTH(mutex, val)					      \
@@ -192,6 +201,8 @@ struct pthread
     mutex->__data.__list.__next = THREAD_GETMEM (THREAD_SELF,		      \
 						 robust_head.list);	      \
     mutex->__data.__list.__prev = (void *) &THREAD_SELF->robust_head;	      \
+    /* Ensure that the new list entry is ready before we insert it.  */	      \
+    __asm ("" ::: "memory");						      \
     THREAD_SETMEM (THREAD_SELF, robust_head.list,			      \
 		   (void *) (((uintptr_t) &mutex->__data.__list.__next)	      \
 			     | val));					      \
@@ -206,6 +217,9 @@ struct pthread
       ((char *) (((uintptr_t) mutex->__data.__list.__prev) & ~1ul)	      \
        - QUEUE_PTR_ADJUST);						      \
     prev->__next = mutex->__data.__list.__next;				      \
+    /* Ensure that we remove the entry from the list before we change the     \
+       __next pointer of the entry, which is read by the kernel.  */	      \
+    __asm ("" ::: "memory");						      \
     mutex->__data.__list.__prev = NULL;					      \
     mutex->__data.__list.__next = NULL;					      \
   } while (0)
@@ -220,6 +234,8 @@ struct pthread
   do {									      \
     mutex->__data.__list.__next						      \
       = THREAD_GETMEM (THREAD_SELF, robust_list.__next);		      \
+    /* Ensure that the new list entry is ready before we insert it.  */	      \
+    __asm ("" ::: "memory");						      \
     THREAD_SETMEM (THREAD_SELF, robust_list.__next,			      \
 		   (void *) (((uintptr_t) &mutex->__data.__list) | val));     \
   } while (0)
@@ -240,6 +256,9 @@ struct pthread
 	  }								      \
 									      \
 	runp->__next = next->__next;					      \
+	/* Ensure that we remove the entry from the list before we change the \
+	   __next pointer of the entry, which is read by the kernel.  */      \
+	__asm ("" ::: "memory");					      \
 	mutex->__data.__list.__next = NULL;				      \
       }									      \
   } while (0)
Index: glibc-2.17-c758a686/nptl/pthread_mutex_lock.c
===================================================================
--- glibc-2.17-c758a686.orig/nptl/pthread_mutex_lock.c
+++ glibc-2.17-c758a686/nptl/pthread_mutex_lock.c
@@ -181,6 +181,9 @@ __pthread_mutex_lock_full (pthread_mutex
     case PTHREAD_MUTEX_ROBUST_ADAPTIVE_NP:
       THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
 		     &mutex->__data.__list.__next);
+      /* We need to set op_pending before starting the operation.  Also
+	 see comments at ENQUEUE_MUTEX.  */
+      __asm ("" ::: "memory");
 
       oldval = mutex->__data.__lock;
       /* This is set to FUTEX_WAITERS iff we might have shared the
@@ -228,7 +231,12 @@ __pthread_mutex_lock_full (pthread_mutex
 	      /* But it is inconsistent unless marked otherwise.  */
 	      mutex->__data.__owner = PTHREAD_MUTEX_INCONSISTENT;
 
+	      /* We must not enqueue the mutex before we have acquired it.
+		 Also see comments at ENQUEUE_MUTEX.  */
+	      __asm ("" ::: "memory");
 	      ENQUEUE_MUTEX (mutex);
+	      /* We need to clear op_pending after we enqueue the mutex.  */
+	      __asm ("" ::: "memory");
 	      THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
 
 	      /* Note that we deliberately exit here.  If we fall
@@ -250,6 +258,8 @@ __pthread_mutex_lock_full (pthread_mutex
 	      int kind = PTHREAD_MUTEX_TYPE (mutex);
 	      if (kind == PTHREAD_MUTEX_ROBUST_ERRORCHECK_NP)
 		{
+		  /* We do not need to ensure ordering wrt another memory
+		     access.  Also see comments at ENQUEUE_MUTEX.  */
 		  THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
 				 NULL);
 		  return EDEADLK;
@@ -257,6 +267,8 @@ __pthread_mutex_lock_full (pthread_mutex
 
 	      if (kind == PTHREAD_MUTEX_ROBUST_RECURSIVE_NP)
 		{
+		  /* We do not need to ensure ordering wrt another memory
+		     access.  */
 		  THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
 				 NULL);
 
@@ -309,12 +321,19 @@ __pthread_mutex_lock_full (pthread_mutex
 	  mutex->__data.__count = 0;
 	  int private = PTHREAD_ROBUST_MUTEX_PSHARED (mutex);
 	  lll_unlock (mutex->__data.__lock, private);
+	  /* FIXME This violates the mutex destruction requirements.  See
+	     __pthread_mutex_unlock_full.  */
 	  THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
 	  return ENOTRECOVERABLE;
 	}
 
       mutex->__data.__count = 1;
+      /* We must not enqueue the mutex before we have acquired it.
+	 Also see comments at ENQUEUE_MUTEX.  */
+      __asm ("" ::: "memory");
       ENQUEUE_MUTEX (mutex);
+      /* We need to clear op_pending after we enqueue the mutex.  */
+      __asm ("" ::: "memory");
       THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
       break;
 
@@ -331,10 +350,15 @@ __pthread_mutex_lock_full (pthread_mutex
 	int robust = mutex->__data.__kind & PTHREAD_MUTEX_ROBUST_NORMAL_NP;
 
 	if (robust)
-	  /* Note: robust PI futexes are signaled by setting bit 0.  */
-	  THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
-			 (void *) (((uintptr_t) &mutex->__data.__list.__next)
-				   | 1));
+	  {
+	    /* Note: robust PI futexes are signaled by setting bit 0.  */
+	    THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
+			   (void *) (((uintptr_t) &mutex->__data.__list.__next)
+				     | 1));
+	    /* We need to set op_pending before starting the operation.  Also
+	       see comments at ENQUEUE_MUTEX.  */
+	    __asm ("" ::: "memory");
+	  }
 
 	oldval = mutex->__data.__lock;
 
@@ -343,12 +367,16 @@ __pthread_mutex_lock_full (pthread_mutex
 	  {
 	    if (kind == PTHREAD_MUTEX_ERRORCHECK_NP)
 	      {
+		/* We do not need to ensure ordering wrt another memory
+		   access.  */
 		THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
 		return EDEADLK;
 	      }
 
 	    if (kind == PTHREAD_MUTEX_RECURSIVE_NP)
 	      {
+		/* We do not need to ensure ordering wrt another memory
+		   access.  */
 		THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
 
 		/* Just bump the counter.  */
@@ -411,7 +439,12 @@ __pthread_mutex_lock_full (pthread_mutex
 	    /* But it is inconsistent unless marked otherwise.  */
 	    mutex->__data.__owner = PTHREAD_MUTEX_INCONSISTENT;
 
+	    /* We must not enqueue the mutex before we have acquired it.
+	       Also see comments at ENQUEUE_MUTEX.  */
+	    __asm ("" ::: "memory");
 	    ENQUEUE_MUTEX_PI (mutex);
+	    /* We need to clear op_pending after we enqueue the mutex.  */
+	    __asm ("" ::: "memory");
 	    THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
 
 	    /* Note that we deliberately exit here.  If we fall
@@ -439,6 +472,8 @@ __pthread_mutex_lock_full (pthread_mutex
 						  PTHREAD_ROBUST_MUTEX_PSHARED (mutex)),
 			      0, 0);
 
+	    /* To the kernel, this will be visible after the kernel has
+	       acquired the mutex in the syscall.  */
 	    THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
 	    return ENOTRECOVERABLE;
 	  }
@@ -446,7 +481,12 @@ __pthread_mutex_lock_full (pthread_mutex
 	mutex->__data.__count = 1;
 	if (robust)
 	  {
+	    /* We must not enqueue the mutex before we have acquired it.
+	       Also see comments at ENQUEUE_MUTEX.  */
+	    __asm ("" ::: "memory");
 	    ENQUEUE_MUTEX_PI (mutex);
+	    /* We need to clear op_pending after we enqueue the mutex.  */
+	    __asm ("" ::: "memory");
 	    THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
 	  }
       }
Index: glibc-2.17-c758a686/nptl/pthread_mutex_timedlock.c
===================================================================
--- glibc-2.17-c758a686.orig/nptl/pthread_mutex_timedlock.c
+++ glibc-2.17-c758a686/nptl/pthread_mutex_timedlock.c
@@ -140,6 +140,9 @@ pthread_mutex_timedlock (pthread_mutex_t
     case PTHREAD_MUTEX_ROBUST_ADAPTIVE_NP:
       THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
 		     &mutex->__data.__list.__next);
+      /* We need to set op_pending before starting the operation.  Also
+	 see comments at ENQUEUE_MUTEX.  */
+      __asm ("" ::: "memory");
 
       oldval = mutex->__data.__lock;
       /* This is set to FUTEX_WAITERS iff we might have shared the
@@ -177,7 +180,12 @@ pthread_mutex_timedlock (pthread_mutex_t
 	      /* But it is inconsistent unless marked otherwise.  */
 	      mutex->__data.__owner = PTHREAD_MUTEX_INCONSISTENT;
 
+	      /* We must not enqueue the mutex before we have acquired it.
+		 Also see comments at ENQUEUE_MUTEX.  */
+	      __asm ("" ::: "memory");
 	      ENQUEUE_MUTEX (mutex);
+	      /* We need to clear op_pending after we enqueue the mutex.  */
+	      __asm ("" ::: "memory");
 	      THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
 
 	      /* Note that we deliberately exit here.  If we fall
@@ -193,6 +201,8 @@ pthread_mutex_timedlock (pthread_mutex_t
 	      int kind = PTHREAD_MUTEX_TYPE (mutex);
 	      if (kind == PTHREAD_MUTEX_ROBUST_ERRORCHECK_NP)
 		{
+		  /* We do not need to ensure ordering wrt another memory
+		     access.  Also see comments at ENQUEUE_MUTEX.  */
 		  THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
 				 NULL);
 		  return EDEADLK;
@@ -200,6 +210,8 @@ pthread_mutex_timedlock (pthread_mutex_t
 
 	      if (kind == PTHREAD_MUTEX_ROBUST_RECURSIVE_NP)
 		{
+		  /* We do not need to ensure ordering wrt another memory
+		     access.  */
 		  THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
 				 NULL);
 
@@ -294,12 +306,19 @@ pthread_mutex_timedlock (pthread_mutex_t
 	  mutex->__data.__count = 0;
 	  int private = PTHREAD_ROBUST_MUTEX_PSHARED (mutex);
 	  lll_unlock (mutex->__data.__lock, private);
+	  /* FIXME This violates the mutex destruction requirements.  See
+	     __pthread_mutex_unlock_full.  */
 	  THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
 	  return ENOTRECOVERABLE;
 	}
 
       mutex->__data.__count = 1;
+      /* We must not enqueue the mutex before we have acquired it.
+	 Also see comments at ENQUEUE_MUTEX.  */
+      __asm ("" ::: "memory");
       ENQUEUE_MUTEX (mutex);
+      /* We need to clear op_pending after we enqueue the mutex.  */
+      __asm ("" ::: "memory");
       THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
       break;
 
@@ -316,10 +335,15 @@ pthread_mutex_timedlock (pthread_mutex_t
 	int robust = mutex->__data.__kind & PTHREAD_MUTEX_ROBUST_NORMAL_NP;
 
 	if (robust)
-	  /* Note: robust PI futexes are signaled by setting bit 0.  */
-	  THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
-			 (void *) (((uintptr_t) &mutex->__data.__list.__next)
-				   | 1));
+	  {
+	    /* Note: robust PI futexes are signaled by setting bit 0.  */
+	    THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
+			   (void *) (((uintptr_t) &mutex->__data.__list.__next)
+				     | 1));
+	    /* We need to set op_pending before starting the operation.  Also
+	       see comments at ENQUEUE_MUTEX.  */
+	    __asm ("" ::: "memory");
+	  }
 
 	oldval = mutex->__data.__lock;
 
@@ -328,12 +352,16 @@ pthread_mutex_timedlock (pthread_mutex_t
 	  {
 	    if (kind == PTHREAD_MUTEX_ERRORCHECK_NP)
 	      {
+		/* We do not need to ensure ordering wrt another memory
+		   access.  */
 		THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
 		return EDEADLK;
 	      }
 
 	    if (kind == PTHREAD_MUTEX_RECURSIVE_NP)
 	      {
+		/* We do not need to ensure ordering wrt another memory
+		   access.  */
 		THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
 
 		/* Just bump the counter.  */
@@ -420,7 +448,12 @@ pthread_mutex_timedlock (pthread_mutex_t
 	    /* But it is inconsistent unless marked otherwise.  */
 	    mutex->__data.__owner = PTHREAD_MUTEX_INCONSISTENT;
 
+	    /* We must not enqueue the mutex before we have acquired it.
+	       Also see comments at ENQUEUE_MUTEX.  */
+	    __asm ("" ::: "memory");
 	    ENQUEUE_MUTEX_PI (mutex);
+	    /* We need to clear op_pending after we enqueue the mutex.  */
+	    __asm ("" ::: "memory");
 	    THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
 
 	    /* Note that we deliberately exit here.  If we fall
@@ -443,6 +476,8 @@ pthread_mutex_timedlock (pthread_mutex_t
 						  PTHREAD_ROBUST_MUTEX_PSHARED (mutex)),
 			      0, 0);
 
+	    /* To the kernel, this will be visible after the kernel has
+	       acquired the mutex in the syscall.  */
 	    THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
 	    return ENOTRECOVERABLE;
 	  }
@@ -450,7 +485,12 @@ pthread_mutex_timedlock (pthread_mutex_t
 	mutex->__data.__count = 1;
 	if (robust)
 	  {
+	    /* We must not enqueue the mutex before we have acquired it.
+	       Also see comments at ENQUEUE_MUTEX.  */
+	    __asm ("" ::: "memory");
 	    ENQUEUE_MUTEX_PI (mutex);
+	    /* We need to clear op_pending after we enqueue the mutex.  */
+	    __asm ("" ::: "memory");
 	    THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
 	  }
 	}
Index: glibc-2.17-c758a686/nptl/pthread_mutex_unlock.c
===================================================================
--- glibc-2.17-c758a686.orig/nptl/pthread_mutex_unlock.c
+++ glibc-2.17-c758a686/nptl/pthread_mutex_unlock.c
@@ -143,6 +143,9 @@ __pthread_mutex_unlock_full (pthread_mut
       /* Remove mutex from the list.  */
       THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
 		     &mutex->__data.__list.__next);
+      /* We must set op_pending before we dequeue the mutex.  Also see
+	 comments at ENQUEUE_MUTEX.  */
+      __asm ("" ::: "memory");
       DEQUEUE_MUTEX (mutex);
 
       mutex->__data.__owner = newowner;
@@ -159,6 +162,14 @@ __pthread_mutex_unlock_full (pthread_mut
 			     & FUTEX_WAITERS) != 0))
 	lll_futex_wake (&mutex->__data.__lock, 1, private);
 
+      /* We must clear op_pending after we release the mutex.
+	 FIXME However, this violates the mutex destruction requirements
+	 because another thread could acquire the mutex, destroy it, and
+	 reuse the memory for something else; then, if this thread crashes,
+	 and the memory happens to have a value equal to the TID, the kernel
+	 will believe it is still related to the mutex (which has been
+	 destroyed already) and will modify some other random object.  */
+      __asm ("" ::: "memory");
       THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
       break;
 
@@ -223,6 +234,9 @@ __pthread_mutex_unlock_full (pthread_mut
 	  THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
 			 (void *) (((uintptr_t) &mutex->__data.__list.__next)
 				   | 1));
+	  /* We must set op_pending before we dequeue the mutex.  Also see
+	     comments at ENQUEUE_MUTEX.  */
+	  __asm ("" ::: "memory");
 	  DEQUEUE_MUTEX (mutex);
 	}
 
@@ -247,6 +261,9 @@ __pthread_mutex_unlock_full (pthread_mut
 			    __lll_private_flag (FUTEX_UNLOCK_PI, private));
 	}
 
+      /* This happens after the kernel releases the mutex but violates the
+	 mutex destruction requirements; see comments in the code handling
+	 PTHREAD_MUTEX_ROBUST_NORMAL_NP.  */
       THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending, NULL);
       break;