93dc2d
commit e3e589829d16af9f7e73c7b70f74f3c5d5003e45
93dc2d
Author: Florian Weimer <fweimer@redhat.com>
93dc2d
Date:   Thu Dec 9 09:49:32 2021 +0100
93dc2d
93dc2d
    nptl: Add glibc.pthread.rseq tunable to control rseq registration
93dc2d
    
93dc2d
    This tunable allows applications to register the rseq area instead
93dc2d
    of glibc.
93dc2d
    
93dc2d
    Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
93dc2d
    Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
93dc2d
93dc2d
diff --git a/manual/tunables.texi b/manual/tunables.texi
93dc2d
index 658547c6137bf177..1f5c410288eeecec 100644
93dc2d
--- a/manual/tunables.texi
93dc2d
+++ b/manual/tunables.texi
93dc2d
@@ -413,6 +413,16 @@ The value is measured in bytes.  The default is @samp{41943040}
93dc2d
 (fourty mibibytes).
93dc2d
 @end deftp
93dc2d
 
93dc2d
+@deftp Tunable glibc.pthread.rseq
93dc2d
+The @code{glibc.pthread.rseq} tunable can be set to @samp{0} to disable
93dc2d
+restartable sequences support in @theglibc{}.  This enables applications
93dc2d
+to perform direct restartable sequence registration with the kernel.
93dc2d
+The default is @samp{1}, which means that @theglibc{} performs
93dc2d
+registration on behalf of the application.
93dc2d
+
93dc2d
+Restartable sequences are a Linux-specific extension.
93dc2d
+@end deftp
93dc2d
+
93dc2d
 @node Hardware Capability Tunables
93dc2d
 @section Hardware Capability Tunables
93dc2d
 @cindex hardware capability tunables
93dc2d
diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
93dc2d
index d2b40924dafad316..f405fa356c2955ce 100644
93dc2d
--- a/nptl/pthread_create.c
93dc2d
+++ b/nptl/pthread_create.c
93dc2d
@@ -369,7 +369,10 @@ start_thread (void *arg)
93dc2d
   __ctype_init ();
93dc2d
 
93dc2d
   /* Register rseq TLS to the kernel.  */
93dc2d
-  rseq_register_current_thread (pd);
93dc2d
+  {
93dc2d
+    bool do_rseq = THREAD_GETMEM (pd, flags) & ATTR_FLAG_DO_RSEQ;
93dc2d
+    rseq_register_current_thread (pd, do_rseq);
93dc2d
+  }
93dc2d
 
93dc2d
 #ifndef __ASSUME_SET_ROBUST_LIST
93dc2d
   if (__nptl_set_robust_list_avail)
93dc2d
@@ -678,6 +681,11 @@ __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
93dc2d
   pd->flags = ((iattr->flags & ~(ATTR_FLAG_SCHED_SET | ATTR_FLAG_POLICY_SET))
93dc2d
 	       | (self->flags & (ATTR_FLAG_SCHED_SET | ATTR_FLAG_POLICY_SET)));
93dc2d
 
93dc2d
+  /* Inherit rseq registration state.  Without seccomp filters, rseq
93dc2d
+     registration will either always fail or always succeed.  */
93dc2d
+  if ((int) THREAD_GETMEM_VOLATILE (self, rseq_area.cpu_id) >= 0)
93dc2d
+    pd->flags |= ATTR_FLAG_DO_RSEQ;
93dc2d
+
93dc2d
   /* Initialize the field for the ID of the thread which is waiting
93dc2d
      for us.  This is a self-reference in case the thread is created
93dc2d
      detached.  */
93dc2d
diff --git a/sysdeps/nptl/dl-tls_init_tp.c b/sysdeps/nptl/dl-tls_init_tp.c
93dc2d
index fedb876fdb2642d2..b39dfbff2c6678d5 100644
93dc2d
--- a/sysdeps/nptl/dl-tls_init_tp.c
93dc2d
+++ b/sysdeps/nptl/dl-tls_init_tp.c
93dc2d
@@ -23,6 +23,9 @@
93dc2d
 #include <tls.h>
93dc2d
 #include <rseq-internal.h>
93dc2d
 
93dc2d
+#define TUNABLE_NAMESPACE pthread
93dc2d
+#include <dl-tunables.h>
93dc2d
+
93dc2d
 #ifndef __ASSUME_SET_ROBUST_LIST
93dc2d
 bool __nptl_set_robust_list_avail;
93dc2d
 rtld_hidden_data_def (__nptl_set_robust_list_avail)
93dc2d
@@ -92,7 +95,13 @@ __tls_init_tp (void)
93dc2d
       }
93dc2d
   }
93dc2d
 
93dc2d
-  rseq_register_current_thread (pd);
93dc2d
+  {
93dc2d
+    bool do_rseq = true;
93dc2d
+#if HAVE_TUNABLES
93dc2d
+    do_rseq = TUNABLE_GET (rseq, int, NULL);
93dc2d
+#endif
93dc2d
+    rseq_register_current_thread (pd, do_rseq);
93dc2d
+  }
93dc2d
 
93dc2d
   /* Set initial thread's stack block from 0 up to __libc_stack_end.
93dc2d
      It will be bigger than it actually is, but for unwind.c/pt-longjmp.c
93dc2d
diff --git a/sysdeps/nptl/dl-tunables.list b/sysdeps/nptl/dl-tunables.list
93dc2d
index ac5d053298725468..d24f4be0d08ba407 100644
93dc2d
--- a/sysdeps/nptl/dl-tunables.list
93dc2d
+++ b/sysdeps/nptl/dl-tunables.list
93dc2d
@@ -27,5 +27,11 @@ glibc {
93dc2d
       type: SIZE_T
93dc2d
       default: 41943040
93dc2d
     }
93dc2d
+    rseq {
93dc2d
+      type: INT_32
93dc2d
+      minval: 0
93dc2d
+      maxval: 1
93dc2d
+      default: 1
93dc2d
+    }
93dc2d
   }
93dc2d
 }
93dc2d
diff --git a/sysdeps/nptl/internaltypes.h b/sysdeps/nptl/internaltypes.h
93dc2d
index 50a2ad19ae7210ae..8205c6d15a918952 100644
93dc2d
--- a/sysdeps/nptl/internaltypes.h
93dc2d
+++ b/sysdeps/nptl/internaltypes.h
93dc2d
@@ -49,6 +49,7 @@ struct pthread_attr
93dc2d
 #define ATTR_FLAG_OLDATTR		0x0010
93dc2d
 #define ATTR_FLAG_SCHED_SET		0x0020
93dc2d
 #define ATTR_FLAG_POLICY_SET		0x0040
93dc2d
+#define ATTR_FLAG_DO_RSEQ		0x0080
93dc2d
 
93dc2d
 /* Used to allocate a pthread_attr_t object which is also accessed
93dc2d
    internally.  */
93dc2d
diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
93dc2d
index f84ccd6bbb3b16ad..d30d21898b402d1e 100644
93dc2d
--- a/sysdeps/unix/sysv/linux/Makefile
93dc2d
+++ b/sysdeps/unix/sysv/linux/Makefile
93dc2d
@@ -135,6 +135,12 @@ tests-internal += \
93dc2d
   tst-sigcontext-get_pc \
93dc2d
   # tests-internal
93dc2d
 
93dc2d
+ifneq (no,$(have-tunables))
93dc2d
+tests-internal += \
93dc2d
+  tst-rseq-disable \
93dc2d
+  # tests-internal $(have-tunables)
93dc2d
+endif
93dc2d
+
93dc2d
 tests-time64 += \
93dc2d
   tst-adjtimex-time64 \
93dc2d
   tst-clock_adjtime-time64 \
93dc2d
@@ -226,6 +232,8 @@ $(objpfx)tst-mman-consts.out: ../sysdeps/unix/sysv/linux/tst-mman-consts.py
93dc2d
 	  < /dev/null > $@ 2>&1; $(evaluate-test)
93dc2d
 $(objpfx)tst-mman-consts.out: $(sysdeps-linux-python-deps)
93dc2d
 
93dc2d
+tst-rseq-disable-ENV = GLIBC_TUNABLES=glibc.pthread.rseq=0
93dc2d
+
93dc2d
 endif # $(subdir) == misc
93dc2d
 
93dc2d
 ifeq ($(subdir),time)
93dc2d
diff --git a/sysdeps/unix/sysv/linux/rseq-internal.h b/sysdeps/unix/sysv/linux/rseq-internal.h
93dc2d
index 909f5478251d3d13..15bc7ffd6eda632d 100644
93dc2d
--- a/sysdeps/unix/sysv/linux/rseq-internal.h
93dc2d
+++ b/sysdeps/unix/sysv/linux/rseq-internal.h
93dc2d
@@ -21,22 +21,27 @@
93dc2d
 #include <sysdep.h>
93dc2d
 #include <errno.h>
93dc2d
 #include <kernel-features.h>
93dc2d
+#include <stdbool.h>
93dc2d
 #include <stdio.h>
93dc2d
 #include <sys/rseq.h>
93dc2d
 
93dc2d
 #ifdef RSEQ_SIG
93dc2d
 static inline void
93dc2d
-rseq_register_current_thread (struct pthread *self)
93dc2d
+rseq_register_current_thread (struct pthread *self, bool do_rseq)
93dc2d
 {
93dc2d
-  int ret = INTERNAL_SYSCALL_CALL (rseq,
93dc2d
-                                   &self->rseq_area, sizeof (self->rseq_area),
93dc2d
-                                   0, RSEQ_SIG);
93dc2d
-  if (INTERNAL_SYSCALL_ERROR_P (ret))
93dc2d
-    THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
93dc2d
+  if (do_rseq)
93dc2d
+    {
93dc2d
+      int ret = INTERNAL_SYSCALL_CALL (rseq, &self->rseq_area,
93dc2d
+                                       sizeof (self->rseq_area),
93dc2d
+                                       0, RSEQ_SIG);
93dc2d
+      if (!INTERNAL_SYSCALL_ERROR_P (ret))
93dc2d
+        return;
93dc2d
+    }
93dc2d
+  THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
93dc2d
 }
93dc2d
 #else /* RSEQ_SIG */
93dc2d
 static inline void
93dc2d
-rseq_register_current_thread (struct pthread *self)
93dc2d
+rseq_register_current_thread (struct pthread *self, bool do_rseq)
93dc2d
 {
93dc2d
   THREAD_SETMEM (self, rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
93dc2d
 }
93dc2d
diff --git a/sysdeps/unix/sysv/linux/tst-rseq-disable.c b/sysdeps/unix/sysv/linux/tst-rseq-disable.c
93dc2d
new file mode 100644
93dc2d
index 0000000000000000..000e351872fc2f76
93dc2d
--- /dev/null
93dc2d
+++ b/sysdeps/unix/sysv/linux/tst-rseq-disable.c
93dc2d
@@ -0,0 +1,89 @@
93dc2d
+/* Test disabling of rseq registration via tunable.
93dc2d
+   Copyright (C) 2021 Free Software Foundation, Inc.
93dc2d
+
93dc2d
+   The GNU C Library is free software; you can redistribute it and/or
93dc2d
+   modify it under the terms of the GNU Lesser General Public
93dc2d
+   License as published by the Free Software Foundation; either
93dc2d
+   version 2.1 of the License, or (at your option) any later version.
93dc2d
+
93dc2d
+   The GNU C Library is distributed in the hope that it will be useful,
93dc2d
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
93dc2d
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
93dc2d
+   Lesser General Public License for more details.
93dc2d
+
93dc2d
+   You should have received a copy of the GNU Lesser General Public
93dc2d
+   License along with the GNU C Library; if not, see
93dc2d
+   <https://www.gnu.org/licenses/>.  */
93dc2d
+
93dc2d
+#include <errno.h>
93dc2d
+#include <stdio.h>
93dc2d
+#include <support/check.h>
93dc2d
+#include <support/namespace.h>
93dc2d
+#include <support/xthread.h>
93dc2d
+#include <sysdep.h>
93dc2d
+#include <unistd.h>
93dc2d
+
93dc2d
+#ifdef RSEQ_SIG
93dc2d
+
93dc2d
+/* Check that rseq can be registered and has not been taken by glibc.  */
93dc2d
+static void
93dc2d
+check_rseq_disabled (void)
93dc2d
+{
93dc2d
+  struct pthread *pd = THREAD_SELF;
93dc2d
+  TEST_COMPARE ((int) pd->rseq_area.cpu_id, RSEQ_CPU_ID_REGISTRATION_FAILED);
93dc2d
+
93dc2d
+  int ret = syscall (__NR_rseq, &pd->rseq_area, sizeof (pd->rseq_area),
93dc2d
+                     0, RSEQ_SIG);
93dc2d
+  if (ret == 0)
93dc2d
+    {
93dc2d
+      ret = syscall (__NR_rseq, &pd->rseq_area, sizeof (pd->rseq_area),
93dc2d
+                     RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
93dc2d
+      TEST_COMPARE (ret, 0);
93dc2d
+      pd->rseq_area.cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED;
93dc2d
+    }
93dc2d
+  else
93dc2d
+    {
93dc2d
+      TEST_VERIFY (errno != EINVAL);  /* errno values are positive.  */
93dc2d
+      TEST_VERIFY (errno != EBUSY);
93dc2d
+    }
93dc2d
+}
93dc2d
+
93dc2d
+static void *
93dc2d
+thread_func (void *ignored)
93dc2d
+{
93dc2d
+  check_rseq_disabled ();
93dc2d
+  return NULL;
93dc2d
+}
93dc2d
+
93dc2d
+static void
93dc2d
+proc_func (void *ignored)
93dc2d
+{
93dc2d
+  check_rseq_disabled ();
93dc2d
+}
93dc2d
+
93dc2d
+static int
93dc2d
+do_test (void)
93dc2d
+{
93dc2d
+  puts ("info: checking main thread");
93dc2d
+  check_rseq_disabled ();
93dc2d
+
93dc2d
+  puts ("info: checking main thread (2)");
93dc2d
+  check_rseq_disabled ();
93dc2d
+
93dc2d
+  puts ("info: checking new thread");
93dc2d
+  xpthread_join (xpthread_create (NULL, thread_func, NULL));
93dc2d
+
93dc2d
+  puts ("info: checking subprocess");
93dc2d
+  support_isolate_in_subprocess (proc_func, NULL);
93dc2d
+
93dc2d
+  return 0;
93dc2d
+}
93dc2d
+#else /* !RSEQ_SIG */
93dc2d
+static int
93dc2d
+do_test (void)
93dc2d
+{
93dc2d
+  FAIL_UNSUPPORTED ("glibc does not define RSEQ_SIG, skipping test");
93dc2d
+}
93dc2d
+#endif
93dc2d
+
93dc2d
+#include <support/test-driver.c>