| commit a8ac8c4725ddb1119764126a8674a04c9dd5aea8 |
| Author: Florian Weimer <fweimer@redhat.com> |
| Date: Mon Sep 13 11:06:08 2021 +0200 |
| |
| nptl: Fix race between pthread_kill and thread exit (bug 12889) |
| |
| A new thread exit lock and flag are introduced. They are used to |
| detect that the thread is about to exit or has exited in |
| __pthread_kill_internal, and the signal is not sent in this case. |
| |
| The test sysdeps/pthread/tst-pthread_cancel-select-loop.c is derived |
| from a downstream test originally written by Marek Polacek. |
| |
| Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org> |
| (cherry picked from commit 526c3cf11ee9367344b6b15d669e4c3cb461a2be) |
| |
| diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c |
| index cfe37a3443b69454..50065bc9bd8a28e5 100644 |
| |
| |
| @@ -32,6 +32,7 @@ |
| #include <futex-internal.h> |
| #include <kernel-features.h> |
| #include <nptl-stack.h> |
| +#include <libc-lock.h> |
| |
| /* Default alignment of stack. */ |
| #ifndef STACK_ALIGN |
| @@ -127,6 +128,8 @@ get_cached_stack (size_t *sizep, void **memp) |
| /* No pending event. */ |
| result->nextevent = NULL; |
| |
| + result->exiting = false; |
| + __libc_lock_init (result->exit_lock); |
| result->tls_state = (struct tls_internal_t) { 0 }; |
| |
| /* Clear the DTV. */ |
| diff --git a/nptl/descr.h b/nptl/descr.h |
| index c85778d44941a42f..4de84138fb960fa4 100644 |
| |
| |
| @@ -396,6 +396,12 @@ struct pthread |
| PTHREAD_CANCEL_ASYNCHRONOUS). */ |
| unsigned char canceltype; |
| |
| + /* Used in __pthread_kill_internal to detected a thread that has |
| + exited or is about to exit. exit_lock must only be acquired |
| + after blocking signals. */ |
| + bool exiting; |
| + int exit_lock; /* A low-level lock (for use with __libc_lock_init etc). */ |
| + |
| /* Used on strsignal. */ |
| struct tls_internal_t tls_state; |
| |
| diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c |
| index d8ec299cb1661e82..33b426fc682300dc 100644 |
| |
| |
| @@ -37,6 +37,7 @@ |
| #include <sys/single_threaded.h> |
| #include <version.h> |
| #include <clone_internal.h> |
| +#include <futex-internal.h> |
| |
| #include <shlib-compat.h> |
| |
| @@ -485,6 +486,19 @@ start_thread (void *arg) |
| /* This was the last thread. */ |
| exit (0); |
| |
| + /* This prevents sending a signal from this thread to itself during |
| + its final stages. This must come after the exit call above |
| + because atexit handlers must not run with signals blocked. */ |
| + __libc_signal_block_all (NULL); |
| + |
| + /* Tell __pthread_kill_internal that this thread is about to exit. |
| + If there is a __pthread_kill_internal in progress, this delays |
| + the thread exit until the signal has been queued by the kernel |
| + (so that the TID used to send it remains valid). */ |
| + __libc_lock_lock (pd->exit_lock); |
| + pd->exiting = true; |
| + __libc_lock_unlock (pd->exit_lock); |
| + |
| #ifndef __ASSUME_SET_ROBUST_LIST |
| /* If this thread has any robust mutexes locked, handle them now. */ |
| # if __PTHREAD_MUTEX_HAVE_PREV |
| diff --git a/nptl/pthread_kill.c b/nptl/pthread_kill.c |
| index 5d4c86f9205a6fb5..fb7862eff787a94f 100644 |
| |
| |
| @@ -16,6 +16,7 @@ |
| License along with the GNU C Library; if not, see |
| <https://www.gnu.org/licenses/>. */ |
| |
| +#include <libc-lock.h> |
| #include <unistd.h> |
| #include <pthreadP.h> |
| #include <shlib-compat.h> |
| @@ -23,37 +24,51 @@ |
| int |
| __pthread_kill_internal (pthread_t threadid, int signo) |
| { |
| - pid_t tid; |
| struct pthread *pd = (struct pthread *) threadid; |
| - |
| if (pd == THREAD_SELF) |
| - /* It is a special case to handle raise() implementation after a vfork |
| - call (which does not update the PD tid field). */ |
| - tid = INLINE_SYSCALL_CALL (gettid); |
| - else |
| - /* Force load of pd->tid into local variable or register. Otherwise |
| - if a thread exits between ESRCH test and tgkill, we might return |
| - EINVAL, because pd->tid would be cleared by the kernel. */ |
| - tid = atomic_forced_read (pd->tid); |
| - |
| - int val; |
| - if (__glibc_likely (tid > 0)) |
| { |
| - pid_t pid = __getpid (); |
| - |
| - val = INTERNAL_SYSCALL_CALL (tgkill, pid, tid, signo); |
| - val = (INTERNAL_SYSCALL_ERROR_P (val) |
| - ? INTERNAL_SYSCALL_ERRNO (val) : 0); |
| + /* Use the actual TID from the kernel, so that it refers to the |
| + current thread even if called after vfork. There is no |
| + signal blocking in this case, so that the signal is delivered |
| + immediately, before __pthread_kill_internal returns: a signal |
| + sent to the thread itself needs to be delivered |
| + synchronously. (It is unclear if Linux guarantees the |
| + delivery of all pending signals after unblocking in the code |
| + below. POSIX only guarantees delivery of a single signal, |
| + which may not be the right one.) */ |
| + pid_t tid = INTERNAL_SYSCALL_CALL (gettid); |
| + int ret = INTERNAL_SYSCALL_CALL (kill, tid, signo); |
| + return INTERNAL_SYSCALL_ERROR_P (ret) ? INTERNAL_SYSCALL_ERRNO (ret) : 0; |
| } |
| + |
| + /* Block all signals, as required by pd->exit_lock. */ |
| + sigset_t old_mask; |
| + __libc_signal_block_all (&old_mask); |
| + __libc_lock_lock (pd->exit_lock); |
| + |
| + int ret; |
| + if (pd->exiting) |
| + /* The thread is about to exit (or has exited). Sending the |
| + signal is either not observable (the target thread has already |
| + blocked signals at this point), or it will fail, or it might be |
| + delivered to a new, unrelated thread that has reused the TID. |
| + So do not actually send the signal. Do not report an error |
| + because the threadid argument is still valid (the thread ID |
| + lifetime has not ended), and ESRCH (for example) would be |
| + misleading. */ |
| + ret = 0; |
| else |
| - /* The kernel reports that the thread has exited. POSIX specifies |
| - the ESRCH error only for the case when the lifetime of a thread |
| - ID has ended, but calling pthread_kill on such a thread ID is |
| - undefined in glibc. Therefore, do not treat kernel thread exit |
| - as an error. */ |
| - val = 0; |
| + { |
| + /* Using tgkill is a safety measure. pd->exit_lock ensures that |
| + the target thread cannot exit. */ |
| + ret = INTERNAL_SYSCALL_CALL (tgkill, __getpid (), pd->tid, signo); |
| + ret = INTERNAL_SYSCALL_ERROR_P (ret) ? INTERNAL_SYSCALL_ERRNO (ret) : 0; |
| + } |
| + |
| + __libc_lock_unlock (pd->exit_lock); |
| + __libc_signal_restore_set (&old_mask); |
| |
| - return val; |
| + return ret; |
| } |
| |
| int |
| diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile |
| index dedfa0d290da4949..48dba717a1cdc20a 100644 |
| |
| |
| @@ -119,7 +119,9 @@ tests += tst-cnd-basic tst-mtx-trylock tst-cnd-broadcast \ |
| tst-unwind-thread \ |
| tst-pt-vfork1 tst-pt-vfork2 tst-vfork1x tst-vfork2x \ |
| tst-pthread_cancel-exited \ |
| + tst-pthread_cancel-select-loop \ |
| tst-pthread_kill-exited \ |
| + tst-pthread_kill-exiting \ |
| # tests |
| |
| tests-time64 := \ |
| diff --git a/sysdeps/pthread/tst-pthread_cancel-select-loop.c b/sysdeps/pthread/tst-pthread_cancel-select-loop.c |
| new file mode 100644 |
| index 0000000000000000..a62087589cee24b5 |
| |
| |
| @@ -0,0 +1,87 @@ |
| +/* Test that pthread_cancel succeeds during thread exit. |
| + Copyright (C) 2021 Free Software Foundation, Inc. |
| + This file is part of the GNU C Library. |
| + |
| + The GNU C Library is free software; you can redistribute it and/or |
| + modify it under the terms of the GNU Lesser General Public |
| + License as published by the Free Software Foundation; either |
| + version 2.1 of the License, or (at your option) any later version. |
| + |
| + The GNU C Library is distributed in the hope that it will be useful, |
| + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| + Lesser General Public License for more details. |
| + |
| + You should have received a copy of the GNU Lesser General Public |
| + License along with the GNU C Library; if not, see |
| + <https://www.gnu.org/licenses/>. */ |
| + |
| +/* This test tries to trigger an internal race condition in |
| + pthread_cancel, where the cancellation signal is sent after the |
| + thread has begun the cancellation process. This can result in a |
| + spurious ESRCH error. For the original bug 12889, the window is |
| + quite small, so the bug was not reproduced in every run. */ |
| + |
| +#include <stdbool.h> |
| +#include <stddef.h> |
| +#include <support/check.h> |
| +#include <support/xthread.h> |
| +#include <support/xunistd.h> |
| +#include <sys/select.h> |
| +#include <unistd.h> |
| + |
| +/* Set to true by timeout_thread_function when the test should |
| + terminate. */ |
| +static bool timeout; |
| + |
| +static void * |
| +timeout_thread_function (void *unused) |
| +{ |
| + usleep (5 * 1000 * 1000); |
| + __atomic_store_n (&timeout, true, __ATOMIC_RELAXED); |
| + return NULL; |
| +} |
| + |
| +/* Used for blocking the select function below. */ |
| +static int pipe_fds[2]; |
| + |
| +static void * |
| +canceled_thread_function (void *unused) |
| +{ |
| + while (true) |
| + { |
| + fd_set rfs; |
| + fd_set wfs; |
| + fd_set efs; |
| + FD_ZERO (&rfs); |
| + FD_ZERO (&wfs); |
| + FD_ZERO (&efs); |
| + FD_SET (pipe_fds[0], &rfs); |
| + |
| + /* If the cancellation request is recognized early, the thread |
| + begins exiting while the cancellation signal arrives. */ |
| + select (FD_SETSIZE, &rfs, &wfs, &efs, NULL); |
| + } |
| + return NULL; |
| +} |
| + |
| +static int |
| +do_test (void) |
| +{ |
| + xpipe (pipe_fds); |
| + pthread_t thr_timeout = xpthread_create (NULL, timeout_thread_function, NULL); |
| + |
| + while (!__atomic_load_n (&timeout, __ATOMIC_RELAXED)) |
| + { |
| + pthread_t thr = xpthread_create (NULL, canceled_thread_function, NULL); |
| + xpthread_cancel (thr); |
| + TEST_VERIFY (xpthread_join (thr) == PTHREAD_CANCELED); |
| + } |
| + |
| + xpthread_join (thr_timeout); |
| + xclose (pipe_fds[0]); |
| + xclose (pipe_fds[1]); |
| + return 0; |
| +} |
| + |
| +#include <support/test-driver.c> |
| diff --git a/sysdeps/pthread/tst-pthread_kill-exiting.c b/sysdeps/pthread/tst-pthread_kill-exiting.c |
| new file mode 100644 |
| index 0000000000000000..f803e94f1195f204 |
| |
| |
| @@ -0,0 +1,123 @@ |
| +/* Test that pthread_kill succeeds during thread exit. |
| + Copyright (C) 2021 Free Software Foundation, Inc. |
| + This file is part of the GNU C Library. |
| + |
| + The GNU C Library is free software; you can redistribute it and/or |
| + modify it under the terms of the GNU Lesser General Public |
| + License as published by the Free Software Foundation; either |
| + version 2.1 of the License, or (at your option) any later version. |
| + |
| + The GNU C Library is distributed in the hope that it will be useful, |
| + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| + Lesser General Public License for more details. |
| + |
| + You should have received a copy of the GNU Lesser General Public |
| + License along with the GNU C Library; if not, see |
| + <https://www.gnu.org/licenses/>. */ |
| + |
| +/* This test verifies that pthread_kill for a thread that is exiting |
| + succeeds (with or without actually delivering the signal). */ |
| + |
| +#include <array_length.h> |
| +#include <stdbool.h> |
| +#include <stddef.h> |
| +#include <support/xsignal.h> |
| +#include <support/xthread.h> |
| +#include <unistd.h> |
| + |
| +/* Set to true by timeout_thread_function when the test should |
| + terminate. */ |
| +static bool timeout; |
| + |
| +static void * |
| +timeout_thread_function (void *unused) |
| +{ |
| + usleep (1000 * 1000); |
| + __atomic_store_n (&timeout, true, __ATOMIC_RELAXED); |
| + return NULL; |
| +} |
| + |
| +/* Used to synchronize the sending threads with the target thread and |
| + main thread. */ |
| +static pthread_barrier_t barrier_1; |
| +static pthread_barrier_t barrier_2; |
| + |
| +/* The target thread to which signals are to be sent. */ |
| +static pthread_t target_thread; |
| + |
| +/* Set by the main thread to true after timeout has been set to |
| + true. */ |
| +static bool exiting; |
| + |
| +static void * |
| +sender_thread_function (void *unused) |
| +{ |
| + while (true) |
| + { |
| + /* Wait until target_thread has been initialized. The target |
| + thread and main thread participate in this barrier. */ |
| + xpthread_barrier_wait (&barrier_1); |
| + |
| + if (exiting) |
| + break; |
| + |
| + xpthread_kill (target_thread, SIGUSR1); |
| + |
| + /* Communicate that the signal has been sent. The main thread |
| + participates in this barrier. */ |
| + xpthread_barrier_wait (&barrier_2); |
| + } |
| + return NULL; |
| +} |
| + |
| +static void * |
| +target_thread_function (void *unused) |
| +{ |
| + target_thread = pthread_self (); |
| + xpthread_barrier_wait (&barrier_1); |
| + return NULL; |
| +} |
| + |
| +static int |
| +do_test (void) |
| +{ |
| + xsignal (SIGUSR1, SIG_IGN); |
| + |
| + pthread_t thr_timeout = xpthread_create (NULL, timeout_thread_function, NULL); |
| + |
| + pthread_t threads[4]; |
| + xpthread_barrier_init (&barrier_1, NULL, array_length (threads) + 2); |
| + xpthread_barrier_init (&barrier_2, NULL, array_length (threads) + 1); |
| + |
| + for (int i = 0; i < array_length (threads); ++i) |
| + threads[i] = xpthread_create (NULL, sender_thread_function, NULL); |
| + |
| + while (!__atomic_load_n (&timeout, __ATOMIC_RELAXED)) |
| + { |
| + xpthread_create (NULL, target_thread_function, NULL); |
| + |
| + /* Wait for the target thread to be set up and signal sending to |
| + start. */ |
| + xpthread_barrier_wait (&barrier_1); |
| + |
| + /* Wait for signal sending to complete. */ |
| + xpthread_barrier_wait (&barrier_2); |
| + |
| + xpthread_join (target_thread); |
| + } |
| + |
| + exiting = true; |
| + |
| + /* Signal the sending threads to exit. */ |
| + xpthread_create (NULL, target_thread_function, NULL); |
| + xpthread_barrier_wait (&barrier_1); |
| + |
| + for (int i = 0; i < array_length (threads); ++i) |
| + xpthread_join (threads[i]); |
| + xpthread_join (thr_timeout); |
| + |
| + return 0; |
| +} |
| + |
| +#include <support/test-driver.c> |