commit 8d1ae0139eed2a1c2d118e6d3ea7facb52aab40c
Author: David Smith <dsmith@redhat.com>
Date: Mon Jan 15 10:23:45 2018 -0600
Fix BZ1525651 by making the task_finder shutdown more robust.
* runtime/linux/task_finder.c (__stap_utrace_detach): New function, based
on stap_utrace_detach(), but lets the caller know whether the task had
the utrace engine attached to it.
(stap_utrace_detach): Just call __stap_utrace_detach().
(stap_utrace_detach_ops): Perform the detaching in a loop, so if the
process list snapshot we operate on becomes out of date, we'll still
detach everything (see the sketch after this changelog).
(__stp_utrace_task_finder_report_clone): Move the
__stp_tf_handler_start() call to the very start of the function (see
the sketch after the diff).
(__stp_utrace_task_finder_report_exec): Ditto.
(__stp_utrace_task_finder_target_exec): Ditto.
(__stp_utrace_task_finder_target_exit): Ditto.
(__stp_utrace_task_finder_target_quiesce): Ditto.
(__stp_utrace_task_finder_target_syscall_entry): Ditto.
(__stp_utrace_task_finder_target_syscall_exit): Ditto.
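The looped detach in stap_utrace_detach_ops() is the heart of the fix, and
its shape is easier to see in isolation. Below is a minimal userspace C
sketch of that pattern, assuming nothing from SystemTap: detach_pass(),
attached[], and MAX_PASSES are illustrative stand-ins for the
do_each_thread() walk, the new ops_matched out-parameter, and the patch's
cap of 10 iterations.

#include <stdbool.h>
#include <stdio.h>

#define MAX_TASKS  8
#define MAX_PASSES 10	/* hard cap, like the patch's iterations < 10 */

/* Stand-in for per-task engine state; in the patch this is what
 * __stap_utrace_detach() reports through its ops_matched argument. */
static bool attached[MAX_TASKS];

/* One pass over a snapshot of the task list.  Returns true if any
 * task still had the engine attached, telling the caller the
 * snapshot may already be stale and another pass is warranted. */
static bool detach_pass(void)
{
	bool matched = false;
	for (int i = 0; i < MAX_TASKS; i++) {
		if (attached[i]) {
			attached[i] = false;	/* detach it */
			matched = true;
		}
	}
	return matched;
}

int main(void)
{
	for (int i = 0; i < MAX_TASKS; i++)
		attached[i] = (i % 2 == 0);

	/* Repeat until a full pass matches nothing, bounded so runaway
	 * process creation cannot spin us forever. */
	int passes = 0;
	bool matched;
	do {
		matched = detach_pass();
		passes++;
	} while (matched && passes < MAX_PASSES);

	printf("took %d passes\n", passes);	/* prints "took 2 passes" */
	return 0;
}

The confirming extra pass is the point: only when a complete walk finds
nothing attached can shutdown trust that no task slipped in between taking
the snapshot and detaching.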
diff --git a/runtime/linux/task_finder.c b/runtime/linux/task_finder.c
index 69c27fe..3346f2e 100644
--- a/runtime/linux/task_finder.c
+++ b/runtime/linux/task_finder.c
@@ -283,13 +283,17 @@ stap_register_task_finder_target(struct stap_task_finder_target *new_tgt)
}
static int
-stap_utrace_detach(struct task_struct *tsk,
- const struct utrace_engine_ops *ops)
+__stap_utrace_detach(struct task_struct *tsk,
+ const struct utrace_engine_ops *ops,
+ int *ops_matched)
{
struct utrace_engine *engine;
struct mm_struct *mm;
int rc = 0;
+ if (ops_matched)
+ *ops_matched = 0;
+
// Ignore invalid tasks.
if (tsk == NULL || tsk->pid <= 0)
return 0;
@@ -331,6 +335,8 @@ stap_utrace_detach(struct task_struct *tsk,
rc = EFAULT;
}
else {
+ if (ops_matched)
+ *ops_matched = 1;
rc = utrace_control(tsk, engine, UTRACE_DETACH);
switch (rc) {
case 0: /* success */
@@ -363,6 +369,13 @@ stap_utrace_detach(struct task_struct *tsk,
return rc;
}
+static int
+stap_utrace_detach(struct task_struct *tsk,
+ const struct utrace_engine_ops *ops)
+{
+ return __stap_utrace_detach(tsk, ops, NULL);
+}
+
static void
stap_utrace_detach_ops(struct utrace_engine_ops *ops)
{
@@ -370,42 +383,66 @@ stap_utrace_detach_ops(struct utrace_engine_ops *ops)
struct utrace_engine *engine;
pid_t pid = 0;
int rc = 0;
-
- // Notice we're not calling get_task_mm() in this loop. In
- // every other instance when calling do_each_thread, we avoid
- // tasks with no mm, because those are kernel threads. So,
- // why is this function different? When a thread is in the
- // process of dying, its mm gets freed. Then, later the
- // thread gets in the dying state and the thread's
- // UTRACE_EVENT(DEATH) event handler gets called (if any).
+ int ops_matched;
+ int iterations = 0;
+
+ // We're going to do the detach in a loop. Why?
+ // do_each_thread() presents a snapshot of the list of
+ // processes on the system. If the system is currently
+ // creating lots of processes, we'll miss some during this
+ // loop (especially since utrace_barrier() can wait for a
+ // handler to finish).
//
- // If a thread is in this "mortally wounded" state - no mm
- // but not dead - and at that moment this function is called,
- // we'd miss detaching from it if we were checking to see if
- // it had an mm.
-
- rcu_read_lock();
- do_each_thread(grp, tsk) {
+ // So, we'll repeat the loop until we no longer find any
+ // processes that have this ops attached to them.
+ do {
+ // Notice we're not calling get_task_mm() in this
+ // loop. In every other instance when calling
+ // do_each_thread, we avoid tasks with no mm, because
+ // those are kernel threads. So, why is this function
+ // different? When a thread is in the process of
+ // dying, its mm gets freed. Then, later the thread
+ // gets in the dying state and the thread's
+ // UTRACE_EVENT(DEATH) event handler gets called (if
+ // any).
+ //
+ // If a thread is in this "mortally wounded" state -
+ // no mm but not dead - and at that moment this
+ // function is called, we'd miss detaching from it if
+ // we were checking to see if it had an mm.
+
+ ops_matched = 0;
+ rcu_read_lock();
+ do_each_thread(grp, tsk) {
+ int matched = 0;
#ifdef PF_KTHREAD
- // Ignore kernel threads. On systems without
- // PF_KTHREAD, we're ok, since kernel threads won't be
- // matched by the stap_utrace_detach() call.
- if (tsk->flags & PF_KTHREAD)
- continue;
+ // Ignore kernel threads. On systems without
+ // PF_KTHREAD, we're ok, since kernel threads
+ // won't be matched by the
+ // __stap_utrace_detach() call.
+ if (tsk->flags & PF_KTHREAD)
+ continue;
#endif
- /* Notice we're purposefully ignoring errors from
- * stap_utrace_detach(). Even if we got an error on
- * this task, we need to keep detaching from other
- * tasks. But warn, we might be unloading and dangling
- * engines are bad news. */
- rc = stap_utrace_detach(tsk, ops);
- if (rc != 0)
- _stp_error("stap_utrace_detach returned error %d on pid %d", rc, tsk->pid);
- WARN_ON(rc != 0);
- } while_each_thread(grp, tsk);
- rcu_read_unlock();
- debug_task_finder_report();
+ /* Notice we're purposefully ignoring errors
+ * from __stap_utrace_detach(). Even if we
+ * got an error on this task, we need to keep
+ * detaching from other tasks. But warn, we
+ * might be unloading and dangling engines are
+ * bad news. */
+ rc = __stap_utrace_detach(tsk, ops, &matched);
+ if (rc != 0)
+ _stp_error("__stap_utrace_detach returned error %d on pid %d", rc, tsk->pid);
+ WARN_ON(rc != 0);
+ ops_matched |= matched;
+ } while_each_thread(grp, tsk);
+ rcu_read_unlock();
+ debug_task_finder_report();
+ iterations++;
+ } while (ops_matched && iterations < 10);
+#ifdef DEBUG_TASK_FINDER
+ _stp_dbug(__FUNCTION__, __LINE__, "took %d attempts\n", iterations);
+#endif
}
static void
@@ -980,13 +1017,13 @@ __stp_utrace_task_finder_report_clone(enum utrace_resume_action action,
#endif
int rc;
+ __stp_tf_handler_start();
if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
debug_task_finder_detach();
+ __stp_tf_handler_end();
return UTRACE_DETACH;
}
- __stp_tf_handler_start();
-
// On clone, attach to the child.
rc = __stp_utrace_attach(child, engine->ops, 0,
__STP_TASK_FINDER_EVENTS, UTRACE_RESUME);
@@ -1034,13 +1071,13 @@ __stp_utrace_task_finder_report_exec(enum utrace_resume_action action,
struct stap_task_finder_target *tgt;
int found_node = 0;
+ __stp_tf_handler_start();
if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
debug_task_finder_detach();
+ __stp_tf_handler_end();
return UTRACE_DETACH;
}
- __stp_tf_handler_start();
-
// If the original task was "interesting",
// __stp_utrace_task_finder_target_exec() will handle calling
// callbacks.
@@ -1101,13 +1138,13 @@ __stp_utrace_task_finder_target_exec(enum utrace_resume_action action,
struct stap_task_finder_target *tgt = engine->data;
int rc;
+ __stp_tf_handler_start();
if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
debug_task_finder_detach();
+ __stp_tf_handler_end();
return UTRACE_DETACH;
}
- __stp_tf_handler_start();
-
// We'll hardcode this as a process end. If a thread
// calls exec() (which it isn't supposed to), the kernel
// "promotes" it to being a process. Call the callbacks.
@@ -1123,8 +1160,8 @@ __stp_utrace_task_finder_target_exec(enum utrace_resume_action action,
// __stp_utrace_attach_match_tsk() to figure out if the
// exec'ed program is "interesting".
- __stp_tf_handler_end();
debug_task_finder_detach();
+ __stp_tf_handler_end();
return UTRACE_DETACH;
}
@@ -1144,12 +1181,13 @@ __stp_utrace_task_finder_target_exit(struct utrace_engine *engine,
#endif
struct stap_task_finder_target *tgt = engine->data;
+ __stp_tf_handler_start();
if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
debug_task_finder_detach();
+ __stp_tf_handler_end();
return UTRACE_DETACH;
}
- __stp_tf_handler_start();
// The first implementation of this added a
// UTRACE_EVENT(DEATH) handler to
// __stp_utrace_task_finder_ops. However, dead threads don't
@@ -1163,8 +1201,8 @@ __stp_utrace_task_finder_target_exit(struct utrace_engine *engine,
__stp_call_callbacks(tgt, tsk, 0, (tsk->pid == tsk->tgid));
}
- __stp_tf_handler_end();
debug_task_finder_detach();
+ __stp_tf_handler_end();
return UTRACE_DETACH;
}
@@ -1332,18 +1370,19 @@ __stp_utrace_task_finder_target_quiesce(enum utrace_resume_action action,
struct stap_task_finder_target *tgt = engine->data;
int rc;
+ __stp_tf_handler_start();
if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
debug_task_finder_detach();
+ __stp_tf_handler_end();
return UTRACE_DETACH;
}
if (tgt == NULL || tsk == NULL) {
debug_task_finder_detach();
+ __stp_tf_handler_end();
return UTRACE_DETACH;
}
- __stp_tf_handler_start();
-
// Turn off quiesce handling
rc = utrace_set_events(tsk, engine,
__STP_ATTACHED_TASK_BASE_EVENTS(tgt));
@@ -1418,13 +1457,17 @@ __stp_utrace_task_finder_target_syscall_entry(enum utrace_resume_action action,
int is_mprotect = 0;
int is_munmap = 0;
+ __stp_tf_handler_start();
if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
debug_task_finder_detach();
+ __stp_tf_handler_end();
return UTRACE_DETACH;
}
- if (tgt == NULL)
+ if (tgt == NULL) {
+ __stp_tf_handler_end();
return UTRACE_RESUME;
+ }
// See if syscall is one we're interested in. On x86_64, this
// is a potentially expensive operation (since we have to
@@ -1442,20 +1485,23 @@ __stp_utrace_task_finder_target_syscall_entry(enum utrace_resume_action action,
? 1 : 0);
}
}
- if (!is_mmap_or_mmap2 && !is_mprotect && !is_munmap)
+ if (!is_mmap_or_mmap2 && !is_mprotect && !is_munmap) {
+ __stp_tf_handler_end();
return UTRACE_RESUME;
+ }
// The syscall is one we're interested in, but do we have a
// handler for it?
if ((is_mmap_or_mmap2 && tgt->mmap_events == 0)
|| (is_mprotect && tgt->mprotect_events == 0)
- || (is_munmap && tgt->munmap_events == 0))
+ || (is_munmap && tgt->munmap_events == 0)) {
+ __stp_tf_handler_end();
return UTRACE_RESUME;
+ }
// Save the needed arguments. Note that for mmap, we really
// just need the return value, so there is no need to save
// any arguments.
- __stp_tf_handler_start();
if (is_munmap) {
// We need 2 arguments for munmap()
syscall_get_arguments(tsk, regs, 0, 2, args);
@@ -1501,23 +1547,28 @@ __stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action,
unsigned long rv;
struct __stp_tf_map_entry *entry;
+ __stp_tf_handler_start();
if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
debug_task_finder_detach();
+ __stp_tf_handler_end();
return UTRACE_DETACH;
}
- if (tgt == NULL)
+ if (tgt == NULL) {
+ __stp_tf_handler_end();
return UTRACE_RESUME;
+ }
// See if we can find saved syscall info. If we can, it must
// be one of the syscalls we are interested in (and we must
// have callbacks to call for it).
entry = __stp_tf_get_map_entry(tsk);
- if (entry == NULL)
+ if (entry == NULL) {
+ __stp_tf_handler_end();
return UTRACE_RESUME;
+ }
// Get return value
- __stp_tf_handler_start();
rv = syscall_get_return_value(tsk, regs);
dbug_task_vma(1,
@@ -1547,8 +1598,8 @@ __stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action,
entry->arg1, entry->arg2);
}
- __stp_tf_handler_end();
__stp_tf_remove_map_entry(entry);
+ __stp_tf_handler_end();
return UTRACE_RESUME;
}
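The remaining hunks all apply a single pattern: __stp_tf_handler_start()
becomes the first statement of every utrace handler, and each return path,
including the early shutdown/detach returns, now calls
__stp_tf_handler_end() before returning. Here is a compilable sketch of
that bracketing; handler_start(), handler_end(), report_event(),
in_flight, and finder_running are hypothetical stand-ins, and modeling the
pair as an atomic in-flight count is an assumption about its role that the
patch itself does not spell out.

#include <stdatomic.h>
#include <stdio.h>

/* Hypothetical stand-ins for __stp_tf_handler_start()/_end(),
 * modeled as a count of in-flight handlers that shutdown can wait
 * on; the real pair's internals are not shown in this patch. */
static atomic_int in_flight = 0;

static void handler_start(void) { atomic_fetch_add(&in_flight, 1); }
static void handler_end(void)   { atomic_fetch_sub(&in_flight, 1); }

enum action { RESUME, DETACH };

static int finder_running;	/* stand-in for __stp_task_finder_state */

/* The shape every handler takes after the patch: start() comes
 * first, and each return path closes the bracket before returning.
 * Before the patch, the early DETACH return never entered the
 * bracket at all, so a handler could run without being counted. */
static enum action report_event(void)
{
	handler_start();
	if (!finder_running) {
		handler_end();
		return DETACH;
	}
	/* ... normal event handling would happen here ... */
	handler_end();
	return RESUME;
}

int main(void)
{
	finder_running = 0;
	enum action a = report_event();
	printf("action=%s in_flight=%d\n",
	       a == DETACH ? "DETACH" : "RESUME",
	       atomic_load(&in_flight));	/* drains back to 0 */
	return 0;
}

Widening the bracket to cover the early returns means a handler is
accounted for over its entire execution, so a shutdown that waits for the
count to drain (or behind utrace_barrier()) can no longer race past a
handler that has started but not yet registered itself.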