commit 8d1ae0139eed2a1c2d118e6d3ea7facb52aab40c
Author: David Smith
Date:   Mon Jan 15 10:23:45 2018 -0600

    Fix BZ1525651 by making the task_finder shutdown more robust.

    * runtime/linux/task_finder.c (__stap_utrace_detach): New function,
    based on stap_utrace_detach(), but lets the caller know whether the
    task had the utrace engine attached to it.
    (stap_utrace_detach): Just call __stap_utrace_detach().
    (stap_utrace_detach_ops): Perform the detaching in a loop, so if the
    process list snapshot we operate on becomes out of date, we'll still
    detach everything.
    (__stp_utrace_task_finder_report_clone): Move the
    __stp_tf_handler_start() call to the very start of the function.
    (__stp_utrace_task_finder_report_exec): Ditto.
    (__stp_utrace_task_finder_target_exec): Ditto.
    (__stp_utrace_task_finder_target_exit): Ditto.
    (__stp_utrace_task_finder_target_quiesce): Ditto.
    (__stp_utrace_task_finder_target_syscall_entry): Ditto.
    (__stp_utrace_task_finder_target_syscall_exit): Ditto.

diff --git a/runtime/linux/task_finder.c b/runtime/linux/task_finder.c
index 69c27fe..3346f2e 100644
--- a/runtime/linux/task_finder.c
+++ b/runtime/linux/task_finder.c
@@ -283,13 +283,17 @@ stap_register_task_finder_target(struct stap_task_finder_target *new_tgt)
 }
 
 static int
-stap_utrace_detach(struct task_struct *tsk,
-		   const struct utrace_engine_ops *ops)
+__stap_utrace_detach(struct task_struct *tsk,
+		     const struct utrace_engine_ops *ops,
+		     int *ops_matched)
 {
 	struct utrace_engine *engine;
 	struct mm_struct *mm;
 	int rc = 0;
 
+	if (ops_matched)
+		*ops_matched = 0;
+
 	// Ignore invalid tasks.
 	if (tsk == NULL || tsk->pid <= 0)
 		return 0;
@@ -331,6 +335,8 @@ stap_utrace_detach(struct task_struct *tsk,
 		rc = EFAULT;
 	}
 	else {
+		if (ops_matched)
+			*ops_matched = 1;
 		rc = utrace_control(tsk, engine, UTRACE_DETACH);
 		switch (rc) {
 		case 0:			/* success */
@@ -363,6 +369,13 @@ stap_utrace_detach(struct task_struct *tsk,
 	return rc;
 }
 
+static int
+stap_utrace_detach(struct task_struct *tsk,
+		   const struct utrace_engine_ops *ops)
+{
+	return __stap_utrace_detach(tsk, ops, NULL);
+}
+
 static void
 stap_utrace_detach_ops(struct utrace_engine_ops *ops)
 {
@@ -370,42 +383,66 @@ stap_utrace_detach_ops(struct utrace_engine_ops *ops)
 	struct task_struct *grp, *tsk;
 	struct utrace_engine *engine;
 	pid_t pid = 0;
 	int rc = 0;
-
-	// Notice we're not calling get_task_mm() in this loop.  In
-	// every other instance when calling do_each_thread, we avoid
-	// tasks with no mm, because those are kernel threads.  So,
-	// why is this function different?  When a thread is in the
-	// process of dying, its mm gets freed.  Then, later the
-	// thread gets in the dying state and the thread's
-	// UTRACE_EVENT(DEATH) event handler gets called (if any).
+	int ops_matched;
+	int iterations = 0;
+
+	// We're going to do the detach in a loop.  Why?
+	// do_each_thread() presents a snapshot of the list of
+	// processes on the system.  If the system is currently
+	// creating lots of processes, we'll miss some during this
+	// loop (especially since utrace_barrier() can wait for a
+	// handler to finish).
 	//
-	// If a thread is in this "mortally wounded" state - no mm
-	// but not dead - and at that moment this function is called,
-	// we'd miss detaching from it if we were checking to see if
-	// it had an mm.
-
-	rcu_read_lock();
-	do_each_thread(grp, tsk) {
+	// So, we'll repeat the loop until we no longer find any
+	// processes that have this ops attached to them.
+	do {
+		// Notice we're not calling get_task_mm() in this
+		// loop.  In every other instance when calling
+		// do_each_thread, we avoid tasks with no mm, because
+		// those are kernel threads.  So, why is this function
+		// different?  When a thread is in the process of
+		// dying, its mm gets freed.  Then, later the thread
+		// gets in the dying state and the thread's
+		// UTRACE_EVENT(DEATH) event handler gets called (if
+		// any).
+		//
+		// If a thread is in this "mortally wounded" state -
+		// no mm but not dead - and at that moment this
+		// function is called, we'd miss detaching from it if
+		// we were checking to see if it had an mm.
+
+		ops_matched = 0;
+		rcu_read_lock();
+		do_each_thread(grp, tsk) {
+			int matched = 0;
 #ifdef PF_KTHREAD
-	// Ignore kernel threads.  On systems without
-	// PF_KTHREAD, we're ok, since kernel threads won't be
-	// matched by the stap_utrace_detach() call.
-	if (tsk->flags & PF_KTHREAD)
-		continue;
+			// Ignore kernel threads.  On systems without
+			// PF_KTHREAD, we're ok, since kernel threads
+			// won't be matched by the
+			// __stap_utrace_detach() call.
+			if (tsk->flags & PF_KTHREAD)
+				continue;
#endif
-	/* Notice we're purposefully ignoring errors from
-	 * stap_utrace_detach().  Even if we got an error on
-	 * this task, we need to keep detaching from other
-	 * tasks.  But warn, we might be unloading and dangling
-	 * engines are bad news. */
-	rc = stap_utrace_detach(tsk, ops);
-	if (rc != 0)
-		_stp_error("stap_utrace_detach returned error %d on pid %d", rc, tsk->pid);
-	WARN_ON(rc != 0);
-	} while_each_thread(grp, tsk);
-	rcu_read_unlock();
-	debug_task_finder_report();
+			/* Notice we're purposefully ignoring errors
+			 * from __stap_utrace_detach().  Even if we
+			 * got an error on this task, we need to keep
+			 * detaching from other tasks.  But warn, we
+			 * might be unloading and dangling engines are
+			 * bad news. */
+			rc = __stap_utrace_detach(tsk, ops, &matched);
+			if (rc != 0)
+				_stp_error("stap_utrace_detach returned error %d on pid %d", rc, tsk->pid);
+			WARN_ON(rc != 0);
+			ops_matched |= matched;
+		} while_each_thread(grp, tsk);
+		rcu_read_unlock();
+		debug_task_finder_report();
+		iterations++;
+	} while (ops_matched && iterations < 10);
+#ifdef DEBUG_TASK_FINDER
+	_stp_dbug(__FUNCTION__, __LINE__, "took %d attempts\n", iterations);
+#endif
 }
 
 static void
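The rewritten stap_utrace_detach_ops() above combines two ideas: a NULL-tolerant out-parameter (ops_matched) that lets each detach report whether it found anything, and a sweep that repeats until a full pass detaches nothing, capped at 10 iterations so pathological process churn cannot loop forever. A minimal, compilable user-space sketch of the same shape follows; the task table, attached flag, and detach call are invented stand-ins, not SystemTap or utrace APIs:

/* Toy model of the sweep-until-quiet detach; all names invented. */
#include <stdio.h>

#define NTASKS     8
#define MAX_SWEEPS 10

struct task {
	int attached;	/* stands in for "has our utrace engine" */
};

static struct task tasks[NTASKS];

/* Detach one task; report through *matched whether the engine was
 * attached, unless the caller passed NULL because it doesn't care. */
static int detach_one(struct task *t, int *matched)
{
	if (matched)
		*matched = 0;
	if (!t->attached)
		return 0;
	if (matched)
		*matched = 1;
	t->attached = 0;
	return 0;
}

static void detach_all(void)
{
	int sweeps = 0;
	int any_matched;

	/* Repeat full sweeps until one finds nothing attached, so
	 * entries that appear between sweeps still get handled; the
	 * cap keeps pathological churn from looping forever. */
	do {
		any_matched = 0;
		for (int i = 0; i < NTASKS; i++) {
			int matched = 0;
			/* Errors (none in this toy) would be logged,
			 * not fatal, just as in the patch. */
			detach_one(&tasks[i], &matched);
			any_matched |= matched;
		}
		sweeps++;
	} while (any_matched && sweeps < MAX_SWEEPS);

	printf("took %d sweep(s)\n", sweeps);
}

int main(void)
{
	tasks[2].attached = tasks[5].attached = 1;
	detach_all();
	return 0;
}

The NULL-tolerant out-parameter is also what lets stap_utrace_detach() stay a one-line wrapper: callers that don't care about the match simply pass NULL.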
@@ -980,13 +1017,13 @@ __stp_utrace_task_finder_report_clone(enum utrace_resume_action action,
 #endif
 	int rc;
 
+	__stp_tf_handler_start();
 	if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
 		debug_task_finder_detach();
+		__stp_tf_handler_end();
 		return UTRACE_DETACH;
 	}
 
-	__stp_tf_handler_start();
-
 	// On clone, attach to the child.
 	rc = __stp_utrace_attach(child, engine->ops, 0,
 				 __STP_TASK_FINDER_EVENTS, UTRACE_RESUME);
@@ -1034,13 +1071,13 @@ __stp_utrace_task_finder_report_exec(enum utrace_resume_action action,
 	struct stap_task_finder_target *tgt;
 	int found_node = 0;
 
+	__stp_tf_handler_start();
 	if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
 		debug_task_finder_detach();
+		__stp_tf_handler_end();
 		return UTRACE_DETACH;
 	}
 
-	__stp_tf_handler_start();
-
 	// If the original task was "interesting",
 	// __stp_utrace_task_finder_target_exec() will handle calling
 	// callbacks.
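In both handlers above, __stp_tf_handler_start() now runs before the __STP_TF_RUNNING check, and the early-return path gained a matching __stp_tf_handler_end(). Assuming the start/end pair brackets the handler with a count of in-flight callbacks that shutdown drains (which is what a shutdown-robustness fix like BZ1525651 suggests, though the counter itself is not shown in this diff), raising the count first closes the window where shutdown could proceed while a handler that already passed the state check is still running. A small C11 sketch of that ordering, with invented names in place of the SystemTap ones:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int handlers_in_flight;
static atomic_int running = 1;	/* cleared when shutdown begins */

/* Raise the in-flight count *before* checking state: once the count
 * is up, a shutdown that drains the counter must wait for us, even
 * if it clears `running` right after our check.  Every return path
 * below drops the count it raised. */
static int handler(void)
{
	atomic_fetch_add(&handlers_in_flight, 1);
	if (!atomic_load(&running)) {
		atomic_fetch_sub(&handlers_in_flight, 1);
		return -1;	/* would be UTRACE_DETACH */
	}

	/* ... real event-handling work would go here ... */

	atomic_fetch_sub(&handlers_in_flight, 1);
	return 0;		/* would be UTRACE_RESUME */
}

int main(void)
{
	printf("handler returned %d; in flight now %d\n",
	       handler(), (int)atomic_load(&handlers_in_flight));
	return 0;
}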
@@ -1101,13 +1138,13 @@ __stp_utrace_task_finder_target_exec(enum utrace_resume_action action,
 	struct stap_task_finder_target *tgt = engine->data;
 	int rc;
 
+	__stp_tf_handler_start();
 	if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
 		debug_task_finder_detach();
+		__stp_tf_handler_end();
 		return UTRACE_DETACH;
 	}
 
-	__stp_tf_handler_start();
-
 	// We'll hardcode this as a process end.  If a thread
 	// calls exec() (which it isn't supposed to), the kernel
 	// "promotes" it to being a process.  Call the callbacks.
@@ -1123,8 +1160,8 @@ __stp_utrace_task_finder_target_exec(enum utrace_resume_action action,
 	// __stp_utrace_attach_match_tsk() to figure out if the
 	// exec'ed program is "interesting".
 
-	__stp_tf_handler_end();
 	debug_task_finder_detach();
+	__stp_tf_handler_end();
 	return UTRACE_DETACH;
 }
@@ -1144,12 +1181,13 @@ __stp_utrace_task_finder_target_exit(struct utrace_engine *engine,
 #endif
 	struct stap_task_finder_target *tgt = engine->data;
 
+	__stp_tf_handler_start();
 	if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
 		debug_task_finder_detach();
+		__stp_tf_handler_end();
 		return UTRACE_DETACH;
 	}
 
-	__stp_tf_handler_start();
 	// The first implementation of this added a
 	// UTRACE_EVENT(DEATH) handler to
 	// __stp_utrace_task_finder_ops.  However, dead threads don't
@@ -1163,8 +1201,8 @@ __stp_utrace_task_finder_target_exit(struct utrace_engine *engine,
 		__stp_call_callbacks(tgt, tsk, 0, (tsk->pid == tsk->tgid));
 	}
 
-	__stp_tf_handler_end();
 	debug_task_finder_detach();
+	__stp_tf_handler_end();
 	return UTRACE_DETACH;
 }
@@ -1332,18 +1370,19 @@ __stp_utrace_task_finder_target_quiesce(enum utrace_resume_action action,
 	struct stap_task_finder_target *tgt = engine->data;
 	int rc;
 
+	__stp_tf_handler_start();
 	if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
 		debug_task_finder_detach();
+		__stp_tf_handler_end();
 		return UTRACE_DETACH;
 	}
 
 	if (tgt == NULL || tsk == NULL) {
 		debug_task_finder_detach();
+		__stp_tf_handler_end();
 		return UTRACE_DETACH;
 	}
 
-	__stp_tf_handler_start();
-
 	// Turn off quiesce handling
 	rc = utrace_set_events(tsk, engine,
 			       __STP_ATTACHED_TASK_BASE_EVENTS(tgt));
@@ -1418,13 +1457,17 @@ __stp_utrace_task_finder_target_syscall_entry(enum utrace_resume_action action,
 	int is_mprotect = 0;
 	int is_munmap = 0;
 
+	__stp_tf_handler_start();
 	if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
 		debug_task_finder_detach();
+		__stp_tf_handler_end();
 		return UTRACE_DETACH;
 	}
 
-	if (tgt == NULL)
+	if (tgt == NULL) {
+		__stp_tf_handler_end();
 		return UTRACE_RESUME;
+	}
 
 	// See if syscall is one we're interested in.  On x86_64, this
 	// is a potentially expensive operation (since we have to
@@ -1442,20 +1485,23 @@ __stp_utrace_task_finder_target_syscall_entry(enum utrace_resume_action action,
 				   ? 1 : 0);
 		}
 	}
-	if (!is_mmap_or_mmap2 && !is_mprotect && !is_munmap)
+	if (!is_mmap_or_mmap2 && !is_mprotect && !is_munmap) {
+		__stp_tf_handler_end();
 		return UTRACE_RESUME;
+	}
 
 	// The syscall is one we're interested in, but do we have a
 	// handler for it?
 	if ((is_mmap_or_mmap2 && tgt->mmap_events == 0)
 	    || (is_mprotect && tgt->mprotect_events == 0)
-	    || (is_munmap && tgt->munmap_events == 0))
+	    || (is_munmap && tgt->munmap_events == 0)) {
+		__stp_tf_handler_end();
 		return UTRACE_RESUME;
+	}
 
 	// Save the needed arguments.  Note that for mmap, we really
 	// just need the return value, so there is no need to save
 	// any arguments.
-	__stp_tf_handler_start();
 	if (is_munmap) {
 		// We need 2 arguments for munmap()
 		syscall_get_arguments(tsk, regs, 0, 2, args);
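Every early return added above must now pair with its own __stp_tf_handler_end(), and this hunk shows how many such paths a single handler can accumulate. A common alternative shape is a single exit label, so the end call appears exactly once; a compilable sketch of that style for comparison (hypothetical names, and not the approach this patch takes):

#include <stdio.h>

/* Hypothetical stand-ins for the bracketing calls. */
static void handler_start(void) { puts("start"); }
static void handler_end(void)   { puts("end"); }

/* Single exit point: every path funnels through `out`, so the
 * start/end pairing holds no matter how many early exits exist. */
static int handler(int state_ok, int have_target)
{
	int rc = 0;		/* would be UTRACE_RESUME */

	handler_start();
	if (!state_ok) {
		rc = -1;	/* would be UTRACE_DETACH */
		goto out;
	}
	if (!have_target)
		goto out;	/* nothing to do; resume */

	/* ... main body of the handler ... */

out:
	handler_end();		/* the one spot paired with handler_start() */
	return rc;
}

int main(void)
{
	return handler(1, 0) < 0;
}

The patch's per-return style keeps the diff small and local; the label style trades that for making a forgotten end call much harder.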
@@ -1501,23 +1547,28 @@ __stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action,
 	unsigned long rv;
 	struct __stp_tf_map_entry *entry;
 
+	__stp_tf_handler_start();
 	if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
 		debug_task_finder_detach();
+		__stp_tf_handler_end();
 		return UTRACE_DETACH;
 	}
 
-	if (tgt == NULL)
+	if (tgt == NULL) {
+		__stp_tf_handler_end();
 		return UTRACE_RESUME;
+	}
 
 	// See if we can find saved syscall info.  If we can, it must
 	// be one of the syscalls we are interested in (and we must
 	// have callbacks to call for it).
 	entry = __stp_tf_get_map_entry(tsk);
-	if (entry == NULL)
+	if (entry == NULL) {
+		__stp_tf_handler_end();
 		return UTRACE_RESUME;
+	}
 
 	// Get return value
-	__stp_tf_handler_start();
 	rv = syscall_get_return_value(tsk, regs);
 
 	dbug_task_vma(1,
@@ -1547,8 +1598,8 @@ __stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action,
 				     entry->arg1, entry->arg2);
 	}
 
-	__stp_tf_handler_end();
 	__stp_tf_remove_map_entry(entry);
+	__stp_tf_handler_end();
 	return UTRACE_RESUME;
 }
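The exit handler only does real work when __stp_tf_get_map_entry() finds state that the entry handler stashed for this task, and it consumes that state before returning. Reduced to its essentials, the entry/exit pairing is a small map keyed by task: record the arguments at syscall entry, look the record up and remove it at exit, and treat a missing record as "not ours". A toy version with a fixed-size table and integer ids standing in for task pointers (all names invented):

#include <stdio.h>

#define MAX_ENTRIES 16

struct map_entry {
	int  in_use;
	long tid;		/* stands in for the task pointer */
	long arg1, arg2;	/* arguments saved at syscall entry */
};

static struct map_entry map[MAX_ENTRIES];

/* Syscall entry: remember the arguments for this task. */
static int save_entry(long tid, long arg1, long arg2)
{
	for (int i = 0; i < MAX_ENTRIES; i++) {
		if (!map[i].in_use) {
			map[i] = (struct map_entry){ 1, tid, arg1, arg2 };
			return 0;
		}
	}
	return -1;	/* table full; the exit side will just skip */
}

/* Syscall exit: find the record saved at entry, if any. */
static struct map_entry *get_entry(long tid)
{
	for (int i = 0; i < MAX_ENTRIES; i++)
		if (map[i].in_use && map[i].tid == tid)
			return &map[i];
	return NULL;	/* nothing recorded: not a syscall we track */
}

static void remove_entry(struct map_entry *e)
{
	e->in_use = 0;	/* consume the record, as the exit handler does */
}

int main(void)
{
	save_entry(42, 0x7f0000, 4096);		/* e.g. munmap(addr, len) */

	struct map_entry *e = get_entry(42);
	if (e) {
		printf("tid %ld: args %#lx, %ld\n", e->tid, e->arg1, e->arg2);
		remove_entry(e);
	}
	return 0;
}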