| commit 8d1ae0139eed2a1c2d118e6d3ea7facb52aab40c |
| Author: David Smith <dsmith@redhat.com> |
| Date: Mon Jan 15 10:23:45 2018 -0600 |
| |
| Fix BZ1525651 by making the task_finder shutdown more robust. |
| |
| * runtime/linux/task_finder.c (__stap_utrace_detach): New function, based |
| on stap_utrace_detach(), but lets the caller know if the task had |
| the utrace engine attached to it. |
| (stap_utrace_detach): Just call __stap_utrace_detach(). |
| (stap_utrace_detach_ops): Perform the detaching in a loop, so if the |
| process list snapshot we operate on becomes out of date, we'll still |
| detach everything. |
| (__stp_utrace_task_finder_report_clone): Move the |
| __stp_tf_handler_start() call to the very start of the function. |
| (__stp_utrace_task_finder_report_exec): Ditto. |
| (__stp_utrace_task_finder_target_exec): Ditto. |
| (__stp_utrace_task_finder_target_exit): Ditto. |
| (__stp_utrace_task_finder_target_quiesce): Ditto. |
| (__stp_utrace_task_finder_target_syscall_entry): Ditto. |
| (__stp_utrace_task_finder_target_syscall_exit): Ditto. |
| |
| diff --git a/runtime/linux/task_finder.c b/runtime/linux/task_finder.c |
| index 69c27fe..3346f2e 100644 |
| |
| |
| @@ -283,13 +283,17 @@ stap_register_task_finder_target(struct stap_task_finder_target *new_tgt) |
| } |
| |
| static int |
| -stap_utrace_detach(struct task_struct *tsk, |
| - const struct utrace_engine_ops *ops) |
| +__stap_utrace_detach(struct task_struct *tsk, |
| + const struct utrace_engine_ops *ops, |
| + int *ops_matched) |
| { |
| struct utrace_engine *engine; |
| struct mm_struct *mm; |
| int rc = 0; |
| |
| + if (ops_matched) |
| + *ops_matched = 0; |
| + |
| // Ignore invalid tasks. |
| if (tsk == NULL || tsk->pid <= 0) |
| return 0; |
| @@ -331,6 +335,8 @@ stap_utrace_detach(struct task_struct *tsk, |
| rc = EFAULT; |
| } |
| else { |
| + if (ops_matched) |
| + *ops_matched = 1; |
| rc = utrace_control(tsk, engine, UTRACE_DETACH); |
| switch (rc) { |
| case 0: /* success */ |
| @@ -363,6 +369,13 @@ stap_utrace_detach(struct task_struct *tsk, |
| return rc; |
| } |
| |
| +static int |
| +stap_utrace_detach(struct task_struct *tsk, |
| + const struct utrace_engine_ops *ops) |
| +{ |
| + return __stap_utrace_detach(tsk, ops, NULL); |
| +} |
| + |
| static void |
| stap_utrace_detach_ops(struct utrace_engine_ops *ops) |
| { |
| @@ -370,42 +383,66 @@ stap_utrace_detach_ops(struct utrace_engine_ops *ops) |
| struct utrace_engine *engine; |
| pid_t pid = 0; |
| int rc = 0; |
| - |
| - // Notice we're not calling get_task_mm() in this loop. In |
| - // every other instance when calling do_each_thread, we avoid |
| - // tasks with no mm, because those are kernel threads. So, |
| - // why is this function different? When a thread is in the |
| - // process of dying, its mm gets freed. Then, later the |
| - // thread gets in the dying state and the thread's |
| - // UTRACE_EVENT(DEATH) event handler gets called (if any). |
| + int ops_matched; |
| + int iterations = 0; |
| + |
| + // We're going to do the detach in a loop. Why? |
| + // do_each_thread() presents a snapshot of the list of |
| + // processes on the system. If the system is currently |
| + // creating lots of processes, we'll miss some during this |
| + // loop (especially since utrace_barrier() can wait for a |
| + // handler to finish). |
| // |
| - // If a thread is in this "mortally wounded" state - no mm |
| - // but not dead - and at that moment this function is called, |
| - // we'd miss detaching from it if we were checking to see if |
| - // it had an mm. |
| - |
| - rcu_read_lock(); |
| - do_each_thread(grp, tsk) { |
| + // So, we'll repeat the loop until we no longer find any |
| + // processes that have this ops attached to them. |
| + do { |
| + // Notice we're not calling get_task_mm() in this |
| + // loop. In every other instance when calling |
| + // do_each_thread, we avoid tasks with no mm, because |
| + // those are kernel threads. So, why is this function |
| + // different? When a thread is in the process of |
| + // dying, its mm gets freed. Then, later the thread |
| + // gets in the dying state and the thread's |
| + // UTRACE_EVENT(DEATH) event handler gets called (if |
| + // any). |
| + // |
| + // If a thread is in this "mortally wounded" state - |
| + // no mm but not dead - and at that moment this |
| + // function is called, we'd miss detaching from it if |
| + // we were checking to see if it had an mm. |
| + |
| + ops_matched = 0; |
| + rcu_read_lock(); |
| + do_each_thread(grp, tsk) { |
| + int matched = 0; |
| #ifdef PF_KTHREAD |
| - // Ignore kernel threads. On systems without |
| - // PF_KTHREAD, we're ok, since kernel threads won't be |
| - // matched by the stap_utrace_detach() call. |
| - if (tsk->flags & PF_KTHREAD) |
| - continue; |
| + // Ignore kernel threads. On systems without |
| + // PF_KTHREAD, we're ok, since kernel threads |
| + // won't be matched by the |
| + // __stap_utrace_detach() call. |
| + if (tsk->flags & PF_KTHREAD) |
| + continue; |
| #endif |
| |
| - /* Notice we're purposefully ignoring errors from |
| - * stap_utrace_detach(). Even if we got an error on |
| - * this task, we need to keep detaching from other |
| - * tasks. But warn, we might be unloading and dangling |
| - * engines are bad news. */ |
| - rc = stap_utrace_detach(tsk, ops); |
| - if (rc != 0) |
| - _stp_error("stap_utrace_detach returned error %d on pid %d", rc, tsk->pid); |
| - WARN_ON(rc != 0); |
| - } while_each_thread(grp, tsk); |
| - rcu_read_unlock(); |
| - debug_task_finder_report(); |
| + /* Notice we're purposefully ignoring errors |
| + * from __stap_utrace_detach(). Even if we |
| + * got an error on this task, we need to keep |
| + * detaching from other tasks. But warn, we |
| + * might be unloading and dangling engines are |
| + * bad news. */ |
| + rc = __stap_utrace_detach(tsk, ops, &matched); |
| + if (rc != 0) |
| + _stp_error("stap_utrace_detach returned error %d on pid %d", rc, tsk->pid); |
| + WARN_ON(rc != 0); |
| + ops_matched |= matched; |
| + } while_each_thread(grp, tsk); |
| + rcu_read_unlock(); |
| + debug_task_finder_report(); |
| + iterations++; |
| + } while (ops_matched && iterations < 10); |
| +#ifdef DEBUG_TASK_FINDER |
| + _stp_dbug(__FUNCTION__, __LINE__, "took %d attempts\n", iterations); |
| +#endif |
| } |
| |
| static void |
| @@ -980,13 +1017,13 @@ __stp_utrace_task_finder_report_clone(enum utrace_resume_action action, |
| #endif |
| int rc; |
| |
| + __stp_tf_handler_start(); |
| if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) { |
| debug_task_finder_detach(); |
| + __stp_tf_handler_end(); |
| return UTRACE_DETACH; |
| } |
| |
| - __stp_tf_handler_start(); |
| - |
| // On clone, attach to the child. |
| rc = __stp_utrace_attach(child, engine->ops, 0, |
| __STP_TASK_FINDER_EVENTS, UTRACE_RESUME); |
| @@ -1034,13 +1071,13 @@ __stp_utrace_task_finder_report_exec(enum utrace_resume_action action, |
| struct stap_task_finder_target *tgt; |
| int found_node = 0; |
| |
| + __stp_tf_handler_start(); |
| if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) { |
| debug_task_finder_detach(); |
| + __stp_tf_handler_end(); |
| return UTRACE_DETACH; |
| } |
| |
| - __stp_tf_handler_start(); |
| - |
| // If the original task was "interesting", |
| // __stp_utrace_task_finder_target_exec() will handle calling |
| // callbacks. |
| @@ -1101,13 +1138,13 @@ __stp_utrace_task_finder_target_exec(enum utrace_resume_action action, |
| struct stap_task_finder_target *tgt = engine->data; |
| int rc; |
| |
| + __stp_tf_handler_start(); |
| if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) { |
| debug_task_finder_detach(); |
| + __stp_tf_handler_end(); |
| return UTRACE_DETACH; |
| } |
| |
| - __stp_tf_handler_start(); |
| - |
| // We'll hardcode this as a process end. If a thread |
| // calls exec() (which it isn't supposed to), the kernel |
| // "promotes" it to being a process. Call the callbacks. |
| @@ -1123,8 +1160,8 @@ __stp_utrace_task_finder_target_exec(enum utrace_resume_action action, |
| // __stp_utrace_attach_match_tsk() to figure out if the |
| // exec'ed program is "interesting". |
| |
| - __stp_tf_handler_end(); |
| debug_task_finder_detach(); |
| + __stp_tf_handler_end(); |
| return UTRACE_DETACH; |
| } |
| |
| @@ -1144,12 +1181,13 @@ __stp_utrace_task_finder_target_exit(struct utrace_engine *engine, |
| #endif |
| struct stap_task_finder_target *tgt = engine->data; |
| |
| + __stp_tf_handler_start(); |
| if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) { |
| debug_task_finder_detach(); |
| + __stp_tf_handler_end(); |
| return UTRACE_DETACH; |
| } |
| |
| - __stp_tf_handler_start(); |
| // The first implementation of this added a |
| // UTRACE_EVENT(DEATH) handler to |
| // __stp_utrace_task_finder_ops. However, dead threads don't |
| @@ -1163,8 +1201,8 @@ __stp_utrace_task_finder_target_exit(struct utrace_engine *engine, |
| __stp_call_callbacks(tgt, tsk, 0, (tsk->pid == tsk->tgid)); |
| } |
| |
| - __stp_tf_handler_end(); |
| debug_task_finder_detach(); |
| + __stp_tf_handler_end(); |
| return UTRACE_DETACH; |
| } |
| |
| @@ -1332,18 +1370,19 @@ __stp_utrace_task_finder_target_quiesce(enum utrace_resume_action action, |
| struct stap_task_finder_target *tgt = engine->data; |
| int rc; |
| |
| + __stp_tf_handler_start(); |
| if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) { |
| debug_task_finder_detach(); |
| + __stp_tf_handler_end(); |
| return UTRACE_DETACH; |
| } |
| |
| if (tgt == NULL || tsk == NULL) { |
| debug_task_finder_detach(); |
| + __stp_tf_handler_end(); |
| return UTRACE_DETACH; |
| } |
| |
| - __stp_tf_handler_start(); |
| - |
| // Turn off quiesce handling |
| rc = utrace_set_events(tsk, engine, |
| __STP_ATTACHED_TASK_BASE_EVENTS(tgt)); |
| @@ -1418,13 +1457,17 @@ __stp_utrace_task_finder_target_syscall_entry(enum utrace_resume_action action, |
| int is_mprotect = 0; |
| int is_munmap = 0; |
| |
| + __stp_tf_handler_start(); |
| if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) { |
| debug_task_finder_detach(); |
| + __stp_tf_handler_end(); |
| return UTRACE_DETACH; |
| } |
| |
| - if (tgt == NULL) |
| + if (tgt == NULL) { |
| + __stp_tf_handler_end(); |
| return UTRACE_RESUME; |
| + } |
| |
| // See if syscall is one we're interested in. On x86_64, this |
| // is a potentially expensive operation (since we have to |
| @@ -1442,20 +1485,23 @@ __stp_utrace_task_finder_target_syscall_entry(enum utrace_resume_action action, |
| ? 1 : 0); |
| } |
| } |
| - if (!is_mmap_or_mmap2 && !is_mprotect && !is_munmap) |
| + if (!is_mmap_or_mmap2 && !is_mprotect && !is_munmap) { |
| + __stp_tf_handler_end(); |
| return UTRACE_RESUME; |
| + } |
| |
| // The syscall is one we're interested in, but do we have a |
| // handler for it? |
| if ((is_mmap_or_mmap2 && tgt->mmap_events == 0) |
| || (is_mprotect && tgt->mprotect_events == 0) |
| - || (is_munmap && tgt->munmap_events == 0)) |
| + || (is_munmap && tgt->munmap_events == 0)) { |
| + __stp_tf_handler_end(); |
| return UTRACE_RESUME; |
| + } |
| |
| // Save the needed arguments. Note that for mmap, we really |
| // just need the return value, so there is no need to save |
| // any arguments. |
| - __stp_tf_handler_start(); |
| if (is_munmap) { |
| // We need 2 arguments for munmap() |
| syscall_get_arguments(tsk, regs, 0, 2, args); |
| @@ -1501,23 +1547,28 @@ __stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action, |
| unsigned long rv; |
| struct __stp_tf_map_entry *entry; |
| |
| + __stp_tf_handler_start(); |
| if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) { |
| debug_task_finder_detach(); |
| + __stp_tf_handler_end(); |
| return UTRACE_DETACH; |
| } |
| |
| - if (tgt == NULL) |
| + if (tgt == NULL) { |
| + __stp_tf_handler_end(); |
| return UTRACE_RESUME; |
| + } |
| |
| // See if we can find saved syscall info. If we can, it must |
| // be one of the syscalls we are interested in (and we must |
| // have callbacks to call for it). |
| entry = __stp_tf_get_map_entry(tsk); |
| - if (entry == NULL) |
| + if (entry == NULL) { |
| + __stp_tf_handler_end(); |
| return UTRACE_RESUME; |
| + } |
| |
| // Get return value |
| - __stp_tf_handler_start(); |
| rv = syscall_get_return_value(tsk, regs); |
| |
| dbug_task_vma(1, |
| @@ -1547,8 +1598,8 @@ __stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action, |
| entry->arg1, entry->arg2); |
| } |
| |
| - __stp_tf_handler_end(); |
| __stp_tf_remove_map_entry(entry); |
| + __stp_tf_handler_end(); |
| return UTRACE_RESUME; |
| } |
| |