Blame SOURCES/rhbz1525651.patch

193a19
commit 8d1ae0139eed2a1c2d118e6d3ea7facb52aab40c
193a19
Author: David Smith <dsmith@redhat.com>
193a19
Date:   Mon Jan 15 10:23:45 2018 -0600
193a19
193a19
    Fix BZ1525651 by making the task_finder shutdown more robust.
193a19
    
193a19
    * runtime/linux/task_finder.c (__stap_utrace_detach): New function, based
193a19
  on stap_utrace_detach(), but lets the caller know if the task had
193a19
      the utrace engine attached to it.
193a19
      (stap_utrace_detach): Just call __stap_utrace_detach().
193a19
      (stap_utrace_detach_ops): Perform the detaching in a loop, so if the
193a19
      process list snapshot we operate on becomes out of date, we'll still
193a19
      detach everything.
193a19
      (__stp_utrace_task_finder_report_clone): Move the
193a19
      __stp_tf_handler_start() call to the very start of the function.
193a19
      (__stp_utrace_task_finder_report_exec): Ditto.
193a19
      (__stp_utrace_task_finder_target_exec): Ditto.
193a19
      (__stp_utrace_task_finder_target_exit): Ditto.
193a19
      (__stp_utrace_task_finder_target_quiesce): Ditto.
193a19
      (__stp_utrace_task_finder_target_syscall_entry): Ditto.
193a19
      (__stp_utrace_task_finder_target_syscall_exit): Ditto.
193a19
193a19
diff --git a/runtime/linux/task_finder.c b/runtime/linux/task_finder.c
193a19
index 69c27fe..3346f2e 100644
193a19
--- a/runtime/linux/task_finder.c
193a19
+++ b/runtime/linux/task_finder.c
193a19
@@ -283,13 +283,17 @@ stap_register_task_finder_target(struct stap_task_finder_target *new_tgt)
193a19
 }
193a19
 
193a19
 static int
193a19
-stap_utrace_detach(struct task_struct *tsk,
193a19
-		   const struct utrace_engine_ops *ops)
193a19
+__stap_utrace_detach(struct task_struct *tsk,
193a19
+		     const struct utrace_engine_ops *ops,
193a19
+		     int *ops_matched)
193a19
 {
193a19
 	struct utrace_engine *engine;
193a19
 	struct mm_struct *mm;
193a19
 	int rc = 0;
193a19
 
193a19
+	if (ops_matched)
193a19
+		*ops_matched = 0;
193a19
+
193a19
 	// Ignore invalid tasks.
193a19
 	if (tsk == NULL || tsk->pid <= 0)
193a19
 		return 0;
193a19
@@ -331,6 +335,8 @@ stap_utrace_detach(struct task_struct *tsk,
193a19
 		rc = EFAULT;
193a19
 	}
193a19
 	else {
193a19
+		if (ops_matched)
193a19
+			*ops_matched = 1;
193a19
 		rc = utrace_control(tsk, engine, UTRACE_DETACH);
193a19
 		switch (rc) {
193a19
 		case 0:			/* success */
193a19
@@ -363,6 +369,13 @@ stap_utrace_detach(struct task_struct *tsk,
193a19
 	return rc;
193a19
 }
193a19
 
193a19
+static int
193a19
+stap_utrace_detach(struct task_struct *tsk,
193a19
+		   const struct utrace_engine_ops *ops)
193a19
+{
193a19
+	return __stap_utrace_detach(tsk, ops, NULL);
193a19
+}
193a19
+
193a19
 static void
193a19
 stap_utrace_detach_ops(struct utrace_engine_ops *ops)
193a19
 {
193a19
@@ -370,42 +383,66 @@ stap_utrace_detach_ops(struct utrace_engine_ops *ops)
193a19
 	struct utrace_engine *engine;
193a19
 	pid_t pid = 0;
193a19
 	int rc = 0;
193a19
-
193a19
-	// Notice we're not calling get_task_mm() in this loop. In
193a19
-	// every other instance when calling do_each_thread, we avoid
193a19
-	// tasks with no mm, because those are kernel threads.  So,
193a19
-	// why is this function different?  When a thread is in the
193a19
-	// process of dying, its mm gets freed.  Then, later the
193a19
-	// thread gets in the dying state and the thread's
193a19
-	// UTRACE_EVENT(DEATH) event handler gets called (if any).
193a19
+	int ops_matched;
193a19
+	int iterations = 0;
193a19
+
193a19
+	// We're going to do the detach in a loop. Why?
193a19
+	// do_each_thread() presents a snapshot of the list of
193a19
+	// processes on the system. If the system is currently
193a19
+	// creating lots of processes, we'll miss some during this
193a19
+	// loop (especially since utrace_barrier() can wait for a
193a19
+	// handler to finish).
193a19
 	//
193a19
-	// If a thread is in this "mortally wounded" state - no mm
193a19
-	// but not dead - and at that moment this function is called,
193a19
-	// we'd miss detaching from it if we were checking to see if
193a19
-	// it had an mm.
193a19
-
193a19
-	rcu_read_lock();
193a19
-	do_each_thread(grp, tsk) {
193a19
+	// So, we'll repeat the loop until we no longer find any
193a19
+	// processes that have this ops attached to it.
193a19
+	do {
193a19
+		// Notice we're not calling get_task_mm() in this
193a19
+		// loop. In every other instance when calling
193a19
+		// do_each_thread, we avoid tasks with no mm, because
193a19
+		// those are kernel threads.  So, why is this function
193a19
+		// different?  When a thread is in the process of
193a19
+		// dying, its mm gets freed.  Then, later the thread
193a19
+		// gets in the dying state and the thread's
193a19
+		// UTRACE_EVENT(DEATH) event handler gets called (if
193a19
+		// any).
193a19
+		//
193a19
+		// If a thread is in this "mortally wounded" state -
193a19
+		// no mm but not dead - and at that moment this
193a19
+		// function is called, we'd miss detaching from it if
193a19
+		// we were checking to see if it had an mm.
193a19
+
193a19
+		ops_matched = 0;
193a19
+		rcu_read_lock();
193a19
+		do_each_thread(grp, tsk) {
193a19
+			int matched = 0;
193a19
 #ifdef PF_KTHREAD
193a19
-		// Ignore kernel threads.  On systems without
193a19
-		// PF_KTHREAD, we're ok, since kernel threads won't be
193a19
-		// matched by the stap_utrace_detach() call.
193a19
-		if (tsk->flags & PF_KTHREAD)
193a19
-			continue;
193a19
+			// Ignore kernel threads.  On systems without
193a19
+			// PF_KTHREAD, we're ok, since kernel threads
193a19
+			// won't be matched by the
193a19
+			// __stap_utrace_detach() call.
193a19
+			if (tsk->flags & PF_KTHREAD)
193a19
+				continue;
193a19
 #endif
193a19
 
193a19
-		/* Notice we're purposefully ignoring errors from
193a19
-		 * stap_utrace_detach().  Even if we got an error on
193a19
-		 * this task, we need to keep detaching from other
193a19
-		 * tasks.  But warn, we might be unloading and dangling
193a19
-		 * engines are bad news. */
193a19
-		rc = stap_utrace_detach(tsk, ops);
193a19
-		if (rc != 0)
193a19
-			_stp_error("stap_utrace_detach returned error %d on pid %d", rc, tsk->pid);
193a19
-		WARN_ON(rc != 0);
193a19
-	} while_each_thread(grp, tsk);
193a19
-	rcu_read_unlock();
193a19
-	debug_task_finder_report();
193a19
+			/* Notice we're purposefully ignoring errors
193a19
+			 * from __stap_utrace_detach().  Even if we
193a19
+			 * got an error on this task, we need to keep
193a19
+			 * detaching from other tasks.  But warn, we
193a19
+			 * might be unloading and dangling engines are
193a19
+			 * bad news. */
193a19
+			rc = __stap_utrace_detach(tsk, ops, &matched);
193a19
+			if (rc != 0)
193a19
+				_stp_error("stap_utrace_detach returned error %d on pid %d", rc, tsk->pid);
193a19
+			WARN_ON(rc != 0);
193a19
+			ops_matched |= matched;
193a19
+		} while_each_thread(grp, tsk);
193a19
+		rcu_read_unlock();
193a19
+		debug_task_finder_report();
193a19
+		iterations++;
193a19
+	} while (ops_matched && iterations < 10);
193a19
+#ifdef DEBUG_TASK_FINDER
193a19
+	_stp_dbug(__FUNCTION__, __LINE__, "took %d attempts\n", iterations);
193a19
+#endif
193a19
 }
193a19
 
193a19
 static void
193a19
@@ -980,13 +1017,13 @@ __stp_utrace_task_finder_report_clone(enum utrace_resume_action action,
193a19
 #endif
193a19
 	int rc;
193a19
 
193a19
+	__stp_tf_handler_start();
193a19
 	if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
193a19
 		debug_task_finder_detach();
193a19
+		__stp_tf_handler_end();
193a19
 		return UTRACE_DETACH;
193a19
 	}
193a19
 
193a19
-	__stp_tf_handler_start();
193a19
-
193a19
 	// On clone, attach to the child.
193a19
 	rc = __stp_utrace_attach(child, engine->ops, 0,
193a19
 				 __STP_TASK_FINDER_EVENTS, UTRACE_RESUME);
193a19
@@ -1034,13 +1071,13 @@ __stp_utrace_task_finder_report_exec(enum utrace_resume_action action,
193a19
 	struct stap_task_finder_target *tgt;
193a19
 	int found_node = 0;
193a19
 
193a19
+	__stp_tf_handler_start();
193a19
 	if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
193a19
 		debug_task_finder_detach();
193a19
+		__stp_tf_handler_end();
193a19
 		return UTRACE_DETACH;
193a19
 	}
193a19
 
193a19
-	__stp_tf_handler_start();
193a19
-
193a19
 	// If the original task was "interesting",
193a19
 	// __stp_utrace_task_finder_target_exec() will handle calling
193a19
 	// callbacks. 
193a19
@@ -1101,13 +1138,13 @@ __stp_utrace_task_finder_target_exec(enum utrace_resume_action action,
193a19
 	struct stap_task_finder_target *tgt = engine->data;
193a19
 	int rc;
193a19
 
193a19
+	__stp_tf_handler_start();
193a19
 	if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
193a19
 		debug_task_finder_detach();
193a19
+		__stp_tf_handler_end();
193a19
 		return UTRACE_DETACH;
193a19
 	}
193a19
 
193a19
-	__stp_tf_handler_start();
193a19
-
193a19
 	// We'll hardcode this as a process end.  If a thread
193a19
 	// calls exec() (which it isn't supposed to), the kernel
193a19
 	// "promotes" it to being a process.  Call the callbacks.
193a19
@@ -1123,8 +1160,8 @@ __stp_utrace_task_finder_target_exec(enum utrace_resume_action action,
193a19
 	// __stp_utrace_attach_match_tsk() to figure out if the
193a19
 	// exec'ed program is "interesting".
193a19
 
193a19
-	__stp_tf_handler_end();
193a19
 	debug_task_finder_detach();
193a19
+	__stp_tf_handler_end();
193a19
 	return UTRACE_DETACH;
193a19
 }
193a19
 
193a19
@@ -1144,12 +1181,13 @@ __stp_utrace_task_finder_target_exit(struct utrace_engine *engine,
193a19
 #endif
193a19
 	struct stap_task_finder_target *tgt = engine->data;
193a19
 
193a19
+	__stp_tf_handler_start();
193a19
 	if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
193a19
 		debug_task_finder_detach();
193a19
+		__stp_tf_handler_end();
193a19
 		return UTRACE_DETACH;
193a19
 	}
193a19
 
193a19
-	__stp_tf_handler_start();
193a19
 	// The first implementation of this added a
193a19
 	// UTRACE_EVENT(DEATH) handler to
193a19
 	// __stp_utrace_task_finder_ops.  However, dead threads don't
193a19
@@ -1163,8 +1201,8 @@ __stp_utrace_task_finder_target_exit(struct utrace_engine *engine,
193a19
 		__stp_call_callbacks(tgt, tsk, 0, (tsk->pid == tsk->tgid));
193a19
 	}
193a19
 
193a19
-	__stp_tf_handler_end();
193a19
 	debug_task_finder_detach();
193a19
+	__stp_tf_handler_end();
193a19
 	return UTRACE_DETACH;
193a19
 }
193a19
 
193a19
@@ -1332,18 +1370,19 @@ __stp_utrace_task_finder_target_quiesce(enum utrace_resume_action action,
193a19
 	struct stap_task_finder_target *tgt = engine->data;
193a19
 	int rc;
193a19
 
193a19
+	__stp_tf_handler_start();
193a19
 	if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
193a19
 		debug_task_finder_detach();
193a19
+		__stp_tf_handler_end();
193a19
 		return UTRACE_DETACH;
193a19
 	}
193a19
 
193a19
 	if (tgt == NULL || tsk == NULL) {
193a19
 		debug_task_finder_detach();
193a19
+		__stp_tf_handler_end();
193a19
 		return UTRACE_DETACH;
193a19
 	}
193a19
 
193a19
-	__stp_tf_handler_start();
193a19
-
193a19
 	// Turn off quiesce handling
193a19
 	rc = utrace_set_events(tsk, engine,
193a19
 			       __STP_ATTACHED_TASK_BASE_EVENTS(tgt));
193a19
@@ -1418,13 +1457,17 @@ __stp_utrace_task_finder_target_syscall_entry(enum utrace_resume_action action,
193a19
 	int is_mprotect = 0;
193a19
 	int is_munmap = 0;
193a19
 
193a19
+	__stp_tf_handler_start();
193a19
 	if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
193a19
 		debug_task_finder_detach();
193a19
+		__stp_tf_handler_end();
193a19
 		return UTRACE_DETACH;
193a19
 	}
193a19
 
193a19
-	if (tgt == NULL)
193a19
+	if (tgt == NULL) {
193a19
+		__stp_tf_handler_end();
193a19
 		return UTRACE_RESUME;
193a19
+	}
193a19
 
193a19
 	// See if syscall is one we're interested in.  On x86_64, this
193a19
 	// is a potentially expensive operation (since we have to
193a19
@@ -1442,20 +1485,23 @@ __stp_utrace_task_finder_target_syscall_entry(enum utrace_resume_action action,
193a19
 				     ? 1 : 0);
193a19
 		}
193a19
 	}
193a19
-	if (!is_mmap_or_mmap2 && !is_mprotect && !is_munmap)
193a19
+	if (!is_mmap_or_mmap2 && !is_mprotect && !is_munmap) {
193a19
+		__stp_tf_handler_end();
193a19
 		return UTRACE_RESUME;
193a19
+	}
193a19
 
193a19
 	// The syscall is one we're interested in, but do we have a
193a19
 	// handler for it?
193a19
 	if ((is_mmap_or_mmap2 && tgt->mmap_events == 0)
193a19
 	    || (is_mprotect && tgt->mprotect_events == 0)
193a19
-	    || (is_munmap && tgt->munmap_events == 0))
193a19
+	    || (is_munmap && tgt->munmap_events == 0)) {
193a19
+		__stp_tf_handler_end();
193a19
 		return UTRACE_RESUME;
193a19
+	}
193a19
 
193a19
 	// Save the needed arguments.  Note that for mmap, we really
193a19
 	// just need the return value, so there is no need to save
193a19
 	// any arguments.
193a19
-	__stp_tf_handler_start();
193a19
 	if (is_munmap) {
193a19
 		// We need 2 arguments for munmap()
193a19
 		syscall_get_arguments(tsk, regs, 0, 2, args);
193a19
@@ -1501,23 +1547,28 @@ __stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action,
193a19
 	unsigned long rv;
193a19
 	struct __stp_tf_map_entry *entry;
193a19
 
193a19
+	__stp_tf_handler_start();
193a19
 	if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
193a19
 		debug_task_finder_detach();
193a19
+		__stp_tf_handler_end();
193a19
 		return UTRACE_DETACH;
193a19
 	}
193a19
 
193a19
-	if (tgt == NULL)
193a19
+	if (tgt == NULL) {
193a19
+		__stp_tf_handler_end();
193a19
 		return UTRACE_RESUME;
193a19
+	}
193a19
 
193a19
 	// See if we can find saved syscall info.  If we can, it must
193a19
 	// be one of the syscalls we are interested in (and we must
193a19
 	// have callbacks to call for it).
193a19
 	entry = __stp_tf_get_map_entry(tsk);
193a19
-	if (entry == NULL)
193a19
+	if (entry == NULL) {
193a19
+		__stp_tf_handler_end();
193a19
 		return UTRACE_RESUME;
193a19
+	}
193a19
 
193a19
 	// Get return value
193a19
-	__stp_tf_handler_start();
193a19
 	rv = syscall_get_return_value(tsk, regs);
193a19
 
193a19
 	dbug_task_vma(1,
193a19
@@ -1547,8 +1598,8 @@ __stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action,
193a19
 					      entry->arg1, entry->arg2);
193a19
 	}
193a19
 
193a19
-	__stp_tf_handler_end();
193a19
 	__stp_tf_remove_map_entry(entry);
193a19
+	__stp_tf_handler_end();
193a19
 	return UTRACE_RESUME;
193a19
 }
193a19