Blob Blame History Raw
commit b9b055e2f6e7db5197fe90c4b95ae52dd84a5994 (HEAD, TMP)
Author: David Smith <dsmith@redhat.com>
Date:   Fri Jul 18 15:49:39 2014 -0500

    Fixed PR17181 by making utrace handle interrupting processes better.
    
    * runtime/stp_utrace.c (utrace_init): Use kallsyms_lookup_name() to lookup
      "wake_up_state" if needed.
      (stp_task_notify_resume): New function to handle the details of adding a
      resume handler.
      (utrace_cleanup): Only output debug printk's if STP_TF_DEBUG is defined.
      (utrace_set_events): Improve check.
      (utrace_do_stop): Call stp_task_notify_resume() instead of inline code.
      (utrace_wakeup): Call stp_wake_up_state() instead of wake_up_process()
      to avoid a WARN(). Call stp_task_notify_resume() instead of inline
      code.
      (utrace_control): Call stp_task_notify_resume() instead of inline code.
      (finish_report): Ditto.
      (finish_resume_report): Add UTRACE_INTERRUPT support.
      (utrace_resume): Handle UTRACE_INTERRUPT.
    * runtime/linux/task_finder2.c (stap_task_finder_post_init): Go back to
      sending UTRACE_INTERRUPT to all tasks.
    * buildrun.cxx (compile_pass): Add export tests for 'wake_up_state' and
      'try_to_wake_up'.
    * runtime/linux/runtime.h: Added 'kallsyms_wake_up_state' declaration when
      necessary.
    * testsuite/systemtap.base/process_resume.c: New file.
    * testsuite/systemtap.base/process_resume.exp: New file.

diff --git a/buildrun.cxx b/buildrun.cxx
index a2f2197e4609..87b946a27621 100644
--- a/buildrun.cxx
+++ b/buildrun.cxx
@@ -404,6 +404,8 @@ compile_pass (systemtap_session& s)
 
   // used by runtime/stp_utrace.c
   output_exportconf(s, o, "task_work_add", "STAPCONF_TASK_WORK_ADD_EXPORTED");
+  output_exportconf(s, o, "wake_up_state", "STAPCONF_WAKE_UP_STATE_EXPORTED");
+  output_exportconf(s, o, "try_to_wake_up", "STAPCONF_TRY_TO_WAKE_UP_EXPORTED");
   output_exportconf(s, o, "signal_wake_up_state", "STAPCONF_SIGNAL_WAKE_UP_STATE_EXPORTED");
   output_exportconf(s, o, "signal_wake_up", "STAPCONF_SIGNAL_WAKE_UP_EXPORTED");
   output_exportconf(s, o, "__lock_task_sighand", "STAPCONF___LOCK_TASK_SIGHAND_EXPORTED");
diff --git a/runtime/linux/runtime.h b/runtime/linux/runtime.h
index 76dbea4fd24d..281ce41af72e 100644
--- a/runtime/linux/runtime.h
+++ b/runtime/linux/runtime.h
@@ -180,6 +180,9 @@ static void *kallsyms_task_work_add;
 static void *kallsyms_task_work_cancel;
 #endif
 
+#if !defined(STAPCONF_TRY_TO_WAKE_UP_EXPORTED) && !defined(STAPCONF_WAKE_UP_STATE_EXPORTED)
+static void *kallsyms_wake_up_state;
+#endif
 #if !defined(STAPCONF_SIGNAL_WAKE_UP_STATE_EXPORTED)
 static void *kallsyms_signal_wake_up_state;
 #endif
diff --git a/runtime/linux/task_finder2.c b/runtime/linux/task_finder2.c
index e88543a81039..64f263319c0c 100644
--- a/runtime/linux/task_finder2.c
+++ b/runtime/linux/task_finder2.c
@@ -1805,10 +1805,12 @@ stap_task_finder_post_init(void)
 				int rc = utrace_control(tsk, engine,
 							UTRACE_INTERRUPT);
 				/* If utrace_control() returns
-				 * EINPROGRESS when we're trying to
-				 * stop/interrupt, that means the task
-				 * hasn't stopped quite yet, but will
-				 * soon.  Ignore this error. */
+				 * EINPROGRESS when we're
+				 * trying to stop/interrupt,
+				 * that means the task hasn't
+				 * stopped quite yet, but will
+				 * soon.  Ignore this
+				 * error. */
 				if (rc != 0 && rc != -EINPROGRESS) {
 					_stp_error("utrace_control returned error %d on pid %d",
 						   rc, (int)tsk->pid);
diff --git a/runtime/stp_utrace.c b/runtime/stp_utrace.c
index 0c9d0eba3969..acbe9366ed83 100644
--- a/runtime/stp_utrace.c
+++ b/runtime/stp_utrace.c
@@ -109,6 +109,22 @@ static void utrace_report_exec(void *cb_data __attribute__ ((unused)),
 #define __UTRACE_REGISTERED	1
 static atomic_t utrace_state = ATOMIC_INIT(__UTRACE_UNREGISTERED);
 
+// If wake_up_state() is exported, use it.
+#if defined(STAPCONF_WAKE_UP_STATE_EXPORTED)
+#define stp_wake_up_state wake_up_state
+// Otherwise, try to use try_to_wake_up(). The wake_up_state()
+// function is just a wrapper around try_to_wake_up().
+#elif defined(STAPCONF_TRY_TO_WAKE_UP_EXPORTED)
+static inline int stp_wake_up_state(struct task_struct *p, unsigned int state)
+{
+	return try_to_wake_up(p, state, 0);
+}
+// Otherwise, we'll have to look up wake_up_state() with kallsyms.
+#else
+typedef typeof(&wake_up_state) wake_up_state_fn;
+#define stp_wake_up_state (* (wake_up_state_fn)kallsyms_wake_up_state)
+#endif
+
 #if !defined(STAPCONF_SIGNAL_WAKE_UP_STATE_EXPORTED)
 // Sigh. On kernel's without signal_wake_up_state(), there is no
 // declaration to use in 'typeof(&signal_wake_up_state)'. So, we'll
@@ -188,6 +204,14 @@ static int utrace_init(void)
 		INIT_HLIST_HEAD(&task_utrace_table[i]);
 	}
 
+#if !defined(STAPCONF_TRY_TO_WAKE_UP_EXPORTED) \
+    && !defined(STAPCONF_WAKE_UP_STATE_EXPORTED)
+	kallsyms_wake_up_state = (void *)kallsyms_lookup_name("wake_up_state");
+        if (kallsyms_wake_up_state == NULL) {
+		_stp_error("Can't resolve wake_up_state!");
+		goto error;
+        }
+#endif
 #if !defined(STAPCONF_SIGNAL_WAKE_UP_STATE_EXPORTED)
 	/* The signal_wake_up_state() function (which replaces
 	 * signal_wake_up() in newer kernels) isn't exported. Look up
@@ -316,6 +340,29 @@ static int utrace_exit(void)
 	return 0;
 }
 
+/*
+ * stp_task_notify_resume() is our version of
+ * set_notify_resume(). When called, the task_work infrastructure will
+ * cause utrace_resume() to get called.
+ */
+static void
+stp_task_notify_resume(struct task_struct *target, struct utrace *utrace)
+{
+	if (! utrace->task_work_added) {
+		int rc = stp_task_work_add(target, &utrace->work);
+		if (rc == 0) {
+			utrace->task_work_added = 1;
+		}
+		/* stp_task_work_add() returns -ESRCH if the task has
+		 * already passed exit_task_work(). Just ignore this
+		 * error. */
+		else if (rc != -ESRCH) {
+			printk(KERN_ERR "%s:%d - task_work_add() returned %d\n",
+			       __FUNCTION__, __LINE__, rc);
+		}
+	}
+}
+
 static void utrace_resume(struct task_work *work);
 static void utrace_report_work(struct task_work *work);
 
@@ -348,21 +395,29 @@ static void utrace_cleanup(struct utrace *utrace)
 	}
 
 	if (utrace->task_work_added) {
+#ifdef STP_TF_DEBUG
 		if (stp_task_work_cancel(utrace->task, &utrace_resume) == NULL)
 			printk(KERN_ERR "%s:%d - task_work_cancel() failed? task %p, %d, %s\n",
 			       __FUNCTION__, __LINE__, utrace->task,
 			       utrace->task->tgid,
 			       (utrace->task->comm ? utrace->task->comm
 				: "UNKNOWN"));
+#else
+		stp_task_work_cancel(utrace->task, &utrace_resume);
+#endif
 		utrace->task_work_added = 0;
 	}
 	if (utrace->report_work_added) {
+#ifdef STP_TF_DEBUG
 		if (stp_task_work_cancel(utrace->task, &utrace_report_work) == NULL)
 			printk(KERN_ERR "%s:%d - task_work_cancel() failed? task %p, %d, %s\n",
 			       __FUNCTION__, __LINE__, utrace->task,
 			       utrace->task->tgid,
 			       (utrace->task->comm ? utrace->task->comm
 				: "UNKNOWN"));
+#else
+		stp_task_work_cancel(utrace->task, &utrace_report_work);
+#endif
 		utrace->report_work_added = 0;
 	}
 	spin_unlock(&utrace->lock);
@@ -945,7 +1000,11 @@ static int utrace_set_events(struct task_struct *target,
 	 * If utrace_report_death() is already progress now,
 	 * it's too late to clear the death event bits.
 	 */
-	if (((old_flags & ~events) & _UTRACE_DEATH_EVENTS) && utrace->death)
+	if (target->exit_state &&
+	    (((events & ~old_flags) & _UTRACE_DEATH_EVENTS) ||
+	     (utrace->death &&
+	      ((old_flags & ~events) & _UTRACE_DEATH_EVENTS)) ||
+	     (utrace->reap && ((old_flags & ~events) & UTRACE_EVENT(REAP)))))
 		goto unlock;
 
 	/*
@@ -1035,20 +1094,7 @@ static bool utrace_do_stop(struct task_struct *target, struct utrace *utrace)
 		spin_unlock_irq(&target->sighand->siglock);
 	} else if (utrace->resume > UTRACE_REPORT) {
 		utrace->resume = UTRACE_REPORT;
-		if (! utrace->task_work_added) {
-			int rc = stp_task_work_add(target, &utrace->work);
-			if (rc == 0) {
-				utrace->task_work_added = 1;
-			}
-			/* stp_task_work_add() returns -ESRCH if the task
-			 * has already passed exit_task_work(). Just
-			 * ignore this error. */
-			else if (rc != -ESRCH) {
-				printk(KERN_ERR
-				       "%s:%d - task_work_add() returned %d\n",
-				       __FUNCTION__, __LINE__, rc);
-			}
-		}
+		stp_task_notify_resume(target, utrace);
 	}
 
 	return task_is_traced(target);
@@ -1066,10 +1112,7 @@ static void utrace_wakeup(struct task_struct *target, struct utrace *utrace)
 	    target->signal->group_stop_count)
 		target->state = TASK_STOPPED;
 	else
-		/* FIXME: Had to change wake_up_state() to
-		 * wake_up_process() here (since wake_up_state() isn't
-		 * exported).  Reasonable? */
-		wake_up_process(target);
+		stp_wake_up_state(target, __TASK_TRACED);
 	spin_unlock_irq(&target->sighand->siglock);
 }
 
@@ -1175,20 +1218,9 @@ static void utrace_stop(struct task_struct *task, struct utrace *utrace,
 		 * Ensure a reporting pass when we're resumed.
 		 */
 		utrace->resume = action;
-		if (! utrace->task_work_added) {
-			int rc = stp_task_work_add(task, &utrace->work);
-			if (rc == 0) {
-				utrace->task_work_added = 1;
-			}
-			/* stp_task_work_add() returns -ESRCH if the task
-			 * has already passed exit_task_work(). Just
-			 * ignore this error. */
-			else if (rc != -ESRCH) {
-				printk(KERN_ERR
-				       "%s:%d - stp_task_work_add() returned %d\n",
-				       __FUNCTION__, __LINE__, rc);
-			}
-		}
+		stp_task_notify_resume(task, utrace);
+		if (action == UTRACE_INTERRUPT)
+			set_thread_flag(TIF_SIGPENDING);
 	}
 
 	/*
@@ -1540,21 +1572,7 @@ static int utrace_control(struct task_struct *target,
 		clear_engine_wants_stop(engine);
 		if (action < utrace->resume) {
 			utrace->resume = action;
-			if (! utrace->task_work_added) {
-				ret = stp_task_work_add(target, &utrace->work);
-				if (ret == 0) {
-					utrace->task_work_added = 1;
-				}
-				/* stp_task_work_add() returns -ESRCH if
-				 * the task has already passed
-				 * exit_task_work(). Just ignore this
-				 * error. */
-				else if (ret != -ESRCH) {
-					printk(KERN_ERR
-					       "%s:%d - stp_task_work_add() returned %d\n",
-					       __FUNCTION__, __LINE__, ret);
-				}
-			}
+			stp_task_notify_resume(target, utrace);
 		}
 		break;
 
@@ -1574,6 +1592,7 @@ static int utrace_control(struct task_struct *target,
 		 * we've serialized any later recalc_sigpending() after our
 		 * setting of utrace->resume to force it on.
 		 */
+		stp_task_notify_resume(target, utrace);
 		if (reset) {
 			/*
 			 * This is really just to keep the invariant that
@@ -1583,35 +1602,12 @@ static int utrace_control(struct task_struct *target,
 			 */
 			set_tsk_thread_flag(target, TIF_SIGPENDING);
 		} else {
-			int rc = 0;
-
-			if (! utrace->task_work_added) {
-				rc = stp_task_work_add(target, &utrace->work);
-				/* stp_task_work_add() returns -ESRCH
-				 * if the task has already passed
-				 * exit_task_work(). Just ignore this
-				 * error. */
-				if (rc == 0 || rc == -ESRCH) {
-					utrace->task_work_added = 1;
-					rc = 0;
-				}
-				else {
-					printk(KERN_ERR
-					       "%s:%d - task_work_add() returned %d\n",
-					       __FUNCTION__, __LINE__, rc);
-				}
-			}
-
-			if (likely(rc == 0)) {
-				struct sighand_struct *sighand;
-				unsigned long irqflags;
-
-				sighand = stp_lock_task_sighand(target,
-								&irqflags);
-				if (likely(sighand)) {
-					stp_signal_wake_up(target, 0);
-					unlock_task_sighand(target, &irqflags);
-				}
+			struct sighand_struct *sighand;
+			unsigned long irqflags;
+			sighand = stp_lock_task_sighand(target, &irqflags);
+			if (likely(sighand)) {
+				stp_signal_wake_up(target, 0);
+				unlock_task_sighand(target, &irqflags);
 			}
 		}
 		break;
@@ -1764,20 +1760,9 @@ static void finish_report(struct task_struct *task, struct utrace *utrace,
 	if (resume < utrace->resume) {
 		spin_lock(&utrace->lock);
 		utrace->resume = resume;
-		if (! utrace->task_work_added) {
-			int rc = stp_task_work_add(task, &utrace->work);
-			if (rc == 0) {
-				utrace->task_work_added = 1;
-			}
-			/* stp_task_work_add() returns -ESRCH if the task
-			 * has already passed exit_task_work(). Just
-			 * ignore this error. */
-			else if (rc != -ESRCH) {
-				printk(KERN_ERR
-				       "%s:%d - task_work_add() returned %d\n",
-				       __FUNCTION__, __LINE__, rc);
-			}
-		}
+		stp_task_notify_resume(task, utrace);
+		if (resume == UTRACE_INTERRUPT)
+			set_tsk_thread_flag(task, TIF_SIGPENDING);
 		spin_unlock(&utrace->lock);
 	}
 
@@ -2323,12 +2308,16 @@ static void finish_resume_report(struct task_struct *task,
 		utrace_stop(task, utrace, report->resume_action);
 		break;
 
+	case UTRACE_INTERRUPT:
+		if (!signal_pending(task)) {
+			stp_task_notify_resume(task, utrace);
+			set_tsk_thread_flag(task, TIF_SIGPENDING);
+		}
+		break;
+
 	case UTRACE_REPORT:
 	case UTRACE_RESUME:
 	default:
-#if 0
-		user_disable_single_step(task);
-#endif
 		break;
 	}
 }
@@ -2385,6 +2374,14 @@ static void utrace_resume(struct task_work *work)
 		/* Remember that this task_work_func is finished. */
 		stp_task_work_func_done();
 		return;
+	case UTRACE_INTERRUPT:
+		/*
+		 * Note that UTRACE_INTERRUPT reporting was handled by
+		 * utrace_get_signal() in original utrace. In this
+		 * utrace version, we'll handle it here like UTRACE_REPORT.
+		 *
+		 * Fallthrough...
+		 */
 	case UTRACE_REPORT:
 		if (unlikely(!(utrace->utrace_flags & UTRACE_EVENT(QUIESCE))))
 			break;
diff --git a/testsuite/systemtap.base/process_resume.c b/testsuite/systemtap.base/process_resume.c
new file mode 100644
index 000000000000..ddf8b1eaf795
--- /dev/null
+++ b/testsuite/systemtap.base/process_resume.c
@@ -0,0 +1,10 @@
+#include <sys/types.h>
+#include <unistd.h>
+#include <signal.h>
+ 
+int main()
+{
+    kill(getpid(), SIGSTOP);
+    getpid();
+    return 0;
+}
diff --git a/testsuite/systemtap.base/process_resume.exp b/testsuite/systemtap.base/process_resume.exp
new file mode 100644
index 000000000000..0cb4a4322efe
--- /dev/null
+++ b/testsuite/systemtap.base/process_resume.exp
@@ -0,0 +1,82 @@
+# This test answers the following questions:
+#
+# - Can we attach to stopped processes correctly? Before PR?????? fix,
+#   stap would cause a kernel warn.
+#
+# - Does resuming work on those processes? Before PR?????? fix,
+#   processes would get stuck.
+
+set test "process_resume"
+
+# Only run on make installcheck and if we have utrace
+if {![installtest_p]} { untested "$test"; return }
+if {![utrace_p]} { untested "$test : no kernel utrace support found"; return }
+
+# "load" generation function for stap_run.  It waits a bit, tells the
+# test program to continue and then waits a bit.
+proc resume_load {} {
+    global pid
+
+    wait_n_secs 2
+    kill -cont $pid 0
+    wait_n_secs 2
+    return 0
+}
+
+set getpid_script {
+    global getpid_calls = 0
+    probe begin { printf("systemtap starting probe\n") }
+    probe syscall.getpid { getpid_calls++ }
+    probe end { printf("systemtap ending probe\n")
+	printf("getpid calls = %d\n", getpid_calls) }
+}
+# Notice we don't care how many getpid() calls we see.
+set getpid_script_output "getpid calls = \[0-9\]\r\n"
+
+set end_script {
+    global end_probes_fired = 0
+    probe begin { printf("systemtap starting probe\n") }
+    probe process("%s").end { end_probes_fired++ }
+    probe end { printf("systemtap ending probe\n")
+	printf("end probes = %%d\n", end_probes_fired) }
+}
+set end_script_output "end probes = 1\r\n"
+
+# Compile test program.
+set res [target_compile $srcdir/$subdir/${test}.c ${test} executable \
+    "additional_flags=-O2 additional_flags=-g"]
+if { $res != "" } {
+    verbose "target_compile failed: $res" 2
+    fail "${test}: unable to compile ${test}.c"
+    return
+}
+
+# Start the application and put it in the background before each
+# test. It will stop itself and be resumed by the 'resume_load' proc.
+
+set TEST_NAME "${test}-getpid"
+set pid [exec ./$test &]
+#spawn "./$test"
+#set id $spawn_id
+#set pid [exp_pid -i $spawn_id]
+stap_run $TEST_NAME resume_load $getpid_script_output -e $getpid_script
+if {[file isdirectory /proc/$pid]} {
+    kill -KILL $pid 0
+    fail "$TEST_NAME: process didn't resume properly"
+} else {
+    pass "$TEST_NAME: process resumed properly"
+}
+
+set TEST_NAME "${test}-end"
+set pid [exec ./$test &]
+set script [format $end_script $test]
+stap_run $TEST_NAME resume_load $end_script_output -e $script
+if {[file isdirectory /proc/$pid]} {
+    kill -KILL $pid 0
+    fail "$TEST_NAME: process didn't resume properly"
+} else {
+    pass "$TEST_NAME: process resumed properly"
+}
+
+# Cleanup
+if { $verbose == 0 } { catch { exec rm -f $test } }