diff --git a/SOURCES/rhbz1525651.patch b/SOURCES/rhbz1525651.patch
new file mode 100644
index 0000000..c18aeb4
--- /dev/null
+++ b/SOURCES/rhbz1525651.patch
@@ -0,0 +1,361 @@
+commit 8d1ae0139eed2a1c2d118e6d3ea7facb52aab40c
+Author: David Smith <dsmith@redhat.com>
+Date:   Mon Jan 15 10:23:45 2018 -0600
+
+    Fix BZ1525651 by making the task_finder shutdown more robust.
+    
+    * runtime/linux/task_finder.c (__stap_utrace_detach): New function, based
+      on stap_utrace_detach(), but lets the caller know if any the task had
+      the utrace engine attached to it.
+      (stap_utrace_detach): Just call __stap_utrace_detach().
+      (stap_utrace_detach_ops): Perform the detaching in a loop, so if the
+      process list snapshot we operate on becomes out of date, we'll still
+      detach everything.
+      (__stp_utrace_task_finder_report_clone): Move the
+      __stp_tf_handler_start() call to the very start of the function.
+      (__stp_utrace_task_finder_report_exec): Ditto.
+      (__stp_utrace_task_finder_target_exec): Ditto.
+      (__stp_utrace_task_finder_target_exit): Ditto.
+      (__stp_utrace_task_finder_target_quiesce): Ditto.
+      (__stp_utrace_task_finder_target_syscall_entry): Ditto.
+      (__stp_utrace_task_finder_target_syscall_exit): Ditto.
+
+diff --git a/runtime/linux/task_finder.c b/runtime/linux/task_finder.c
+index 69c27fe..3346f2e 100644
+--- a/runtime/linux/task_finder.c
++++ b/runtime/linux/task_finder.c
+@@ -283,13 +283,17 @@ stap_register_task_finder_target(struct stap_task_finder_target *new_tgt)
+ }
+ 
+ static int
+-stap_utrace_detach(struct task_struct *tsk,
+-		   const struct utrace_engine_ops *ops)
++__stap_utrace_detach(struct task_struct *tsk,
++		     const struct utrace_engine_ops *ops,
++		     int *ops_matched)
+ {
+ 	struct utrace_engine *engine;
+ 	struct mm_struct *mm;
+ 	int rc = 0;
+ 
++	if (ops_matched)
++		*ops_matched = 0;
++
+ 	// Ignore invalid tasks.
+ 	if (tsk == NULL || tsk->pid <= 0)
+ 		return 0;
+@@ -331,6 +335,8 @@ stap_utrace_detach(struct task_struct *tsk,
+ 		rc = EFAULT;
+ 	}
+ 	else {
++		if (ops_matched)
++			*ops_matched = 1;
+ 		rc = utrace_control(tsk, engine, UTRACE_DETACH);
+ 		switch (rc) {
+ 		case 0:			/* success */
+@@ -363,6 +369,13 @@ stap_utrace_detach(struct task_struct *tsk,
+ 	return rc;
+ }
+ 
++static int
++stap_utrace_detach(struct task_struct *tsk,
++		   const struct utrace_engine_ops *ops)
++{
++	return __stap_utrace_detach(tsk, ops, NULL);
++}
++
+ static void
+ stap_utrace_detach_ops(struct utrace_engine_ops *ops)
+ {
+@@ -370,42 +383,66 @@ stap_utrace_detach_ops(struct utrace_engine_ops *ops)
+ 	struct utrace_engine *engine;
+ 	pid_t pid = 0;
+ 	int rc = 0;
+-
+-	// Notice we're not calling get_task_mm() in this loop. In
+-	// every other instance when calling do_each_thread, we avoid
+-	// tasks with no mm, because those are kernel threads.  So,
+-	// why is this function different?  When a thread is in the
+-	// process of dying, its mm gets freed.  Then, later the
+-	// thread gets in the dying state and the thread's
+-	// UTRACE_EVENT(DEATH) event handler gets called (if any).
++	int ops_matched;
++	int iterations = 0;
++
++	// We're going to do the detach in a loop. Why?
++	// do_each_thread() presents a snapshot of the list of
++	// processes on the system. If the system is currently
++	// creating lots of processes, we'll miss some during this
++	// loop (especially since utrace_barrier() can wait for a
++	// handler to finish).
+ 	//
+-	// If a thread is in this "mortally wounded" state - no mm
+-	// but not dead - and at that moment this function is called,
+-	// we'd miss detaching from it if we were checking to see if
+-	// it had an mm.
+-
+-	rcu_read_lock();
+-	do_each_thread(grp, tsk) {
++	// So, we'll repeat the loop until we longer find any
++	// processes that have this ops attached to it.
++	do {
++		// Notice we're not calling get_task_mm() in this
++		// loop. In every other instance when calling
++		// do_each_thread, we avoid tasks with no mm, because
++		// those are kernel threads.  So, why is this function
++		// different?  When a thread is in the process of
++		// dying, its mm gets freed.  Then, later the thread
++		// gets in the dying state and the thread's
++		// UTRACE_EVENT(DEATH) event handler gets called (if
++		// any).
++		//
++		// If a thread is in this "mortally wounded" state -
++		// no mm but not dead - and at that moment this
++		// function is called, we'd miss detaching from it if
++		// we were checking to see if it had an mm.
++
++		ops_matched = 0;
++		rcu_read_lock();
++		do_each_thread(grp, tsk) {
++			int matched = 0;
+ #ifdef PF_KTHREAD
+-		// Ignore kernel threads.  On systems without
+-		// PF_KTHREAD, we're ok, since kernel threads won't be
+-		// matched by the stap_utrace_detach() call.
+-		if (tsk->flags & PF_KTHREAD)
+-			continue;
++			// Ignore kernel threads.  On systems without
++			// PF_KTHREAD, we're ok, since kernel threads
++			// won't be matched by the
++			// __stap_utrace_detach() call.
++			if (tsk->flags & PF_KTHREAD)
++				continue;
+ #endif
+ 
+-		/* Notice we're purposefully ignoring errors from
+-		 * stap_utrace_detach().  Even if we got an error on
+-		 * this task, we need to keep detaching from other
+-		 * tasks.  But warn, we might be unloading and dangling
+-		 * engines are bad news. */
+-		rc = stap_utrace_detach(tsk, ops);
+-		if (rc != 0)
+-			_stp_error("stap_utrace_detach returned error %d on pid %d", rc, tsk->pid);
+-		WARN_ON(rc != 0);
+-	} while_each_thread(grp, tsk);
+-	rcu_read_unlock();
+-	debug_task_finder_report();
++			/* Notice we're purposefully ignoring errors
++			 * from __stap_utrace_detach().  Even if we
++			 * got an error on this task, we need to keep
++			 * detaching from other tasks.  But warn, we
++			 * might be unloading and dangling engines are
++			 * bad news. */
++			rc = __stap_utrace_detach(tsk, ops, &matched);
++			if (rc != 0)
++				_stp_error("stap_utrace_detach returned error %d on pid %d", rc, tsk->pid);
++			WARN_ON(rc != 0);
++			ops_matched |= matched;
++		} while_each_thread(grp, tsk);
++		rcu_read_unlock();
++		debug_task_finder_report();
++		iterations++;
++	} while (ops_matched && iterations < 10);
++#ifdef DEBUG_TASK_FINDER
++	_stp_dbug(__FUNCTION__, __LINE__, "took %d attempts\n", iterations);
++#endif
+ }
+ 
+ static void
+@@ -980,13 +1017,13 @@ __stp_utrace_task_finder_report_clone(enum utrace_resume_action action,
+ #endif
+ 	int rc;
+ 
++	__stp_tf_handler_start();
+ 	if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
+ 		debug_task_finder_detach();
++		__stp_tf_handler_end();
+ 		return UTRACE_DETACH;
+ 	}
+ 
+-	__stp_tf_handler_start();
+-
+ 	// On clone, attach to the child.
+ 	rc = __stp_utrace_attach(child, engine->ops, 0,
+ 				 __STP_TASK_FINDER_EVENTS, UTRACE_RESUME);
+@@ -1034,13 +1071,13 @@ __stp_utrace_task_finder_report_exec(enum utrace_resume_action action,
+ 	struct stap_task_finder_target *tgt;
+ 	int found_node = 0;
+ 
++	__stp_tf_handler_start();
+ 	if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
+ 		debug_task_finder_detach();
++		__stp_tf_handler_end();
+ 		return UTRACE_DETACH;
+ 	}
+ 
+-	__stp_tf_handler_start();
+-
+ 	// If the original task was "interesting",
+ 	// __stp_utrace_task_finder_target_exec() will handle calling
+ 	// callbacks. 
+@@ -1101,13 +1138,13 @@ __stp_utrace_task_finder_target_exec(enum utrace_resume_action action,
+ 	struct stap_task_finder_target *tgt = engine->data;
+ 	int rc;
+ 
++	__stp_tf_handler_start();
+ 	if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
+ 		debug_task_finder_detach();
++		__stp_tf_handler_end();
+ 		return UTRACE_DETACH;
+ 	}
+ 
+-	__stp_tf_handler_start();
+-
+ 	// We'll hardcode this as a process end.  If a thread
+ 	// calls exec() (which it isn't supposed to), the kernel
+ 	// "promotes" it to being a process.  Call the callbacks.
+@@ -1123,8 +1160,8 @@ __stp_utrace_task_finder_target_exec(enum utrace_resume_action action,
+ 	// __stp_utrace_attach_match_tsk() to figure out if the
+ 	// exec'ed program is "interesting".
+ 
+-	__stp_tf_handler_end();
+ 	debug_task_finder_detach();
++	__stp_tf_handler_end();
+ 	return UTRACE_DETACH;
+ }
+ 
+@@ -1144,12 +1181,13 @@ __stp_utrace_task_finder_target_exit(struct utrace_engine *engine,
+ #endif
+ 	struct stap_task_finder_target *tgt = engine->data;
+ 
++	__stp_tf_handler_start();
+ 	if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
+ 		debug_task_finder_detach();
++		__stp_tf_handler_end();
+ 		return UTRACE_DETACH;
+ 	}
+ 
+-	__stp_tf_handler_start();
+ 	// The first implementation of this added a
+ 	// UTRACE_EVENT(DEATH) handler to
+ 	// __stp_utrace_task_finder_ops.  However, dead threads don't
+@@ -1163,8 +1201,8 @@ __stp_utrace_task_finder_target_exit(struct utrace_engine *engine,
+ 		__stp_call_callbacks(tgt, tsk, 0, (tsk->pid == tsk->tgid));
+ 	}
+ 
+-	__stp_tf_handler_end();
+ 	debug_task_finder_detach();
++	__stp_tf_handler_end();
+ 	return UTRACE_DETACH;
+ }
+ 
+@@ -1332,18 +1370,19 @@ __stp_utrace_task_finder_target_quiesce(enum utrace_resume_action action,
+ 	struct stap_task_finder_target *tgt = engine->data;
+ 	int rc;
+ 
++	__stp_tf_handler_start();
+ 	if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
+ 		debug_task_finder_detach();
++		__stp_tf_handler_end();
+ 		return UTRACE_DETACH;
+ 	}
+ 
+ 	if (tgt == NULL || tsk == NULL) {
+ 		debug_task_finder_detach();
++		__stp_tf_handler_end();
+ 		return UTRACE_DETACH;
+ 	}
+ 
+-	__stp_tf_handler_start();
+-
+ 	// Turn off quiesce handling
+ 	rc = utrace_set_events(tsk, engine,
+ 			       __STP_ATTACHED_TASK_BASE_EVENTS(tgt));
+@@ -1418,13 +1457,17 @@ __stp_utrace_task_finder_target_syscall_entry(enum utrace_resume_action action,
+ 	int is_mprotect = 0;
+ 	int is_munmap = 0;
+ 
++	__stp_tf_handler_start();
+ 	if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
+ 		debug_task_finder_detach();
++		__stp_tf_handler_end();
+ 		return UTRACE_DETACH;
+ 	}
+ 
+-	if (tgt == NULL)
++	if (tgt == NULL) {
++		__stp_tf_handler_end();
+ 		return UTRACE_RESUME;
++	}
+ 
+ 	// See if syscall is one we're interested in.  On x86_64, this
+ 	// is a potentially expensive operation (since we have to
+@@ -1442,20 +1485,23 @@ __stp_utrace_task_finder_target_syscall_entry(enum utrace_resume_action action,
+ 				     ? 1 : 0);
+ 		}
+ 	}
+-	if (!is_mmap_or_mmap2 && !is_mprotect && !is_munmap)
++	if (!is_mmap_or_mmap2 && !is_mprotect && !is_munmap) {
++		__stp_tf_handler_end();
+ 		return UTRACE_RESUME;
++	}
+ 
+ 	// The syscall is one we're interested in, but do we have a
+ 	// handler for it?
+ 	if ((is_mmap_or_mmap2 && tgt->mmap_events == 0)
+ 	    || (is_mprotect && tgt->mprotect_events == 0)
+-	    || (is_munmap && tgt->munmap_events == 0))
++	    || (is_munmap && tgt->munmap_events == 0)) {
++		__stp_tf_handler_end();
+ 		return UTRACE_RESUME;
++	}
+ 
+ 	// Save the needed arguments.  Note that for mmap, we really
+ 	// just need the return value, so there is no need to save
+ 	// any arguments.
+-	__stp_tf_handler_start();
+ 	if (is_munmap) {
+ 		// We need 2 arguments for munmap()
+ 		syscall_get_arguments(tsk, regs, 0, 2, args);
+@@ -1501,23 +1547,28 @@ __stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action,
+ 	unsigned long rv;
+ 	struct __stp_tf_map_entry *entry;
+ 
++	__stp_tf_handler_start();
+ 	if (atomic_read(&__stp_task_finder_state) != __STP_TF_RUNNING) {
+ 		debug_task_finder_detach();
++		__stp_tf_handler_end();
+ 		return UTRACE_DETACH;
+ 	}
+ 
+-	if (tgt == NULL)
++	if (tgt == NULL) {
++		__stp_tf_handler_end();
+ 		return UTRACE_RESUME;
++	}
+ 
+ 	// See if we can find saved syscall info.  If we can, it must
+ 	// be one of the syscalls we are interested in (and we must
+ 	// have callbacks to call for it).
+ 	entry = __stp_tf_get_map_entry(tsk);
+-	if (entry == NULL)
++	if (entry == NULL) {
++		__stp_tf_handler_end();
+ 		return UTRACE_RESUME;
++	}
+ 
+ 	// Get return value
+-	__stp_tf_handler_start();
+ 	rv = syscall_get_return_value(tsk, regs);
+ 
+ 	dbug_task_vma(1,
+@@ -1547,8 +1598,8 @@ __stp_utrace_task_finder_target_syscall_exit(enum utrace_resume_action action,
+ 					      entry->arg1, entry->arg2);
+ 	}
+ 
+-	__stp_tf_handler_end();
+ 	__stp_tf_remove_map_entry(entry);
++	__stp_tf_handler_end();
+ 	return UTRACE_RESUME;
+ }
+ 
diff --git a/SPECS/systemtap.spec b/SPECS/systemtap.spec
index 1add99d..78cde03 100644
--- a/SPECS/systemtap.spec
+++ b/SPECS/systemtap.spec
@@ -61,7 +61,7 @@
 
 Name: %{?scl_prefix}systemtap
 Version: 3.1
-Release: 3s%{?dist}
+Release: 4s%{?dist}
 # for version, see also configure.ac
 
 # NB Patch1 is for elfutils, further below
@@ -178,6 +178,7 @@ BuildRequires: python3-devel
 BuildRequires: python3-setuptools
 %endif
 
+Patch10: rhbz1525651.patch
 
 # Install requirements
 Requires: %{?scl_prefix}systemtap-client = %{version}-%{release}
@@ -466,6 +467,8 @@ find . \( -name configure -o -name config.h.in \) -print | xargs touch
 cd ..
 %endif
 
+%patch10 -p1
+
 %build
 
 %if %{with_bundled_elfutils}
@@ -561,7 +564,10 @@ cd ..
 CPPFLAGS="-I%{_includedir} -I%{_includedir}/dyninst %{optflags}"
 export CPPFLAGS
 #LDFLAGS="-L%{_libdir}/dyninst"
-LDFLAGS="-L%{_libdir} -L%{_libdir}/dyninst -L%{_libdir}/elfutils"
+#
+# -rpath-link needed because new dts dyninst doesn't get its shlibs into the ld.so.cache
+# directories, so ld doesn't find them.
+LDFLAGS="-L%{_libdir} -Wl,-rpath-link,%{_libdir}/dyninst -L%{_libdir}/dyninst -L%{_libdir}/elfutils"
 export LDFLAGS
 
 %if %{with_virthost}
@@ -1148,6 +1154,9 @@ done
 #   http://sourceware.org/systemtap/wiki/SystemTapReleases
 
 %changelog
+* Tue Feb 27 2018 Frank Ch. Eigler <fche@redhat.com> - 3.0-4s
+- respin for newer dyninst
+
 * Thu Oct 05 2017 Frank Ch. Eigler <fche@redhat.com> - 3.0-3s
 - restore ppc64 dyninst option; available on .el7 variant