Blame SOURCES/rhbz1847676,1857749.patch

fce8b1
commit 11c39a7375bd2759b53b89236e755c91a4f5aad8
fce8b1
Author: Frank Ch. Eigler <fche@redhat.com>
fce8b1
Date:   Tue Jun 16 20:35:53 2020 -0400
fce8b1
fce8b1
    RHBZ1847676: uprobes-inode tweaks redux
fce8b1
    
fce8b1
    Added (back) a spinlock to manage the stapiu_consumer -> process_list
fce8b1
    structure, since it is occasionally travered from uprobe pre-handlers,
fce8b1
    which are sometimes entered in atomic context (e.g. on rhel7).  There,
fce8b1
    the normal mutex_t is unsafe.  So restoring a spinlock_t just for
fce8b1
    those shortlived traversals, rhel7 and rawhide are both happy.
fce8b1
fce8b1
diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c
fce8b1
index 156360e..922c9f1 100644
fce8b1
--- a/runtime/linux/uprobes-inode.c
fce8b1
+++ b/runtime/linux/uprobes-inode.c
fce8b1
@@ -143,7 +143,8 @@ struct stapiu_consumer {
fce8b1
   struct list_head instance_list_head; // the resulting uprobe instances for this consumer
fce8b1
 
fce8b1
   struct list_head process_list_head; // the processes for this consumer
fce8b1
-
fce8b1
+  spinlock_t process_list_lock; // protect list; used briefly from even atomic contexts
fce8b1
+        
fce8b1
   // List of perf counters used by each probe
fce8b1
   // This list is an index into struct stap_perf_probe,
fce8b1
   long perf_counters_dim;
fce8b1
@@ -174,16 +175,19 @@ stapiu_probe_prehandler (struct uprobe_consumer *inst, struct pt_regs *regs)
fce8b1
 
fce8b1
       // First find the related process, set by stapiu_change_plus.
fce8b1
       // NB: This is a linear search performed for every probe hit!
fce8b1
-      // This could be an algorithmic problem if the list gets large, but
fce8b1
-      // we'll wait until this is demonstratedly a hotspot before optimizing.
fce8b1
-      mutex_lock(&c->consumer_lock);
fce8b1
+      // This could be an algorithmic problem if the list gets large,
fce8b1
+      // but we'll wait until this is demonstratedly a hotspot before
fce8b1
+      // optimizing.  NB: on rhel7 sometimes we're invoked from atomic
fce8b1
+      // context, so must be careful to use the spinlock, not the
fce8b1
+      // mutex.
fce8b1
+      spin_lock(&c->process_list_lock);
fce8b1
       list_for_each_entry(p, &c->process_list_head, process_list) {
fce8b1
 	if (p->tgid == current->tgid) {
fce8b1
 	  process = p;
fce8b1
 	  break;
fce8b1
 	}
fce8b1
       }
fce8b1
-      mutex_unlock(&c->consumer_lock);
fce8b1
+      spin_unlock(&c->process_list_lock);
fce8b1
       if (!process) {
fce8b1
 #ifdef UPROBE_HANDLER_REMOVE
fce8b1
 	/* Once we're past the starting phase, we can be sure that any
fce8b1
@@ -344,7 +348,7 @@ static void
fce8b1
 stapiu_decrement_semaphores(struct stapiu_consumer *consumers, size_t nconsumers)
fce8b1
 {
fce8b1
   size_t i;
fce8b1
-  /* NB: no stapiu_process_slots_lock needed, as the task_finder engine is
fce8b1
+  /* NB: no process_list_lock use needed as the task_finder engine is
fce8b1
    * already stopped by now, so no one else will mess with us.  We need
fce8b1
    * to be sleepable for access_process_vm.  */
fce8b1
   for (i = 0; i < nconsumers; ++i) {
fce8b1
@@ -433,7 +437,8 @@ stapiu_init(struct stapiu_consumer *consumers, size_t nconsumers)
fce8b1
     INIT_LIST_HEAD(&c->instance_list_head);
fce8b1
     INIT_LIST_HEAD(&c->process_list_head);
fce8b1
     mutex_init(&c->consumer_lock);
fce8b1
-
fce8b1
+    spin_lock_init(&c->process_list_lock);
fce8b1
+    
fce8b1
     dbug_uprobes("registering task-finder for procname:%s buildid:%s\n",
fce8b1
 		 ((char*)c->finder.procname ?: (char*)""),
fce8b1
 		 ((char*)c->finder.build_id ?: (char*)""));
fce8b1
@@ -560,7 +565,9 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
fce8b1
    * calls us in this case with relocation=offset=0, so
fce8b1
    * we don't have to worry about it.  */
fce8b1
   p->base = relocation - offset;
fce8b1
+  spin_lock (&c->process_list_lock);
fce8b1
   list_add(&p->process_list, &c->process_list_head);
fce8b1
+  spin_unlock (&c->process_list_lock);
fce8b1
 
fce8b1
   rc = 0;
fce8b1
   mutex_unlock(&c->consumer_lock);
fce8b1
@@ -587,28 +594,40 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task
fce8b1
 {
fce8b1
   int rc = 0;
fce8b1
   struct stapiu_process *p;
fce8b1
+  int any_found;
fce8b1
   
fce8b1
   if (! c->sdt_sem_offset) // nothing to do
fce8b1
     return 0;
fce8b1
 
fce8b1
-  /* NB: no lock after this point, as we need to be sleepable for
fce8b1
-   * get/put_user semaphore action.  The given process should be frozen
fce8b1
-   * while we're busy, so it's not an issue.
fce8b1
-   */
fce8b1
-
fce8b1
-  mutex_lock(&c->consumer_lock);
fce8b1
-
fce8b1
+  // NB: we mustn't hold a lock while changing the task memory,
fce8b1
+  // but we need a lock to protect the process_list from concurrent
fce8b1
+  // add/delete.  So hold a spinlock during iteration until the first
fce8b1
+  // hit, then unlock & process.  NB: We could in principle have multiple
fce8b1
+  // instances of the same process in the list (e.g., if the process
fce8b1
+  // somehow maps in the same solib multiple times).  We can't easily
fce8b1
+  // both iterate this list (in a spinlock-protected safe way), and
fce8b1
+  // relax the spinlock enough to do a safe stapiu_write_task_semaphore()
fce8b1
+  // call within the loop.  So we will hit only the copy in our list.
fce8b1
+  any_found = 0;
fce8b1
+  spin_lock(&c->process_list_lock);
fce8b1
   /* Look through all the consumer's processes and increment semaphores.  */
fce8b1
   list_for_each_entry(p, &c->process_list_head, process_list) {
fce8b1
     unsigned long addr = p->base + c->sdt_sem_offset;
fce8b1
     if (addr >= relocation && addr < relocation + length) {
fce8b1
-      int rc2 = stapiu_write_task_semaphore(task, addr, +1);
fce8b1
+      int rc2;
fce8b1
+      // unlock list and process write for this entry
fce8b1
+      spin_unlock(&c->process_list_lock);
fce8b1
+      any_found=1;
fce8b1
+      rc2 = stapiu_write_task_semaphore(task, addr, +1);
fce8b1
       if (!rc)
fce8b1
-	rc = rc2;
fce8b1
+        rc = rc2;
fce8b1
+      break; // exit list_for_each loop
fce8b1
     }
fce8b1
   }
fce8b1
-
fce8b1
-  mutex_unlock(&c->consumer_lock);
fce8b1
+  if (! any_found)
fce8b1
+    spin_unlock(&c->process_list_lock);
fce8b1
+  else
fce8b1
+    ; // already unlocked
fce8b1
 
fce8b1
   return rc;
fce8b1
 }
fce8b1
@@ -635,8 +654,9 @@ stapiu_change_minus(struct stapiu_consumer* c, struct task_struct *task,
fce8b1
   //   process is dying anyway
fce8b1
   // - the stapiu_consumer's process_list linked list will have a record
fce8b1
   //   of the dead process: well, not great, it'll be cleaned up eventually,
fce8b1
-  //   and cleaning it up NOW is tricky - need some spin lock to protect the list,
fce8b1
-  //   but not out sleepy mutex:
fce8b1
+  //   and cleaning it up NOW is tricky - we could use the process_list_lock
fce8b1
+  //   to protect the list (as done in stapiu_change_semaphore_plus),
fce8b1
+  //   but not our sleepy mutex:
fce8b1
   //
fce8b1
   // [ 1955.410237]  ? stapiu_change_minus+0x38/0xf0 [stap_54a723c01c50d972590a5c901516849_15522]
fce8b1
   // [ 1955.411583]  __mutex_lock+0x35/0x820
fce8b1
fce8b1
commit 4ccdfe4536d702612912e96d7b6278b169917eaa
fce8b1
Author: Frank Ch. Eigler <fche@redhat.com>
fce8b1
Date:   Mon Jul 6 13:27:46 2020 -0400
fce8b1
fce8b1
    RHBZ1847676 cont'd: more uprobes-inode/onthefly concurrency controls
fce8b1
    
fce8b1
    The systemtap.onthefly/*.exp tests had recently become hang-prone on
fce8b1
    some kernels, for reasons still not completely understood.  This set
fce8b1
    of patches adds:
fce8b1
    
fce8b1
    - irq*-block spinlocks into uprobes-invoked paths, in case there is
fce8b1
      peculiar reentrancy (from irq-related tracepoints)
fce8b1
    
fce8b1
    - a mutex lock/unlock into the stapiu_exit() path, in case there is
fce8b1
      a concurrent stapiu_refresh() invoked by onthefly machinery around
fce8b1
      exit time
fce8b1
    
fce8b1
    - restrictions into the onthefly module_refresh() translator code to
fce8b1
      preclude STAP_SESSION_STOPPING as a time to do any sort of refresh
fce8b1
      operation.  Now probes that were disarmed will stay disarmed during
fce8b1
      probe-end/error/etc. processing, which is always valid with the
fce8b1
      spec, and avoids a class of late module-refresh ops
fce8b1
    
fce8b1
    Testing on rhel7 and rawhide indicates the reproducible hang is gone.
fce8b1
    Our testsuite already tortures this code; invoke by hand via:
fce8b1
    
fce8b1
    % sudo make installcheck RUNTESTFLAGS="-v affection.exp hrtimer_onthefly.exp kprobes_onthefly.exp tracepoint_onthefly.exp uprobes_onthefly.exp"
fce8b1
fce8b1
diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c
fce8b1
index 922c9f1..3de7281 100644
fce8b1
--- a/runtime/linux/uprobes-inode.c
fce8b1
+++ b/runtime/linux/uprobes-inode.c
fce8b1
@@ -172,6 +172,7 @@ stapiu_probe_prehandler (struct uprobe_consumer *inst, struct pt_regs *regs)
fce8b1
   if (_stp_target) // need we filter by pid at all?
fce8b1
     {
fce8b1
       struct stapiu_process *p, *process = NULL;
fce8b1
+      unsigned long flags;
fce8b1
 
fce8b1
       // First find the related process, set by stapiu_change_plus.
fce8b1
       // NB: This is a linear search performed for every probe hit!
fce8b1
@@ -180,14 +181,14 @@ stapiu_probe_prehandler (struct uprobe_consumer *inst, struct pt_regs *regs)
fce8b1
       // optimizing.  NB: on rhel7 sometimes we're invoked from atomic
fce8b1
       // context, so must be careful to use the spinlock, not the
fce8b1
       // mutex.
fce8b1
-      spin_lock(&c->process_list_lock);
fce8b1
+      spin_lock_irqsave(&c->process_list_lock, flags);
fce8b1
       list_for_each_entry(p, &c->process_list_head, process_list) {
fce8b1
 	if (p->tgid == current->tgid) {
fce8b1
 	  process = p;
fce8b1
 	  break;
fce8b1
 	}
fce8b1
       }
fce8b1
-      spin_unlock(&c->process_list_lock);
fce8b1
+      spin_unlock_irqrestore(&c->process_list_lock, flags);
fce8b1
       if (!process) {
fce8b1
 #ifdef UPROBE_HANDLER_REMOVE
fce8b1
 	/* Once we're past the starting phase, we can be sure that any
fce8b1
@@ -398,7 +399,7 @@ static void
fce8b1
 stapiu_consumer_refresh(struct stapiu_consumer *c)
fce8b1
 {
fce8b1
   struct stapiu_instance *inst;
fce8b1
-
fce8b1
+  
fce8b1
   mutex_lock(& c->consumer_lock);
fce8b1
 
fce8b1
   list_for_each_entry(inst, &c->instance_list_head, instance_list) {
fce8b1
@@ -420,7 +421,10 @@ stapiu_exit(struct stapiu_consumer *consumers, size_t nconsumers)
fce8b1
   stapiu_decrement_semaphores(consumers, nconsumers);
fce8b1
   for (i = 0; i < nconsumers; ++i) {
fce8b1
     struct stapiu_consumer *c = &consumers[i];
fce8b1
+    // protect against conceivable stapiu_refresh() at same time
fce8b1
+    mutex_lock(& c->consumer_lock);
fce8b1
     stapiu_consumer_unreg(c);
fce8b1
+    mutex_unlock(& c->consumer_lock);
fce8b1
     /* NB: task_finder needs no unregister. */
fce8b1
   }
fce8b1
 }
fce8b1
@@ -480,6 +484,7 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
fce8b1
   struct stapiu_instance *inst = NULL;
fce8b1
   struct stapiu_process *p;
fce8b1
   int j;
fce8b1
+  unsigned long flags;
fce8b1
 
fce8b1
   if (! inode) {
fce8b1
       rc = -EINVAL;
fce8b1
@@ -565,9 +570,9 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
fce8b1
    * calls us in this case with relocation=offset=0, so
fce8b1
    * we don't have to worry about it.  */
fce8b1
   p->base = relocation - offset;
fce8b1
-  spin_lock (&c->process_list_lock);
fce8b1
+  spin_lock_irqsave (&c->process_list_lock, flags);
fce8b1
   list_add(&p->process_list, &c->process_list_head);
fce8b1
-  spin_unlock (&c->process_list_lock);
fce8b1
+  spin_unlock_irqrestore (&c->process_list_lock, flags);
fce8b1
 
fce8b1
   rc = 0;
fce8b1
   mutex_unlock(&c->consumer_lock);
fce8b1
@@ -595,6 +600,7 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task
fce8b1
   int rc = 0;
fce8b1
   struct stapiu_process *p;
fce8b1
   int any_found;
fce8b1
+  unsigned long flags;
fce8b1
   
fce8b1
   if (! c->sdt_sem_offset) // nothing to do
fce8b1
     return 0;
fce8b1
@@ -609,14 +615,14 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task
fce8b1
   // relax the spinlock enough to do a safe stapiu_write_task_semaphore()
fce8b1
   // call within the loop.  So we will hit only the copy in our list.
fce8b1
   any_found = 0;
fce8b1
-  spin_lock(&c->process_list_lock);
fce8b1
+  spin_lock_irqsave(&c->process_list_lock, flags);
fce8b1
   /* Look through all the consumer's processes and increment semaphores.  */
fce8b1
   list_for_each_entry(p, &c->process_list_head, process_list) {
fce8b1
     unsigned long addr = p->base + c->sdt_sem_offset;
fce8b1
     if (addr >= relocation && addr < relocation + length) {
fce8b1
       int rc2;
fce8b1
       // unlock list and process write for this entry
fce8b1
-      spin_unlock(&c->process_list_lock);
fce8b1
+      spin_unlock_irqrestore(&c->process_list_lock, flags);
fce8b1
       any_found=1;
fce8b1
       rc2 = stapiu_write_task_semaphore(task, addr, +1);
fce8b1
       if (!rc)
fce8b1
@@ -625,7 +631,7 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task
fce8b1
     }
fce8b1
   }
fce8b1
   if (! any_found)
fce8b1
-    spin_unlock(&c->process_list_lock);
fce8b1
+    spin_unlock_irqrestore(&c->process_list_lock, flags);
fce8b1
   else
fce8b1
     ; // already unlocked
fce8b1
 
fce8b1
diff --git a/translate.cxx b/translate.cxx
fce8b1
index 10b3d32..b10af5a 100644
fce8b1
--- a/translate.cxx
fce8b1
+++ b/translate.cxx
fce8b1
@@ -2144,19 +2144,13 @@ c_unparser::emit_module_refresh ()
fce8b1
     o->newline() << "mutex_lock(&module_refresh_mutex);";
fce8b1
 
fce8b1
   /* If we're not in STARTING/RUNNING state, don't try doing any work.
fce8b1
-     PR16766 */
fce8b1
+     PR16766.  We don't want to run refresh ops during e.g. STOPPING,
fce8b1
+     so as to possibly activate uprobes near shutdown. */
fce8b1
   o->newline() << "state = atomic_read (session_state());";
fce8b1
-  o->newline() << "if (state != STAP_SESSION_RUNNING && state != STAP_SESSION_STARTING && state != STAP_SESSION_ERROR) {";
fce8b1
-  // cannot _stp_warn etc. since we're not in probe context
fce8b1
-  o->newline(1) << "#if defined(__KERNEL__)";
fce8b1
-  o->newline() << "if (state != STAP_SESSION_STOPPING)";
fce8b1
-  o->newline(1) << "printk (KERN_ERR \"stap module notifier triggered in unexpected state %d\\n\", state);";
fce8b1
-  o->indent(-1);
fce8b1
-  o->newline() << "#endif";
fce8b1
-
fce8b1
+  o->newline() << "if (state != STAP_SESSION_RUNNING && state != STAP_SESSION_STARTING) {";
fce8b1
+  o->newline(1);
fce8b1
   if (!session->runtime_usermode_p())
fce8b1
     o->newline() << "mutex_unlock(&module_refresh_mutex);";
fce8b1
-
fce8b1
   o->newline() << "return;";
fce8b1
   o->newline(-1) << "}";
fce8b1
 
fce8b1
fce8b1
commit 046fa017d2ab7fea1a4ba2295c31f768c072855e
fce8b1
Author: Frank Ch. Eigler <fche@redhat.com>
fce8b1
Date:   Sun Jul 12 09:57:15 2020 -0400
fce8b1
fce8b1
    RHBZ1847676 cont'd: one more uprobes-inode/onthefly concurrency control
fce8b1
    
fce8b1
    In uprobes-inode.c (stapiu_change_plus), the runtime can react to
fce8b1
    arrivals of new mappings of a solib or executable by registering new
fce8b1
    uprobes.  Due to an assumption that this could not happen at
fce8b1
    inconvenient times (such as a stapiu_refresh or near shutdown times),
fce8b1
    the actual uprobes registration operation was done outside the
fce8b1
    consumer_lock mutex being held.  But it appears this can happen at bad
fce8b1
    times, so the mutex needs to be held, just like within
fce8b1
    stapiu_consumer_refresh().
fce8b1
    
fce8b1
    The onthefly tests now survive iterating testing on rawhide+lockdep
fce8b1
    and rhel7+lockdep.
fce8b1
fce8b1
diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c
fce8b1
index 3de7281..01c8a07 100644
fce8b1
--- a/runtime/linux/uprobes-inode.c
fce8b1
+++ b/runtime/linux/uprobes-inode.c
fce8b1
@@ -575,12 +575,10 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
fce8b1
   spin_unlock_irqrestore (&c->process_list_lock, flags);
fce8b1
 
fce8b1
   rc = 0;
fce8b1
-  mutex_unlock(&c->consumer_lock);
fce8b1
-  
fce8b1
   // Register actual uprobe if cond_enabled right now
fce8b1
   if (c->probe->cond_enabled)
fce8b1
     (void) stapiu_register(inst, c);
fce8b1
-  goto out;
fce8b1
+  goto out1;
fce8b1
 
fce8b1
  out2:
fce8b1
   _stp_kfree(inst);
fce8b1
fce8b1
commit a9a0131eb59e8abc197d3d2a553a86bcdec3dd70
fce8b1
Author: Frank Ch. Eigler <fche@redhat.com>
fce8b1
Date:   Fri Jul 17 22:33:04 2020 -0400
fce8b1
fce8b1
    rhbz1857749: uprobes-inode regression in sdt semaphore setting
fce8b1
    
fce8b1
    Previous code neglected to set sdt.h semaphores for more than the
fce8b1
    first process systemtap happened to encounter.  This was from a
fce8b1
    mistaken understanding of what it meant for stapiu_change_plus() to be
fce8b1
    called with the same inode/consumer combination.  Even though uprobes
fce8b1
    are automatically shared, each new process still needs its perfctr and
fce8b1
    sdt-semaphores individually set, so we do that now (as before the
fce8b1
    rework of this code).  Mechanized testing incoming shortly.
fce8b1
fce8b1
diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c
fce8b1
index 01c8a07..de81839 100644
fce8b1
--- a/runtime/linux/uprobes-inode.c
fce8b1
+++ b/runtime/linux/uprobes-inode.c
fce8b1
@@ -190,6 +190,10 @@ stapiu_probe_prehandler (struct uprobe_consumer *inst, struct pt_regs *regs)
fce8b1
       }
fce8b1
       spin_unlock_irqrestore(&c->process_list_lock, flags);
fce8b1
       if (!process) {
fce8b1
+        /* We know that we're in -c/-x mode, but this process is not
fce8b1
+           in the process hierarchy, so the uprobe should be ignored
fce8b1
+           and future hits prevented.  PR15278
fce8b1
+        */
fce8b1
 #ifdef UPROBE_HANDLER_REMOVE
fce8b1
 	/* Once we're past the starting phase, we can be sure that any
fce8b1
 	 * processes which are executing code in a mapping have already
fce8b1
@@ -242,8 +246,8 @@ stapiu_register (struct stapiu_instance* inst, struct stapiu_consumer* c)
fce8b1
 	       (unsigned long) inst->inode->i_ino,
fce8b1
 	       (void*) (uintptr_t) c->offset,
fce8b1
 	       c->probe->index,
fce8b1
-	       ((char*)c->finder.procname ?: (char*)""),
fce8b1
-	       ((char*)c->finder.build_id ?: (char*)""));
fce8b1
+	       ((char*)c->finder.procname ?: ((char*)c->solib_pathname ?: "")),
fce8b1
+               ((char*)c->finder.build_id ?: ((char*)c->solib_build_id ?: "")));
fce8b1
 
fce8b1
   if (!c->return_p) {
fce8b1
     inst->kconsumer.handler = stapiu_probe_prehandler;
fce8b1
@@ -444,8 +448,8 @@ stapiu_init(struct stapiu_consumer *consumers, size_t nconsumers)
fce8b1
     spin_lock_init(&c->process_list_lock);
fce8b1
     
fce8b1
     dbug_uprobes("registering task-finder for procname:%s buildid:%s\n",
fce8b1
-		 ((char*)c->finder.procname ?: (char*)""),
fce8b1
-		 ((char*)c->finder.build_id ?: (char*)""));
fce8b1
+                 ((char*)c->finder.procname ?: ""),
fce8b1
+                 ((char*)c->finder.build_id ?: ""));
fce8b1
 
fce8b1
     ret = stap_register_task_finder_target(&c->finder);
fce8b1
     if (ret != 0) {
fce8b1
@@ -499,22 +503,22 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
fce8b1
   if (rc)
fce8b1
     goto out;
fce8b1
 
fce8b1
-  dbug_uprobes("notified for inode-offset u%sprobe "
fce8b1
+  dbug_uprobes("notified for inode-offset arrival u%sprobe "
fce8b1
 	       "%lu:%p pidx %zu target procname:%s buildid:%s\n",
fce8b1
 	       c->return_p ? "ret" : "",
fce8b1
 	       (unsigned long) inode->i_ino,
fce8b1
 	       (void*) (uintptr_t) c->offset,
fce8b1
 	       c->probe->index,
fce8b1
-	       ((char*)c->finder.procname ?: (char*)""),
fce8b1
-	       ((char*)c->finder.build_id ?: (char*)""));
fce8b1
+	       ((char*)c->finder.procname ?: ((char*)c->solib_pathname ?: "")),
fce8b1
+               ((char*)c->finder.build_id ?: ((char*)c->solib_build_id ?: "")));
fce8b1
 
fce8b1
   /* Check the buildid of the target (if we haven't already). We
fce8b1
    * lock the target so we don't have concurrency issues. */
fce8b1
   mutex_lock(&c->consumer_lock);
fce8b1
 
fce8b1
-  // Check if we already have an instance for this inode, as though we
fce8b1
-  // were called twice by task-finder mishap, or (hypothetically) the
fce8b1
-  // shlib was mmapped twice.
fce8b1
+  // Check if we already have an instance for this inode.  This is normal:
fce8b1
+  // if a different process maps the same solib, or forks into the same
fce8b1
+  // executable.  In this case, we must not re-register the same uprobe.
fce8b1
   list_for_each_entry(i, &c->instance_list_head, instance_list) {
fce8b1
     if (i->inode == inode) {
fce8b1
       inst = i;
fce8b1
@@ -522,28 +526,33 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
fce8b1
     }
fce8b1
   }
fce8b1
 
fce8b1
-  if (inst) { // wouldn't expect a re-notification
fce8b1
-    if (inst->registered_p != c->probe->cond_enabled)
fce8b1
-      // ... this should not happen
fce8b1
-      ;
fce8b1
-    goto out1;
fce8b1
-  }
fce8b1
-
fce8b1
-  // Normal case: need a new one.
fce8b1
-  inst = _stp_kzalloc(sizeof(struct stapiu_instance));
fce8b1
-  if (! inst) {
fce8b1
-    rc = -ENOMEM;
fce8b1
-    goto out1;
fce8b1
-  }
fce8b1
+  if (!inst) { // new instance; need new uprobe etc.
fce8b1
+    // Normal case: need a new one.
fce8b1
+    inst = _stp_kzalloc(sizeof(struct stapiu_instance));
fce8b1
+    if (! inst) {
fce8b1
+      rc = -ENOMEM;
fce8b1
+      goto out1;
fce8b1
+    }
fce8b1
 
fce8b1
-  inst->sconsumer = c; // back link essential; that's how we go from uprobe *handler callback
fce8b1
+    inst->sconsumer = c; // back link essential; that's how we go from uprobe *handler callback
fce8b1
+          
fce8b1
+    /* Grab the inode first (to prevent TOCTTOU problems). */
fce8b1
+    inst->inode = igrab(inode);
fce8b1
+    if (!inst->inode) {
fce8b1
+      rc = -EINVAL;
fce8b1
+      goto out2;
fce8b1
+    }
fce8b1
+  
fce8b1
+    // Add the inode/instance to the list
fce8b1
+    list_add(&inst->instance_list, &c->instance_list_head);
fce8b1
 
fce8b1
-  /* Grab the inode first (to prevent TOCTTOU problems). */
fce8b1
-  inst->inode = igrab(inode);
fce8b1
-  if (!inst->inode) {
fce8b1
-    rc = -EINVAL;
fce8b1
-    goto out2;
fce8b1
+    // Register the actual uprobe if cond_enabled already
fce8b1
+    if (c->probe->cond_enabled)
fce8b1
+      (void) stapiu_register(inst, c);
fce8b1
   }
fce8b1
+
fce8b1
+  // ... but we may have to do per-process work anyway: perfctr
fce8b1
+  // initialization and sdt.h semaphore manipulation!
fce8b1
   
fce8b1
   // Perform perfctr registration if required
fce8b1
   for (j=0; j < c->perf_counters_dim; j++) {
fce8b1
@@ -551,12 +560,10 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
fce8b1
       (void) _stp_perf_read_init ((c->perf_counters)[j], task);
fce8b1
   }
fce8b1
 
fce8b1
-  // Add the inode/instance to the list
fce8b1
-  list_add(&inst->instance_list, &c->instance_list_head);
fce8b1
-
fce8b1
   // Associate this consumer with this process.  If we encounter
fce8b1
   // resource problems here, we don't really have to undo the uprobe
fce8b1
-  // registrations etc. already in effect.
fce8b1
+  // registrations etc. already in effect.  It may break correct
fce8b1
+  // tracking of process hierarchy in -c/-x operation, but too bad.
fce8b1
   p = _stp_kzalloc(sizeof(struct stapiu_process));
fce8b1
   if (! p) {
fce8b1
     rc = -ENOMEM;
fce8b1
@@ -573,11 +580,10 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
fce8b1
   spin_lock_irqsave (&c->process_list_lock, flags);
fce8b1
   list_add(&p->process_list, &c->process_list_head);
fce8b1
   spin_unlock_irqrestore (&c->process_list_lock, flags);
fce8b1
-
fce8b1
+  // NB: actual semaphore value bumping is done later
fce8b1
+  
fce8b1
   rc = 0;
fce8b1
   // Register actual uprobe if cond_enabled right now
fce8b1
-  if (c->probe->cond_enabled)
fce8b1
-    (void) stapiu_register(inst, c);
fce8b1
   goto out1;
fce8b1
 
fce8b1
  out2:
fce8b1
@@ -617,11 +623,21 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task
fce8b1
   /* Look through all the consumer's processes and increment semaphores.  */
fce8b1
   list_for_each_entry(p, &c->process_list_head, process_list) {
fce8b1
     unsigned long addr = p->base + c->sdt_sem_offset;
fce8b1
+    if (p->tgid != task->tgid) // skip other processes in the list
fce8b1
+      continue;
fce8b1
     if (addr >= relocation && addr < relocation + length) {
fce8b1
       int rc2;
fce8b1
       // unlock list and process write for this entry
fce8b1
       spin_unlock_irqrestore(&c->process_list_lock, flags);
fce8b1
       any_found=1;
fce8b1
+
fce8b1
+      dbug_uprobes("incrementing semaphore (u%sprobe) pid %ld "
fce8b1
+                   "pidx %zu address %lx\n",
fce8b1
+                   c->return_p ? "ret" : "",
fce8b1
+                   (long) task->tgid,
fce8b1
+                   c->probe->index,
fce8b1
+                   (unsigned long) addr);
fce8b1
+
fce8b1
       rc2 = stapiu_write_task_semaphore(task, addr, +1);
fce8b1
       if (!rc)
fce8b1
         rc = rc2;
fce8b1
@@ -641,15 +657,8 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task
fce8b1
  * about the semaphores, so we can just release the process slot.  */
fce8b1
 static int
fce8b1
 stapiu_change_minus(struct stapiu_consumer* c, struct task_struct *task,
fce8b1
-		    unsigned long relocation, unsigned long length)
fce8b1
+		    unsigned long addr, unsigned long length)
fce8b1
 {
fce8b1
-  dbug_uprobes("notified for inode-offset departure u%sprobe "
fce8b1
-	       "pidx %zu target procname:%s buildid:%s\n",
fce8b1
-	       c->return_p ? "ret" : "",
fce8b1
-	       c->probe->index,
fce8b1
-	       ((char*)c->finder.procname ?: (char*)""),
fce8b1
-	       ((char*)c->finder.build_id ?: (char*)""));
fce8b1
-  
fce8b1
   // We don't need do anything really.
fce8b1
   // A process going away means:
fce8b1
   // - its uprobes will no longer fire: no problem, the uprobe inode
fce8b1
@@ -674,6 +683,36 @@ stapiu_change_minus(struct stapiu_consumer* c, struct task_struct *task,
fce8b1
   // [ 1955.436334]  ? __x64_sys_execve+0x27/0x30
fce8b1
   // [ 1955.437700]  ? do_syscall_64+0x5c/0xa0
fce8b1
 
fce8b1
+  // But as an optimization - to avoid having them build up indefinitely,
fce8b1
+  // and make semaphore operations go slowly, we will nuke matching entries anyway.
fce8b1
+  unsigned long flags;
fce8b1
+  struct stapiu_process *p, *tmp;
fce8b1
+  unsigned nmatch=0;
fce8b1
+  
fce8b1
+  spin_lock_irqsave(&c->process_list_lock, flags);
fce8b1
+  list_for_each_entry_safe(p, tmp, &c->process_list_head, process_list) {
fce8b1
+    // we nuke by matching semaphore address (where ..._semaphore_plus wrote)
fce8b1
+    // against the address range being unmapped
fce8b1
+    unsigned long semaddr = p->base + c->sdt_sem_offset;
fce8b1
+    if (p->tgid != task->tgid) // skip other processes in the list
fce8b1
+      continue;
fce8b1
+    if (semaddr >= addr && semaddr < addr + length) {
fce8b1
+      list_del(&p->process_list);
fce8b1
+      _stp_kfree (p);
fce8b1
+      nmatch ++;
fce8b1
+    }
fce8b1
+  }
fce8b1
+  spin_unlock_irqrestore(&c->process_list_lock, flags);
fce8b1
+
fce8b1
+  if (nmatch > 0)
fce8b1
+    dbug_uprobes("notified for inode-offset departure u%sprobe "
fce8b1
+                 "pidx %zu matches:%u procname:%s buildid:%s\n",
fce8b1
+                 c->return_p ? "ret" : "",
fce8b1
+                 c->probe->index,
fce8b1
+                 nmatch,
fce8b1
+                 ((char*)c->finder.procname ?: ((char*)c->solib_pathname ?: "")),
fce8b1
+                 ((char*)c->finder.build_id ?: ((char*)c->solib_build_id ?: "")));
fce8b1
+
fce8b1
   return 0;
fce8b1
 }
fce8b1
 
fce8b1
fce8b1
commit e90530877ee21cffa2a9d53567ba5b5de1dd9b32
fce8b1
Author: Frank Ch. Eigler <fche@redhat.com>
fce8b1
Date:   Mon Jul 27 07:58:30 2020 -0400
fce8b1
fce8b1
    PR25568 / RHBZ1857749: buildid/uprobes/inode rework, task_finder etc. side
fce8b1
    
fce8b1
    During work on a new stress tests for build-id based probes (coming in
fce8b1
    next commit), it was found that the task_finder2 logic for buildid
fce8b1
    verification didn't, well, work, because it was never run (due to an
fce8b1
    erroneous pathlen conditional), and couldn't be safely run where it
fce8b1
    was (because it was under spinlock but would have done
fce8b1
    access_process_vm).  Reworked the relevant bits of task_finder2 to
fce8b1
    perform build-id verification for processes later - during the quiesce
fce8b1
    callback periods.  (Buildid verification for solibs is already done
fce8b1
    in the task_finder2 consumer uprobes-inode.c.)
fce8b1
    
fce8b1
    Testing with sdt_misc indicated a case where a preexisting process
fce8b1
    (with solib sdt.h semaphores) was being attached to by a new stap
fce8b1
    binary.  task_finder2's enumeration of the preexising processes'
fce8b1
    memory map segments violated assumptions by recent code related to
fce8b1
    tracking in stapiu_process[] lists.  (It did not mirror the temporal
fce8b1
    ld.so mmap sequence.)  Changed this tracking to use the inode* as the
fce8b1
    key, and stop trying to track mapping lengths, to make positive
fce8b1
    matches and eliminate duplicate stapiu_process[] entries for the same
fce8b1
    (process,solib) permutation.  Reworked stapiu_process[] accumulation
fce8b1
    generally to move to the two immediate task_finder callbacks, out of
fce8b1
    stapiu_change_plus().
fce8b1
    
fce8b1
    Added lots of commentary and diagnostics throughout.  stap
fce8b1
    -DDEBUG_UPROBES give meaningful info about uprobes & sdt semaphores;
fce8b1
    with -DDEBUG_TASK_FINDER, more but not overwhelming relevant info
fce8b1
    appears.
fce8b1
fce8b1
diff --git a/runtime/linux/task_finder2.c b/runtime/linux/task_finder2.c
fce8b1
index 9777efb..8b8057a 100644
fce8b1
--- a/runtime/linux/task_finder2.c
fce8b1
+++ b/runtime/linux/task_finder2.c
fce8b1
@@ -652,8 +652,8 @@ __verify_build_id(struct task_struct *tsk, unsigned long addr,
fce8b1
 	tsk_build_id[build_id_len] = '\0';
fce8b1
 
fce8b1
 	if (strcmp(build_id, tsk_build_id)) {
fce8b1
-		dbug_task(2, "target build-id not matched: [%s] != [%s]\n",
fce8b1
-			  build_id, tsk_build_id);
fce8b1
+		dbug_task(2, "target build-id not matched: [%s] @ 0x%lx != [%s]\n",
fce8b1
+			  build_id, addr, tsk_build_id);
fce8b1
 		return false;
fce8b1
 	}
fce8b1
 
fce8b1
@@ -884,16 +884,9 @@ __stp_utrace_attach_match_filename(struct task_struct *tsk,
fce8b1
 		// procname/build-id and match an "all thread" probe.
fce8b1
 		if (tgt == NULL)
fce8b1
 			continue;
fce8b1
-		/* buildid-based target */
fce8b1
-		else if (tgt->build_id_len > 0 && tgt->procname > 0
fce8b1
-			 && !__verify_build_id(tsk,
fce8b1
-					       tgt->build_id_vaddr,
fce8b1
-					       tgt->build_id,
fce8b1
-					       tgt->build_id_len))
fce8b1
-		{
fce8b1
-			continue;
fce8b1
-		}
fce8b1
-		else if (tgt->build_id_len == 0 && tgt->pathlen > 0
fce8b1
+                /* buildid-based target ... gets checked in __stp_tf_quiesce_worker */
fce8b1
+                /* procname-based target */
fce8b1
+		else if (tgt->pathlen > 0
fce8b1
 			 && (tgt->pathlen != filelen
fce8b1
 			     || strcmp(tgt->procname, filename) != 0))
fce8b1
 		{
fce8b1
@@ -1341,6 +1334,34 @@ __stp_tf_quiesce_worker(struct task_work *work)
fce8b1
 		return;
fce8b1
 	}
fce8b1
 
fce8b1
+        /* If we had a build-id based executable probe (so we have a
fce8b1
+         * tgt->build_id) set, we could not check it back in
fce8b1
+         * __stp_utrace_attach_* because we can't do sleepy
fce8b1
+         * access_process_vm() calls from there.  BUt now that we're
fce8b1
+         * in process context, quiesced, finally we can check.  If we
fce8b1
+         * were build-id based, and the build-id does not match, then
fce8b1
+         * we UTRACE_DETACH from this process and skip the callbacks.
fce8b1
+         *
fce8b1
+         * XXX: For processes that do match, we redo this check every
fce8b1
+         * time this callbacks is encountered somehow.  That's
fce8b1
+         * probably unnecessary.
fce8b1
+         */
fce8b1
+        if (tgt->build_id_len > 0) {
fce8b1
+                int ok = __verify_build_id(current,
fce8b1
+                                           tgt->build_id_vaddr,
fce8b1
+                                           tgt->build_id,
fce8b1
+                                           tgt->build_id_len);
fce8b1
+
fce8b1
+                dbug_task(2, "verified buildid-target process pid=%ld ok=%d\n",
fce8b1
+                          (long) current->tgid, ok);
fce8b1
+                if (!ok) {
fce8b1
+                        // stap_utrace_detach (current, & tgt->ops);
fce8b1
+                        /* Remember that this task_work_func is finished. */
fce8b1
+                        stp_task_work_func_done();
fce8b1
+                        return;
fce8b1
+                }
fce8b1
+        } 
fce8b1
+        
fce8b1
 	__stp_tf_handler_start();
fce8b1
 
fce8b1
 	/* NB make sure we run mmap callbacks before other callbacks
fce8b1
@@ -1434,6 +1455,21 @@ __stp_utrace_task_finder_target_quiesce(u32 action,
fce8b1
 		}
fce8b1
 	}
fce8b1
 	else {
fce8b1
+                /* Like in __stp_tf_quiesce_worker(), verify build-id now if belated. */
fce8b1
+                if (tgt->build_id_len > 0) {
fce8b1
+                        int ok = __verify_build_id(current,
fce8b1
+                                                   tgt->build_id_vaddr,
fce8b1
+                                                   tgt->build_id,
fce8b1
+                                                   tgt->build_id_len);
fce8b1
+                        
fce8b1
+                        dbug_task(2, "verified2 buildid-target process pid=%ld ok=%d\n",
fce8b1
+                                  (long) current->tgid, ok);
fce8b1
+                        if (!ok) {
fce8b1
+                                __stp_tf_handler_end();
fce8b1
+                                return UTRACE_RESUME; // NB: not _DETACH; that interferes with other engines
fce8b1
+                        }
fce8b1
+                } 
fce8b1
+                
fce8b1
 		/* NB make sure we run mmap callbacks before other callbacks
fce8b1
 		 * like 'probe process.begin' handlers so that the vma tracker
fce8b1
 		 * is already initialized in the latter contexts */
fce8b1
@@ -1797,15 +1833,7 @@ stap_start_task_finder(void)
fce8b1
 					 struct stap_task_finder_target, list);
fce8b1
 			if (tgt == NULL)
fce8b1
 				continue;
fce8b1
-			/* buildid-based target */
fce8b1
-			else if (tgt->build_id_len > 0 && tgt->procname > 0
fce8b1
-				 && !__verify_build_id(tsk,
fce8b1
-						       tgt->build_id_vaddr,
fce8b1
-						       tgt->build_id,
fce8b1
-						       tgt->build_id_len))
fce8b1
-			{
fce8b1
-				continue;
fce8b1
-			}
fce8b1
+			/* buildid-based target ... gets checked in __stp_tf_quiesce_worker */
fce8b1
 			/* procname-based target */
fce8b1
 			else if (tgt->build_id == 0 && tgt->pathlen > 0
fce8b1
 				 && (tgt->pathlen != mmpathlen
fce8b1
diff --git a/runtime/linux/uprobes-inode.c b/runtime/linux/uprobes-inode.c
fce8b1
index de81839..757da30 100644
fce8b1
--- a/runtime/linux/uprobes-inode.c
fce8b1
+++ b/runtime/linux/uprobes-inode.c
fce8b1
@@ -76,7 +76,7 @@ struct stapiu_instance {
fce8b1
   struct list_head instance_list;   // to find other instances e.g. during shutdown
fce8b1
 
fce8b1
   struct uprobe_consumer kconsumer; // the kernel-side struct for uprobe callbacks etc.
fce8b1
-  struct inode *inode;              // XXX: refcount?
fce8b1
+  struct inode *inode;              // refcounted
fce8b1
   unsigned registered_p:1;          // whether the this kconsumer is registered (= armed, live)
fce8b1
 
fce8b1
   struct stapiu_consumer *sconsumer; // whose instance are we
fce8b1
@@ -86,10 +86,14 @@ struct stapiu_instance {
fce8b1
 /* A snippet to record the per-process vm where a particular
fce8b1
    executable/solib was mapped.  Used for sdt semaphore setting, and
fce8b1
    for identifying processes of our interest (vs. disinterest) for
fce8b1
-   uprobe hits.  This object is owned by a stapiu_consumer. */
fce8b1
+   uprobe hits.  This object is owned by a stapiu_consumer.  We use
fce8b1
+   the same inode* as the stapiu_instance, and have the same lifespan,
fce8b1
+   so don't bother separately refcount it. 
fce8b1
+*/
fce8b1
 struct stapiu_process {
fce8b1
   struct list_head process_list;    // to find other processes
fce8b1
 
fce8b1
+  struct inode *inode;              // the inode* for solib or executable
fce8b1
   unsigned long relocation;         // the mmap'ed .text address
fce8b1
   unsigned long base;               // the address to apply sdt offsets against
fce8b1
   pid_t tgid;                       // pid
fce8b1
@@ -392,6 +396,7 @@ stapiu_consumer_unreg(struct stapiu_consumer *c)
fce8b1
   // multiple times in the list.  Don't break after the first.
fce8b1
   list_for_each_entry_safe(p, tmp, &c->process_list_head, process_list) {
fce8b1
     list_del(&p->process_list);
fce8b1
+    // no refcount used for the inode field
fce8b1
     _stp_kfree (p);
fce8b1
   }
fce8b1
 }
fce8b1
@@ -498,6 +503,8 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
fce8b1
   /* Do the buildid check.  NB: on F29+, offset may not equal
fce8b1
      0 for LOADable "R E" segments, because the read-only .note.*
fce8b1
      stuff may have been loaded earlier, separately.  PR23890. */
fce8b1
+  // NB: this is not really necessary for buildid-based probes,
fce8b1
+  // which had this verified already.
fce8b1
   rc = _stp_usermodule_check(task, c->module_name,
fce8b1
 			     relocation - offset);
fce8b1
   if (rc)
fce8b1
@@ -527,7 +534,6 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
fce8b1
   }
fce8b1
 
fce8b1
   if (!inst) { // new instance; need new uprobe etc.
fce8b1
-    // Normal case: need a new one.
fce8b1
     inst = _stp_kzalloc(sizeof(struct stapiu_instance));
fce8b1
     if (! inst) {
fce8b1
       rc = -ENOMEM;
fce8b1
@@ -560,30 +566,9 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
fce8b1
       (void) _stp_perf_read_init ((c->perf_counters)[j], task);
fce8b1
   }
fce8b1
 
fce8b1
-  // Associate this consumer with this process.  If we encounter
fce8b1
-  // resource problems here, we don't really have to undo the uprobe
fce8b1
-  // registrations etc. already in effect.  It may break correct
fce8b1
-  // tracking of process hierarchy in -c/-x operation, but too bad.
fce8b1
-  p = _stp_kzalloc(sizeof(struct stapiu_process));
fce8b1
-  if (! p) {
fce8b1
-    rc = -ENOMEM;
fce8b1
-    goto out1;
fce8b1
-  }
fce8b1
-  p->tgid = task->tgid;
fce8b1
-  p->relocation = relocation;
fce8b1
-  /* The base is used for relocating semaphores.  If the
fce8b1
-   * probe is in an ET_EXEC binary, then that offset
fce8b1
-   * already is a real address.  But stapiu_process_found
fce8b1
-   * calls us in this case with relocation=offset=0, so
fce8b1
-   * we don't have to worry about it.  */
fce8b1
-  p->base = relocation - offset;
fce8b1
-  spin_lock_irqsave (&c->process_list_lock, flags);
fce8b1
-  list_add(&p->process_list, &c->process_list_head);
fce8b1
-  spin_unlock_irqrestore (&c->process_list_lock, flags);
fce8b1
-  // NB: actual semaphore value bumping is done later
fce8b1
+  // NB: process_list[] already extended up in stapiu_mmap_found().
fce8b1
   
fce8b1
   rc = 0;
fce8b1
-  // Register actual uprobe if cond_enabled right now
fce8b1
   goto out1;
fce8b1
 
fce8b1
  out2:
fce8b1
@@ -599,7 +584,7 @@ stapiu_change_plus(struct stapiu_consumer* c, struct task_struct *task,
fce8b1
  * Increment the semaphore now.  */
fce8b1
 static int
fce8b1
 stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task,
fce8b1
-			     unsigned long relocation, unsigned long length)
fce8b1
+			     unsigned long relocation, struct inode* inode)
fce8b1
 {
fce8b1
   int rc = 0;
fce8b1
   struct stapiu_process *p;
fce8b1
@@ -609,6 +594,13 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task
fce8b1
   if (! c->sdt_sem_offset) // nothing to do
fce8b1
     return 0;
fce8b1
 
fce8b1
+  dbug_uprobes("considering semaphore (u%sprobe) pid %ld inode 0x%lx"
fce8b1
+               "pidx %zu\n",
fce8b1
+               c->return_p ? "ret" : "",
fce8b1
+               (long) task->tgid,
fce8b1
+               (unsigned long) inode,
fce8b1
+               c->probe->index);
fce8b1
+  
fce8b1
   // NB: we mustn't hold a lock while changing the task memory,
fce8b1
   // but we need a lock to protect the process_list from concurrent
fce8b1
   // add/delete.  So hold a spinlock during iteration until the first
fce8b1
@@ -617,32 +609,31 @@ stapiu_change_semaphore_plus(struct stapiu_consumer* c, struct task_struct *task
fce8b1
   // somehow maps in the same solib multiple times).  We can't easily
fce8b1
   // both iterate this list (in a spinlock-protected safe way), and
fce8b1
   // relax the spinlock enough to do a safe stapiu_write_task_semaphore()
fce8b1
-  // call within the loop.  So we will hit only the copy in our list.
fce8b1
+  // call within the loop.  So we will hit only the first copy in our list.
fce8b1
   any_found = 0;
fce8b1
   spin_lock_irqsave(&c->process_list_lock, flags);
fce8b1
   /* Look through all the consumer's processes and increment semaphores.  */
fce8b1
   list_for_each_entry(p, &c->process_list_head, process_list) {
fce8b1
     unsigned long addr = p->base + c->sdt_sem_offset;
fce8b1
-    if (p->tgid != task->tgid) // skip other processes in the list
fce8b1
-      continue;
fce8b1
-    if (addr >= relocation && addr < relocation + length) {
fce8b1
-      int rc2;
fce8b1
-      // unlock list and process write for this entry
fce8b1
-      spin_unlock_irqrestore(&c->process_list_lock, flags);
fce8b1
-      any_found=1;
fce8b1
-
fce8b1
-      dbug_uprobes("incrementing semaphore (u%sprobe) pid %ld "
fce8b1
-                   "pidx %zu address %lx\n",
fce8b1
-                   c->return_p ? "ret" : "",
fce8b1
-                   (long) task->tgid,
fce8b1
-                   c->probe->index,
fce8b1
-                   (unsigned long) addr);
fce8b1
-
fce8b1
-      rc2 = stapiu_write_task_semaphore(task, addr, +1);
fce8b1
-      if (!rc)
fce8b1
-        rc = rc2;
fce8b1
-      break; // exit list_for_each loop
fce8b1
-    }
fce8b1
+    int rc2;
fce8b1
+    if (p->tgid != task->tgid) continue; // skip other processes in the list
fce8b1
+    if (p->inode != inode) continue; // skip other inodes
fce8b1
+
fce8b1
+    // unlock list and process write for this entry
fce8b1
+    spin_unlock_irqrestore(&c->process_list_lock, flags);
fce8b1
+    any_found=1;
fce8b1
+
fce8b1
+    dbug_uprobes("incrementing semaphore (u%sprobe) pid %ld "
fce8b1
+                 "pidx %zu address 0x%lx\n",
fce8b1
+                 c->return_p ? "ret" : "",
fce8b1
+                 (long) task->tgid,
fce8b1
+                 c->probe->index,
fce8b1
+                 (unsigned long) addr);
fce8b1
+
fce8b1
+    rc2 = stapiu_write_task_semaphore(task, addr, +1);
fce8b1
+    if (!rc)
fce8b1
+            rc = rc2;
fce8b1
+    break; // exit list_for_each loop
fce8b1
   }
fce8b1
   if (! any_found)
fce8b1
     spin_unlock_irqrestore(&c->process_list_lock, flags);
fce8b1
@@ -755,17 +746,41 @@ stapiu_process_found(struct stap_task_finder_target *tf_target,
fce8b1
   
fce8b1
   if (!process_p)
fce8b1
     return 0; /* ignore threads */
fce8b1
-  
fce8b1
+
fce8b1
+  dbug_uprobes("process_found pid=%ld f.p=%s f.b=%s c.p=%s c.b=%s\n",
fce8b1
+               (long)task->tgid,
fce8b1
+	       ((char*)c->finder.procname ?: ""),
fce8b1
+               ((char*)c->finder.build_id ?: ""),
fce8b1
+	       ((char*)c->solib_pathname ?: ""),
fce8b1
+               ((char*)c->solib_build_id ?: ""));
fce8b1
+
fce8b1
   /* ET_EXEC events are like shlib events, but with 0 relocation bases */
fce8b1
   if (register_p) {
fce8b1
     int rc = -EINVAL;
fce8b1
     struct inode *inode = stapiu_get_task_inode(task);
fce8b1
     
fce8b1
     if (inode) {
fce8b1
-      rc = stapiu_change_plus(c, task, 0, TASK_SIZE,
fce8b1
-			      0, 0, inode);
fce8b1
-      stapiu_change_semaphore_plus(c, task, 0,
fce8b1
-				   TASK_SIZE);
fce8b1
+      // Add a stapiu_process record to the consumer, so that
fce8b1
+      // the semaphore increment logic will accept this task.
fce8b1
+      struct stapiu_process* p;
fce8b1
+      unsigned long flags;
fce8b1
+      p = _stp_kzalloc(sizeof(struct stapiu_process));
fce8b1
+      if (p) {
fce8b1
+        p->tgid = task->tgid;
fce8b1
+        p->relocation = 0;
fce8b1
+        p->inode = inode;
fce8b1
+        p->base = 0;
fce8b1
+        spin_lock_irqsave (&c->process_list_lock, flags);
fce8b1
+        list_add(&p->process_list, &c->process_list_head);
fce8b1
+        spin_unlock_irqrestore (&c->process_list_lock, flags);
fce8b1
+      } else {
fce8b1
+         _stp_warn("out of memory tracking executable in process %ld\n",
fce8b1
+                   (long) task->tgid);
fce8b1
+      }
fce8b1
+            
fce8b1
+      rc = stapiu_change_plus(c, task, 0, TASK_SIZE, 0, 0, inode);
fce8b1
+      
fce8b1
+      stapiu_change_semaphore_plus(c, task, 0, inode);
fce8b1
     }
fce8b1
     return rc;
fce8b1
   } else
fce8b1
@@ -776,6 +791,8 @@ stapiu_process_found(struct stap_task_finder_target *tf_target,
fce8b1
 bool
fce8b1
 __verify_build_id (struct task_struct *tsk, unsigned long addr,
fce8b1
 		   unsigned const char *build_id, int build_id_len);
fce8b1
+// defined in task_finder2.c
fce8b1
+
fce8b1
 
fce8b1
 
fce8b1
 /* The task_finder_mmap_callback.  These callbacks are NOT
fce8b1
@@ -791,28 +808,119 @@ stapiu_mmap_found(struct stap_task_finder_target *tf_target,
fce8b1
   struct stapiu_consumer *c =
fce8b1
     container_of(tf_target, struct stapiu_consumer, finder);
fce8b1
   int rc = 0;
fce8b1
+  struct stapiu_process* p;
fce8b1
+  int known_mapping_p;
fce8b1
+  unsigned long flags;
fce8b1
 
fce8b1
-  /* The file path or build-id must match. The build-id address
fce8b1
-   * is calculated using start address of this vma, the file
fce8b1
-   * offset of the vma start address and the file offset of
fce8b1
-   * the build-id. */
fce8b1
-  if (c->solib_pathname && path && strcmp (path, c->solib_pathname))
fce8b1
-    return 0;
fce8b1
-  if (c->solib_build_id_len > 0 && !__verify_build_id(task,
fce8b1
-						      addr - offset + c->solib_build_id_vaddr,
fce8b1
-						      c->solib_build_id,
fce8b1
-						      c->solib_build_id_len))
fce8b1
-    return 0;
fce8b1
+  /*
fce8b1
+  We need to verify that this file/mmap corresponds to the given stapiu_consumer.
fce8b1
+  One could compare (inode) file name, but that won't work with buildid-based
fce8b1
+  uprobes.  For those, one cannot just
fce8b1
+ 
fce8b1
+  __verify_build_id(... addr - offset + c->solib_build_id_vaddr ...)
fce8b1
+ 
fce8b1
+  because dlopen()ing a shared library involves multiple mmaps, including
fce8b1
+  some at repeating/offset addresses.  See glibc _dl_map_segments() in various
fce8b1
+  versions.  So by the fourth call (!) on modern glibc's, we get a VM_WRITE-able
fce8b1
+  data segment mapped, but that's at a load/mapping address that is offset by a
fce8b1
+  page from the base (file offset=0) mapping.
fce8b1
+
fce8b1
+  e.g. on Fedora 32 / glibc 2.31, with testsuite/libsdt_buildid.so:
fce8b1
+
fce8b1
+  Program Headers:
fce8b1
+  Type           Offset   VirtAddr           PhysAddr           FileSiz  MemSiz   Flg Align
fce8b1
+  LOAD           0x000000 0x0000000000000000 0x0000000000000000 0x0004b8 0x0004b8 R   0x1000
fce8b1
+  LOAD           0x001000 0x0000000000001000 0x0000000000001000 0x000161 0x000161 R E 0x1000
fce8b1
+  LOAD           0x002000 0x0000000000002000 0x0000000000002000 0x0000cc 0x0000cc R   0x1000
fce8b1
+  LOAD           0x002df8 0x0000000000003df8 0x0000000000003df8 0x000232 0x000238 RW  0x1000
fce8b1
+  DYNAMIC        0x002e10 0x0000000000003e10 0x0000000000003e10 0x0001d0 0x0001d0 RW  0x8
fce8b1
+
fce8b1
+  strace:
fce8b1
+  openat(AT_FDCWD, ".../libsdt_buildid.so", O_RDONLY|O_CLOEXEC) = 3
fce8b1
+  mmap(NULL, 16432, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x148c764ac000
fce8b1
+  mmap(0x148c764ad000, 4096, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1000) = 0x148c764ad000
fce8b1
+  mmap(0x148c764ae000, 4096, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x148c764ae000
fce8b1
+  mmap(0x148c764af000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x148c764af000
fce8b1
+
fce8b1
+  Note how the virtual mapping for the fourth mmap (also) maps file-offset 0x2000 at
fce8b1
+  vm offset 0x3000.
fce8b1
+
fce8b1
+  So what we do is rely on the name/buildid validation tests being run
fce8b1
+  -earlier- in the dlopen/mmap sequence to validate near-future
fce8b1
+  mmap()s.  We search the c->process_list[] for a mapping that already
fce8b1
+  overlaps the new range, and if so, consider it validated ... whether
fce8b1
+  for the solib_pathname or the solib_build_id case.
fce8b1
+
fce8b1
+  This is complicated for startup-time traversal of processes/mmaps,
fce8b1
+  where it seems sometimes we get notifications out of temporal sequence.
fce8b1
+  */
fce8b1
   
fce8b1
-  /* 1 - shared libraries' executable segments load from offset 0
fce8b1
-   *   - ld.so convention offset != 0 is now allowed
fce8b1
-   *     so stap_uprobe_change_plus can set a semaphore,
fce8b1
-   *     i.e. a static extern, in a shared object
fce8b1
-   * 2 - the shared library we're interested in
fce8b1
-   * 3 - mapping should be executable or writeable (for
fce8b1
-   *     semaphore in .so)
fce8b1
-   *     NB: or both, on kernels that lack noexec mapping
fce8b1
-   */
fce8b1
+  known_mapping_p = 0;
fce8b1
+  spin_lock_irqsave(&c->process_list_lock, flags);
fce8b1
+  list_for_each_entry(p, &c->process_list_head, process_list) {
fce8b1
+    if (p->tgid != task->tgid) continue;
fce8b1
+    if (p->inode != dentry->d_inode) continue;
fce8b1
+    known_mapping_p = 1;
fce8b1
+    break;
fce8b1
+  }
fce8b1
+  spin_unlock_irqrestore(&c->process_list_lock, flags);
fce8b1
+
fce8b1
+
fce8b1
+  // Check if this mapping (solib) is of interest: whether we expect
fce8b1
+  // it by buildid or name.
fce8b1
+  
fce8b1
+  if (! known_mapping_p) {
fce8b1
+    /* The file path or build-id must match. The build-id address
fce8b1
+     * is calculated using start address of this vma, the file
fce8b1
+     * offset of the vma start address and the file offset of
fce8b1
+     * the build-id. */
fce8b1
+    if (c->solib_pathname && path && strcmp (path, c->solib_pathname))
fce8b1
+      return 0;
fce8b1
+    if (c->solib_build_id_len > 0 && !__verify_build_id(task,
fce8b1
+  						        addr - offset + c->solib_build_id_vaddr,
fce8b1
+  						        c->solib_build_id,
fce8b1
+						        c->solib_build_id_len))
fce8b1
+      return 0;
fce8b1
+  }
fce8b1
+
fce8b1
+  // If we made it this far, we have an interesting solib.
fce8b1
+
fce8b1
+  dbug_uprobes("mmap_found pid=%ld path=%s addr=0x%lx length=%lu offset=%lu flags=0x%lx known=%d\n",
fce8b1
+               (long) task->tgid, path, addr, length, offset, vm_flags, known_mapping_p);
fce8b1
+  
fce8b1
+  if (! known_mapping_p) {
fce8b1
+    // OK, let's add it.  The first mapping should be a VM_READ mapping
fce8b1
+    // of the entire solib file, which will also serve as the apprx.
fce8b1
+    // outer bounds of the repeatedly-mapped segments.
fce8b1
+
fce8b1
+#if 0
fce8b1
+    // Consider an assumption about the dlopen/mmap sequence
fce8b1
+    // If it comes out of sequence, we could get length/base wrong in the stored
fce8b1
+    // stapiu_process, which could lead us to miscalculate semaphore addresses.
fce8b1
+    //
fce8b1
+    // However, this has been observed on task-finder initial-enumeration case,
fce8b1
+    // (sdt_misc.exp, where a solib test is already running when stap starts).
fce8b1
+    if (offset != 0)
fce8b1
+      return 0;
fce8b1
+#endif
fce8b1
+    
fce8b1
+    // Associate this consumer with this process.  If we encounter
fce8b1
+    // resource problems here, we don't really have to undo the uprobe
fce8b1
+    // registrations etc. already in effect.  It may break correct
fce8b1
+    // tracking of process hierarchy in -c/-x operation, but too bad.
fce8b1
+    p = _stp_kzalloc(sizeof(struct stapiu_process));
fce8b1
+    if (p) {
fce8b1
+      p->tgid = task->tgid;
fce8b1
+      p->relocation = addr;
fce8b1
+      p->inode = dentry->d_inode;
fce8b1
+      p->base = addr-offset; // ... in case caught this during the second mmap
fce8b1
+      spin_lock_irqsave (&c->process_list_lock, flags);
fce8b1
+      list_add(&p->process_list, &c->process_list_head);
fce8b1
+      spin_unlock_irqrestore (&c->process_list_lock, flags);
fce8b1
+    } else
fce8b1
+      _stp_warn("out of memory tracking solib %s in process %ld\n",
fce8b1
+                path, (long) task->tgid);
fce8b1
+  }
fce8b1
 
fce8b1
   /* Check non-writable, executable sections for probes. */
fce8b1
   if ((vm_flags & VM_EXEC) && !(vm_flags & VM_WRITE))
fce8b1
@@ -827,7 +935,7 @@ stapiu_mmap_found(struct stap_task_finder_target *tf_target,
fce8b1
    */
fce8b1
 
fce8b1
   if ((rc == 0) && (vm_flags & VM_WRITE))
fce8b1
-    rc = stapiu_change_semaphore_plus(c, task, addr, length);
fce8b1
+    rc = stapiu_change_semaphore_plus(c, task, addr, dentry->d_inode);
fce8b1
 
fce8b1
   return rc;
fce8b1
 }
fce8b1
diff --git a/runtime/sym.c b/runtime/sym.c
fce8b1
index be09ec8..21d820a 100644
fce8b1
--- a/runtime/sym.c
fce8b1
+++ b/runtime/sym.c
fce8b1
@@ -713,9 +713,10 @@ static int _stp_build_id_check (struct _stp_module *m,
fce8b1
 	  // NB: It is normal for different binaries with the same file path
fce8b1
 	  // coexist in the same system via chroot or namespaces, therefore
fce8b1
 	  // we make sure below is really a warning.
fce8b1
-          _stp_warn ("Build-id mismatch [man warning::buildid]: \"%s\" address "
fce8b1
+          _stp_warn ("Build-id mismatch [man warning::buildid]: \"%s\" pid %ld address "
fce8b1
 		     "%#lx, expected %s actual %s\n",
fce8b1
-                     m->path, notes_addr, hexstring_theory, hexstring_practice);
fce8b1
+                     m->path, (long) tsk->tgid,
fce8b1
+                     notes_addr, hexstring_theory, hexstring_practice);
fce8b1
       return 1;
fce8b1
   }
fce8b1
   
fce8b1
fce8b1
commit 5e1ef9d7f2a5ea6e5511ef5228cf05dda1c570b3
fce8b1
Author: Frank Ch. Eigler <fche@redhat.com>
fce8b1
Date:   Mon Jul 27 07:58:30 2020 -0400
fce8b1
fce8b1
    PR25568 / RHBZ1857749: sdt_buildid.exp test case
fce8b1
    
fce8b1
    Add new test that checks for combinations of buildid and pathname
fce8b1
    based uprobes for executables and shared libraries.
fce8b1
fce8b1
diff --git a/testsuite/systemtap.base/sdt_buildid.c b/testsuite/systemtap.base/sdt_buildid.c
fce8b1
new file mode 100644
fce8b1
index 0000000..ccbb2f2
fce8b1
--- /dev/null
fce8b1
+++ b/testsuite/systemtap.base/sdt_buildid.c
fce8b1
@@ -0,0 +1,26 @@
fce8b1
+#include <unistd.h>
fce8b1
+#include <stdlib.h>
fce8b1
+#include <stdio.h>
fce8b1
+
fce8b1
+void bar ();
fce8b1
+
fce8b1
+#ifndef ONLY_MAIN
fce8b1
+#include "sdt_buildid_.h"
fce8b1
+
fce8b1
+void
fce8b1
+bar ()
fce8b1
+{
fce8b1
+  printf("%s=%ld\n", "test_probe_0_semaphore", SDT_BUILDID_TEST_PROBE_0_ENABLED());
fce8b1
+  if (SDT_BUILDID_TEST_PROBE_0_ENABLED())
fce8b1
+    SDT_BUILDID_TEST_PROBE_0();
fce8b1
+}
fce8b1
+#endif
fce8b1
+
fce8b1
+#ifndef NO_MAIN
fce8b1
+int
fce8b1
+main ()
fce8b1
+{
fce8b1
+  bar();
fce8b1
+  return 0;
fce8b1
+}
fce8b1
+#endif
fce8b1
diff --git a/testsuite/systemtap.base/sdt_buildid.exp b/testsuite/systemtap.base/sdt_buildid.exp
fce8b1
new file mode 100644
fce8b1
index 0000000..3141fd6
fce8b1
--- /dev/null
fce8b1
+++ b/testsuite/systemtap.base/sdt_buildid.exp
fce8b1
@@ -0,0 +1,214 @@
fce8b1
+set test "sdt_buildid"
fce8b1
+
fce8b1
+set pbtype_flags {{additional_flags=-g} {} {}}
fce8b1
+set fail_count 0
fce8b1
+
fce8b1
+# Compile a C program to use as the user-space probing target
fce8b1
+set stap_path $env(SYSTEMTAP_PATH)/stap
fce8b1
+set sup_dpath "[pwd]/sdt_buildid_.d"
fce8b1
+set sup_hpath "[pwd]/sdt_buildid_.h"
fce8b1
+set sup_opath "[pwd]/sdt_buildid_.o"
fce8b1
+
fce8b1
+# Run dtrace
fce8b1
+if {[installtest_p]} {
fce8b1
+    set dtrace $env(SYSTEMTAP_PATH)/dtrace
fce8b1
+} else {
fce8b1
+    set dtrace ../dtrace
fce8b1
+}
fce8b1
+
fce8b1
+verbose -log "$dtrace --types -h -s $srcdir/$subdir/sdt_buildid_.d"
fce8b1
+if {[catch {exec $dtrace --types -h -s \
fce8b1
+                $srcdir/$subdir/sdt_buildid_.d} res]} {
fce8b1
+    verbose -log "unable to run $dtrace: $res"
fce8b1
+}
fce8b1
+verbose -log "$dtrace --types -G -s $srcdir/$subdir/sdt_buildid_.d"
fce8b1
+if {[catch {exec $dtrace --types -G -s \
fce8b1
+                $srcdir/$subdir/sdt_buildid_.d} res]} {
fce8b1
+    verbose -log "unable to run $dtrace: $res"
fce8b1
+}
fce8b1
+if {[file exists $sup_hpath] && [file exists $sup_opath]} then {
fce8b1
+    pass "$test dtrace"
fce8b1
+} else {
fce8b1
+    incr fail_count
fce8b1
+    fail "$test dtrace"
fce8b1
+    return
fce8b1
+}
fce8b1
+    
fce8b1
+set sup_flags [sdt_includes]
fce8b1
+set sup_flags "$sup_flags additional_flags=-Wall"
fce8b1
+set sup_flags "$sup_flags additional_flags=-Werror"
fce8b1
+set sup_flags "$sup_flags additional_flags=$sup_opath"
fce8b1
+set sup_flags "$sup_flags additional_flags=-I."
fce8b1
+set sup_exepath "[pwd]/sdt_buildid.x"
fce8b1
+
fce8b1
+set res [target_compile $srcdir/$subdir/sdt_buildid.c $sup_exepath \
fce8b1
+             executable $sup_flags]
fce8b1
+if { $res != "" } {
fce8b1
+    incr fail_count
fce8b1
+    verbose "target_compile failed: $res" 2
fce8b1
+    fail "$test compiling"
fce8b1
+    return
fce8b1
+} else {
fce8b1
+    pass "$test compiling"
fce8b1
+}
fce8b1
+
fce8b1
+
fce8b1
+set sup41_flags "$sup_flags additional_flags=-shared"
fce8b1
+set sup41_flags "$sup41_flags additional_flags=-fPIC"
fce8b1
+set sup41_flags "$sup41_flags additional_flags=-DNO_MAIN"
fce8b1
+set sup_sopath "[pwd]/libsdt_buildid.so"
fce8b1
+set sup_exe2path "[pwd]/sdt_buildid_shared.x"
fce8b1
+set res0 [target_compile $srcdir/$subdir/sdt_buildid.c $sup_sopath \
fce8b1
+		  executable $sup41_flags ]
fce8b1
+set sup42_flags "additional_flags=-Wl,-rpath,[pwd]"
fce8b1
+set sup42_flags "$sup42_flags additional_flags=-L[pwd] additional_flags=-lsdt_buildid"
fce8b1
+set sup42_flags "$sup42_flags additional_flags=-DONLY_MAIN"
fce8b1
+set res [target_compile $srcdir/$subdir/sdt_buildid.c $sup_exe2path \
fce8b1
+             executable $sup42_flags ]
fce8b1
+if { $res0 != "" || $res != "" } {
fce8b1
+    incr fail_count
fce8b1
+    verbose "target_compile failed: $res0 $res" 2
fce8b1
+    fail "$test compiling -shared"
fce8b1
+    return
fce8b1
+} else {
fce8b1
+    pass "$test compiling -shared"
fce8b1
+}
fce8b1
+
fce8b1
+catch { exec eu-readelf -n $sup_exepath | grep Build.ID | awk "{print \$NF}" } bid1
fce8b1
+catch { exec eu-readelf -n $sup_sopath | grep Build.ID | awk "{print \$NF}" } bidso
fce8b1
+catch { exec eu-readelf -n $sup_exe2path | grep Build.ID | awk "{print \$NF}" } bid2
fce8b1
+verbose -log "buildid: $sup_exepath $bid1"
fce8b1
+verbose -log "buildid: $sup_sopath $bidso"
fce8b1
+verbose -log "buildid: $sup_exe2path $bid2"
fce8b1
+# though we won't use the $bid2
fce8b1
+
fce8b1
+if {![installtest_p]} {
fce8b1
+    untested $test
fce8b1
+    return
fce8b1
+}
fce8b1
+
fce8b1
+# To test via build-id, we need a debuginfod server to scan the testsuite build
fce8b1
+# directory.
fce8b1
+
fce8b1
+
fce8b1
+if [catch {exec /usr/bin/which debuginfod} debuginfod] then {
fce8b1
+    untested "$test debuginfod"
fce8b1
+} else {
fce8b1
+    set port [expr {10000 + int(rand()*10000)}]
fce8b1
+    spawn $debuginfod -p $port -d :memory: -F .
fce8b1
+    set debuginfod_pid [exp_pid $spawn_id]
fce8b1
+    # give it time to scan the build directory
fce8b1
+    sleep 10
fce8b1
+    # XXX: we could expect some verbose traffic
fce8b1
+    set env(DEBUGINFOD_URLS) "http://localhost:$port $env(DEBUGINFOD_URLS)"
fce8b1
+    verbose -log "started debuginfod on port $port"
fce8b1
+
fce8b1
+    set subtest "$test debuginfod buildid-exe buildid-solib"
fce8b1
+    spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $bid1 $bidso
fce8b1
+    set ok 0
fce8b1
+    expect {
fce8b1
+        -timeout 240
fce8b1
+        -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue }
fce8b1
+        -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue }
fce8b1
+        eof { }
fce8b1
+        timeout { }
fce8b1
+    }
fce8b1
+    catch {close}; catch {wait}
fce8b1
+    if {$ok > 6} then { pass $subtest } else { fail $subtest }
fce8b1
+
fce8b1
+    set subtest "$test debuginfod buildid-exe path-solib"
fce8b1
+    spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $bid1 $sup_sopath
fce8b1
+    set ok 0
fce8b1
+    expect {
fce8b1
+        -timeout 240
fce8b1
+        -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue }
fce8b1
+        -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue }
fce8b1
+        eof { }
fce8b1
+        timeout { }
fce8b1
+    }
fce8b1
+    catch {close}; catch {wait}
fce8b1
+    if {$ok > 6} then { pass $subtest } else { fail $subtest }
fce8b1
+
fce8b1
+    set subtest "$test debuginfod path-exe buildid-solib"
fce8b1
+    spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $sup_exepath $bidso
fce8b1
+    set ok 0
fce8b1
+    expect {
fce8b1
+        -timeout 240
fce8b1
+        -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue }
fce8b1
+        -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue }
fce8b1
+        eof { }
fce8b1
+        timeout { }
fce8b1
+    }
fce8b1
+    catch {close}; catch {wait}
fce8b1
+    if {$ok > 6} then { pass $subtest } else { fail $subtest }
fce8b1
+
fce8b1
+    set subtest "$test debuginfod buildid-solib"
fce8b1
+    spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $bidso
fce8b1
+    set ok 0
fce8b1
+    expect {
fce8b1
+        -timeout 240
fce8b1
+        -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue }
fce8b1
+        -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue }
fce8b1
+        eof { }
fce8b1
+        timeout { }
fce8b1
+    }
fce8b1
+    catch {close}; catch {wait}
fce8b1
+    if {$ok > 6} then { pass $subtest } else { fail $subtest }
fce8b1
+
fce8b1
+    set subtest "$test debuginfod buildid-exe"
fce8b1
+    spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $bid1
fce8b1
+    set ok 0
fce8b1
+    expect {
fce8b1
+        -timeout 240
fce8b1
+        -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue }
fce8b1
+        -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue }
fce8b1
+        eof { }
fce8b1
+        timeout { }
fce8b1
+    }
fce8b1
+    catch {close}; catch {wait}
fce8b1
+    if {$ok > 6} then { pass $subtest } else { fail $subtest }
fce8b1
+
fce8b1
+    kill -INT $debuginfod_pid
fce8b1
+}
fce8b1
+
fce8b1
+
fce8b1
+set subtest "$test non-buildid both"
fce8b1
+spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $sup_exepath $sup_sopath
fce8b1
+set ok 0
fce8b1
+expect {
fce8b1
+    -timeout 240
fce8b1
+    -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue }
fce8b1
+    -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue }
fce8b1
+    eof { }
fce8b1
+    timeout { }
fce8b1
+}
fce8b1
+catch {close}; catch {wait}
fce8b1
+if {$ok > 6} then { pass $subtest } else { fail $subtest }
fce8b1
+
fce8b1
+set subtest "$test non-buildid exe"
fce8b1
+spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $sup_exepath
fce8b1
+set ok 0
fce8b1
+expect {
fce8b1
+    -timeout 240
fce8b1
+    -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue }
fce8b1
+    -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exepath; exp_continue }
fce8b1
+    eof { }
fce8b1
+    timeout { }
fce8b1
+}
fce8b1
+catch {close}; catch {wait}
fce8b1
+if {$ok > 6} then { pass $subtest } else { fail $subtest }
fce8b1
+
fce8b1
+set subtest "$test non-buildid solib"
fce8b1
+spawn $stap_path $srcdir/$subdir/sdt_buildid.stp $sup_sopath
fce8b1
+set ok 0
fce8b1
+expect {
fce8b1
+    -timeout 240
fce8b1
+    -re {^Count [0-9]*[02468][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue }
fce8b1
+    -re {^Count [0-9]*[13579][^\r\n]*\r\n} { incr ok; exec $sup_exe2path; exp_continue }
fce8b1
+    eof { }
fce8b1
+    timeout { }
fce8b1
+}
fce8b1
+catch {close}; catch {wait}
fce8b1
+if {$ok > 6} then { pass $subtest } else { fail $subtest }
fce8b1
+
fce8b1
+return
fce8b1
diff --git a/testsuite/systemtap.base/sdt_buildid.stp b/testsuite/systemtap.base/sdt_buildid.stp
fce8b1
new file mode 100644
fce8b1
index 0000000..a26d183
fce8b1
--- /dev/null
fce8b1
+++ b/testsuite/systemtap.base/sdt_buildid.stp
fce8b1
@@ -0,0 +1,19 @@
fce8b1
+global count
fce8b1
+
fce8b1
+function trace () {
fce8b1
+  printf ("Count %d [%d] %s %s\n", count++, pid(), $$name, pp())
fce8b1
+}
fce8b1
+
fce8b1
+probe process(@1).mark("test_probe_0") { trace() }
fce8b1
+%( $# > 1 %? probe process(@2).mark("test_probe_0") { trace() } %)
fce8b1
+
fce8b1
+probe begin
fce8b1
+{
fce8b1
+  printf ("Count %d\n", count++)
fce8b1
+}
fce8b1
+
fce8b1
+probe timer.s(1) // exit quickly after enough marks fire
fce8b1
+{
fce8b1
+  if (count > 10) exit()
fce8b1
+}
fce8b1
+
fce8b1
diff --git a/testsuite/systemtap.base/sdt_buildid_.d b/testsuite/systemtap.base/sdt_buildid_.d
fce8b1
new file mode 100644
fce8b1
index 0000000..ebfca55
fce8b1
--- /dev/null
fce8b1
+++ b/testsuite/systemtap.base/sdt_buildid_.d
fce8b1
@@ -0,0 +1,4 @@
fce8b1
+provider sdt_buildid {
fce8b1
+        probe test_probe_0 ();
fce8b1
+};
fce8b1
+