Blob Blame History Raw
commit 775bd386c2b9a6e553c61a5c0f86d29d9c2320a8
Author: Mark Goodwin <mgoodwin@redhat.com>
Date:   Thu Jul 25 13:30:05 2019 +1000

    pmdaproc: fix memory leak in pidlist refresh
    
    RHBZ #1721107
    
    The pidlist refresh function was leaking instance name buffers for
    processes that had exited between refreshes. Fix this by storing the
    instance name (truncated psargs) in a new field of the hash table
    entry, and harvest them correctly if they've exited after each refresh.
    The indom table instance names now share a pointer to the new field
    in the hash table entry so they dont need to be separately free'd.
    This similifies the code and memory management.
    
    Also put a 4K cap on the maximum length of a psargs string for the
    unlikely scenario of a psargs string that is not NULL terminated,
    i.e. use strndup() rather than strdup(). This may be possible if
    a rogue process modified it's argv[0] at run-time.
    
    For testing, the recipe in RHBZ#1721107 was repro'd using valgind
    and a workload with lots of process churn (i.e. a couple of large
    builds running). Before the fix, valgrind reported numerous similar
    leaks as reported in the bug. With the fix applied, those leaks
    are gone. The refresh is more efficient now too because we only
    have one copy of the psargs strings (in the hash table entry).
    
    Also thoroughly exercised all tests in the QA pmda.proc and
    pmda.hotproc groups. A new test might be possible, but the nature
    of the required workload would make it difficult (and expensive)
    to run.

diff --git a/src/pmdas/linux_proc/proc_pid.c b/src/pmdas/linux_proc/proc_pid.c
index 2e8f705b9..dcbc28850 100644
--- a/src/pmdas/linux_proc/proc_pid.c
+++ b/src/pmdas/linux_proc/proc_pid.c
@@ -820,7 +820,7 @@ disable_hotproc(void)
 static void
 refresh_proc_pidlist(proc_pid_t *proc_pid, proc_pid_list_t *pids)
 {
-    int i;
+    int i, numinst, idx;
     int fd;
     char *p;
     char buf[MAXPATHLEN];
@@ -828,14 +828,6 @@ refresh_proc_pidlist(proc_pid_t *proc_pid, proc_pid_list_t *pids)
     proc_pid_entry_t *ep;
     pmdaIndom *indomp = proc_pid->indom;
 
-    if (indomp->it_numinst < pids->count) {
-	indomp->it_set = (pmdaInstid *)realloc(indomp->it_set,
-	    pids->count * sizeof(pmdaInstid));
-    	memset(&indomp->it_set[indomp->it_numinst], 0,
-	    (pids->count - indomp->it_numinst) * sizeof(pmdaInstid));
-    }
-    indomp->it_numinst = pids->count;
-
     /*
      * invalidate all entries so we can harvest pids that have exited
      */
@@ -852,7 +844,9 @@ refresh_proc_pidlist(proc_pid_t *proc_pid, proc_pid_list_t *pids)
      */
     for (i=0; i < pids->count; i++) {
 	node = __pmHashSearch(pids->pids[i], &proc_pid->pidhash);
-	if (node == NULL) {
+	if (node)
+	    ep = (proc_pid_entry_t *)node->data;
+	else {
 	    int k = 0;
 
 	    ep = (proc_pid_entry_t *)malloc(sizeof(proc_pid_entry_t));
@@ -940,57 +934,52 @@ refresh_proc_pidlist(proc_pid_t *proc_pid, proc_pid_list_t *pids)
 	    __pmHashAdd(pids->pids[i], (void *)ep, &proc_pid->pidhash);
 	    //fprintf(stderr, "key %d : ADDED \"%s\" to hash table\n", pids->pids[i], buf);
 	}
-	else
-	    ep = (proc_pid_entry_t *)node->data;
-	
-	/* mark pid as still existing */
-	ep->flags |= PROC_PID_FLAG_VALID;
 
-	/* refresh the indom pointer */
-	indomp->it_set[i].i_inst = ep->id;
-	if (indomp->it_set[i].i_name) {
-	    int len = strlen(indomp->it_set[i].i_name);
-	    if (strncmp(indomp->it_set[i].i_name, ep->name, len) != 0) {
-	    	free(indomp->it_set[i].i_name);
-	    	indomp->it_set[i].i_name = NULL;
-	    }
-	    else if (pmDebugOptions.libpmda) {
-		fprintf(stderr, "refresh_proc_pidlist: Instance id=%d \"%s\" no change\n",
-		    ep->id, indomp->it_set[i].i_name);
-	    }
-	}
-	if (indomp->it_set[i].i_name == NULL) {
-	    /*
-	     * The external instance name is the pid followed by
-	     * a copy of the psargs truncated at the first space.
-	     * e.g. "012345 /path/to/command". Command line args,
-	     * if any, are truncated. The full command line is
-	     * available in the proc.psinfo.psargs metric.
-	     */
-	    if ((p = strchr(ep->name, ' ')) != NULL) {
-		if ((p = strchr(p+1, ' ')) != NULL) {
-		    int len = p - ep->name;
-		    indomp->it_set[i].i_name = (char *)malloc(len+1);
-		    strncpy(indomp->it_set[i].i_name, ep->name, len);
-		    indomp->it_set[i].i_name[len] = '\0';
-		}
-	    }
-	    if (indomp->it_set[i].i_name == NULL)
-		indomp->it_set[i].i_name = strdup(ep->name);
+	if (ep->instname == NULL) {
+           /*
+             * The external instance name is the pid followed by
+             * a copy of the psargs truncated at the first space.
+             * e.g. "012345 /path/to/command". Command line args,
+             * if any, are truncated. The full command line is
+             * available in the proc.psinfo.psargs metric.
+             */
+            if ((p = strchr(ep->name, ' ')) != NULL) {
+                if ((p = strchr(p+1, ' ')) != NULL) {
+                    int len = p - ep->name;
+		    if (len > PROC_PID_STAT_CMD_MAXLEN)
+			len = PROC_PID_STAT_CMD_MAXLEN;
+                    ep->instname = (char *)malloc(len+1);
+                    strncpy(ep->instname, ep->name, len);
+                    ep->instname[len] = '\0';
+                }
+            }
+            if (ep->instname == NULL) /* no spaces found, so use the full name */
+                ep->instname = strndup(ep->name, PROC_PID_STAT_CMD_MAXLEN);
 	}
+	
+	/* mark pid as valid (new or still running) */
+	ep->flags |= PROC_PID_FLAG_VALID;
     }
 
     /* 
-     * harvest exited pids from the pid hash table
+     * harvest pids that have exit'ed
      */
+    numinst = 0;
     for (i=0; i < proc_pid->pidhash.hsize; i++) {
 	for (prev=NULL, node=proc_pid->pidhash.hash[i]; node != NULL;) {
 	    next = node->next;
 	    ep = (proc_pid_entry_t *)node->data;
 	    // fprintf(stderr, "CHECKING key=%d node=" PRINTF_P_PFX "%p prev=" PRINTF_P_PFX "%p next=" PRINTF_P_PFX "%p ep=" PRINTF_P_PFX "%p valid=%d\n",
 	    	// ep->id, node, prev, node->next, ep, ep->valid);
-	    if (!(ep->flags & PROC_PID_FLAG_VALID)) {
+	    if (ep->flags & PROC_PID_FLAG_VALID) {
+		numinst++;
+	    	prev = node;
+	    }
+	    else {
+		// This process has exited.
 	        //fprintf(stderr, "DELETED key=%d name=\"%s\"\n", ep->id, ep->name);
+		if (ep->instname != NULL)
+		    free(ep->instname);
 		if (ep->name != NULL)
 		    free(ep->name);
 		if (ep->stat_buf != NULL)
@@ -1017,13 +1006,25 @@ refresh_proc_pidlist(proc_pid_t *proc_pid, proc_pid_list_t *pids)
 		free(ep);
 		free(node);
 	    }
-	    else {
-	    	prev = node;
-	    }
 	    if ((node = next) == NULL)
 	    	break;
 	}
     }
+
+    /*
+     * At this point, the hash table contains only valid pids. Refresh the indom table,
+     * based on the updated process hash table. Indom table instance names are shared
+     * with their hash table entry, do not free!
+     */
+    indomp->it_numinst = numinst;
+    indomp->it_set = (pmdaInstid *)realloc(indomp->it_set, numinst * sizeof(pmdaInstid));
+    for (idx=0, i=0; i < proc_pid->pidhash.hsize; i++) {
+	for (node=proc_pid->pidhash.hash[i]; node != NULL; node=node->next, idx++) {
+	    ep = (proc_pid_entry_t *)node->data;
+	    indomp->it_set[idx].i_inst = ep->id; /* internal instid is pid */
+	    indomp->it_set[idx].i_name = ep->instname; /* ptr ref, do not free */
+	}
+    }
 }
 
 int
diff --git a/src/pmdas/linux_proc/proc_pid.h b/src/pmdas/linux_proc/proc_pid.h
index 344f4efc0..582965b3a 100644
--- a/src/pmdas/linux_proc/proc_pid.h
+++ b/src/pmdas/linux_proc/proc_pid.h
@@ -20,6 +20,10 @@
 #include "proc_runq.h"
 #include "hotproc.h"
 
+/*
+ * Maximim length of psargs and proc instance name.
+ */
+#define PROC_PID_STAT_CMD_MAXLEN	4096
 
 /*
  * /proc/<pid>/stat metrics
@@ -252,7 +256,8 @@ enum {
 typedef struct {
     int			id;	/* pid, hash key and internal instance id */
     int			flags;	/* combinations of PROC_PID_FLAG_* values */
-    char		*name;	/* external instance name (<pid> cmdline) */
+    char		*name;	/* full command line and args */
+    char		*instname; /* external instance name (truncated <pid> cmdline) */
 
     /* /proc/<pid>/stat cluster */
     int			stat_buflen;