commit 775bd386c2b9a6e553c61a5c0f86d29d9c2320a8 Author: Mark Goodwin Date: Thu Jul 25 13:30:05 2019 +1000 pmdaproc: fix memory leak in pidlist refresh RHBZ #1721107 The pidlist refresh function was leaking instance name buffers for processes that had exited between refreshes. Fix this by storing the instance name (truncated psargs) in a new field of the hash table entry, and harvest them correctly if they've exited after each refresh. The indom table instance names now share a pointer to the new field in the hash table entry so they dont need to be separately free'd. This similifies the code and memory management. Also put a 4K cap on the maximum length of a psargs string for the unlikely scenario of a psargs string that is not NULL terminated, i.e. use strndup() rather than strdup(). This may be possible if a rogue process modified it's argv[0] at run-time. For testing, the recipe in RHBZ#1721107 was repro'd using valgind and a workload with lots of process churn (i.e. a couple of large builds running). Before the fix, valgrind reported numerous similar leaks as reported in the bug. With the fix applied, those leaks are gone. The refresh is more efficient now too because we only have one copy of the psargs strings (in the hash table entry). Also thoroughly exercised all tests in the QA pmda.proc and pmda.hotproc groups. A new test might be possible, but the nature of the required workload would make it difficult (and expensive) to run. diff --git a/src/pmdas/linux_proc/proc_pid.c b/src/pmdas/linux_proc/proc_pid.c index 2e8f705b9..dcbc28850 100644 --- a/src/pmdas/linux_proc/proc_pid.c +++ b/src/pmdas/linux_proc/proc_pid.c @@ -820,7 +820,7 @@ disable_hotproc(void) static void refresh_proc_pidlist(proc_pid_t *proc_pid, proc_pid_list_t *pids) { - int i; + int i, numinst, idx; int fd; char *p; char buf[MAXPATHLEN]; @@ -828,14 +828,6 @@ refresh_proc_pidlist(proc_pid_t *proc_pid, proc_pid_list_t *pids) proc_pid_entry_t *ep; pmdaIndom *indomp = proc_pid->indom; - if (indomp->it_numinst < pids->count) { - indomp->it_set = (pmdaInstid *)realloc(indomp->it_set, - pids->count * sizeof(pmdaInstid)); - memset(&indomp->it_set[indomp->it_numinst], 0, - (pids->count - indomp->it_numinst) * sizeof(pmdaInstid)); - } - indomp->it_numinst = pids->count; - /* * invalidate all entries so we can harvest pids that have exited */ @@ -852,7 +844,9 @@ refresh_proc_pidlist(proc_pid_t *proc_pid, proc_pid_list_t *pids) */ for (i=0; i < pids->count; i++) { node = __pmHashSearch(pids->pids[i], &proc_pid->pidhash); - if (node == NULL) { + if (node) + ep = (proc_pid_entry_t *)node->data; + else { int k = 0; ep = (proc_pid_entry_t *)malloc(sizeof(proc_pid_entry_t)); @@ -940,57 +934,52 @@ refresh_proc_pidlist(proc_pid_t *proc_pid, proc_pid_list_t *pids) __pmHashAdd(pids->pids[i], (void *)ep, &proc_pid->pidhash); //fprintf(stderr, "key %d : ADDED \"%s\" to hash table\n", pids->pids[i], buf); } - else - ep = (proc_pid_entry_t *)node->data; - - /* mark pid as still existing */ - ep->flags |= PROC_PID_FLAG_VALID; - /* refresh the indom pointer */ - indomp->it_set[i].i_inst = ep->id; - if (indomp->it_set[i].i_name) { - int len = strlen(indomp->it_set[i].i_name); - if (strncmp(indomp->it_set[i].i_name, ep->name, len) != 0) { - free(indomp->it_set[i].i_name); - indomp->it_set[i].i_name = NULL; - } - else if (pmDebugOptions.libpmda) { - fprintf(stderr, "refresh_proc_pidlist: Instance id=%d \"%s\" no change\n", - ep->id, indomp->it_set[i].i_name); - } - } - if (indomp->it_set[i].i_name == NULL) { - /* - * The external instance name is the pid followed by - * a copy of the psargs truncated at the first space. - * e.g. "012345 /path/to/command". Command line args, - * if any, are truncated. The full command line is - * available in the proc.psinfo.psargs metric. - */ - if ((p = strchr(ep->name, ' ')) != NULL) { - if ((p = strchr(p+1, ' ')) != NULL) { - int len = p - ep->name; - indomp->it_set[i].i_name = (char *)malloc(len+1); - strncpy(indomp->it_set[i].i_name, ep->name, len); - indomp->it_set[i].i_name[len] = '\0'; - } - } - if (indomp->it_set[i].i_name == NULL) - indomp->it_set[i].i_name = strdup(ep->name); + if (ep->instname == NULL) { + /* + * The external instance name is the pid followed by + * a copy of the psargs truncated at the first space. + * e.g. "012345 /path/to/command". Command line args, + * if any, are truncated. The full command line is + * available in the proc.psinfo.psargs metric. + */ + if ((p = strchr(ep->name, ' ')) != NULL) { + if ((p = strchr(p+1, ' ')) != NULL) { + int len = p - ep->name; + if (len > PROC_PID_STAT_CMD_MAXLEN) + len = PROC_PID_STAT_CMD_MAXLEN; + ep->instname = (char *)malloc(len+1); + strncpy(ep->instname, ep->name, len); + ep->instname[len] = '\0'; + } + } + if (ep->instname == NULL) /* no spaces found, so use the full name */ + ep->instname = strndup(ep->name, PROC_PID_STAT_CMD_MAXLEN); } + + /* mark pid as valid (new or still running) */ + ep->flags |= PROC_PID_FLAG_VALID; } /* - * harvest exited pids from the pid hash table + * harvest pids that have exit'ed */ + numinst = 0; for (i=0; i < proc_pid->pidhash.hsize; i++) { for (prev=NULL, node=proc_pid->pidhash.hash[i]; node != NULL;) { next = node->next; ep = (proc_pid_entry_t *)node->data; // fprintf(stderr, "CHECKING key=%d node=" PRINTF_P_PFX "%p prev=" PRINTF_P_PFX "%p next=" PRINTF_P_PFX "%p ep=" PRINTF_P_PFX "%p valid=%d\n", // ep->id, node, prev, node->next, ep, ep->valid); - if (!(ep->flags & PROC_PID_FLAG_VALID)) { + if (ep->flags & PROC_PID_FLAG_VALID) { + numinst++; + prev = node; + } + else { + // This process has exited. //fprintf(stderr, "DELETED key=%d name=\"%s\"\n", ep->id, ep->name); + if (ep->instname != NULL) + free(ep->instname); if (ep->name != NULL) free(ep->name); if (ep->stat_buf != NULL) @@ -1017,13 +1006,25 @@ refresh_proc_pidlist(proc_pid_t *proc_pid, proc_pid_list_t *pids) free(ep); free(node); } - else { - prev = node; - } if ((node = next) == NULL) break; } } + + /* + * At this point, the hash table contains only valid pids. Refresh the indom table, + * based on the updated process hash table. Indom table instance names are shared + * with their hash table entry, do not free! + */ + indomp->it_numinst = numinst; + indomp->it_set = (pmdaInstid *)realloc(indomp->it_set, numinst * sizeof(pmdaInstid)); + for (idx=0, i=0; i < proc_pid->pidhash.hsize; i++) { + for (node=proc_pid->pidhash.hash[i]; node != NULL; node=node->next, idx++) { + ep = (proc_pid_entry_t *)node->data; + indomp->it_set[idx].i_inst = ep->id; /* internal instid is pid */ + indomp->it_set[idx].i_name = ep->instname; /* ptr ref, do not free */ + } + } } int diff --git a/src/pmdas/linux_proc/proc_pid.h b/src/pmdas/linux_proc/proc_pid.h index 344f4efc0..582965b3a 100644 --- a/src/pmdas/linux_proc/proc_pid.h +++ b/src/pmdas/linux_proc/proc_pid.h @@ -20,6 +20,10 @@ #include "proc_runq.h" #include "hotproc.h" +/* + * Maximim length of psargs and proc instance name. + */ +#define PROC_PID_STAT_CMD_MAXLEN 4096 /* * /proc//stat metrics @@ -252,7 +256,8 @@ enum { typedef struct { int id; /* pid, hash key and internal instance id */ int flags; /* combinations of PROC_PID_FLAG_* values */ - char *name; /* external instance name ( cmdline) */ + char *name; /* full command line and args */ + char *instname; /* external instance name (truncated cmdline) */ /* /proc//stat cluster */ int stat_buflen;