1092308 - backport diskio device filtering Backported from: commit 5be210c90870ff6bab193d497d401b92c1d50db9 Author: Jan Safranek Date: Thu Mar 6 13:26:30 2014 +0100 CHANGES: snmpd: add new snmpd.conf option 'diskio' to monitor only selected disks. On machines with thousands of block devices, parsing /proc/diskstats is really slow. The new option enables monitoring of selected devices, saving lot of CPU time. diff -up net-snmp-5.7.2/agent/mibgroup/ucd-snmp/diskio.c.test net-snmp-5.7.2/agent/mibgroup/ucd-snmp/diskio.c --- net-snmp-5.7.2/agent/mibgroup/ucd-snmp/diskio.c.test 2012-10-10 00:28:58.000000000 +0200 +++ net-snmp-5.7.2/agent/mibgroup/ucd-snmp/diskio.c 2015-06-18 15:14:57.164891695 +0200 @@ -27,11 +27,18 @@ #include +#if defined (linux) +/* for stat() */ +#include +#include +#endif + #include #include #include "util_funcs/header_simple_table.h" +#include "struct.h" /* * include our .h file */ @@ -95,6 +102,66 @@ static int ps_numdisks; /* number of d #if defined (linux) #define DISKIO_SAMPLE_INTERVAL 5 void devla_getstats(unsigned int regno, void * dummy); +static void diskio_parse_config_disks(const char *token, char *cptr); +static void diskio_free_config(void); +static int get_sysfs_stats(void); + +struct diskiopart { + char syspath[STRMAX]; /* full stat path */ + char name[STRMAX]; /* name as provided */ + char shortname[STRMAX]; /* short name for output */ + int major; + int minor; +}; + +static int numdisks; +static int maxdisks = 0; +static struct diskiopart *disks; + +#define DISK_INCR 2 + +typedef struct linux_diskio +{ + int major; + int minor; + unsigned long blocks; + char name[256]; + unsigned long rio; + unsigned long rmerge; + unsigned long rsect; + unsigned long ruse; + unsigned long wio; + unsigned long wmerge; + unsigned long wsect; + unsigned long wuse; + unsigned long running; + unsigned long use; + unsigned long aveq; +} linux_diskio; + +/* disk load averages */ +typedef struct linux_diskio_la +{ + unsigned long use_prev; + double la1, la5, la15; +} linux_diskio_la; + +typedef struct linux_diskio_header +{ + linux_diskio* indices; + int length; + int alloc; +} linux_diskio_header; + +typedef struct linux_diskio_la_header +{ + linux_diskio_la * indices; + int length; +} linux_diskio_la_header; + +static linux_diskio_header head; +static linux_diskio_la_header la_head; + #endif /* linux */ #if defined (darwin) @@ -228,6 +295,8 @@ init_diskio(void) devla_getstats(0, NULL); /* collect LA data regularly */ snmp_alarm_register(DISKIO_SAMPLE_INTERVAL, SA_REPEAT, devla_getstats, NULL); + snmpd_register_config_handler("diskio", diskio_parse_config_disks, + diskio_free_config, "path | device"); #endif @@ -870,49 +939,134 @@ var_diskio(struct variable * vp, #ifdef linux -#define DISK_INCR 2 - -typedef struct linux_diskio +static void +diskio_free_config() + { + if (la_head.length) { + /* reset any usage stats, we may get different list of devices from config */ + free(la_head.indices); + la_head.length = 0; + la_head.indices = NULL; + } + if (numdisks > 0) { + int i; + head.length = 0; + numdisks = 0; + for (i = 0; i < maxdisks; i++) { /* init/erase disk db */ + disks[i].syspath[0] = 0; + disks[i].name[0] = 0; + disks[i].shortname[0] = 0; + disks[i].major = -1; + disks[i].minor = -1; + } + } +} +static int +disk_exists(char *path) { - int major; - int minor; - unsigned long blocks; - char name[256]; - unsigned long rio; - unsigned long rmerge; - unsigned long rsect; - unsigned long ruse; - unsigned long wio; - unsigned long wmerge; - unsigned long wsect; - unsigned long wuse; - unsigned long running; - unsigned long use; - unsigned long aveq; -} linux_diskio; + int index; + for(index = 0; index < numdisks; index++) { + DEBUGMSGTL(("ucd-snmp/disk", "Checking for %s. Found %s at %d\n", path, disks[index].syspath, index)); + if(strcmp(path, disks[index].syspath) == 0) { + return index; + } + } + return -1; +} -/* disk load averages */ -typedef struct linux_diskio_la -{ - unsigned long use_prev; - double la1, la5, la15; -} linux_diskio_la; +static void +add_device(char *path, int addNewDisks ) + { + int index; + char device[STRMAX]; + char syspath[STRMAX]; + char *basename; + struct stat stbuf; -typedef struct linux_diskio_header -{ - linux_diskio* indices; - int length; - int alloc; -} linux_diskio_header; + if (!path || !strcmp(path, "none")) { + DEBUGMSGTL(("ucd-snmp/diskio", "Skipping null path device (%s)\n", path)); + return; + } + if (numdisks == maxdisks) { + if (maxdisks == 0) { + maxdisks = 50; + disks = malloc(maxdisks * sizeof(struct diskiopart)); + if (!disks) { + config_perror("malloc failed for new disko allocation."); + netsnmp_config_error("\tignoring: %s", path); + return; + } + memset(disks, 0, maxdisks * sizeof(struct diskiopart)); + } else { + maxdisks *= 2; + disks = realloc(disks, maxdisks * sizeof(struct diskiopart)); + if (!disks) { + config_perror("malloc failed for new disko allocation."); + netsnmp_config_error("\tignoring: %s", path); + return; + } + memset(disks + maxdisks/2, 0, maxdisks/2 * sizeof(struct diskiopart)); + } + } -typedef struct linux_diskio_la_header -{ - linux_diskio_la * indices; - int length; -} linux_diskio_la_header; + /* first find the path for this device */ + device[0]='\0'; + if ( *path != '/' ) { + strlcpy(device, "/dev/", STRMAX - 1 ); + } + strncat(device, path, STRMAX - 1 ); + + /* check for /dev existence */ + if ( stat(device,&stbuf)!=0 ) { /* ENOENT */ + config_perror("diskio path does not exist."); + netsnmp_config_error("\tignoring: %s", path); + return; + } + else if ( ! S_ISBLK(stbuf.st_mode) ) { /* ENODEV */ + config_perror("diskio path is not a device."); + netsnmp_config_error("\tignoring: %s", path); + return; + } -static linux_diskio_header head; -static linux_diskio_la_header la_head; + /* either came with a slash or we just put one there, so the following always works */ + basename = strrchr(device, '/' )+1; + /* construct a sys path using the device numbers to avoid having to disambiguate the various text forms */ + snprintf( syspath, STRMAX - 1, "/sys/dev/block/%d:%d/stat", major(stbuf.st_rdev), minor(stbuf.st_rdev) ); + DEBUGMSGTL(("ucd-snmp/diskio", " monitoring sys path (%s)\n", syspath)); + + index = disk_exists(syspath); + + if(index == -1 && addNewDisks){ + /* The following buffers are cleared above, no need to add '\0' */ + strlcpy(disks[numdisks].syspath, syspath, sizeof(disks[numdisks].syspath) - 1); + strlcpy(disks[numdisks].name, path, sizeof(disks[numdisks].name) - 1); + strlcpy(disks[numdisks].shortname, basename, sizeof(disks[numdisks].shortname) - 1); + disks[numdisks].major = major(stbuf.st_rdev); + disks[numdisks].minor = minor(stbuf.st_rdev); + numdisks++; + } +} + +static void +diskio_parse_config_disks(const char *token, char *cptr) + { +#if HAVE_FSTAB_H || HAVE_GETMNTENT || HAVE_STATFS + char path[STRMAX]; + + + /* + * read disk path (eg, /1 or /usr) + */ + copy_nword(cptr, path, sizeof(path)); + + /* TODO: we may include regular expressions in future */ + /* + * check if the disk already exists, if so then modify its + * parameters. if it does not exist then add it + */ + add_device(path, 1); +#endif /* HAVE_FSTAB_H || HAVE_GETMNTENT || HAVE_STATFS */ +} void devla_getstats(unsigned int regno, void * dummy) { @@ -976,6 +1130,47 @@ int is_excluded(const char *name) return 0; } +static int get_sysfs_stats() +{ + int i; + char buffer[1024]; + + head.length = 0; + + for(i = 0; i < numdisks; i++) { + FILE *f = fopen(disks[i].syspath, "r"); + if ( f == NULL ) { + DEBUGMSGTL(("ucd-snmp/diskio", "Can't open %s, skipping", disks[i].syspath)); + continue; + } + if (fgets(buffer, sizeof(buffer), f) == NULL) { + DEBUGMSGTL(("ucd-snmp/diskio", "Can't read %s, skipping", disks[i].syspath)); + fclose(f); + continue; + } + + linux_diskio* pTemp; + if (head.length == head.alloc) { + head.alloc += DISK_INCR; + head.indices = (linux_diskio *) realloc(head.indices, head.alloc*sizeof(linux_diskio)); + } + pTemp = &head.indices[head.length]; + pTemp->major = disks[i].major; + pTemp->minor = disks[i].minor; + strlcpy( pTemp->name, disks[i].shortname, sizeof(pTemp->name) - 1 ); + if (sscanf (buffer, "%lu%*[ \n\t]%lu%*[ \n\t]%lu%*[ \n\t]%lu%*[ \n\t]%lu%*[ \n\t]%lu%*[ \n\t]%lu%*[ \n\t]%lu%*[ \n\t]%lu%*[ \n\t]%lu%*[ \n\t]%lu\n", + &pTemp->rio, &pTemp->rmerge, &pTemp->rsect, &pTemp->ruse, + &pTemp->wio, &pTemp->wmerge, &pTemp->wsect, &pTemp->wuse, + &pTemp->running, &pTemp->use, &pTemp->aveq) != 11) + sscanf (buffer, "%*[ \n\t]%lu%*[ \n\t]%lu%*[ \n\t]%lu%*[ \n\t]%lu\n", + &pTemp->rio, &pTemp->rsect, + &pTemp->wio, &pTemp->wsect); + head.length++; + fclose(f); + } + return 0; +} + static int getstats(void) { @@ -995,6 +1189,14 @@ getstats(void) memset(head.indices, 0, head.alloc*sizeof(linux_diskio)); + if (numdisks>0) { + /* 'diskio' configuration is used - go through the whitelist only and + * read /sys/dev/block/xxx */ + cache_time = now; + return get_sysfs_stats(); + } + /* 'diskio' configuration is not used - report all devices */ + /* Is this a 2.6 kernel? */ parts = fopen("/proc/diskstats", "r"); if (parts) { @@ -1111,13 +1313,22 @@ var_diskio(struct variable * vp, long_ret = head.indices[indx].wio & 0xffffffff; return (u_char *) & long_ret; case DISKIO_LA1: - long_ret = la_head.indices[indx].la1; + if (la_head.length > indx) + long_ret = la_head.indices[indx].la1; + else + long_ret = 0; return (u_char *) & long_ret; case DISKIO_LA5: - long_ret = la_head.indices[indx].la5; + if (la_head.length > indx) + long_ret = la_head.indices[indx].la5; + else + long_ret = 0; return (u_char *) & long_ret; case DISKIO_LA15: - long_ret = la_head.indices[indx].la15; + if (la_head.length > indx) + long_ret = la_head.indices[indx].la15; + else + long_ret = 0; return (u_char *) & long_ret; case DISKIO_NREADX: *var_len = sizeof(struct counter64); diff -up net-snmp-5.7.2/man/snmpd.conf.5.def.test net-snmp-5.7.2/man/snmpd.conf.5.def --- net-snmp-5.7.2/man/snmpd.conf.5.def.test 2015-06-18 15:13:31.249470179 +0200 +++ net-snmp-5.7.2/man/snmpd.conf.5.def 2015-06-18 15:16:45.481423115 +0200 @@ -715,6 +715,15 @@ e.g. "loop0" .IP "diskio_exclude_ram yes" Excludes all LInux ramdisk block devices, whose names start with "ram", e.g. "ram0" +.PP +On Linux systems, it is possible to report only explicitly whitelisted +devices. It may take significant amount of time to process diskIOTable data +on systems with tens of thousands of block devices and whitelisting only the +important ones avoids large CPU consumption. +.IP "diskio " +Enables whitelisting of devices and adds the device to the whitelist. Only +explicitly whitelisted devices will be reported. This option may be used +multiple times. .SS System Load Monitoring This requires that the agent was built with support for either the \fIucd\-snmp/loadave\fR module or the \fIucd\-snmp/memory\fR module commit 59f9f3387dab4238114804a0be9e4c15667d868c Author: Jan Safranek Date: Fri Jun 19 09:29:06 2015 +0200 Fixed memory leak on realloc failure. Found by Coverity. diff --git a/agent/mibgroup/ucd-snmp/diskio.c b/agent/mibgroup/ucd-snmp/diskio.c index f04d5c5..58163d8 100644 --- a/agent/mibgroup/ucd-snmp/diskio.c +++ b/agent/mibgroup/ucd-snmp/diskio.c @@ -405,13 +405,17 @@ add_device(char *path, int addNewDisks ) } memset(disks, 0, maxdisks * sizeof(struct diskiopart)); } else { + struct diskiopart *newdisks; maxdisks *= 2; - disks = realloc(disks, maxdisks * sizeof(struct diskiopart)); - if (!disks) { + newdisks = realloc(disks, maxdisks * sizeof(struct diskiopart)); + if (!newdisks) { + free(disks); + disks = NULL; config_perror("malloc failed for new disko allocation."); netsnmp_config_error("\tignoring: %s", path); return; } + disks = newdisks; memset(disks + maxdisks/2, 0, maxdisks/2 * sizeof(struct diskiopart)); } }