1092308 - backport diskio device filtering
Backported from:
commit 5be210c90870ff6bab193d497d401b92c1d50db9
Author: Jan Safranek <jsafranek@users.sourceforge.net>
Date: Thu Mar 6 13:26:30 2014 +0100
CHANGES: snmpd: add new snmpd.conf option 'diskio' to monitor only selected disks.
On machines with thousands of block devices, parsing /proc/diskstats is really
slow. The new option enables monitoring of selected devices, saving lot of CPU
time.
diff -up net-snmp-5.7.2/agent/mibgroup/ucd-snmp/diskio.c.test net-snmp-5.7.2/agent/mibgroup/ucd-snmp/diskio.c
--- net-snmp-5.7.2/agent/mibgroup/ucd-snmp/diskio.c.test 2012-10-10 00:28:58.000000000 +0200
+++ net-snmp-5.7.2/agent/mibgroup/ucd-snmp/diskio.c 2015-06-18 15:14:57.164891695 +0200
@@ -27,11 +27,18 @@
#include <math.h>
+#if defined (linux)
+/* for stat() */
+#include <ctype.h>
+#include <sys/stat.h>
+#endif
+
#include <net-snmp/net-snmp-includes.h>
#include <net-snmp/agent/net-snmp-agent-includes.h>
#include "util_funcs/header_simple_table.h"
+#include "struct.h"
/*
* include our .h file
*/
@@ -95,6 +102,66 @@ static int ps_numdisks; /* number of d
#if defined (linux)
#define DISKIO_SAMPLE_INTERVAL 5
void devla_getstats(unsigned int regno, void * dummy);
+static void diskio_parse_config_disks(const char *token, char *cptr);
+static void diskio_free_config(void);
+static int get_sysfs_stats(void);
+
+struct diskiopart {
+ char syspath[STRMAX]; /* full stat path */
+ char name[STRMAX]; /* name as provided */
+ char shortname[STRMAX]; /* short name for output */
+ int major;
+ int minor;
+};
+
+static int numdisks;
+static int maxdisks = 0;
+static struct diskiopart *disks;
+
+#define DISK_INCR 2
+
+typedef struct linux_diskio
+{
+ int major;
+ int minor;
+ unsigned long blocks;
+ char name[256];
+ unsigned long rio;
+ unsigned long rmerge;
+ unsigned long rsect;
+ unsigned long ruse;
+ unsigned long wio;
+ unsigned long wmerge;
+ unsigned long wsect;
+ unsigned long wuse;
+ unsigned long running;
+ unsigned long use;
+ unsigned long aveq;
+} linux_diskio;
+
+/* disk load averages */
+typedef struct linux_diskio_la
+{
+ unsigned long use_prev;
+ double la1, la5, la15;
+} linux_diskio_la;
+
+typedef struct linux_diskio_header
+{
+ linux_diskio* indices;
+ int length;
+ int alloc;
+} linux_diskio_header;
+
+typedef struct linux_diskio_la_header
+{
+ linux_diskio_la * indices;
+ int length;
+} linux_diskio_la_header;
+
+static linux_diskio_header head;
+static linux_diskio_la_header la_head;
+
#endif /* linux */
#if defined (darwin)
@@ -228,6 +295,8 @@ init_diskio(void)
devla_getstats(0, NULL);
/* collect LA data regularly */
snmp_alarm_register(DISKIO_SAMPLE_INTERVAL, SA_REPEAT, devla_getstats, NULL);
+ snmpd_register_config_handler("diskio", diskio_parse_config_disks,
+ diskio_free_config, "path | device");
#endif
@@ -870,49 +939,134 @@ var_diskio(struct variable * vp,
#ifdef linux
-#define DISK_INCR 2
-
-typedef struct linux_diskio
+static void
+diskio_free_config()
+ {
+ if (la_head.length) {
+ /* reset any usage stats, we may get different list of devices from config */
+ free(la_head.indices);
+ la_head.length = 0;
+ la_head.indices = NULL;
+ }
+ if (numdisks > 0) {
+ int i;
+ head.length = 0;
+ numdisks = 0;
+ for (i = 0; i < maxdisks; i++) { /* init/erase disk db */
+ disks[i].syspath[0] = 0;
+ disks[i].name[0] = 0;
+ disks[i].shortname[0] = 0;
+ disks[i].major = -1;
+ disks[i].minor = -1;
+ }
+ }
+}
+static int
+disk_exists(char *path)
{
- int major;
- int minor;
- unsigned long blocks;
- char name[256];
- unsigned long rio;
- unsigned long rmerge;
- unsigned long rsect;
- unsigned long ruse;
- unsigned long wio;
- unsigned long wmerge;
- unsigned long wsect;
- unsigned long wuse;
- unsigned long running;
- unsigned long use;
- unsigned long aveq;
-} linux_diskio;
+ int index;
+ for(index = 0; index < numdisks; index++) {
+ DEBUGMSGTL(("ucd-snmp/disk", "Checking for %s. Found %s at %d\n", path, disks[index].syspath, index));
+ if(strcmp(path, disks[index].syspath) == 0) {
+ return index;
+ }
+ }
+ return -1;
+}
-/* disk load averages */
-typedef struct linux_diskio_la
-{
- unsigned long use_prev;
- double la1, la5, la15;
-} linux_diskio_la;
+static void
+add_device(char *path, int addNewDisks )
+ {
+ int index;
+ char device[STRMAX];
+ char syspath[STRMAX];
+ char *basename;
+ struct stat stbuf;
-typedef struct linux_diskio_header
-{
- linux_diskio* indices;
- int length;
- int alloc;
-} linux_diskio_header;
+ if (!path || !strcmp(path, "none")) {
+ DEBUGMSGTL(("ucd-snmp/diskio", "Skipping null path device (%s)\n", path));
+ return;
+ }
+ if (numdisks == maxdisks) {
+ if (maxdisks == 0) {
+ maxdisks = 50;
+ disks = malloc(maxdisks * sizeof(struct diskiopart));
+ if (!disks) {
+ config_perror("malloc failed for new disko allocation.");
+ netsnmp_config_error("\tignoring: %s", path);
+ return;
+ }
+ memset(disks, 0, maxdisks * sizeof(struct diskiopart));
+ } else {
+ maxdisks *= 2;
+ disks = realloc(disks, maxdisks * sizeof(struct diskiopart));
+ if (!disks) {
+ config_perror("malloc failed for new disko allocation.");
+ netsnmp_config_error("\tignoring: %s", path);
+ return;
+ }
+ memset(disks + maxdisks/2, 0, maxdisks/2 * sizeof(struct diskiopart));
+ }
+ }
-typedef struct linux_diskio_la_header
-{
- linux_diskio_la * indices;
- int length;
-} linux_diskio_la_header;
+ /* first find the path for this device */
+ device[0]='\0';
+ if ( *path != '/' ) {
+ strlcpy(device, "/dev/", STRMAX - 1 );
+ }
+ strncat(device, path, STRMAX - 1 );
+
+ /* check for /dev existence */
+ if ( stat(device,&stbuf)!=0 ) { /* ENOENT */
+ config_perror("diskio path does not exist.");
+ netsnmp_config_error("\tignoring: %s", path);
+ return;
+ }
+ else if ( ! S_ISBLK(stbuf.st_mode) ) { /* ENODEV */
+ config_perror("diskio path is not a device.");
+ netsnmp_config_error("\tignoring: %s", path);
+ return;
+ }
-static linux_diskio_header head;
-static linux_diskio_la_header la_head;
+ /* either came with a slash or we just put one there, so the following always works */
+ basename = strrchr(device, '/' )+1;
+ /* construct a sys path using the device numbers to avoid having to disambiguate the various text forms */
+ snprintf( syspath, STRMAX - 1, "/sys/dev/block/%d:%d/stat", major(stbuf.st_rdev), minor(stbuf.st_rdev) );
+ DEBUGMSGTL(("ucd-snmp/diskio", " monitoring sys path (%s)\n", syspath));
+
+ index = disk_exists(syspath);
+
+ if(index == -1 && addNewDisks){
+ /* The following buffers are cleared above, no need to add '\0' */
+ strlcpy(disks[numdisks].syspath, syspath, sizeof(disks[numdisks].syspath) - 1);
+ strlcpy(disks[numdisks].name, path, sizeof(disks[numdisks].name) - 1);
+ strlcpy(disks[numdisks].shortname, basename, sizeof(disks[numdisks].shortname) - 1);
+ disks[numdisks].major = major(stbuf.st_rdev);
+ disks[numdisks].minor = minor(stbuf.st_rdev);
+ numdisks++;
+ }
+}
+
+static void
+diskio_parse_config_disks(const char *token, char *cptr)
+ {
+#if HAVE_FSTAB_H || HAVE_GETMNTENT || HAVE_STATFS
+ char path[STRMAX];
+
+
+ /*
+ * read disk path (eg, /1 or /usr)
+ */
+ copy_nword(cptr, path, sizeof(path));
+
+ /* TODO: we may include regular expressions in future */
+ /*
+ * check if the disk already exists, if so then modify its
+ * parameters. if it does not exist then add it
+ */
+ add_device(path, 1);
+#endif /* HAVE_FSTAB_H || HAVE_GETMNTENT || HAVE_STATFS */
+}
void devla_getstats(unsigned int regno, void * dummy) {
@@ -976,6 +1130,47 @@ int is_excluded(const char *name)
return 0;
}
+static int get_sysfs_stats()
+{
+ int i;
+ char buffer[1024];
+
+ head.length = 0;
+
+ for(i = 0; i < numdisks; i++) {
+ FILE *f = fopen(disks[i].syspath, "r");
+ if ( f == NULL ) {
+ DEBUGMSGTL(("ucd-snmp/diskio", "Can't open %s, skipping", disks[i].syspath));
+ continue;
+ }
+ if (fgets(buffer, sizeof(buffer), f) == NULL) {
+ DEBUGMSGTL(("ucd-snmp/diskio", "Can't read %s, skipping", disks[i].syspath));
+ fclose(f);
+ continue;
+ }
+
+ linux_diskio* pTemp;
+ if (head.length == head.alloc) {
+ head.alloc += DISK_INCR;
+ head.indices = (linux_diskio *) realloc(head.indices, head.alloc*sizeof(linux_diskio));
+ }
+ pTemp = &head.indices[head.length];
+ pTemp->major = disks[i].major;
+ pTemp->minor = disks[i].minor;
+ strlcpy( pTemp->name, disks[i].shortname, sizeof(pTemp->name) - 1 );
+ if (sscanf (buffer, "%lu%*[ \n\t]%lu%*[ \n\t]%lu%*[ \n\t]%lu%*[ \n\t]%lu%*[ \n\t]%lu%*[ \n\t]%lu%*[ \n\t]%lu%*[ \n\t]%lu%*[ \n\t]%lu%*[ \n\t]%lu\n",
+ &pTemp->rio, &pTemp->rmerge, &pTemp->rsect, &pTemp->ruse,
+ &pTemp->wio, &pTemp->wmerge, &pTemp->wsect, &pTemp->wuse,
+ &pTemp->running, &pTemp->use, &pTemp->aveq) != 11)
+ sscanf (buffer, "%*[ \n\t]%lu%*[ \n\t]%lu%*[ \n\t]%lu%*[ \n\t]%lu\n",
+ &pTemp->rio, &pTemp->rsect,
+ &pTemp->wio, &pTemp->wsect);
+ head.length++;
+ fclose(f);
+ }
+ return 0;
+}
+
static int
getstats(void)
{
@@ -995,6 +1189,14 @@ getstats(void)
memset(head.indices, 0, head.alloc*sizeof(linux_diskio));
+ if (numdisks>0) {
+ /* 'diskio' configuration is used - go through the whitelist only and
+ * read /sys/dev/block/xxx */
+ cache_time = now;
+ return get_sysfs_stats();
+ }
+ /* 'diskio' configuration is not used - report all devices */
+
/* Is this a 2.6 kernel? */
parts = fopen("/proc/diskstats", "r");
if (parts) {
@@ -1111,13 +1313,22 @@ var_diskio(struct variable * vp,
long_ret = head.indices[indx].wio & 0xffffffff;
return (u_char *) & long_ret;
case DISKIO_LA1:
- long_ret = la_head.indices[indx].la1;
+ if (la_head.length > indx)
+ long_ret = la_head.indices[indx].la1;
+ else
+ long_ret = 0;
return (u_char *) & long_ret;
case DISKIO_LA5:
- long_ret = la_head.indices[indx].la5;
+ if (la_head.length > indx)
+ long_ret = la_head.indices[indx].la5;
+ else
+ long_ret = 0;
return (u_char *) & long_ret;
case DISKIO_LA15:
- long_ret = la_head.indices[indx].la15;
+ if (la_head.length > indx)
+ long_ret = la_head.indices[indx].la15;
+ else
+ long_ret = 0;
return (u_char *) & long_ret;
case DISKIO_NREADX:
*var_len = sizeof(struct counter64);
diff -up net-snmp-5.7.2/man/snmpd.conf.5.def.test net-snmp-5.7.2/man/snmpd.conf.5.def
--- net-snmp-5.7.2/man/snmpd.conf.5.def.test 2015-06-18 15:13:31.249470179 +0200
+++ net-snmp-5.7.2/man/snmpd.conf.5.def 2015-06-18 15:16:45.481423115 +0200
@@ -715,6 +715,15 @@ e.g. "loop0"
.IP "diskio_exclude_ram yes"
Excludes all LInux ramdisk block devices, whose names start with "ram", e.g.
"ram0"
+.PP
+On Linux systems, it is possible to report only explicitly whitelisted
+devices. It may take significant amount of time to process diskIOTable data
+on systems with tens of thousands of block devices and whitelisting only the
+important ones avoids large CPU consumption.
+.IP "diskio <device>"
+Enables whitelisting of devices and adds the device to the whitelist. Only
+explicitly whitelisted devices will be reported. This option may be used
+multiple times.
.SS System Load Monitoring
This requires that the agent was built with support for either the
\fIucd\-snmp/loadave\fR module or the \fIucd\-snmp/memory\fR module
commit 59f9f3387dab4238114804a0be9e4c15667d868c
Author: Jan Safranek <jsafranek@users.sourceforge.net>
Date: Fri Jun 19 09:29:06 2015 +0200
Fixed memory leak on realloc failure.
Found by Coverity.
diff --git a/agent/mibgroup/ucd-snmp/diskio.c b/agent/mibgroup/ucd-snmp/diskio.c
index f04d5c5..58163d8 100644
--- a/agent/mibgroup/ucd-snmp/diskio.c
+++ b/agent/mibgroup/ucd-snmp/diskio.c
@@ -405,13 +405,17 @@ add_device(char *path, int addNewDisks )
}
memset(disks, 0, maxdisks * sizeof(struct diskiopart));
} else {
+ struct diskiopart *newdisks;
maxdisks *= 2;
- disks = realloc(disks, maxdisks * sizeof(struct diskiopart));
- if (!disks) {
+ newdisks = realloc(disks, maxdisks * sizeof(struct diskiopart));
+ if (!newdisks) {
+ free(disks);
+ disks = NULL;
config_perror("malloc failed for new disko allocation.");
netsnmp_config_error("\tignoring: %s", path);
return;
}
+ disks = newdisks;
memset(disks + maxdisks/2, 0, maxdisks/2 * sizeof(struct diskiopart));
}
}