From 3c74f736c657d007770fe866842b08d0a74772ca Mon Sep 17 00:00:00 2001 From: Mark Reynolds Date: Wed, 9 Dec 2020 15:21:11 -0500 Subject: [PATCH 6/6] Issue 4414 - disk monitoring - prevent division by zero crash Bug Description: If a disk mount has zero total space or zero used space then a division by zero can occur and the server will crash. It has also been observed that sometimes a system can return the wrong disk entirely, and when that happens the incorrect disk also has zero available space which triggers the disk monitioring thread to immediately shut the server down. Fix Description: Check the total and used space for zero and do not divide, just ignore it. As a preemptive measure ignore disks from /dev, /proc, /sys (except /dev/shm). Yes it's a bit hacky, but the true underlying cause is not known yet. So better to be safe than sorry. Relates: https://github.com/389ds/389-ds-base/issues/4414 Reviewed by: firstyear(Thanks!) --- ldap/servers/slapd/daemon.c | 22 +++++++++++++++++++++- ldap/servers/slapd/monitor.c | 13 +++++-------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/ldap/servers/slapd/daemon.c b/ldap/servers/slapd/daemon.c index 691f77570..bfd965263 100644 --- a/ldap/servers/slapd/daemon.c +++ b/ldap/servers/slapd/daemon.c @@ -221,7 +221,27 @@ disk_mon_get_mount_point(char *dir) } if (s.st_dev == dev_id) { endmntent(fp); - return (slapi_ch_strdup(mnt->mnt_dir)); + + if ((strncmp(mnt->mnt_dir, "/dev", 4) == 0 && strncmp(mnt->mnt_dir, "/dev/shm", 8) != 0) || + strncmp(mnt->mnt_dir, "/proc", 4) == 0 || + strncmp(mnt->mnt_dir, "/sys", 4) == 0) + { + /* + * Ignore "mount directories" starting with /dev (except + * /dev/shm), /proc, /sys For some reason these mounts are + * occasionally/incorrectly returned. Only seen this at a + * customer site once. When it happens it causes disk + * monitoring to think the server has 0 disk space left, and + * it abruptly/unexpectedly shuts the server down. At this + * point it looks like a bug in stat(), setmntent(), or + * getmntent(), but there is no way to prove that since there + * is no way to reproduce the original issue. For now just + * return NULL to be safe. + */ + return NULL; + } else { + return (slapi_ch_strdup(mnt->mnt_dir)); + } } } endmntent(fp); diff --git a/ldap/servers/slapd/monitor.c b/ldap/servers/slapd/monitor.c index 562721bed..65f082986 100644 --- a/ldap/servers/slapd/monitor.c +++ b/ldap/servers/slapd/monitor.c @@ -131,7 +131,6 @@ monitor_disk_info (Slapi_PBlock *pb __attribute__((unused)), { int32_t rc = LDAP_SUCCESS; char **dirs = NULL; - char buf[BUFSIZ]; struct berval val; struct berval *vals[2]; uint64_t total_space; @@ -143,15 +142,13 @@ monitor_disk_info (Slapi_PBlock *pb __attribute__((unused)), disk_mon_get_dirs(&dirs); - for (uint16_t i = 0; dirs && dirs[i]; i++) { + for (size_t i = 0; dirs && dirs[i]; i++) { + char buf[BUFSIZ] = {0}; rc = disk_get_info(dirs[i], &total_space, &avail_space, &used_space); - if (rc) { - slapi_log_err(SLAPI_LOG_WARNING, "monitor_disk_info", - "Unable to get 'cn=disk space,cn=monitor' stats for %s\n", dirs[i]); - } else { + if (rc == 0 && total_space > 0 && used_space > 0) { val.bv_len = snprintf(buf, sizeof(buf), - "partition=\"%s\" size=\"%" PRIu64 "\" used=\"%" PRIu64 "\" available=\"%" PRIu64 "\" use%%=\"%" PRIu64 "\"", - dirs[i], total_space, used_space, avail_space, used_space * 100 / total_space); + "partition=\"%s\" size=\"%" PRIu64 "\" used=\"%" PRIu64 "\" available=\"%" PRIu64 "\" use%%=\"%" PRIu64 "\"", + dirs[i], total_space, used_space, avail_space, used_space * 100 / total_space); val.bv_val = buf; attrlist_merge(&e->e_attrs, "dsDisk", vals); } -- 2.26.2