Blame SOURCES/0025-Issue-4414-disk-monitoring-prevent-division-by-zero-.patch

fec594
From 3c74f736c657d007770fe866842b08d0a74772ca Mon Sep 17 00:00:00 2001
fec594
From: Mark Reynolds <mreynolds@redhat.com>
fec594
Date: Wed, 9 Dec 2020 15:21:11 -0500
fec594
Subject: [PATCH 6/6] Issue 4414 - disk monitoring - prevent division by zero
fec594
 crash
fec594
fec594
Bug Description:  If a disk mount has zero total space or zero used
fec594
                  space then a division by zero can occur and the
fec594
                  server will crash.
fec594
fec594
                  It has also been observed that sometimes a system
fec594
                  can return the wrong disk entirely, and when that
fec594
                  happens the incorrect disk also has zero available
fec594
                  space which triggers the disk monitoring thread to
fec594
                  immediately shut the server down.
fec594
fec594
Fix Description:  Check the total and used space for zero and do not
fec594
                  divide, just ignore it.  As a preemptive measure
fec594
                  ignore disks from /dev, /proc, /sys (except /dev/shm).
fec594
                  Yes it's a bit hacky, but the true underlying cause
fec594
                  is not known yet.  So better to be safe than sorry.
fec594
fec594
Relates: https://github.com/389ds/389-ds-base/issues/4414
fec594
fec594
Reviewed by: firstyear(Thanks!)
fec594
---
fec594
 ldap/servers/slapd/daemon.c  | 22 +++++++++++++++++++++-
fec594
 ldap/servers/slapd/monitor.c | 13 +++++--------
fec594
 2 files changed, 26 insertions(+), 9 deletions(-)
fec594
fec594
diff --git a/ldap/servers/slapd/daemon.c b/ldap/servers/slapd/daemon.c
fec594
index 691f77570..bfd965263 100644
fec594
--- a/ldap/servers/slapd/daemon.c
fec594
+++ b/ldap/servers/slapd/daemon.c
fec594
@@ -221,7 +221,27 @@ disk_mon_get_mount_point(char *dir)
fec594
         }
fec594
         if (s.st_dev == dev_id) {
fec594
             endmntent(fp);
fec594
-            return (slapi_ch_strdup(mnt->mnt_dir));
fec594
+
fec594
+            if ((strncmp(mnt->mnt_dir, "/dev", 4) == 0 && strncmp(mnt->mnt_dir, "/dev/shm", 8) != 0) ||
fec594
+                strncmp(mnt->mnt_dir, "/proc", 4) == 0 ||
fec594
+                strncmp(mnt->mnt_dir, "/sys", 4) == 0)
fec594
+            {
fec594
+                /*
fec594
+                 * Ignore "mount directories" starting with /dev (except
fec594
+                 * /dev/shm), /proc, /sys.  For some reason these mounts are
fec594
+                 * occasionally/incorrectly returned.  Only seen this at a
fec594
+                 * customer site once.  When it happens it causes disk
fec594
+                 * monitoring to think the server has 0 disk space left, and
fec594
+                 * it abruptly/unexpectedly shuts the server down.  At this
fec594
+                 * point it looks like a bug in stat(), setmntent(), or
fec594
+                 * getmntent(), but there is no way to prove that since there
fec594
+                 * is no way to reproduce the original issue.  For now just
fec594
+                 * return NULL to be safe.
fec594
+                 */
fec594
+                return NULL;
fec594
+            } else {
fec594
+                return (slapi_ch_strdup(mnt->mnt_dir));
fec594
+            }
fec594
         }
fec594
     }
fec594
     endmntent(fp);
fec594
diff --git a/ldap/servers/slapd/monitor.c b/ldap/servers/slapd/monitor.c
fec594
index 562721bed..65f082986 100644
fec594
--- a/ldap/servers/slapd/monitor.c
fec594
+++ b/ldap/servers/slapd/monitor.c
fec594
@@ -131,7 +131,6 @@ monitor_disk_info (Slapi_PBlock *pb __attribute__((unused)),
fec594
 {
fec594
     int32_t rc = LDAP_SUCCESS;
fec594
     char **dirs = NULL;
fec594
-    char buf[BUFSIZ];
fec594
     struct berval val;
fec594
     struct berval *vals[2];
fec594
     uint64_t total_space;
fec594
@@ -143,15 +142,13 @@ monitor_disk_info (Slapi_PBlock *pb __attribute__((unused)),
fec594
 
fec594
     disk_mon_get_dirs(&dirs);
fec594
 
fec594
-    for (uint16_t i = 0; dirs && dirs[i]; i++) {
fec594
+    for (size_t i = 0; dirs && dirs[i]; i++) {
fec594
+    	char buf[BUFSIZ] = {0};
fec594
         rc = disk_get_info(dirs[i], &total_space, &avail_space, &used_space);
fec594
-        if (rc) {
fec594
-            slapi_log_err(SLAPI_LOG_WARNING, "monitor_disk_info",
fec594
-                          "Unable to get 'cn=disk space,cn=monitor' stats for %s\n", dirs[i]);
fec594
-        } else {
fec594
+        if (rc == 0 && total_space > 0 && used_space > 0) {
fec594
             val.bv_len = snprintf(buf, sizeof(buf),
fec594
-                                  "partition=\"%s\" size=\"%" PRIu64 "\" used=\"%" PRIu64 "\" available=\"%" PRIu64 "\" use%%=\"%" PRIu64 "\"",
fec594
-                                  dirs[i], total_space, used_space, avail_space, used_space * 100 / total_space);
fec594
+                    "partition=\"%s\" size=\"%" PRIu64 "\" used=\"%" PRIu64 "\" available=\"%" PRIu64 "\" use%%=\"%" PRIu64 "\"",
fec594
+                    dirs[i], total_space, used_space, avail_space, used_space * 100 / total_space);
fec594
             val.bv_val = buf;
fec594
             attrlist_merge(&e->e_attrs, "dsDisk", vals);
fec594
         }
fec594
-- 
fec594
2.26.2
fec594