Blame SOURCES/0025-Issue-4414-disk-monitoring-prevent-division-by-zero-.patch

3280a9
From 3c74f736c657d007770fe866842b08d0a74772ca Mon Sep 17 00:00:00 2001
3280a9
From: Mark Reynolds <mreynolds@redhat.com>
3280a9
Date: Wed, 9 Dec 2020 15:21:11 -0500
3280a9
Subject: [PATCH 6/6] Issue 4414 - disk monitoring - prevent division by zero
3280a9
 crash
3280a9
3280a9
Bug Description:  If a disk mount has zero total space or zero used
3280a9
                  space then a division by zero can occur and the
3280a9
                  server will crash.
3280a9
3280a9
                  It has also been observed that sometimes a system
3280a9
                  can return the wrong disk entirely, and when that
3280a9
                  happens the incorrect disk also has zero available
3280a9
                  space which triggers the disk monitoring thread to
3280a9
                  immediately shut the server down.
3280a9
3280a9
Fix Description:  Check the total and used space for zero and do not
3280a9
                  divide, just ignore it.  As a preemptive measure
3280a9
                  ignore disks from /dev, /proc, /sys (except /dev/shm).
3280a9
                  Yes, it's a bit hacky, but the true underlying cause
3280a9
                  is not known yet.  So better to be safe than sorry.
3280a9
3280a9
Relates: https://github.com/389ds/389-ds-base/issues/4414
3280a9
3280a9
Reviewed by: firstyear(Thanks!)
3280a9
---
3280a9
 ldap/servers/slapd/daemon.c  | 22 +++++++++++++++++++++-
3280a9
 ldap/servers/slapd/monitor.c | 13 +++++--------
3280a9
 2 files changed, 26 insertions(+), 9 deletions(-)
3280a9
3280a9
diff --git a/ldap/servers/slapd/daemon.c b/ldap/servers/slapd/daemon.c
3280a9
index 691f77570..bfd965263 100644
3280a9
--- a/ldap/servers/slapd/daemon.c
3280a9
+++ b/ldap/servers/slapd/daemon.c
3280a9
@@ -221,7 +221,27 @@ disk_mon_get_mount_point(char *dir)
3280a9
         }
3280a9
         if (s.st_dev == dev_id) {
3280a9
             endmntent(fp);
3280a9
-            return (slapi_ch_strdup(mnt->mnt_dir));
3280a9
+
3280a9
+            if ((strncmp(mnt->mnt_dir, "/dev", 4) == 0 && strncmp(mnt->mnt_dir, "/dev/shm", 8) != 0) ||
3280a9
+                strncmp(mnt->mnt_dir, "/proc", 4) == 0 ||
3280a9
+                strncmp(mnt->mnt_dir, "/sys", 4) == 0)
3280a9
+            {
3280a9
+                /*
3280a9
+                 * Ignore "mount directories" starting with /dev (except
3280a9
+                 * /dev/shm), /proc, /sys.  For some reason these mounts are
3280a9
+                 * occasionally/incorrectly returned.  Only seen this at a
3280a9
+                 * customer site once.  When it happens it causes disk
3280a9
+                 * monitoring to think the server has 0 disk space left, and
3280a9
+                 * it abruptly/unexpectedly shuts the server down.  At this
3280a9
+                 * point it looks like a bug in stat(), setmntent(), or
3280a9
+                 * getmntent(), but there is no way to prove that since there
3280a9
+                 * is no way to reproduce the original issue.  For now just
3280a9
+                 * return NULL to be safe.
3280a9
+                 */
3280a9
+                return NULL;
3280a9
+            } else {
3280a9
+                return (slapi_ch_strdup(mnt->mnt_dir));
3280a9
+            }
3280a9
         }
3280a9
     }
3280a9
     endmntent(fp);
3280a9
diff --git a/ldap/servers/slapd/monitor.c b/ldap/servers/slapd/monitor.c
3280a9
index 562721bed..65f082986 100644
3280a9
--- a/ldap/servers/slapd/monitor.c
3280a9
+++ b/ldap/servers/slapd/monitor.c
3280a9
@@ -131,7 +131,6 @@ monitor_disk_info (Slapi_PBlock *pb __attribute__((unused)),
3280a9
 {
3280a9
     int32_t rc = LDAP_SUCCESS;
3280a9
     char **dirs = NULL;
3280a9
-    char buf[BUFSIZ];
3280a9
     struct berval val;
3280a9
     struct berval *vals[2];
3280a9
     uint64_t total_space;
3280a9
@@ -143,15 +142,13 @@ monitor_disk_info (Slapi_PBlock *pb __attribute__((unused)),
3280a9
 
3280a9
     disk_mon_get_dirs(&dirs);
3280a9
 
3280a9
-    for (uint16_t i = 0; dirs && dirs[i]; i++) {
3280a9
+    for (size_t i = 0; dirs && dirs[i]; i++) {
3280a9
+    	char buf[BUFSIZ] = {0};
3280a9
         rc = disk_get_info(dirs[i], &total_space, &avail_space, &used_space);
3280a9
-        if (rc) {
3280a9
-            slapi_log_err(SLAPI_LOG_WARNING, "monitor_disk_info",
3280a9
-                          "Unable to get 'cn=disk space,cn=monitor' stats for %s\n", dirs[i]);
3280a9
-        } else {
3280a9
+        if (rc == 0 && total_space > 0 && used_space > 0) {
3280a9
             val.bv_len = snprintf(buf, sizeof(buf),
3280a9
-                                  "partition=\"%s\" size=\"%" PRIu64 "\" used=\"%" PRIu64 "\" available=\"%" PRIu64 "\" use%%=\"%" PRIu64 "\"",
3280a9
-                                  dirs[i], total_space, used_space, avail_space, used_space * 100 / total_space);
3280a9
+                    "partition=\"%s\" size=\"%" PRIu64 "\" used=\"%" PRIu64 "\" available=\"%" PRIu64 "\" use%%=\"%" PRIu64 "\"",
3280a9
+                    dirs[i], total_space, used_space, avail_space, used_space * 100 / total_space);
3280a9
             val.bv_val = buf;
3280a9
             attrlist_merge(&e->e_attrs, "dsDisk", vals);
3280a9
         }
3280a9
-- 
3280a9
2.26.2
3280a9