Blame SOURCES/0025-Issue-4414-disk-monitoring-prevent-division-by-zero-.patch

be9751
From 3c74f736c657d007770fe866842b08d0a74772ca Mon Sep 17 00:00:00 2001
be9751
From: Mark Reynolds <mreynolds@redhat.com>
be9751
Date: Wed, 9 Dec 2020 15:21:11 -0500
be9751
Subject: [PATCH 6/6] Issue 4414 - disk monitoring - prevent division by zero
be9751
 crash
be9751
be9751
Bug Description:  If a disk mount has zero total space or zero used
be9751
                  space then a division by zero can occur and the
be9751
                  server will crash.
be9751
be9751
                  It has also been observed that sometimes a system
be9751
                  can return the wrong disk entirely, and when that
be9751
                  happens the incorrect disk also has zero available
be9751
                  space which triggers the disk monitoring thread to
be9751
                  immediately shut the server down.
be9751
be9751
Fix Description:  Check the total and used space for zero and do not
be9751
                  divide, just ignore it.  As a preemptive measure
be9751
                  ignore disks from /dev, /proc, /sys (except /dev/shm).
be9751
                  Yes it's a bit hacky, but the true underlying cause
be9751
                  is not known yet.  So better to be safe than sorry.
be9751
be9751
Relates: https://github.com/389ds/389-ds-base/issues/4414
be9751
be9751
Reviewed by: firstyear(Thanks!)
be9751
---
be9751
 ldap/servers/slapd/daemon.c  | 22 +++++++++++++++++++++-
be9751
 ldap/servers/slapd/monitor.c | 13 +++++--------
be9751
 2 files changed, 26 insertions(+), 9 deletions(-)
be9751
be9751
diff --git a/ldap/servers/slapd/daemon.c b/ldap/servers/slapd/daemon.c
be9751
index 691f77570..bfd965263 100644
be9751
--- a/ldap/servers/slapd/daemon.c
be9751
+++ b/ldap/servers/slapd/daemon.c
be9751
@@ -221,7 +221,27 @@ disk_mon_get_mount_point(char *dir)
be9751
         }
be9751
         if (s.st_dev == dev_id) {
be9751
             endmntent(fp);
be9751
-            return (slapi_ch_strdup(mnt->mnt_dir));
be9751
+
be9751
+            if ((strncmp(mnt->mnt_dir, "/dev", 4) == 0 && strncmp(mnt->mnt_dir, "/dev/shm", 8) != 0) ||
be9751
+                strncmp(mnt->mnt_dir, "/proc", 4) == 0 ||
be9751
+                strncmp(mnt->mnt_dir, "/sys", 4) == 0)
be9751
+            {
be9751
+                /*
be9751
+                 * Ignore "mount directories" starting with /dev (except
be9751
+                 * /dev/shm), /proc, /sys  For some reason these mounts are
be9751
+                 * occasionally/incorrectly returned.  Only seen this at a
be9751
+                 * customer site once.  When it happens it causes disk
be9751
+                 * monitoring to think the server has 0 disk space left, and
be9751
+                 * it abruptly/unexpectedly shuts the server down.  At this
be9751
+                 * point it looks like a bug in stat(), setmntent(), or
be9751
+                 * getmntent(), but there is no way to prove that since there
be9751
+                 * is no way to reproduce the original issue.  For now just
be9751
+                 * return NULL to be safe.
be9751
+                 */
be9751
+                return NULL;
be9751
+            } else {
be9751
+                return (slapi_ch_strdup(mnt->mnt_dir));
be9751
+            }
be9751
         }
be9751
     }
be9751
     endmntent(fp);
be9751
diff --git a/ldap/servers/slapd/monitor.c b/ldap/servers/slapd/monitor.c
be9751
index 562721bed..65f082986 100644
be9751
--- a/ldap/servers/slapd/monitor.c
be9751
+++ b/ldap/servers/slapd/monitor.c
be9751
@@ -131,7 +131,6 @@ monitor_disk_info (Slapi_PBlock *pb __attribute__((unused)),
be9751
 {
be9751
     int32_t rc = LDAP_SUCCESS;
be9751
     char **dirs = NULL;
be9751
-    char buf[BUFSIZ];
be9751
     struct berval val;
be9751
     struct berval *vals[2];
be9751
     uint64_t total_space;
be9751
@@ -143,15 +142,13 @@ monitor_disk_info (Slapi_PBlock *pb __attribute__((unused)),
be9751
 
be9751
     disk_mon_get_dirs(&dirs);
be9751
 
be9751
-    for (uint16_t i = 0; dirs && dirs[i]; i++) {
be9751
+    for (size_t i = 0; dirs && dirs[i]; i++) {
be9751
+    	char buf[BUFSIZ] = {0};
be9751
         rc = disk_get_info(dirs[i], &total_space, &avail_space, &used_space);
be9751
-        if (rc) {
be9751
-            slapi_log_err(SLAPI_LOG_WARNING, "monitor_disk_info",
be9751
-                          "Unable to get 'cn=disk space,cn=monitor' stats for %s\n", dirs[i]);
be9751
-        } else {
be9751
+        if (rc == 0 && total_space > 0 && used_space > 0) {
be9751
             val.bv_len = snprintf(buf, sizeof(buf),
be9751
-                                  "partition=\"%s\" size=\"%" PRIu64 "\" used=\"%" PRIu64 "\" available=\"%" PRIu64 "\" use%%=\"%" PRIu64 "\"",
be9751
-                                  dirs[i], total_space, used_space, avail_space, used_space * 100 / total_space);
be9751
+                    "partition=\"%s\" size=\"%" PRIu64 "\" used=\"%" PRIu64 "\" available=\"%" PRIu64 "\" use%%=\"%" PRIu64 "\"",
be9751
+                    dirs[i], total_space, used_space, avail_space, used_space * 100 / total_space);
be9751
             val.bv_val = buf;
be9751
             attrlist_merge(&e->e_attrs, "dsDisk", vals);
be9751
         }
be9751
-- 
be9751
2.26.2
be9751