Blob Blame History Raw
From eaeed6cca46a0223617ead834aaa576dd5ad07ff Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Fri, 31 May 2019 16:11:16 +0200
Subject: [PATCH] Fix: sbd-common: query rt-budget > 0 otherwise try moving to
 root-slice

---
 src/sbd-common.c     | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/sbd-inquisitor.c |  15 +++++++
 src/sbd.h            |   2 +
 src/sbd.sysconfig    |  14 +++++++
 4 files changed, 141 insertions(+)

diff --git a/src/sbd-common.c b/src/sbd-common.c
index 873a76e..ebfdaa3 100644
--- a/src/sbd-common.c
+++ b/src/sbd-common.c
@@ -662,6 +662,112 @@ static void sbd_memlock(int stackgrowK, int heapgrowK)
 #endif
 }
 
+/* Find the cpuacct-cgroup of this process in /proc/<pid>/cgroup and read its
+ * cpu.rt_runtime_us. Returns that budget, or -1 if it can't be determined. */
+static int get_realtime_budget(void)
+{
+    FILE *f;
+    char fname[PATH_MAX];
+    int res = -1, lnum = 0, num;
+    char *cgroup = NULL, *namespecs = NULL;
+
+    snprintf(fname, PATH_MAX, "/proc/%jd/cgroup", (intmax_t)getpid());
+    f = fopen(fname, "rt");
+    if (f == NULL) {
+        cl_log(LOG_WARNING, "Can't open cgroup file for pid=%jd",
+                            (intmax_t)getpid());
+        goto exit_res;
+    }
+    while ((num = fscanf(f, "%d:%m[^:]:%m[^\n]", &lnum,
+                         &namespecs, &cgroup)) != EOF) {
+        if (namespecs && strstr(namespecs, "cpuacct")) {
+            free(namespecs);
+            break;
+        }
+        free(cgroup);
+        cgroup = NULL;
+        free(namespecs);
+        namespecs = NULL;
+        /* a line that doesn't fully match (e.g. "0::/..." on cgroup-v2) is
+         * left partly unread -> skip its rest, or we'd loop on it forever */
+        if ((num < 3) && (fscanf(f, "%*[^\n]") == EOF)) {
+            break;
+        }
+    }
+    fclose(f);
+    if (cgroup == NULL) {
+        cl_log(LOG_WARNING, "Failed getting cgroup for pid=%jd",
+                            (intmax_t)getpid());
+        goto exit_res;
+    }
+    snprintf(fname, PATH_MAX, "/sys/fs/cgroup/cpu%s/cpu.rt_runtime_us", cgroup);
+    f = fopen(fname, "rt");
+    if (f == NULL) {
+        cl_log(LOG_WARNING, "cpu.rt_runtime_us existed for root-slice but "
+            "doesn't for '%s'", cgroup);
+        goto exit_res;
+    }
+    if (fscanf(f, "%d", &res) != 1) {
+        cl_log(LOG_WARNING, "failed reading rt-budget from %s", fname);
+    } else {
+        cl_log(LOG_INFO, "slice='%s' has rt-budget=%d", cgroup, res);
+    }
+    fclose(f);
+exit_res:
+    free(cgroup);
+    return res;
+}
+
+/* Move the calling process to the root cpu-cgroup unless its cgroup has
+ * rt-budget and enforce_root_cgroup is false. Returns 0 if moved or no move
+ * was needed, -1 otherwise. (stolen from corosync) */
+static int sbd_move_to_root_cgroup(bool enforce_root_cgroup) {
+    FILE *f;
+    int res = -1;
+
+    /*
+     * /sys/fs/cgroup is hardcoded, because most of Linux distributions are now
+     * using systemd and systemd uses hardcoded path of cgroup mount point.
+     *
+     * This feature is expected to be removed as soon as systemd gets support
+     * for managing RT configuration.
+     */
+    f = fopen("/sys/fs/cgroup/cpu/cpu.rt_runtime_us", "rt");
+    if (f == NULL) {
+        cl_log(LOG_DEBUG, "cpu.rt_runtime_us doesn't exist -> "
+            "system without cgroup or with disabled CONFIG_RT_GROUP_SCHED");
+        res = 0;
+        goto exit_res;
+    }
+    fclose(f);
+
+    if ((!enforce_root_cgroup) && (get_realtime_budget() > 0)) {
+        cl_log(LOG_DEBUG, "looks as if we have rt-budget in the slice we are "
+                          "-> skip moving to root-slice");
+        res = 0;
+        goto exit_res;
+    }
+
+    f = fopen("/sys/fs/cgroup/cpu/tasks", "w");
+    if (f == NULL) {
+        cl_log(LOG_WARNING, "Can't open cgroups tasks file for writing");
+        goto exit_res;
+    }
+
+    /* fprintf() only buffers - a write error may first surface in fclose() */
+    if (fprintf(f, "%jd\n", (intmax_t)getpid()) <= 0) {
+        cl_log(LOG_WARNING, "Can't write sbd pid into cgroups tasks file");
+        fclose(f);
+    } else if (fclose(f) != 0) {
+        cl_log(LOG_WARNING, "Can't close cgroups tasks file");
+    } else {
+        res = 0;
+    }
+
+exit_res:
+    return (res);
+}
+
 void
 sbd_make_realtime(int priority, int stackgrowK, int heapgrowK)
 {
@@ -670,6 +776,10 @@ sbd_make_realtime(int priority, int stackgrowK, int heapgrowK)
     }
 
 #ifdef SCHED_RR
+    if (move_to_root_cgroup) {
+        sbd_move_to_root_cgroup(enforce_moving_to_root_cgroup);
+    }
+
     {
         int pcurrent = 0;
         int pmin = sched_get_priority_min(SCHED_RR);
diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c
index abde4e5..cef5cc7 100644
--- a/src/sbd-inquisitor.c
+++ b/src/sbd-inquisitor.c
@@ -33,6 +33,8 @@ int	start_mode = 0;
 char*	pidfile = NULL;
 bool do_flush = true;
 char timeout_sysrq_char = 'b';
+bool move_to_root_cgroup = true;
+bool enforce_moving_to_root_cgroup = false;
 
 int parse_device_line(const char *line);
 
@@ -965,6 +967,19 @@ int main(int argc, char **argv, char **envp)
             timeout_action = strdup(value);
         }
 
+        value = getenv("SBD_MOVE_TO_ROOT_CGROUP");
+        if(value) {
+            move_to_root_cgroup = crm_is_true(value);
+
+            if (move_to_root_cgroup) {
+               enforce_moving_to_root_cgroup = true;
+            } else {
+                if (strcmp(value, "auto") == 0) {
+                    move_to_root_cgroup = true;
+                }
+            }
+        }
+
 	while ((c = getopt(argc, argv, "czC:DPRTWZhvw:d:n:p:1:2:3:4:5:t:I:F:S:s:r:")) != -1) {
 		switch (c) {
 		case 'D':
diff --git a/src/sbd.h b/src/sbd.h
index 3b05a11..ac30ec7 100644
--- a/src/sbd.h
+++ b/src/sbd.h
@@ -159,6 +159,8 @@ extern bool watchdogdev_is_default;
 extern char*  local_uname;
 extern bool do_flush;
 extern char timeout_sysrq_char;
+extern bool move_to_root_cgroup;
+extern bool enforce_moving_to_root_cgroup;
 
 /* Global, non-tunable variables: */
 extern int  sector_size;
diff --git a/src/sbd.sysconfig b/src/sbd.sysconfig
index f163f21..e1a60ed 100644
--- a/src/sbd.sysconfig
+++ b/src/sbd.sysconfig
@@ -91,6 +91,20 @@ SBD_WATCHDOG_TIMEOUT=5
 #
 SBD_TIMEOUT_ACTION=flush,reboot
 
+## Type: yesno / auto
+## Default: auto
+#
+# If CPUAccounting is enabled, the default is not to assign any RT-budget
+# to the system.slice, which prevents sbd from running RR-scheduled.
+#
+# One way to work around that issue is to move the sbd-processes from
+# the slice they were originally started in to the root-slice.
+# Of course, starting sbd in a certain slice might be intentional.
+# Thus, in auto-mode sbd checks whether its slice has RT-budget assigned.
+# If that is the case, sbd stays in that slice; otherwise it is
+# moved to the root-slice.
+SBD_MOVE_TO_ROOT_CGROUP=auto
+
 ## Type: string
 ## Default: ""
 #
-- 
1.8.3.1