Blame SOURCES/0016-Feature-make-timeout-action-executed-by-sbd-configur.patch

6f8c14
From b64c30af56e7eabd63ce1db25bc5ed9b953485af Mon Sep 17 00:00:00 2001
6f8c14
From: Klaus Wenninger <klaus.wenninger@aon.at>
6f8c14
Date: Fri, 23 Nov 2018 14:09:22 +0100
6f8c14
Subject: [PATCH] Feature: make timeout-action executed by sbd configurable
6f8c14
6f8c14
---
6f8c14
 man/sbd.8.pod        | 19 +++++++++++++++++++
6f8c14
 src/sbd-common.c     | 22 ++++++++++++++++------
6f8c14
 src/sbd-inquisitor.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++---
6f8c14
 src/sbd-md.c         |  2 +-
6f8c14
 src/sbd.h            |  3 +++
6f8c14
 src/sbd.sysconfig    | 18 ++++++++++++++++++
6f8c14
 6 files changed, 107 insertions(+), 10 deletions(-)
6f8c14
6f8c14
diff --git a/man/sbd.8.pod b/man/sbd.8.pod
6f8c14
index ffd01c2..dbb3855 100644
6f8c14
--- a/man/sbd.8.pod
6f8c14
+++ b/man/sbd.8.pod
6f8c14
@@ -333,6 +333,23 @@ prevent a successful crashdump from ever being written.
6f8c14
 
6f8c14
 Defaults to 240 seconds. Set to zero to disable.
6f8c14
 
6f8c14
+=item B<-r> I<action>
6f8c14
+
6f8c14
+Actions to be executed when the watchers don't report in time to the sbd
6f8c14
+master process or one of the watchers detects that the master process
6f8c14
+has died.
6f8c14
+
6f8c14
+Set timeout-action to comma-separated combination of
6f8c14
+noflush|flush plus reboot|crashdump|off.
6f8c14
+If only one of the two is given, the other stays at its default.
6f8c14
+
6f8c14
+This doesn't affect actions like off, crashdump, reboot explicitly
6f8c14
+triggered via message slots.
6f8c14
+Nor does it configure the action a watchdog would
6f8c14
+trigger should it expire (there is no generic interface).
6f8c14
+
6f8c14
+Defaults to flush,reboot.
6f8c14
+
6f8c14
 =back
6f8c14
 
6f8c14
 =head2 allocate
6f8c14
@@ -552,6 +569,8 @@ options to pass to the daemon:
6f8c14
 
6f8c14
 C<sbd> will fail to start if no C<SBD_DEVICE> is specified. See the
6f8c14
 installed template for more options that can be configured here.
6f8c14
+In general, configuration done via parameters takes precedence over
6f8c14
+the configuration from the configuration file.
6f8c14
 
6f8c14
 =head2 Testing the sbd installation
6f8c14
 
6f8c14
diff --git a/src/sbd-common.c b/src/sbd-common.c
6f8c14
index cc84cd0..0e8be65 100644
6f8c14
--- a/src/sbd-common.c
6f8c14
+++ b/src/sbd-common.c
6f8c14
@@ -98,6 +98,8 @@ usage(void)
6f8c14
 "			(default is 1, set to 0 to disable)\n"
6f8c14
 "-P		Check Pacemaker quorum and node health (optional, watch only)\n"
6f8c14
 "-Z		Enable trace mode. WARNING: UNSAFE FOR PRODUCTION!\n"
6f8c14
+"-r		Set timeout-action to comma-separated combination of\n"
6f8c14
+"		noflush|flush plus reboot|crashdump|off (default is flush,reboot)\n"
6f8c14
 "Commands:\n"
6f8c14
 #if SUPPORT_SHARED_DISK
6f8c14
 "create		initialize N slots on <dev> - OVERWRITES DEVICE!\n"
6f8c14
@@ -769,7 +771,7 @@ sysrq_trigger(char t)
6f8c14
 
6f8c14
 
6f8c14
 static void
6f8c14
-do_exit(char kind) 
6f8c14
+do_exit(char kind, bool do_flush)
6f8c14
 {
6f8c14
     /* TODO: Turn debug_mode into a bit field? Delay + kdump for example */
6f8c14
     const char *reason = NULL;
6f8c14
@@ -814,7 +816,9 @@ do_exit(char kind)
6f8c14
     }
6f8c14
 
6f8c14
     cl_log(LOG_EMERG, "Rebooting system: %s", reason);
6f8c14
-    sync();
6f8c14
+    if (do_flush) {
6f8c14
+        sync();
6f8c14
+    }
6f8c14
 
6f8c14
     if(kind == 'c') {
6f8c14
         watchdog_close(true);
6f8c14
@@ -834,19 +838,25 @@ do_exit(char kind)
6f8c14
 void
6f8c14
 do_crashdump(void)
6f8c14
 {
6f8c14
-    do_exit('c');
6f8c14
+    do_exit('c', true);
6f8c14
 }
6f8c14
 
6f8c14
 void
6f8c14
 do_reset(void)
6f8c14
 {
6f8c14
-    do_exit('b');
6f8c14
+    do_exit('b', true);
6f8c14
 }
6f8c14
 
6f8c14
 void
6f8c14
 do_off(void)
6f8c14
 {
6f8c14
-    do_exit('o');
6f8c14
+    do_exit('o', true);
6f8c14
+}
6f8c14
+
6f8c14
+void
6f8c14
+do_timeout_action(void)
6f8c14
+{
6f8c14
+	do_exit(timeout_sysrq_char, do_flush);
6f8c14
 }
6f8c14
 
6f8c14
 /*
6f8c14
@@ -980,7 +990,7 @@ notify_parent(void)
6f8c14
         /* Our parent died unexpectedly. Triggering
6f8c14
          * self-fence. */
6f8c14
         cl_log(LOG_WARNING, "Our parent is dead.");
6f8c14
-        do_reset();
6f8c14
+        do_timeout_action();
6f8c14
     }
6f8c14
 
6f8c14
     switch (servant_health) {
6f8c14
diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c
6f8c14
index 9b193d4..8e0bc87 100644
6f8c14
--- a/src/sbd-inquisitor.c
6f8c14
+++ b/src/sbd-inquisitor.c
6f8c14
@@ -31,6 +31,8 @@ int	servant_restart_interval = 5;
6f8c14
 int	servant_restart_count = 1;
6f8c14
 int	start_mode = 0;
6f8c14
 char*	pidfile = NULL;
6f8c14
+bool do_flush = true;
6f8c14
+char timeout_sysrq_char = 'b';
6f8c14
 
6f8c14
 int parse_device_line(const char *line);
6f8c14
 
6f8c14
@@ -655,7 +657,7 @@ void inquisitor_child(void)
6f8c14
 				/* At level 2 or above, we do nothing, but expect
6f8c14
 				 * things to eventually return to
6f8c14
 				 * normal. */
6f8c14
-				do_reset();
6f8c14
+				do_timeout_action();
6f8c14
 			} else {
6f8c14
 				cl_log(LOG_ERR, "SBD: DEBUG MODE: Would have fenced due to timeout!");
6f8c14
 			}
6f8c14
@@ -668,7 +670,7 @@ void inquisitor_child(void)
6f8c14
 
6f8c14
                         if (debug_mode && watchdog_use) {
6f8c14
                             /* In debug mode, trigger a reset before the watchdog can panic the machine */
6f8c14
-                            do_reset();
6f8c14
+                            do_timeout_action();
6f8c14
                         }
6f8c14
 		}
6f8c14
 
6f8c14
@@ -833,6 +835,7 @@ int main(int argc, char **argv, char **envp)
6f8c14
         int qb_facility;
6f8c14
         const char *value = NULL;
6f8c14
         int start_delay = 0;
6f8c14
+        char *timeout_action = NULL;
6f8c14
 
6f8c14
 	if ((cmdname = strrchr(argv[0], '/')) == NULL) {
6f8c14
 		cmdname = argv[0];
6f8c14
@@ -928,7 +931,12 @@ int main(int argc, char **argv, char **envp)
6f8c14
         }
6f8c14
         cl_log(LOG_DEBUG, "Start delay: %d (%s)", (int)start_delay, value?value:"default");
6f8c14
 
6f8c14
-	while ((c = getopt(argc, argv, "czC:DPRTWZhvw:d:n:p:1:2:3:4:5:t:I:F:S:s:")) != -1) {
6f8c14
+        value = getenv("SBD_TIMEOUT_ACTION");
6f8c14
+        if(value) {
6f8c14
+            timeout_action = strdup(value);
6f8c14
+        }
6f8c14
+
6f8c14
+	while ((c = getopt(argc, argv, "czC:DPRTWZhvw:d:n:p:1:2:3:4:5:t:I:F:S:s:r:")) != -1) {
6f8c14
 		switch (c) {
6f8c14
 		case 'D':
6f8c14
 			break;
6f8c14
@@ -1043,6 +1051,12 @@ int main(int argc, char **argv, char **envp)
6f8c14
 			cl_log(LOG_INFO, "Servant restart count set to %d",
6f8c14
 					(int)servant_restart_count);
6f8c14
 			break;
6f8c14
+		case 'r':
6f8c14
+			if (timeout_action) {
6f8c14
+				free(timeout_action);
6f8c14
+			}
6f8c14
+			timeout_action = strdup(optarg);
6f8c14
+			break;
6f8c14
 		case 'h':
6f8c14
 			usage();
6f8c14
 			return (0);
6f8c14
@@ -1101,6 +1115,39 @@ int main(int argc, char **argv, char **envp)
6f8c14
 		goto out;
6f8c14
 	}
6f8c14
 
6f8c14
+	if (timeout_action) {
6f8c14
+		char *p[2];
6f8c14
+		int i;
6f8c14
+		char c;
6f8c14
+		int nrflags = sscanf(timeout_action, "%m[a-z],%m[a-z]%c", &p[0], &p[1], &c);
6f8c14
+		bool parse_error = (nrflags < 1) || (nrflags > 2);
6f8c14
+
6f8c14
+		for (i = 0; (i < nrflags) && (i < 2); i++) {
6f8c14
+			if (!strcmp(p[i], "reboot")) {
6f8c14
+				timeout_sysrq_char = 'b';
6f8c14
+			} else if (!strcmp(p[i], "crashdump")) {
6f8c14
+				timeout_sysrq_char = 'c';
6f8c14
+			} else if (!strcmp(p[i], "off")) {
6f8c14
+				timeout_sysrq_char = 'o';
6f8c14
+			} else if (!strcmp(p[i], "flush")) {
6f8c14
+				do_flush = true;
6f8c14
+			} else if (!strcmp(p[i], "noflush")) {
6f8c14
+				do_flush = false;
6f8c14
+			} else {
6f8c14
+				parse_error = true;
6f8c14
+			}
6f8c14
+			free(p[i]);
6f8c14
+		}
6f8c14
+		if (parse_error) {
6f8c14
+			fprintf(stderr, "Failed to parse timeout-action \"%s\".\n",
6f8c14
+				timeout_action);
6f8c14
+			exit_status = -1;
6f8c14
+			goto out;
6f8c14
+		}
6f8c14
+	}
6f8c14
+	cl_log(LOG_NOTICE, "%s flush + writing \'%c\' to sysrq on timeout",
6f8c14
+		do_flush?"Doing":"Skipping", timeout_sysrq_char);
6f8c14
+
6f8c14
 #if SUPPORT_SHARED_DISK
6f8c14
 	if (strcmp(argv[optind], "create") == 0) {
6f8c14
 		exit_status = init_devices(servants_leader);
6f8c14
diff --git a/src/sbd-md.c b/src/sbd-md.c
6f8c14
index a736118..579d273 100644
6f8c14
--- a/src/sbd-md.c
6f8c14
+++ b/src/sbd-md.c
6f8c14
@@ -1149,7 +1149,7 @@ int servant(const char *diskname, int mode, const void* argp)
6f8c14
 		if (ppid == 1) {
6f8c14
 			/* Our parent died unexpectedly. Triggering
6f8c14
 			 * self-fence. */
6f8c14
-			do_reset();
6f8c14
+			do_timeout_action();
6f8c14
 		}
6f8c14
 
6f8c14
 		/* These attempts are, by definition, somewhat racy. If
6f8c14
diff --git a/src/sbd.h b/src/sbd.h
6f8c14
index 0f8847a..386c85c 100644
6f8c14
--- a/src/sbd.h
6f8c14
+++ b/src/sbd.h
6f8c14
@@ -130,6 +130,7 @@ void sysrq_trigger(char t);
6f8c14
 void do_crashdump(void);
6f8c14
 void do_reset(void);
6f8c14
 void do_off(void);
6f8c14
+void do_timeout_action(void);
6f8c14
 pid_t make_daemon(void);
6f8c14
 void maximize_priority(void);
6f8c14
 void sbd_get_uname(void);
6f8c14
@@ -153,6 +154,8 @@ extern int  debug_mode;
6f8c14
 extern char *watchdogdev;
6f8c14
 extern bool watchdogdev_is_default;
6f8c14
 extern char*  local_uname;
6f8c14
+extern bool do_flush;
6f8c14
+extern char timeout_sysrq_char;
6f8c14
 
6f8c14
 /* Global, non-tunable variables: */
6f8c14
 extern int  sector_size;
6f8c14
diff --git a/src/sbd.sysconfig b/src/sbd.sysconfig
6f8c14
index c6d7c07..8f38426 100644
6f8c14
--- a/src/sbd.sysconfig
6f8c14
+++ b/src/sbd.sysconfig
6f8c14
@@ -71,6 +71,24 @@ SBD_WATCHDOG_DEV=/dev/watchdog
6f8c14
 SBD_WATCHDOG_TIMEOUT=5
6f8c14
 
6f8c14
 ## Type: string
6f8c14
+## Default: "flush,reboot"
6f8c14
+#
6f8c14
+# Actions to be executed when the watchers don't report in time to the sbd
6f8c14
+# master process or one of the watchers detects that the master process
6f8c14
+# has died.
6f8c14
+#
6f8c14
+# Set timeout-action to comma-separated combination of
6f8c14
+# noflush|flush plus reboot|crashdump|off.
6f8c14
+# If only one of the two is given, the other stays at its default.
6f8c14
+#
6f8c14
+# This doesn't affect actions like off, crashdump, reboot explicitly
6f8c14
+# triggered via message slots.
6f8c14
+# Nor does it configure the action a watchdog would
6f8c14
+# trigger should it expire (there is no generic interface).
6f8c14
+#
6f8c14
+SBD_TIMEOUT_ACTION=flush,reboot
6f8c14
+
6f8c14
+## Type: string
6f8c14
 ## Default: ""
6f8c14
 #
6f8c14
 # Additional options for starting sbd
6f8c14
-- 
6f8c14
1.8.3.1
6f8c14