|
|
6f8c14 |
From b64c30af56e7eabd63ce1db25bc5ed9b953485af Mon Sep 17 00:00:00 2001
|
|
|
6f8c14 |
From: Klaus Wenninger <klaus.wenninger@aon.at>
|
|
|
6f8c14 |
Date: Fri, 23 Nov 2018 14:09:22 +0100
|
|
|
6f8c14 |
Subject: [PATCH] Feature: make timeout-action executed by sbd configurable
|
|
|
6f8c14 |
|
|
|
6f8c14 |
---
|
|
|
6f8c14 |
man/sbd.8.pod | 19 +++++++++++++++++++
|
|
|
6f8c14 |
src/sbd-common.c | 22 ++++++++++++++++------
|
|
|
6f8c14 |
src/sbd-inquisitor.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++---
|
|
|
6f8c14 |
src/sbd-md.c | 2 +-
|
|
|
6f8c14 |
src/sbd.h | 3 +++
|
|
|
6f8c14 |
src/sbd.sysconfig | 18 ++++++++++++++++++
|
|
|
6f8c14 |
6 files changed, 107 insertions(+), 10 deletions(-)
|
|
|
6f8c14 |
|
|
|
6f8c14 |
diff --git a/man/sbd.8.pod b/man/sbd.8.pod
|
|
|
6f8c14 |
index ffd01c2..dbb3855 100644
|
|
|
6f8c14 |
--- a/man/sbd.8.pod
|
|
|
6f8c14 |
+++ b/man/sbd.8.pod
|
|
|
6f8c14 |
@@ -333,6 +333,23 @@ prevent a successful crashdump from ever being written.
|
|
|
6f8c14 |
|
|
|
6f8c14 |
Defaults to 240 seconds. Set to zero to disable.
|
|
|
6f8c14 |
|
|
|
6f8c14 |
+=item B<-r> I<N>
|
|
|
6f8c14 |
+
|
|
|
6f8c14 |
+Actions to be executed when the watchers don't report in time to the sbd
|
|
|
6f8c14 |
+master process or one of the watchers detects that the master process
|
|
|
6f8c14 |
+has died.
|
|
|
6f8c14 |
+
|
|
|
6f8c14 |
+Set timeout-action to comma-separated combination of
|
|
|
6f8c14 |
+noflush|flush plus reboot|crashdump|off.
|
|
|
6f8c14 |
+If only one of the two is given, the other stays at its default.
|
|
|
6f8c14 |
+
|
|
|
6f8c14 |
+This doesn't affect actions like off, crashdump, reboot explicitly
|
|
|
6f8c14 |
+triggered via message slots.
|
|
|
6f8c14 |
+It also does not configure the action a watchdog would
|
|
|
6f8c14 |
+trigger should it run off (there is no generic interface).
|
|
|
6f8c14 |
+
|
|
|
6f8c14 |
+Defaults to flush,reboot.
|
|
|
6f8c14 |
+
|
|
|
6f8c14 |
=back
|
|
|
6f8c14 |
|
|
|
6f8c14 |
=head2 allocate
|
|
|
6f8c14 |
@@ -552,6 +569,8 @@ options to pass to the daemon:
|
|
|
6f8c14 |
|
|
|
6f8c14 |
C<sbd> will fail to start if no C<SBD_DEVICE> is specified. See the
|
|
|
6f8c14 |
installed template for more options that can be configured here.
|
|
|
6f8c14 |
+In general, configuration done via parameters takes precedence over
|
|
|
6f8c14 |
+the configuration from the configuration file.
|
|
|
6f8c14 |
|
|
|
6f8c14 |
=head2 Testing the sbd installation
|
|
|
6f8c14 |
|
|
|
6f8c14 |
diff --git a/src/sbd-common.c b/src/sbd-common.c
|
|
|
6f8c14 |
index cc84cd0..0e8be65 100644
|
|
|
6f8c14 |
--- a/src/sbd-common.c
|
|
|
6f8c14 |
+++ b/src/sbd-common.c
|
|
|
6f8c14 |
@@ -98,6 +98,8 @@ usage(void)
|
|
|
6f8c14 |
" (default is 1, set to 0 to disable)\n"
|
|
|
6f8c14 |
"-P Check Pacemaker quorum and node health (optional, watch only)\n"
|
|
|
6f8c14 |
"-Z Enable trace mode. WARNING: UNSAFE FOR PRODUCTION!\n"
|
|
|
6f8c14 |
+"-r Set timeout-action to comma-separated combination of\n"
|
|
|
6f8c14 |
+" noflush|flush plus reboot|crashdump|off (default is flush,reboot)\n"
|
|
|
6f8c14 |
"Commands:\n"
|
|
|
6f8c14 |
#if SUPPORT_SHARED_DISK
|
|
|
6f8c14 |
"create initialize N slots on <dev> - OVERWRITES DEVICE!\n"
|
|
|
6f8c14 |
@@ -769,7 +771,7 @@ sysrq_trigger(char t)
|
|
|
6f8c14 |
|
|
|
6f8c14 |
|
|
|
6f8c14 |
static void
|
|
|
6f8c14 |
-do_exit(char kind)
|
|
|
6f8c14 |
+do_exit(char kind, bool do_flush)
|
|
|
6f8c14 |
{
|
|
|
6f8c14 |
/* TODO: Turn debug_mode into a bit field? Delay + kdump for example */
|
|
|
6f8c14 |
const char *reason = NULL;
|
|
|
6f8c14 |
@@ -814,7 +816,9 @@ do_exit(char kind)
|
|
|
6f8c14 |
}
|
|
|
6f8c14 |
|
|
|
6f8c14 |
cl_log(LOG_EMERG, "Rebooting system: %s", reason);
|
|
|
6f8c14 |
- sync();
|
|
|
6f8c14 |
+ if (do_flush) {
|
|
|
6f8c14 |
+ sync();
|
|
|
6f8c14 |
+ }
|
|
|
6f8c14 |
|
|
|
6f8c14 |
if(kind == 'c') {
|
|
|
6f8c14 |
watchdog_close(true);
|
|
|
6f8c14 |
@@ -834,19 +838,25 @@ do_exit(char kind)
|
|
|
6f8c14 |
void
|
|
|
6f8c14 |
do_crashdump(void)
|
|
|
6f8c14 |
{
|
|
|
6f8c14 |
- do_exit('c');
|
|
|
6f8c14 |
+ do_exit('c', true);
|
|
|
6f8c14 |
}
|
|
|
6f8c14 |
|
|
|
6f8c14 |
void
|
|
|
6f8c14 |
do_reset(void)
|
|
|
6f8c14 |
{
|
|
|
6f8c14 |
- do_exit('b');
|
|
|
6f8c14 |
+ do_exit('b', true);
|
|
|
6f8c14 |
}
|
|
|
6f8c14 |
|
|
|
6f8c14 |
void
|
|
|
6f8c14 |
do_off(void)
|
|
|
6f8c14 |
{
|
|
|
6f8c14 |
- do_exit('o');
|
|
|
6f8c14 |
+ do_exit('o', true);
|
|
|
6f8c14 |
+}
|
|
|
6f8c14 |
+
|
|
|
6f8c14 |
+void
|
|
|
6f8c14 |
+do_timeout_action(void)
|
|
|
6f8c14 |
+{
|
|
|
6f8c14 |
+ do_exit(timeout_sysrq_char, do_flush);
|
|
|
6f8c14 |
}
|
|
|
6f8c14 |
|
|
|
6f8c14 |
/*
|
|
|
6f8c14 |
@@ -980,7 +990,7 @@ notify_parent(void)
|
|
|
6f8c14 |
/* Our parent died unexpectedly. Triggering
|
|
|
6f8c14 |
* self-fence. */
|
|
|
6f8c14 |
cl_log(LOG_WARNING, "Our parent is dead.");
|
|
|
6f8c14 |
- do_reset();
|
|
|
6f8c14 |
+ do_timeout_action();
|
|
|
6f8c14 |
}
|
|
|
6f8c14 |
|
|
|
6f8c14 |
switch (servant_health) {
|
|
|
6f8c14 |
diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c
|
|
|
6f8c14 |
index 9b193d4..8e0bc87 100644
|
|
|
6f8c14 |
--- a/src/sbd-inquisitor.c
|
|
|
6f8c14 |
+++ b/src/sbd-inquisitor.c
|
|
|
6f8c14 |
@@ -31,6 +31,8 @@ int servant_restart_interval = 5;
|
|
|
6f8c14 |
int servant_restart_count = 1;
|
|
|
6f8c14 |
int start_mode = 0;
|
|
|
6f8c14 |
char* pidfile = NULL;
|
|
|
6f8c14 |
+bool do_flush = true;
|
|
|
6f8c14 |
+char timeout_sysrq_char = 'b';
|
|
|
6f8c14 |
|
|
|
6f8c14 |
int parse_device_line(const char *line);
|
|
|
6f8c14 |
|
|
|
6f8c14 |
@@ -655,7 +657,7 @@ void inquisitor_child(void)
|
|
|
6f8c14 |
/* At level 2 or above, we do nothing, but expect
|
|
|
6f8c14 |
* things to eventually return to
|
|
|
6f8c14 |
* normal. */
|
|
|
6f8c14 |
- do_reset();
|
|
|
6f8c14 |
+ do_timeout_action();
|
|
|
6f8c14 |
} else {
|
|
|
6f8c14 |
cl_log(LOG_ERR, "SBD: DEBUG MODE: Would have fenced due to timeout!");
|
|
|
6f8c14 |
}
|
|
|
6f8c14 |
@@ -668,7 +670,7 @@ void inquisitor_child(void)
|
|
|
6f8c14 |
|
|
|
6f8c14 |
if (debug_mode && watchdog_use) {
|
|
|
6f8c14 |
/* In debug mode, trigger a reset before the watchdog can panic the machine */
|
|
|
6f8c14 |
- do_reset();
|
|
|
6f8c14 |
+ do_timeout_action();
|
|
|
6f8c14 |
}
|
|
|
6f8c14 |
}
|
|
|
6f8c14 |
|
|
|
6f8c14 |
@@ -833,6 +835,7 @@ int main(int argc, char **argv, char **envp)
|
|
|
6f8c14 |
int qb_facility;
|
|
|
6f8c14 |
const char *value = NULL;
|
|
|
6f8c14 |
int start_delay = 0;
|
|
|
6f8c14 |
+ char *timeout_action = NULL;
|
|
|
6f8c14 |
|
|
|
6f8c14 |
if ((cmdname = strrchr(argv[0], '/')) == NULL) {
|
|
|
6f8c14 |
cmdname = argv[0];
|
|
|
6f8c14 |
@@ -928,7 +931,12 @@ int main(int argc, char **argv, char **envp)
|
|
|
6f8c14 |
}
|
|
|
6f8c14 |
cl_log(LOG_DEBUG, "Start delay: %d (%s)", (int)start_delay, value?value:"default");
|
|
|
6f8c14 |
|
|
|
6f8c14 |
- while ((c = getopt(argc, argv, "czC:DPRTWZhvw:d:n:p:1:2:3:4:5:t:I:F:S:s:")) != -1) {
|
|
|
6f8c14 |
+ value = getenv("SBD_TIMEOUT_ACTION");
|
|
|
6f8c14 |
+ if(value) {
|
|
|
6f8c14 |
+ timeout_action = strdup(value);
|
|
|
6f8c14 |
+ }
|
|
|
6f8c14 |
+
|
|
|
6f8c14 |
+ while ((c = getopt(argc, argv, "czC:DPRTWZhvw:d:n:p:1:2:3:4:5:t:I:F:S:s:r:")) != -1) {
|
|
|
6f8c14 |
switch (c) {
|
|
|
6f8c14 |
case 'D':
|
|
|
6f8c14 |
break;
|
|
|
6f8c14 |
@@ -1043,6 +1051,12 @@ int main(int argc, char **argv, char **envp)
|
|
|
6f8c14 |
cl_log(LOG_INFO, "Servant restart count set to %d",
|
|
|
6f8c14 |
(int)servant_restart_count);
|
|
|
6f8c14 |
break;
|
|
|
6f8c14 |
+ case 'r':
|
|
|
6f8c14 |
+ if (timeout_action) {
|
|
|
6f8c14 |
+ free(timeout_action);
|
|
|
6f8c14 |
+ }
|
|
|
6f8c14 |
+ timeout_action = strdup(optarg);
|
|
|
6f8c14 |
+ break;
|
|
|
6f8c14 |
case 'h':
|
|
|
6f8c14 |
usage();
|
|
|
6f8c14 |
return (0);
|
|
|
6f8c14 |
@@ -1101,6 +1115,39 @@ int main(int argc, char **argv, char **envp)
|
|
|
6f8c14 |
goto out;
|
|
|
6f8c14 |
}
|
|
|
6f8c14 |
|
|
|
6f8c14 |
+ if (timeout_action) {
|
|
|
6f8c14 |
+ char *p[2];
|
|
|
6f8c14 |
+ int i;
|
|
|
6f8c14 |
+ char c;
|
|
|
6f8c14 |
+ int nrflags = sscanf(timeout_action, "%m[a-z],%m[a-z]%c", &p[0], &p[1], &c);
|
|
|
6f8c14 |
+ bool parse_error = (nrflags < 1) || (nrflags > 2);
|
|
|
6f8c14 |
+
|
|
|
6f8c14 |
+ for (i = 0; (i < nrflags) && (i < 2); i++) {
|
|
|
6f8c14 |
+ if (!strcmp(p[i], "reboot")) {
|
|
|
6f8c14 |
+ timeout_sysrq_char = 'b';
|
|
|
6f8c14 |
+ } else if (!strcmp(p[i], "crashdump")) {
|
|
|
6f8c14 |
+ timeout_sysrq_char = 'c';
|
|
|
6f8c14 |
+ } else if (!strcmp(p[i], "off")) {
|
|
|
6f8c14 |
+ timeout_sysrq_char = 'o';
|
|
|
6f8c14 |
+ } else if (!strcmp(p[i], "flush")) {
|
|
|
6f8c14 |
+ do_flush = true;
|
|
|
6f8c14 |
+ } else if (!strcmp(p[i], "noflush")) {
|
|
|
6f8c14 |
+ do_flush = false;
|
|
|
6f8c14 |
+ } else {
|
|
|
6f8c14 |
+ parse_error = true;
|
|
|
6f8c14 |
+ }
|
|
|
6f8c14 |
+ free(p[i]);
|
|
|
6f8c14 |
+ }
|
|
|
6f8c14 |
+ if (parse_error) {
|
|
|
6f8c14 |
+ fprintf(stderr, "Failed to parse timeout-action \"%s\".\n",
|
|
|
6f8c14 |
+ timeout_action);
|
|
|
6f8c14 |
+ exit_status = -1;
|
|
|
6f8c14 |
+ goto out;
|
|
|
6f8c14 |
+ }
|
|
|
6f8c14 |
+ }
|
|
|
6f8c14 |
+ cl_log(LOG_NOTICE, "%s flush + writing \'%c\' to sysrq on timeout",
|
|
|
6f8c14 |
+ do_flush?"Doing":"Skipping", timeout_sysrq_char);
|
|
|
6f8c14 |
+
|
|
|
6f8c14 |
#if SUPPORT_SHARED_DISK
|
|
|
6f8c14 |
if (strcmp(argv[optind], "create") == 0) {
|
|
|
6f8c14 |
exit_status = init_devices(servants_leader);
|
|
|
6f8c14 |
diff --git a/src/sbd-md.c b/src/sbd-md.c
|
|
|
6f8c14 |
index a736118..579d273 100644
|
|
|
6f8c14 |
--- a/src/sbd-md.c
|
|
|
6f8c14 |
+++ b/src/sbd-md.c
|
|
|
6f8c14 |
@@ -1149,7 +1149,7 @@ int servant(const char *diskname, int mode, const void* argp)
|
|
|
6f8c14 |
if (ppid == 1) {
|
|
|
6f8c14 |
/* Our parent died unexpectedly. Triggering
|
|
|
6f8c14 |
* self-fence. */
|
|
|
6f8c14 |
- do_reset();
|
|
|
6f8c14 |
+ do_timeout_action();
|
|
|
6f8c14 |
}
|
|
|
6f8c14 |
|
|
|
6f8c14 |
/* These attempts are, by definition, somewhat racy. If
|
|
|
6f8c14 |
diff --git a/src/sbd.h b/src/sbd.h
|
|
|
6f8c14 |
index 0f8847a..386c85c 100644
|
|
|
6f8c14 |
--- a/src/sbd.h
|
|
|
6f8c14 |
+++ b/src/sbd.h
|
|
|
6f8c14 |
@@ -130,6 +130,7 @@ void sysrq_trigger(char t);
|
|
|
6f8c14 |
void do_crashdump(void);
|
|
|
6f8c14 |
void do_reset(void);
|
|
|
6f8c14 |
void do_off(void);
|
|
|
6f8c14 |
+void do_timeout_action(void);
|
|
|
6f8c14 |
pid_t make_daemon(void);
|
|
|
6f8c14 |
void maximize_priority(void);
|
|
|
6f8c14 |
void sbd_get_uname(void);
|
|
|
6f8c14 |
@@ -153,6 +154,8 @@ extern int debug_mode;
|
|
|
6f8c14 |
extern char *watchdogdev;
|
|
|
6f8c14 |
extern bool watchdogdev_is_default;
|
|
|
6f8c14 |
extern char* local_uname;
|
|
|
6f8c14 |
+extern bool do_flush;
|
|
|
6f8c14 |
+extern char timeout_sysrq_char;
|
|
|
6f8c14 |
|
|
|
6f8c14 |
/* Global, non-tunable variables: */
|
|
|
6f8c14 |
extern int sector_size;
|
|
|
6f8c14 |
diff --git a/src/sbd.sysconfig b/src/sbd.sysconfig
|
|
|
6f8c14 |
index c6d7c07..8f38426 100644
|
|
|
6f8c14 |
--- a/src/sbd.sysconfig
|
|
|
6f8c14 |
+++ b/src/sbd.sysconfig
|
|
|
6f8c14 |
@@ -71,6 +71,24 @@ SBD_WATCHDOG_DEV=/dev/watchdog
|
|
|
6f8c14 |
SBD_WATCHDOG_TIMEOUT=5
|
|
|
6f8c14 |
|
|
|
6f8c14 |
## Type: string
|
|
|
6f8c14 |
+## Default: "flush,reboot"
|
|
|
6f8c14 |
+#
|
|
|
6f8c14 |
+# Actions to be executed when the watchers don't report in time to the sbd
|
|
|
6f8c14 |
+# master process or one of the watchers detects that the master process
|
|
|
6f8c14 |
+# has died.
|
|
|
6f8c14 |
+#
|
|
|
6f8c14 |
+# Set timeout-action to comma-separated combination of
|
|
|
6f8c14 |
+# noflush|flush plus reboot|crashdump|off.
|
|
|
6f8c14 |
+# If only one of the two is given, the other stays at its default.
|
|
|
6f8c14 |
+#
|
|
|
6f8c14 |
+# This doesn't affect actions like off, crashdump, reboot explicitly
|
|
|
6f8c14 |
+# triggered via message slots.
|
|
|
6f8c14 |
+# It also does not configure the action a watchdog would
|
|
|
6f8c14 |
+# trigger should it run off (there is no generic interface).
|
|
|
6f8c14 |
+#
|
|
|
6f8c14 |
+SBD_TIMEOUT_ACTION=flush,reboot
|
|
|
6f8c14 |
+
|
|
|
6f8c14 |
+## Type: string
|
|
|
6f8c14 |
## Default: ""
|
|
|
6f8c14 |
#
|
|
|
6f8c14 |
# Additional options for starting sbd
|
|
|
6f8c14 |
--
|
|
|
6f8c14 |
1.8.3.1
|
|
|
6f8c14 |
|