|
 |
1f016a |
commit e073613f0727a3646732d0d9bb4f2050017476b3
|
|
 |
1f016a |
Author: Andrew Beekhof <andrew@beekhof.net>
|
|
 |
1f016a |
Date: Tue Oct 28 10:32:02 2014 +1100
|
|
 |
1f016a |
|
|
 |
1f016a |
Fix: watchdog: Allow startup without sbd
|
|
 |
1f016a |
|
|
 |
1f016a |
diff --git a/crmd/control.c b/crmd/control.c
|
|
 |
1f016a |
index 0332f10..8cc1cfa 100644
|
|
 |
1f016a |
--- a/crmd/control.c
|
|
 |
1f016a |
+++ b/crmd/control.c
|
|
 |
1f016a |
@@ -874,7 +874,7 @@ pe_cluster_option crmd_opts[] = {
|
|
 |
1f016a |
{ "crmd-integration-timeout", NULL, "time", NULL, "3min", &check_timer, "*** Advanced Use Only ***.", "If need to adjust this value, it probably indicates the presence of a bug." },
|
|
 |
1f016a |
{ "crmd-finalization-timeout", NULL, "time", NULL, "30min", &check_timer, "*** Advanced Use Only ***.", "If you need to adjust this value, it probably indicates the presence of a bug." },
|
|
 |
1f016a |
{ "crmd-transition-delay", NULL, "time", NULL, "0s", &check_timer, "*** Advanced Use Only ***\nEnabling this option will slow down cluster recovery under all conditions", "Delay cluster recovery for the configured interval to allow for additional/related events to occur.\nUseful if your configuration is sensitive to the order in which ping updates arrive." },
|
|
 |
1f016a |
- { "stonith-watchdog-timeout", NULL, "time", NULL, "0s", &check_timer,
|
|
 |
1f016a |
+ { "stonith-watchdog-timeout", NULL, "time", NULL, NULL, &check_timer,
|
|
 |
1f016a |
"How long to wait before we can assume nodes are safely down", NULL },
|
|
 |
1f016a |
{ "no-quorum-policy", "no_quorum_policy", "enum", "stop, freeze, ignore, suicide", "stop", &check_quorum, NULL, NULL },
|
|
 |
1f016a |
|
|
 |
1f016a |
@@ -911,6 +911,8 @@ config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void
|
|
 |
1f016a |
const char *value = NULL;
|
|
 |
1f016a |
GHashTable *config_hash = NULL;
|
|
 |
1f016a |
crm_time_t *now = crm_time_new(NULL);
|
|
 |
1f016a |
+ long st_timeout = 0;
|
|
 |
1f016a |
+ long sbd_timeout = 0;
|
|
 |
1f016a |
|
|
 |
1f016a |
if (rc != pcmk_ok) {
|
|
 |
1f016a |
fsa_data_t *msg_data = NULL;
|
|
 |
1f016a |
@@ -946,17 +948,36 @@ config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void
|
|
 |
1f016a |
}
|
|
 |
1f016a |
|
|
 |
1f016a |
value = getenv("SBD_WATCHDOG_TIMEOUT");
|
|
 |
1f016a |
+ sbd_timeout = crm_get_msec(value);
|
|
 |
1f016a |
|
|
 |
1f016a |
- if(value == NULL) {
|
|
 |
1f016a |
- value = crmd_pref(config_hash, "stonith-watchdog-timeout");
|
|
 |
1f016a |
- }
|
|
 |
1f016a |
+ value = crmd_pref(config_hash, "stonith-watchdog-timeout");
|
|
 |
1f016a |
+ st_timeout = crm_get_msec(value);
|
|
 |
1f016a |
|
|
 |
1f016a |
- if(crm_get_msec(value) > 0 && !daemon_option_enabled(crm_system_name, "watchdog")) {
|
|
 |
1f016a |
+ if(st_timeout > 0 && !daemon_option_enabled(crm_system_name, "watchdog")) {
|
|
 |
1f016a |
do_crm_log_always(LOG_EMERG, "Shutting down pacemaker, no watchdog device configured");
|
|
 |
1f016a |
crmd_exit(DAEMON_RESPAWN_STOP);
|
|
 |
1f016a |
|
|
 |
1f016a |
- } else if(crm_get_msec(value) <= 0 && daemon_option_enabled(crm_system_name, "watchdog")) {
|
|
 |
1f016a |
- crm_warn("Watchdog enabled but no stonith-watchdog-timeout configured");
|
|
 |
1f016a |
+ } else if(!daemon_option_enabled(crm_system_name, "watchdog")) {
|
|
 |
1f016a |
+ crm_trace("Watchdog disabled");
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ } else if(value == NULL && sbd_timeout > 0) {
|
|
 |
1f016a |
+ char *timeout = NULL;
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ st_timeout = 2 * sbd_timeout / 1000;
|
|
 |
1f016a |
+ timeout = g_strdup_printf("%lds", st_timeout);
|
|
 |
1f016a |
+ crm_notice("Setting stonith-watchdog-timeout=%s", timeout);
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ update_attr_delegate(fsa_cib_conn, cib_none, XML_CIB_TAG_CRMCONFIG, NULL, NULL, NULL, NULL,
|
|
 |
1f016a |
+ "stonith-watchdog-timeout", timeout, FALSE, NULL, NULL);
|
|
 |
1f016a |
+ free(timeout);
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ } else if(st_timeout <= 0) {
|
|
 |
1f016a |
+ crm_notice("Watchdog enabled but stonith-watchdog-timeout is disabled");
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ } else if(st_timeout < sbd_timeout) {
|
|
 |
1f016a |
+ do_crm_log_always(LOG_EMERG, "Shutting down pacemaker, stonith-watchdog-timeout (%ldms) is too short (must be greater than %ldms)",
|
|
 |
1f016a |
+ st_timeout, sbd_timeout);
|
|
 |
1f016a |
+ crmd_exit(DAEMON_RESPAWN_STOP);
|
|
 |
1f016a |
}
|
|
 |
1f016a |
|
|
 |
1f016a |
value = crmd_pref(config_hash, "no-quorum-policy");
|
|
 |
1f016a |
diff --git a/fencing/main.c b/fencing/main.c
|
|
 |
1f016a |
index fe6560d..2694452 100644
|
|
 |
1f016a |
--- a/fencing/main.c
|
|
 |
1f016a |
+++ b/fencing/main.c
|
|
 |
1f016a |
@@ -1003,7 +1003,8 @@ update_cib_cache_cb(const char *event, xmlNode * msg)
|
|
 |
1f016a |
}
|
|
 |
1f016a |
|
|
 |
1f016a |
if(daemon_option_enabled(crm_system_name, "watchdog")) {
|
|
 |
1f016a |
- const char *value = getenv("SBD_WATCHDOG_TIMEOUT");
|
|
 |
1f016a |
+ const char *value = NULL;
|
|
 |
1f016a |
+ long timeout_ms = 0;
|
|
 |
1f016a |
|
|
 |
1f016a |
if(value == NULL) {
|
|
 |
1f016a |
stonith_watchdog_xml = get_xpath_object("//nvpair[@name='stonith-watchdog-timeout']", local_cib, LOG_TRACE);
|
|
 |
1f016a |
@@ -1013,10 +1014,12 @@ update_cib_cache_cb(const char *event, xmlNode * msg)
|
|
 |
1f016a |
}
|
|
 |
1f016a |
|
|
 |
1f016a |
if(value) {
|
|
 |
1f016a |
- stonith_watchdog_timeout_ms = crm_get_msec(value);
|
|
 |
1f016a |
+ timeout_ms = crm_get_msec(value);
|
|
 |
1f016a |
+ }
|
|
 |
1f016a |
|
|
 |
1f016a |
- } else {
|
|
 |
1f016a |
- stonith_watchdog_timeout_ms = 0;
|
|
 |
1f016a |
+ if(timeout_ms != stonith_watchdog_timeout_ms) {
|
|
 |
1f016a |
+ crm_notice("New watchdog timeout %lds (was %lds)", timeout_ms/1000, stonith_watchdog_timeout_ms/1000);
|
|
 |
1f016a |
+ stonith_watchdog_timeout_ms = timeout_ms;
|
|
 |
1f016a |
}
|
|
 |
1f016a |
}
|
|
 |
1f016a |
|
|
 |
1f016a |
diff --git a/lib/common/utils.c b/lib/common/utils.c
|
|
 |
1f016a |
index 6b8b12c..eacd8e9 100644
|
|
 |
1f016a |
--- a/lib/common/utils.c
|
|
 |
1f016a |
+++ b/lib/common/utils.c
|
|
 |
1f016a |
@@ -286,6 +286,9 @@ cluster_option(GHashTable * options, gboolean(*validate) (const char *),
|
|
 |
1f016a |
|
|
 |
1f016a |
if (options == NULL) {
|
|
 |
1f016a |
return def_value;
|
|
 |
1f016a |
+
|
|
 |
1f016a |
+ } else if(def_value == NULL) {
|
|
 |
1f016a |
+ return def_value;
|
|
 |
1f016a |
}
|
|
 |
1f016a |
|
|
 |
1f016a |
g_hash_table_insert(options, strdup(name), strdup(def_value));
|
|
 |
1f016a |
@@ -319,7 +322,6 @@ get_cluster_pref(GHashTable * options, pe_cluster_option * option_list, int len,
|
|
 |
1f016a |
}
|
|
 |
1f016a |
}
|
|
 |
1f016a |
CRM_CHECK(found, crm_err("No option named: %s", name));
|
|
 |
1f016a |
- CRM_ASSERT(value != NULL);
|
|
 |
1f016a |
return value;
|
|
 |
1f016a |
}
|
|
 |
1f016a |
|
|
 |
1f016a |
diff --git a/mcp/pacemaker.c b/mcp/pacemaker.c
|
|
 |
1f016a |
index c7852c3..fa2c707 100644
|
|
 |
1f016a |
--- a/mcp/pacemaker.c
|
|
 |
1f016a |
+++ b/mcp/pacemaker.c
|
|
 |
1f016a |
@@ -1040,6 +1040,8 @@ main(int argc, char **argv)
|
|
 |
1f016a |
|
|
 |
1f016a |
if(pcmk_locate_sbd() > 0) {
|
|
 |
1f016a |
setenv("PCMK_watchdog", "true", 1);
|
|
 |
1f016a |
+ } else {
|
|
 |
1f016a |
+ setenv("PCMK_watchdog", "false", 1);
|
|
 |
1f016a |
}
|
|
 |
1f016a |
|
|
 |
1f016a |
find_and_track_existing_processes();
|