From f2103011420da4248bd8067d620ba23fd2c2cea5 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Jul 28 2020 19:26:41 +0000 Subject: import sbd-1.4.1-6.el8 --- diff --git a/SOURCES/0004-Fix-sbd-cluster-match-qdevice-sync_timeout-against-w.patch b/SOURCES/0004-Fix-sbd-cluster-match-qdevice-sync_timeout-against-w.patch new file mode 100644 index 0000000..2a9b144 --- /dev/null +++ b/SOURCES/0004-Fix-sbd-cluster-match-qdevice-sync_timeout-against-w.patch @@ -0,0 +1,399 @@ +From 4c3e4049b08799094a64dac289a48deef4d3d916 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Fri, 24 Jul 2020 14:31:01 +0200 +Subject: [PATCH] Fix: sbd-cluster: match qdevice-sync_timeout against + wd-timeout + +--- + configure.ac | 13 +++ + src/sbd-cluster.c | 252 +++++++++++++++++++++++++++++++++++++++++++++--------- + 2 files changed, 223 insertions(+), 42 deletions(-) + +diff --git a/configure.ac b/configure.ac +index 3391c5f..23547cf 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -109,6 +109,12 @@ AC_TEST_NO_QUORUM_POLICY(no_quorum_demote) + dnl check for new pe-API + AC_CHECK_FUNCS(pe_new_working_set) + ++dnl check if votequorum comes with default for qdevice-sync_timeout ++AC_CHECK_DECLS([VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT], ++ HAVE_DECL_VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT=1, ++ HAVE_DECL_VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT=0, ++ [#include ]) ++ + if test "$missing" = "yes"; then + AC_MSG_ERROR([Missing required libraries or functions.]) + fi +@@ -140,6 +146,13 @@ AM_CONDITIONAL(CHECK_TWO_NODE, test "$HAVE_cmap" = "1") + AC_DEFINE_UNQUOTED(CHECK_VOTEQUORUM_HANDLE, $HAVE_votequorum, Turn on periodic checking of votequorum-handle) + AM_CONDITIONAL(CHECK_VOTEQUORUM_HANDLE, test "$HAVE_votequorum" = "1") + ++AC_DEFINE_UNQUOTED(CHECK_QDEVICE_SYNC_TIMEOUT, ++ ($HAVE_DECL_VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT && $HAVE_cmap), ++ Turn on checking if watchdog-timeout and qdevice-sync_timeout are matching) ++AM_CONDITIONAL(CHECK_QDEVICE_SYNC_TIMEOUT, ++ test 
"$HAVE_DECL_VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT" = "1" && ++ test "$HAVE_cmap" = "1") ++ + CONFIGDIR="" + AC_ARG_WITH(configdir, + [ --with-configdir=DIR +diff --git a/src/sbd-cluster.c b/src/sbd-cluster.c +index 13fa580..b6c5512 100644 +--- a/src/sbd-cluster.c ++++ b/src/sbd-cluster.c +@@ -33,7 +33,7 @@ + #include + #include + +-#if CHECK_TWO_NODE ++#if CHECK_TWO_NODE || CHECK_QDEVICE_SYNC_TIMEOUT + #include + #endif + +@@ -86,11 +86,20 @@ sbd_plugin_membership_dispatch(cpg_handle_t handle, + static votequorum_handle_t votequorum_handle = 0; + #endif + ++#if CHECK_TWO_NODE + static bool two_node = false; ++#endif + static bool ever_seen_both = false; + static int cpg_membership_entries = -1; + +-#if CHECK_TWO_NODE ++#if CHECK_QDEVICE_SYNC_TIMEOUT ++#include ++static bool using_qdevice = false; ++static uint32_t qdevice_sync_timeout = /* in seconds */ ++ VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT / 1000; ++#endif ++ ++#if CHECK_TWO_NODE || CHECK_QDEVICE_SYNC_TIMEOUT + #include + + static cmap_handle_t cmap_handle = 0; +@@ -102,28 +111,59 @@ void + sbd_cpg_membership_health_update() + { + if(cpg_membership_entries > 0) { +- bool quorum_is_suspect = ++#if CHECK_TWO_NODE ++ bool quorum_is_suspect_two_node = + (two_node && ever_seen_both && cpg_membership_entries == 1); ++#endif ++#if CHECK_QDEVICE_SYNC_TIMEOUT ++ bool quorum_is_suspect_qdevice_timing = ++ using_qdevice && (qdevice_sync_timeout > timeout_watchdog); ++#endif + +- if (!quorum_is_suspect) { ++ do { ++#if CHECK_TWO_NODE ++ if (quorum_is_suspect_two_node) { ++ /* Alternative would be asking votequorum for number of votes. ++ * Using pacemaker's cpg as source for number of active nodes ++ * avoids binding to an additional library, is definitely ++ * less code to write and we wouldn't have to combine data ++ * from 3 sources (cmap, cpg & votequorum) in a potentially ++ * racy environment. 
++ */ ++ set_servant_health(pcmk_health_noquorum, LOG_WARNING, ++ "Connected to %s but requires both nodes present", ++ name_for_cluster_type(get_cluster_type()) ++ ); ++ break; ++ } ++#endif ++#if CHECK_QDEVICE_SYNC_TIMEOUT ++ if (quorum_is_suspect_qdevice_timing) { ++ /* We can't really trust quorum info as qdevice-sync_timeout ++ * makes reaction of quorum too sluggish for our ++ * watchdog-timeout. ++ */ ++ set_servant_health(pcmk_health_noquorum, LOG_WARNING, ++ "Connected to %s but quorum using qdevice is distrusted " ++ "for SBD as qdevice-sync_timeout (%ds) > watchdog-timeout " ++ "(%lus).", ++ name_for_cluster_type(get_cluster_type()), ++ qdevice_sync_timeout, timeout_watchdog ++ ); ++ break; ++ } ++#endif + set_servant_health(pcmk_health_online, LOG_INFO, +- "Connected to %s (%u members)", +- name_for_cluster_type(get_cluster_type()), +- cpg_membership_entries +- ); +- } else { +- /* Alternative would be asking votequorum for number of votes. +- * Using pacemaker's cpg as source for number of active nodes +- * avoids binding to an additional library, is definitely +- * less code to write and we wouldn't have to combine data +- * from 3 sources (cmap, cpq & votequorum) in a potentially +- * racy environment. +- */ +- set_servant_health(pcmk_health_noquorum, LOG_WARNING, +- "Connected to %s but requires both nodes present", +- name_for_cluster_type(get_cluster_type()) +- ); +- } ++ "Connected to %s (%u members)%s", ++ name_for_cluster_type(get_cluster_type()), ++ cpg_membership_entries, ++#if CHECK_QDEVICE_SYNC_TIMEOUT ++ using_qdevice?" 
using qdevice for quorum":"" ++#else ++ "" ++#endif ++ ); ++ } while (false); + + if (cpg_membership_entries > 1) { + ever_seen_both = true; +@@ -146,7 +186,7 @@ sbd_cpg_membership_dispatch(cpg_handle_t handle, + notify_parent(); + } + +-#if CHECK_TWO_NODE ++#if CHECK_TWO_NODE || CHECK_QDEVICE_SYNC_TIMEOUT + static void sbd_cmap_notify_fn( + cmap_handle_t cmap_handle, + cmap_track_handle_t cmap_track_handle, +@@ -156,21 +196,99 @@ static void sbd_cmap_notify_fn( + struct cmap_notify_value old_val, + void *user_data) + { +- if (new_val.type == CMAP_VALUETYPE_UINT8) { +- switch (event) { +- case CMAP_TRACK_ADD: +- case CMAP_TRACK_MODIFY: +- two_node = *((uint8_t *) new_val.data); +- break; +- case CMAP_TRACK_DELETE: +- two_node = false; +- break; +- default: +- return; +- } +- sbd_cpg_membership_health_update(); +- notify_parent(); ++ switch (event) { ++ case CMAP_TRACK_ADD: ++ case CMAP_TRACK_MODIFY: ++ switch (new_val.type) { ++ case CMAP_VALUETYPE_UINT8: ++#if CHECK_TWO_NODE ++ if (!strcmp(key_name, "quorum.two_node")) { ++ two_node = *((uint8_t *) new_val.data); ++ } else { ++ return; ++ } ++ break; ++#else ++ return; ++#endif ++ case CMAP_VALUETYPE_STRING: ++#if CHECK_QDEVICE_SYNC_TIMEOUT ++ if (!strcmp(key_name, "quorum.device.model")) { ++ using_qdevice = ++ ((new_val.data) && strlen((char *) new_val.data)); ++ } else { ++ return; ++ } ++ break; ++#else ++ return; ++#endif ++ case CMAP_VALUETYPE_UINT32: ++#if CHECK_QDEVICE_SYNC_TIMEOUT ++ if (!strcmp(key_name, "quorum.device.sync_timeout")) { ++ if (new_val.data) { ++ qdevice_sync_timeout = ++ *((uint32_t *) new_val.data) / 1000; ++ } else { ++ qdevice_sync_timeout = ++ VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT / 1000; ++ } ++ } else { ++ return; ++ } ++ break; ++#else ++ return; ++#endif ++ default: ++ return; ++ } ++ break; ++ case CMAP_TRACK_DELETE: ++ switch (new_val.type) { ++ case CMAP_VALUETYPE_UINT8: ++#if CHECK_TWO_NODE ++ if (!strcmp(key_name, "quorum.two_node")) { ++ two_node = false; ++ } else { ++ 
return; ++ } ++ break; ++#else ++ return; ++#endif ++ case CMAP_VALUETYPE_STRING: ++#if CHECK_QDEVICE_SYNC_TIMEOUT ++ if (!strcmp(key_name, "quorum.device.model")) { ++ using_qdevice = false; ++ } else { ++ return; ++ } ++ break; ++#else ++ return; ++#endif ++ case CMAP_VALUETYPE_UINT32: ++#if CHECK_QDEVICE_SYNC_TIMEOUT ++ if (!strcmp(key_name, "quorum.device.sync_timeout")) { ++ qdevice_sync_timeout = ++ VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT / 1000; ++ } else { ++ return; ++ } ++ break; ++#else ++ return; ++#endif ++ default: ++ return; ++ } ++ break; ++ default: ++ return; + } ++ sbd_cpg_membership_health_update(); ++ notify_parent(); + } + + static gboolean +@@ -200,9 +318,14 @@ cmap_destroy(void) + } + + static gboolean +-sbd_get_two_node(void) ++verify_against_cmap_config(void) + { ++#if CHECK_TWO_NODE + uint8_t two_node_u8 = 0; ++#endif ++#if CHECK_QDEVICE_SYNC_TIMEOUT ++ char *qdevice_model = NULL; ++#endif + int cmap_fd; + + if (!track_handle) { +@@ -211,12 +334,31 @@ sbd_get_two_node(void) + goto out; + } + ++#if CHECK_TWO_NODE + if (cmap_track_add(cmap_handle, "quorum.two_node", + CMAP_TRACK_DELETE|CMAP_TRACK_MODIFY|CMAP_TRACK_ADD, + sbd_cmap_notify_fn, NULL, &track_handle) != CS_OK) { + cl_log(LOG_WARNING, "Failed adding CMAP tracker for 2Node-mode\n"); + goto out; + } ++#endif ++ ++#if CHECK_QDEVICE_SYNC_TIMEOUT ++ if (cmap_track_add(cmap_handle, "quorum.device.model", ++ CMAP_TRACK_DELETE|CMAP_TRACK_MODIFY|CMAP_TRACK_ADD, ++ sbd_cmap_notify_fn, NULL, &track_handle) != CS_OK) { ++ cl_log(LOG_WARNING, "Failed adding CMAP tracker for qdevice-model\n"); ++ goto out; ++ } ++ ++ if (cmap_track_add(cmap_handle, "quorum.device.sync_timeout", ++ CMAP_TRACK_DELETE|CMAP_TRACK_MODIFY|CMAP_TRACK_ADD, ++ sbd_cmap_notify_fn, NULL, &track_handle) != CS_OK) { ++ cl_log(LOG_WARNING, ++ "Failed adding CMAP tracker for qdevice-sync_timeout\n"); ++ goto out; ++ } ++#endif + + /* add the tracker to mainloop */ + if (cmap_fd_get(cmap_handle, &cmap_fd) != CS_OK) { +@@ 
-232,13 +374,39 @@ sbd_get_two_node(void) + g_source_attach(cmap_source, NULL); + } + +- if (cmap_get_uint8(cmap_handle, "quorum.two_node", &two_node_u8) == CS_OK) { ++#if CHECK_TWO_NODE ++ if (cmap_get_uint8(cmap_handle, "quorum.two_node", &two_node_u8) ++ == CS_OK) { + cl_log(two_node_u8? LOG_NOTICE : LOG_INFO, + "Corosync is%s in 2Node-mode", two_node_u8?"":" not"); + two_node = two_node_u8; + } else { + cl_log(LOG_INFO, "quorum.two_node not present in cmap\n"); + } ++#endif ++ ++#if CHECK_QDEVICE_SYNC_TIMEOUT ++ if (cmap_get_string(cmap_handle, "quorum.device.model", ++ &qdevice_model) == CS_OK) { ++ using_qdevice = qdevice_model && strlen(qdevice_model); ++ cl_log(using_qdevice? LOG_NOTICE : LOG_INFO, ++ "Corosync is%s using qdevice", using_qdevice?"":" not"); ++ } else { ++ cl_log(LOG_INFO, "quorum.device.model not present in cmap\n"); ++ } ++ ++ if (cmap_get_uint32(cmap_handle, "quorum.device.sync_timeout", ++ &qdevice_sync_timeout) == CS_OK) { ++ qdevice_sync_timeout /= 1000; ++ cl_log(LOG_INFO, ++ "Corosync is using qdevice-sync_timeout=%ds", ++ qdevice_sync_timeout); ++ } else { ++ cl_log(LOG_INFO, ++ "quorum.device.sync_timeout not present in cmap\n"); ++ } ++#endif ++ + return TRUE; + + out: +@@ -331,15 +499,15 @@ sbd_membership_connect(void) + } else { + cl_log(LOG_INFO, "Attempting connection to %s", name_for_cluster_type(stack)); + +-#if SUPPORT_COROSYNC && CHECK_TWO_NODE +- if (sbd_get_two_node()) { ++#if SUPPORT_COROSYNC && (CHECK_TWO_NODE || CHECK_QDEVICE_SYNC_TIMEOUT) ++ if (verify_against_cmap_config()) { + #endif + + if(crm_cluster_connect(&cluster)) { + connected = true; + } + +-#if SUPPORT_COROSYNC && CHECK_TWO_NODE ++#if SUPPORT_COROSYNC && (CHECK_TWO_NODE || CHECK_QDEVICE_SYNC_TIMEOUT) + } + #endif + } +@@ -362,7 +530,7 @@ sbd_membership_destroy(gpointer user_data) + cl_log(LOG_WARNING, "Lost connection to %s", name_for_cluster_type(get_cluster_type())); + + if (get_cluster_type() != pcmk_cluster_unknown) { +-#if SUPPORT_COROSYNC && 
CHECK_TWO_NODE ++#if SUPPORT_COROSYNC && (CHECK_TWO_NODE || CHECK_QDEVICE_SYNC_TIMEOUT) + cmap_destroy(); + #endif + } +-- +1.8.3.1 + diff --git a/SOURCES/0005-Fix-sbd-pacemaker-sync-with-pacemakerd-for-robustnes.patch b/SOURCES/0005-Fix-sbd-pacemaker-sync-with-pacemakerd-for-robustnes.patch new file mode 100644 index 0000000..6d920ab --- /dev/null +++ b/SOURCES/0005-Fix-sbd-pacemaker-sync-with-pacemakerd-for-robustnes.patch @@ -0,0 +1,231 @@ +From 5b5ffac4cce861f3621267a73d2ad29f6d807335 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Tue, 10 Dec 2019 13:16:45 +0100 +Subject: [PATCH] Fix: sbd-pacemaker: sync with pacemakerd for robustness + +State query ping of pacemakerd prevents pacemakerd from +starting any sub-daemons (and thus services) if sbd can't +reach it via ipc. As a health-check get timestamp from +pacemakerd. On shutdown fetch info about graceful +shutdown from pacemakerd. +Use new pacemakerd-api provided by pacemaker. +--- + configure.ac | 4 ++ + src/sbd-pacemaker.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++++---- + 2 files changed, 126 insertions(+), 10 deletions(-) + +diff --git a/configure.ac b/configure.ac +index 23547cf..11d12f0 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -81,6 +81,7 @@ AC_CHECK_LIB(crmcluster, crm_peer_init, , missing="yes") + AC_CHECK_LIB(uuid, uuid_unparse, , missing="yes") + AC_CHECK_LIB(cmap, cmap_initialize, , HAVE_cmap=0) + AC_CHECK_LIB(votequorum, votequorum_getinfo, , HAVE_votequorum=0) ++AC_CHECK_LIB(crmcommon, pcmk_pacemakerd_api_ping, HAVE_pacemakerd_api=1, HAVE_pacemakerd_api=0) + + dnl pacemaker >= 1.1.8 + AC_CHECK_HEADERS(crm/cluster.h) +@@ -153,6 +154,9 @@ AM_CONDITIONAL(CHECK_QDEVICE_SYNC_TIMEOUT, + test "$HAVE_DECL_VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT" = "1" && + test "$HAVE_cmap" = "1") + ++AC_DEFINE_UNQUOTED(USE_PACEMAKERD_API, $HAVE_pacemakerd_api, Turn on synchronization between sbd & pacemakerd) ++AM_CONDITIONAL(USE_PACEMAKERD_API, test "$HAVE_pacemakerd_api" = "1") ++ +
CONFIGDIR="" + AC_ARG_WITH(configdir, + [ --with-configdir=DIR +diff --git a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c +index 6e53557..1243bfc 100644 +--- a/src/sbd-pacemaker.c ++++ b/src/sbd-pacemaker.c +@@ -83,6 +83,62 @@ pe_free_working_set(pe_working_set_t *data_set) + + #endif + ++static void clean_up(int rc); ++ ++#if USE_PACEMAKERD_API ++#include ++ ++static pcmk_ipc_api_t *pacemakerd_api = NULL; ++static time_t last_ok = (time_t) 0; ++ ++static void ++pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, ++ enum pcmk_ipc_event event_type, crm_exit_t status, ++ void *event_data, void *user_data) ++{ ++ pcmk_pacemakerd_api_reply_t *reply = event_data; ++ ++ switch (event_type) { ++ case pcmk_ipc_event_disconnect: ++ /* Unexpected */ ++ cl_log(LOG_ERR, "Lost connection to pacemakerd\n"); ++ return; ++ ++ case pcmk_ipc_event_reply: ++ break; ++ ++ default: ++ return; ++ } ++ ++ if (status != CRM_EX_OK) { ++ cl_log(LOG_ERR, "Bad reply from pacemakerd: %s", ++ crm_exit_str(status)); ++ return; ++ } ++ ++ if (reply->reply_type != pcmk_pacemakerd_reply_ping) { ++ cl_log(LOG_ERR, "Unknown reply type %d from pacemakerd\n", ++ reply->reply_type); ++ } else { ++ if ((reply->data.ping.last_good != (time_t) 0) && ++ (reply->data.ping.status == pcmk_rc_ok)) { ++ switch (reply->data.ping.state) { ++ case pcmk_pacemakerd_state_running: ++ case pcmk_pacemakerd_state_shutting_down: ++ last_ok = reply->data.ping.last_good; ++ break; ++ case pcmk_pacemakerd_state_shutdown_complete: ++ clean_up(EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN); ++ break; ++ default: ++ break; ++ } ++ } ++ } ++} ++#endif ++ + extern int disk_count; + + static void clean_up(int rc); +@@ -133,10 +189,13 @@ mon_cib_connection_destroy(gpointer user_data) + cib->cmds->signoff(cib); + /* retrigger as last one might have been skipped */ + mon_refresh_state(NULL); ++ ++#if !USE_PACEMAKERD_API + if (pcmk_clean_shutdown) { + /* assume a graceful pacemaker-shutdown */ + clean_up(EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN); + } 
++#endif + /* getting here we aren't sure about the pacemaker-state + so try to use the timeout to reconnect and get + everything sorted out again +@@ -196,6 +255,13 @@ mon_timer_notify(gpointer data) + g_source_remove(timer_id_notify); + } + ++#if USE_PACEMAKERD_API ++ { ++ time_t now = time(NULL); ++ ++ if ((last_ok <= now) && (now - last_ok < timeout_watchdog)) { ++#endif ++ + if (cib_connected) { + if (counter == counter_max) { + mon_retrieve_current_cib(); +@@ -207,6 +273,16 @@ mon_timer_notify(gpointer data) + counter++; + } + } ++ ++#if USE_PACEMAKERD_API ++ } ++ } ++ if (pcmk_connect_ipc(pacemakerd_api, ++ pcmk_ipc_dispatch_main) == pcmk_rc_ok) { ++ pcmk_pacemakerd_api_ping(pacemakerd_api, crm_system_name); ++ } ++#endif ++ + timer_id_notify = g_timeout_add(timeout_loop * 1000, mon_timer_notify, NULL); + return FALSE; + } +@@ -526,6 +602,14 @@ clean_up(int rc) + cib = NULL; + } + ++#if USE_PACEMAKERD_API ++ if (pacemakerd_api != NULL) { ++ pcmk_ipc_api_t *capi = pacemakerd_api; ++ pacemakerd_api = NULL; // Ensure we can't free this twice ++ pcmk_free_ipc_api(capi); ++ } ++#endif ++ + if (rc >= 0) { + exit(rc); + } +@@ -535,11 +619,11 @@ clean_up(int rc) + int + servant_pcmk(const char *diskname, int mode, const void* argp) + { +- int exit_code = 0; ++ int exit_code = 0; + +- crm_system_name = strdup("sbd:pcmk"); +- cl_log(LOG_NOTICE, "Monitoring Pacemaker health"); +- set_proc_title("sbd: watcher: Pacemaker"); ++ crm_system_name = strdup("sbd:pcmk"); ++ cl_log(LOG_NOTICE, "Monitoring Pacemaker health"); ++ set_proc_title("sbd: watcher: Pacemaker"); + setenv("PCMK_watchdog", "true", 1); + + if(debug == 0) { +@@ -548,12 +632,40 @@ servant_pcmk(const char *diskname, int mode, const void* argp) + } + + +- if (data_set == NULL) { +- data_set = pe_new_working_set(); +- } +- if (data_set == NULL) { +- return -1; +- } ++ if (data_set == NULL) { ++ data_set = pe_new_working_set(); ++ } ++ if (data_set == NULL) { ++ return -1; ++ } ++ ++#if USE_PACEMAKERD_API ++ { ++ 
int rc; ++ ++ rc = pcmk_new_ipc_api(&pacemakerd_api, pcmk_ipc_pacemakerd); ++ if (pacemakerd_api == NULL) { ++ cl_log(LOG_ERR, "Could not connect to pacemakerd: %s\n", ++ pcmk_rc_str(rc)); ++ return -1; ++ } ++ pcmk_register_ipc_callback(pacemakerd_api, pacemakerd_event_cb, NULL); ++ do { ++ rc = pcmk_connect_ipc(pacemakerd_api, pcmk_ipc_dispatch_main); ++ if (rc != pcmk_rc_ok) { ++ cl_log(LOG_DEBUG, "Could not connect to pacemakerd: %s\n", ++ pcmk_rc_str(rc)); ++ sleep(reconnect_msec / 1000); ++ } ++ } while (rc != pcmk_rc_ok); ++ /* send a ping to pacemakerd to wake it up */ ++ pcmk_pacemakerd_api_ping(pacemakerd_api, crm_system_name); ++ /* cib should come up now as well so it's time ++ * to have the inquisitor have a closer look ++ */ ++ notify_parent(); ++ } ++#endif + + if (current_cib == NULL) { + cib = cib_new(); +-- +1.8.3.1 + diff --git a/SOURCES/0006-Fix-make-syncing-of-pacemaker-resource-startup-confi.patch b/SOURCES/0006-Fix-make-syncing-of-pacemaker-resource-startup-confi.patch new file mode 100644 index 0000000..0c38862 --- /dev/null +++ b/SOURCES/0006-Fix-make-syncing-of-pacemaker-resource-startup-confi.patch @@ -0,0 +1,110 @@ +From f4d38a073ce3bfa2078792f1cc85229457430292 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Tue, 21 Jul 2020 18:30:30 +0200 +Subject: [PATCH] Fix: make syncing of pacemaker resource startup configurable + +--- + src/sbd-inquisitor.c | 20 ++++++++++++++++++++ + src/sbd-pacemaker.c | 6 +++--- + src/sbd.h | 1 + + src/sbd.sysconfig | 14 ++++++++++++++ + 4 files changed, 38 insertions(+), 3 deletions(-) + +diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c +index 52ede8a..962725e 100644 +--- a/src/sbd-inquisitor.c ++++ b/src/sbd-inquisitor.c +@@ -35,6 +35,7 @@ bool do_flush = true; + char timeout_sysrq_char = 'b'; + bool move_to_root_cgroup = true; + bool enforce_moving_to_root_cgroup = false; ++bool sync_resource_startup = false; + + int parse_device_line(const char *line); + +@@ -964,6 +965,25 @@ int main(int 
argc, char **argv, char **envp) + } + } + ++ value = getenv("SBD_SYNC_RESOURCE_STARTUP"); ++ if(value) { ++ sync_resource_startup = crm_is_true(value); ++ } ++#if !USE_PACEMAKERD_API ++ if (sync_resource_startup) { ++ fprintf(stderr, "Failed to sync resource-startup as " ++ "SBD was built against pacemaker not supporting pacemakerd-API.\n"); ++ exit_status = -1; ++ goto out; ++ } ++#else ++ if (!sync_resource_startup) { ++ cl_log(LOG_WARNING, "SBD built against pacemaker supporting " ++ "pacemakerd-API. Should think about enabling " ++ "SBD_SYNC_RESOURCE_STARTUP."); ++ } ++#endif ++ + while ((c = getopt(argc, argv, "czC:DPRTWZhvw:d:n:p:1:2:3:4:5:t:I:F:S:s:r:")) != -1) { + switch (c) { + case 'D': +diff --git a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c +index 1243bfc..aa1fb57 100644 +--- a/src/sbd-pacemaker.c ++++ b/src/sbd-pacemaker.c +@@ -190,12 +190,12 @@ mon_cib_connection_destroy(gpointer user_data) + /* retrigger as last one might have been skipped */ + mon_refresh_state(NULL); + +-#if !USE_PACEMAKERD_API +- if (pcmk_clean_shutdown) { ++ ++ if ((pcmk_clean_shutdown) && (!sync_resource_startup)) { + /* assume a graceful pacemaker-shutdown */ + clean_up(EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN); + } +-#endif ++ + /* getting here we aren't sure about the pacemaker-state + so try to use the timeout to reconnect and get + everything sorted out again +diff --git a/src/sbd.h b/src/sbd.h +index 382e553..3b6647c 100644 +--- a/src/sbd.h ++++ b/src/sbd.h +@@ -161,6 +161,7 @@ extern bool do_flush; + extern char timeout_sysrq_char; + extern bool move_to_root_cgroup; + extern bool enforce_moving_to_root_cgroup; ++extern bool sync_resource_startup; + + /* Global, non-tunable variables: */ + extern int sector_size; +diff --git a/src/sbd.sysconfig b/src/sbd.sysconfig +index 33b50d0..b32e826 100644 +--- a/src/sbd.sysconfig ++++ b/src/sbd.sysconfig +@@ -106,6 +106,20 @@ SBD_TIMEOUT_ACTION=flush,reboot + # + SBD_MOVE_TO_ROOT_CGROUP=auto + ++## Type: yesno ++## Default: no ++# ++# If 
resource startup syncing is enabled then pacemakerd is ++# going to wait to be pinged via IPC before it starts resources. ++# On shutdown pacemakerd is going to wait in a state where it ++# has cleanly shut down resources till sbd fetches that state. ++# ++# Default is 'no' to prevent pacemaker from waiting for a ++# ping that will never come when working together with an sbd ++# version that doesn't support the feature. ++# ++SBD_SYNC_RESOURCE_STARTUP=no ++ + ## Type: string + ## Default: "" + # +-- +1.8.3.1 + diff --git a/SPECS/sbd.spec b/SPECS/sbd.spec index 1123a9c..0e7f257 100644 --- a/SPECS/sbd.spec +++ b/SPECS/sbd.spec @@ -18,7 +18,7 @@ %global commit 25fce8a7d5e8cd5abc2379077381b10bd6cec183 %global shortcommit %(c=%{commit}; echo ${c:0:7}) %global github_owner Clusterlabs -%global buildnum 5 +%global buildnum 6 Name: sbd Summary: Storage-based death @@ -31,6 +31,9 @@ Source0: https://github.com/%{github_owner}/%{name}/archive/%{commit}/%{n Patch1: 0001-Fix-regressions.sh-make-parameter-passing-consistent.patch Patch2: 0002-Doc-add-environment-section-to-man-page.patch Patch3: 0003-Fix-sbd-pacemaker-handle-new-no_quorum_demote.patch +Patch4: 0004-Fix-sbd-cluster-match-qdevice-sync_timeout-against-w.patch +Patch5: 0005-Fix-sbd-pacemaker-sync-with-pacemakerd-for-robustnes.patch +Patch6: 0006-Fix-make-syncing-of-pacemaker-resource-startup-confi.patch BuildRoot: %{_tmppath}/%{name}-%{version}-build BuildRequires: autoconf BuildRequires: automake @@ -46,6 +49,7 @@ BuildRequires: pkgconfig BuildRequires: systemd BuildRequires: make Conflicts: fence-agents-sbd < 4.2.1-38 +Requires: pacemaker >= 2.0.4-5 %if 0%{?rhel} > 0 ExclusiveArch: i686 x86_64 s390x ppc64le aarch64 @@ -76,6 +80,7 @@ regression-testing sbd. 
sed -i src/sbd.sysconfig -e "s/Default: 5/Default: 15/" sed -i src/sbd.sysconfig -e "s/SBD_WATCHDOG_TIMEOUT=5/SBD_WATCHDOG_TIMEOUT=15/" %endif +sed -i src/sbd.sysconfig -e "s/SBD_SYNC_RESOURCE_STARTUP=no/SBD_SYNC_RESOURCE_STARTUP=yes/" ########################################################### @@ -156,6 +161,13 @@ fi %{_libdir}/libsbdtestbed* %changelog +* Mon Jul 27 2020 Klaus Wenninger - 1.4.1-6 +- match qdevice-sync_timeout against wd-timeout +- sync startup/shutdown via pacemakerd-api + + Resolves: rhbz#1703128 + Resolves: rhbz#1743726 + * Wed Jun 24 2020 Klaus Wenninger - 1.4.1-5 - rebuild against pacemaker having new no_quorum_demote