diff --git a/.gitignore b/.gitignore index 28f41b4..01e5d42 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/sbd-a74b4d25a3eb93fe1abbe6e3ebfd2b16cf48873f.tar.gz +SOURCES/sbd-7f33d1a409d0a4e2cd69946688c48eaa8f3c5d26.tar.gz diff --git a/.sbd.metadata b/.sbd.metadata index b9ffa89..4341adc 100644 --- a/.sbd.metadata +++ b/.sbd.metadata @@ -1 +1 @@ -c7d993891714de43c052a87b0165a97a6e27cfb5 SOURCES/sbd-a74b4d25a3eb93fe1abbe6e3ebfd2b16cf48873f.tar.gz +7bf4ad26875cafa743ca96aec9dae1739bd6281b SOURCES/sbd-7f33d1a409d0a4e2cd69946688c48eaa8f3c5d26.tar.gz diff --git a/SOURCES/0001-Fix-sbd-cluster-finalize-cmap-connection-if-disconne.patch b/SOURCES/0001-Fix-sbd-cluster-finalize-cmap-connection-if-disconne.patch new file mode 100644 index 0000000..a2cc41a --- /dev/null +++ b/SOURCES/0001-Fix-sbd-cluster-finalize-cmap-connection-if-disconne.patch @@ -0,0 +1,79 @@ +From f8f980340256ab5bef5385cd3bc082fdfb7613ed Mon Sep 17 00:00:00 2001 +From: "Gao,Yan" +Date: Thu, 7 Mar 2019 15:01:26 +0100 +Subject: [PATCH] Fix: sbd-cluster: finalize cmap connection if disconnected + from cluster + +Previously if sbd cluster servant anyhow got dis-/reconnected from the +cluster, it'd start hogging CPU keeping polling the main loop source +from the old cmap connection. +--- + src/sbd-cluster.c | 38 ++++++++++++++++++++++++++------------ + 1 file changed, 26 insertions(+), 12 deletions(-) + +diff --git a/src/sbd-cluster.c b/src/sbd-cluster.c +index 51bb456..541212f 100644 +--- a/src/sbd-cluster.c ++++ b/src/sbd-cluster.c +@@ -174,6 +174,25 @@ cmap_dispatch_callback (gpointer user_data) + return TRUE; + } + ++static void ++cmap_destroy(void) ++{ ++ if (cmap_source) { ++ g_source_destroy(cmap_source); ++ cmap_source = NULL; ++ } ++ ++ if (track_handle) { ++ cmap_track_delete(cmap_handle, track_handle); ++ track_handle = 0; ++ } ++ ++ if (cmap_handle) { ++ cmap_finalize(cmap_handle); ++ cmap_handle = 0; ++ } ++} ++ + static gboolean + sbd_get_two_node(void) + { +@@ -217,18 +236,7 @@ sbd_get_two_node(void) + return TRUE; + + out: +- if (cmap_source) { +- g_source_destroy(cmap_source); +- cmap_source = NULL; +- } +- if (track_handle) { +- cmap_track_delete(cmap_handle, track_handle); +- track_handle = 0; +- } +- if (cmap_handle) { +- cmap_finalize(cmap_handle); +- cmap_handle = 0; +- } ++ cmap_destroy(); + + return FALSE; + } +@@ -327,6 +335,12 @@ sbd_membership_destroy(gpointer user_data) + { + cl_log(LOG_WARNING, "Lost connection to %s", name_for_cluster_type(get_cluster_type())); + ++ if (get_cluster_type() != pcmk_cluster_unknown) { ++#if SUPPORT_COROSYNC && CHECK_TWO_NODE ++ cmap_destroy(); ++#endif ++ } ++ + set_servant_health(pcmk_health_unclean, LOG_ERR, "Cluster connection terminated"); + notify_parent(); + +-- +1.8.3.1 + diff --git a/SOURCES/0001-make-pacemaker-dlm-wait-for-sbd-start.patch b/SOURCES/0001-make-pacemaker-dlm-wait-for-sbd-start.patch deleted file mode 100644 index 0793297..0000000 --- a/SOURCES/0001-make-pacemaker-dlm-wait-for-sbd-start.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 5f1ac8f07fd81a2c60db39dd5a28debbadfe3ec5 Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Thu, 14 Dec 2017 17:32:08 +0100 -Subject: [PATCH] Fix: systemd: make pacemaker & dlm wait for sbd-start to - complete - ---- - src/sbd.service.in | 4 +++- - src/sbd_remote.service.in | 2 +- - 2 files changed, 4 insertions(+), 2 deletions(-) - -diff --git a/src/sbd.service.in b/src/sbd.service.in -index ef1bd16..401c2d6 100644 ---- a/src/sbd.service.in -+++ b/src/sbd.service.in -@@ -1,6 +1,7 @@ - [Unit] - Description=Shared-storage based fencing daemon - Before=pacemaker.service -+Before=dlm.service - After=systemd-modules-load.service iscsi.service - PartOf=corosync.service - RefuseManualStop=true -@@ -22,4 +23,5 @@ Restart=on-abort - - [Install] - RequiredBy=corosync.service -- -+RequiredBy=pacemaker.service -+RequiredBy=dlm.service -diff --git a/src/sbd_remote.service.in b/src/sbd_remote.service.in -index e05f80e..cefd511 100644 ---- a/src/sbd_remote.service.in -+++ b/src/sbd_remote.service.in -@@ -21,4 +21,4 @@ Restart=on-abort - - [Install] - RequiredBy=pacemaker_remote.service -- -+RequiredBy=dlm.service --- -1.8.3.1 - diff --git a/SOURCES/0002-Fix-sbd-pacemaker-make-handling-of-cib-connection-lo.patch b/SOURCES/0002-Fix-sbd-pacemaker-make-handling-of-cib-connection-lo.patch new file mode 100644 index 0000000..0d70c92 --- /dev/null +++ b/SOURCES/0002-Fix-sbd-pacemaker-make-handling-of-cib-connection-lo.patch @@ -0,0 +1,302 @@ +From a716a8ddd3df615009bcff3bd96dd9ae64cb5f68 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Tue, 19 Mar 2019 21:36:15 +0100 +Subject: [PATCH] Fix: sbd-pacemaker: make handling of cib-connection loss more + robust + +Exit pcmk-servant on graceful pacemaker shutdown and go back +to state before pacemaker was detected initially. +Purge all cib-traces otherwise and try to reconnect within timeout. +--- + src/sbd-inquisitor.c | 24 ++++++++++++++++++++---- + src/sbd-md.c | 30 +++++++++++++++--------------- + src/sbd-pacemaker.c | 38 +++++++++++++++++++++++++++++--------- + src/sbd.h | 11 +++++++---- + 4 files changed, 71 insertions(+), 32 deletions(-) + +diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c +index 9be6c99..77c6e4f 100644 +--- a/src/sbd-inquisitor.c ++++ b/src/sbd-inquisitor.c +@@ -490,19 +490,19 @@ void inquisitor_child(void) + if (sbd_is_disk(s)) { + if (WIFEXITED(status)) { + switch(WEXITSTATUS(status)) { +- case EXIT_MD_IO_FAIL: ++ case EXIT_MD_SERVANT_IO_FAIL: + DBGLOG(LOG_INFO, "Servant for %s requests to be disowned", + s->devname); + break; +- case EXIT_MD_REQUEST_RESET: ++ case EXIT_MD_SERVANT_REQUEST_RESET: + cl_log(LOG_WARNING, "%s requested a reset", s->devname); + do_reset(); + break; +- case EXIT_MD_REQUEST_SHUTOFF: ++ case EXIT_MD_SERVANT_REQUEST_SHUTOFF: + cl_log(LOG_WARNING, "%s requested a shutoff", s->devname); + do_off(); + break; +- case EXIT_MD_REQUEST_CRASHDUMP: ++ case EXIT_MD_SERVANT_REQUEST_CRASHDUMP: + cl_log(LOG_WARNING, "%s requested a crashdump", s->devname); + do_crashdump(); + break; +@@ -510,6 +510,22 @@ void inquisitor_child(void) + break; + } + } ++ } else if (sbd_is_pcmk(s)) { ++ if (WIFEXITED(status)) { ++ switch(WEXITSTATUS(status)) { ++ case EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN: ++ DBGLOG(LOG_INFO, "PCMK-Servant has exited gracefully"); ++ /* revert to state prior to pacemaker-detection */ ++ s->restarts = 0; ++ s->restart_blocked = 0; ++ cluster_appeared = 0; ++ s->outdated = 1; ++ s->t_last.tv_sec = 0; ++ break; ++ default: ++ break; ++ } ++ } + } + cleanup_servant_by_pid(pid); + } +diff --git a/src/sbd-md.c b/src/sbd-md.c +index ba2c34d..c51d381 100644 +--- a/src/sbd-md.c ++++ b/src/sbd-md.c +@@ -1061,19 +1061,19 @@ int servant_md(const char *diskname, int mode, const void* argp) + + st = open_device(diskname, LOG_WARNING); + if (!st) { +- exit(EXIT_MD_IO_FAIL); ++ exit(EXIT_MD_SERVANT_IO_FAIL); + } + + s_header = header_get(st); + if (!s_header) { + cl_log(LOG_ERR, "Not a valid header on %s", diskname); +- exit(EXIT_MD_IO_FAIL); ++ exit(EXIT_MD_SERVANT_IO_FAIL); + } + + if (servant_check_timeout_inconsistent(s_header) < 0) { + cl_log(LOG_ERR, "Timeouts on %s do not match first device", + diskname); +- exit(EXIT_MD_IO_FAIL); ++ exit(EXIT_MD_SERVANT_IO_FAIL); + } + + if (s_header->minor_version > 0) { +@@ -1086,14 +1086,14 @@ int servant_md(const char *diskname, int mode, const void* argp) + cl_log(LOG_ERR, + "No slot allocated, and automatic allocation failed for disk %s.", + diskname); +- rc = EXIT_MD_IO_FAIL; ++ rc = EXIT_MD_SERVANT_IO_FAIL; + goto out; + } + s_node = sector_alloc(); + if (slot_read(st, mbox, s_node) < 0) { + cl_log(LOG_ERR, "Unable to read node entry on %s", + diskname); +- exit(EXIT_MD_IO_FAIL); ++ exit(EXIT_MD_SERVANT_IO_FAIL); + } + + cl_log(LOG_NOTICE, "Monitoring slot %d on disk %s", mbox, diskname); +@@ -1109,7 +1109,7 @@ int servant_md(const char *diskname, int mode, const void* argp) + if (mode > 0) { + if (mbox_read(st, mbox, s_mbox) < 0) { + cl_log(LOG_ERR, "mbox read failed during start-up in servant."); +- rc = EXIT_MD_IO_FAIL; ++ rc = EXIT_MD_SERVANT_IO_FAIL; + goto out; + } + if (s_mbox->cmd != SBD_MSG_EXIT && +@@ -1125,7 +1125,7 @@ int servant_md(const char *diskname, int mode, const void* argp) + DBGLOG(LOG_INFO, "First servant start - zeroing inbox"); + memset(s_mbox, 0, sizeof(*s_mbox)); + if (mbox_write(st, mbox, s_mbox) < 0) { +- rc = EXIT_MD_IO_FAIL; ++ rc = EXIT_MD_SERVANT_IO_FAIL; + goto out; + } + } +@@ -1154,28 +1154,28 @@ int servant_md(const char *diskname, int mode, const void* argp) + s_header_retry = header_get(st); + if (!s_header_retry) { + cl_log(LOG_ERR, "No longer found a valid header on %s", diskname); +- exit(EXIT_MD_IO_FAIL); ++ exit(EXIT_MD_SERVANT_IO_FAIL); + } + if (memcmp(s_header, s_header_retry, sizeof(*s_header)) != 0) { + cl_log(LOG_ERR, "Header on %s changed since start-up!", diskname); +- exit(EXIT_MD_IO_FAIL); ++ exit(EXIT_MD_SERVANT_IO_FAIL); + } + free(s_header_retry); + + s_node_retry = sector_alloc(); + if (slot_read(st, mbox, s_node_retry) < 0) { + cl_log(LOG_ERR, "slot read failed in servant."); +- exit(EXIT_MD_IO_FAIL); ++ exit(EXIT_MD_SERVANT_IO_FAIL); + } + if (memcmp(s_node, s_node_retry, sizeof(*s_node)) != 0) { + cl_log(LOG_ERR, "Node entry on %s changed since start-up!", diskname); +- exit(EXIT_MD_IO_FAIL); ++ exit(EXIT_MD_SERVANT_IO_FAIL); + } + free(s_node_retry); + + if (mbox_read(st, mbox, s_mbox) < 0) { + cl_log(LOG_ERR, "mbox read failed in servant."); +- exit(EXIT_MD_IO_FAIL); ++ exit(EXIT_MD_SERVANT_IO_FAIL); + } + + if (s_mbox->cmd > 0) { +@@ -1190,14 +1190,14 @@ int servant_md(const char *diskname, int mode, const void* argp) + sigqueue(ppid, SIG_TEST, signal_value); + break; + case SBD_MSG_RESET: +- exit(EXIT_MD_REQUEST_RESET); ++ exit(EXIT_MD_SERVANT_REQUEST_RESET); + case SBD_MSG_OFF: +- exit(EXIT_MD_REQUEST_SHUTOFF); ++ exit(EXIT_MD_SERVANT_REQUEST_SHUTOFF); + case SBD_MSG_EXIT: + sigqueue(ppid, SIG_EXITREQ, signal_value); + break; + case SBD_MSG_CRASHDUMP: +- exit(EXIT_MD_REQUEST_CRASHDUMP); ++ exit(EXIT_MD_SERVANT_REQUEST_CRASHDUMP); + default: + /* FIXME: + An "unknown" message might result +diff --git a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c +index aac355a..c69fc55 100644 +--- a/src/sbd-pacemaker.c ++++ b/src/sbd-pacemaker.c +@@ -103,6 +103,9 @@ static pe_working_set_t *data_set = NULL; + + static long last_refresh = 0; + ++static int pcmk_clean_shutdown = 0; ++static int pcmk_shutdown = 0; ++ + static gboolean + mon_timer_reconnect(gpointer data) + { +@@ -128,10 +131,26 @@ mon_cib_connection_destroy(gpointer user_data) + { + if (cib) { + cib->cmds->signoff(cib); ++ /* retrigger as last one might have been skipped */ ++ mon_refresh_state(NULL); ++ if (pcmk_clean_shutdown) { ++ /* assume a graceful pacemaker-shutdown */ ++ clean_up(EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN); ++ } ++ /* getting here we aren't sure about the pacemaker-state ++ so try to use the timeout to reconnect and get ++ everything sorted out again ++ */ ++ pcmk_shutdown = 0; + set_servant_health(pcmk_health_transient, LOG_WARNING, "Disconnected from CIB"); + timer_id_reconnect = g_timeout_add(reconnect_msec, mon_timer_reconnect, NULL); + } + cib_connected = 0; ++ /* no sense in looking into outdated cib, trying to apply patch, ... */ ++ if (current_cib) { ++ free_xml(current_cib); ++ current_cib = NULL; ++ } + return; + } + +@@ -171,7 +190,7 @@ static gboolean + mon_timer_notify(gpointer data) + { + static int counter = 0; +- int counter_max = timeout_watchdog / timeout_loop; ++ int counter_max = timeout_watchdog / timeout_loop / 2; + + if (timer_id_notify > 0) { + g_source_remove(timer_id_notify); +@@ -280,11 +299,6 @@ compute_status(pe_working_set_t * data_set) + } else if (node->details->pending) { + set_servant_health(pcmk_health_pending, LOG_WARNING, "Node state: pending"); + +-#if 0 +- } else if (node->details->shutdown) { +- set_servant_health(pcmk_health_shutdown, LOG_WARNING, "Node state: shutting down"); +-#endif +- + } else if (data_set->flags & pe_flag_have_quorum) { + set_servant_health(pcmk_health_online, LOG_INFO, "Node state: online"); + ever_had_quorum = TRUE; +@@ -315,6 +329,12 @@ compute_status(pe_working_set_t * data_set) + } + } + ++ if (node->details->shutdown) { ++ pcmk_shutdown = 1; ++ } ++ if (pcmk_shutdown && !(node->details->running_rsc)) { ++ pcmk_clean_shutdown = 1; ++ } + notify_parent(); + return; + } +@@ -339,7 +359,7 @@ crm_diff_update(const char *event, xmlNode * msg) + static mainloop_timer_t *refresh_timer = NULL; + + if(refresh_timer == NULL) { +- refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, mon_trigger_refresh, NULL); ++ refresh_timer = mainloop_timer_add("refresh", reconnect_msec, FALSE, mon_trigger_refresh, NULL); + refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_state, refresh_timer); + } + +@@ -369,9 +389,9 @@ crm_diff_update(const char *event, xmlNode * msg) + } + + /* Refresh +- * - immediately if the last update was more than 5s ago ++ * - immediately if the last update was more than 1s ago + * - every 10 updates +- * - at most 2s after the last update ++ * - at most 1s after the last update + */ + if (updates > 10 || (now - last_refresh) > (reconnect_msec / 1000)) { + mon_refresh_state(refresh_timer); +diff --git a/src/sbd.h b/src/sbd.h +index 6fe07f9..3b05a11 100644 +--- a/src/sbd.h ++++ b/src/sbd.h +@@ -54,10 +54,13 @@ + /* FIXME: should add dynamic check of SIG_XX >= SIGRTMAX */ + + /* exit status for disk-servant */ +-#define EXIT_MD_IO_FAIL 20 +-#define EXIT_MD_REQUEST_RESET 21 +-#define EXIT_MD_REQUEST_SHUTOFF 22 +-#define EXIT_MD_REQUEST_CRASHDUMP 23 ++#define EXIT_MD_SERVANT_IO_FAIL 20 ++#define EXIT_MD_SERVANT_REQUEST_RESET 21 ++#define EXIT_MD_SERVANT_REQUEST_SHUTOFF 22 ++#define EXIT_MD_SERVANT_REQUEST_CRASHDUMP 23 ++ ++/* exit status for pcmk-servant */ ++#define EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN 30 + + #define HOG_CHAR 0xff + #define SECTOR_NAME_MAX 63 +-- +1.8.3.1 + diff --git a/SOURCES/0002-mention-timeout-caveat-with-SBD_DELAY_START.patch b/SOURCES/0002-mention-timeout-caveat-with-SBD_DELAY_START.patch deleted file mode 100644 index 807d70d..0000000 --- a/SOURCES/0002-mention-timeout-caveat-with-SBD_DELAY_START.patch +++ /dev/null @@ -1,27 +0,0 @@ -From f79d09ec8dd744f69d87008e868297b308043b56 Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Thu, 14 Dec 2017 18:21:07 +0100 -Subject: [PATCH] Doc: sbd.sysconfig: mention timeout caveat with - SBD_DELAY_START - ---- - src/sbd.sysconfig | 3 +++ - 1 file changed, 3 insertions(+) - -diff --git a/src/sbd.sysconfig b/src/sbd.sysconfig -index cbc1501..75ff980 100644 ---- a/src/sbd.sysconfig -+++ b/src/sbd.sysconfig -@@ -31,6 +31,9 @@ SBD_STARTMODE=always - # other nodes are still waiting in the fence acknowledgement phase. - # This is an occasional issue with virtual machines. - # -+# Consider that you might have to adapt the startup-timeout accordingly -+# if the default isn't sufficient. (TimeoutStartSec for systemd) -+# - # This option may be ignored at a later point, once pacemaker handles - # this case better. - # --- -1.8.3.1 - diff --git a/SOURCES/0003-Doc-sbd.8.pod-add-query-test-watchdog.patch b/SOURCES/0003-Doc-sbd.8.pod-add-query-test-watchdog.patch deleted file mode 100644 index 6695f73..0000000 --- a/SOURCES/0003-Doc-sbd.8.pod-add-query-test-watchdog.patch +++ /dev/null @@ -1,48 +0,0 @@ -From e073271f53583f2d0cf2675ea665ed50712b65dd Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Mon, 8 Jan 2018 12:07:33 +0100 -Subject: [PATCH] Doc: sbd.8.pod: add sections for query-watchdog & - test-watchdog - ---- - man/sbd.8.pod | 24 ++++++++++++++++++++++++ - 1 file changed, 24 insertions(+) - -diff --git a/man/sbd.8.pod b/man/sbd.8.pod -index c1bf67a..ffd01c2 100644 ---- a/man/sbd.8.pod -+++ b/man/sbd.8.pod -@@ -394,6 +394,30 @@ a fencing message by the cluster. - - =back - -+=head2 query-watchdog -+ -+Example usage: -+ -+ sbd query-watchdog -+ -+Check for available watchdog devices and print some info. -+ -+B: This command will arm the watchdog during query, and if your -+watchdog refuses disarming (for example, if its kernel module has the -+'nowayout' parameter set) this will reset your system. -+ -+=head2 test-watchdog -+ -+Example usage: -+ -+ sbd test-watchdog [-w /dev/watchdog3] -+ -+Test specified watchdog device (/dev/watchdog by default). -+ -+B: This command will arm the watchdog and have your system reset -+in case your watchdog is working properly! If issued from an interactive -+session, it will prompt for confirmation. -+ - =head1 Base system configuration - - =head2 Configure a watchdog --- -1.8.3.1 - diff --git a/SOURCES/0003-Fix-sbd-pacemaker-bail-out-of-status-earlier.patch b/SOURCES/0003-Fix-sbd-pacemaker-bail-out-of-status-earlier.patch new file mode 100644 index 0000000..776edea --- /dev/null +++ b/SOURCES/0003-Fix-sbd-pacemaker-bail-out-of-status-earlier.patch @@ -0,0 +1,45 @@ +From 79b778debfee5b4ab2d099b2bfc7385f45597f70 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Tue, 26 Mar 2019 11:17:45 +0100 +Subject: [PATCH] Fix: sbd-pacemaker: bail out of status earlier + +Prevents possible subsequent null-pointer access and avoids +unnecessary search for node. +--- + src/sbd-pacemaker.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +diff --git a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c +index c69fc55..9a8b95f 100644 +--- a/src/sbd-pacemaker.c ++++ b/src/sbd-pacemaker.c +@@ -276,7 +276,7 @@ compute_status(pe_working_set_t * data_set) + static int updates = 0; + static int ever_had_quorum = FALSE; + +- node_t *node = pe_find_node(data_set->nodes, local_uname); ++ node_t *node = NULL; + + updates++; + +@@ -286,11 +286,15 @@ compute_status(pe_working_set_t * data_set) + return; + } + ++ node = pe_find_node(data_set->nodes, local_uname); + +- if (node == NULL) { ++ if ((node == NULL) || (node->details == NULL)) { + set_servant_health(pcmk_health_unknown, LOG_WARNING, "Node state: %s is UNKNOWN", local_uname); ++ notify_parent(); ++ return; ++ } + +- } else if (node->details->online == FALSE) { ++ if (node->details->online == FALSE) { + set_servant_health(pcmk_health_unknown, LOG_WARNING, "Node state: OFFLINE"); + + } else if (node->details->unclean) { +-- +1.8.3.1 + diff --git a/SOURCES/0004-Build-sbd-pacemaker-Query-CIB-directly-with-the-API.patch b/SOURCES/0004-Build-sbd-pacemaker-Query-CIB-directly-with-the-API.patch deleted file mode 100644 index 1c2ce98..0000000 --- a/SOURCES/0004-Build-sbd-pacemaker-Query-CIB-directly-with-the-API.patch +++ /dev/null @@ -1,86 +0,0 @@ -From ef40f6a0fdc178828fbde6f1303e5ee58bfb822a Mon Sep 17 00:00:00 2001 -From: "Gao,Yan" -Date: Wed, 7 Mar 2018 17:50:29 +0100 -Subject: [PATCH] Build: sbd-pacemaker: Query CIB directly with the API instead - of get_cib_copy() - -get_cib_copy() has been dropped from pacemaker 2.0 branch as of: -https://github.com/ClusterLabs/pacemaker/commit/32c75b7be ---- - src/sbd-pacemaker.c | 39 +++++++++++++++++++++++++++++++++++---- - 1 file changed, 35 insertions(+), 4 deletions(-) - -diff --git a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c -index b6a8fb6..2f06109 100644 ---- a/src/sbd-pacemaker.c -+++ b/src/sbd-pacemaker.c -@@ -109,6 +109,38 @@ mon_cib_connection_destroy(gpointer user_data) - return; - } - -+static void -+mon_retrieve_current_cib() -+{ -+ xmlNode *xml_cib = NULL; -+ int options = cib_scope_local | cib_sync_call; -+ int rc = pcmk_ok; -+ -+ free_xml(current_cib); -+ current_cib = NULL; -+ -+ rc = cib->cmds->query(cib, NULL, &xml_cib, options); -+ -+ if (rc != pcmk_ok) { -+ crm_err("Couldn't retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc); -+ free_xml(xml_cib); -+ return; -+ -+ } else if (xml_cib == NULL) { -+ crm_err("Couldn't retrieve the CIB: empty result"); -+ return; -+ } -+ -+ if (safe_str_eq(crm_element_name(xml_cib), XML_TAG_CIB)) { -+ current_cib = xml_cib; -+ -+ } else { -+ free_xml(xml_cib); -+ } -+ -+ return; -+} -+ - static gboolean - mon_timer_notify(gpointer data) - { -@@ -121,8 +153,7 @@ mon_timer_notify(gpointer data) - - if (cib_connected) { - if (counter == counter_max) { -- free_xml(current_cib); -- current_cib = get_cib_copy(cib); -+ mon_retrieve_current_cib(); - mon_refresh_state(NULL); - counter = 0; - } else { -@@ -163,7 +194,7 @@ cib_connect(gboolean full) - return rc; - } - -- current_cib = get_cib_copy(cib); -+ mon_retrieve_current_cib(); - mon_refresh_state(NULL); - - if (full) { -@@ -308,7 +339,7 @@ crm_diff_update(const char *event, xmlNode * msg) - } - - if (current_cib == NULL) { -- current_cib = get_cib_copy(cib); -+ mon_retrieve_current_cib(); - } - - /* Refresh --- -1.8.3.1 - diff --git a/SOURCES/0004-Doc-sbd.8.pod-use-the-generic-term-cluster-services-.patch b/SOURCES/0004-Doc-sbd.8.pod-use-the-generic-term-cluster-services-.patch new file mode 100644 index 0000000..ce9d698 --- /dev/null +++ b/SOURCES/0004-Doc-sbd.8.pod-use-the-generic-term-cluster-services-.patch @@ -0,0 +1,26 @@ +From 6d4289655dacad4b72fb64373c37bd1ad33649e6 Mon Sep 17 00:00:00 2001 +From: "Gao,Yan" +Date: Tue, 22 Jan 2019 16:33:04 +0100 +Subject: [PATCH] Doc: sbd.8.pod: use the generic term "cluster services" + instead of the specific "openais" + +--- + man/sbd.8.pod | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/man/sbd.8.pod b/man/sbd.8.pod +index ffd01c2..fed6bd3 100644 +--- a/man/sbd.8.pod ++++ b/man/sbd.8.pod +@@ -476,7 +476,7 @@ storage (with internal redundancy) anyway; the SBD device does not + introduce an additional single point of failure then. + + If the SBD device is not accessible, the daemon will fail to start and +-inhibit openais startup. ++inhibit startup of cluster services. + + =item Two devices + +-- +1.8.3.1 + diff --git a/SOURCES/0005-Doc-sbd.sysconfig-watchdog-timeout-set-in-the-on-dis.patch b/SOURCES/0005-Doc-sbd.sysconfig-watchdog-timeout-set-in-the-on-dis.patch new file mode 100644 index 0000000..8c096e0 --- /dev/null +++ b/SOURCES/0005-Doc-sbd.sysconfig-watchdog-timeout-set-in-the-on-dis.patch @@ -0,0 +1,27 @@ +From 091e10ae3f62239251b53bf7d81d47a57a9b82f2 Mon Sep 17 00:00:00 2001 +From: "Gao,Yan" +Date: Wed, 23 Jan 2019 17:21:15 +0100 +Subject: [PATCH] Doc: sbd.sysconfig: watchdog timeout set in the on-disk + metadata takes precedence + +--- + src/sbd.sysconfig | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/src/sbd.sysconfig b/src/sbd.sysconfig +index e661f96..f163f21 100644 +--- a/src/sbd.sysconfig ++++ b/src/sbd.sysconfig +@@ -68,6 +68,9 @@ SBD_WATCHDOG_DEV=/dev/watchdog + # If your sbd device(s) reside on a multipath setup or iSCSI, this + # should be the time required to detect a path failure. + # ++# Be aware that watchdog timeout set in the on-disk metadata takes ++# precedence. ++# + SBD_WATCHDOG_TIMEOUT=5 + + ## Type: string +-- +1.8.3.1 + diff --git a/SOURCES/0005-Fix-build-error-with-glibc-2.25.patch b/SOURCES/0005-Fix-build-error-with-glibc-2.25.patch deleted file mode 100644 index 75b45a6..0000000 --- a/SOURCES/0005-Fix-build-error-with-glibc-2.25.patch +++ /dev/null @@ -1,33 +0,0 @@ -From ba3b4127f658cb59ff09939e8de93a06a138dddb Mon Sep 17 00:00:00 2001 -From: Valentin Vidic -Date: Sat, 25 Nov 2017 09:18:41 +0100 -Subject: [PATCH] Fix: build error with glibc 2.25 - -Add include for makedev, major and minor - -sbd-common.c:268:13: error: In the GNU C Library, "makedev" is defined - by . For historical compatibility, it is - currently defined by as well, but we plan to - remove this soon. To use "makedev", include - directly. If you did not intend to use a system-defined macro - "makedev", you should undefine it after including . [-Werror] - {makedev(10,130), 0}; ---- - src/sbd-common.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/src/sbd-common.c b/src/sbd-common.c -index 1d7dbc2..25aaeae 100644 ---- a/src/sbd-common.c -+++ b/src/sbd-common.c -@@ -19,6 +19,7 @@ - #include "sbd.h" - #include - #include -+#include - #include - #include - #include --- -1.8.3.1 - diff --git a/SOURCES/0006-Fix-gcc-format-string-error.patch b/SOURCES/0006-Fix-gcc-format-string-error.patch deleted file mode 100644 index 9db8c49..0000000 --- a/SOURCES/0006-Fix-gcc-format-string-error.patch +++ /dev/null @@ -1,25 +0,0 @@ -From ee232b251c7072935d0507dc0bad27f375a12492 Mon Sep 17 00:00:00 2001 -From: Valentin Vidic -Date: Wed, 8 Nov 2017 22:02:29 +0100 -Subject: [PATCH] Fix: gcc format string error - ---- - src/sbd-common.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/src/sbd-common.c b/src/sbd-common.c -index 803bc3a..1d7dbc2 100644 ---- a/src/sbd-common.c -+++ b/src/sbd-common.c -@@ -268,7 +268,7 @@ watchdog_populate_list(void) - {makedev(10,130), 0}; - int num_watchdogs = 1; - struct dirent *entry; -- char entry_name[64]; -+ char entry_name[280]; - DIR *dp; - char buf[256] = ""; - --- -1.8.3.1 - diff --git a/SOURCES/0006-Refactor-fail-earlier-on-invalid-servants.patch b/SOURCES/0006-Refactor-fail-earlier-on-invalid-servants.patch new file mode 100644 index 0000000..cb71002 --- /dev/null +++ b/SOURCES/0006-Refactor-fail-earlier-on-invalid-servants.patch @@ -0,0 +1,142 @@ +From 8301cbafed191f30656a22876941cc7c9189b623 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Thu, 31 Jan 2019 14:42:01 +0100 +Subject: [PATCH] Refactor: fail earlier on invalid servants + +--- + src/sbd-inquisitor.c | 51 ++++++++++++++++++++++++++++++++------------------- + src/sbd-md.c | 7 +------ + src/sbd.h | 2 +- + 3 files changed, 34 insertions(+), 26 deletions(-) + +diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c +index 8e0bc87..9be6c99 100644 +--- a/src/sbd-inquisitor.c ++++ b/src/sbd-inquisitor.c +@@ -42,19 +42,36 @@ void recruit_servant(const char *devname, pid_t pid) + struct servants_list_item *newbie; + + if (lookup_servant_by_dev(devname)) { +- cl_log(LOG_DEBUG, "Servant %s already exists", devname); +- return; ++ cl_log(LOG_DEBUG, "Servant %s already exists", devname); ++ return; + } + + newbie = malloc(sizeof(*newbie)); +- if (!newbie) { +- fprintf(stderr, "malloc failed in recruit_servant.\n"); +- exit(1); ++ if (newbie) { ++ memset(newbie, 0, sizeof(*newbie)); ++ newbie->devname = strdup(devname); ++ newbie->pid = pid; ++ newbie->first_start = 1; ++ } ++ if (!newbie || !newbie->devname) { ++ fprintf(stderr, "heap allocation failed in recruit_servant.\n"); ++ exit(1); ++ } ++ ++ /* some sanity-check on our newbie */ ++ if (sbd_is_disk(newbie)) { ++ cl_log(LOG_INFO, "Monitoring %s", devname); ++ disk_count++; ++ } else if (sbd_is_pcmk(newbie) || sbd_is_cluster(newbie)) { ++ /* alive just after pcmk and cluster servants have shown up */ ++ newbie->outdated = 1; ++ } else { ++ /* toss our newbie */ ++ cl_log(LOG_ERR, "Refusing to recruit unrecognized servant %s", devname); ++ free((void *) newbie->devname); ++ free(newbie); ++ return; + } +- memset(newbie, 0, sizeof(*newbie)); +- newbie->devname = strdup(devname); +- newbie->pid = pid; +- newbie->first_start = 1; + + if (!s) { + servants_leader = newbie; +@@ -65,12 +82,6 @@ void recruit_servant(const char *devname, pid_t pid) + } + + servant_count++; +- if(sbd_is_disk(newbie)) { +- cl_log(LOG_INFO, "Monitoring %s", devname); +- disk_count++; +- } else { +- newbie->outdated = 1; +- } + } + + int assign_servant(const char* devname, functionp_t functionp, int mode, const void* argp) +@@ -148,7 +159,7 @@ void servant_start(struct servants_list_item *s) + if (sbd_is_disk(s)) { + #if SUPPORT_SHARED_DISK + DBGLOG(LOG_INFO, "Starting servant for device %s", s->devname); +- s->pid = assign_servant(s->devname, servant, start_mode, s); ++ s->pid = assign_servant(s->devname, servant_md, start_mode, s); + #else + cl_log(LOG_ERR, "Shared disk functionality not supported"); + return; +@@ -785,12 +796,14 @@ parse_device_line(const char *line) + + if (lpc > last) { + entry = calloc(1, 1 + lpc - last); ++ if (!entry) { ++ fprintf(stderr, "heap allocation failed parsing device-line.\n"); ++ exit(1); ++ } + rc = sscanf(line + last, "%[^;]", entry); + } + +- if (entry == NULL) { +- /* Skip */ +- } else if (rc != 1) { ++ if (rc != 1) { + cl_log(LOG_WARNING, "Could not parse (%d %d): %s", last, lpc, line + last); + } else { + cl_log(LOG_DEBUG, "Adding '%s'", entry); +diff --git a/src/sbd-md.c b/src/sbd-md.c +index 579d273..ba2c34d 100644 +--- a/src/sbd-md.c ++++ b/src/sbd-md.c +@@ -1031,7 +1031,7 @@ static int servant_check_timeout_inconsistent(struct sector_header_s *hdr) + return 0; + } + +-int servant(const char *diskname, int mode, const void* argp) ++int servant_md(const char *diskname, int mode, const void* argp) + { + struct sector_mbox_s *s_mbox = NULL; + struct sector_node_s *s_node = NULL; +@@ -1046,11 +1046,6 @@ int servant(const char *diskname, int mode, const void* argp) + char uuid[37]; + const struct servants_list_item *s = argp; + +- if (!diskname) { +- cl_log(LOG_ERR, "Empty disk name %s.", diskname); +- return -1; +- } +- + cl_log(LOG_INFO, "Servant starting for device %s", diskname); + + /* Block most of the signals */ +diff --git a/src/sbd.h b/src/sbd.h +index 386c85c..6fe07f9 100644 +--- a/src/sbd.h ++++ b/src/sbd.h +@@ -175,7 +175,7 @@ int ping_via_slots(const char *name, struct servants_list_item *servants); + int dump_headers(struct servants_list_item *servants); + unsigned long get_first_msgwait(struct servants_list_item *servants); + int messenger(const char *name, const char *msg, struct servants_list_item *servants); +-int servant(const char *diskname, int mode, const void* argp); ++int servant_md(const char *diskname, int mode, const void* argp); + #endif + + int servant_pcmk(const char *diskname, int mode, const void* argp); +-- +1.8.3.1 + diff --git a/SOURCES/0007-Build-cluster-servant-Compile-with-pacemaker-2.0.patch b/SOURCES/0007-Build-cluster-servant-Compile-with-pacemaker-2.0.patch deleted file mode 100644 index f1782a3..0000000 --- a/SOURCES/0007-Build-cluster-servant-Compile-with-pacemaker-2.0.patch +++ /dev/null @@ -1,51 +0,0 @@ -From 04d32266b378f5f47088e8f34703bdd9c95f5a4c Mon Sep 17 00:00:00 2001 -From: "Gao,Yan" -Date: Thu, 30 Nov 2017 16:11:00 +0100 -Subject: [PATCH] Build: cluster-servant: Compile with pacemaker-2.0 - -Pacemaker-2.0 removed support for corosync 1 cluster layer: -https://github.com/ClusterLabs/pacemaker/commit/7a9891f29 ---- - configure.ac | 4 ++++ - src/sbd-cluster.c | 4 ++++ - 2 files changed, 8 insertions(+) - -diff --git a/configure.ac b/configure.ac -index 1eb8758..1f328c2 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -72,6 +72,10 @@ AC_CHECK_HEADERS(pacemaker/crm/cluster.h) - AC_CHECK_LIB(crmcommon, pcmk_strerror, , missing="yes") - AC_CHECK_LIB(cib, cib_apply_patch_event, , missing="yes") - -+dnl pacemaker-2.0 removed support for corosync 1 cluster layer -+AC_CHECK_DECLS([pcmk_cluster_classic_ais, pcmk_cluster_cman],,, -+ [#include ]) -+ - if test "$missing" = "yes"; then - AC_MSG_ERROR([Missing required libraries or functions.]) - fi -diff --git a/src/sbd-cluster.c b/src/sbd-cluster.c -index de99d0c..ae4750e 100644 ---- a/src/sbd-cluster.c -+++ b/src/sbd-cluster.c -@@ -238,12 +238,16 @@ notify_timer_cb(gpointer data) - } - - switch (get_cluster_type()) { -+#if HAVE_DECL_PCMK_CLUSTER_CLASSIC_AIS - case pcmk_cluster_classic_ais: - send_cluster_text(crm_class_quorum, NULL, TRUE, NULL, crm_msg_ais); - break; - -+#endif - case pcmk_cluster_corosync: -+#if HAVE_DECL_PCMK_CLUSTER_CMAN - case pcmk_cluster_cman: -+#endif - /* TODO - Make a CPG call and only call notify_parent() when we get a reply */ - notify_parent(); - break; --- -1.8.3.1 - diff --git a/SOURCES/0007-Fix-sbd-inquisitor-overhaul-device-list-parser.patch b/SOURCES/0007-Fix-sbd-inquisitor-overhaul-device-list-parser.patch new file mode 100644 index 0000000..c0ccf3d --- /dev/null +++ b/SOURCES/0007-Fix-sbd-inquisitor-overhaul-device-list-parser.patch @@ -0,0 +1,112 @@ +From d3be2caffb9edbb6bfe0e2658c66a1826f4e9c3a Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Mon, 15 Apr 2019 17:41:51 +0200 +Subject: [PATCH] Fix: sbd-inquisitor: overhaul device-list-parser + +for readability and robustness +--- + src/sbd-inquisitor.c | 60 ++++++++++++++++++++++++++-------------------------- + 1 file changed, 30 insertions(+), 30 deletions(-) + +diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c +index 715e978..b4b5585 100644 +--- a/src/sbd-inquisitor.c ++++ b/src/sbd-inquisitor.c +@@ -780,56 +780,56 @@ int inquisitor(void) + int + parse_device_line(const char *line) + { +- int lpc = 0; +- int last = 0; +- int max = 0; ++ size_t lpc = 0; ++ size_t last = 0; ++ size_t max = 0; + int found = 0; ++ bool skip_space = true; ++ int space_run = 0; + +- if(line) { +- max = strlen(line); ++ if (!line) { ++ return 0; + } + +- if (max <= 0) { +- return found; +- } ++ max = strlen(line); + +- cl_log(LOG_DEBUG, "Processing %d bytes: [%s]", max, line); +- /* Skip initial whitespace */ +- for (lpc = 0; lpc <= max && isspace(line[lpc]); lpc++) { +- last = lpc + 1; +- } ++ cl_log(LOG_DEBUG, "Processing %d bytes: [%s]", (int) max, line); + +- /* Now the actual content */ + for (lpc = 0; lpc <= max; lpc++) { +- int a_space = isspace(line[lpc]); +- +- if (a_space && lpc < max && isspace(line[lpc + 1])) { +- /* fast-forward to the end of the spaces */ +- +- } else if (a_space || line[lpc] == ';' || line[lpc] == 0) { +- int rc = 1; +- char *entry = NULL; ++ if (isspace(line[lpc])) { ++ if (skip_space) { ++ last = lpc + 1; ++ } else { ++ space_run++; ++ } ++ continue; ++ } ++ skip_space = false; ++ if (line[lpc] == ';' || line[lpc] == 0) { ++ int rc = 0; ++ char *entry = calloc(1, 1 + lpc - last); + +- if (lpc > last) { +- entry = calloc(1, 1 + lpc - last); +- if (!entry) { +- fprintf(stderr, "heap allocation failed parsing device-line.\n"); +- exit(1); +- } ++ if (entry) { + rc = sscanf(line + last, "%[^;]", entry); ++ } else { ++ fprintf(stderr, "Heap allocation failed parsing device-line.\n"); ++ exit(1); + } + + if (rc != 1) { +- cl_log(LOG_WARNING, "Could not parse (%d %d): %s", last, lpc, line + last); ++ cl_log(LOG_WARNING, "Could not parse: '%s'", line + last); + } else { ++ entry[strlen(entry)-space_run] = '\0'; + cl_log(LOG_DEBUG, "Adding '%s'", entry); + recruit_servant(entry, 0); + found++; + } + + free(entry); ++ skip_space = true; + last = lpc + 1; + } ++ space_run = 0; + } + return found; + } +@@ -890,7 +890,7 @@ int main(int argc, char **argv, char **envp) + int devices = parse_device_line(value); + if(devices < 1) { + fprintf(stderr, "Invalid device line: %s\n", value); +- exit_status = -2; ++ exit_status = -2; + goto out; + } + #else +-- +1.8.3.1 + diff --git a/SOURCES/0008-Log-change-sbd-s-default-logging-level-to-LOG_NOTICE.patch b/SOURCES/0008-Log-change-sbd-s-default-logging-level-to-LOG_NOTICE.patch deleted file mode 100644 index b9d848b..0000000 --- a/SOURCES/0008-Log-change-sbd-s-default-logging-level-to-LOG_NOTICE.patch +++ /dev/null @@ -1,78 +0,0 @@ -From 1d8fd2540ccf254d90e831f612415226043fc5b3 Mon Sep 17 00:00:00 2001 -From: "Gao,Yan" -Date: Fri, 27 Apr 2018 13:41:00 +0200 -Subject: [PATCH] Log: change sbd's default logging level to LOG_NOTICE - -With the refactoring of logging parts and 1ee3503c, sbd became too -silent given the default logging level LOG_WARNING, even under the -situations where it's supposed to tell something. - -This commit changes sbd's default logging level to LOG_NOTICE. -Meanwhile pacemaker library's logging level remains at LOG_WARNING. -With "-v", sbd's logging level is set to LOG_INFO. -With "-vv", sbd's logging level is set to LOG_DEBUG. -With "-vvv", both sbd's and pacemaker library's logging levels are set -to LOG_DEBUG. ---- - src/sbd-inquisitor.c | 28 ++++++++++++++++++++++------ - 1 file changed, 22 insertions(+), 6 deletions(-) - -diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c -index 59408b3..237bf43 100644 ---- a/src/sbd-inquisitor.c -+++ b/src/sbd-inquisitor.c -@@ -803,6 +803,19 @@ parse_device_line(const char *line) - return found; - } - -+#define SBD_SOURCE_FILES "sbd-cluster.c,sbd-common.c,sbd-inquisitor.c,sbd-md.c,sbd-pacemaker.c,setproctitle.c" -+ -+static void -+sbd_log_filter_ctl(const char *files, uint8_t priority) -+{ -+ if (files == NULL) { -+ files = SBD_SOURCE_FILES; -+ } -+ -+ qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, files, priority); -+ qb_log_filter_ctl(QB_LOG_STDERR, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, files, priority); -+} -+ - int - arg_enabled(int arg_count) - { -@@ -834,6 +847,7 @@ int main(int argc, char **argv, char **envp) - - qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_TRUE); - qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_FALSE); -+ sbd_log_filter_ctl(NULL, LOG_NOTICE); - - sbd_get_uname(); - -@@ -926,15 +940,17 @@ int main(int argc, char **argv, char **envp) - case 'v': - debug++; - if(debug == 1) { -- qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, "sbd-common.c,sbd-inquisitor.c,sbd-md.c,sbd-pacemaker.c", LOG_DEBUG); -- qb_log_filter_ctl(QB_LOG_STDERR, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, "sbd-common.c,sbd-inquisitor.c,sbd-md.c,sbd-pacemaker.c", LOG_DEBUG); -- cl_log(LOG_INFO, "Verbose mode enabled."); -+ sbd_log_filter_ctl(NULL, LOG_INFO); -+ cl_log(LOG_INFO, "Verbose mode enabled."); - - } else if(debug == 2) { -+ sbd_log_filter_ctl(NULL, LOG_DEBUG); -+ cl_log(LOG_INFO, "Debug mode enabled."); -+ -+ } else if(debug == 3) { - /* Go nuts, turn on pacemaker's logging too */ -- qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, "*", LOG_DEBUG); -- qb_log_filter_ctl(QB_LOG_STDERR, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, "*", LOG_DEBUG); -- cl_log(LOG_INFO, "Verbose library mode enabled."); -+ sbd_log_filter_ctl("*", LOG_DEBUG); -+ cl_log(LOG_INFO, "Debug library mode enabled."); - } - break; - case 'T': --- -1.8.3.1 - diff --git a/SOURCES/0008-Refactor-sbd-common-no-reason-for-stack-hogger-havin.patch b/SOURCES/0008-Refactor-sbd-common-no-reason-for-stack-hogger-havin.patch new file mode 100644 index 0000000..f0a6d44 --- /dev/null +++ b/SOURCES/0008-Refactor-sbd-common-no-reason-for-stack-hogger-havin.patch @@ -0,0 +1,47 @@ +From 8e94781169fc2f36eb49078de1978ceb53df6b6c Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Mon, 15 Apr 2019 17:40:26 +0200 +Subject: [PATCH] Refactor: sbd-common: no reason for stack-hogger having + retval + +--- + src/sbd-common.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/src/sbd-common.c b/src/sbd-common.c +index 3966f25..873a76e 100644 +--- a/src/sbd-common.c ++++ b/src/sbd-common.c +@@ -568,13 +568,13 @@ enum { + #define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK) + #define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data) + +-static unsigned char ++static void + sbd_stack_hogger(unsigned char * inbuf, int kbytes) + { + unsigned char buf[1024]; + + if(kbytes <= 0) { +- return HOG_CHAR; ++ return; + } + + if (inbuf == NULL) { +@@ -584,10 +584,10 @@ sbd_stack_hogger(unsigned char * inbuf, int kbytes) + } + + if (kbytes > 0) { +- return sbd_stack_hogger(buf, kbytes-1); +- } else { +- return buf[sizeof(buf)-1]; ++ sbd_stack_hogger(buf, kbytes-1); + } ++ ++ return; + } + + static void +-- +1.8.3.1 + diff --git a/SOURCES/0009-Log-upgrade-important-messages-and-downgrade-unimpor.patch b/SOURCES/0009-Log-upgrade-important-messages-and-downgrade-unimpor.patch deleted file mode 100644 index 9f2da2e..0000000 --- a/SOURCES/0009-Log-upgrade-important-messages-and-downgrade-unimpor.patch +++ /dev/null @@ -1,161 +0,0 @@ -From 2dbdee29736fcbf0fe1d41c306959b22d05f72b0 Mon Sep 17 00:00:00 2001 -From: "Gao,Yan" -Date: Mon, 30 Apr 2018 18:02:04 +0200 -Subject: [PATCH] Log: upgrade important messages and downgrade unimportant - ones - -It also fixes a message that's supposed to be "quorum.two_node not -present in cmap". ---- - src/sbd-cluster.c | 11 ++++++----- - src/sbd-common.c | 4 ++-- - src/sbd-inquisitor.c | 6 +++--- - src/sbd-md.c | 6 +++--- - src/sbd-pacemaker.c | 2 +- - 5 files changed, 15 insertions(+), 14 deletions(-) - -diff --git a/src/sbd-cluster.c b/src/sbd-cluster.c -index ae4750e..c7328af 100644 ---- a/src/sbd-cluster.c -+++ b/src/sbd-cluster.c -@@ -201,10 +201,11 @@ sbd_get_two_node(void) - } - - if (cmap_get_uint8(cmap_handle, "quorum.two_node", &two_node_u8) == CS_OK) { -- cl_log(LOG_NOTICE, "Corosync is%s in 2Node-mode", two_node_u8?"":" not"); -+ cl_log(two_node_u8? LOG_NOTICE : LOG_INFO, -+ "Corosync is%s in 2Node-mode", two_node_u8?"":" not"); - two_node = two_node_u8; - } else { -- cl_log(LOG_NOTICE, "quorum.two_node present in cmap\n"); -+ cl_log(LOG_INFO, "quorum.two_node not present in cmap\n"); - } - return TRUE; - -@@ -264,7 +265,7 @@ sbd_membership_connect(void) - { - bool connected = false; - -- cl_log(LOG_NOTICE, "Attempting cluster connection"); -+ cl_log(LOG_INFO, "Attempting cluster connection"); - - cluster.destroy = sbd_membership_destroy; - -@@ -308,7 +309,7 @@ sbd_membership_connect(void) - } - } - -- set_servant_health(pcmk_health_transient, LOG_NOTICE, "Connected, waiting for initial membership"); -+ set_servant_health(pcmk_health_transient, LOG_INFO, "Connected, waiting for initial membership"); - notify_parent(); - - notify_timer_cb(NULL); -@@ -530,7 +531,7 @@ servant_cluster(const char *diskname, int mode, const void* argp) - enum cluster_type_e cluster_stack = get_cluster_type(); - - crm_system_name = strdup("sbd:cluster"); -- cl_log(LOG_INFO, "Monitoring %s cluster health", name_for_cluster_type(cluster_stack)); -+ cl_log(LOG_NOTICE, "Monitoring %s cluster health", name_for_cluster_type(cluster_stack)); - set_proc_title("sbd: watcher: Cluster"); - - sbd_membership_connect(); -diff --git a/src/sbd-common.c b/src/sbd-common.c -index f22c4f2..0ce6478 100644 ---- a/src/sbd-common.c -+++ b/src/sbd-common.c -@@ -893,7 +893,7 @@ notify_parent(void) - case pcmk_health_pending: - case pcmk_health_shutdown: - case pcmk_health_transient: -- DBGLOG(LOG_INFO, "Not notifying parent: state transient (%d)", servant_health); -+ DBGLOG(LOG_DEBUG, "Not notifying parent: state transient (%d)", servant_health); - break; - - case pcmk_health_unknown: -@@ -904,7 +904,7 @@ notify_parent(void) - break; - - case pcmk_health_online: -- DBGLOG(LOG_INFO, "Notifying parent: healthy"); -+ DBGLOG(LOG_DEBUG, "Notifying parent: healthy"); - sigqueue(ppid, SIG_LIVENESS, signal_value); - break; - -diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c -index 237bf43..90c7d26 100644 ---- a/src/sbd-inquisitor.c -+++ b/src/sbd-inquisitor.c -@@ -64,7 +64,7 @@ void recruit_servant(const char *devname, pid_t pid) - - servant_count++; - if(sbd_is_disk(newbie)) { -- cl_log(LOG_NOTICE, "Monitoring %s", devname); -+ cl_log(LOG_INFO, "Monitoring %s", devname); - disk_count++; - } else { - newbie->outdated = 1; -@@ -565,7 +565,7 @@ void inquisitor_child(void) - if(cluster_alive(true)) { - /* We LIVE! */ - if(cluster_appeared == false) { -- cl_log(LOG_NOTICE, "Active cluster detected"); -+ cl_log(LOG_INFO, "Active cluster detected"); - } - tickle = 1; - can_detach = 1; -@@ -574,7 +574,7 @@ void inquisitor_child(void) - } else if(cluster_alive(false)) { - if(!decoupled) { - /* On the way up, detach and arm the watchdog */ -- cl_log(LOG_NOTICE, "Partial cluster detected, detaching"); -+ cl_log(LOG_INFO, "Partial cluster detected, detaching"); - } - - can_detach = 1; -diff --git a/src/sbd-md.c b/src/sbd-md.c -index 6a964dd..6f152c4 100644 ---- a/src/sbd-md.c -+++ b/src/sbd-md.c -@@ -1097,7 +1097,7 @@ int servant(const char *diskname, int mode, const void* argp) - exit(EXIT_MD_IO_FAIL); - } - -- DBGLOG(LOG_INFO, "Monitoring slot %d on disk %s", mbox, diskname); -+ cl_log(LOG_NOTICE, "Monitoring slot %d on disk %s", mbox, diskname); - if (s_header->minor_version == 0) { - set_proc_title("sbd: watcher: %s - slot: %d", diskname, mbox); - } else { -@@ -1180,7 +1180,7 @@ int servant(const char *diskname, int mode, const void* argp) - } - - if (s_mbox->cmd > 0) { -- cl_log(LOG_INFO, -+ cl_log(LOG_NOTICE, - "Received command %s from %s on disk %s", - char2cmd(s_mbox->cmd), s_mbox->from, diskname); - -@@ -1222,7 +1222,7 @@ int servant(const char *diskname, int mode, const void* argp) - (int)latency, (int)timeout_watchdog_warn, - diskname); - } else if (debug) { -- DBGLOG(LOG_INFO, "Latency: %d on disk %s", (int)latency, -+ DBGLOG(LOG_DEBUG, "Latency: %d on disk %s", (int)latency, - diskname); - } - } -diff --git a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c -index 2f06109..a435d01 100644 ---- a/src/sbd-pacemaker.c -+++ b/src/sbd-pacemaker.c -@@ -416,7 +416,7 @@ servant_pcmk(const char *diskname, int mode, const void* argp) - int exit_code = 0; - - crm_system_name = strdup("sbd:pcmk"); -- cl_log(LOG_INFO, "Monitoring Pacemaker health"); -+ cl_log(LOG_NOTICE, "Monitoring Pacemaker health"); - set_proc_title("sbd: watcher: Pacemaker"); - setenv("PCMK_watchdog", "true", 1); - --- -1.8.3.1 - diff --git a/SOURCES/0009-Sanity-sbd-inquisitor-free-timeout-action-on-bail-ou.patch b/SOURCES/0009-Sanity-sbd-inquisitor-free-timeout-action-on-bail-ou.patch new file mode 100644 index 0000000..9eae5d6 --- /dev/null +++ b/SOURCES/0009-Sanity-sbd-inquisitor-free-timeout-action-on-bail-ou.patch @@ -0,0 +1,36 @@ +From 5c80753afb4abc2b5b024f4a5f2fc78669bda70b Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Mon, 15 Apr 2019 17:39:12 +0200 +Subject: [PATCH] Sanity: sbd-inquisitor: free timeout action on bail out + +--- + src/sbd-inquisitor.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c +index 77c6e4f..715e978 100644 +--- a/src/sbd-inquisitor.c ++++ b/src/sbd-inquisitor.c +@@ -1088,7 +1088,8 @@ int main(int argc, char **argv, char **envp) + break; + case 'h': + usage(); +- return (0); ++ goto out; ++ break; + default: + exit_status = -2; + goto out; +@@ -1241,6 +1242,9 @@ int main(int argc, char **argv, char **envp) + } + + out: ++ if (timeout_action) { ++ free(timeout_action); ++ } + if (exit_status < 0) { + if (exit_status == -2) { + usage(); +-- +1.8.3.1 + diff --git a/SOURCES/0010-Refactor-sbd-cluster-let-scan-do-the-job-of-proc-par.patch b/SOURCES/0010-Refactor-sbd-cluster-let-scan-do-the-job-of-proc-par.patch deleted file mode 100644 index e3a2b62..0000000 --- a/SOURCES/0010-Refactor-sbd-cluster-let-scan-do-the-job-of-proc-par.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 13295dec0f567d6795522241fff6817a68b02033 Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Wed, 2 May 2018 20:07:12 +0200 -Subject: [PATCH] Refactor: sbd-cluster: let scan do the job of proc-parsing - -Now it would as well parse names containing spaces properly. ---- - src/sbd-cluster.c | 9 ++++----- - 1 file changed, 4 insertions(+), 5 deletions(-) - -diff --git a/src/sbd-cluster.c b/src/sbd-cluster.c -index 8512f23..f5e9ff0 100644 ---- a/src/sbd-cluster.c -+++ b/src/sbd-cluster.c -@@ -338,7 +338,7 @@ sbd_membership_destroy(gpointer user_data) - * \brief Get process ID and name associated with a /proc directory entry - * - * \param[in] entry Directory entry (must be result of readdir() on /proc) -- * \param[out] name If not NULL, a char[64] to hold the process name -+ * \param[out] name If not NULL, a char[16] to hold the process name - * \param[out] pid If not NULL, will be set to process ID of entry - * - * \return 0 on success, -1 if entry is not for a process or info not found -@@ -353,7 +353,7 @@ sbd_procfs_process_info(struct dirent *entry, char *name, int *pid) - int fd, local_pid; - FILE *file; - struct stat statbuf; -- char key[16] = { 0 }, procpath[128] = { 0 }; -+ char procpath[128] = { 0 }; - - /* We're only interested in entries whose name is a PID, - * so skip anything non-numeric or that is too long. -@@ -396,8 +396,7 @@ sbd_procfs_process_info(struct dirent *entry, char *name, int *pid) - if (!file) { - return -1; - } -- if ((fscanf(file, "%15s%63s", key, name) != 2) -- || safe_str_neq(key, "Name:")) { -+ if (fscanf(file, "Name:\t%15[a-zA-Z0-9 _-]", name) != 1) { - fclose(file); - return -1; - } -@@ -484,7 +483,7 @@ static long unsigned int - find_pacemaker_remote(void) - { - DIR *dp; -- char entry_name[64]; -+ char entry_name[16]; - struct dirent *entry; - - dp = opendir("/proc"); --- -1.8.3.1 - diff --git a/SOURCES/0010-Sanity-sbd-md-prevent-unrealistic-overflow-on-sector.patch b/SOURCES/0010-Sanity-sbd-md-prevent-unrealistic-overflow-on-sector.patch new file mode 100644 index 0000000..cc8c4ee --- /dev/null +++ b/SOURCES/0010-Sanity-sbd-md-prevent-unrealistic-overflow-on-sector.patch @@ -0,0 +1,29 @@ +From f6af36a0fb05b5a37b3dfb153677e28ca5cb3fd8 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Mon, 15 Apr 2019 17:37:42 +0200 +Subject: [PATCH] Sanity: sbd-md: prevent unrealistic overflow on sector io + calc + +--- + src/sbd-md.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/src/sbd-md.c b/src/sbd-md.c +index 60a1873..f437c41 100644 +--- a/src/sbd-md.c ++++ b/src/sbd-md.c +@@ -162,9 +162,9 @@ sector_io(struct sbd_context *st, int sector, void *data, int rw) + + memset(&st->io, 0, sizeof(struct iocb)); + if (rw) { +- io_prep_pwrite(&st->io, st->devfd, data, sector_size, sector_size * sector); ++ io_prep_pwrite(&st->io, st->devfd, data, sector_size, (long long) sector_size * sector); + } else { +- io_prep_pread(&st->io, st->devfd, data, sector_size, sector_size * sector); ++ io_prep_pread(&st->io, st->devfd, data, sector_size, (long long) sector_size * sector); + } + + if (io_submit(st->ioctx, 1, ios) != 1) { +-- +1.8.3.1 + diff --git a/SOURCES/0011-Fix-search-for-pacemaker-remoted-with-pacemaker-2.0.patch b/SOURCES/0011-Fix-search-for-pacemaker-remoted-with-pacemaker-2.0.patch deleted file mode 100644 index 44a35aa..0000000 --- a/SOURCES/0011-Fix-search-for-pacemaker-remoted-with-pacemaker-2.0.patch +++ /dev/null @@ -1,61 +0,0 @@ -From a6acd38756fc7f93afcf5c08b8cdf139a3e354e7 Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Wed, 2 May 2018 13:30:42 +0200 -Subject: [PATCH] Fix: sbd-cluster: search for pacemaker-remoted with - pcmk-2.0.0rc3 - ---- - src/sbd-cluster.c | 16 ++++++++++------ - 1 file changed, 10 insertions(+), 6 deletions(-) - -diff --git a/src/sbd-cluster.c b/src/sbd-cluster.c -index ae4750e..8512f23 100644 ---- a/src/sbd-cluster.c -+++ b/src/sbd-cluster.c -@@ -42,6 +42,13 @@ - //undef SUPPORT_PLUGIN - //define SUPPORT_PLUGIN 1 - -+/* binary for pacemaker-remote has changed with pacemaker 2 */ -+#ifdef CRM_SCORE_INFINITY -+#define PACEMAKER_REMOTE_BINARY "pacemaker-remoted" -+#else -+#define PACEMAKER_REMOTE_BINARY "pacemaker_remoted" -+#endif -+ - static bool remote_node = false; - static pid_t remoted_pid = 0; - static int reconnect_msec = 1000; -@@ -435,7 +442,7 @@ sbd_remote_check(gpointer user_data) - - } else { - int rc = 0; -- char proc_path[PATH_MAX], exe_path[PATH_MAX], expected_path[PATH_MAX]; -+ char proc_path[PATH_MAX], exe_path[PATH_MAX]; - - /* check to make sure pid hasn't been reused by another process */ - snprintf(proc_path, sizeof(proc_path), "/proc/%lu/exe", (long unsigned int)remoted_pid); -@@ -447,10 +454,7 @@ sbd_remote_check(gpointer user_data) - } - exe_path[rc] = 0; - -- rc = snprintf(expected_path, sizeof(proc_path), "%s/pacemaker_remoted", SBINDIR); -- expected_path[rc] = 0; -- -- if (strcmp(exe_path, expected_path) == 0) { -+ if (strcmp(exe_path, SBINDIR "/" PACEMAKER_REMOTE_BINARY) == 0) { - cl_log(LOG_DEBUG, "Process %s (%ld) is active", - exe_path, (long)remoted_pid); - running = 1; -@@ -499,7 +503,7 @@ find_pacemaker_remote(void) - - /* entry_name is truncated to 16 characters including the nul terminator */ - cl_log(LOG_DEBUG, "Found %s at %u", entry_name, pid); -- if (strcmp(entry_name, "pacemaker_remot") == 0) { -+ if (strncmp(entry_name, PACEMAKER_REMOTE_BINARY, 15) == 0) { - cl_log(LOG_NOTICE, "Found Pacemaker Remote at PID %u", pid); - remoted_pid = pid; - remote_node = true; --- -1.8.3.1 - diff --git a/SOURCES/0011-Sanity-sbd-md-remove-some-left-over-code.patch b/SOURCES/0011-Sanity-sbd-md-remove-some-left-over-code.patch new file mode 100644 index 0000000..64dd8cb --- /dev/null +++ b/SOURCES/0011-Sanity-sbd-md-remove-some-left-over-code.patch @@ -0,0 +1,35 @@ +From a80fe9392fd910074eccc4733ff2cd3e1625e48e Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Mon, 15 Apr 2019 17:36:12 +0200 +Subject: [PATCH] Sanity: sbd-md: remove some left over code + +--- + src/sbd-md.c | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/src/sbd-md.c b/src/sbd-md.c +index c51d381..60a1873 100644 +--- a/src/sbd-md.c ++++ b/src/sbd-md.c +@@ -373,7 +373,6 @@ init_device(struct sbd_context *st) + struct sector_header_s *s_header; + struct sector_node_s *s_node; + struct sector_mbox_s *s_mbox; +- struct stat s; + char uuid[37]; + int i; + int rc = 0; +@@ -394,10 +393,6 @@ init_device(struct sbd_context *st) + uuid_generate(s_header->uuid); + uuid_unparse_lower(s_header->uuid, uuid); + +- fstat(st->devfd, &s); +- /* printf("st_size = %ld, st_blksize = %ld, st_blocks = %ld\n", +- s.st_size, s.st_blksize, s.st_blocks); */ +- + cl_log(LOG_INFO, "Creating version %d.%d header on device %d (uuid: %s)", + s_header->version, s_header->minor_version, + st->devfd, uuid); +-- +1.8.3.1 + diff --git a/SOURCES/0012-Fix-sbd-common-don-t-follow-symlinks-outside-dev-for.patch b/SOURCES/0012-Fix-sbd-common-don-t-follow-symlinks-outside-dev-for.patch deleted file mode 100644 index 0de1f14..0000000 --- a/SOURCES/0012-Fix-sbd-common-don-t-follow-symlinks-outside-dev-for.patch +++ /dev/null @@ -1,96 +0,0 @@ -From 5d52fa8c3c903df4be0e4e954fbca9b3b15285c6 Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Fri, 14 Sep 2018 17:51:50 +0200 -Subject: [PATCH] Fix: sbd-common: don't follow symlinks outside /dev for - watchdog - -This makes it easier to define a SELinux-policy that keeps -avc-log clean on /dev traversal triggered by query-watchdog. ---- - src/sbd-common.c | 42 ++++++++++++++++++++++++++++++++++++++---- - 1 file changed, 38 insertions(+), 4 deletions(-) - -diff --git a/src/sbd-common.c b/src/sbd-common.c -index 0ce6478..fcb7a31 100644 ---- a/src/sbd-common.c -+++ b/src/sbd-common.c -@@ -251,7 +251,8 @@ watchdog_close(bool disarm) - #define MAX_WATCHDOGS 64 - #define SYS_CLASS_WATCHDOG "/sys/class/watchdog" - #define SYS_CHAR_DEV_DIR "/sys/dev/char" --#define WATCHDOG_NODEDIR "/dev" -+#define WATCHDOG_NODEDIR "/dev/" -+#define WATCHDOG_NODEDIR_LEN 5 - - struct watchdog_list_item { - dev_t dev; -@@ -273,7 +274,7 @@ watchdog_populate_list(void) - struct dirent *entry; - char entry_name[280]; - DIR *dp; -- char buf[256] = ""; -+ char buf[280] = ""; - - if (watchdog_list != NULL) { - return; -@@ -313,7 +314,38 @@ watchdog_populate_list(void) - struct stat statbuf; - - snprintf(entry_name, sizeof(entry_name), -- WATCHDOG_NODEDIR "/%s", entry->d_name); -+ WATCHDOG_NODEDIR "%s", entry->d_name); -+ if (entry->d_type == DT_LNK) { -+ int len; -+ -+ /* !realpath(entry_name, buf) unfortunately does a stat on -+ * target so we can't really use it to check if links stay -+ * within /dev without triggering e.g. AVC-logs (with -+ * SELinux policy that just allows stat within /dev). -+ * Without canonicalization that doesn't actually touch the -+ * filesystem easily available introduce some limitations -+ * for simplicity: -+ * - just simple path without '..' -+ * - just one level of symlinks (avoid e.g. loop-checking) -+ */ -+ len = readlink(entry_name, buf, sizeof(buf) - 1); -+ if ((len < 1) || -+ (len > sizeof(buf) - WATCHDOG_NODEDIR_LEN - 1)) { -+ continue; -+ } -+ buf[len] = '\0'; -+ if (buf[0] != '/') { -+ memmove(&buf[WATCHDOG_NODEDIR_LEN], buf, len+1); -+ memcpy(buf, WATCHDOG_NODEDIR, WATCHDOG_NODEDIR_LEN); -+ len += WATCHDOG_NODEDIR_LEN; -+ } -+ if (strstr(buf, "/../") || -+ strncmp(WATCHDOG_NODEDIR, buf, WATCHDOG_NODEDIR_LEN) || -+ lstat(buf, &statbuf) || -+ !S_ISCHR(statbuf.st_mode)) { -+ continue; -+ } -+ } - if(!stat(entry_name, &statbuf) && S_ISCHR(statbuf.st_mode)) { - int i; - -@@ -322,6 +354,7 @@ watchdog_populate_list(void) - int wdfd = watchdog_init_fd(entry_name, -1); - struct watchdog_list_item *wdg = - calloc(1, sizeof(struct watchdog_list_item)); -+ int len; - - wdg->dev = watchdogs[i]; - wdg->dev_node = strdup(entry_name); -@@ -343,7 +376,8 @@ watchdog_populate_list(void) - snprintf(entry_name, sizeof(entry_name), - SYS_CHAR_DEV_DIR "/%d:%d/device/driver", - major(watchdogs[i]), minor(watchdogs[i])); -- if (readlink(entry_name, buf, sizeof(buf)) > 0) { -+ if ((len = readlink(entry_name, buf, sizeof(buf) - 1)) > 0) { -+ buf[len] = '\0'; - wdg->dev_driver = strdup(basename(buf)); - } else if ((wdg->dev_ident) && - (strcmp(wdg->dev_ident, --- -1.8.3.1 - diff --git a/SOURCES/0012-Fix-sbd-common-query-rt-budget-0-otherwise-try-movin.patch b/SOURCES/0012-Fix-sbd-common-query-rt-budget-0-otherwise-try-movin.patch new file mode 100644 index 0000000..41cf428 --- /dev/null +++ b/SOURCES/0012-Fix-sbd-common-query-rt-budget-0-otherwise-try-movin.patch @@ -0,0 +1,215 @@ +From eaeed6cca46a0223617ead834aaa576dd5ad07ff Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Fri, 31 May 2019 16:11:16 +0200 +Subject: [PATCH] Fix: sbd-common: query rt-budget > 0 otherwise try moving to + root-slice + +--- + src/sbd-common.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++ + src/sbd-inquisitor.c | 15 +++++++ + src/sbd.h | 2 + + src/sbd.sysconfig | 14 +++++++ + 4 files changed, 141 insertions(+) + +diff --git a/src/sbd-common.c b/src/sbd-common.c +index 873a76e..ebfdaa3 100644 +--- a/src/sbd-common.c ++++ b/src/sbd-common.c +@@ -662,6 +662,112 @@ static void sbd_memlock(int stackgrowK, int heapgrowK) + #endif + } + ++static int get_realtime_budget(void) ++{ ++ FILE *f; ++ char fname[PATH_MAX]; ++ int res = -1, lnum = 0; ++ char *cgroup = NULL, *namespecs = NULL; ++ ++ snprintf(fname, PATH_MAX, "/proc/%jd/cgroup", (intmax_t)getpid()); ++ f = fopen(fname, "rt"); ++ if (f == NULL) { ++ cl_log(LOG_WARNING, "Can't open cgroup file for pid=%jd", ++ (intmax_t)getpid()); ++ goto exit_res; ++ } ++ while( fscanf(f, "%d:%m[^:]:%m[^\n]", &lnum, &namespecs, &cgroup) !=EOF ) { ++ if (namespecs && strstr(namespecs, "cpuacct")) { ++ free(namespecs); ++ break; ++ } ++ if (cgroup) { ++ free(cgroup); ++ cgroup = NULL; ++ } ++ if (namespecs) { ++ free(namespecs); ++ namespecs = NULL; ++ } ++ } ++ fclose(f); ++ if (cgroup == NULL) { ++ cl_log(LOG_WARNING, "Failed getting cgroup for pid=%jd", ++ (intmax_t)getpid()); ++ goto exit_res; ++ } ++ snprintf(fname, PATH_MAX, "/sys/fs/cgroup/cpu%s/cpu.rt_runtime_us", ++ cgroup); ++ f = fopen(fname, "rt"); ++ if (f == NULL) { ++ cl_log(LOG_WARNING, "cpu.rt_runtime_us existed for root-slice but " ++ "doesn't for '%s'", cgroup); ++ goto exit_res; ++ } ++ if (fscanf(f, "%d", &res) != 1) { ++ cl_log(LOG_WARNING, "failed reading rt-budget from %s", fname); ++ } else { ++ cl_log(LOG_INFO, "slice='%s' has rt-budget=%d", cgroup, res); ++ } ++ fclose(f); ++ ++exit_res: ++ if (cgroup) { ++ free(cgroup); ++ } ++ return res; ++} ++ ++/* stolen from corosync */ ++static int sbd_move_to_root_cgroup(bool enforce_root_cgroup) { ++ FILE *f; ++ int res = -1; ++ ++ /* ++ * /sys/fs/cgroup is hardcoded, because most of Linux distributions are now ++ * using systemd and systemd uses hardcoded path of cgroup mount point. ++ * ++ * This feature is expected to be removed as soon as systemd gets support ++ * for managing RT configuration. ++ */ ++ f = fopen("/sys/fs/cgroup/cpu/cpu.rt_runtime_us", "rt"); ++ if (f == NULL) { ++ cl_log(LOG_DEBUG, "cpu.rt_runtime_us doesn't exist -> " ++ "system without cgroup or with disabled CONFIG_RT_GROUP_SCHED"); ++ res = 0; ++ goto exit_res; ++ } ++ fclose(f); ++ ++ if ((!enforce_root_cgroup) && (get_realtime_budget() > 0)) { ++ cl_log(LOG_DEBUG, "looks as if we have rt-budget in the slice we are " ++ "-> skip moving to root-slice"); ++ res = 0; ++ goto exit_res; ++ } ++ ++ f = fopen("/sys/fs/cgroup/cpu/tasks", "w"); ++ if (f == NULL) { ++ cl_log(LOG_WARNING, "Can't open cgroups tasks file for writing"); ++ ++ goto exit_res; ++ } ++ ++ if (fprintf(f, "%jd\n", (intmax_t)getpid()) <= 0) { ++ cl_log(LOG_WARNING, "Can't write sbd pid into cgroups tasks file"); ++ goto close_and_exit_res; ++ } ++ ++close_and_exit_res: ++ if (fclose(f) != 0) { ++ cl_log(LOG_WARNING, "Can't close cgroups tasks file"); ++ goto exit_res; ++ } ++ ++exit_res: ++ return (res); ++} ++ + void + sbd_make_realtime(int priority, int stackgrowK, int heapgrowK) + { +@@ -670,6 +776,10 @@ sbd_make_realtime(int priority, int stackgrowK, int heapgrowK) + } + + #ifdef SCHED_RR ++ if (move_to_root_cgroup) { ++ sbd_move_to_root_cgroup(enforce_moving_to_root_cgroup); ++ } ++ + { + int pcurrent = 0; + int pmin = sched_get_priority_min(SCHED_RR); +diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c +index abde4e5..cef5cc7 100644 +--- a/src/sbd-inquisitor.c ++++ b/src/sbd-inquisitor.c +@@ -33,6 +33,8 @@ int start_mode = 0; + char* pidfile = NULL; + bool do_flush = true; + char timeout_sysrq_char = 'b'; ++bool move_to_root_cgroup = true; ++bool enforce_moving_to_root_cgroup = false; + + int parse_device_line(const char *line); + +@@ -965,6 +967,19 @@ int main(int argc, char **argv, char **envp) + timeout_action = strdup(value); + } + ++ value = getenv("SBD_MOVE_TO_ROOT_CGROUP"); ++ if(value) { ++ move_to_root_cgroup = crm_is_true(value); ++ ++ if (move_to_root_cgroup) { ++ enforce_moving_to_root_cgroup = true; ++ } else { ++ if (strcmp(value, "auto") == 0) { ++ move_to_root_cgroup = true; ++ } ++ } ++ } ++ + while ((c = getopt(argc, argv, "czC:DPRTWZhvw:d:n:p:1:2:3:4:5:t:I:F:S:s:r:")) != -1) { + switch (c) { + case 'D': +diff --git a/src/sbd.h b/src/sbd.h +index 3b05a11..ac30ec7 100644 +--- a/src/sbd.h ++++ b/src/sbd.h +@@ -159,6 +159,8 @@ extern bool watchdogdev_is_default; + extern char* local_uname; + extern bool do_flush; + extern char timeout_sysrq_char; ++extern bool move_to_root_cgroup; ++extern bool enforce_moving_to_root_cgroup; + + /* Global, non-tunable variables: */ + extern int sector_size; +diff --git a/src/sbd.sysconfig b/src/sbd.sysconfig +index f163f21..e1a60ed 100644 +--- a/src/sbd.sysconfig ++++ b/src/sbd.sysconfig +@@ -91,6 +91,20 @@ SBD_WATCHDOG_TIMEOUT=5 + # + SBD_TIMEOUT_ACTION=flush,reboot + ++## Type: yesno / auto ++## Default: auto ++# ++# If CPUAccounting is enabled default is not to assign any RT-budget ++# to the system.slice which prevents sbd from running RR-scheduled. ++# ++# One way to escape that issue is to move sbd-processes from the ++# slice they were originally started to root-slice. ++# Of course starting sbd in a certain slice might be intentional. ++# Thus in auto-mode sbd will check if the slice has RT-budget assigned. ++# If that is the case sbd will stay in that slice while it will ++# be moved to root-slice otherwise. ++SBD_MOVE_TO_ROOT_CGROUP=auto ++ + ## Type: string + ## Default: "" + # +-- +1.8.3.1 + diff --git a/SOURCES/0013-Fix-sbd-pacemaker-assume-graceful-exit-if-leftovers-.patch b/SOURCES/0013-Fix-sbd-pacemaker-assume-graceful-exit-if-leftovers-.patch new file mode 100644 index 0000000..8c92df8 --- /dev/null +++ b/SOURCES/0013-Fix-sbd-pacemaker-assume-graceful-exit-if-leftovers-.patch @@ -0,0 +1,60 @@ +From 824fe834c67fb7bae7feb87607381f9fa8fa2945 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Fri, 7 Jun 2019 19:09:06 +0200 +Subject: [PATCH] Fix: sbd-pacemaker: assume graceful exit if leftovers are + unmanged + +--- + src/sbd-pacemaker.c | 32 +++++++++++++++++++++++++++++++- + 1 file changed, 31 insertions(+), 1 deletion(-) + +diff --git a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c +index 9a8b95f..2b35ff6 100644 +--- a/src/sbd-pacemaker.c ++++ b/src/sbd-pacemaker.c +@@ -333,11 +333,41 @@ compute_status(pe_working_set_t * data_set) + } + } + ++ /* If we are in shutdown-state once this will go on till the end. ++ * If we've on top reached a state of 0 locally running resources ++ * we can assume a clean shutdown. ++ * Tricky are the situations where the node is in maintenance-mode ++ * or resources are unmanaged. So if the node is in maintenance or ++ * all left-over running resources are unmanaged we assume intention. ++ */ + if (node->details->shutdown) { + pcmk_shutdown = 1; + } +- if (pcmk_shutdown && !(node->details->running_rsc)) { ++ if (pcmk_shutdown) ++ { + pcmk_clean_shutdown = 1; ++ if (!(node->details->maintenance)) { ++ GListPtr iter; ++ ++ for (iter = node->details->running_rsc; ++ iter != NULL; iter = iter->next) { ++ resource_t *rsc = (resource_t *) iter->data; ++ ++ ++ if (is_set(rsc->flags, pe_rsc_managed)) { ++ pcmk_clean_shutdown = 0; ++ crm_debug("not clean as %s managed and still running", ++ rsc->id); ++ break; ++ } ++ } ++ if (pcmk_clean_shutdown) { ++ crm_debug("pcmk_clean_shutdown because " ++ "all managed resources down"); ++ } ++ } else { ++ crm_debug("pcmk_clean_shutdown because node is in maintenance"); ++ } + } + notify_parent(); + return; +-- +1.8.3.1 + diff --git a/SOURCES/0013-Refactor-sbd-common-separate-assignment-and-comparis.patch b/SOURCES/0013-Refactor-sbd-common-separate-assignment-and-comparis.patch deleted file mode 100644 index 2108e37..0000000 --- a/SOURCES/0013-Refactor-sbd-common-separate-assignment-and-comparis.patch +++ /dev/null @@ -1,33 +0,0 @@ -From e13297f45b4c5868800b1d3fc359bfd0723fcc5f Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Mon, 17 Sep 2018 23:13:37 +0200 -Subject: [PATCH] Refactor: sbd-common: separate assignment and comparison - ---- - src/sbd-common.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - -diff --git a/src/sbd-common.c b/src/sbd-common.c -index fcb7a31..679f946 100644 ---- a/src/sbd-common.c -+++ b/src/sbd-common.c -@@ -376,12 +376,13 @@ watchdog_populate_list(void) - snprintf(entry_name, sizeof(entry_name), - SYS_CHAR_DEV_DIR "/%d:%d/device/driver", - major(watchdogs[i]), minor(watchdogs[i])); -- if ((len = readlink(entry_name, buf, sizeof(buf) - 1)) > 0) { -+ len = readlink(entry_name, buf, sizeof(buf) - 1); -+ if (len > 0) { - buf[len] = '\0'; - wdg->dev_driver = strdup(basename(buf)); - } else if ((wdg->dev_ident) && -- (strcmp(wdg->dev_ident, -- "Software Watchdog") == 0)) { -+ (strcmp(wdg->dev_ident, -+ "Software Watchdog") == 0)) { - wdg->dev_driver = strdup("softdog"); - } - break; --- -1.8.3.1 - diff --git a/SOURCES/0014-Fix-sbd-cluster-periodically-check-corosync-daemon-l.patch b/SOURCES/0014-Fix-sbd-cluster-periodically-check-corosync-daemon-l.patch new file mode 100644 index 0000000..9f4de96 --- /dev/null +++ b/SOURCES/0014-Fix-sbd-cluster-periodically-check-corosync-daemon-l.patch @@ -0,0 +1,123 @@ +From 1387ed890e3a9e246e9b9f780b2a7cb5379459ab Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Wed, 5 Jun 2019 11:32:49 +0200 +Subject: [PATCH] Fix: sbd-cluster: periodically check corosync-daemon liveness + +using votequorum_getinfo. +--- + configure.ac | 12 +++++++++++- + src/sbd-cluster.c | 36 ++++++++++++++++++++++++++++++++++-- + 2 files changed, 45 insertions(+), 3 deletions(-) + +diff --git a/configure.ac b/configure.ac +index fac26a8..c44e747 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -33,6 +33,7 @@ PKG_CHECK_MODULES(glib, [glib-2.0]) + dnl PKG_CHECK_MODULES(libcoroipcc, [libcoroipcc]) + + PKG_CHECK_MODULES(cmap, [libcmap], HAVE_cmap=1, HAVE_cmap=0) ++PKG_CHECK_MODULES(votequorum, [libvotequorum], HAVE_votequorum=1, HAVE_votequorum=0) + + dnl pacemaker > 1.1.8 + PKG_CHECK_MODULES(pacemaker, [pacemaker, pacemaker-cib], HAVE_pacemaker=1, HAVE_pacemaker=0) +@@ -49,7 +50,12 @@ elif test $HAVE_pacemaker = 1; then + if test $HAVE_cmap = 0; then + AC_MSG_NOTICE(No package 'cmap' found) + else +- CPPFLAGS="$CPPFLAGS $cmap_CFLAGS" ++ CPPFLAGS="$CPPFLAGS $cmap_CFLAGS" ++ fi ++ if test $HAVE_votequorum = 0; then ++ AC_MSG_NOTICE(No library 'votequorum' found) ++ else ++ CPPFLAGS="$CPPFLAGS $votequorum_CFLAGS" + fi + fi + +@@ -66,6 +72,7 @@ AC_CHECK_LIB(pe_rules, test_rule, , missing="yes") + AC_CHECK_LIB(crmcluster, crm_peer_init, , missing="yes") + AC_CHECK_LIB(uuid, uuid_unparse, , missing="yes") + AC_CHECK_LIB(cmap, cmap_initialize, , HAVE_cmap=0) ++AC_CHECK_LIB(votequorum, votequorum_getinfo, , HAVE_votequorum=0) + + dnl pacemaker >= 1.1.8 + AC_CHECK_HEADERS(pacemaker/crm/cluster.h) +@@ -107,6 +114,9 @@ fi + AC_DEFINE_UNQUOTED(CHECK_TWO_NODE, $HAVE_cmap, Turn on checking for 2-node cluster) + AM_CONDITIONAL(CHECK_TWO_NODE, test "$HAVE_cmap" = "1") + ++AC_DEFINE_UNQUOTED(CHECK_VOTEQUORUM_HANDLE, $HAVE_votequorum, Turn on periodic checking of votequorum-handle) ++AM_CONDITIONAL(CHECK_VOTEQUORUM_HANDLE, test "$HAVE_votequorum" = "1") ++ + CONFIGDIR="" + AC_ARG_WITH(configdir, + [ --with-configdir=DIR +diff --git a/src/sbd-cluster.c b/src/sbd-cluster.c +index 541212f..9fb6224 100644 +--- a/src/sbd-cluster.c ++++ b/src/sbd-cluster.c +@@ -80,6 +80,12 @@ sbd_plugin_membership_dispatch(cpg_handle_t handle, + + #if SUPPORT_COROSYNC + ++#if CHECK_VOTEQUORUM_HANDLE ++#include ++ ++static votequorum_handle_t votequorum_handle = 0; ++#endif ++ + static bool two_node = false; + static bool ever_seen_both = false; + static int cpg_membership_entries = -1; +@@ -261,12 +267,32 @@ notify_timer_cb(gpointer data) + + #endif + case pcmk_cluster_corosync: ++ do { ++#if SUPPORT_COROSYNC && CHECK_VOTEQUORUM_HANDLE ++ struct votequorum_info info; ++ ++ if (votequorum_getinfo(votequorum_handle, 0, &info) != CS_OK) { ++ ++ votequorum_finalize(votequorum_handle); ++ if (votequorum_initialize(&votequorum_handle, NULL) != CS_OK) { ++ votequorum_handle = 0; ++ break; ++ } ++ if (votequorum_getinfo(votequorum_handle, 0, &info) != CS_OK) { ++ break; ++ } ++ } ++#endif ++ notify_parent(); ++ } while (0); ++ break; ++ + #if HAVE_DECL_PCMK_CLUSTER_CMAN + case pcmk_cluster_cman: +-#endif +- /* TODO - Make a CPG call and only call notify_parent() when we get a reply */ ++ + notify_parent(); + break; ++#endif + + default: + break; +@@ -533,6 +559,12 @@ find_pacemaker_remote(void) + static void + clean_up(int rc) + { ++#if CHECK_VOTEQUORUM_HANDLE ++ votequorum_finalize(votequorum_handle); ++ votequorum_handle = 0; /* there isn't really an invalid handle value ++ * just to be back where we started ++ */ ++#endif + return; + } + +-- +1.8.3.1 + diff --git a/SOURCES/0014-Fix-sbd-common-avoid-statting-potential-links.patch b/SOURCES/0014-Fix-sbd-common-avoid-statting-potential-links.patch deleted file mode 100644 index 1e61c36..0000000 --- a/SOURCES/0014-Fix-sbd-common-avoid-statting-potential-links.patch +++ /dev/null @@ -1,214 +0,0 @@ -From 5b4c866f7c0b4ef8061e131a1ee0d1c608d35054 Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Wed, 19 Sep 2018 16:15:27 +0200 -Subject: [PATCH] Fix: sbd-common: avoid statting potential links - -These potential links might be anything and statting - if just -allowed to stat chr-nodes (e.g. SELinux) - them would lead -to avc-logs in the SELinux case. ---- - src/sbd-common.c | 133 +++++++++++++++++++++++++++++++++++++++---------------- - 1 file changed, 96 insertions(+), 37 deletions(-) - -diff --git a/src/sbd-common.c b/src/sbd-common.c -index 679f946..cc84cd0 100644 ---- a/src/sbd-common.c -+++ b/src/sbd-common.c -@@ -262,6 +262,12 @@ struct watchdog_list_item { - struct watchdog_list_item *next; - }; - -+struct link_list_item { -+ char *dev_node; -+ char *link_name; -+ struct link_list_item *next; -+}; -+ - static struct watchdog_list_item *watchdog_list = NULL; - static int watchdog_list_items = 0; - -@@ -275,6 +281,7 @@ watchdog_populate_list(void) - char entry_name[280]; - DIR *dp; - char buf[280] = ""; -+ struct link_list_item *link_list = NULL; - - if (watchdog_list != NULL) { - return; -@@ -288,7 +295,7 @@ watchdog_populate_list(void) - FILE *file; - - snprintf(entry_name, sizeof(entry_name), -- SYS_CLASS_WATCHDOG "/%s/dev", entry->d_name); -+ SYS_CLASS_WATCHDOG "/%s/dev", entry->d_name); - file = fopen(entry_name, "r"); - if (file) { - int major, minor; -@@ -309,43 +316,59 @@ watchdog_populate_list(void) - /* search for watchdog nodes in /dev */ - dp = opendir(WATCHDOG_NODEDIR); - if (dp) { -+ /* first go for links and memorize them */ - while ((entry = readdir(dp))) { -- if ((entry->d_type == DT_CHR) || (entry->d_type == DT_LNK)) { -- struct stat statbuf; -+ if (entry->d_type == DT_LNK) { -+ int len; - - snprintf(entry_name, sizeof(entry_name), -- WATCHDOG_NODEDIR "%s", entry->d_name); -- if (entry->d_type == DT_LNK) { -- int len; -- -- /* !realpath(entry_name, buf) unfortunately does a stat on -- * target so we can't really use it to check if links stay -- * within /dev without triggering e.g. AVC-logs (with -- * SELinux policy that just allows stat within /dev). -- * Without canonicalization that doesn't actually touch the -- * filesystem easily available introduce some limitations -- * for simplicity: -- * - just simple path without '..' -- * - just one level of symlinks (avoid e.g. loop-checking) -- */ -- len = readlink(entry_name, buf, sizeof(buf) - 1); -- if ((len < 1) || -- (len > sizeof(buf) - WATCHDOG_NODEDIR_LEN - 1)) { -- continue; -- } -- buf[len] = '\0'; -- if (buf[0] != '/') { -- memmove(&buf[WATCHDOG_NODEDIR_LEN], buf, len+1); -- memcpy(buf, WATCHDOG_NODEDIR, WATCHDOG_NODEDIR_LEN); -- len += WATCHDOG_NODEDIR_LEN; -- } -- if (strstr(buf, "/../") || -- strncmp(WATCHDOG_NODEDIR, buf, WATCHDOG_NODEDIR_LEN) || -- lstat(buf, &statbuf) || -- !S_ISCHR(statbuf.st_mode)) { -- continue; -- } -+ WATCHDOG_NODEDIR "%s", entry->d_name); -+ -+ /* !realpath(entry_name, buf) unfortunately does a stat on -+ * target so we can't really use it to check if links stay -+ * within /dev without triggering e.g. AVC-logs (with -+ * SELinux policy that just allows stat within /dev). -+ * Without canonicalization that doesn't actually touch the -+ * filesystem easily available introduce some limitations -+ * for simplicity: -+ * - just simple path without '..' -+ * - just one level of symlinks (avoid e.g. loop-checking) -+ */ -+ len = readlink(entry_name, buf, sizeof(buf) - 1); -+ if ((len < 1) || -+ (len > sizeof(buf) - WATCHDOG_NODEDIR_LEN - 1)) { -+ continue; -+ } -+ buf[len] = '\0'; -+ if (buf[0] != '/') { -+ memmove(&buf[WATCHDOG_NODEDIR_LEN], buf, len+1); -+ memcpy(buf, WATCHDOG_NODEDIR, WATCHDOG_NODEDIR_LEN); -+ len += WATCHDOG_NODEDIR_LEN; -+ } -+ if (strstr(buf, "/../") || -+ strncmp(WATCHDOG_NODEDIR, buf, WATCHDOG_NODEDIR_LEN)) { -+ continue; -+ } else { -+ /* just memorize to avoid statting the target - SELinux */ -+ struct link_list_item *lli = -+ calloc(1, sizeof(struct link_list_item)); -+ -+ lli->dev_node = strdup(buf); -+ lli->link_name = strdup(entry_name); -+ lli->next = link_list; -+ link_list = lli; - } -+ } -+ } -+ -+ rewinddir(dp); -+ -+ while ((entry = readdir(dp))) { -+ if (entry->d_type == DT_CHR) { -+ struct stat statbuf; -+ -+ snprintf(entry_name, sizeof(entry_name), -+ WATCHDOG_NODEDIR "%s", entry->d_name); - if(!stat(entry_name, &statbuf) && S_ISCHR(statbuf.st_mode)) { - int i; - -@@ -353,8 +376,9 @@ watchdog_populate_list(void) - if (statbuf.st_rdev == watchdogs[i]) { - int wdfd = watchdog_init_fd(entry_name, -1); - struct watchdog_list_item *wdg = -- calloc(1, sizeof(struct watchdog_list_item)); -+ calloc(1, sizeof(struct watchdog_list_item)); - int len; -+ struct link_list_item *tmp_list = NULL; - - wdg->dev = watchdogs[i]; - wdg->dev_node = strdup(entry_name); -@@ -374,8 +398,8 @@ watchdog_populate_list(void) - } - - snprintf(entry_name, sizeof(entry_name), -- SYS_CHAR_DEV_DIR "/%d:%d/device/driver", -- major(watchdogs[i]), minor(watchdogs[i])); -+ SYS_CHAR_DEV_DIR "/%d:%d/device/driver", -+ major(watchdogs[i]), minor(watchdogs[i])); - len = readlink(entry_name, buf, sizeof(buf) - 1); - if (len > 0) { - buf[len] = '\0'; -@@ -385,14 +409,49 @@ watchdog_populate_list(void) - "Software Watchdog") == 0)) { - wdg->dev_driver = strdup("softdog"); - } -+ -+ /* create dupes if we have memorized links -+ * to this node -+ */ -+ for (tmp_list = link_list; tmp_list; -+ tmp_list = tmp_list->next) { -+ if (!strcmp(tmp_list->dev_node, -+ wdg->dev_node)) { -+ struct watchdog_list_item *dupe_wdg = -+ calloc(1, sizeof(struct watchdog_list_item)); -+ -+ /* as long as we never purge watchdog_list -+ * there is no need to dupe strings -+ */ -+ *dupe_wdg = *wdg; -+ dupe_wdg->dev_node = strdup(tmp_list->link_name); -+ dupe_wdg->next = watchdog_list; -+ watchdog_list = dupe_wdg; -+ watchdog_list_items++; -+ } -+ /* for performance reasons we could remove -+ * the link_list entry -+ */ -+ } - break; - } - } - } - } - } -+ - closedir(dp); - } -+ -+ /* cleanup link list */ -+ while (link_list) { -+ struct link_list_item *tmp_list = link_list; -+ -+ link_list = link_list->next; -+ free(tmp_list->dev_node); -+ free(tmp_list->link_name); -+ free(tmp_list); -+ } - } - - int watchdog_info(void) --- -1.8.3.1 - diff --git a/SOURCES/0015-Refactor-use-pacemaker-s-new-pe-api-with-constructor.patch b/SOURCES/0015-Refactor-use-pacemaker-s-new-pe-api-with-constructor.patch deleted file mode 100644 index 602daa1..0000000 --- a/SOURCES/0015-Refactor-use-pacemaker-s-new-pe-api-with-constructor.patch +++ /dev/null @@ -1,139 +0,0 @@ -From a34cafa9d69194e3cbfe3af20ceb2d08848c483c Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Mon, 19 Nov 2018 20:56:35 +0100 -Subject: [PATCH] Refactor: use pacemaker's new pe api with - constructors/destructors - -For backward compatibility add some compatibility code -for if pe_new_working_set isn't available. ---- - configure.ac | 3 +++ - src/sbd-pacemaker.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++------ - 2 files changed, 57 insertions(+), 7 deletions(-) - -diff --git a/configure.ac b/configure.ac -index 1f328c2..1dc273b 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -76,6 +76,9 @@ dnl pacemaker-2.0 removed support for corosync 1 cluster layer - AC_CHECK_DECLS([pcmk_cluster_classic_ais, pcmk_cluster_cman],,, - [#include ]) - -+dnl check for new pe-API -+AC_CHECK_FUNCS(pe_new_working_set) -+ - if test "$missing" = "yes"; then - AC_MSG_ERROR([Missing required libraries or functions.]) - fi -diff --git a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c -index a435d01..aac355a 100644 ---- a/src/sbd-pacemaker.c -+++ b/src/sbd-pacemaker.c -@@ -58,6 +58,31 @@ - - #include "sbd.h" - -+#ifndef HAVE_PE_NEW_WORKING_SET -+ -+#define pe_reset_working_set(data_set) cleanup_calculations(data_set) -+ -+static pe_working_set_t * -+pe_new_working_set() -+{ -+ pe_working_set_t *data_set = calloc(1, sizeof(pe_working_set_t)); -+ if (data_set != NULL) { -+ set_working_set_defaults(data_set); -+ } -+ return data_set; -+} -+ -+static void -+pe_free_working_set(pe_working_set_t *data_set) -+{ -+ if (data_set != NULL) { -+ pe_reset_working_set(data_set); -+ free(data_set); -+ } -+} -+ -+#endif -+ - extern int disk_count; - - static void clean_up(int rc); -@@ -74,6 +99,7 @@ static int cib_connected = 0; - - static cib_t *cib = NULL; - static xmlNode *current_cib = NULL; -+static pe_working_set_t *data_set = NULL; - - static long last_refresh = 0; - -@@ -361,7 +387,6 @@ static gboolean - mon_refresh_state(gpointer user_data) - { - xmlNode *cib_copy = NULL; -- pe_working_set_t data_set; - - if(current_cib == NULL) { - return FALSE; -@@ -382,14 +407,13 @@ mon_refresh_state(gpointer user_data) - - } else { - last_refresh = time(NULL); -- set_working_set_defaults(&data_set); -- data_set.input = cib_copy; -- data_set.flags |= pe_flag_have_stonith_resource; -- cluster_status(&data_set); -+ data_set->input = cib_copy; -+ data_set->flags |= pe_flag_have_stonith_resource; -+ cluster_status(data_set); - -- compute_status(&data_set); -+ compute_status(data_set); - -- cleanup_calculations(&data_set); -+ pe_reset_working_set(data_set); - } - - return FALSE; -@@ -398,6 +422,21 @@ mon_refresh_state(gpointer user_data) - static void - clean_up(int rc) - { -+ if (timer_id_reconnect > 0) { -+ g_source_remove(timer_id_reconnect); -+ timer_id_reconnect = 0; -+ } -+ -+ if (timer_id_notify > 0) { -+ g_source_remove(timer_id_notify); -+ timer_id_notify = 0; -+ } -+ -+ if (data_set != NULL) { -+ pe_free_working_set(data_set); -+ data_set = NULL; -+ } -+ - if (cib != NULL) { - cib->cmds->signoff(cib); - cib_delete(cib); -@@ -425,6 +464,14 @@ servant_pcmk(const char *diskname, int mode, const void* argp) - set_crm_log_level(LOG_CRIT); - } - -+ -+ if (data_set == NULL) { -+ data_set = pe_new_working_set(); -+ } -+ if (data_set == NULL) { -+ return -1; -+ } -+ - if (current_cib == NULL) { - cib = cib_new(); - --- -1.8.3.1 - diff --git a/SOURCES/0015-build-say-library-when-missing-cmap-not-package-to-a.patch b/SOURCES/0015-build-say-library-when-missing-cmap-not-package-to-a.patch new file mode 100644 index 0000000..231c77c --- /dev/null +++ b/SOURCES/0015-build-say-library-when-missing-cmap-not-package-to-a.patch @@ -0,0 +1,26 @@ +From 0de14256fc873aee735117955662503b773bf71c Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Tue, 11 Jun 2019 08:05:33 +0200 +Subject: [PATCH] build: say library when missing cmap not package to avoid + confusion + +--- + configure.ac | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/configure.ac b/configure.ac +index c44e747..1c55094 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -48,7 +48,7 @@ if test $HAVE_pacemaker = 0 -a $HAVE_pcmk = 0; then + elif test $HAVE_pacemaker = 1; then + CPPFLAGS="$CPPFLAGS $glib_CFLAGS $pacemaker_CFLAGS" + if test $HAVE_cmap = 0; then +- AC_MSG_NOTICE(No package 'cmap' found) ++ AC_MSG_NOTICE(No library 'cmap' found) + else + CPPFLAGS="$CPPFLAGS $cmap_CFLAGS" + fi +-- +1.8.3.1 + diff --git a/SOURCES/0016-Feature-make-timeout-action-executed-by-sbd-configur.patch b/SOURCES/0016-Feature-make-timeout-action-executed-by-sbd-configur.patch deleted file mode 100644 index cdffdd4..0000000 --- a/SOURCES/0016-Feature-make-timeout-action-executed-by-sbd-configur.patch +++ /dev/null @@ -1,294 +0,0 @@ -From b64c30af56e7eabd63ce1db25bc5ed9b953485af Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Fri, 23 Nov 2018 14:09:22 +0100 -Subject: [PATCH] Feature: make timeout-action executed by sbd configurable - ---- - man/sbd.8.pod | 19 +++++++++++++++++++ - src/sbd-common.c | 22 ++++++++++++++++------ - src/sbd-inquisitor.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++--- - src/sbd-md.c | 2 +- - src/sbd.h | 3 +++ - src/sbd.sysconfig | 18 ++++++++++++++++++ - 6 files changed, 107 insertions(+), 10 deletions(-) - -diff --git a/man/sbd.8.pod b/man/sbd.8.pod -index ffd01c2..dbb3855 100644 ---- a/man/sbd.8.pod -+++ b/man/sbd.8.pod -@@ -333,6 +333,23 @@ prevent a successful crashdump from ever being written. - - Defaults to 240 seconds. Set to zero to disable. - -+=item B<-r> I -+ -+Actions to be executed when the watchers don't timely report to the sbd -+master process or one of the watchers detects that the master process -+has died. -+ -+Set timeout-action to comma-separated combination of -+noflush|flush plus reboot|crashdump|off. -+If just one of both is given the other stays at the default. -+ -+This doesn't affect actions like off, crashdump, reboot explicitly -+triggered via message slots. -+And it does as well not configure the action a watchdog would -+trigger should it run off (there is no generic interface). -+ -+Defaults to flush,reboot. -+ - =back - - =head2 allocate -@@ -552,6 +569,8 @@ options to pass to the daemon: - - C will fail to start if no C is specified. See the - installed template for more options that can be configured here. -+In general configuration done via parameters takes precedence over -+the configuration from the configuration file. - - =head2 Testing the sbd installation - -diff --git a/src/sbd-common.c b/src/sbd-common.c -index cc84cd0..0e8be65 100644 ---- a/src/sbd-common.c -+++ b/src/sbd-common.c -@@ -98,6 +98,8 @@ usage(void) - " (default is 1, set to 0 to disable)\n" - "-P Check Pacemaker quorum and node health (optional, watch only)\n" - "-Z Enable trace mode. WARNING: UNSAFE FOR PRODUCTION!\n" -+"-r Set timeout-action to comma-separated combination of\n" -+" noflush|flush plus reboot|crashdump|off (default is flush,reboot)\n" - "Commands:\n" - #if SUPPORT_SHARED_DISK - "create initialize N slots on - OVERWRITES DEVICE!\n" -@@ -769,7 +771,7 @@ sysrq_trigger(char t) - - - static void --do_exit(char kind) -+do_exit(char kind, bool do_flush) - { - /* TODO: Turn debug_mode into a bit field? Delay + kdump for example */ - const char *reason = NULL; -@@ -814,7 +816,9 @@ do_exit(char kind) - } - - cl_log(LOG_EMERG, "Rebooting system: %s", reason); -- sync(); -+ if (do_flush) { -+ sync(); -+ } - - if(kind == 'c') { - watchdog_close(true); -@@ -834,19 +838,25 @@ do_exit(char kind) - void - do_crashdump(void) - { -- do_exit('c'); -+ do_exit('c', true); - } - - void - do_reset(void) - { -- do_exit('b'); -+ do_exit('b', true); - } - - void - do_off(void) - { -- do_exit('o'); -+ do_exit('o', true); -+} -+ -+void -+do_timeout_action(void) -+{ -+ do_exit(timeout_sysrq_char, do_flush); - } - - /* -@@ -980,7 +990,7 @@ notify_parent(void) - /* Our parent died unexpectedly. Triggering - * self-fence. */ - cl_log(LOG_WARNING, "Our parent is dead."); -- do_reset(); -+ do_timeout_action(); - } - - switch (servant_health) { -diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c -index 9b193d4..8e0bc87 100644 ---- a/src/sbd-inquisitor.c -+++ b/src/sbd-inquisitor.c -@@ -31,6 +31,8 @@ int servant_restart_interval = 5; - int servant_restart_count = 1; - int start_mode = 0; - char* pidfile = NULL; -+bool do_flush = true; -+char timeout_sysrq_char = 'b'; - - int parse_device_line(const char *line); - -@@ -655,7 +657,7 @@ void inquisitor_child(void) - /* At level 2 or above, we do nothing, but expect - * things to eventually return to - * normal. */ -- do_reset(); -+ do_timeout_action(); - } else { - cl_log(LOG_ERR, "SBD: DEBUG MODE: Would have fenced due to timeout!"); - } -@@ -668,7 +670,7 @@ void inquisitor_child(void) - - if (debug_mode && watchdog_use) { - /* In debug mode, trigger a reset before the watchdog can panic the machine */ -- do_reset(); -+ do_timeout_action(); - } - } - -@@ -833,6 +835,7 @@ int main(int argc, char **argv, char **envp) - int qb_facility; - const char *value = NULL; - int start_delay = 0; -+ char *timeout_action = NULL; - - if ((cmdname = strrchr(argv[0], '/')) == NULL) { - cmdname = argv[0]; -@@ -928,7 +931,12 @@ int main(int argc, char **argv, char **envp) - } - cl_log(LOG_DEBUG, "Start delay: %d (%s)", (int)start_delay, value?value:"default"); - -- while ((c = getopt(argc, argv, "czC:DPRTWZhvw:d:n:p:1:2:3:4:5:t:I:F:S:s:")) != -1) { -+ value = getenv("SBD_TIMEOUT_ACTION"); -+ if(value) { -+ timeout_action = strdup(value); -+ } -+ -+ while ((c = getopt(argc, argv, "czC:DPRTWZhvw:d:n:p:1:2:3:4:5:t:I:F:S:s:r:")) != -1) { - switch (c) { - case 'D': - break; -@@ -1043,6 +1051,12 @@ int main(int argc, char **argv, char **envp) - cl_log(LOG_INFO, "Servant restart count set to %d", - (int)servant_restart_count); - break; -+ case 'r': -+ if (timeout_action) { -+ free(timeout_action); -+ } -+ timeout_action = strdup(optarg); -+ break; - case 'h': - usage(); - return (0); -@@ -1101,6 +1115,39 @@ int main(int argc, char **argv, char **envp) - goto out; - } - -+ if (timeout_action) { -+ char *p[2]; -+ int i; -+ char c; -+ int nrflags = sscanf(timeout_action, "%m[a-z],%m[a-z]%c", &p[0], &p[1], &c); -+ bool parse_error = (nrflags < 1) || (nrflags > 2); -+ -+ for (i = 0; (i < nrflags) && (i < 2); i++) { -+ if (!strcmp(p[i], "reboot")) { -+ timeout_sysrq_char = 'b'; -+ } else if (!strcmp(p[i], "crashdump")) { -+ timeout_sysrq_char = 'c'; -+ } else if (!strcmp(p[i], "off")) { -+ timeout_sysrq_char = 'o'; -+ } else if (!strcmp(p[i], "flush")) { -+ do_flush = true; -+ } else if (!strcmp(p[i], "noflush")) { -+ do_flush = false; -+ } else { -+ parse_error = true; -+ } -+ free(p[i]); -+ } -+ if (parse_error) { -+ fprintf(stderr, "Failed to parse timeout-action \"%s\".\n", -+ timeout_action); -+ exit_status = -1; -+ goto out; -+ } -+ } -+ cl_log(LOG_NOTICE, "%s flush + writing \'%c\' to sysrq on timeout", -+ do_flush?"Doing":"Skipping", timeout_sysrq_char); -+ - #if SUPPORT_SHARED_DISK - if (strcmp(argv[optind], "create") == 0) { - exit_status = init_devices(servants_leader); -diff --git a/src/sbd-md.c b/src/sbd-md.c -index a736118..579d273 100644 ---- a/src/sbd-md.c -+++ b/src/sbd-md.c -@@ -1149,7 +1149,7 @@ int servant(const char *diskname, int mode, const void* argp) - if (ppid == 1) { - /* Our parent died unexpectedly. Triggering - * self-fence. */ -- do_reset(); -+ do_timeout_action(); - } - - /* These attempts are, by definition, somewhat racy. If -diff --git a/src/sbd.h b/src/sbd.h -index 0f8847a..386c85c 100644 ---- a/src/sbd.h -+++ b/src/sbd.h -@@ -130,6 +130,7 @@ void sysrq_trigger(char t); - void do_crashdump(void); - void do_reset(void); - void do_off(void); -+void do_timeout_action(void); - pid_t make_daemon(void); - void maximize_priority(void); - void sbd_get_uname(void); -@@ -153,6 +154,8 @@ extern int debug_mode; - extern char *watchdogdev; - extern bool watchdogdev_is_default; - extern char* local_uname; -+extern bool do_flush; -+extern char timeout_sysrq_char; - - /* Global, non-tunable variables: */ - extern int sector_size; -diff --git a/src/sbd.sysconfig b/src/sbd.sysconfig -index c6d7c07..8f38426 100644 ---- a/src/sbd.sysconfig -+++ b/src/sbd.sysconfig -@@ -71,6 +71,24 @@ SBD_WATCHDOG_DEV=/dev/watchdog - SBD_WATCHDOG_TIMEOUT=5 - - ## Type: string -+## Default: "flush,reboot" -+# -+# Actions to be executed when the watchers don't timely report to the sbd -+# master process or one of the watchers detects that the master process -+# has died. -+# -+# Set timeout-action to comma-separated combination of -+# noflush|flush plus reboot|crashdump|off. -+# If just one of both is given the other stays at the default. -+# -+# This doesn't affect actions like off, crashdump, reboot explicitly -+# triggered via message slots. -+# And it does as well not configure the action a watchdog would -+# trigger should it run off (there is no generic interface). -+# -+SBD_TIMEOUT_ACTION=flush,reboot -+ -+## Type: string - ## Default: "" - # - # Additional options for starting sbd --- -1.8.3.1 - diff --git a/SOURCES/0016-Fix-sbd-pacemaker-check-for-shutdown-attribute-on-ev.patch b/SOURCES/0016-Fix-sbd-pacemaker-check-for-shutdown-attribute-on-ev.patch new file mode 100644 index 0000000..16a50e0 --- /dev/null +++ b/SOURCES/0016-Fix-sbd-pacemaker-check-for-shutdown-attribute-on-ev.patch @@ -0,0 +1,54 @@ +From c8e3de2a7e98550ea9f27a0c59e13013ce02992d Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Fri, 16 Aug 2019 12:07:32 +0200 +Subject: [PATCH] Fix: sbd-pacemaker: check for shutdown attribute on every + cib-diff + +--- + src/sbd-pacemaker.c | 22 +++++++++++++++++++++- + 1 file changed, 21 insertions(+), 1 deletion(-) + +diff --git a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c +index 2b35ff6..1217acf 100644 +--- a/src/sbd-pacemaker.c ++++ b/src/sbd-pacemaker.c +@@ -383,6 +383,24 @@ mon_trigger_refresh(gpointer user_data) + return FALSE; + } + ++#define XPATH_SHUTDOWN "//" XML_CIB_TAG_STATE "[@uname='%s']/" \ ++ XML_TAG_TRANSIENT_NODEATTRS "/" XML_TAG_ATTR_SETS "/" \ ++ XML_CIB_TAG_NVPAIR "[@name='" XML_CIB_ATTR_SHUTDOWN "']" ++ ++static gboolean ++shutdown_attr_in_cib(void) ++{ ++ xmlNode *match = NULL; ++ char *xpath_string; ++ ++ xpath_string = crm_strdup_printf(XPATH_SHUTDOWN, local_uname); ++ if (xpath_string) { ++ match = get_xpath_object(xpath_string, current_cib, LOG_TRACE); ++ free(xpath_string); ++ } ++ return (match != NULL); ++} ++ + static void + crm_diff_update(const char *event, xmlNode * msg) + { +@@ -426,8 +444,10 @@ crm_diff_update(const char *event, xmlNode * msg) + * - immediately if the last update was more than 1s ago + * - every 10 updates + * - at most 1s after the last update ++ * - shutdown attribute for our node set for the first time + */ +- if (updates > 10 || (now - last_refresh) > (reconnect_msec / 1000)) { ++ if ((!pcmk_shutdown && shutdown_attr_in_cib()) || ++ (updates > 10 || (now - last_refresh) > (reconnect_msec / 1000))) { + mon_refresh_state(refresh_timer); + updates = 0; + +-- +1.8.3.1 + diff --git a/SOURCES/0017-Fix-sbd-pacemaker-make-handling-of-cib-connection-lo.patch b/SOURCES/0017-Fix-sbd-pacemaker-make-handling-of-cib-connection-lo.patch deleted file mode 100644 index 0d70c92..0000000 --- a/SOURCES/0017-Fix-sbd-pacemaker-make-handling-of-cib-connection-lo.patch +++ /dev/null @@ -1,302 +0,0 @@ -From a716a8ddd3df615009bcff3bd96dd9ae64cb5f68 Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Tue, 19 Mar 2019 21:36:15 +0100 -Subject: [PATCH] Fix: sbd-pacemaker: make handling of cib-connection loss more - robust - -Exit pcmk-servant on graceful pacemaker shutdown and go back -to state before pacemaker was detected initially. -Purge all cib-traces otherwise and try to reconnect within timeout. ---- - src/sbd-inquisitor.c | 24 ++++++++++++++++++++---- - src/sbd-md.c | 30 +++++++++++++++--------------- - src/sbd-pacemaker.c | 38 +++++++++++++++++++++++++++++--------- - src/sbd.h | 11 +++++++---- - 4 files changed, 71 insertions(+), 32 deletions(-) - -diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c -index 9be6c99..77c6e4f 100644 ---- a/src/sbd-inquisitor.c -+++ b/src/sbd-inquisitor.c -@@ -490,19 +490,19 @@ void inquisitor_child(void) - if (sbd_is_disk(s)) { - if (WIFEXITED(status)) { - switch(WEXITSTATUS(status)) { -- case EXIT_MD_IO_FAIL: -+ case EXIT_MD_SERVANT_IO_FAIL: - DBGLOG(LOG_INFO, "Servant for %s requests to be disowned", - s->devname); - break; -- case EXIT_MD_REQUEST_RESET: -+ case EXIT_MD_SERVANT_REQUEST_RESET: - cl_log(LOG_WARNING, "%s requested a reset", s->devname); - do_reset(); - break; -- case EXIT_MD_REQUEST_SHUTOFF: -+ case EXIT_MD_SERVANT_REQUEST_SHUTOFF: - cl_log(LOG_WARNING, "%s requested a shutoff", s->devname); - do_off(); - break; -- case EXIT_MD_REQUEST_CRASHDUMP: -+ case EXIT_MD_SERVANT_REQUEST_CRASHDUMP: - cl_log(LOG_WARNING, "%s requested a crashdump", s->devname); - do_crashdump(); - break; -@@ -510,6 +510,22 @@ void inquisitor_child(void) - break; - } - } -+ } else if (sbd_is_pcmk(s)) { -+ if (WIFEXITED(status)) { -+ switch(WEXITSTATUS(status)) { -+ case EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN: -+ DBGLOG(LOG_INFO, "PCMK-Servant has exited gracefully"); -+ /* revert to state prior to pacemaker-detection */ -+ s->restarts = 0; -+ s->restart_blocked = 0; -+ cluster_appeared = 0; -+ s->outdated = 1; -+ s->t_last.tv_sec = 0; -+ break; -+ default: -+ break; -+ } -+ } - } - cleanup_servant_by_pid(pid); - } -diff --git a/src/sbd-md.c b/src/sbd-md.c -index ba2c34d..c51d381 100644 ---- a/src/sbd-md.c -+++ b/src/sbd-md.c -@@ -1061,19 +1061,19 @@ int servant_md(const char *diskname, int mode, const void* argp) - - st = open_device(diskname, LOG_WARNING); - if (!st) { -- exit(EXIT_MD_IO_FAIL); -+ exit(EXIT_MD_SERVANT_IO_FAIL); - } - - s_header = header_get(st); - if (!s_header) { - cl_log(LOG_ERR, "Not a valid header on %s", diskname); -- exit(EXIT_MD_IO_FAIL); -+ exit(EXIT_MD_SERVANT_IO_FAIL); - } - - if (servant_check_timeout_inconsistent(s_header) < 0) { - cl_log(LOG_ERR, "Timeouts on %s do not match first device", - diskname); -- exit(EXIT_MD_IO_FAIL); -+ exit(EXIT_MD_SERVANT_IO_FAIL); - } - - if (s_header->minor_version > 0) { -@@ -1086,14 +1086,14 @@ int servant_md(const char *diskname, int mode, const void* argp) - cl_log(LOG_ERR, - "No slot allocated, and automatic allocation failed for disk %s.", - diskname); -- rc = EXIT_MD_IO_FAIL; -+ rc = EXIT_MD_SERVANT_IO_FAIL; - goto out; - } - s_node = sector_alloc(); - if (slot_read(st, mbox, s_node) < 0) { - cl_log(LOG_ERR, "Unable to read node entry on %s", - diskname); -- exit(EXIT_MD_IO_FAIL); -+ exit(EXIT_MD_SERVANT_IO_FAIL); - } - - cl_log(LOG_NOTICE, "Monitoring slot %d on disk %s", mbox, diskname); -@@ -1109,7 +1109,7 @@ int servant_md(const char *diskname, int mode, const void* argp) - if (mode > 0) { - if (mbox_read(st, mbox, s_mbox) < 0) { - cl_log(LOG_ERR, "mbox read failed during start-up in servant."); -- rc = EXIT_MD_IO_FAIL; -+ rc = EXIT_MD_SERVANT_IO_FAIL; - goto out; - } - if (s_mbox->cmd != SBD_MSG_EXIT && -@@ -1125,7 +1125,7 @@ int servant_md(const char *diskname, int mode, const void* argp) - DBGLOG(LOG_INFO, "First servant start - zeroing inbox"); - memset(s_mbox, 0, sizeof(*s_mbox)); - if (mbox_write(st, mbox, s_mbox) < 0) { -- rc = EXIT_MD_IO_FAIL; -+ rc = EXIT_MD_SERVANT_IO_FAIL; - goto out; - } - } -@@ -1154,28 +1154,28 @@ int servant_md(const char *diskname, int mode, const void* argp) - s_header_retry = header_get(st); - if (!s_header_retry) { - cl_log(LOG_ERR, "No longer found a valid header on %s", diskname); -- exit(EXIT_MD_IO_FAIL); -+ exit(EXIT_MD_SERVANT_IO_FAIL); - } - if (memcmp(s_header, s_header_retry, sizeof(*s_header)) != 0) { - cl_log(LOG_ERR, "Header on %s changed since start-up!", diskname); -- exit(EXIT_MD_IO_FAIL); -+ exit(EXIT_MD_SERVANT_IO_FAIL); - } - free(s_header_retry); - - s_node_retry = sector_alloc(); - if (slot_read(st, mbox, s_node_retry) < 0) { - cl_log(LOG_ERR, "slot read failed in servant."); -- exit(EXIT_MD_IO_FAIL); -+ exit(EXIT_MD_SERVANT_IO_FAIL); - } - if (memcmp(s_node, s_node_retry, sizeof(*s_node)) != 0) { - cl_log(LOG_ERR, "Node entry on %s changed since start-up!", diskname); -- exit(EXIT_MD_IO_FAIL); -+ exit(EXIT_MD_SERVANT_IO_FAIL); - } - free(s_node_retry); - - if (mbox_read(st, mbox, s_mbox) < 0) { - cl_log(LOG_ERR, "mbox read failed in servant."); -- exit(EXIT_MD_IO_FAIL); -+ exit(EXIT_MD_SERVANT_IO_FAIL); - } - - if (s_mbox->cmd > 0) { -@@ -1190,14 +1190,14 @@ int servant_md(const char *diskname, int mode, const void* argp) - sigqueue(ppid, SIG_TEST, signal_value); - break; - case SBD_MSG_RESET: -- exit(EXIT_MD_REQUEST_RESET); -+ exit(EXIT_MD_SERVANT_REQUEST_RESET); - case SBD_MSG_OFF: -- exit(EXIT_MD_REQUEST_SHUTOFF); -+ exit(EXIT_MD_SERVANT_REQUEST_SHUTOFF); - case SBD_MSG_EXIT: - sigqueue(ppid, SIG_EXITREQ, signal_value); - break; - case SBD_MSG_CRASHDUMP: -- exit(EXIT_MD_REQUEST_CRASHDUMP); -+ exit(EXIT_MD_SERVANT_REQUEST_CRASHDUMP); - default: - /* FIXME: - An "unknown" message might result -diff --git a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c -index aac355a..c69fc55 100644 ---- a/src/sbd-pacemaker.c -+++ b/src/sbd-pacemaker.c -@@ -103,6 +103,9 @@ static pe_working_set_t *data_set = NULL; - - static long last_refresh = 0; - -+static int pcmk_clean_shutdown = 0; -+static int pcmk_shutdown = 0; -+ - static gboolean - mon_timer_reconnect(gpointer data) - { -@@ -128,10 +131,26 @@ mon_cib_connection_destroy(gpointer user_data) - { - if (cib) { - cib->cmds->signoff(cib); -+ /* retrigger as last one might have been skipped */ -+ mon_refresh_state(NULL); -+ if (pcmk_clean_shutdown) { -+ /* assume a graceful pacemaker-shutdown */ -+ clean_up(EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN); -+ } -+ /* getting here we aren't sure about the pacemaker-state -+ so try to use the timeout to reconnect and get -+ everything sorted out again -+ */ -+ pcmk_shutdown = 0; - set_servant_health(pcmk_health_transient, LOG_WARNING, "Disconnected from CIB"); - timer_id_reconnect = g_timeout_add(reconnect_msec, mon_timer_reconnect, NULL); - } - cib_connected = 0; -+ /* no sense in looking into outdated cib, trying to apply patch, ... */ -+ if (current_cib) { -+ free_xml(current_cib); -+ current_cib = NULL; -+ } - return; - } - -@@ -171,7 +190,7 @@ static gboolean - mon_timer_notify(gpointer data) - { - static int counter = 0; -- int counter_max = timeout_watchdog / timeout_loop; -+ int counter_max = timeout_watchdog / timeout_loop / 2; - - if (timer_id_notify > 0) { - g_source_remove(timer_id_notify); -@@ -280,11 +299,6 @@ compute_status(pe_working_set_t * data_set) - } else if (node->details->pending) { - set_servant_health(pcmk_health_pending, LOG_WARNING, "Node state: pending"); - --#if 0 -- } else if (node->details->shutdown) { -- set_servant_health(pcmk_health_shutdown, LOG_WARNING, "Node state: shutting down"); --#endif -- - } else if (data_set->flags & pe_flag_have_quorum) { - set_servant_health(pcmk_health_online, LOG_INFO, "Node state: online"); - ever_had_quorum = TRUE; -@@ -315,6 +329,12 @@ compute_status(pe_working_set_t * data_set) - } - } - -+ if (node->details->shutdown) { -+ pcmk_shutdown = 1; -+ } -+ if (pcmk_shutdown && !(node->details->running_rsc)) { -+ pcmk_clean_shutdown = 1; -+ } - notify_parent(); - return; - } -@@ -339,7 +359,7 @@ crm_diff_update(const char *event, xmlNode * msg) - static mainloop_timer_t *refresh_timer = NULL; - - if(refresh_timer == NULL) { -- refresh_timer = mainloop_timer_add("refresh", 2000, FALSE, mon_trigger_refresh, NULL); -+ refresh_timer = mainloop_timer_add("refresh", reconnect_msec, FALSE, mon_trigger_refresh, NULL); - refresh_trigger = mainloop_add_trigger(G_PRIORITY_LOW, mon_refresh_state, refresh_timer); - } - -@@ -369,9 +389,9 @@ crm_diff_update(const char *event, xmlNode * msg) - } - - /* Refresh -- * - immediately if the last update was more than 5s ago -+ * - immediately if the last update was more than 1s ago - * - every 10 updates -- * - at most 2s after the last update -+ * - at most 1s after the last update - */ - if (updates > 10 || (now - last_refresh) > (reconnect_msec / 1000)) { - mon_refresh_state(refresh_timer); -diff --git a/src/sbd.h b/src/sbd.h -index 6fe07f9..3b05a11 100644 ---- a/src/sbd.h -+++ b/src/sbd.h -@@ -54,10 +54,13 @@ - /* FIXME: should add dynamic check of SIG_XX >= SIGRTMAX */ - - /* exit status for disk-servant */ --#define EXIT_MD_IO_FAIL 20 --#define EXIT_MD_REQUEST_RESET 21 --#define EXIT_MD_REQUEST_SHUTOFF 22 --#define EXIT_MD_REQUEST_CRASHDUMP 23 -+#define EXIT_MD_SERVANT_IO_FAIL 20 -+#define EXIT_MD_SERVANT_REQUEST_RESET 21 -+#define EXIT_MD_SERVANT_REQUEST_SHUTOFF 22 -+#define EXIT_MD_SERVANT_REQUEST_CRASHDUMP 23 -+ -+/* exit status for pcmk-servant */ -+#define EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN 30 - - #define HOG_CHAR 0xff - #define SECTOR_NAME_MAX 63 --- -1.8.3.1 - diff --git a/SOURCES/0018-Fix-sbd-pacemaker-bail-out-of-status-earlier.patch b/SOURCES/0018-Fix-sbd-pacemaker-bail-out-of-status-earlier.patch deleted file mode 100644 index 776edea..0000000 --- a/SOURCES/0018-Fix-sbd-pacemaker-bail-out-of-status-earlier.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 79b778debfee5b4ab2d099b2bfc7385f45597f70 Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Tue, 26 Mar 2019 11:17:45 +0100 -Subject: [PATCH] Fix: sbd-pacemaker: bail out of status earlier - -Prevents possible subsequent null-pointer access and avoids -unnecessary search for node. ---- - src/sbd-pacemaker.c | 10 +++++++--- - 1 file changed, 7 insertions(+), 3 deletions(-) - -diff --git a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c -index c69fc55..9a8b95f 100644 ---- a/src/sbd-pacemaker.c -+++ b/src/sbd-pacemaker.c -@@ -276,7 +276,7 @@ compute_status(pe_working_set_t * data_set) - static int updates = 0; - static int ever_had_quorum = FALSE; - -- node_t *node = pe_find_node(data_set->nodes, local_uname); -+ node_t *node = NULL; - - updates++; - -@@ -286,11 +286,15 @@ compute_status(pe_working_set_t * data_set) - return; - } - -+ node = pe_find_node(data_set->nodes, local_uname); - -- if (node == NULL) { -+ if ((node == NULL) || (node->details == NULL)) { - set_servant_health(pcmk_health_unknown, LOG_WARNING, "Node state: %s is UNKNOWN", local_uname); -+ notify_parent(); -+ return; -+ } - -- } else if (node->details->online == FALSE) { -+ if (node->details->online == FALSE) { - set_servant_health(pcmk_health_unknown, LOG_WARNING, "Node state: OFFLINE"); - - } else if (node->details->unclean) { --- -1.8.3.1 - diff --git a/SOURCES/0019-Fix-sbd-pacemaker-assume-graceful-exit-if-leftovers-.patch b/SOURCES/0019-Fix-sbd-pacemaker-assume-graceful-exit-if-leftovers-.patch deleted file mode 100644 index 8c92df8..0000000 --- a/SOURCES/0019-Fix-sbd-pacemaker-assume-graceful-exit-if-leftovers-.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 824fe834c67fb7bae7feb87607381f9fa8fa2945 Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Fri, 7 Jun 2019 19:09:06 +0200 -Subject: [PATCH] Fix: sbd-pacemaker: assume graceful exit if leftovers are - unmanged - ---- - src/sbd-pacemaker.c | 32 +++++++++++++++++++++++++++++++- - 1 file changed, 31 insertions(+), 1 deletion(-) - -diff --git a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c -index 9a8b95f..2b35ff6 100644 ---- a/src/sbd-pacemaker.c -+++ b/src/sbd-pacemaker.c -@@ -333,11 +333,41 @@ compute_status(pe_working_set_t * data_set) - } - } - -+ /* If we are in shutdown-state once this will go on till the end. -+ * If we've on top reached a state of 0 locally running resources -+ * we can assume a clean shutdown. -+ * Tricky are the situations where the node is in maintenance-mode -+ * or resources are unmanaged. So if the node is in maintenance or -+ * all left-over running resources are unmanaged we assume intention. -+ */ - if (node->details->shutdown) { - pcmk_shutdown = 1; - } -- if (pcmk_shutdown && !(node->details->running_rsc)) { -+ if (pcmk_shutdown) -+ { - pcmk_clean_shutdown = 1; -+ if (!(node->details->maintenance)) { -+ GListPtr iter; -+ -+ for (iter = node->details->running_rsc; -+ iter != NULL; iter = iter->next) { -+ resource_t *rsc = (resource_t *) iter->data; -+ -+ -+ if (is_set(rsc->flags, pe_rsc_managed)) { -+ pcmk_clean_shutdown = 0; -+ crm_debug("not clean as %s managed and still running", -+ rsc->id); -+ break; -+ } -+ } -+ if (pcmk_clean_shutdown) { -+ crm_debug("pcmk_clean_shutdown because " -+ "all managed resources down"); -+ } -+ } else { -+ crm_debug("pcmk_clean_shutdown because node is in maintenance"); -+ } - } - notify_parent(); - return; --- -1.8.3.1 - diff --git a/SOURCES/0020-Fix-sbd-pacemaker-check-for-shutdown-attribute-on-ev.patch b/SOURCES/0020-Fix-sbd-pacemaker-check-for-shutdown-attribute-on-ev.patch deleted file mode 100644 index 16a50e0..0000000 --- a/SOURCES/0020-Fix-sbd-pacemaker-check-for-shutdown-attribute-on-ev.patch +++ /dev/null @@ -1,54 +0,0 @@ -From c8e3de2a7e98550ea9f27a0c59e13013ce02992d Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Fri, 16 Aug 2019 12:07:32 +0200 -Subject: [PATCH] Fix: sbd-pacemaker: check for shutdown attribute on every - cib-diff - ---- - src/sbd-pacemaker.c | 22 +++++++++++++++++++++- - 1 file changed, 21 insertions(+), 1 deletion(-) - -diff --git a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c -index 2b35ff6..1217acf 100644 ---- a/src/sbd-pacemaker.c -+++ b/src/sbd-pacemaker.c -@@ -383,6 +383,24 @@ mon_trigger_refresh(gpointer user_data) - return FALSE; - } - -+#define XPATH_SHUTDOWN "//" XML_CIB_TAG_STATE "[@uname='%s']/" \ -+ XML_TAG_TRANSIENT_NODEATTRS "/" XML_TAG_ATTR_SETS "/" \ -+ XML_CIB_TAG_NVPAIR "[@name='" XML_CIB_ATTR_SHUTDOWN "']" -+ -+static gboolean -+shutdown_attr_in_cib(void) -+{ -+ xmlNode *match = NULL; -+ char *xpath_string; -+ -+ xpath_string = crm_strdup_printf(XPATH_SHUTDOWN, local_uname); -+ if (xpath_string) { -+ match = get_xpath_object(xpath_string, current_cib, LOG_TRACE); -+ free(xpath_string); -+ } -+ return (match != NULL); -+} -+ - static void - crm_diff_update(const char *event, xmlNode * msg) - { -@@ -426,8 +444,10 @@ crm_diff_update(const char *event, xmlNode * msg) - * - immediately if the last update was more than 1s ago - * - every 10 updates - * - at most 1s after the last update -+ * - shutdown attribute for our node set for the first time - */ -- if (updates > 10 || (now - last_refresh) > (reconnect_msec / 1000)) { -+ if ((!pcmk_shutdown && shutdown_attr_in_cib()) || -+ (updates > 10 || (now - last_refresh) > (reconnect_msec / 1000))) { - mon_refresh_state(refresh_timer); - updates = 0; - --- -1.8.3.1 - diff --git a/SPECS/sbd.spec b/SPECS/sbd.spec index e766c7e..346f7bb 100644 --- a/SPECS/sbd.spec +++ b/SPECS/sbd.spec @@ -15,39 +15,35 @@ # Please submit bugfixes or comments via http://bugs.opensuse.org/ # -%global commit a74b4d25a3eb93fe1abbe6e3ebfd2b16cf48873f +%global commit 7f33d1a409d0a4e2cd69946688c48eaa8f3c5d26 %global shortcommit %(c=%{commit}; echo ${c:0:7}) %global github_owner Clusterlabs -%global buildnum 18 +%global buildnum 15 Name: sbd Summary: Storage-based death License: GPLv2+ Group: System Environment/Daemons -Version: 1.3.1 -Release: %{buildnum}%{?dist}.3 +Version: 1.4.0 +Release: %{buildnum}%{?dist} Url: https://github.com/%{github_owner}/%{name} Source0: https://github.com/%{github_owner}/%{name}/archive/%{commit}/%{name}-%{commit}.tar.gz -Patch0: 0001-make-pacemaker-dlm-wait-for-sbd-start.patch -Patch1: 0002-mention-timeout-caveat-with-SBD_DELAY_START.patch -Patch2: 0003-Doc-sbd.8.pod-add-query-test-watchdog.patch -Patch3: 0004-Build-sbd-pacemaker-Query-CIB-directly-with-the-API.patch -Patch4: 0005-Fix-build-error-with-glibc-2.25.patch -Patch5: 0006-Fix-gcc-format-string-error.patch -Patch6: 0007-Build-cluster-servant-Compile-with-pacemaker-2.0.patch -Patch7: 0008-Log-change-sbd-s-default-logging-level-to-LOG_NOTICE.patch -Patch8: 0009-Log-upgrade-important-messages-and-downgrade-unimpor.patch -Patch9: 0010-Refactor-sbd-cluster-let-scan-do-the-job-of-proc-par.patch -Patch10: 0011-Fix-search-for-pacemaker-remoted-with-pacemaker-2.0.patch -Patch11: 0012-Fix-sbd-common-don-t-follow-symlinks-outside-dev-for.patch -Patch12: 0013-Refactor-sbd-common-separate-assignment-and-comparis.patch -Patch13: 0014-Fix-sbd-common-avoid-statting-potential-links.patch -Patch14: 0015-Refactor-use-pacemaker-s-new-pe-api-with-constructor.patch -Patch15: 0016-Feature-make-timeout-action-executed-by-sbd-configur.patch -Patch16: 0017-Fix-sbd-pacemaker-make-handling-of-cib-connection-lo.patch -Patch17: 0018-Fix-sbd-pacemaker-bail-out-of-status-earlier.patch -Patch18: 0019-Fix-sbd-pacemaker-assume-graceful-exit-if-leftovers-.patch -Patch19: 0020-Fix-sbd-pacemaker-check-for-shutdown-attribute-on-ev.patch +Patch0: 0001-Fix-sbd-cluster-finalize-cmap-connection-if-disconne.patch +Patch1: 0002-Fix-sbd-pacemaker-make-handling-of-cib-connection-lo.patch +Patch2: 0003-Fix-sbd-pacemaker-bail-out-of-status-earlier.patch +Patch3: 0004-Doc-sbd.8.pod-use-the-generic-term-cluster-services-.patch +Patch4: 0005-Doc-sbd.sysconfig-watchdog-timeout-set-in-the-on-dis.patch +Patch5: 0006-Refactor-fail-earlier-on-invalid-servants.patch +Patch6: 0007-Fix-sbd-inquisitor-overhaul-device-list-parser.patch +Patch7: 0008-Refactor-sbd-common-no-reason-for-stack-hogger-havin.patch +Patch8: 0009-Sanity-sbd-inquisitor-free-timeout-action-on-bail-ou.patch +Patch9: 0010-Sanity-sbd-md-prevent-unrealistic-overflow-on-sector.patch +Patch10: 0011-Sanity-sbd-md-remove-some-left-over-code.patch +Patch11: 0012-Fix-sbd-common-query-rt-budget-0-otherwise-try-movin.patch +Patch12: 0013-Fix-sbd-pacemaker-assume-graceful-exit-if-leftovers-.patch +Patch13: 0014-Fix-sbd-cluster-periodically-check-corosync-daemon-l.patch +Patch14: 0015-build-say-library-when-missing-cmap-not-package-to-a.patch +Patch15: 0016-Fix-sbd-pacemaker-check-for-shutdown-attribute-on-ev.patch BuildRoot: %{_tmppath}/%{name}-%{version}-build BuildRequires: autoconf BuildRequires: automake @@ -63,7 +59,7 @@ BuildRequires: pkgconfig BuildRequires: systemd %if 0%{?rhel} > 0 -ExclusiveArch: i686 x86_64 s390x ppc64le aarch64 %{arm} +ExclusiveArch: i686 x86_64 s390x ppc64le aarch64 %endif %if %{defined systemd_requires} @@ -145,21 +141,51 @@ fi %doc COPYING %changelog -* Mon Aug 26 2019 Klaus Wenninger - 1.3.1-18.3 -- added missing patch +* Fri Aug 16 2019 Klaus Wenninger - 1.4.0-15 +- check for shutdown attribute on every cib-diff - Resolves: rhbz#1734061 + Resolves: rhbz#1718296 -* Fri Aug 16 2019 Klaus Wenninger - 1.3.1-18.2 -- check for shutdown attribute on every cib-diff +* Wed Jun 12 2019 Klaus Wenninger - 1.4.0-10 +- added missing patches to git - Resolves: rhbz#1734061 + Resolves: rhbz#1702727 + Resolves: rhbz#1718296 -* Tue Jul 30 2019 Klaus Wenninger - 1.3.1-18.1 +* Tue Jun 11 2019 Klaus Wenninger - 1.4.0-9 - assume graceful pacemaker exit if leftovers are unmanaged -- make handling of cib-connection loss more robust +- query corosync liveness via votequorum-api + + Resolves: rhbz#1702727 + Resolves: rhbz#1718296 + +* Mon Jun 3 2019 Klaus Wenninger - 1.4.0-8 +- check for rt-budget > 0 and move to root-slice otherwise + + Resolves: rhbz#1713021 - Resolves: rhbz#1734061 +* Wed Apr 10 2019 Klaus Wenninger - 1.4.0-7 +- add some minor fixes from upstream found by coverity + + Resolves: rhbz#1698056 + +* Wed Apr 10 2019 Klaus Wenninger - 1.4.0-6 +- add decision-context to gating.yaml + + Resolves: rhbz#1682137 + +* Mon Jan 14 2019 Klaus Wenninger - 1.4.0-5 +- rebase to upstream v1.4.0 +- finalize cmap connection if disconnected from cluster +- make handling of cib-connection loss more robust +- add ci test files +- use generic term cluster-services in doc +- stress in doc that on-disk metadata watchdog-timeout + takes precedence +- fail earlier on invalid servants to make gcc 9 happy + + Resolves: rhbz#1698056 + Resolves: rhbz#1682137 * Mon Dec 17 2018 Klaus Wenninger - 1.3.1-18 - make timeout-action executed by sbd configurable