diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..28f41b4 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +SOURCES/sbd-a74b4d25a3eb93fe1abbe6e3ebfd2b16cf48873f.tar.gz diff --git a/.sbd.metadata b/.sbd.metadata new file mode 100644 index 0000000..b9ffa89 --- /dev/null +++ b/.sbd.metadata @@ -0,0 +1 @@ +c7d993891714de43c052a87b0165a97a6e27cfb5 SOURCES/sbd-a74b4d25a3eb93fe1abbe6e3ebfd2b16cf48873f.tar.gz diff --git a/SOURCES/0001-make-pacemaker-dlm-wait-for-sbd-start.patch b/SOURCES/0001-make-pacemaker-dlm-wait-for-sbd-start.patch new file mode 100644 index 0000000..0793297 --- /dev/null +++ b/SOURCES/0001-make-pacemaker-dlm-wait-for-sbd-start.patch @@ -0,0 +1,43 @@ +From 5f1ac8f07fd81a2c60db39dd5a28debbadfe3ec5 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Thu, 14 Dec 2017 17:32:08 +0100 +Subject: [PATCH] Fix: systemd: make pacemaker & dlm wait for sbd-start to + complete + +--- + src/sbd.service.in | 4 +++- + src/sbd_remote.service.in | 2 +- + 2 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/src/sbd.service.in b/src/sbd.service.in +index ef1bd16..401c2d6 100644 +--- a/src/sbd.service.in ++++ b/src/sbd.service.in +@@ -1,6 +1,7 @@ + [Unit] + Description=Shared-storage based fencing daemon + Before=pacemaker.service ++Before=dlm.service + After=systemd-modules-load.service iscsi.service + PartOf=corosync.service + RefuseManualStop=true +@@ -22,4 +23,5 @@ Restart=on-abort + + [Install] + RequiredBy=corosync.service +- ++RequiredBy=pacemaker.service ++RequiredBy=dlm.service +diff --git a/src/sbd_remote.service.in b/src/sbd_remote.service.in +index e05f80e..cefd511 100644 +--- a/src/sbd_remote.service.in ++++ b/src/sbd_remote.service.in +@@ -21,4 +21,4 @@ Restart=on-abort + + [Install] + RequiredBy=pacemaker_remote.service +- ++RequiredBy=dlm.service +-- +1.8.3.1 + diff --git a/SOURCES/0002-mention-timeout-caveat-with-SBD_DELAY_START.patch b/SOURCES/0002-mention-timeout-caveat-with-SBD_DELAY_START.patch new file mode 100644 
index 0000000..807d70d --- /dev/null +++ b/SOURCES/0002-mention-timeout-caveat-with-SBD_DELAY_START.patch @@ -0,0 +1,27 @@ +From f79d09ec8dd744f69d87008e868297b308043b56 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Thu, 14 Dec 2017 18:21:07 +0100 +Subject: [PATCH] Doc: sbd.sysconfig: mention timeout caveat with + SBD_DELAY_START + +--- + src/sbd.sysconfig | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/src/sbd.sysconfig b/src/sbd.sysconfig +index cbc1501..75ff980 100644 +--- a/src/sbd.sysconfig ++++ b/src/sbd.sysconfig +@@ -31,6 +31,9 @@ SBD_STARTMODE=always + # other nodes are still waiting in the fence acknowledgement phase. + # This is an occasional issue with virtual machines. + # ++# Consider that you might have to adapt the startup-timeout accordingly ++# if the default isn't sufficient. (TimeoutStartSec for systemd) ++# + # This option may be ignored at a later point, once pacemaker handles + # this case better. + # +-- +1.8.3.1 + diff --git a/SOURCES/0003-Doc-sbd.8.pod-add-query-test-watchdog.patch b/SOURCES/0003-Doc-sbd.8.pod-add-query-test-watchdog.patch new file mode 100644 index 0000000..6695f73 --- /dev/null +++ b/SOURCES/0003-Doc-sbd.8.pod-add-query-test-watchdog.patch @@ -0,0 +1,48 @@ +From e073271f53583f2d0cf2675ea665ed50712b65dd Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Mon, 8 Jan 2018 12:07:33 +0100 +Subject: [PATCH] Doc: sbd.8.pod: add sections for query-watchdog & + test-watchdog + +--- + man/sbd.8.pod | 24 ++++++++++++++++++++++++ + 1 file changed, 24 insertions(+) + +diff --git a/man/sbd.8.pod b/man/sbd.8.pod +index c1bf67a..ffd01c2 100644 +--- a/man/sbd.8.pod ++++ b/man/sbd.8.pod +@@ -394,6 +394,30 @@ a fencing message by the cluster. + + =back + ++=head2 query-watchdog ++ ++Example usage: ++ ++ sbd query-watchdog ++ ++Check for available watchdog devices and print some info. 
++ ++B<Warning>: This command will arm the watchdog during query, and if your ++watchdog refuses disarming (for example, if its kernel module has the ++'nowayout' parameter set) this will reset your system. ++ ++=head2 test-watchdog ++ ++Example usage: ++ ++ sbd test-watchdog [-w /dev/watchdog3] ++ ++Test specified watchdog device (/dev/watchdog by default). ++ ++B<Warning>: This command will arm the watchdog and have your system reset ++in case your watchdog is working properly! If issued from an interactive ++session, it will prompt for confirmation. ++ + =head1 Base system configuration + + =head2 Configure a watchdog +-- +1.8.3.1 + diff --git a/SOURCES/0004-Build-sbd-pacemaker-Query-CIB-directly-with-the-API.patch b/SOURCES/0004-Build-sbd-pacemaker-Query-CIB-directly-with-the-API.patch new file mode 100644 index 0000000..1c2ce98 --- /dev/null +++ b/SOURCES/0004-Build-sbd-pacemaker-Query-CIB-directly-with-the-API.patch @@ -0,0 +1,86 @@ +From ef40f6a0fdc178828fbde6f1303e5ee58bfb822a Mon Sep 17 00:00:00 2001 +From: "Gao,Yan" +Date: Wed, 7 Mar 2018 17:50:29 +0100 +Subject: [PATCH] Build: sbd-pacemaker: Query CIB directly with the API instead + of get_cib_copy() + +get_cib_copy() has been dropped from pacemaker 2.0 branch as of: +https://github.com/ClusterLabs/pacemaker/commit/32c75b7be +--- + src/sbd-pacemaker.c | 39 +++++++++++++++++++++++++++++++++++---- + 1 file changed, 35 insertions(+), 4 deletions(-) + +diff --git a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c +index b6a8fb6..2f06109 100644 +--- a/src/sbd-pacemaker.c ++++ b/src/sbd-pacemaker.c +@@ -109,6 +109,38 @@ mon_cib_connection_destroy(gpointer user_data) + return; + } + ++static void ++mon_retrieve_current_cib() ++{ ++ xmlNode *xml_cib = NULL; ++ int options = cib_scope_local | cib_sync_call; ++ int rc = pcmk_ok; ++ ++ free_xml(current_cib); ++ current_cib = NULL; ++ ++ rc = cib->cmds->query(cib, NULL, &xml_cib, options); ++ ++ if (rc != pcmk_ok) { ++ crm_err("Couldn't retrieve the CIB: %s (%d)", pcmk_strerror(rc), rc); ++ 
free_xml(xml_cib); ++ return; ++ ++ } else if (xml_cib == NULL) { ++ crm_err("Couldn't retrieve the CIB: empty result"); ++ return; ++ } ++ ++ if (safe_str_eq(crm_element_name(xml_cib), XML_TAG_CIB)) { ++ current_cib = xml_cib; ++ ++ } else { ++ free_xml(xml_cib); ++ } ++ ++ return; ++} ++ + static gboolean + mon_timer_notify(gpointer data) + { +@@ -121,8 +153,7 @@ mon_timer_notify(gpointer data) + + if (cib_connected) { + if (counter == counter_max) { +- free_xml(current_cib); +- current_cib = get_cib_copy(cib); ++ mon_retrieve_current_cib(); + mon_refresh_state(NULL); + counter = 0; + } else { +@@ -163,7 +194,7 @@ cib_connect(gboolean full) + return rc; + } + +- current_cib = get_cib_copy(cib); ++ mon_retrieve_current_cib(); + mon_refresh_state(NULL); + + if (full) { +@@ -308,7 +339,7 @@ crm_diff_update(const char *event, xmlNode * msg) + } + + if (current_cib == NULL) { +- current_cib = get_cib_copy(cib); ++ mon_retrieve_current_cib(); + } + + /* Refresh +-- +1.8.3.1 + diff --git a/SOURCES/0005-Fix-build-error-with-glibc-2.25.patch b/SOURCES/0005-Fix-build-error-with-glibc-2.25.patch new file mode 100644 index 0000000..75b45a6 --- /dev/null +++ b/SOURCES/0005-Fix-build-error-with-glibc-2.25.patch @@ -0,0 +1,33 @@ +From ba3b4127f658cb59ff09939e8de93a06a138dddb Mon Sep 17 00:00:00 2001 +From: Valentin Vidic +Date: Sat, 25 Nov 2017 09:18:41 +0100 +Subject: [PATCH] Fix: build error with glibc 2.25 + +Add include for makedev, major and minor + +sbd-common.c:268:13: error: In the GNU C Library, "makedev" is defined + by <sys/sysmacros.h>. For historical compatibility, it is + currently defined by <sys/types.h> as well, but we plan to + remove this soon. To use "makedev", include <sys/sysmacros.h> + directly. If you did not intend to use a system-defined macro + "makedev", you should undefine it after including <sys/types.h>. 
[-Werror] + {makedev(10,130), 0}; +--- + src/sbd-common.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/sbd-common.c b/src/sbd-common.c +index 1d7dbc2..25aaeae 100644 +--- a/src/sbd-common.c ++++ b/src/sbd-common.c +@@ -19,6 +19,7 @@ + #include "sbd.h" + #include + #include ++#include + #include + #include + #include +-- +1.8.3.1 + diff --git a/SOURCES/0006-Fix-gcc-format-string-error.patch b/SOURCES/0006-Fix-gcc-format-string-error.patch new file mode 100644 index 0000000..9db8c49 --- /dev/null +++ b/SOURCES/0006-Fix-gcc-format-string-error.patch @@ -0,0 +1,25 @@ +From ee232b251c7072935d0507dc0bad27f375a12492 Mon Sep 17 00:00:00 2001 +From: Valentin Vidic +Date: Wed, 8 Nov 2017 22:02:29 +0100 +Subject: [PATCH] Fix: gcc format string error + +--- + src/sbd-common.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/sbd-common.c b/src/sbd-common.c +index 803bc3a..1d7dbc2 100644 +--- a/src/sbd-common.c ++++ b/src/sbd-common.c +@@ -268,7 +268,7 @@ watchdog_populate_list(void) + {makedev(10,130), 0}; + int num_watchdogs = 1; + struct dirent *entry; +- char entry_name[64]; ++ char entry_name[280]; + DIR *dp; + char buf[256] = ""; + +-- +1.8.3.1 + diff --git a/SOURCES/0007-Build-cluster-servant-Compile-with-pacemaker-2.0.patch b/SOURCES/0007-Build-cluster-servant-Compile-with-pacemaker-2.0.patch new file mode 100644 index 0000000..f1782a3 --- /dev/null +++ b/SOURCES/0007-Build-cluster-servant-Compile-with-pacemaker-2.0.patch @@ -0,0 +1,51 @@ +From 04d32266b378f5f47088e8f34703bdd9c95f5a4c Mon Sep 17 00:00:00 2001 +From: "Gao,Yan" +Date: Thu, 30 Nov 2017 16:11:00 +0100 +Subject: [PATCH] Build: cluster-servant: Compile with pacemaker-2.0 + +Pacemaker-2.0 removed support for corosync 1 cluster layer: +https://github.com/ClusterLabs/pacemaker/commit/7a9891f29 +--- + configure.ac | 4 ++++ + src/sbd-cluster.c | 4 ++++ + 2 files changed, 8 insertions(+) + +diff --git a/configure.ac b/configure.ac +index 1eb8758..1f328c2 100644 +--- 
a/configure.ac ++++ b/configure.ac +@@ -72,6 +72,10 @@ AC_CHECK_HEADERS(pacemaker/crm/cluster.h) + AC_CHECK_LIB(crmcommon, pcmk_strerror, , missing="yes") + AC_CHECK_LIB(cib, cib_apply_patch_event, , missing="yes") + ++dnl pacemaker-2.0 removed support for corosync 1 cluster layer ++AC_CHECK_DECLS([pcmk_cluster_classic_ais, pcmk_cluster_cman],,, ++ [#include ]) ++ + if test "$missing" = "yes"; then + AC_MSG_ERROR([Missing required libraries or functions.]) + fi +diff --git a/src/sbd-cluster.c b/src/sbd-cluster.c +index de99d0c..ae4750e 100644 +--- a/src/sbd-cluster.c ++++ b/src/sbd-cluster.c +@@ -238,12 +238,16 @@ notify_timer_cb(gpointer data) + } + + switch (get_cluster_type()) { ++#if HAVE_DECL_PCMK_CLUSTER_CLASSIC_AIS + case pcmk_cluster_classic_ais: + send_cluster_text(crm_class_quorum, NULL, TRUE, NULL, crm_msg_ais); + break; + ++#endif + case pcmk_cluster_corosync: ++#if HAVE_DECL_PCMK_CLUSTER_CMAN + case pcmk_cluster_cman: ++#endif + /* TODO - Make a CPG call and only call notify_parent() when we get a reply */ + notify_parent(); + break; +-- +1.8.3.1 + diff --git a/SOURCES/0008-Log-change-sbd-s-default-logging-level-to-LOG_NOTICE.patch b/SOURCES/0008-Log-change-sbd-s-default-logging-level-to-LOG_NOTICE.patch new file mode 100644 index 0000000..b9d848b --- /dev/null +++ b/SOURCES/0008-Log-change-sbd-s-default-logging-level-to-LOG_NOTICE.patch @@ -0,0 +1,78 @@ +From 1d8fd2540ccf254d90e831f612415226043fc5b3 Mon Sep 17 00:00:00 2001 +From: "Gao,Yan" +Date: Fri, 27 Apr 2018 13:41:00 +0200 +Subject: [PATCH] Log: change sbd's default logging level to LOG_NOTICE + +With the refactoring of logging parts and 1ee3503c, sbd became too +silent given the default logging level LOG_WARNING, even under the +situations where it's supposed to tell something. + +This commit changes sbd's default logging level to LOG_NOTICE. +Meanwhile pacemaker library's logging level remains at LOG_WARNING. +With "-v", sbd's logging level is set to LOG_INFO. 
+With "-vv", sbd's logging level is set to LOG_DEBUG. +With "-vvv", both sbd's and pacemaker library's logging levels are set +to LOG_DEBUG. +--- + src/sbd-inquisitor.c | 28 ++++++++++++++++++++++------ + 1 file changed, 22 insertions(+), 6 deletions(-) + +diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c +index 59408b3..237bf43 100644 +--- a/src/sbd-inquisitor.c ++++ b/src/sbd-inquisitor.c +@@ -803,6 +803,19 @@ parse_device_line(const char *line) + return found; + } + ++#define SBD_SOURCE_FILES "sbd-cluster.c,sbd-common.c,sbd-inquisitor.c,sbd-md.c,sbd-pacemaker.c,setproctitle.c" ++ ++static void ++sbd_log_filter_ctl(const char *files, uint8_t priority) ++{ ++ if (files == NULL) { ++ files = SBD_SOURCE_FILES; ++ } ++ ++ qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, files, priority); ++ qb_log_filter_ctl(QB_LOG_STDERR, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, files, priority); ++} ++ + int + arg_enabled(int arg_count) + { +@@ -834,6 +847,7 @@ int main(int argc, char **argv, char **envp) + + qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_TRUE); + qb_log_ctl(QB_LOG_STDERR, QB_LOG_CONF_ENABLED, QB_FALSE); ++ sbd_log_filter_ctl(NULL, LOG_NOTICE); + + sbd_get_uname(); + +@@ -926,15 +940,17 @@ int main(int argc, char **argv, char **envp) + case 'v': + debug++; + if(debug == 1) { +- qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, "sbd-common.c,sbd-inquisitor.c,sbd-md.c,sbd-pacemaker.c", LOG_DEBUG); +- qb_log_filter_ctl(QB_LOG_STDERR, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, "sbd-common.c,sbd-inquisitor.c,sbd-md.c,sbd-pacemaker.c", LOG_DEBUG); +- cl_log(LOG_INFO, "Verbose mode enabled."); ++ sbd_log_filter_ctl(NULL, LOG_INFO); ++ cl_log(LOG_INFO, "Verbose mode enabled."); + + } else if(debug == 2) { ++ sbd_log_filter_ctl(NULL, LOG_DEBUG); ++ cl_log(LOG_INFO, "Debug mode enabled."); ++ ++ } else if(debug == 3) { + /* Go nuts, turn on pacemaker's logging too */ +- qb_log_filter_ctl(QB_LOG_SYSLOG, QB_LOG_FILTER_ADD, 
QB_LOG_FILTER_FILE, "*", LOG_DEBUG); +- qb_log_filter_ctl(QB_LOG_STDERR, QB_LOG_FILTER_ADD, QB_LOG_FILTER_FILE, "*", LOG_DEBUG); +- cl_log(LOG_INFO, "Verbose library mode enabled."); ++ sbd_log_filter_ctl("*", LOG_DEBUG); ++ cl_log(LOG_INFO, "Debug library mode enabled."); + } + break; + case 'T': +-- +1.8.3.1 + diff --git a/SOURCES/0009-Log-upgrade-important-messages-and-downgrade-unimpor.patch b/SOURCES/0009-Log-upgrade-important-messages-and-downgrade-unimpor.patch new file mode 100644 index 0000000..9f2da2e --- /dev/null +++ b/SOURCES/0009-Log-upgrade-important-messages-and-downgrade-unimpor.patch @@ -0,0 +1,161 @@ +From 2dbdee29736fcbf0fe1d41c306959b22d05f72b0 Mon Sep 17 00:00:00 2001 +From: "Gao,Yan" +Date: Mon, 30 Apr 2018 18:02:04 +0200 +Subject: [PATCH] Log: upgrade important messages and downgrade unimportant + ones + +It also fixes a message that's supposed to be "quorum.two_node not +present in cmap". +--- + src/sbd-cluster.c | 11 ++++++----- + src/sbd-common.c | 4 ++-- + src/sbd-inquisitor.c | 6 +++--- + src/sbd-md.c | 6 +++--- + src/sbd-pacemaker.c | 2 +- + 5 files changed, 15 insertions(+), 14 deletions(-) + +diff --git a/src/sbd-cluster.c b/src/sbd-cluster.c +index ae4750e..c7328af 100644 +--- a/src/sbd-cluster.c ++++ b/src/sbd-cluster.c +@@ -201,10 +201,11 @@ sbd_get_two_node(void) + } + + if (cmap_get_uint8(cmap_handle, "quorum.two_node", &two_node_u8) == CS_OK) { +- cl_log(LOG_NOTICE, "Corosync is%s in 2Node-mode", two_node_u8?"":" not"); ++ cl_log(two_node_u8? 
LOG_NOTICE : LOG_INFO, ++ "Corosync is%s in 2Node-mode", two_node_u8?"":" not"); + two_node = two_node_u8; + } else { +- cl_log(LOG_NOTICE, "quorum.two_node present in cmap\n"); ++ cl_log(LOG_INFO, "quorum.two_node not present in cmap\n"); + } + return TRUE; + +@@ -264,7 +265,7 @@ sbd_membership_connect(void) + { + bool connected = false; + +- cl_log(LOG_NOTICE, "Attempting cluster connection"); ++ cl_log(LOG_INFO, "Attempting cluster connection"); + + cluster.destroy = sbd_membership_destroy; + +@@ -308,7 +309,7 @@ sbd_membership_connect(void) + } + } + +- set_servant_health(pcmk_health_transient, LOG_NOTICE, "Connected, waiting for initial membership"); ++ set_servant_health(pcmk_health_transient, LOG_INFO, "Connected, waiting for initial membership"); + notify_parent(); + + notify_timer_cb(NULL); +@@ -530,7 +531,7 @@ servant_cluster(const char *diskname, int mode, const void* argp) + enum cluster_type_e cluster_stack = get_cluster_type(); + + crm_system_name = strdup("sbd:cluster"); +- cl_log(LOG_INFO, "Monitoring %s cluster health", name_for_cluster_type(cluster_stack)); ++ cl_log(LOG_NOTICE, "Monitoring %s cluster health", name_for_cluster_type(cluster_stack)); + set_proc_title("sbd: watcher: Cluster"); + + sbd_membership_connect(); +diff --git a/src/sbd-common.c b/src/sbd-common.c +index f22c4f2..0ce6478 100644 +--- a/src/sbd-common.c ++++ b/src/sbd-common.c +@@ -893,7 +893,7 @@ notify_parent(void) + case pcmk_health_pending: + case pcmk_health_shutdown: + case pcmk_health_transient: +- DBGLOG(LOG_INFO, "Not notifying parent: state transient (%d)", servant_health); ++ DBGLOG(LOG_DEBUG, "Not notifying parent: state transient (%d)", servant_health); + break; + + case pcmk_health_unknown: +@@ -904,7 +904,7 @@ notify_parent(void) + break; + + case pcmk_health_online: +- DBGLOG(LOG_INFO, "Notifying parent: healthy"); ++ DBGLOG(LOG_DEBUG, "Notifying parent: healthy"); + sigqueue(ppid, SIG_LIVENESS, signal_value); + break; + +diff --git a/src/sbd-inquisitor.c 
b/src/sbd-inquisitor.c +index 237bf43..90c7d26 100644 +--- a/src/sbd-inquisitor.c ++++ b/src/sbd-inquisitor.c +@@ -64,7 +64,7 @@ void recruit_servant(const char *devname, pid_t pid) + + servant_count++; + if(sbd_is_disk(newbie)) { +- cl_log(LOG_NOTICE, "Monitoring %s", devname); ++ cl_log(LOG_INFO, "Monitoring %s", devname); + disk_count++; + } else { + newbie->outdated = 1; +@@ -565,7 +565,7 @@ void inquisitor_child(void) + if(cluster_alive(true)) { + /* We LIVE! */ + if(cluster_appeared == false) { +- cl_log(LOG_NOTICE, "Active cluster detected"); ++ cl_log(LOG_INFO, "Active cluster detected"); + } + tickle = 1; + can_detach = 1; +@@ -574,7 +574,7 @@ void inquisitor_child(void) + } else if(cluster_alive(false)) { + if(!decoupled) { + /* On the way up, detach and arm the watchdog */ +- cl_log(LOG_NOTICE, "Partial cluster detected, detaching"); ++ cl_log(LOG_INFO, "Partial cluster detected, detaching"); + } + + can_detach = 1; +diff --git a/src/sbd-md.c b/src/sbd-md.c +index 6a964dd..6f152c4 100644 +--- a/src/sbd-md.c ++++ b/src/sbd-md.c +@@ -1097,7 +1097,7 @@ int servant(const char *diskname, int mode, const void* argp) + exit(EXIT_MD_IO_FAIL); + } + +- DBGLOG(LOG_INFO, "Monitoring slot %d on disk %s", mbox, diskname); ++ cl_log(LOG_NOTICE, "Monitoring slot %d on disk %s", mbox, diskname); + if (s_header->minor_version == 0) { + set_proc_title("sbd: watcher: %s - slot: %d", diskname, mbox); + } else { +@@ -1180,7 +1180,7 @@ int servant(const char *diskname, int mode, const void* argp) + } + + if (s_mbox->cmd > 0) { +- cl_log(LOG_INFO, ++ cl_log(LOG_NOTICE, + "Received command %s from %s on disk %s", + char2cmd(s_mbox->cmd), s_mbox->from, diskname); + +@@ -1222,7 +1222,7 @@ int servant(const char *diskname, int mode, const void* argp) + (int)latency, (int)timeout_watchdog_warn, + diskname); + } else if (debug) { +- DBGLOG(LOG_INFO, "Latency: %d on disk %s", (int)latency, ++ DBGLOG(LOG_DEBUG, "Latency: %d on disk %s", (int)latency, + diskname); + } + } +diff --git 
a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c +index 2f06109..a435d01 100644 +--- a/src/sbd-pacemaker.c ++++ b/src/sbd-pacemaker.c +@@ -416,7 +416,7 @@ servant_pcmk(const char *diskname, int mode, const void* argp) + int exit_code = 0; + + crm_system_name = strdup("sbd:pcmk"); +- cl_log(LOG_INFO, "Monitoring Pacemaker health"); ++ cl_log(LOG_NOTICE, "Monitoring Pacemaker health"); + set_proc_title("sbd: watcher: Pacemaker"); + setenv("PCMK_watchdog", "true", 1); + +-- +1.8.3.1 + diff --git a/SOURCES/0010-Refactor-sbd-cluster-let-scan-do-the-job-of-proc-par.patch b/SOURCES/0010-Refactor-sbd-cluster-let-scan-do-the-job-of-proc-par.patch new file mode 100644 index 0000000..e3a2b62 --- /dev/null +++ b/SOURCES/0010-Refactor-sbd-cluster-let-scan-do-the-job-of-proc-par.patch @@ -0,0 +1,54 @@ +From 13295dec0f567d6795522241fff6817a68b02033 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Wed, 2 May 2018 20:07:12 +0200 +Subject: [PATCH] Refactor: sbd-cluster: let scan do the job of proc-parsing + +Now it would as well parse names containing spaces properly. 
+--- + src/sbd-cluster.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +diff --git a/src/sbd-cluster.c b/src/sbd-cluster.c +index 8512f23..f5e9ff0 100644 +--- a/src/sbd-cluster.c ++++ b/src/sbd-cluster.c +@@ -338,7 +338,7 @@ sbd_membership_destroy(gpointer user_data) + * \brief Get process ID and name associated with a /proc directory entry + * + * \param[in] entry Directory entry (must be result of readdir() on /proc) +- * \param[out] name If not NULL, a char[64] to hold the process name ++ * \param[out] name If not NULL, a char[16] to hold the process name + * \param[out] pid If not NULL, will be set to process ID of entry + * + * \return 0 on success, -1 if entry is not for a process or info not found +@@ -353,7 +353,7 @@ sbd_procfs_process_info(struct dirent *entry, char *name, int *pid) + int fd, local_pid; + FILE *file; + struct stat statbuf; +- char key[16] = { 0 }, procpath[128] = { 0 }; ++ char procpath[128] = { 0 }; + + /* We're only interested in entries whose name is a PID, + * so skip anything non-numeric or that is too long. 
+@@ -396,8 +396,7 @@ sbd_procfs_process_info(struct dirent *entry, char *name, int *pid) + if (!file) { + return -1; + } +- if ((fscanf(file, "%15s%63s", key, name) != 2) +- || safe_str_neq(key, "Name:")) { ++ if (fscanf(file, "Name:\t%15[a-zA-Z0-9 _-]", name) != 1) { + fclose(file); + return -1; + } +@@ -484,7 +483,7 @@ static long unsigned int + find_pacemaker_remote(void) + { + DIR *dp; +- char entry_name[64]; ++ char entry_name[16]; + struct dirent *entry; + + dp = opendir("/proc"); +-- +1.8.3.1 + diff --git a/SOURCES/0011-Fix-search-for-pacemaker-remoted-with-pacemaker-2.0.patch b/SOURCES/0011-Fix-search-for-pacemaker-remoted-with-pacemaker-2.0.patch new file mode 100644 index 0000000..44a35aa --- /dev/null +++ b/SOURCES/0011-Fix-search-for-pacemaker-remoted-with-pacemaker-2.0.patch @@ -0,0 +1,61 @@ +From a6acd38756fc7f93afcf5c08b8cdf139a3e354e7 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Wed, 2 May 2018 13:30:42 +0200 +Subject: [PATCH] Fix: sbd-cluster: search for pacemaker-remoted with + pcmk-2.0.0rc3 + +--- + src/sbd-cluster.c | 16 ++++++++++------ + 1 file changed, 10 insertions(+), 6 deletions(-) + +diff --git a/src/sbd-cluster.c b/src/sbd-cluster.c +index ae4750e..8512f23 100644 +--- a/src/sbd-cluster.c ++++ b/src/sbd-cluster.c +@@ -42,6 +42,13 @@ + //undef SUPPORT_PLUGIN + //define SUPPORT_PLUGIN 1 + ++/* binary for pacemaker-remote has changed with pacemaker 2 */ ++#ifdef CRM_SCORE_INFINITY ++#define PACEMAKER_REMOTE_BINARY "pacemaker-remoted" ++#else ++#define PACEMAKER_REMOTE_BINARY "pacemaker_remoted" ++#endif ++ + static bool remote_node = false; + static pid_t remoted_pid = 0; + static int reconnect_msec = 1000; +@@ -435,7 +442,7 @@ sbd_remote_check(gpointer user_data) + + } else { + int rc = 0; +- char proc_path[PATH_MAX], exe_path[PATH_MAX], expected_path[PATH_MAX]; ++ char proc_path[PATH_MAX], exe_path[PATH_MAX]; + + /* check to make sure pid hasn't been reused by another process */ + snprintf(proc_path, sizeof(proc_path), 
"/proc/%lu/exe", (long unsigned int)remoted_pid); +@@ -447,10 +454,7 @@ sbd_remote_check(gpointer user_data) + } + exe_path[rc] = 0; + +- rc = snprintf(expected_path, sizeof(proc_path), "%s/pacemaker_remoted", SBINDIR); +- expected_path[rc] = 0; +- +- if (strcmp(exe_path, expected_path) == 0) { ++ if (strcmp(exe_path, SBINDIR "/" PACEMAKER_REMOTE_BINARY) == 0) { + cl_log(LOG_DEBUG, "Process %s (%ld) is active", + exe_path, (long)remoted_pid); + running = 1; +@@ -499,7 +503,7 @@ find_pacemaker_remote(void) + + /* entry_name is truncated to 16 characters including the nul terminator */ + cl_log(LOG_DEBUG, "Found %s at %u", entry_name, pid); +- if (strcmp(entry_name, "pacemaker_remot") == 0) { ++ if (strncmp(entry_name, PACEMAKER_REMOTE_BINARY, 15) == 0) { + cl_log(LOG_NOTICE, "Found Pacemaker Remote at PID %u", pid); + remoted_pid = pid; + remote_node = true; +-- +1.8.3.1 + diff --git a/SOURCES/0012-Fix-sbd-common-don-t-follow-symlinks-outside-dev-for.patch b/SOURCES/0012-Fix-sbd-common-don-t-follow-symlinks-outside-dev-for.patch new file mode 100644 index 0000000..0de1f14 --- /dev/null +++ b/SOURCES/0012-Fix-sbd-common-don-t-follow-symlinks-outside-dev-for.patch @@ -0,0 +1,96 @@ +From 5d52fa8c3c903df4be0e4e954fbca9b3b15285c6 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Fri, 14 Sep 2018 17:51:50 +0200 +Subject: [PATCH] Fix: sbd-common: don't follow symlinks outside /dev for + watchdog + +This makes it easier to define a SELinux-policy that keeps +avc-log clean on /dev traversal triggered by query-watchdog. 
+--- + src/sbd-common.c | 42 ++++++++++++++++++++++++++++++++++++++---- + 1 file changed, 38 insertions(+), 4 deletions(-) + +diff --git a/src/sbd-common.c b/src/sbd-common.c +index 0ce6478..fcb7a31 100644 +--- a/src/sbd-common.c ++++ b/src/sbd-common.c +@@ -251,7 +251,8 @@ watchdog_close(bool disarm) + #define MAX_WATCHDOGS 64 + #define SYS_CLASS_WATCHDOG "/sys/class/watchdog" + #define SYS_CHAR_DEV_DIR "/sys/dev/char" +-#define WATCHDOG_NODEDIR "/dev" ++#define WATCHDOG_NODEDIR "/dev/" ++#define WATCHDOG_NODEDIR_LEN 5 + + struct watchdog_list_item { + dev_t dev; +@@ -273,7 +274,7 @@ watchdog_populate_list(void) + struct dirent *entry; + char entry_name[280]; + DIR *dp; +- char buf[256] = ""; ++ char buf[280] = ""; + + if (watchdog_list != NULL) { + return; +@@ -313,7 +314,38 @@ watchdog_populate_list(void) + struct stat statbuf; + + snprintf(entry_name, sizeof(entry_name), +- WATCHDOG_NODEDIR "/%s", entry->d_name); ++ WATCHDOG_NODEDIR "%s", entry->d_name); ++ if (entry->d_type == DT_LNK) { ++ int len; ++ ++ /* !realpath(entry_name, buf) unfortunately does a stat on ++ * target so we can't really use it to check if links stay ++ * within /dev without triggering e.g. AVC-logs (with ++ * SELinux policy that just allows stat within /dev). ++ * Without canonicalization that doesn't actually touch the ++ * filesystem easily available introduce some limitations ++ * for simplicity: ++ * - just simple path without '..' ++ * - just one level of symlinks (avoid e.g. 
loop-checking) ++ */ ++ len = readlink(entry_name, buf, sizeof(buf) - 1); ++ if ((len < 1) || ++ (len > sizeof(buf) - WATCHDOG_NODEDIR_LEN - 1)) { ++ continue; ++ } ++ buf[len] = '\0'; ++ if (buf[0] != '/') { ++ memmove(&buf[WATCHDOG_NODEDIR_LEN], buf, len+1); ++ memcpy(buf, WATCHDOG_NODEDIR, WATCHDOG_NODEDIR_LEN); ++ len += WATCHDOG_NODEDIR_LEN; ++ } ++ if (strstr(buf, "/../") || ++ strncmp(WATCHDOG_NODEDIR, buf, WATCHDOG_NODEDIR_LEN) || ++ lstat(buf, &statbuf) || ++ !S_ISCHR(statbuf.st_mode)) { ++ continue; ++ } ++ } + if(!stat(entry_name, &statbuf) && S_ISCHR(statbuf.st_mode)) { + int i; + +@@ -322,6 +354,7 @@ watchdog_populate_list(void) + int wdfd = watchdog_init_fd(entry_name, -1); + struct watchdog_list_item *wdg = + calloc(1, sizeof(struct watchdog_list_item)); ++ int len; + + wdg->dev = watchdogs[i]; + wdg->dev_node = strdup(entry_name); +@@ -343,7 +376,8 @@ watchdog_populate_list(void) + snprintf(entry_name, sizeof(entry_name), + SYS_CHAR_DEV_DIR "/%d:%d/device/driver", + major(watchdogs[i]), minor(watchdogs[i])); +- if (readlink(entry_name, buf, sizeof(buf)) > 0) { ++ if ((len = readlink(entry_name, buf, sizeof(buf) - 1)) > 0) { ++ buf[len] = '\0'; + wdg->dev_driver = strdup(basename(buf)); + } else if ((wdg->dev_ident) && + (strcmp(wdg->dev_ident, +-- +1.8.3.1 + diff --git a/SOURCES/0013-Refactor-sbd-common-separate-assignment-and-comparis.patch b/SOURCES/0013-Refactor-sbd-common-separate-assignment-and-comparis.patch new file mode 100644 index 0000000..2108e37 --- /dev/null +++ b/SOURCES/0013-Refactor-sbd-common-separate-assignment-and-comparis.patch @@ -0,0 +1,33 @@ +From e13297f45b4c5868800b1d3fc359bfd0723fcc5f Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Mon, 17 Sep 2018 23:13:37 +0200 +Subject: [PATCH] Refactor: sbd-common: separate assignment and comparison + +--- + src/sbd-common.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/src/sbd-common.c b/src/sbd-common.c +index fcb7a31..679f946 100644 +--- 
a/src/sbd-common.c ++++ b/src/sbd-common.c +@@ -376,12 +376,13 @@ watchdog_populate_list(void) + snprintf(entry_name, sizeof(entry_name), + SYS_CHAR_DEV_DIR "/%d:%d/device/driver", + major(watchdogs[i]), minor(watchdogs[i])); +- if ((len = readlink(entry_name, buf, sizeof(buf) - 1)) > 0) { ++ len = readlink(entry_name, buf, sizeof(buf) - 1); ++ if (len > 0) { + buf[len] = '\0'; + wdg->dev_driver = strdup(basename(buf)); + } else if ((wdg->dev_ident) && +- (strcmp(wdg->dev_ident, +- "Software Watchdog") == 0)) { ++ (strcmp(wdg->dev_ident, ++ "Software Watchdog") == 0)) { + wdg->dev_driver = strdup("softdog"); + } + break; +-- +1.8.3.1 + diff --git a/SOURCES/0014-Fix-sbd-common-avoid-statting-potential-links.patch b/SOURCES/0014-Fix-sbd-common-avoid-statting-potential-links.patch new file mode 100644 index 0000000..1e61c36 --- /dev/null +++ b/SOURCES/0014-Fix-sbd-common-avoid-statting-potential-links.patch @@ -0,0 +1,214 @@ +From 5b4c866f7c0b4ef8061e131a1ee0d1c608d35054 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Wed, 19 Sep 2018 16:15:27 +0200 +Subject: [PATCH] Fix: sbd-common: avoid statting potential links + +These potential links might be anything and statting - if just +allowed to stat chr-nodes (e.g. SELinux) - them would lead +to avc-logs in the SELinux case. 
+--- + src/sbd-common.c | 133 +++++++++++++++++++++++++++++++++++++++---------------- + 1 file changed, 96 insertions(+), 37 deletions(-) + +diff --git a/src/sbd-common.c b/src/sbd-common.c +index 679f946..cc84cd0 100644 +--- a/src/sbd-common.c ++++ b/src/sbd-common.c +@@ -262,6 +262,12 @@ struct watchdog_list_item { + struct watchdog_list_item *next; + }; + ++struct link_list_item { ++ char *dev_node; ++ char *link_name; ++ struct link_list_item *next; ++}; ++ + static struct watchdog_list_item *watchdog_list = NULL; + static int watchdog_list_items = 0; + +@@ -275,6 +281,7 @@ watchdog_populate_list(void) + char entry_name[280]; + DIR *dp; + char buf[280] = ""; ++ struct link_list_item *link_list = NULL; + + if (watchdog_list != NULL) { + return; +@@ -288,7 +295,7 @@ watchdog_populate_list(void) + FILE *file; + + snprintf(entry_name, sizeof(entry_name), +- SYS_CLASS_WATCHDOG "/%s/dev", entry->d_name); ++ SYS_CLASS_WATCHDOG "/%s/dev", entry->d_name); + file = fopen(entry_name, "r"); + if (file) { + int major, minor; +@@ -309,43 +316,59 @@ watchdog_populate_list(void) + /* search for watchdog nodes in /dev */ + dp = opendir(WATCHDOG_NODEDIR); + if (dp) { ++ /* first go for links and memorize them */ + while ((entry = readdir(dp))) { +- if ((entry->d_type == DT_CHR) || (entry->d_type == DT_LNK)) { +- struct stat statbuf; ++ if (entry->d_type == DT_LNK) { ++ int len; + + snprintf(entry_name, sizeof(entry_name), +- WATCHDOG_NODEDIR "%s", entry->d_name); +- if (entry->d_type == DT_LNK) { +- int len; +- +- /* !realpath(entry_name, buf) unfortunately does a stat on +- * target so we can't really use it to check if links stay +- * within /dev without triggering e.g. AVC-logs (with +- * SELinux policy that just allows stat within /dev). +- * Without canonicalization that doesn't actually touch the +- * filesystem easily available introduce some limitations +- * for simplicity: +- * - just simple path without '..' +- * - just one level of symlinks (avoid e.g. 
loop-checking) +- */ +- len = readlink(entry_name, buf, sizeof(buf) - 1); +- if ((len < 1) || +- (len > sizeof(buf) - WATCHDOG_NODEDIR_LEN - 1)) { +- continue; +- } +- buf[len] = '\0'; +- if (buf[0] != '/') { +- memmove(&buf[WATCHDOG_NODEDIR_LEN], buf, len+1); +- memcpy(buf, WATCHDOG_NODEDIR, WATCHDOG_NODEDIR_LEN); +- len += WATCHDOG_NODEDIR_LEN; +- } +- if (strstr(buf, "/../") || +- strncmp(WATCHDOG_NODEDIR, buf, WATCHDOG_NODEDIR_LEN) || +- lstat(buf, &statbuf) || +- !S_ISCHR(statbuf.st_mode)) { +- continue; +- } ++ WATCHDOG_NODEDIR "%s", entry->d_name); ++ ++ /* !realpath(entry_name, buf) unfortunately does a stat on ++ * target so we can't really use it to check if links stay ++ * within /dev without triggering e.g. AVC-logs (with ++ * SELinux policy that just allows stat within /dev). ++ * Without canonicalization that doesn't actually touch the ++ * filesystem easily available introduce some limitations ++ * for simplicity: ++ * - just simple path without '..' ++ * - just one level of symlinks (avoid e.g. 
loop-checking) ++ */ ++ len = readlink(entry_name, buf, sizeof(buf) - 1); ++ if ((len < 1) || ++ (len > sizeof(buf) - WATCHDOG_NODEDIR_LEN - 1)) { ++ continue; ++ } ++ buf[len] = '\0'; ++ if (buf[0] != '/') { ++ memmove(&buf[WATCHDOG_NODEDIR_LEN], buf, len+1); ++ memcpy(buf, WATCHDOG_NODEDIR, WATCHDOG_NODEDIR_LEN); ++ len += WATCHDOG_NODEDIR_LEN; ++ } ++ if (strstr(buf, "/../") || ++ strncmp(WATCHDOG_NODEDIR, buf, WATCHDOG_NODEDIR_LEN)) { ++ continue; ++ } else { ++ /* just memorize to avoid statting the target - SELinux */ ++ struct link_list_item *lli = ++ calloc(1, sizeof(struct link_list_item)); ++ ++ lli->dev_node = strdup(buf); ++ lli->link_name = strdup(entry_name); ++ lli->next = link_list; ++ link_list = lli; + } ++ } ++ } ++ ++ rewinddir(dp); ++ ++ while ((entry = readdir(dp))) { ++ if (entry->d_type == DT_CHR) { ++ struct stat statbuf; ++ ++ snprintf(entry_name, sizeof(entry_name), ++ WATCHDOG_NODEDIR "%s", entry->d_name); + if(!stat(entry_name, &statbuf) && S_ISCHR(statbuf.st_mode)) { + int i; + +@@ -353,8 +376,9 @@ watchdog_populate_list(void) + if (statbuf.st_rdev == watchdogs[i]) { + int wdfd = watchdog_init_fd(entry_name, -1); + struct watchdog_list_item *wdg = +- calloc(1, sizeof(struct watchdog_list_item)); ++ calloc(1, sizeof(struct watchdog_list_item)); + int len; ++ struct link_list_item *tmp_list = NULL; + + wdg->dev = watchdogs[i]; + wdg->dev_node = strdup(entry_name); +@@ -374,8 +398,8 @@ watchdog_populate_list(void) + } + + snprintf(entry_name, sizeof(entry_name), +- SYS_CHAR_DEV_DIR "/%d:%d/device/driver", +- major(watchdogs[i]), minor(watchdogs[i])); ++ SYS_CHAR_DEV_DIR "/%d:%d/device/driver", ++ major(watchdogs[i]), minor(watchdogs[i])); + len = readlink(entry_name, buf, sizeof(buf) - 1); + if (len > 0) { + buf[len] = '\0'; +@@ -385,14 +409,49 @@ watchdog_populate_list(void) + "Software Watchdog") == 0)) { + wdg->dev_driver = strdup("softdog"); + } ++ ++ /* create dupes if we have memorized links ++ * to this node ++ */ ++ for (tmp_list 
= link_list; tmp_list; ++ tmp_list = tmp_list->next) { ++ if (!strcmp(tmp_list->dev_node, ++ wdg->dev_node)) { ++ struct watchdog_list_item *dupe_wdg = ++ calloc(1, sizeof(struct watchdog_list_item)); ++ ++ /* as long as we never purge watchdog_list ++ * there is no need to dupe strings ++ */ ++ *dupe_wdg = *wdg; ++ dupe_wdg->dev_node = strdup(tmp_list->link_name); ++ dupe_wdg->next = watchdog_list; ++ watchdog_list = dupe_wdg; ++ watchdog_list_items++; ++ } ++ /* for performance reasons we could remove ++ * the link_list entry ++ */ ++ } + break; + } + } + } + } + } ++ + closedir(dp); + } ++ ++ /* cleanup link list */ ++ while (link_list) { ++ struct link_list_item *tmp_list = link_list; ++ ++ link_list = link_list->next; ++ free(tmp_list->dev_node); ++ free(tmp_list->link_name); ++ free(tmp_list); ++ } + } + + int watchdog_info(void) +-- +1.8.3.1 + diff --git a/SOURCES/0015-Refactor-use-pacemaker-s-new-pe-api-with-constructor.patch b/SOURCES/0015-Refactor-use-pacemaker-s-new-pe-api-with-constructor.patch new file mode 100644 index 0000000..602daa1 --- /dev/null +++ b/SOURCES/0015-Refactor-use-pacemaker-s-new-pe-api-with-constructor.patch @@ -0,0 +1,139 @@ +From a34cafa9d69194e3cbfe3af20ceb2d08848c483c Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Mon, 19 Nov 2018 20:56:35 +0100 +Subject: [PATCH] Refactor: use pacemaker's new pe api with + constructors/destructors + +For backward compatibility add some compatibility code +for if pe_new_working_set isn't available. 
+--- + configure.ac | 3 +++ + src/sbd-pacemaker.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++------ + 2 files changed, 57 insertions(+), 7 deletions(-) + +diff --git a/configure.ac b/configure.ac +index 1f328c2..1dc273b 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -76,6 +76,9 @@ dnl pacemaker-2.0 removed support for corosync 1 cluster layer + AC_CHECK_DECLS([pcmk_cluster_classic_ais, pcmk_cluster_cman],,, + [#include ]) + ++dnl check for new pe-API ++AC_CHECK_FUNCS(pe_new_working_set) ++ + if test "$missing" = "yes"; then + AC_MSG_ERROR([Missing required libraries or functions.]) + fi +diff --git a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c +index a435d01..aac355a 100644 +--- a/src/sbd-pacemaker.c ++++ b/src/sbd-pacemaker.c +@@ -58,6 +58,31 @@ + + #include "sbd.h" + ++#ifndef HAVE_PE_NEW_WORKING_SET ++ ++#define pe_reset_working_set(data_set) cleanup_calculations(data_set) ++ ++static pe_working_set_t * ++pe_new_working_set() ++{ ++ pe_working_set_t *data_set = calloc(1, sizeof(pe_working_set_t)); ++ if (data_set != NULL) { ++ set_working_set_defaults(data_set); ++ } ++ return data_set; ++} ++ ++static void ++pe_free_working_set(pe_working_set_t *data_set) ++{ ++ if (data_set != NULL) { ++ pe_reset_working_set(data_set); ++ free(data_set); ++ } ++} ++ ++#endif ++ + extern int disk_count; + + static void clean_up(int rc); +@@ -74,6 +99,7 @@ static int cib_connected = 0; + + static cib_t *cib = NULL; + static xmlNode *current_cib = NULL; ++static pe_working_set_t *data_set = NULL; + + static long last_refresh = 0; + +@@ -361,7 +387,6 @@ static gboolean + mon_refresh_state(gpointer user_data) + { + xmlNode *cib_copy = NULL; +- pe_working_set_t data_set; + + if(current_cib == NULL) { + return FALSE; +@@ -382,14 +407,13 @@ mon_refresh_state(gpointer user_data) + + } else { + last_refresh = time(NULL); +- set_working_set_defaults(&data_set); +- data_set.input = cib_copy; +- data_set.flags |= pe_flag_have_stonith_resource; +- cluster_status(&data_set); ++ 
data_set->input = cib_copy; ++ data_set->flags |= pe_flag_have_stonith_resource; ++ cluster_status(data_set); + +- compute_status(&data_set); ++ compute_status(data_set); + +- cleanup_calculations(&data_set); ++ pe_reset_working_set(data_set); + } + + return FALSE; +@@ -398,6 +422,21 @@ mon_refresh_state(gpointer user_data) + static void + clean_up(int rc) + { ++ if (timer_id_reconnect > 0) { ++ g_source_remove(timer_id_reconnect); ++ timer_id_reconnect = 0; ++ } ++ ++ if (timer_id_notify > 0) { ++ g_source_remove(timer_id_notify); ++ timer_id_notify = 0; ++ } ++ ++ if (data_set != NULL) { ++ pe_free_working_set(data_set); ++ data_set = NULL; ++ } ++ + if (cib != NULL) { + cib->cmds->signoff(cib); + cib_delete(cib); +@@ -425,6 +464,14 @@ servant_pcmk(const char *diskname, int mode, const void* argp) + set_crm_log_level(LOG_CRIT); + } + ++ ++ if (data_set == NULL) { ++ data_set = pe_new_working_set(); ++ } ++ if (data_set == NULL) { ++ return -1; ++ } ++ + if (current_cib == NULL) { + cib = cib_new(); + +-- +1.8.3.1 + diff --git a/SOURCES/0016-Feature-make-timeout-action-executed-by-sbd-configur.patch b/SOURCES/0016-Feature-make-timeout-action-executed-by-sbd-configur.patch new file mode 100644 index 0000000..cdffdd4 --- /dev/null +++ b/SOURCES/0016-Feature-make-timeout-action-executed-by-sbd-configur.patch @@ -0,0 +1,294 @@ +From b64c30af56e7eabd63ce1db25bc5ed9b953485af Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Fri, 23 Nov 2018 14:09:22 +0100 +Subject: [PATCH] Feature: make timeout-action executed by sbd configurable + +--- + man/sbd.8.pod | 19 +++++++++++++++++++ + src/sbd-common.c | 22 ++++++++++++++++------ + src/sbd-inquisitor.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++--- + src/sbd-md.c | 2 +- + src/sbd.h | 3 +++ + src/sbd.sysconfig | 18 ++++++++++++++++++ + 6 files changed, 107 insertions(+), 10 deletions(-) + +diff --git a/man/sbd.8.pod b/man/sbd.8.pod +index ffd01c2..dbb3855 100644 +--- a/man/sbd.8.pod ++++ b/man/sbd.8.pod +@@ 
-333,6 +333,23 @@ prevent a successful crashdump from ever being written. + + Defaults to 240 seconds. Set to zero to disable. + ++=item B<-r> I ++ ++Actions to be executed when the watchers don't timely report to the sbd ++master process or one of the watchers detects that the master process ++has died. ++ ++Set timeout-action to comma-separated combination of ++noflush|flush plus reboot|crashdump|off. ++If just one of both is given the other stays at the default. ++ ++This doesn't affect actions like off, crashdump, reboot explicitly ++triggered via message slots. ++And it does as well not configure the action a watchdog would ++trigger should it run off (there is no generic interface). ++ ++Defaults to flush,reboot. ++ + =back + + =head2 allocate +@@ -552,6 +569,8 @@ options to pass to the daemon: + + C will fail to start if no C is specified. See the + installed template for more options that can be configured here. ++In general configuration done via parameters takes precedence over ++the configuration from the configuration file. + + =head2 Testing the sbd installation + +diff --git a/src/sbd-common.c b/src/sbd-common.c +index cc84cd0..0e8be65 100644 +--- a/src/sbd-common.c ++++ b/src/sbd-common.c +@@ -98,6 +98,8 @@ usage(void) + " (default is 1, set to 0 to disable)\n" + "-P Check Pacemaker quorum and node health (optional, watch only)\n" + "-Z Enable trace mode. WARNING: UNSAFE FOR PRODUCTION!\n" ++"-r Set timeout-action to comma-separated combination of\n" ++" noflush|flush plus reboot|crashdump|off (default is flush,reboot)\n" + "Commands:\n" + #if SUPPORT_SHARED_DISK + "create initialize N slots on - OVERWRITES DEVICE!\n" +@@ -769,7 +771,7 @@ sysrq_trigger(char t) + + + static void +-do_exit(char kind) ++do_exit(char kind, bool do_flush) + { + /* TODO: Turn debug_mode into a bit field? 
Delay + kdump for example */ + const char *reason = NULL; +@@ -814,7 +816,9 @@ do_exit(char kind) + } + + cl_log(LOG_EMERG, "Rebooting system: %s", reason); +- sync(); ++ if (do_flush) { ++ sync(); ++ } + + if(kind == 'c') { + watchdog_close(true); +@@ -834,19 +838,25 @@ do_exit(char kind) + void + do_crashdump(void) + { +- do_exit('c'); ++ do_exit('c', true); + } + + void + do_reset(void) + { +- do_exit('b'); ++ do_exit('b', true); + } + + void + do_off(void) + { +- do_exit('o'); ++ do_exit('o', true); ++} ++ ++void ++do_timeout_action(void) ++{ ++ do_exit(timeout_sysrq_char, do_flush); + } + + /* +@@ -980,7 +990,7 @@ notify_parent(void) + /* Our parent died unexpectedly. Triggering + * self-fence. */ + cl_log(LOG_WARNING, "Our parent is dead."); +- do_reset(); ++ do_timeout_action(); + } + + switch (servant_health) { +diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c +index 9b193d4..8e0bc87 100644 +--- a/src/sbd-inquisitor.c ++++ b/src/sbd-inquisitor.c +@@ -31,6 +31,8 @@ int servant_restart_interval = 5; + int servant_restart_count = 1; + int start_mode = 0; + char* pidfile = NULL; ++bool do_flush = true; ++char timeout_sysrq_char = 'b'; + + int parse_device_line(const char *line); + +@@ -655,7 +657,7 @@ void inquisitor_child(void) + /* At level 2 or above, we do nothing, but expect + * things to eventually return to + * normal. 
*/ +- do_reset(); ++ do_timeout_action(); + } else { + cl_log(LOG_ERR, "SBD: DEBUG MODE: Would have fenced due to timeout!"); + } +@@ -668,7 +670,7 @@ void inquisitor_child(void) + + if (debug_mode && watchdog_use) { + /* In debug mode, trigger a reset before the watchdog can panic the machine */ +- do_reset(); ++ do_timeout_action(); + } + } + +@@ -833,6 +835,7 @@ int main(int argc, char **argv, char **envp) + int qb_facility; + const char *value = NULL; + int start_delay = 0; ++ char *timeout_action = NULL; + + if ((cmdname = strrchr(argv[0], '/')) == NULL) { + cmdname = argv[0]; +@@ -928,7 +931,12 @@ int main(int argc, char **argv, char **envp) + } + cl_log(LOG_DEBUG, "Start delay: %d (%s)", (int)start_delay, value?value:"default"); + +- while ((c = getopt(argc, argv, "czC:DPRTWZhvw:d:n:p:1:2:3:4:5:t:I:F:S:s:")) != -1) { ++ value = getenv("SBD_TIMEOUT_ACTION"); ++ if(value) { ++ timeout_action = strdup(value); ++ } ++ ++ while ((c = getopt(argc, argv, "czC:DPRTWZhvw:d:n:p:1:2:3:4:5:t:I:F:S:s:r:")) != -1) { + switch (c) { + case 'D': + break; +@@ -1043,6 +1051,12 @@ int main(int argc, char **argv, char **envp) + cl_log(LOG_INFO, "Servant restart count set to %d", + (int)servant_restart_count); + break; ++ case 'r': ++ if (timeout_action) { ++ free(timeout_action); ++ } ++ timeout_action = strdup(optarg); ++ break; + case 'h': + usage(); + return (0); +@@ -1101,6 +1115,39 @@ int main(int argc, char **argv, char **envp) + goto out; + } + ++ if (timeout_action) { ++ char *p[2]; ++ int i; ++ char c; ++ int nrflags = sscanf(timeout_action, "%m[a-z],%m[a-z]%c", &p[0], &p[1], &c); ++ bool parse_error = (nrflags < 1) || (nrflags > 2); ++ ++ for (i = 0; (i < nrflags) && (i < 2); i++) { ++ if (!strcmp(p[i], "reboot")) { ++ timeout_sysrq_char = 'b'; ++ } else if (!strcmp(p[i], "crashdump")) { ++ timeout_sysrq_char = 'c'; ++ } else if (!strcmp(p[i], "off")) { ++ timeout_sysrq_char = 'o'; ++ } else if (!strcmp(p[i], "flush")) { ++ do_flush = true; ++ } else if (!strcmp(p[i], 
"noflush")) { ++ do_flush = false; ++ } else { ++ parse_error = true; ++ } ++ free(p[i]); ++ } ++ if (parse_error) { ++ fprintf(stderr, "Failed to parse timeout-action \"%s\".\n", ++ timeout_action); ++ exit_status = -1; ++ goto out; ++ } ++ } ++ cl_log(LOG_NOTICE, "%s flush + writing \'%c\' to sysrq on timeout", ++ do_flush?"Doing":"Skipping", timeout_sysrq_char); ++ + #if SUPPORT_SHARED_DISK + if (strcmp(argv[optind], "create") == 0) { + exit_status = init_devices(servants_leader); +diff --git a/src/sbd-md.c b/src/sbd-md.c +index a736118..579d273 100644 +--- a/src/sbd-md.c ++++ b/src/sbd-md.c +@@ -1149,7 +1149,7 @@ int servant(const char *diskname, int mode, const void* argp) + if (ppid == 1) { + /* Our parent died unexpectedly. Triggering + * self-fence. */ +- do_reset(); ++ do_timeout_action(); + } + + /* These attempts are, by definition, somewhat racy. If +diff --git a/src/sbd.h b/src/sbd.h +index 0f8847a..386c85c 100644 +--- a/src/sbd.h ++++ b/src/sbd.h +@@ -130,6 +130,7 @@ void sysrq_trigger(char t); + void do_crashdump(void); + void do_reset(void); + void do_off(void); ++void do_timeout_action(void); + pid_t make_daemon(void); + void maximize_priority(void); + void sbd_get_uname(void); +@@ -153,6 +154,8 @@ extern int debug_mode; + extern char *watchdogdev; + extern bool watchdogdev_is_default; + extern char* local_uname; ++extern bool do_flush; ++extern char timeout_sysrq_char; + + /* Global, non-tunable variables: */ + extern int sector_size; +diff --git a/src/sbd.sysconfig b/src/sbd.sysconfig +index c6d7c07..8f38426 100644 +--- a/src/sbd.sysconfig ++++ b/src/sbd.sysconfig +@@ -71,6 +71,24 @@ SBD_WATCHDOG_DEV=/dev/watchdog + SBD_WATCHDOG_TIMEOUT=5 + + ## Type: string ++## Default: "flush,reboot" ++# ++# Actions to be executed when the watchers don't timely report to the sbd ++# master process or one of the watchers detects that the master process ++# has died. 
++# ++# Set timeout-action to comma-separated combination of ++# noflush|flush plus reboot|crashdump|off. ++# If just one of both is given the other stays at the default. ++# ++# This doesn't affect actions like off, crashdump, reboot explicitly ++# triggered via message slots. ++# And it does as well not configure the action a watchdog would ++# trigger should it run off (there is no generic interface). ++# ++SBD_TIMEOUT_ACTION=flush,reboot ++ ++## Type: string + ## Default: "" + # + # Additional options for starting sbd +-- +1.8.3.1 + diff --git a/SPECS/sbd.spec b/SPECS/sbd.spec new file mode 100644 index 0000000..28492d0 --- /dev/null +++ b/SPECS/sbd.spec @@ -0,0 +1,294 @@ +# +# spec file for package sbd +# +# Copyright (c) 2014 SUSE LINUX Products GmbH, Nuernberg, Germany. +# Copyright (c) 2013 Lars Marowsky-Bree +# +# All modifications and additions to the file contributed by third parties +# remain the property of their copyright owners, unless otherwise agreed +# upon. The license for this file, and modifications and additions to the +# file, is the same license as for the pristine package itself (unless the +# license for the pristine package is not an Open Source License, in which +# case the license is the MIT License). An "Open Source License" is a +# license that conforms to the Open Source Definition (Version 1.9) +# published by the Open Source Initiative. 
+ +# Please submit bugfixes or comments via http://bugs.opensuse.org/ +# +%global commit a74b4d25a3eb93fe1abbe6e3ebfd2b16cf48873f +%global shortcommit %(c=%{commit}; echo ${c:0:7}) +%global github_owner Clusterlabs +%global buildnum 18 + +Name: sbd +Summary: Storage-based death +License: GPLv2+ +Group: System Environment/Daemons +Version: 1.3.1 +Release: %{buildnum}%{?dist} +Url: https://github.com/%{github_owner}/%{name} +Source0: https://github.com/%{github_owner}/%{name}/archive/%{commit}/%{name}-%{commit}.tar.gz +Patch0: 0001-make-pacemaker-dlm-wait-for-sbd-start.patch +Patch1: 0002-mention-timeout-caveat-with-SBD_DELAY_START.patch +Patch2: 0003-Doc-sbd.8.pod-add-query-test-watchdog.patch +Patch3: 0004-Build-sbd-pacemaker-Query-CIB-directly-with-the-API.patch +Patch4: 0005-Fix-build-error-with-glibc-2.25.patch +Patch5: 0006-Fix-gcc-format-string-error.patch +Patch6: 0007-Build-cluster-servant-Compile-with-pacemaker-2.0.patch +Patch7: 0008-Log-change-sbd-s-default-logging-level-to-LOG_NOTICE.patch +Patch8: 0009-Log-upgrade-important-messages-and-downgrade-unimpor.patch +Patch9: 0010-Refactor-sbd-cluster-let-scan-do-the-job-of-proc-par.patch +Patch10: 0011-Fix-search-for-pacemaker-remoted-with-pacemaker-2.0.patch +Patch11: 0012-Fix-sbd-common-don-t-follow-symlinks-outside-dev-for.patch +Patch12: 0013-Refactor-sbd-common-separate-assignment-and-comparis.patch +Patch13: 0014-Fix-sbd-common-avoid-statting-potential-links.patch +Patch14: 0015-Refactor-use-pacemaker-s-new-pe-api-with-constructor.patch +Patch15: 0016-Feature-make-timeout-action-executed-by-sbd-configur.patch +BuildRoot: %{_tmppath}/%{name}-%{version}-build +BuildRequires: autoconf +BuildRequires: automake +BuildRequires: libuuid-devel +BuildRequires: glib2-devel +BuildRequires: libaio-devel +BuildRequires: corosynclib-devel +BuildRequires: pacemaker-libs-devel > 1.1.12 +BuildRequires: libtool +BuildRequires: libuuid-devel +BuildRequires: libxml2-devel +BuildRequires: pkgconfig +BuildRequires: systemd + 
+%if 0%{?rhel} > 0 +ExclusiveArch: i686 x86_64 s390x ppc64le aarch64 +%endif + +%if %{defined systemd_requires} +%systemd_requires +%endif + +%description + +This package contains the storage-based death functionality. + +########################################################### + +%prep +%autosetup -n %{name}-%{commit} -p1 + +########################################################### + +%build +autoreconf -i +export CFLAGS="$RPM_OPT_FLAGS -Wall -Werror" +%configure +make %{?_smp_mflags} + +########################################################### + +%install + +make DESTDIR=$RPM_BUILD_ROOT LIBDIR=%{_libdir} install +rm -rf ${RPM_BUILD_ROOT}%{_libdir}/stonith + +%if %{defined _unitdir} +install -D -m 0644 src/sbd.service $RPM_BUILD_ROOT/%{_unitdir}/sbd.service +install -D -m 0644 src/sbd_remote.service $RPM_BUILD_ROOT/%{_unitdir}/sbd_remote.service +%endif + +mkdir -p ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig +install -m 644 src/sbd.sysconfig ${RPM_BUILD_ROOT}%{_sysconfdir}/sysconfig/sbd + +########################################################### + +%clean +rm -rf %{buildroot} + +%if %{defined _unitdir} +%post +%systemd_post sbd.service +%systemd_post sbd_remote.service +if [ $1 -ne 1 ] ; then + if systemctl --quiet is-enabled sbd.service 2>/dev/null + then + systemctl --quiet reenable sbd.service 2>/dev/null || : + fi + if systemctl --quiet is-enabled sbd_remote.service 2>/dev/null + then + systemctl --quiet reenable sbd_remote.service 2>/dev/null || : + fi +fi + +%preun +%systemd_preun sbd.service +%systemd_preun sbd_remote.service + +%postun +%systemd_postun sbd.service +%systemd_postun sbd_remote.service +%endif + +%files +########################################################### +%defattr(-,root,root) +%config(noreplace) %{_sysconfdir}/sysconfig/sbd +%{_sbindir}/sbd +#%{_datadir}/sbd +%doc %{_mandir}/man8/sbd* +%if %{defined _unitdir} +%{_unitdir}/sbd.service +%{_unitdir}/sbd_remote.service +%endif +%doc COPYING + +%changelog +* Mon Dec 17 2018 Klaus 
Wenninger - 1.3.1-18 +- make timeout-action executed by sbd configurable + + Resolves: rhbz#1660147 + +* Mon Dec 3 2018 Klaus Wenninger - 1.3.1-17 +- use pacemaker's new pe api with constructors/destructors + + Resolves: rhbz#1650663 + +* Wed Sep 19 2018 Klaus Wenninger - 1.3.1-16 +- avoid statting potential symlink-targets in /dev + + Resolves: rhbz#1629020 + +* Wed Sep 19 2018 Klaus Wenninger - 1.3.1-15 +- rebuild against new versions of libqb (1.0.3-7.el8), + corosync (2.99.3-4.el8) and pacemaker (2.0.0-9.el8) + + Related: rhbz#1615945 + +* Fri Sep 14 2018 Klaus Wenninger - 1.3.1-14 +- skip symlinks pointing to dev-nodes outside of /dev + + Resolves: rhbz#1629020 + +* Wed Sep 5 2018 Klaus Wenninger - 1.3.1-13 +- Require systemd-package during build to have the macros + + Resolves: rhbz#1625553 + +* Mon Jul 30 2018 Florian Weimer - 1.3.1-12 +- Rebuild with fixed binutils + +* Tue Jul 3 2018 - 1.3.1-11 +- replaced tarball by version downloaded from github + +* Mon Jul 2 2018 - 1.3.1-10 +- removed unneeded python build-dependency +- updated legacy corosync-devel to corosynclib-devel + + Resolves: rhbz#1595856 + +* Fri May 4 2018 - 1.3.1-9 +- use cib-api directly as get_cib_copy gone with + pacemaker 2.0.0 +- add sys/sysmacros.h to build with glibc-2.25 +- enlarge string buffer to satisfy newer gcc +- no corosync 1 support with pacemaker 2.0.0 +- set default to LOG_NOTICE + overhaul levels +- refactor proc-parsing +- adaptions for daemon-names changed with + pacemaker 2.0.0 rc3 +- added .do-not-sync-with-fedora + Resolves: rhbz#1571797 + +* Mon Apr 16 2018 - 1.3.1-8 +- Added aarch64 target + + Resolves: rhbz#1568029 + +* Mon Jan 15 2018 - 1.3.1-7 +- reenable sbd on upgrade so that additional + links to make pacemaker properly depend on + sbd are created + + Resolves: rhbz#1525981 + +* Wed Jan 10 2018 - 1.3.1-5 +- add man sections for query- & test-watchdog + + Resolves: rhbz#1462002 + +* Wed Dec 20 2017 - 1.3.1-3 +- mention timeout caveat with SBD_DELAY_START + in 
configuration template +- make systemd wait for sbd-start to finish + before starting pacemaker or dlm + + Resolves: rhbz#1525981 + +* Fri Nov 3 2017 - 1.3.1-2 +- rebase to upstream v1.3.1 + + Resolves: rhbz#1499864 + rhbz#1468580 + rhbz#1462002 + +* Wed Jun 7 2017 - 1.3.0-3 +- prevent creation of duplicate servants +- check 2Node flag in corosync to support + 2-node-clusters with shared disk fencing +- move disk-triggered reboot/off/crashdump + to inquisitor to have sysrq observed by watchdog + + Resolves: rhbz#1413951 + +* Sun Mar 26 2017 - 1.3.0-1 +- rebase to upstream v1.3.0 +- remove watchdog-limitation from description + Resolves: rhbz#1413951 + +* Mon Feb 27 2017 - 1.2.1-23 +- if shared-storage enabled check for node-name <= 63 chars + Resolves: rhbz#1413951 + +* Tue Jan 31 2017 - 1.2.1-22 +- Rebuild with shared-storage enabled +- Package original manpage +- Added ppc64le target + Resolves: rhbz#1413951 + +* Fri Apr 15 2016 - 1.2.1-21 +- Rebuild for new pacemaker + Resolves: rhbz#1320400 + +* Fri Apr 15 2016 - 1.2.1-20 +- tarball updated to c511b0692784a7085df4b1ae35748fb318fa79ee + from https://github.com/Clusterlabs/sbd + Resolves: rhbz#1324240 + +* Thu Jul 23 2015 - 1.2.1-5 +- Rebuild for pacemaker + +* Tue Jun 02 2015 - 1.2.1-4 +- Include the dist tag in the release string +- Rebuild for new pacemaker + +* Mon Jan 12 2015 - 1.2.1-3 +- Correctly parse SBD_WATCHDOG_TIMEOUT into seconds (not milliseconds) + +* Mon Oct 27 2014 - 1.2.1-2 +- Correctly enable /proc/pid validation for sbd_lock_running() + +* Fri Oct 24 2014 - 1.2.1-1 +- Further improve integration with the el7 environment + +* Thu Oct 16 2014 - 1.2.1-0.5.872e82f3.git +- Disable unsupported functionality (for now) + +* Wed Oct 15 2014 - 1.2.1-0.4.872e82f3.git +- Improved integration with the el7 environment + +* Tue Sep 30 2014 - 1.2.1-0.3.8f912945.git +- Only build on archs supported by the HA Add-on + +* Fri Aug 29 2014 - 1.2.1-0.2.8f912945.git +- Remove some additional SUSE-isms + +* Fri Aug 
29 2014 - 1.2.1-0.1.8f912945.git +- Prepare for package review