diff --git a/.gitignore b/.gitignore index 2e2cf84..facb02b 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/sbd-25fce8a7d5e8cd5abc2379077381b10bd6cec183.tar.gz +SOURCES/sbd-bfeee963f7363720da91a018045ca6746d822ba0.tar.gz diff --git a/.sbd.metadata b/.sbd.metadata index 7513edf..7b728ea 100644 --- a/.sbd.metadata +++ b/.sbd.metadata @@ -1 +1 @@ -4b2d6feee6235758e2e3000bcad71ff059246b13 SOURCES/sbd-25fce8a7d5e8cd5abc2379077381b10bd6cec183.tar.gz +e68c21479e24b1660731125391ed8473e361bb95 SOURCES/sbd-bfeee963f7363720da91a018045ca6746d822ba0.tar.gz diff --git a/SOURCES/0001-Fix-regressions.sh-make-parameter-passing-consistent.patch b/SOURCES/0001-Fix-regressions.sh-make-parameter-passing-consistent.patch deleted file mode 100644 index 6f17a5a..0000000 --- a/SOURCES/0001-Fix-regressions.sh-make-parameter-passing-consistent.patch +++ /dev/null @@ -1,82 +0,0 @@ -From 1d2a7b8d059d4f090b351b8decca0ddf274c82a0 Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Wed, 20 Nov 2019 15:20:19 +0100 -Subject: [PATCH] Fix: regressions.sh: make parameter passing consistent - ---- - tests/regressions.sh | 24 ++++++++++++------------ - 1 file changed, 12 insertions(+), 12 deletions(-) - -diff --git a/tests/regressions.sh b/tests/regressions.sh -index 6cfb303..7ab80be 100755 ---- a/tests/regressions.sh -+++ b/tests/regressions.sh -@@ -32,7 +32,7 @@ - : ${SBD_USE_DM:="yes"} - - sbd() { -- LD_PRELOAD=${SBD_PRELOAD} SBD_WATCHDOG_TIMEOUT=5 SBD_DEVICE="${SBD_DEVICE}" SBD_PRELOAD_LOG=${SBD_PRELOAD_LOG} SBD_WATCHDOG_DEV=/dev/watchdog setsid ${SBD_BINARY} -p ${SBD_PIDFILE} $* -+ LD_PRELOAD=${SBD_PRELOAD} SBD_WATCHDOG_TIMEOUT=5 SBD_DEVICE="${SBD_DEVICE}" SBD_PRELOAD_LOG=${SBD_PRELOAD_LOG} SBD_WATCHDOG_DEV=/dev/watchdog setsid ${SBD_BINARY} -p ${SBD_PIDFILE} "$@" - } - - sbd_wipe_disk() { -@@ -98,26 +98,26 @@ sbd_daemon_cleanup() { - pkill -TERM --pidfile ${SBD_PIDFILE} 2>/dev/null - sleep 5 - pkill -KILL --pidfile ${SBD_PIDFILE} 2>/dev/null -- pkill -KILL --parent $(cat ${SBD_PIDFILE} 2>/dev/null) 2>/dev/null -+ pkill -KILL --parent "$(cat ${SBD_PIDFILE} 2>/dev/null)" 2>/dev/null - echo > ${SBD_PIDFILE} - } - - _ok() { -- echo -- $@ -- $@ -+ echo "-- $*" -+ "$@" - rc=$? - if [ $rc -ne 0 ]; then -- echo "$@ failed with $rc" -+ echo "$* failed with $rc" - exit $rc - fi - } - - _no() { -- echo -- $@ -- $@ -+ echo "-- $*" -+ "$@" - rc=$? - if [ $rc -eq 0 ]; then -- echo "$@ did NOT fail ($rc)" -+ echo "$* did NOT fail ($rc)" - exit $rc - fi - return 0 -@@ -126,7 +126,7 @@ _no() { - _in_log() { - grep "$@" ${SBD_PRELOAD_LOG} >/dev/null - if [ $? -ne 0 ]; then -- echo "didn't find '$@' in log:" -+ echo "didn't find '$*' in log:" - cat ${SBD_PRELOAD_LOG} - sbd_daemon_cleanup - exit 1 -@@ -227,10 +227,10 @@ test_stall_inquisitor() { - sbd_daemon_cleanup - sbd -d ${D[1]} -d ${D[2]} -d ${D[3]} -n test-1 watch - sleep 10 -- _ok kill -0 $(cat ${SBD_PIDFILE}) -- kill -STOP $(cat ${SBD_PIDFILE}) -+ _ok kill -0 "$(cat ${SBD_PIDFILE})" -+ kill -STOP "$(cat ${SBD_PIDFILE})" - sleep 10 -- kill -CONT $(cat ${SBD_PIDFILE}) 2>/dev/null -+ kill -CONT "$(cat ${SBD_PIDFILE})" 2>/dev/null - _in_log "watchdog fired" - } - --- -1.8.3.1 - diff --git a/SOURCES/0002-Doc-add-environment-section-to-man-page.patch b/SOURCES/0002-Doc-add-environment-section-to-man-page.patch deleted file mode 100644 index 2ad9556..0000000 --- a/SOURCES/0002-Doc-add-environment-section-to-man-page.patch +++ /dev/null @@ -1,1459 +0,0 @@ -From 9dd82a8b4daa5a7bd8ab3afa43b081f212efb1ac Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Wed, 29 Jan 2020 20:34:18 +0100 -Subject: [PATCH] Doc: add environment section to man-page - -Environment section is auto-generated from sbd.sysconfig. ---- - .gitignore | 1 + - Makefile.am | 6 +- - README.md | 3 +- - man/Makefile.am | 8 +- - man/sbd.8.pod | 668 ----------------------------------------------------- - man/sbd.8.pod.in | 675 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ - src/sbd.sysconfig | 3 +- - 7 files changed, 690 insertions(+), 674 deletions(-) - delete mode 100644 man/sbd.8.pod - create mode 100644 man/sbd.8.pod.in - -diff --git a/Makefile.am b/Makefile.am -index 1c29f75..bd4346d 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -9,8 +9,8 @@ TARFILE = $(distdir).tar.gz - DIST_ARCHIVES = $(TARFILE) - KEEP_EXISTING_TAR = no - INJECT_GIT_COMMIT = yes --DISTCLEANFILES = sbd-* sbd-*/ - CLEANFILES = *.rpm *.tar.* sbd-* -+DISTCLEANFILES = sbd-* sbd-*/ - - RPM_ROOT = $(shell pwd) - RPM_OPTS = --define "_sourcedir $(RPM_ROOT)" \ -@@ -31,7 +31,7 @@ export SBD_BINARY := src/sbd - export SBD_PRELOAD := tests/.libs/libsbdtestbed.so - export SBD_USE_DM := no - --EXTRA_DIST = sbd.spec tests/regressions.sh -+EXTRA_DIST = sbd.spec tests/regressions.sh man/sbd.8.pod.in - - export: - rm -f $(PACKAGE)-HEAD.tar.* -@@ -43,7 +43,7 @@ export: - echo `date`: Using existing tarball: $(TARFILE); \ - else \ - rm -f $(PACKAGE).tar.*; \ -- (git archive --prefix=$(distdir)/ $(shell echo $(TAG)|cut -f1 -d-) || tar -c --transform="s,^,$(distdir)/," --exclude="*.tar.*" --exclude="$(distdir)" --exclude="*.o" --exclude="*.8" --exclude="config.*" --exclude="libtool" --exclusive="ltmain.sh*" --exclude="Makefile" --exclude="Makefile.in" --exclude="stamp-*" --exclude="*.service" --exclude="sbd" --exclude="*.m4" --exclude="*.cache" --exclude="configure" --exclude="*.list" --exclude="depcomp" --exclude="install-sh" --exclude="missing" --exclude="compile" --exclude="sbd.sh" --exclude="~" --exclude="*.swp" --exclude="*.patch" --exclude="*.diff" --exclude="*.orig" --exclude="*.rej" --exclude="*.rpm" --exclude=".deps" --exclude="test-driver" *) | gzip > $(TARFILE); \ -+ (git archive --prefix=$(distdir)/ $(shell echo $(TAG)|cut -f1 -d-) || tar -c --transform="s,^,$(distdir)/," --exclude="*.tar.*" --exclude="$(distdir)" --exclude="*.o" --exclude="*.8" --exclude="config.*" --exclude="libtool" --exclude="ltmain.sh*" --exclude="Makefile" --exclude="Makefile.in" --exclude="stamp-*" --exclude="*.service" --exclude="sbd" --exclude="*.m4" --exclude="*.cache" --exclude="configure" --exclude="*.list" --exclude="depcomp" --exclude="install-sh" --exclude="missing" --exclude="compile" --exclude="sbd.sh" --exclude="~" --exclude="*.swp" --exclude="*.patch" --exclude="*.diff" --exclude="*.orig" --exclude="*.rej" --exclude="*.rpm" --exclude="*.pod" --exclude=".deps" --exclude="test-driver" *) | gzip > $(TARFILE); \ - if test -n "$$(git status -s)" || test "$(INJECT_GIT_COMMIT)" = "yes"; then \ - if test -n "$$(git status -s)"; then git diff HEAD --name-only|grep -v "^\."|xargs -n1 git diff HEAD > uncommitted.diff; fi; \ - rm -rf $(distdir); tar -xzf $(TARFILE); rm $(TARFILE); \ -diff --git a/README.md b/README.md -index d02a8bd..42a3fde 100644 ---- a/README.md -+++ b/README.md -@@ -5,5 +5,6 @@ A highly reliable fencing or Shoot-the-other-node-in-the-head (STONITH) mechanis - The component works with Pacemaker clusters, and is currently known to - compile and function on Pacemaker 1.1.7+ and corosync 1.4.x or 2.3.x. - --Please see https://github.com/l-mb/sbd/blob/master/man/sbd.8.pod for the full documentation. -+Please see https://github.com/clusterlabs/sbd/blob/master/man/sbd.8.pod.in & -+https://github.com/clusterlabs/sbd/blob/master/src/sbd.sysconfig for the full documentation. - -diff --git a/man/Makefile.am b/man/Makefile.am -index 3f89085..995712d 100644 ---- a/man/Makefile.am -+++ b/man/Makefile.am -@@ -1,6 +1,12 @@ - dist_man_MANS = sbd.8 - --EXTRA_DIST = sbd.8.pod -+DISTCLEANFILES = sbd.8.pod sbd.8 sbd.sysconfig.pod -+ -+sbd.sysconfig.pod: ../src/sbd.sysconfig -+ sed -r -n -e "s/^## Type: (.*)/Allows C<\1>/;t type;s/^## Default: (.*)/ defaulting to C<\1>/;t default;s/^#*(.*)=.*/=item B<\1>\n/;t variable;s/^#*//;s/^ *//;H;d;:type;h;d;:default;H;x;s/\n//;x;d;:variable;G;p" $< > $@ -+ -+sbd.8.pod: sbd.8.pod.in sbd.sysconfig.pod -+ sed -e "s/@environment_section@//;t insert;p;d;:insert;rsbd.sysconfig.pod" $< > $@ - - sbd.8: sbd.8.pod - @POD2MAN@ -s 8 -c "STONITH Block Device" -r "SBD" -n "SBD" $< $@ -diff --git a/man/sbd.8.pod b/man/sbd.8.pod -deleted file mode 100644 -index 377c579..0000000 ---- a/man/sbd.8.pod -+++ /dev/null -@@ -1,668 +0,0 @@ --=head1 NAME -- --sbd - STONITH Block Device daemon -- --=head1 SYNOPSIS -- --sbd <-d F> [options] C -- --=head1 SUMMARY -- --SBD provides a node fencing mechanism (Shoot the other node in the head, --STONITH) for Pacemaker-based clusters through the exchange of messages --via shared block storage such as for example a SAN, iSCSI, FCoE. This --isolates the fencing mechanism from changes in firmware version or --dependencies on specific firmware controllers, and it can be used as a --STONITH mechanism in all configurations that have reliable shared --storage. -- --SBD can also be used without any shared storage. In this mode, the --watchdog device will be used to reset the node if it loses quorum, if --any monitored daemon is lost and not recovered or if Pacemaker decides --that the node requires fencing. -- --The F binary implements both the daemon that watches the message --slots as well as the management tool for interacting with the block --storage device(s). This mode of operation is specified via the --C parameter; some of these modes take additional parameters. -- --To use SBD with shared storage, you must first C the messaging --layout on one to three block devices. Second, configure --F to list those devices (and possibly adjust other --options), and restart the cluster stack on each node to ensure that --C is started. Third, configure the C fencing --resource in the Pacemaker CIB. -- --Each of these steps is documented in more detail below the description --of the command options. -- --C can only be used as root. -- --=head2 GENERAL OPTIONS -- --=over -- --=item B<-d> F -- --Specify the block device(s) to be used. If you have more than one, --specify this option up to three times. This parameter is mandatory for --all modes, since SBD always needs a block device to interact with. -- --This man page uses F, F, and F as --example device names for brevity. However, in your production --environment, you should instead always refer to them by using the long, --stable device name (e.g., --F). -- --=item B<-v|-vv|-vvv> -- --Enable verbose|debug|debug-library logging (optional) -- --=item B<-h> -- --Display a concise summary of C options. -- --=item B<-n> I -- --Set local node name; defaults to C. This should not need to be --set. -- --=item B<-R> -- --Do B enable realtime priority. By default, C runs at realtime --priority, locks itself into memory, and also acquires highest IO --priority to protect itself against interference from other processes on --the system. This is a debugging-only option. -- --=item B<-I> I -- --Async IO timeout (defaults to 3 seconds, optional). You should not need --to adjust this unless your IO setup is really very slow. -- --(In daemon mode, the watchdog is refreshed when the majority of devices --could be read within this time.) -- --=back -- --=head2 create -- --Example usage: -- -- sbd -d /dev/sdc2 -d /dev/sdd3 create -- --If you specify the I command, sbd will write a metadata header --to the device(s) specified and also initialize the messaging slots for --up to 255 nodes. -- --B: This command will not prompt for confirmation. Roughly the --first megabyte of the specified block device(s) will be overwritten --immediately and without backup. -- --This command accepts a few options to adjust the default timings that --are written to the metadata (to ensure they are identical across all --nodes accessing the device). -- --=over -- --=item B<-1> I -- --Set watchdog timeout to N seconds. This depends mostly on your storage --latency; the majority of devices must be successfully read within this --time, or else the node will self-fence. -- --If your sbd device(s) reside on a multipath setup or iSCSI, this should --be the time required to detect a path failure. You may be able to reduce --this if your device outages are independent, or if you are using the --Pacemaker integration. -- --=item B<-2> I -- --Set slot allocation timeout to N seconds. You should not need to tune --this. -- --=item B<-3> I -- --Set daemon loop timeout to N seconds. You should not need to tune this. -- --=item B<-4> I -- --Set I timeout to N seconds. This should be twice the I --timeout. This is the time after which a message written to a node's slot --will be considered delivered. (Or long enough for the node to detect --that it needed to self-fence.) -- --This also affects the I in Pacemaker's CIB; see below. -- --=back -- --=head2 list -- --Example usage: -- -- # sbd -d /dev/sda1 list -- 0 hex-0 clear -- 1 hex-7 clear -- 2 hex-9 clear -- --List all allocated slots on device, and messages. You should see all --cluster nodes that have ever been started against this device. Nodes --that are currently running should have a I state; nodes that have --been fenced, but not yet restarted, will show the appropriate fencing --message. -- --=head2 dump -- --Example usage: -- -- # sbd -d /dev/sda1 dump -- ==Dumping header on disk /dev/sda1 -- Header version : 2 -- Number of slots : 255 -- Sector size : 512 -- Timeout (watchdog) : 15 -- Timeout (allocate) : 2 -- Timeout (loop) : 1 -- Timeout (msgwait) : 30 -- ==Header on disk /dev/sda1 is dumped -- --Dump meta-data header from device. -- --=head2 watch -- --Example usage: -- -- sbd -d /dev/sdc2 -d /dev/sdd3 -P watch -- --This command will make C start in daemon mode. It will constantly monitor --the message slot of the local node for incoming messages, reachability, and --optionally take Pacemaker's state into account. -- --C B be started on boot before the cluster stack! See below --for enabling this according to your boot environment. -- --The options for this mode are rarely specified directly on the --commandline directly, but most frequently set via F. -- --It also constantly monitors connectivity to the storage device, and --self-fences in case the partition becomes unreachable, guaranteeing that it --does not disconnect from fencing messages. -- --A node slot is automatically allocated on the device(s) the first time --the daemon starts watching the device; hence, manual allocation is not --usually required. -- --If a watchdog is used together with the C as is strongly --recommended, the watchdog is activated at initial start of the sbd --daemon. The watchdog is refreshed every time the majority of SBD devices --has been successfully read. Using a watchdog provides additional --protection against C crashing. -- --If the Pacemaker integration is activated, C will B self-fence --if device majority is lost, if: -- --=over -- --=item 1. -- --The partition the node is in is still quorate according to the CIB; -- --=item 2. -- --it is still quorate according to Corosync's node count; -- --=item 3. -- --the node itself is considered online and healthy by Pacemaker. -- --=back -- --This allows C to survive temporary outages of the majority of --devices. However, while the cluster is in such a degraded state, it can --neither successfully fence nor be shutdown cleanly (as taking the --cluster below the quorum threshold will immediately cause all remaining --nodes to self-fence). In short, it will not tolerate any further faults. --Please repair the system before continuing. -- --There is one C process that acts as a master to which all watchers --report; one per device to monitor the node's slot; and, optionally, one --that handles the Pacemaker integration. -- --=over -- --=item B<-W> -- --Enable or disable use of the system watchdog to protect against the sbd --processes failing and the node being left in an undefined state. Specify --this once to enable, twice to disable. -- --Defaults to I. -- --=item B<-w> F -- --This can be used to override the default watchdog device used and should not --usually be necessary. -- --=item B<-p> F -- --This option can be used to specify a pidfile for the main sbd process. -- --=item B<-F> I -- --Number of failures before a failing servant process will not be restarted --immediately until the dampening delay has expired. If set to zero, servants --will be restarted immediately and indefinitely. If set to one, a failed --servant will be restarted once every B<-t> seconds. If set to a different --value, the servant will be restarted that many times within the dampening --period and then delay. -- --Defaults to I<1>. -- --=item B<-t> I -- --Dampening delay before faulty servants are restarted. Combined with C<-F 1>, --the most logical way to tune the restart frequency of servant processes. --Default is 5 seconds. -- --If set to zero, processes will be restarted indefinitely and immediately. -- --=item B<-P> -- --Enable Pacemaker integration which checks Pacemaker quorum and node health. --Specify this once to enable, twice to disable. -- --Defaults to I. -- --=item B<-S> I -- --Set the start mode. (Defaults to I<0>.) -- --If this is set to zero, sbd will always start up unconditionally, --regardless of whether the node was previously fenced or not. -- --If set to one, sbd will only start if the node was previously shutdown --cleanly (as indicated by an exit request message in the slot), or if the --slot is empty. A reset, crashdump, or power-off request in any slot will --halt the start up. -- --This is useful to prevent nodes from rejoining if they were faulty. The --node must be manually "unfenced" by sending an empty message to it: -- -- sbd -d /dev/sda1 message node1 clear -- --=item B<-s> I -- --Set the start-up wait time for devices. (Defaults to I<120>.) -- --Dynamic block devices such as iSCSI might not be fully initialized and --present yet. This allows one to set a timeout for waiting for devices to --appear on start-up. If set to 0, start-up will be aborted immediately if --no devices are available. -- --=item B<-Z> -- --Enable trace mode. B Specifying this once will turn all reboots or power-offs, be --they caused by self-fence decisions or messages, into a crashdump. --Specifying this twice will just log them but not continue running. -- --=item B<-T> -- --By default, the daemon will set the watchdog timeout as specified in the --device metadata. However, this does not work for every watchdog device. --In this case, you must manually ensure that the watchdog timeout used by --the system correctly matches the SBD settings, and then specify this --option to allow C to continue with start-up. -- --=item B<-5> I -- --Warn if the time interval for tickling the watchdog exceeds this many seconds. --Since the node is unable to log the watchdog expiry (it reboots immediately --without a chance to write its logs to disk), this is very useful for getting --an indication that the watchdog timeout is too short for the IO load of the --system. -- --Default is 3 seconds, set to zero to disable. -- --=item B<-C> I -- --Watchdog timeout to set before crashdumping. If SBD is set to crashdump --instead of reboot - either via the trace mode settings or the I --fencing agent's parameter -, SBD will adjust the watchdog timeout to this --setting before triggering the dump. Otherwise, the watchdog might trigger and --prevent a successful crashdump from ever being written. -- --Set to zero (= default) to disable. -- --=item B<-r> I -- --Actions to be executed when the watchers don't timely report to the sbd --master process or one of the watchers detects that the master process --has died. -- --Set timeout-action to comma-separated combination of --noflush|flush plus reboot|crashdump|off. --If just one of both is given the other stays at the default. -- --This doesn't affect actions like off, crashdump, reboot explicitly --triggered via message slots. --And it does as well not configure the action a watchdog would --trigger should it run off (there is no generic interface). -- --Defaults to flush,reboot. -- --=back -- --=head2 allocate -- --Example usage: -- -- sbd -d /dev/sda1 allocate node1 -- --Explicitly allocates a slot for the specified node name. This should --rarely be necessary, as every node will automatically allocate itself a --slot the first time it starts up on watch mode. -- --=head2 message -- --Example usage: -- -- sbd -d /dev/sda1 message node1 test -- --Writes the specified message to node's slot. This is rarely done --directly, but rather abstracted via the C fencing agent --configured as a cluster resource. -- --Supported message types are: -- --=over -- --=item test -- --This only generates a log message on the receiving node and can be used --to check if SBD is seeing the device. Note that this could overwrite a --fencing request send by the cluster, so should not be used during --production. -- --=item reset -- --Reset the target upon receipt of this message. -- --=item off -- --Power-off the target. -- --=item crashdump -- --Cause the target node to crashdump. -- --=item exit -- --This will make the C daemon exit cleanly on the target. You should --B send this message manually; this is handled properly during --shutdown of the cluster stack. Manually stopping the daemon means the --node is unprotected! -- --=item clear -- --This message indicates that no real message has been sent to the node. --You should not set this manually; C will clear the message slot --automatically during start-up, and setting this manually could overwrite --a fencing message by the cluster. -- --=back -- --=head2 query-watchdog -- --Example usage: -- -- sbd query-watchdog -- --Check for available watchdog devices and print some info. -- --B: This command will arm the watchdog during query, and if your --watchdog refuses disarming (for example, if its kernel module has the --'nowayout' parameter set) this will reset your system. -- --=head2 test-watchdog -- --Example usage: -- -- sbd test-watchdog [-w /dev/watchdog3] -- --Test specified watchdog device (/dev/watchdog by default). -- --B: This command will arm the watchdog and have your system reset --in case your watchdog is working properly! If issued from an interactive --session, it will prompt for confirmation. -- --=head1 Base system configuration -- --=head2 Configure a watchdog -- --It is highly recommended that you configure your Linux system to load a --watchdog driver with hardware assistance (as is available on most modern --systems), such as I, I, or others. As a fall-back, you --can use the I module. -- --No other software must access the watchdog timer; it can only be --accessed by one process at any given time. Some hardware vendors ship --systems management software that use the watchdog for system resets --(f.e. HP ASR daemon). Such software has to be disabled if the watchdog --is to be used by SBD. -- --=head2 Choosing and initializing the block device(s) -- --First, you have to decide if you want to use one, two, or three devices. -- --If you are using multiple ones, they should reside on independent --storage setups. Putting all three of them on the same logical unit for --example would not provide any additional redundancy. -- --The SBD device can be connected via Fibre Channel, Fibre Channel over --Ethernet, or even iSCSI. Thus, an iSCSI target can become a sort-of --network-based quorum server; the advantage is that it does not require --a smart host at your third location, just block storage. -- --The SBD partitions themselves B be mirrored (via MD, --DRBD, or the storage layer itself), since this could result in a --split-mirror scenario. Nor can they reside on cLVM2 volume groups, since --they must be accessed by the cluster stack before it has started the --cLVM2 daemons; hence, these should be either raw partitions or logical --units on (multipath) storage. -- --The block device(s) must be accessible from all nodes. (While it is not --necessary that they share the same path name on all nodes, this is --considered a very good idea.) -- --SBD will only use about one megabyte per device, so you can easily --create a small partition, or very small logical units. (The size of the --SBD device depends on the block size of the underlying device. Thus, 1MB --is fine on plain SCSI devices and SAN storage with 512 byte blocks. On --the IBM s390x architecture in particular, disks default to 4k blocks, --and thus require roughly 4MB.) -- --The number of devices will affect the operation of SBD as follows: -- --=over -- --=item One device -- --In its most simple implementation, you use one device only. This is --appropriate for clusters where all your data is on the same shared --storage (with internal redundancy) anyway; the SBD device does not --introduce an additional single point of failure then. -- --If the SBD device is not accessible, the daemon will fail to start and --inhibit startup of cluster services. -- --=item Two devices -- --This configuration is a trade-off, primarily aimed at environments where --host-based mirroring is used, but no third storage device is available. -- --SBD will not commit suicide if it loses access to one mirror leg; this --allows the cluster to continue to function even in the face of one outage. -- --However, SBD will not fence the other side while only one mirror leg is --available, since it does not have enough knowledge to detect an asymmetric --split of the storage. So it will not be able to automatically tolerate a --second failure while one of the storage arrays is down. (Though you --can use the appropriate crm command to acknowledge the fence manually.) -- --It will not start unless both devices are accessible on boot. -- --=item Three devices -- --In this most reliable and recommended configuration, SBD will only --self-fence if more than one device is lost; hence, this configuration is --resilient against temporary single device outages (be it due to failures --or maintenance). Fencing messages can still be successfully relayed if --at least two devices remain accessible. -- --This configuration is appropriate for more complex scenarios where --storage is not confined to a single array. For example, host-based --mirroring solutions could have one SBD per mirror leg (not mirrored --itself), and an additional tie-breaker on iSCSI. -- --It will only start if at least two devices are accessible on boot. -- --=back -- --After you have chosen the devices and created the appropriate partitions --and perhaps multipath alias names to ease management, use the C --command described above to initialize the SBD metadata on them. -- --=head3 Sharing the block device(s) between multiple clusters -- --It is possible to share the block devices between multiple clusters, --provided the total number of nodes accessing them does not exceed I<255> --nodes, and they all must share the same SBD timeouts (since these are --part of the metadata). -- --If you are using multiple devices this can reduce the setup overhead --required. However, you should B share devices between clusters in --different security domains. -- --=head2 Configure SBD to start on boot -- --On systems using C, the C or C system --start-up scripts must handle starting or stopping C as required --before starting the rest of the cluster stack. -- --For C, sbd simply has to be enabled using -- -- systemctl enable sbd.service -- --The daemon is brought online on each node before corosync and Pacemaker --are started, and terminated only after all other cluster components have --been shut down - ensuring that cluster resources are never activated --without SBD supervision. -- --=head2 Configuration via sysconfig -- --The system instance of C is configured via F. --In this file, you must specify the device(s) used, as well as any --options to pass to the daemon: -- -- SBD_DEVICE="/dev/sda1;/dev/sdb1;/dev/sdc1" -- SBD_PACEMAKER="true" -- --C will fail to start if no C is specified. See the --installed template for more options that can be configured here. --In general configuration done via parameters takes precedence over --the configuration from the configuration file. -- --=head2 Testing the sbd installation -- --After a restart of the cluster stack on this node, you can now try --sending a test message to it as root, from this or any other node: -- -- sbd -d /dev/sda1 message node1 test -- --The node will acknowledge the receipt of the message in the system logs: -- -- Aug 29 14:10:00 node1 sbd: [13412]: info: Received command test from node2 -- --This confirms that SBD is indeed up and running on the node, and that it --is ready to receive messages. -- --Make B that F is identical on all cluster --nodes, and that all cluster nodes are running the daemon. -- --=head1 Pacemaker CIB integration -- --=head2 Fencing resource -- --Pacemaker can only interact with SBD to issue a node fence if there is a --configure fencing resource. This should be a primitive, not a clone, as --follows: -- -- primitive fencing-sbd stonith:external/sbd \ -- params pcmk_delay_max=30 -- --This will automatically use the same devices as configured in --F. -- --While you should not configure this as a clone (as Pacemaker will register --the fencing device on each node automatically), the I --setting enables random fencing delay which ensures, in a scenario where a --split-brain scenario did occur in a two node cluster, that one of the nodes --has a better chance to survive to avoid double fencing. -- --SBD also supports turning the reset request into a crash request, which --may be helpful for debugging if you have kernel crashdumping configured; --then, every fence request will cause the node to dump core. You can --enable this via the C parameter on the fencing --resource. This is B recommended for production use, but only for --debugging phases. -- --=head2 General cluster properties -- --You must also enable STONITH in general, and set the STONITH timeout to --be at least twice the I timeout you have configured, to allow --enough time for the fencing message to be delivered. If your I --timeout is 60 seconds, this is a possible configuration: -- -- property stonith-enabled="true" -- property stonith-timeout="120s" -- --B: if I is too low for I and the --system overhead, sbd will never be able to successfully complete a fence --request. This will create a fencing loop. -- --Note that the sbd fencing agent will try to detect this and --automatically extend the I setting to a reasonable --value, on the assumption that sbd modifying your configuration is --preferable to not fencing. -- --=head1 Management tasks -- --=head2 Recovering from temporary SBD device outage -- --If you have multiple devices, failure of a single device is not immediately --fatal. C will retry to restart the monitor for the device every 5 --seconds by default. However, you can tune this via the options to the --I command. -- --In case you wish the immediately force a restart of all currently --disabled monitor processes, you can send a I to the SBD --I process. -- -- --=head1 LICENSE -- --Copyright (C) 2008-2013 Lars Marowsky-Bree -- --This program is free software; you can redistribute it and/or --modify it under the terms of the GNU General Public --License as published by the Free Software Foundation; either --version 2 of the License, or (at your option) any later version. -- --This software is distributed in the hope that it will be useful, --but WITHOUT ANY WARRANTY; without even the implied warranty of --MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU --General Public License for more details. -- --For details see the GNU General Public License at --http://www.gnu.org/licenses/gpl-2.0.html (version 2) and/or --http://www.gnu.org/licenses/gpl.html (the newest as per "any later"). -diff --git a/man/sbd.8.pod.in b/man/sbd.8.pod.in -new file mode 100644 -index 0000000..ff89c82 ---- /dev/null -+++ b/man/sbd.8.pod.in -@@ -0,0 +1,675 @@ -+=head1 NAME -+ -+sbd - STONITH Block Device daemon -+ -+=head1 SYNOPSIS -+ -+sbd <-d F> [options] C -+ -+=head1 SUMMARY -+ -+SBD provides a node fencing mechanism (Shoot the other node in the head, -+STONITH) for Pacemaker-based clusters through the exchange of messages -+via shared block storage such as for example a SAN, iSCSI, FCoE. This -+isolates the fencing mechanism from changes in firmware version or -+dependencies on specific firmware controllers, and it can be used as a -+STONITH mechanism in all configurations that have reliable shared -+storage. -+ -+SBD can also be used without any shared storage. In this mode, the -+watchdog device will be used to reset the node if it loses quorum, if -+any monitored daemon is lost and not recovered or if Pacemaker decides -+that the node requires fencing. -+ -+The F binary implements both the daemon that watches the message -+slots as well as the management tool for interacting with the block -+storage device(s). This mode of operation is specified via the -+C parameter; some of these modes take additional parameters. -+ -+To use SBD with shared storage, you must first C the messaging -+layout on one to three block devices. Second, configure -+F to list those devices (and possibly adjust other -+options), and restart the cluster stack on each node to ensure that -+C is started. Third, configure the C fencing -+resource in the Pacemaker CIB. -+ -+Each of these steps is documented in more detail below the description -+of the command options. -+ -+C can only be used as root. -+ -+=head2 GENERAL OPTIONS -+ -+=over -+ -+=item B<-d> F -+ -+Specify the block device(s) to be used. If you have more than one, -+specify this option up to three times. This parameter is mandatory for -+all modes, since SBD always needs a block device to interact with. -+ -+This man page uses F, F, and F as -+example device names for brevity. However, in your production -+environment, you should instead always refer to them by using the long, -+stable device name (e.g., -+F). -+ -+=item B<-v|-vv|-vvv> -+ -+Enable verbose|debug|debug-library logging (optional) -+ -+=item B<-h> -+ -+Display a concise summary of C options. -+ -+=item B<-n> I -+ -+Set local node name; defaults to C. This should not need to be -+set. -+ -+=item B<-R> -+ -+Do B enable realtime priority. By default, C runs at realtime -+priority, locks itself into memory, and also acquires highest IO -+priority to protect itself against interference from other processes on -+the system. This is a debugging-only option. -+ -+=item B<-I> I -+ -+Async IO timeout (defaults to 3 seconds, optional). You should not need -+to adjust this unless your IO setup is really very slow. -+ -+(In daemon mode, the watchdog is refreshed when the majority of devices -+could be read within this time.) -+ -+=back -+ -+=head2 create -+ -+Example usage: -+ -+ sbd -d /dev/sdc2 -d /dev/sdd3 create -+ -+If you specify the I command, sbd will write a metadata header -+to the device(s) specified and also initialize the messaging slots for -+up to 255 nodes. -+ -+B: This command will not prompt for confirmation. Roughly the -+first megabyte of the specified block device(s) will be overwritten -+immediately and without backup. -+ -+This command accepts a few options to adjust the default timings that -+are written to the metadata (to ensure they are identical across all -+nodes accessing the device). -+ -+=over -+ -+=item B<-1> I -+ -+Set watchdog timeout to N seconds. This depends mostly on your storage -+latency; the majority of devices must be successfully read within this -+time, or else the node will self-fence. -+ -+If your sbd device(s) reside on a multipath setup or iSCSI, this should -+be the time required to detect a path failure. You may be able to reduce -+this if your device outages are independent, or if you are using the -+Pacemaker integration. -+ -+=item B<-2> I -+ -+Set slot allocation timeout to N seconds. You should not need to tune -+this. -+ -+=item B<-3> I -+ -+Set daemon loop timeout to N seconds. You should not need to tune this. -+ -+=item B<-4> I -+ -+Set I timeout to N seconds. This should be twice the I -+timeout. This is the time after which a message written to a node's slot -+will be considered delivered. (Or long enough for the node to detect -+that it needed to self-fence.) -+ -+This also affects the I in Pacemaker's CIB; see below. -+ -+=back -+ -+=head2 list -+ -+Example usage: -+ -+ # sbd -d /dev/sda1 list -+ 0 hex-0 clear -+ 1 hex-7 clear -+ 2 hex-9 clear -+ -+List all allocated slots on device, and messages. You should see all -+cluster nodes that have ever been started against this device. Nodes -+that are currently running should have a I state; nodes that have -+been fenced, but not yet restarted, will show the appropriate fencing -+message. -+ -+=head2 dump -+ -+Example usage: -+ -+ # sbd -d /dev/sda1 dump -+ ==Dumping header on disk /dev/sda1 -+ Header version : 2 -+ Number of slots : 255 -+ Sector size : 512 -+ Timeout (watchdog) : 15 -+ Timeout (allocate) : 2 -+ Timeout (loop) : 1 -+ Timeout (msgwait) : 30 -+ ==Header on disk /dev/sda1 is dumped -+ -+Dump meta-data header from device. -+ -+=head2 watch -+ -+Example usage: -+ -+ sbd -d /dev/sdc2 -d /dev/sdd3 -P watch -+ -+This command will make C start in daemon mode. It will constantly monitor -+the message slot of the local node for incoming messages, reachability, and -+optionally take Pacemaker's state into account. -+ -+C B be started on boot before the cluster stack! See below -+for enabling this according to your boot environment. -+ -+The options for this mode are rarely specified directly on the -+commandline directly, but most frequently set via F. -+ -+It also constantly monitors connectivity to the storage device, and -+self-fences in case the partition becomes unreachable, guaranteeing that it -+does not disconnect from fencing messages. -+ -+A node slot is automatically allocated on the device(s) the first time -+the daemon starts watching the device; hence, manual allocation is not -+usually required. -+ -+If a watchdog is used together with the C as is strongly -+recommended, the watchdog is activated at initial start of the sbd -+daemon. The watchdog is refreshed every time the majority of SBD devices -+has been successfully read. Using a watchdog provides additional -+protection against C crashing. -+ -+If the Pacemaker integration is activated, C will B self-fence -+if device majority is lost, if: -+ -+=over -+ -+=item 1. -+ -+The partition the node is in is still quorate according to the CIB; -+ -+=item 2. -+ -+it is still quorate according to Corosync's node count; -+ -+=item 3. -+ -+the node itself is considered online and healthy by Pacemaker. -+ -+=back -+ -+This allows C to survive temporary outages of the majority of -+devices. However, while the cluster is in such a degraded state, it can -+neither successfully fence nor be shutdown cleanly (as taking the -+cluster below the quorum threshold will immediately cause all remaining -+nodes to self-fence). In short, it will not tolerate any further faults. -+Please repair the system before continuing. -+ -+There is one C process that acts as a master to which all watchers -+report; one per device to monitor the node's slot; and, optionally, one -+that handles the Pacemaker integration. -+ -+=over -+ -+=item B<-W> -+ -+Enable or disable use of the system watchdog to protect against the sbd -+processes failing and the node being left in an undefined state. Specify -+this once to enable, twice to disable. -+ -+Defaults to I. -+ -+=item B<-w> F -+ -+This can be used to override the default watchdog device used and should not -+usually be necessary. -+ -+=item B<-p> F -+ -+This option can be used to specify a pidfile for the main sbd process. -+ -+=item B<-F> I -+ -+Number of failures before a failing servant process will not be restarted -+immediately until the dampening delay has expired. If set to zero, servants -+will be restarted immediately and indefinitely. If set to one, a failed -+servant will be restarted once every B<-t> seconds. If set to a different -+value, the servant will be restarted that many times within the dampening -+period and then delay. -+ -+Defaults to I<1>. -+ -+=item B<-t> I -+ -+Dampening delay before faulty servants are restarted. Combined with C<-F 1>, -+the most logical way to tune the restart frequency of servant processes. -+Default is 5 seconds. -+ -+If set to zero, processes will be restarted indefinitely and immediately. -+ -+=item B<-P> -+ -+Enable Pacemaker integration which checks Pacemaker quorum and node health. -+Specify this once to enable, twice to disable. -+ -+Defaults to I. -+ -+=item B<-S> I -+ -+Set the start mode. (Defaults to I<0>.) -+ -+If this is set to zero, sbd will always start up unconditionally, -+regardless of whether the node was previously fenced or not. -+ -+If set to one, sbd will only start if the node was previously shutdown -+cleanly (as indicated by an exit request message in the slot), or if the -+slot is empty. A reset, crashdump, or power-off request in any slot will -+halt the start up. -+ -+This is useful to prevent nodes from rejoining if they were faulty. The -+node must be manually "unfenced" by sending an empty message to it: -+ -+ sbd -d /dev/sda1 message node1 clear -+ -+=item B<-s> I -+ -+Set the start-up wait time for devices. (Defaults to I<120>.) -+ -+Dynamic block devices such as iSCSI might not be fully initialized and -+present yet. This allows one to set a timeout for waiting for devices to -+appear on start-up. If set to 0, start-up will be aborted immediately if -+no devices are available. -+ -+=item B<-Z> -+ -+Enable trace mode. B Specifying this once will turn all reboots or power-offs, be -+they caused by self-fence decisions or messages, into a crashdump. -+Specifying this twice will just log them but not continue running. -+ -+=item B<-T> -+ -+By default, the daemon will set the watchdog timeout as specified in the -+device metadata. However, this does not work for every watchdog device. -+In this case, you must manually ensure that the watchdog timeout used by -+the system correctly matches the SBD settings, and then specify this -+option to allow C to continue with start-up. -+ -+=item B<-5> I -+ -+Warn if the time interval for tickling the watchdog exceeds this many seconds. -+Since the node is unable to log the watchdog expiry (it reboots immediately -+without a chance to write its logs to disk), this is very useful for getting -+an indication that the watchdog timeout is too short for the IO load of the -+system. -+ -+Default is 3 seconds, set to zero to disable. -+ -+=item B<-C> I -+ -+Watchdog timeout to set before crashdumping. If SBD is set to crashdump -+instead of reboot - either via the trace mode settings or the I -+fencing agent's parameter -, SBD will adjust the watchdog timeout to this -+setting before triggering the dump. Otherwise, the watchdog might trigger and -+prevent a successful crashdump from ever being written. -+ -+Set to zero (= default) to disable. -+ -+=item B<-r> I -+ -+Actions to be executed when the watchers don't timely report to the sbd -+master process or one of the watchers detects that the master process -+has died. -+ -+Set timeout-action to comma-separated combination of -+noflush|flush plus reboot|crashdump|off. -+If just one of both is given the other stays at the default. -+ -+This doesn't affect actions like off, crashdump, reboot explicitly -+triggered via message slots. -+And it does as well not configure the action a watchdog would -+trigger should it run off (there is no generic interface). -+ -+Defaults to flush,reboot. -+ -+=back -+ -+=head2 allocate -+ -+Example usage: -+ -+ sbd -d /dev/sda1 allocate node1 -+ -+Explicitly allocates a slot for the specified node name. This should -+rarely be necessary, as every node will automatically allocate itself a -+slot the first time it starts up on watch mode. -+ -+=head2 message -+ -+Example usage: -+ -+ sbd -d /dev/sda1 message node1 test -+ -+Writes the specified message to node's slot. This is rarely done -+directly, but rather abstracted via the C fencing agent -+configured as a cluster resource. -+ -+Supported message types are: -+ -+=over -+ -+=item test -+ -+This only generates a log message on the receiving node and can be used -+to check if SBD is seeing the device. Note that this could overwrite a -+fencing request send by the cluster, so should not be used during -+production. -+ -+=item reset -+ -+Reset the target upon receipt of this message. -+ -+=item off -+ -+Power-off the target. -+ -+=item crashdump -+ -+Cause the target node to crashdump. -+ -+=item exit -+ -+This will make the C daemon exit cleanly on the target. You should -+B send this message manually; this is handled properly during -+shutdown of the cluster stack. Manually stopping the daemon means the -+node is unprotected! -+ -+=item clear -+ -+This message indicates that no real message has been sent to the node. -+You should not set this manually; C will clear the message slot -+automatically during start-up, and setting this manually could overwrite -+a fencing message by the cluster. -+ -+=back -+ -+=head2 query-watchdog -+ -+Example usage: -+ -+ sbd query-watchdog -+ -+Check for available watchdog devices and print some info. -+ -+B: This command will arm the watchdog during query, and if your -+watchdog refuses disarming (for example, if its kernel module has the -+'nowayout' parameter set) this will reset your system. -+ -+=head2 test-watchdog -+ -+Example usage: -+ -+ sbd test-watchdog [-w /dev/watchdog3] -+ -+Test specified watchdog device (/dev/watchdog by default). -+ -+B: This command will arm the watchdog and have your system reset -+in case your watchdog is working properly! If issued from an interactive -+session, it will prompt for confirmation. -+ -+=head1 Base system configuration -+ -+=head2 Configure a watchdog -+ -+It is highly recommended that you configure your Linux system to load a -+watchdog driver with hardware assistance (as is available on most modern -+systems), such as I, I, or others. As a fall-back, you -+can use the I module. -+ -+No other software must access the watchdog timer; it can only be -+accessed by one process at any given time. Some hardware vendors ship -+systems management software that use the watchdog for system resets -+(f.e. HP ASR daemon). Such software has to be disabled if the watchdog -+is to be used by SBD. -+ -+=head2 Choosing and initializing the block device(s) -+ -+First, you have to decide if you want to use one, two, or three devices. -+ -+If you are using multiple ones, they should reside on independent -+storage setups. Putting all three of them on the same logical unit for -+example would not provide any additional redundancy. -+ -+The SBD device can be connected via Fibre Channel, Fibre Channel over -+Ethernet, or even iSCSI. Thus, an iSCSI target can become a sort-of -+network-based quorum server; the advantage is that it does not require -+a smart host at your third location, just block storage. -+ -+The SBD partitions themselves B be mirrored (via MD, -+DRBD, or the storage layer itself), since this could result in a -+split-mirror scenario. Nor can they reside on cLVM2 volume groups, since -+they must be accessed by the cluster stack before it has started the -+cLVM2 daemons; hence, these should be either raw partitions or logical -+units on (multipath) storage. -+ -+The block device(s) must be accessible from all nodes. (While it is not -+necessary that they share the same path name on all nodes, this is -+considered a very good idea.) -+ -+SBD will only use about one megabyte per device, so you can easily -+create a small partition, or very small logical units. (The size of the -+SBD device depends on the block size of the underlying device. Thus, 1MB -+is fine on plain SCSI devices and SAN storage with 512 byte blocks. On -+the IBM s390x architecture in particular, disks default to 4k blocks, -+and thus require roughly 4MB.) -+ -+The number of devices will affect the operation of SBD as follows: -+ -+=over -+ -+=item One device -+ -+In its most simple implementation, you use one device only. This is -+appropriate for clusters where all your data is on the same shared -+storage (with internal redundancy) anyway; the SBD device does not -+introduce an additional single point of failure then. -+ -+If the SBD device is not accessible, the daemon will fail to start and -+inhibit startup of cluster services. -+ -+=item Two devices -+ -+This configuration is a trade-off, primarily aimed at environments where -+host-based mirroring is used, but no third storage device is available. -+ -+SBD will not commit suicide if it loses access to one mirror leg; this -+allows the cluster to continue to function even in the face of one outage. -+ -+However, SBD will not fence the other side while only one mirror leg is -+available, since it does not have enough knowledge to detect an asymmetric -+split of the storage. So it will not be able to automatically tolerate a -+second failure while one of the storage arrays is down. (Though you -+can use the appropriate crm command to acknowledge the fence manually.) -+ -+It will not start unless both devices are accessible on boot. -+ -+=item Three devices -+ -+In this most reliable and recommended configuration, SBD will only -+self-fence if more than one device is lost; hence, this configuration is -+resilient against temporary single device outages (be it due to failures -+or maintenance). Fencing messages can still be successfully relayed if -+at least two devices remain accessible. -+ -+This configuration is appropriate for more complex scenarios where -+storage is not confined to a single array. For example, host-based -+mirroring solutions could have one SBD per mirror leg (not mirrored -+itself), and an additional tie-breaker on iSCSI. -+ -+It will only start if at least two devices are accessible on boot. -+ -+=back -+ -+After you have chosen the devices and created the appropriate partitions -+and perhaps multipath alias names to ease management, use the C -+command described above to initialize the SBD metadata on them. -+ -+=head3 Sharing the block device(s) between multiple clusters -+ -+It is possible to share the block devices between multiple clusters, -+provided the total number of nodes accessing them does not exceed I<255> -+nodes, and they all must share the same SBD timeouts (since these are -+part of the metadata). -+ -+If you are using multiple devices this can reduce the setup overhead -+required. However, you should B share devices between clusters in -+different security domains. -+ -+=head2 Configure SBD to start on boot -+ -+On systems using C, the C or C system -+start-up scripts must handle starting or stopping C as required -+before starting the rest of the cluster stack. -+ -+For C, sbd simply has to be enabled using -+ -+ systemctl enable sbd.service -+ -+The daemon is brought online on each node before corosync and Pacemaker -+are started, and terminated only after all other cluster components have -+been shut down - ensuring that cluster resources are never activated -+without SBD supervision. -+ -+=head2 Configuration via sysconfig -+ -+The system instance of C is configured via F. -+In this file, you must specify the device(s) used, as well as any -+options to pass to the daemon: -+ -+ SBD_DEVICE="/dev/sda1;/dev/sdb1;/dev/sdc1" -+ SBD_PACEMAKER="true" -+ -+C will fail to start if no C is specified. See the -+installed template or section for configuration via environment -+for more options that can be configured here. -+In general configuration done via parameters takes precedence over -+the configuration from the configuration file. -+ -+=head2 Configuration via environment -+ -+=over -+@environment_section@ -+=back -+ -+=head2 Testing the sbd installation -+ -+After a restart of the cluster stack on this node, you can now try -+sending a test message to it as root, from this or any other node: -+ -+ sbd -d /dev/sda1 message node1 test -+ -+The node will acknowledge the receipt of the message in the system logs: -+ -+ Aug 29 14:10:00 node1 sbd: [13412]: info: Received command test from node2 -+ -+This confirms that SBD is indeed up and running on the node, and that it -+is ready to receive messages. -+ -+Make B that F is identical on all cluster -+nodes, and that all cluster nodes are running the daemon. -+ -+=head1 Pacemaker CIB integration -+ -+=head2 Fencing resource -+ -+Pacemaker can only interact with SBD to issue a node fence if there is a -+configure fencing resource. This should be a primitive, not a clone, as -+follows: -+ -+ primitive fencing-sbd stonith:external/sbd \ -+ params pcmk_delay_max=30 -+ -+This will automatically use the same devices as configured in -+F. -+ -+While you should not configure this as a clone (as Pacemaker will register -+the fencing device on each node automatically), the I -+setting enables random fencing delay which ensures, in a scenario where a -+split-brain scenario did occur in a two node cluster, that one of the nodes -+has a better chance to survive to avoid double fencing. -+ -+SBD also supports turning the reset request into a crash request, which -+may be helpful for debugging if you have kernel crashdumping configured; -+then, every fence request will cause the node to dump core. You can -+enable this via the C parameter on the fencing -+resource. This is B recommended for production use, but only for -+debugging phases. -+ -+=head2 General cluster properties -+ -+You must also enable STONITH in general, and set the STONITH timeout to -+be at least twice the I timeout you have configured, to allow -+enough time for the fencing message to be delivered. If your I -+timeout is 60 seconds, this is a possible configuration: -+ -+ property stonith-enabled="true" -+ property stonith-timeout="120s" -+ -+B: if I is too low for I and the -+system overhead, sbd will never be able to successfully complete a fence -+request. This will create a fencing loop. -+ -+Note that the sbd fencing agent will try to detect this and -+automatically extend the I setting to a reasonable -+value, on the assumption that sbd modifying your configuration is -+preferable to not fencing. -+ -+=head1 Management tasks -+ -+=head2 Recovering from temporary SBD device outage -+ -+If you have multiple devices, failure of a single device is not immediately -+fatal. C will retry to restart the monitor for the device every 5 -+seconds by default. However, you can tune this via the options to the -+I command. -+ -+In case you wish the immediately force a restart of all currently -+disabled monitor processes, you can send a I to the SBD -+I process. -+ -+ -+=head1 LICENSE -+ -+Copyright (C) 2008-2013 Lars Marowsky-Bree -+ -+This program is free software; you can redistribute it and/or -+modify it under the terms of the GNU General Public -+License as published by the Free Software Foundation; either -+version 2 of the License, or (at your option) any later version. -+ -+This software is distributed in the hope that it will be useful, -+but WITHOUT ANY WARRANTY; without even the implied warranty of -+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+General Public License for more details. -+ -+For details see the GNU General Public License at -+http://www.gnu.org/licenses/gpl-2.0.html (version 2) and/or -+http://www.gnu.org/licenses/gpl.html (the newest as per "any later"). -diff --git a/src/sbd.sysconfig b/src/sbd.sysconfig -index e1a60ed..33b50d0 100644 ---- a/src/sbd.sysconfig -+++ b/src/sbd.sysconfig -@@ -14,7 +14,7 @@ - # - SBD_PACEMAKER=yes - --## Type: list(always,clean) -+## Type: always / clean - ## Default: always - # - # Specify the start mode for sbd. Setting this to "clean" will only -@@ -103,6 +103,7 @@ SBD_TIMEOUT_ACTION=flush,reboot - # Thus in auto-mode sbd will check if the slice has RT-budget assigned. - # If that is the case sbd will stay in that slice while it will - # be moved to root-slice otherwise. -+# - SBD_MOVE_TO_ROOT_CGROUP=auto - - ## Type: string --- -1.8.3.1 - diff --git a/SOURCES/0003-Fix-sbd-pacemaker-handle-new-no_quorum_demote.patch b/SOURCES/0003-Fix-sbd-pacemaker-handle-new-no_quorum_demote.patch deleted file mode 100644 index 10602b7..0000000 --- a/SOURCES/0003-Fix-sbd-pacemaker-handle-new-no_quorum_demote.patch +++ /dev/null @@ -1,71 +0,0 @@ -From 3048119bf4a0ddb2da01d4ca827ae659a089b622 Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Wed, 24 Jun 2020 14:33:21 +0200 -Subject: [PATCH] Fix: sbd-pacemaker: handle new no_quorum_demote - -and be robust against unknown no-quorum-policies handling them -as would be done with no_quorum_suicide ---- - configure.ac | 17 ++++++++++++++++- - src/sbd-pacemaker.c | 11 ++++++++++- - 2 files changed, 26 insertions(+), 2 deletions(-) - -diff --git a/configure.ac b/configure.ac -index 02e2678..3391c5f 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -89,7 +89,22 @@ AC_CHECK_LIB(cib, cib_apply_patch_event, , missing="yes") - - dnl pacemaker-2.0 removed support for corosync 1 cluster layer - AC_CHECK_DECLS([pcmk_cluster_classic_ais, pcmk_cluster_cman],,, -- [#include ]) -+ [#include ]) -+ -+dnl check for additional no-quorum-policies -+dnl AC_TEST_NO_QUORUM_POLICY(POLICY) -+AC_DEFUN([AC_TEST_NO_QUORUM_POLICY],[ -+ AC_MSG_CHECKING([whether enum pe_quorum_policy defines value $1]) -+ AC_LANG_PUSH([C]) -+ AC_COMPILE_IFELSE([AC_LANG_PROGRAM( -+ [#include ], -+ [enum pe_quorum_policy policy = $1; return policy;])], -+ AC_DEFINE_UNQUOTED(m4_toupper(HAVE_ENUM_$1), 1, -+ [Does pe_types.h have $1 value in enum pe_quorum_policy?]) -+ AC_MSG_RESULT([yes]), AC_MSG_RESULT([no])) -+ AC_LANG_POP([C]) -+]) -+AC_TEST_NO_QUORUM_POLICY(no_quorum_demote) - - dnl check for new pe-API - AC_CHECK_FUNCS(pe_new_working_set) -diff --git a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c -index 11e104d..6e53557 100644 ---- a/src/sbd-pacemaker.c -+++ b/src/sbd-pacemaker.c -@@ -321,13 +321,22 @@ compute_status(pe_working_set_t * data_set) - case no_quorum_freeze: - set_servant_health(pcmk_health_transient, LOG_INFO, "Quorum lost: Freeze resources"); - break; -+#if HAVE_ENUM_NO_QUORUM_DEMOTE -+ case no_quorum_demote: -+ set_servant_health(pcmk_health_transient, LOG_INFO, -+ "Quorum lost: Demote promotable resources and stop others"); -+ break; -+#endif - case no_quorum_stop: - set_servant_health(pcmk_health_transient, LOG_INFO, "Quorum lost: Stop ALL resources"); - break; - case no_quorum_ignore: - set_servant_health(pcmk_health_transient, LOG_INFO, "Quorum lost: Ignore"); - break; -- case no_quorum_suicide: -+ default: -+ /* immediate reboot is the most excessive action we take -+ use for no_quorum_suicide and everything we don't know yet -+ */ - set_servant_health(pcmk_health_unclean, LOG_INFO, "Quorum lost: Self-fence"); - break; - } --- -1.8.3.1 - diff --git a/SOURCES/0004-Fix-sbd-cluster-match-qdevice-sync_timeout-against-w.patch b/SOURCES/0004-Fix-sbd-cluster-match-qdevice-sync_timeout-against-w.patch deleted file mode 100644 index 2a9b144..0000000 --- a/SOURCES/0004-Fix-sbd-cluster-match-qdevice-sync_timeout-against-w.patch +++ /dev/null @@ -1,399 +0,0 @@ -From 4c3e4049b08799094a64dac289a48deef4d3d916 Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Fri, 24 Jul 2020 14:31:01 +0200 -Subject: [PATCH] Fix: sbd-cluster: match qdevice-sync_timeout against - wd-timeout - ---- - configure.ac | 13 +++ - src/sbd-cluster.c | 252 +++++++++++++++++++++++++++++++++++++++++++++--------- - 2 files changed, 223 insertions(+), 42 deletions(-) - -diff --git a/configure.ac b/configure.ac -index 3391c5f..23547cf 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -109,6 +109,12 @@ AC_TEST_NO_QUORUM_POLICY(no_quorum_demote) - dnl check for new pe-API - AC_CHECK_FUNCS(pe_new_working_set) - -+dnl check if votequorum comes with default for qdevice-sync_timeout -+AC_CHECK_DECLS([VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT], -+ HAVE_DECL_VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT=1, -+ HAVE_DECL_VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT=0, -+ [#include ]) -+ - if test "$missing" = "yes"; then - AC_MSG_ERROR([Missing required libraries or functions.]) - fi -@@ -140,6 +146,13 @@ AM_CONDITIONAL(CHECK_TWO_NODE, test "$HAVE_cmap" = "1") - AC_DEFINE_UNQUOTED(CHECK_VOTEQUORUM_HANDLE, $HAVE_votequorum, Turn on periodic checking of votequorum-handle) - AM_CONDITIONAL(CHECK_VOTEQUORUM_HANDLE, test "$HAVE_votequorum" = "1") - -+AC_DEFINE_UNQUOTED(CHECK_QDEVICE_SYNC_TIMEOUT, -+ ($HAVE_DECL_VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT && $HAVE_cmap), -+ Turn on checking if watchdog-timeout and qdevice-sync_timeout are matching) -+AM_CONDITIONAL(CHECK_QDEVICE_SYNC_TIMEOUT, -+ test "$HAVE_DECL_VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT" = "1" && -+ test "$HAVE_cmap" = "1") -+ - CONFIGDIR="" - AC_ARG_WITH(configdir, - [ --with-configdir=DIR -diff --git a/src/sbd-cluster.c b/src/sbd-cluster.c -index 13fa580..b6c5512 100644 ---- a/src/sbd-cluster.c -+++ b/src/sbd-cluster.c -@@ -33,7 +33,7 @@ - #include - #include - --#if CHECK_TWO_NODE -+#if CHECK_TWO_NODE || CHECK_QDEVICE_SYNC_TIMEOUT - #include - #endif - -@@ -86,11 +86,20 @@ sbd_plugin_membership_dispatch(cpg_handle_t handle, - static votequorum_handle_t votequorum_handle = 0; - #endif - -+#if CHECK_TWO_NODE - static bool two_node = false; -+#endif - static bool ever_seen_both = false; - static int cpg_membership_entries = -1; - --#if CHECK_TWO_NODE -+#if CHECK_QDEVICE_SYNC_TIMEOUT -+#include -+static bool using_qdevice = false; -+static uint32_t qdevice_sync_timeout = /* in seconds */ -+ VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT / 1000; -+#endif -+ -+#if CHECK_TWO_NODE || CHECK_QDEVICE_SYNC_TIMEOUT - #include - - static cmap_handle_t cmap_handle = 0; -@@ -102,28 +111,59 @@ void - sbd_cpg_membership_health_update() - { - if(cpg_membership_entries > 0) { -- bool quorum_is_suspect = -+#if CHECK_TWO_NODE -+ bool quorum_is_suspect_two_node = - (two_node && ever_seen_both && cpg_membership_entries == 1); -+#endif -+#if CHECK_QDEVICE_SYNC_TIMEOUT -+ bool quorum_is_suspect_qdevice_timing = -+ using_qdevice && (qdevice_sync_timeout > timeout_watchdog); -+#endif - -- if (!quorum_is_suspect) { -+ do { -+#if CHECK_TWO_NODE -+ if (quorum_is_suspect_two_node) { -+ /* Alternative would be asking votequorum for number of votes. -+ * Using pacemaker's cpg as source for number of active nodes -+ * avoids binding to an additional library, is definitely -+ * less code to write and we wouldn't have to combine data -+ * from 3 sources (cmap, cpg & votequorum) in a potentially -+ * racy environment. -+ */ -+ set_servant_health(pcmk_health_noquorum, LOG_WARNING, -+ "Connected to %s but requires both nodes present", -+ name_for_cluster_type(get_cluster_type()) -+ ); -+ break; -+ } -+#endif -+#if CHECK_QDEVICE_SYNC_TIMEOUT -+ if (quorum_is_suspect_qdevice_timing) { -+ /* We can't really trust quorum info as qdevice-sync_timeout -+ * makes reaction of quorum too sluggish for our -+ * watchdog-timeout. -+ */ -+ set_servant_health(pcmk_health_noquorum, LOG_WARNING, -+ "Connected to %s but quorum using qdevice is distrusted " -+ "for SBD as qdevice-sync_timeout (%ds) > watchdog-timeout " -+ "(%lus).", -+ name_for_cluster_type(get_cluster_type()), -+ qdevice_sync_timeout, timeout_watchdog -+ ); -+ break; -+ } -+#endif - set_servant_health(pcmk_health_online, LOG_INFO, -- "Connected to %s (%u members)", -- name_for_cluster_type(get_cluster_type()), -- cpg_membership_entries -- ); -- } else { -- /* Alternative would be asking votequorum for number of votes. -- * Using pacemaker's cpg as source for number of active nodes -- * avoids binding to an additional library, is definitely -- * less code to write and we wouldn't have to combine data -- * from 3 sources (cmap, cpq & votequorum) in a potentially -- * racy environment. -- */ -- set_servant_health(pcmk_health_noquorum, LOG_WARNING, -- "Connected to %s but requires both nodes present", -- name_for_cluster_type(get_cluster_type()) -- ); -- } -+ "Connected to %s (%u members)%s", -+ name_for_cluster_type(get_cluster_type()), -+ cpg_membership_entries, -+#if CHECK_QDEVICE_SYNC_TIMEOUT -+ using_qdevice?" using qdevice for quorum":"" -+#else -+ "" -+#endif -+ ); -+ } while (false); - - if (cpg_membership_entries > 1) { - ever_seen_both = true; -@@ -146,7 +186,7 @@ sbd_cpg_membership_dispatch(cpg_handle_t handle, - notify_parent(); - } - --#if CHECK_TWO_NODE -+#if CHECK_TWO_NODE || CHECK_QDEVICE_SYNC_TIMEOUT - static void sbd_cmap_notify_fn( - cmap_handle_t cmap_handle, - cmap_track_handle_t cmap_track_handle, -@@ -156,21 +196,99 @@ static void sbd_cmap_notify_fn( - struct cmap_notify_value old_val, - void *user_data) - { -- if (new_val.type == CMAP_VALUETYPE_UINT8) { -- switch (event) { -- case CMAP_TRACK_ADD: -- case CMAP_TRACK_MODIFY: -- two_node = *((uint8_t *) new_val.data); -- break; -- case CMAP_TRACK_DELETE: -- two_node = false; -- break; -- default: -- return; -- } -- sbd_cpg_membership_health_update(); -- notify_parent(); -+ switch (event) { -+ case CMAP_TRACK_ADD: -+ case CMAP_TRACK_MODIFY: -+ switch (new_val.type) { -+ case CMAP_VALUETYPE_UINT8: -+#if CHECK_TWO_NODE -+ if (!strcmp(key_name, "quorum.two_node")) { -+ two_node = *((uint8_t *) new_val.data); -+ } else { -+ return; -+ } -+ break; -+#else -+ return; -+#endif -+ case CMAP_VALUETYPE_STRING: -+#if CHECK_QDEVICE_SYNC_TIMEOUT -+ if (!strcmp(key_name, "quorum.device.model")) { -+ using_qdevice = -+ ((new_val.data) && strlen((char *) new_val.data)); -+ } else { -+ return; -+ } -+ break; -+#else -+ return; -+#endif -+ case CMAP_VALUETYPE_UINT32: -+#if CHECK_QDEVICE_SYNC_TIMEOUT -+ if (!strcmp(key_name, "quorum.device.sync_timeout")) { -+ if (new_val.data) { -+ qdevice_sync_timeout = -+ *((uint32_t *) new_val.data) / 1000; -+ } else { -+ qdevice_sync_timeout = -+ VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT / 1000; -+ } -+ } else { -+ return; -+ } -+ break; -+#else -+ return; -+#endif -+ default: -+ return; -+ } -+ break; -+ case CMAP_TRACK_DELETE: -+ switch (new_val.type) { -+ case CMAP_VALUETYPE_UINT8: -+#if CHECK_TWO_NODE -+ if (!strcmp(key_name, "quorum.two_node")) { -+ two_node = false; -+ } else { -+ return; -+ } -+ break; -+#else -+ return; -+#endif -+ case CMAP_VALUETYPE_STRING: -+#if CHECK_QDEVICE_SYNC_TIMEOUT -+ if (!strcmp(key_name, "quorum.device.model")) { -+ using_qdevice = false; -+ } else { -+ return; -+ } -+ break; -+#else -+ return; -+#endif -+ case CMAP_VALUETYPE_UINT32: -+#if CHECK_QDEVICE_SYNC_TIMEOUT -+ if (!strcmp(key_name, "quorum.device.sync_timeout")) { -+ qdevice_sync_timeout = -+ VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT / 1000; -+ } else { -+ return; -+ } -+ break; -+#else -+ return; -+#endif -+ default: -+ return; -+ } -+ break; -+ default: -+ return; - } -+ sbd_cpg_membership_health_update(); -+ notify_parent(); - } - - static gboolean -@@ -200,9 +318,14 @@ cmap_destroy(void) - } - - static gboolean --sbd_get_two_node(void) -+verify_against_cmap_config(void) - { -+#if CHECK_TWO_NODE - uint8_t two_node_u8 = 0; -+#endif -+#if CHECK_QDEVICE_SYNC_TIMEOUT -+ char *qdevice_model = NULL; -+#endif - int cmap_fd; - - if (!track_handle) { -@@ -211,12 +334,31 @@ sbd_get_two_node(void) - goto out; - } - -+#if CHECK_TWO_NODE - if (cmap_track_add(cmap_handle, "quorum.two_node", - CMAP_TRACK_DELETE|CMAP_TRACK_MODIFY|CMAP_TRACK_ADD, - sbd_cmap_notify_fn, NULL, &track_handle) != CS_OK) { - cl_log(LOG_WARNING, "Failed adding CMAP tracker for 2Node-mode\n"); - goto out; - } -+#endif -+ -+#if CHECK_QDEVICE_SYNC_TIMEOUT -+ if (cmap_track_add(cmap_handle, "quorum.device.model", -+ CMAP_TRACK_DELETE|CMAP_TRACK_MODIFY|CMAP_TRACK_ADD, -+ sbd_cmap_notify_fn, NULL, &track_handle) != CS_OK) { -+ cl_log(LOG_WARNING, "Failed adding CMAP tracker for qdevice-model\n"); -+ goto out; -+ } -+ -+ if (cmap_track_add(cmap_handle, "quorum.device.sync_timeout", -+ CMAP_TRACK_DELETE|CMAP_TRACK_MODIFY|CMAP_TRACK_ADD, -+ sbd_cmap_notify_fn, NULL, &track_handle) != CS_OK) { -+ cl_log(LOG_WARNING, -+ "Failed adding CMAP tracker for qdevice-sync_timeout\n"); -+ goto out; -+ } -+#endif - - /* add the tracker to mainloop */ - if (cmap_fd_get(cmap_handle, &cmap_fd) != CS_OK) { -@@ -232,13 +374,39 @@ sbd_get_two_node(void) - g_source_attach(cmap_source, NULL); - } - -- if (cmap_get_uint8(cmap_handle, "quorum.two_node", &two_node_u8) == CS_OK) { -+#if CHECK_TWO_NODE -+ if (cmap_get_uint8(cmap_handle, "quorum.two_node", &two_node_u8) -+ == CS_OK) { - cl_log(two_node_u8? LOG_NOTICE : LOG_INFO, - "Corosync is%s in 2Node-mode", two_node_u8?"":" not"); - two_node = two_node_u8; - } else { - cl_log(LOG_INFO, "quorum.two_node not present in cmap\n"); - } -+#endif -+ -+#if CHECK_QDEVICE_SYNC_TIMEOUT -+ if (cmap_get_string(cmap_handle, "quorum.device.model", -+ &qdevice_model) == CS_OK) { -+ using_qdevice = qdevice_model && strlen(qdevice_model); -+ cl_log(using_qdevice? LOG_NOTICE : LOG_INFO, -+ "Corosync is%s using qdevice", using_qdevice?"":" not"); -+ } else { -+ cl_log(LOG_INFO, "quorum.device.model not present in cmap\n"); -+ } -+ -+ if (cmap_get_uint32(cmap_handle, "quorum.device.sync_timeout", -+ &qdevice_sync_timeout) == CS_OK) { -+ qdevice_sync_timeout /= 1000; -+ cl_log(LOG_INFO, -+ "Corosync is using qdevice-sync_timeout=%ds", -+ qdevice_sync_timeout); -+ } else { -+ cl_log(LOG_INFO, -+ "quorum.device.sync_timeout not present in cmap\n"); -+ } -+#endif -+ - return TRUE; - - out: -@@ -331,15 +499,15 @@ sbd_membership_connect(void) - } else { - cl_log(LOG_INFO, "Attempting connection to %s", name_for_cluster_type(stack)); - --#if SUPPORT_COROSYNC && CHECK_TWO_NODE -- if (sbd_get_two_node()) { -+#if SUPPORT_COROSYNC && (CHECK_TWO_NODE || CHECK_QDEVICE_SYNC_TIMEOUT) -+ if (verify_against_cmap_config()) { - #endif - - if(crm_cluster_connect(&cluster)) { - connected = true; - } - --#if SUPPORT_COROSYNC && CHECK_TWO_NODE -+#if SUPPORT_COROSYNC && (CHECK_TWO_NODE || CHECK_QDEVICE_SYNC_TIMEOUT) - } - #endif - } -@@ -362,7 +530,7 @@ sbd_membership_destroy(gpointer user_data) - cl_log(LOG_WARNING, "Lost connection to %s", name_for_cluster_type(get_cluster_type())); - - if (get_cluster_type() != pcmk_cluster_unknown) { --#if SUPPORT_COROSYNC && CHECK_TWO_NODE -+#if SUPPORT_COROSYNC && (CHECK_TWO_NODE || CHECK_QDEVICE_SYNC_TIMEOUT) - cmap_destroy(); - #endif - } --- -1.8.3.1 - diff --git a/SOURCES/0005-Fix-sbd-pacemaker-sync-with-pacemakerd-for-robustnes.patch b/SOURCES/0005-Fix-sbd-pacemaker-sync-with-pacemakerd-for-robustnes.patch deleted file mode 100644 index 6d920ab..0000000 --- a/SOURCES/0005-Fix-sbd-pacemaker-sync-with-pacemakerd-for-robustnes.patch +++ /dev/null @@ -1,231 +0,0 @@ -From 5b5ffac4cce861f3621267a73d2ad29f6d807335 Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Tue, 10 Dec 2019 13:16:45 +0100 -Subject: [PATCH] Fix: sbd-pacemaker: sync with pacemakerd for robustness - -State query ping of pacemakerd prevents pacemakerd from -starting any sub-daemons (and thus services) if sbd can't -reach it via ipc. As a health-check get timestamp from -pacemakerd. On shudown fetch info about graceful -shutdown from pacemakerd. -Use new pacemakerd-api provided by pacemaker. ---- - configure.ac | 4 ++ - src/sbd-pacemaker.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++++---- - 2 files changed, 126 insertions(+), 10 deletions(-) - -diff --git a/configure.ac b/configure.ac -index 23547cf..11d12f0 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -81,6 +81,7 @@ AC_CHECK_LIB(crmcluster, crm_peer_init, , missing="yes") - AC_CHECK_LIB(uuid, uuid_unparse, , missing="yes") - AC_CHECK_LIB(cmap, cmap_initialize, , HAVE_cmap=0) - AC_CHECK_LIB(votequorum, votequorum_getinfo, , HAVE_votequorum=0) -+AC_CHECK_LIB(crmcommon, pcmk_pacemakerd_api_ping, HAVE_pacemakerd_api=1, HAVE_pacemakerd_api=0) - - dnl pacemaker >= 1.1.8 - AC_CHECK_HEADERS(crm/cluster.h) -@@ -153,6 +154,9 @@ AM_CONDITIONAL(CHECK_QDEVICE_SYNC_TIMEOUT, - test "$HAVE_DECL_VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT" = "1" && - test "$HAVE_cmap" = "1") - -+AC_DEFINE_UNQUOTED(USE_PACEMAKERD_API, $HAVE_pacemakerd_api, Turn on synchronization between sbd & pacemakerd) -+AM_CONDITIONAL(USE_PACEMAKERD_API, test "$HAVE_pacemakerd_api" = "1") -+ - CONFIGDIR="" - AC_ARG_WITH(configdir, - [ --with-configdir=DIR -diff --git a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c -index 6e53557..1243bfc 100644 ---- a/src/sbd-pacemaker.c -+++ b/src/sbd-pacemaker.c -@@ -83,6 +83,62 @@ pe_free_working_set(pe_working_set_t *data_set) - - #endif - -+static void clean_up(int rc); -+ -+#if USE_PACEMAKERD_API -+#include -+ -+static pcmk_ipc_api_t *pacemakerd_api = NULL; -+static time_t last_ok = (time_t) 0; -+ -+static void -+pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, -+ enum pcmk_ipc_event event_type, crm_exit_t status, -+ void *event_data, void *user_data) -+{ -+ pcmk_pacemakerd_api_reply_t *reply = event_data; -+ -+ switch (event_type) { -+ case pcmk_ipc_event_disconnect: -+ /* Unexpected */ -+ cl_log(LOG_ERR, "Lost connection to pacemakerd\n"); -+ return; -+ -+ case pcmk_ipc_event_reply: -+ break; -+ -+ default: -+ return; -+ } -+ -+ if (status != CRM_EX_OK) { -+ cl_log(LOG_ERR, "Bad reply from pacemakerd: %s", -+ crm_exit_str(status)); -+ return; -+ } -+ -+ if (reply->reply_type != pcmk_pacemakerd_reply_ping) { -+ cl_log(LOG_ERR, "Unknown reply type %d from pacemakerd\n", -+ reply->reply_type); -+ } else { -+ if ((reply->data.ping.last_good != (time_t) 0) && -+ (reply->data.ping.status == pcmk_rc_ok)) { -+ switch (reply->data.ping.state) { -+ case pcmk_pacemakerd_state_running: -+ case pcmk_pacemakerd_state_shutting_down: -+ last_ok = reply->data.ping.last_good; -+ break; -+ case pcmk_pacemakerd_state_shutdown_complete: -+ clean_up(EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN); -+ break; -+ default: -+ break; -+ } -+ } -+ } -+} -+#endif -+ - extern int disk_count; - - static void clean_up(int rc); -@@ -133,10 +189,13 @@ mon_cib_connection_destroy(gpointer user_data) - cib->cmds->signoff(cib); - /* retrigger as last one might have been skipped */ - mon_refresh_state(NULL); -+ -+#if !USE_PACEMAKERD_API - if (pcmk_clean_shutdown) { - /* assume a graceful pacemaker-shutdown */ - clean_up(EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN); - } -+#endif - /* getting here we aren't sure about the pacemaker-state - so try to use the timeout to reconnect and get - everything sorted out again -@@ -196,6 +255,13 @@ mon_timer_notify(gpointer data) - g_source_remove(timer_id_notify); - } - -+#if USE_PACEMAKERD_API -+ { -+ time_t now = time(NULL); -+ -+ if ((last_ok <= now) && (now - last_ok < timeout_watchdog)) { -+#endif -+ - if (cib_connected) { - if (counter == counter_max) { - mon_retrieve_current_cib(); -@@ -207,6 +273,16 @@ mon_timer_notify(gpointer data) - counter++; - } - } -+ -+#if USE_PACEMAKERD_API -+ } -+ } -+ if (pcmk_connect_ipc(pacemakerd_api, -+ pcmk_ipc_dispatch_main) == pcmk_rc_ok) { -+ pcmk_pacemakerd_api_ping(pacemakerd_api, crm_system_name); -+ } -+#endif -+ - timer_id_notify = g_timeout_add(timeout_loop * 1000, mon_timer_notify, NULL); - return FALSE; - } -@@ -526,6 +602,14 @@ clean_up(int rc) - cib = NULL; - } - -+#if USE_PACEMAKERD_API -+ if (pacemakerd_api != NULL) { -+ pcmk_ipc_api_t *capi = pacemakerd_api; -+ pacemakerd_api = NULL; // Ensure we can't free this twice -+ pcmk_free_ipc_api(capi); -+ } -+#endif -+ - if (rc >= 0) { - exit(rc); - } -@@ -535,11 +619,11 @@ clean_up(int rc) - int - servant_pcmk(const char *diskname, int mode, const void* argp) - { -- int exit_code = 0; -+ int exit_code = 0; - -- crm_system_name = strdup("sbd:pcmk"); -- cl_log(LOG_NOTICE, "Monitoring Pacemaker health"); -- set_proc_title("sbd: watcher: Pacemaker"); -+ crm_system_name = strdup("sbd:pcmk"); -+ cl_log(LOG_NOTICE, "Monitoring Pacemaker health"); -+ set_proc_title("sbd: watcher: Pacemaker"); - setenv("PCMK_watchdog", "true", 1); - - if(debug == 0) { -@@ -548,12 +632,40 @@ servant_pcmk(const char *diskname, int mode, const void* argp) - } - - -- if (data_set == NULL) { -- data_set = pe_new_working_set(); -- } -- if (data_set == NULL) { -- return -1; -- } -+ if (data_set == NULL) { -+ data_set = pe_new_working_set(); -+ } -+ if (data_set == NULL) { -+ return -1; -+ } -+ -+#if USE_PACEMAKERD_API -+ { -+ int rc; -+ -+ rc = pcmk_new_ipc_api(&pacemakerd_api, pcmk_ipc_pacemakerd); -+ if (pacemakerd_api == NULL) { -+ cl_log(LOG_ERR, "Could not connect to pacemakerd: %s\n", -+ pcmk_rc_str(rc)); -+ return -1; -+ } -+ pcmk_register_ipc_callback(pacemakerd_api, pacemakerd_event_cb, NULL); -+ do { -+ rc = pcmk_connect_ipc(pacemakerd_api, pcmk_ipc_dispatch_main); -+ if (rc != pcmk_rc_ok) { -+ cl_log(LOG_DEBUG, "Could not connect to pacemakerd: %s\n", -+ pcmk_rc_str(rc)); -+ sleep(reconnect_msec / 1000); -+ } -+ } while (rc != pcmk_rc_ok); -+ /* send a ping to pacemakerd to wake it up */ -+ pcmk_pacemakerd_api_ping(pacemakerd_api, crm_system_name); -+ /* cib should come up now as well so it's time -+ * to have the inquisitor have a closer look -+ */ -+ notify_parent(); -+ } -+#endif - - if (current_cib == NULL) { - cib = cib_new(); --- -1.8.3.1 - diff --git a/SOURCES/0006-Fix-make-syncing-of-pacemaker-resource-startup-confi.patch b/SOURCES/0006-Fix-make-syncing-of-pacemaker-resource-startup-confi.patch deleted file mode 100644 index 0c38862..0000000 --- a/SOURCES/0006-Fix-make-syncing-of-pacemaker-resource-startup-confi.patch +++ /dev/null @@ -1,110 +0,0 @@ -From f4d38a073ce3bfa2078792f1cc85229457430292 Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Tue, 21 Jul 2020 18:30:30 +0200 -Subject: [PATCH] Fix: make syncing of pacemaker resource startup configurable - ---- - src/sbd-inquisitor.c | 20 ++++++++++++++++++++ - src/sbd-pacemaker.c | 6 +++--- - src/sbd.h | 1 + - src/sbd.sysconfig | 14 ++++++++++++++ - 4 files changed, 38 insertions(+), 3 deletions(-) - -diff --git a/src/sbd-inquisitor.c b/src/sbd-inquisitor.c -index 52ede8a..962725e 100644 ---- a/src/sbd-inquisitor.c -+++ b/src/sbd-inquisitor.c -@@ -35,6 +35,7 @@ bool do_flush = true; - char timeout_sysrq_char = 'b'; - bool move_to_root_cgroup = true; - bool enforce_moving_to_root_cgroup = false; -+bool sync_resource_startup = false; - - int parse_device_line(const char *line); - -@@ -964,6 +965,25 @@ int main(int argc, char **argv, char **envp) - } - } - -+ value = getenv("SBD_SYNC_RESOURCE_STARTUP"); -+ if(value) { -+ sync_resource_startup = crm_is_true(value); -+ } -+#if !USE_PACEMAKERD_API -+ if (sync_resource_startup) { -+ fprintf(stderr, "Failed to sync resource-startup as " -+ "SBD was built against pacemaker not supporting pacemakerd-API.\n"); -+ exit_status = -1; -+ goto out; -+ } -+#else -+ if (!sync_resource_startup) { -+ cl_log(LOG_WARNING, "SBD built against pacemaker supporting " -+ "pacemakerd-API. Should think about enabling " -+ "SBD_SYNC_RESOURCE_STARTUP."); -+ } -+#endif -+ - while ((c = getopt(argc, argv, "czC:DPRTWZhvw:d:n:p:1:2:3:4:5:t:I:F:S:s:r:")) != -1) { - switch (c) { - case 'D': -diff --git a/src/sbd-pacemaker.c b/src/sbd-pacemaker.c -index 1243bfc..aa1fb57 100644 ---- a/src/sbd-pacemaker.c -+++ b/src/sbd-pacemaker.c -@@ -190,12 +190,12 @@ mon_cib_connection_destroy(gpointer user_data) - /* retrigger as last one might have been skipped */ - mon_refresh_state(NULL); - --#if !USE_PACEMAKERD_API -- if (pcmk_clean_shutdown) { -+ -+ if ((pcmk_clean_shutdown) && (!sync_resource_startup)) { - /* assume a graceful pacemaker-shutdown */ - clean_up(EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN); - } --#endif -+ - /* getting here we aren't sure about the pacemaker-state - so try to use the timeout to reconnect and get - everything sorted out again -diff --git a/src/sbd.h b/src/sbd.h -index 382e553..3b6647c 100644 ---- a/src/sbd.h -+++ b/src/sbd.h -@@ -161,6 +161,7 @@ extern bool do_flush; - extern char timeout_sysrq_char; - extern bool move_to_root_cgroup; - extern bool enforce_moving_to_root_cgroup; -+extern bool sync_resource_startup; - - /* Global, non-tunable variables: */ - extern int sector_size; -diff --git a/src/sbd.sysconfig b/src/sbd.sysconfig -index 33b50d0..b32e826 100644 ---- a/src/sbd.sysconfig -+++ b/src/sbd.sysconfig -@@ -106,6 +106,20 @@ SBD_TIMEOUT_ACTION=flush,reboot - # - SBD_MOVE_TO_ROOT_CGROUP=auto - -+## Type: yesno -+## Default: no -+# -+# If resource startup syncing is enabled then pacemakerd is -+# gonna wait to be pinged via IPC before it starts resources. -+# On shutdown pacemakerd is going to wait in a state where it -+# has cleanly shutdown resources till sbd fetches that state. -+# -+# Default is 'no' to prevent pacemaker from waiting for a -+# ping that will never come when working together with an sbd -+# version that doesn't support the feature. -+# -+SBD_SYNC_RESOURCE_STARTUP=no -+ - ## Type: string - ## Default: "" - # --- -1.8.3.1 - diff --git a/SPECS/sbd.spec b/SPECS/sbd.spec index 0a15574..fc05c5b 100644 --- a/SPECS/sbd.spec +++ b/SPECS/sbd.spec @@ -15,25 +15,28 @@ # Please submit bugfixes or comments via http://bugs.opensuse.org/ # -%global commit 25fce8a7d5e8cd5abc2379077381b10bd6cec183 -%global shortcommit %(c=%{commit}; echo ${c:0:7}) +%global commit bfeee963f7363720da91a018045ca6746d822ba0 %global github_owner Clusterlabs -%global buildnum 7 +%global buildnum 1 + +%ifarch s390x s390 +# minimum timeout on LPAR diag288 watchdog is 15s +%global watchdog_timeout_default 15 +%else +%global watchdog_timeout_default 5 +%endif + +%global sync_resource_startup_default yes +%global sync_resource_startup_sysconfig yes Name: sbd Summary: Storage-based death License: GPLv2+ Group: System Environment/Daemons -Version: 1.4.1 +Version: 1.4.2 Release: %{buildnum}%{?dist} Url: https://github.com/%{github_owner}/%{name} Source0: https://github.com/%{github_owner}/%{name}/archive/%{commit}/%{name}-%{commit}.tar.gz -Patch1: 0001-Fix-regressions.sh-make-parameter-passing-consistent.patch -Patch2: 0002-Doc-add-environment-section-to-man-page.patch -Patch3: 0003-Fix-sbd-pacemaker-handle-new-no_quorum_demote.patch -Patch4: 0004-Fix-sbd-cluster-match-qdevice-sync_timeout-against-w.patch -Patch5: 0005-Fix-sbd-pacemaker-sync-with-pacemakerd-for-robustnes.patch -Patch6: 0006-Fix-make-syncing-of-pacemaker-resource-startup-confi.patch BuildRoot: %{_tmppath}/%{name}-%{version}-build BuildRequires: autoconf BuildRequires: automake @@ -49,7 +52,7 @@ BuildRequires: pkgconfig BuildRequires: systemd BuildRequires: make Conflicts: fence-agents-sbd < 4.2.1-38 -Conflicts: pacemaker-libs < 2.0.4-5 +Conflicts: pacemaker-libs < 2.0.5-4 %if 0%{?rhel} > 0 ExclusiveArch: i686 x86_64 s390x ppc64le aarch64 @@ -76,18 +79,15 @@ regression-testing sbd. %prep %autosetup -n %{name}-%{commit} -p1 -%ifarch s390x s390 -sed -i src/sbd.sysconfig -e "s/Default: 5/Default: 15/" -sed -i src/sbd.sysconfig -e "s/SBD_WATCHDOG_TIMEOUT=5/SBD_WATCHDOG_TIMEOUT=15/" -%endif -sed -i src/sbd.sysconfig -e "s/SBD_SYNC_RESOURCE_STARTUP=no/SBD_SYNC_RESOURCE_STARTUP=yes/" ########################################################### %build ./autogen.sh export CFLAGS="$RPM_OPT_FLAGS -Wall -Werror" -%configure +%configure --with-watchdog-timeout-default=%{watchdog_timeout_default} \ + --with-sync-resource-startup-default=%{sync_resource_startup_default} \ + --with-sync-resource-startup-sysconfig=%{sync_resource_startup_sysconfig} make %{?_smp_mflags} ########################################################### @@ -144,7 +144,8 @@ fi %defattr(-,root,root) %config(noreplace) %{_sysconfdir}/sysconfig/sbd %{_sbindir}/sbd -#%{_datadir}/sbd +%{_datadir}/sbd +%{_datadir}/pkgconfig/sbd.pc %exclude %{_datadir}/sbd/regressions.sh %doc %{_mandir}/man8/sbd* %if %{defined _unitdir} @@ -161,6 +162,15 @@ fi %{_libdir}/libsbdtestbed* %changelog +* Thu Dec 3 2020 Klaus Wenninger - 1.4.2-1 +- rebase to upstream v1.4.2 +- make sbd default to do pacemakerd-api handshake +- conflict with pacemaker-libs < 2.0.5-4 to assure pacemaker + is defaulting to pacemakerd-api handshake + + Resolves: rhbz#1903730 + Resolves: rhbz#1873135 + * Thu Jul 30 2020 Klaus Wenninger - 1.4.1-7 - conflict with pacemaker-libs < 2.0.4-5 instead of requiring a minimum pacemaker version