From 2c9ce6a7667e09010d498ca9d9bcf6e476123bbb Mon Sep 17 00:00:00 2001 From: Oyvind Albrigtsen Date: Thu, 20 May 2021 12:51:12 +0200 Subject: [PATCH] fence_mpath: watchdog retries support Added options to be able to be able to set number of retries and sleep between retries. --- agents/mpath/fence_mpath.py | 44 +++++++++++++++++++++-------- tests/data/metadata/fence_mpath.xml | 4 ++- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/agents/mpath/fence_mpath.py b/agents/mpath/fence_mpath.py index 08dab6536..61ab1f205 100644 --- a/agents/mpath/fence_mpath.py +++ b/agents/mpath/fence_mpath.py @@ -4,6 +4,7 @@ import stat import re import os +import time import logging import atexit import ctypes @@ -167,14 +168,20 @@ def dev_read(options, fail=True): store_fh.close() return dev_key -def mpath_check_get_verbose(): +def mpath_check_get_options(options): try: - f = open("/etc/sysconfig/watchdog", "r") + f = open("/etc/sysconfig/stonith", "r") except IOError: - return False - match = re.search(r"^\s*verbose=yes", "".join(f.readlines()), re.MULTILINE) + return options + + match = re.findall(r"^\s*(\S*)\s*=\s*(\S*)\s*", "".join(f.readlines()), re.MULTILINE) + + for m in match: + options[m[0].lower()] = m[1].lower() + f.close() - return bool(match) + + return options def mpath_check(hardreboot=False): if len(sys.argv) >= 3 and sys.argv[1] == "repair": @@ -183,18 +190,27 @@ def mpath_check(hardreboot=False): options["--mpathpersist-path"] = "/usr/sbin/mpathpersist" options["--store-path"] = "/var/run/cluster" options["--power-timeout"] = "5" - if mpath_check_get_verbose(): + options["retry"] = "0" + options["retry-sleep"] = "1" + options = mpath_check_get_options(options) + if "verbose" in options and options["verbose"] == "yes": logging.getLogger().setLevel(logging.DEBUG) devs = dev_read(options, fail=False) if not devs: logging.error("No devices found") return 0 for dev, key in list(devs.items()): - if key in get_registration_keys(options, dev, fail=False): - logging.debug("key " + key + " registered with device " + dev) - return 0 - else: - logging.debug("key " + key + " not registered with device " + dev) + for n in range(int(options["retry"]) + 1): + if n > 0: + logging.debug("retry: " + str(n) + " of " + options["retry"]) + if key in get_registration_keys(options, dev, fail=False): + logging.debug("key " + key + " registered with device " + dev) + return 0 + else: + logging.debug("key " + key + " not registered with device " + dev) + + if n < int(options["retry"]): + time.sleep(float(options["retry-sleep"])) logging.debug("key " + key + " registered with any devices") if hardreboot == True: @@ -289,7 +305,11 @@ def main(): device(s). The result is that only registered nodes may write to the \ device(s). When a node failure occurs, the fence_mpath agent will remove the \ key belonging to the failed node from the device(s). The failed node will no \ -longer be able to write to the device(s). A manual reboot is required." +longer be able to write to the device(s). A manual reboot is required.\ +\n.P\n\ +When used as a watchdog device you can define e.g. retry=1, retry-sleep=2 and \ +verbose=yes parameters in /etc/sysconfig/stonith if you have issues with it \ +failing." docs["vendorurl"] = "https://www.sourceware.org/dm/" show_docs(options, docs) diff --git a/tests/data/metadata/fence_mpath.xml b/tests/data/metadata/fence_mpath.xml index 524776aa0..0255d4b4e 100644 --- a/tests/data/metadata/fence_mpath.xml +++ b/tests/data/metadata/fence_mpath.xml @@ -1,7 +1,9 @@ fence_mpath is an I/O fencing agent that uses SCSI-3 persistent reservations to control access multipath devices. Underlying devices must support SCSI-3 persistent reservations (SPC-3 or greater) as well as the "preempt-and-abort" subcommand. -The fence_mpath agent works by having a unique key for each node that has to be set in /etc/multipath.conf. Once registered, a single node will become the reservation holder by creating a "write exclusive, registrants only" reservation on the device(s). The result is that only registered nodes may write to the device(s). When a node failure occurs, the fence_mpath agent will remove the key belonging to the failed node from the device(s). The failed node will no longer be able to write to the device(s). A manual reboot is required. +The fence_mpath agent works by having a unique key for each node that has to be set in /etc/multipath.conf. Once registered, a single node will become the reservation holder by creating a "write exclusive, registrants only" reservation on the device(s). The result is that only registered nodes may write to the device(s). When a node failure occurs, the fence_mpath agent will remove the key belonging to the failed node from the device(s). The failed node will no longer be able to write to the device(s). A manual reboot is required. + +When used as a watchdog device you can define e.g. retry=1, retry-sleep=2 and verbose=yes parameters in /etc/sysconfig/stonith if you have issues with it failing. https://www.sourceware.org/dm/