|
|
2107d2 |
From 2c9ce6a7667e09010d498ca9d9bcf6e476123bbb Mon Sep 17 00:00:00 2001
|
|
|
2107d2 |
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
|
|
|
2107d2 |
Date: Thu, 20 May 2021 12:51:12 +0200
|
|
|
2107d2 |
Subject: [PATCH] fence_mpath: watchdog retries support
|
|
|
2107d2 |
|
|
|
2107d2 |
Added options to be able to set the number of retries and the sleep time between retries.
|
|
|
2107d2 |
---
|
|
|
2107d2 |
agents/mpath/fence_mpath.py | 44 +++++++++++++++++++++--------
|
|
|
2107d2 |
tests/data/metadata/fence_mpath.xml | 4 ++-
|
|
|
2107d2 |
2 files changed, 35 insertions(+), 13 deletions(-)
|
|
|
2107d2 |
|
|
|
2107d2 |
diff --git a/agents/mpath/fence_mpath.py b/agents/mpath/fence_mpath.py
|
|
|
2107d2 |
index 08dab6536..61ab1f205 100644
|
|
|
2107d2 |
--- a/agents/mpath/fence_mpath.py
|
|
|
2107d2 |
+++ b/agents/mpath/fence_mpath.py
|
|
|
2107d2 |
@@ -4,6 +4,7 @@
|
|
|
2107d2 |
import stat
|
|
|
2107d2 |
import re
|
|
|
2107d2 |
import os
|
|
|
2107d2 |
+import time
|
|
|
2107d2 |
import logging
|
|
|
2107d2 |
import atexit
|
|
|
2107d2 |
import ctypes
|
|
|
2107d2 |
@@ -167,14 +168,20 @@ def dev_read(options, fail=True):
|
|
|
2107d2 |
store_fh.close()
|
|
|
2107d2 |
return dev_key
|
|
|
2107d2 |
|
|
|
2107d2 |
-def mpath_check_get_verbose():
|
|
|
2107d2 |
+def mpath_check_get_options(options):
|
|
|
2107d2 |
try:
|
|
|
2107d2 |
- f = open("/etc/sysconfig/watchdog", "r")
|
|
|
2107d2 |
+ f = open("/etc/sysconfig/stonith", "r")
|
|
|
2107d2 |
except IOError:
|
|
|
2107d2 |
- return False
|
|
|
2107d2 |
- match = re.search(r"^\s*verbose=yes", "".join(f.readlines()), re.MULTILINE)
|
|
|
2107d2 |
+ return options
|
|
|
2107d2 |
+
|
|
|
2107d2 |
+ match = re.findall(r"^\s*(\S*)\s*=\s*(\S*)\s*", "".join(f.readlines()), re.MULTILINE)
|
|
|
2107d2 |
+
|
|
|
2107d2 |
+ for m in match:
|
|
|
2107d2 |
+ options[m[0].lower()] = m[1].lower()
|
|
|
2107d2 |
+
|
|
|
2107d2 |
f.close()
|
|
|
2107d2 |
- return bool(match)
|
|
|
2107d2 |
+
|
|
|
2107d2 |
+ return options
|
|
|
2107d2 |
|
|
|
2107d2 |
def mpath_check(hardreboot=False):
|
|
|
2107d2 |
if len(sys.argv) >= 3 and sys.argv[1] == "repair":
|
|
|
2107d2 |
@@ -183,18 +190,27 @@ def mpath_check(hardreboot=False):
|
|
|
2107d2 |
options["--mpathpersist-path"] = "/usr/sbin/mpathpersist"
|
|
|
2107d2 |
options["--store-path"] = "/var/run/cluster"
|
|
|
2107d2 |
options["--power-timeout"] = "5"
|
|
|
2107d2 |
- if mpath_check_get_verbose():
|
|
|
2107d2 |
+ options["retry"] = "0"
|
|
|
2107d2 |
+ options["retry-sleep"] = "1"
|
|
|
2107d2 |
+ options = mpath_check_get_options(options)
|
|
|
2107d2 |
+ if "verbose" in options and options["verbose"] == "yes":
|
|
|
2107d2 |
logging.getLogger().setLevel(logging.DEBUG)
|
|
|
2107d2 |
devs = dev_read(options, fail=False)
|
|
|
2107d2 |
if not devs:
|
|
|
2107d2 |
logging.error("No devices found")
|
|
|
2107d2 |
return 0
|
|
|
2107d2 |
for dev, key in list(devs.items()):
|
|
|
2107d2 |
- if key in get_registration_keys(options, dev, fail=False):
|
|
|
2107d2 |
- logging.debug("key " + key + " registered with device " + dev)
|
|
|
2107d2 |
- return 0
|
|
|
2107d2 |
- else:
|
|
|
2107d2 |
- logging.debug("key " + key + " not registered with device " + dev)
|
|
|
2107d2 |
+ for n in range(int(options["retry"]) + 1):
|
|
|
2107d2 |
+ if n > 0:
|
|
|
2107d2 |
+ logging.debug("retry: " + str(n) + " of " + options["retry"])
|
|
|
2107d2 |
+ if key in get_registration_keys(options, dev, fail=False):
|
|
|
2107d2 |
+ logging.debug("key " + key + " registered with device " + dev)
|
|
|
2107d2 |
+ return 0
|
|
|
2107d2 |
+ else:
|
|
|
2107d2 |
+ logging.debug("key " + key + " not registered with device " + dev)
|
|
|
2107d2 |
+
|
|
|
2107d2 |
+ if n < int(options["retry"]):
|
|
|
2107d2 |
+ time.sleep(float(options["retry-sleep"]))
|
|
|
2107d2 |
logging.debug("key " + key + " registered with any devices")
|
|
|
2107d2 |
|
|
|
2107d2 |
if hardreboot == True:
|
|
|
2107d2 |
@@ -289,7 +305,11 @@ def main():
|
|
|
2107d2 |
device(s). The result is that only registered nodes may write to the \
|
|
|
2107d2 |
device(s). When a node failure occurs, the fence_mpath agent will remove the \
|
|
|
2107d2 |
key belonging to the failed node from the device(s). The failed node will no \
|
|
|
2107d2 |
-longer be able to write to the device(s). A manual reboot is required."
|
|
|
2107d2 |
+longer be able to write to the device(s). A manual reboot is required.\
|
|
|
2107d2 |
+\n.P\n\
|
|
|
2107d2 |
+When used as a watchdog device you can define e.g. retry=1, retry-sleep=2 and \
|
|
|
2107d2 |
+verbose=yes parameters in /etc/sysconfig/stonith if you have issues with it \
|
|
|
2107d2 |
+failing."
|
|
|
2107d2 |
docs["vendorurl"] = "https://www.sourceware.org/dm/"
|
|
|
2107d2 |
show_docs(options, docs)
|
|
|
2107d2 |
|
|
|
2107d2 |
diff --git a/tests/data/metadata/fence_mpath.xml b/tests/data/metadata/fence_mpath.xml
|
|
|
2107d2 |
index 524776aa0..0255d4b4e 100644
|
|
|
2107d2 |
--- a/tests/data/metadata/fence_mpath.xml
|
|
|
2107d2 |
+++ b/tests/data/metadata/fence_mpath.xml
|
|
|
2107d2 |
@@ -1,7 +1,9 @@
|
|
|
2107d2 |
|
|
|
2107d2 |
<resource-agent name="fence_mpath" shortdesc="Fence agent for multipath persistent reservation" >
|
|
|
2107d2 |
<longdesc>fence_mpath is an I/O fencing agent that uses SCSI-3 persistent reservations to control access multipath devices. Underlying devices must support SCSI-3 persistent reservations (SPC-3 or greater) as well as the "preempt-and-abort" subcommand.
|
|
|
2107d2 |
-The fence_mpath agent works by having a unique key for each node that has to be set in /etc/multipath.conf. Once registered, a single node will become the reservation holder by creating a "write exclusive, registrants only" reservation on the device(s). The result is that only registered nodes may write to the device(s). When a node failure occurs, the fence_mpath agent will remove the key belonging to the failed node from the device(s). The failed node will no longer be able to write to the device(s). A manual reboot is required.</longdesc>
|
|
|
2107d2 |
+The fence_mpath agent works by having a unique key for each node that has to be set in /etc/multipath.conf. Once registered, a single node will become the reservation holder by creating a "write exclusive, registrants only" reservation on the device(s). The result is that only registered nodes may write to the device(s). When a node failure occurs, the fence_mpath agent will remove the key belonging to the failed node from the device(s). The failed node will no longer be able to write to the device(s). A manual reboot is required.
|
|
|
2107d2 |
+
|
|
|
2107d2 |
+When used as a watchdog device you can define e.g. retry=1, retry-sleep=2 and verbose=yes parameters in /etc/sysconfig/stonith if you have issues with it failing.</longdesc>
|
|
|
2107d2 |
<vendor-url>https://www.sourceware.org/dm/</vendor-url>
|
|
|
2107d2 |
<parameters>
|
|
|
2107d2 |
<parameter name="action" unique="0" required="1">
|