Blame SOURCES/bz1476401-4-add-fence_heuristics_ping.patch

4822a5
diff --git a/configure.ac b/configure.ac
4822a5
index 8acfef9..f9f29cf 100644
4822a5
--- a/configure.ac
4822a5
+++ b/configure.ac
4822a5
@@ -214,6 +214,27 @@ AC_PATH_PROG([SNMPGET_PATH], [snmpget], [/usr/bin/snmpget])
4822a5
 AC_PATH_PROG([MPATH_PATH], [mpathpersist], [/usr/sbin/mpathpersist])
4822a5
 AC_PATH_PROG([SUDO_PATH], [mpathpersist], [/usr/bin/sudo])
4822a5
 
4822a5
+AC_PATH_PROG([PING_CMD], [ping])
4822a5
+AC_PATH_PROG([PING6_CMD], [ping6])
4822a5
+AC_PATH_PROG([PING4_CMD], [ping4])
4822a5
+
4822a5
+if test "x${ac_cv_path_PING_CMD}" = x; then
4822a5
+	# assume multicall-ping just not available in build-environment
4822a5
+	PING_CMD="/bin/ping"
4822a5
+	PING4_CMD="/bin/ping -4"
4822a5
+	PING6_CMD="/bin/ping -6"
4822a5
+elif test "x${ac_cv_path_PING6_CMD}" = x; then
4822a5
+	# just IPv4
4822a5
+	PING4_CMD="${ac_cv_path_PING_CMD}"
4822a5
+elif test -L ${ac_cv_path_PING6_CMD}; then
4822a5
+	# assume multicall-ping
4822a5
+	PING4_CMD="${ac_cv_path_PING_CMD} -4"
4822a5
+else
4822a5
+	# ping is just IPv4
4822a5
+	PING4_CMD="${ac_cv_path_PING_CMD}"
4822a5
+fi
4822a5
+
4822a5
+
4822a5
 ## do subst
4822a5
 
4822a5
 AC_SUBST([DEFAULT_CONFIG_DIR])
4822a5
@@ -278,6 +278,7 @@
4822a5
 		 fence/agents/eaton_snmp/Makefile
4822a5
 		 fence/agents/emerson/Makefile
4822a5
 		 fence/agents/eps/Makefile
4822a5
+		 fence/agents/heuristics_ping/Makefile
4822a5
 		 fence/agents/hpblade/Makefile
4822a5
 		 fence/agents/ibmblade/Makefile
4822a5
 		 fence/agents/ipdu/Makefile
4822a5
--- /dev/null	2017-09-27 08:35:37.286500265 +0200
4822a5
+++ b/fence/agents/heuristics_ping/Makefile.am	2017-09-28 15:27:42.605317632 +0200
4822a5
@@ -0,0 +1,20 @@
4822a5
+# Automake input for the fence_heuristics_ping agent. The variables below
4822a5
+# are consumed by the shared make fragments included further down.
4822a5
+MAINTAINERCLEANFILES	= Makefile.in
4822a5
+
4822a5
+TARGET			= fence_heuristics_ping
4822a5
+
4822a5
+SRC			= $(TARGET).py
4822a5
+
4822a5
+EXTRA_DIST		= $(SRC)
4822a5
+
4822a5
+sbin_SCRIPTS		= $(TARGET)
4822a5
+
4822a5
+man_MANS		= $(TARGET).8
4822a5
+
4822a5
+# presumably picked up by the shared test rules when the agent is exercised
4822a5
+# during the build -- TODO confirm against the included make fragments
4822a5
+FENCE_TEST_ARGS         = --ping-targets=localhost
4822a5
+
4822a5
+include $(top_srcdir)/make/fencebuild.mk
4822a5
+include $(top_srcdir)/make/fenceman.mk
4822a5
+include $(top_srcdir)/make/agentpycheck.mk
4822a5
+
4822a5
+# besides the man-page cleanup, remove the generated agent script
4822a5
+clean-local: clean-man
4822a5
+	rm -f $(TARGET)
4822a5
diff --git a/doc/COPYRIGHT b/doc/COPYRIGHT
4822a5
index 8124c53..49f88c6 100644
4822a5
--- a/doc/COPYRIGHT
4822a5
+++ b/doc/COPYRIGHT
4822a5
@@ -58,6 +58,7 @@ Joel Becker <joel.becker at oracle.com>
4822a5
 Jonathan Brassow <jbrassow at redhat.com>
4822a5
 jparsons <jparsons at redhat.com>
4822a5
 Ken Preslan <kpreslan at redhat.com>
4822a5
+Klaus Wenninger <kwenning at redhat.com>
4822a5
 Lon Hohberger <lhh at redhat.com>
4822a5
 Marc - A. Dahlhaus <mad at wol.de>
4822a5
 Marek 'marx' Grac <mgrac at redhat.com>
4822a5
diff --git a/fence/agents/heuristics_ping/fence_heuristics_ping.py b/fence/agents/heuristics_ping/fence_heuristics_ping.py
4822a5
new file mode 100644
4822a5
index 0000000..b21d6a4
4822a5
--- /dev/null
4822a5
+++ b/fence/agents/heuristics_ping/fence_heuristics_ping.py
4822a5
@@ -0,0 +1,200 @@
4822a5
+#!/usr/bin/python -tt
4822a5
+
4822a5
+# The Following Agent Has Been Tested On:
4822a5
+#
4822a5
+# RHEL 7.4
4822a5
+#
4822a5
+
4822a5
+import io
4822a5
+import re
4822a5
+import subprocess
4822a5
+import shlex
4822a5
+import sys, stat
4822a5
+import logging
4822a5
+import os
4822a5
+import atexit
4822a5
+sys.path.append("@FENCEAGENTSLIBDIR@")
4822a5
+from fencing import fail_usage, run_command, fence_action, all_opt
4822a5
+from fencing import atexit_handler, check_input, process_input, show_docs
4822a5
+from fencing import run_delay
4822a5
+
4822a5
+def ping_test(con, options):
4822a5
+	"""Ping every configured target and report overall reachability.
4822a5
+
4822a5
+	con is not used by this function. options is the dict of parsed
4822a5
+	agent options; "--action", "--ping-targets", "--ping-timeout",
4822a5
+	"--ping-count", "--ping-interval", "--ping-good-count" and
4822a5
+	"--ping-maxfail" are read.
4822a5
+
4822a5
+	Returns True for action "on" or when no more than --ping-maxfail
4822a5
+	targets failed; returns False otherwise, including when no ping
4822a5
+	target was given.
4822a5
+	"""
4822a5
+
4822a5
+	if options["--action"] == "on":
4822a5
+		# we want unfencing to always succeed
4822a5
+		return True
4822a5
+
4822a5
+	if "--ping-targets" not in options or options["--ping-targets"] == "":
4822a5
+		# nothing to probe, so the heuristic cannot succeed
4822a5
+		logging.error("ping target required")
4822a5
+		return False
4822a5
+
4822a5
+	timeout = int(options["--ping-timeout"])
4822a5
+	count = int(options["--ping-count"])
4822a5
+	interval = int(options["--ping-interval"])
4822a5
+	good_required = int(options["--ping-good-count"])
4822a5
+	maxfail = int(options["--ping-maxfail"])
4822a5
+	targets = options["--ping-targets"].split(",")
4822a5
+	p = {}
4822a5
+	failcount = 0
4822a5
+	# search string for parsing the results of the ping-executable
4822a5
+	packet_count = re.compile(r".*transmitted, ([0-9]*) received.*")
4822a5
+
4822a5
+	# start a ping-process per target so that all probes run in parallel
4822a5
+	for target in targets:
4822a5
+		# the @...@ placeholders are substituted when the agent is built
4822a5
+		ping_path = '@PING_CMD@'
4822a5
+		target_mangled = target
4822a5
+		if target.startswith('inet6:'):
4822a5
+			if '@PING6_CMD@' == '':
4822a5
+				# no IPv6 capable ping available; counted as failed below
4822a5
+				p[target] = None
4822a5
+				continue
4822a5
+			ping_path = '@PING6_CMD@'
4822a5
+			# slice the prefix off: lstrip() strips a set of characters
4822a5
+			# and would also eat leading characters of the target itself
4822a5
+			target_mangled = target[len('inet6:'):]
4822a5
+		elif target.startswith('inet:'):
4822a5
+			ping_path = '@PING4_CMD@'
4822a5
+			target_mangled = target[len('inet:'):]
4822a5
+
4822a5
+		ping_cmd = "%s -n -q -W %d -c %d -i %d %s" % (
4822a5
+			ping_path, timeout, count, interval, target_mangled)
4822a5
+		logging.info("Running command: %s", ping_cmd)
4822a5
+		try:
4822a5
+			p[target] = subprocess.Popen(shlex.split(ping_cmd),
4822a5
+				stdout=subprocess.PIPE)
4822a5
+		except OSError:
4822a5
+			p[target] = None
4822a5
+
4822a5
+	# collect the results of the ping-processes
4822a5
+	for target in targets:
4822a5
+		good = 0
4822a5
+		if p[target] is not None:
4822a5
+			p[target].wait()
4822a5
+			if p[target].returncode == 0:
4822a5
+				for line in p[target].stdout:
4822a5
+					searchres = packet_count.search(line)
4822a5
+					if searchres:
4822a5
+						good = int(searchres.group(1))
4822a5
+						break
4822a5
+				if good >= good_required:
4822a5
+					logging.info("ping target %s received %d of %d" \
4822a5
+						% (target, good, count))
4822a5
+					continue
4822a5
+			failcount += 1
4822a5
+			logging.info("ping target %s received %d of %d and thus failed"
4822a5
+				% (target, good, count))
4822a5
+		else:
4822a5
+			failcount += 1
4822a5
+			logging.error("ping target %s failed on OS level" % target)
4822a5
+
4822a5
+	# overall success as long as no more than maxfail targets failed
4822a5
+	return failcount <= maxfail
4822a5
+
4822a5
+
4822a5
+def define_new_opts():
4822a5
+	"""Add the ping-specific option definitions to all_opt.
4822a5
+
4822a5
+	Registers ping_count, ping_good_count, ping_interval, ping_timeout,
4822a5
+	ping_maxfail and ping_targets. Only ping_targets is marked as
4822a5
+	required and it has an empty default; the other options default to
4822a5
+	10, 8, 1, 2 and 0 respectively.
4822a5
+	"""
4822a5
+	all_opt["ping_count"] = {
4822a5
+		"getopt" : ":",
4822a5
+		"longopt" : "ping-count",
4822a5
+		"required" : "0",
4822a5
+		"help" : "--ping-count=[number]          Number of ping-probes to send",
4822a5
+		"shortdesc" : "The number of ping-probes that is being sent per target",
4822a5
+		"default" : "10",
4822a5
+		"order" : 1
4822a5
+		}
4822a5
+
4822a5
+	all_opt["ping_good_count"] = {
4822a5
+		"getopt" : ":",
4822a5
+		"longopt" : "ping-good-count",
4822a5
+		"required" : "0",
4822a5
+		"help" : "--ping-good-count=[number]     Number of positive ping-probes required",
4822a5
+		"shortdesc" : "The number of positive ping-probes required to account a target as available",
4822a5
+		"default" : "8",
4822a5
+		"order" : 1
4822a5
+		}
4822a5
+
4822a5
+	all_opt["ping_interval"] = {
4822a5
+		"getopt" : ":",
4822a5
+		"longopt" : "ping-interval",
4822a5
+		"required" : "0",
4822a5
+		"help" : "--ping-interval=[seconds]      Seconds between ping-probes",
4822a5
+		"shortdesc" : "The interval in seconds between ping-probes",
4822a5
+		"default" : "1",
4822a5
+		"order" : 1
4822a5
+		}
4822a5
+
4822a5
+	all_opt["ping_timeout"] = {
4822a5
+		"getopt" : ":",
4822a5
+		"longopt" : "ping-timeout",
4822a5
+		"required" : "0",
4822a5
+		"help" : "--ping-timeout=[seconds]       Timeout for individual ping-probes",
4822a5
+		"shortdesc" : "The timeout in seconds till an individual ping-probe is accounted as lost",
4822a5
+		"default" : "2",
4822a5
+		"order" : 1
4822a5
+		}
4822a5
+
4822a5
+	all_opt["ping_maxfail"] = {
4822a5
+		"getopt" : ":",
4822a5
+		"longopt" : "ping-maxfail",
4822a5
+		"required" : "0",
4822a5
+		"help" : "--ping-maxfail=[number]        Number of failed ping-targets allowed",
4822a5
+		"shortdesc" : "The number of failed ping-targets to still account as overall success",
4822a5
+		"default" : "0",
4822a5
+		"order" : 1
4822a5
+		}
4822a5
+
4822a5
+	# the empty default is what ping_test() rejects with "ping target required"
4822a5
+	all_opt["ping_targets"] = {
4822a5
+		"getopt" : ":",
4822a5
+		"longopt" : "ping-targets",
4822a5
+		"required" : "1",
4822a5
+		"help" : "--ping-targets=tgt1,[inet6:]tgt2  Comma separated list of ping-targets",
4822a5
+		"shortdesc" : "A comma separated list of ping-targets (optionally prepended by 'inet:' or 'inet6:') to be probed",
4822a5
+		"default" : "",
4822a5
+		"order" : 1
4822a5
+		}
4822a5
+
4822a5
+
4822a5
+def main():
4822a5
+	"""Parse the options, run the ping heuristic and exit with its result.
4822a5
+
4822a5
+	ping_test is handed to fence_action() as both the reboot-cycle and
4822a5
+	the set-power callback. The value returned by fence_action() is
4822a5
+	used as the process exit status.
4822a5
+	"""
4822a5
+	# option names this agent accepts; the ping_* entries are the ones
4822a5
+	# registered by define_new_opts()
4822a5
+	device_opt = ["no_status", "no_password", "ping_count", "ping_good_count",
4822a5
+		"ping_interval", "ping_timeout", "ping_maxfail", "ping_targets", "method"]
4822a5
+	define_new_opts()
4822a5
+	atexit.register(atexit_handler)
4822a5
+
4822a5
+	# this agent uses "cycle" as its default method and adjusts the help
4822a5
+	# text of the shared "method" option accordingly
4822a5
+	all_opt["method"]["default"] = "cycle"
4822a5
+	all_opt["method"]["help"] = "-m, --method=[method]          Method to fence (cycle|onoff) (Default: cycle)"
4822a5
+
4822a5
+	options = check_input(device_opt, process_input(device_opt))
4822a5
+
4822a5
+	docs = {}
4822a5
+	docs["shortdesc"] = "Fence agent for ping-heuristic based fencing"
4822a5
+	docs["longdesc"] = "fence_heuristics_ping uses ping-heuristics to control execution of another fence agent on the same fencing level.\
4822a5
+\n.P\n\
4822a5
+This is not a fence agent by itself! \
4822a5
+Its only purpose is to enable/disable another fence agent that lives on the same fencing level but after fence_heuristics_ping.\
4822a5
+\n.P\n\
4822a5
+fence_heuristics_ping is currently provided as tech preview in RHEL-7.5."
4822a5
+	docs["vendorurl"] = ""
4822a5
+	show_docs(options, docs)
4822a5
+
4822a5
+	# move ping-test to the end of the time-window set via --delay
4822a5
+	# as to give the network time to settle after the incident that has
4822a5
+	# caused fencing and have the results as current as possible
4822a5
+	# max_pingcheck: (count - 1) intervals between the probes plus one
4822a5
+	# timeout, i.e. presumably the longest a single ping run can take
4822a5
+	max_pingcheck = (int(options["--ping-count"]) - 1) * \
4822a5
+		int(options["--ping-interval"]) + int(options["--ping-timeout"])
4822a5
+	run_delay(options, reserve=max_pingcheck)
4822a5
+
4822a5
+	# the positional arguments other than options are passed as None
4822a5
+	result = fence_action(\
4822a5
+				None, \
4822a5
+				options, \
4822a5
+				None, \
4822a5
+				None, \
4822a5
+				reboot_cycle_fn = ping_test,
4822a5
+				sync_set_power_fn = ping_test)
4822a5
+
4822a5
+	# execute the remaining delay
4822a5
+	run_delay(options, result=result)
4822a5
+	sys.exit(result)
4822a5
+
4822a5
+if __name__ == "__main__":
4822a5
+	main()
4822a5
diff --git a/make/fencebuild.mk b/make/fencebuild.mk
4822a5
index 0a1f2bc..25bb0f1 100644
4822a5
--- a/make/fencebuild.mk
4822a5
+++ b/make/fencebuild.mk
4822a5
@@ -28,5 +28,8 @@ define gen_agent_from_py
4822a5
 		-e 's#@''STORE_PATH@#${CLUSTERVARRUN}#g' \
4822a5
 		-e 's#@''SUDO_PATH@#${SUDO_PATH}#g' \
4822a5
+		-e 's#@''PING_CMD@#${PING_CMD}#g' \
4822a5
+		-e 's#@''PING6_CMD@#${PING6_CMD}#g' \
4822a5
+		-e 's#@''PING4_CMD@#${PING4_CMD}#g' \
4822a5
 	> $@
4822a5
 
4822a5
 	if [ 0 -eq `echo "$(SRC)" | grep fence_ &> /dev/null; echo $$?` ]; then \
4822a5
--- /dev/null	2017-10-08 13:42:59.634387493 +0200
4822a5
+++ fence-agents-4.0.11/tests/data/metadata/fence_heuristics_ping.xml	2017-10-18 20:55:23.978815450 +0200
4822a5
@@ -0,0 +1,117 @@
4822a5
+
4822a5
+<resource-agent name="fence_heuristics_ping" shortdesc="Fence agent for ping-heuristic based fencing" >
4822a5
+<longdesc>fence_heuristics_ping uses ping-heuristics to control execution of another fence agent on the same fencing level.
4822a5
+.P
4822a5
+This is not a fence agent by itself! Its only purpose is to enable/disable another fence agent that lives on the same fencing level but after fence_heuristics_ping.
4822a5
+.P
4822a5
+fence_heuristics_ping is currently provided as tech preview in RHEL-7.5.</longdesc>
4822a5
+<vendor-url></vendor-url>
4822a5
+<parameters>
4822a5
+	<parameter name="ping_interval" unique="0" required="0">
4822a5
+		<getopt mixed="--ping-interval=[seconds]" />
4822a5
+		<content type="string" default="1"  />
4822a5
+		<shortdesc lang="en">The interval in seconds between ping-probes</shortdesc>
4822a5
+	</parameter>
4822a5
+	<parameter name="ping_maxfail" unique="0" required="0">
4822a5
+		<getopt mixed="--ping-maxfail=[number]" />
4822a5
+		<content type="string" default="0"  />
4822a5
+		<shortdesc lang="en">The number of failed ping-targets to still account as overall success</shortdesc>
4822a5
+	</parameter>
4822a5
+	<parameter name="ping_targets" unique="0" required="1">
4822a5
+		<getopt mixed="--ping-targets=tgt1,[inet6:]tgt2" />
4822a5
+		<content type="string"  />
4822a5
+		<shortdesc lang="en">A comma separated list of ping-targets (optionally prepended by 'inet:' or 'inet6:') to be probed</shortdesc>
4822a5
+	</parameter>
4822a5
+	<parameter name="action" unique="0" required="1">
4822a5
+		<getopt mixed="-o, --action=[action]" />
4822a5
+		<content type="string" default="reboot"  />
4822a5
+		<shortdesc lang="en">Fencing Action</shortdesc>
4822a5
+	</parameter>
4822a5
+	<parameter name="ping_good_count" unique="0" required="0">
4822a5
+		<getopt mixed="--ping-good-count=[number]" />
4822a5
+		<content type="string" default="8"  />
4822a5
+		<shortdesc lang="en">The number of positive ping-probes required to account a target as available</shortdesc>
4822a5
+	</parameter>
4822a5
+	<parameter name="ping_timeout" unique="0" required="0">
4822a5
+		<getopt mixed="--ping-timeout=[seconds]" />
4822a5
+		<content type="string" default="2"  />
4822a5
+		<shortdesc lang="en">The timeout in seconds till an individual ping-probe is accounted as lost</shortdesc>
4822a5
+	</parameter>
4822a5
+	<parameter name="ping_count" unique="0" required="0">
4822a5
+		<getopt mixed="--ping-count=[number]" />
4822a5
+		<content type="string" default="10"  />
4822a5
+		<shortdesc lang="en">The number of ping-probes that is being sent per target</shortdesc>
4822a5
+	</parameter>
4822a5
+	<parameter name="method" unique="0" required="0">
4822a5
+		<getopt mixed="-m, --method=[method]" />
4822a5
+		<content type="select" default="cycle"  >
4822a5
+			<option value="onoff" />
4822a5
+			<option value="cycle" />
4822a5
+		</content>
4822a5
+		<shortdesc lang="en">Method to fence (onoff|cycle)</shortdesc>
4822a5
+	</parameter>
4822a5
+	<parameter name="verbose" unique="0" required="0">
4822a5
+		<getopt mixed="-v, --verbose" />
4822a5
+		<content type="boolean"  />
4822a5
+		<shortdesc lang="en">Verbose mode</shortdesc>
4822a5
+	</parameter>
4822a5
+	<parameter name="debug" unique="0" required="0" deprecated="1">
4822a5
+		<getopt mixed="-D, --debug-file=[debugfile]" />
4822a5
+		<content type="string"  />
4822a5
+		<shortdesc lang="en">Write debug information to given file</shortdesc>
4822a5
+	</parameter>
4822a5
+	<parameter name="debug_file" unique="0" required="0" obsoletes="debug">
4822a5
+		<getopt mixed="-D, --debug-file=[debugfile]" />
4822a5
+		<content type="string"  />
4822a5
+		<shortdesc lang="en">Write debug information to given file</shortdesc>
4822a5
+	</parameter>
4822a5
+	<parameter name="version" unique="0" required="0">
4822a5
+		<getopt mixed="-V, --version" />
4822a5
+		<content type="boolean"  />
4822a5
+		<shortdesc lang="en">Display version information and exit</shortdesc>
4822a5
+	</parameter>
4822a5
+	<parameter name="help" unique="0" required="0">
4822a5
+		<getopt mixed="-h, --help" />
4822a5
+		<content type="boolean"  />
4822a5
+		<shortdesc lang="en">Display help and exit</shortdesc>
4822a5
+	</parameter>
4822a5
+	<parameter name="shell_timeout" unique="0" required="0">
4822a5
+		<getopt mixed="--shell-timeout=[seconds]" />
4822a5
+		<content type="second" default="3"  />
4822a5
+		<shortdesc lang="en">Wait X seconds for cmd prompt after issuing command</shortdesc>
4822a5
+	</parameter>
4822a5
+	<parameter name="delay" unique="0" required="0">
4822a5
+		<getopt mixed="--delay=[seconds]" />
4822a5
+		<content type="second" default="0"  />
4822a5
+		<shortdesc lang="en">Wait X seconds before fencing is started</shortdesc>
4822a5
+	</parameter>
4822a5
+	<parameter name="power_timeout" unique="0" required="0">
4822a5
+		<getopt mixed="--power-timeout=[seconds]" />
4822a5
+		<content type="second" default="20"  />
4822a5
+		<shortdesc lang="en">Test X seconds for status change after ON/OFF</shortdesc>
4822a5
+	</parameter>
4822a5
+	<parameter name="power_wait" unique="0" required="0">
4822a5
+		<getopt mixed="--power-wait=[seconds]" />
4822a5
+		<content type="second" default="0"  />
4822a5
+		<shortdesc lang="en">Wait X seconds after issuing ON/OFF</shortdesc>
4822a5
+	</parameter>
4822a5
+	<parameter name="login_timeout" unique="0" required="0">
4822a5
+		<getopt mixed="--login-timeout=[seconds]" />
4822a5
+		<content type="second" default="5"  />
4822a5
+		<shortdesc lang="en">Wait X seconds for cmd prompt after login</shortdesc>
4822a5
+	</parameter>
4822a5
+	<parameter name="retry_on" unique="0" required="0">
4822a5
+		<getopt mixed="--retry-on=[attempts]" />
4822a5
+		<content type="integer" default="1"  />
4822a5
+		<shortdesc lang="en">Count of attempts to retry power on</shortdesc>
4822a5
+	</parameter>
4822a5
+</parameters>
4822a5
+<actions>
4822a5
+	<action name="on" automatic="0"/>
4822a5
+	<action name="off" />
4822a5
+	<action name="reboot" />
4822a5
+	<action name="monitor" />
4822a5
+	<action name="metadata" />
4822a5
+	<action name="validate-all" />
4822a5
+</actions>
4822a5
+</resource-agent>