diff --git a/configure.ac b/configure.ac index 8acfef9..f9f29cf 100644 --- a/configure.ac +++ b/configure.ac @@ -214,6 +214,27 @@ AC_PATH_PROG([SNMPGET_PATH], [snmpget], [/usr/bin/snmpget]) AC_PATH_PROG([MPATH_PATH], [mpathpersist], [/usr/sbin/mpathpersist]) AC_PATH_PROG([SUDO_PATH], [mpathpersist], [/usr/bin/sudo]) +AC_PATH_PROG([PING_CMD], [ping]) +AC_PATH_PROG([PING6_CMD], [ping6]) +AC_PATH_PROG([PING4_CMD], [ping4]) + +if test "x${ac_cv_path_PING_CMD}" = x; then + # assume multicall-ping just not available in build-environment + PING_CMD="/bin/ping" + PING4_CMD="/bin/ping -4" + PING6_CMD="/bin/ping -6" +elif test "x${ac_cv_path_PING6_CMD}" = x; then + # just IPv4 + PING4_CMD="${ac_cv_path_PING_CMD}" +elif test -L "${ac_cv_path_PING6_CMD}"; then + # ping6 is a symlink, so assume multicall-ping + PING4_CMD="${ac_cv_path_PING_CMD} -4" +else + # ping is just IPv4 + PING4_CMD="${ac_cv_path_PING_CMD}" +fi + + ## do subst AC_SUBST([DEFAULT_CONFIG_DIR]) @@ -278,6 +278,7 @@ fence/agents/eaton_snmp/Makefile fence/agents/emerson/Makefile fence/agents/eps/Makefile + fence/agents/heuristics_ping/Makefile fence/agents/hpblade/Makefile fence/agents/ibmblade/Makefile fence/agents/ipdu/Makefile --- /dev/null 2017-09-27 08:35:37.286500265 +0200 +++ b/fence/agents/heuristics_ping/Makefile.am 2017-09-28 15:27:42.605317632 +0200 @@ -0,0 +1,20 @@ +MAINTAINERCLEANFILES = Makefile.in + +TARGET = fence_heuristics_ping + +SRC = $(TARGET).py + +EXTRA_DIST = $(SRC) + +sbin_SCRIPTS = $(TARGET) + +man_MANS = $(TARGET).8 + +FENCE_TEST_ARGS = --ping-targets=localhost + +include $(top_srcdir)/make/fencebuild.mk +include $(top_srcdir)/make/fenceman.mk +include $(top_srcdir)/make/agentpycheck.mk + +clean-local: clean-man + rm -f $(TARGET) diff --git a/doc/COPYRIGHT b/doc/COPYRIGHT index 8124c53..49f88c6 100644 --- a/doc/COPYRIGHT +++ b/doc/COPYRIGHT @@ -58,6 +58,7 @@ Joel Becker Jonathan Brassow jparsons Ken Preslan +Klaus Wenninger Lon Hohberger Marc - A. 
Dahlhaus Marek 'marx' Grac
diff --git a/fence/agents/heuristics_ping/fence_heuristics_ping.py b/fence/agents/heuristics_ping/fence_heuristics_ping.py
new file mode 100644
index 0000000..b21d6a4
--- /dev/null
+++ b/fence/agents/heuristics_ping/fence_heuristics_ping.py
@@ -0,0 +1,217 @@
+#!/usr/bin/python -tt
+
+# The Following Agent Has Been Tested On:
+#
+# RHEL 7.4
+#
+
+import io
+import re
+import subprocess
+import shlex
+import sys, stat
+import logging
+import os
+import atexit
+sys.path.append("@FENCEAGENTSLIBDIR@")
+from fencing import fail_usage, run_command, fence_action, all_opt
+from fencing import atexit_handler, check_input, process_input, show_docs
+from fencing import run_delay
+
+def ping_test(con, options):
+    """Ping all configured targets and return the overall verdict.
+
+    Passed to fence_action() as both reboot_cycle_fn and
+    sync_set_power_fn.
+
+    con     -- not used by this function
+    options -- option dictionary; reads "--action" and the "--ping-*" keys
+
+    Returns True for action "on" and when no more than --ping-maxfail
+    targets failed; returns False otherwise, including when
+    --ping-targets is missing or empty.
+    """
+    if options["--action"] == "on":
+        # we want unfencing to always succeed
+        return True
+
+    if "--ping-targets" not in options or options["--ping-targets"] == "":
+        # "off" was requested so fake "on" to provoke failure
+        logging.error("ping target required")
+        return False
+
+    timeout = int(options["--ping-timeout"])
+    count = int(options["--ping-count"])
+    interval = int(options["--ping-interval"])
+    good_required = int(options["--ping-good-count"])
+    maxfail = int(options["--ping-maxfail"])
+    targets = options["--ping-targets"].split(",")
+    exitcode = True
+    p = {}
+    failcount = 0
+    # search string for parsing the results of the ping-executable
+    packet_count = re.compile(r".*transmitted, ([0-9]*) received.*")
+
+    # start a ping-process per target
+    for target in targets:
+        ping_path = '@PING_CMD@'
+        target_mangled = target
+        # an address-family prefix is cut off by slicing; str.lstrip()
+        # takes a set of characters and would also eat the beginning of
+        # the target itself (e.g. "inet:example.com" -> "xample.com")
+        if target.startswith('inet6:'):
+            if '@PING6_CMD@' == '':
+                # no IPv6 ping command was substituted at build time
+                p[target] = None
+                continue
+            ping_path = '@PING6_CMD@'
+            target_mangled = target[len('inet6:'):]
+        elif target.startswith('inet:'):
+            ping_path = '@PING4_CMD@'
+            target_mangled = target[len('inet:'):]
+
+        ping_cmd = "%s -n -q -W %d -c %d -i %d %s" % (
+            ping_path, timeout, count, interval, target_mangled)
+        logging.info("Running command: %s", ping_cmd)
+        try:
+            p[target] = subprocess.Popen(shlex.split(ping_cmd),
+                stdout=subprocess.PIPE)
+        except OSError:
+            # ping could not be started; counted as failed target below
+            p[target] = None
+
+    # collect the results of the ping-processes
+    for target in targets:
+        good = 0
+        if p[target] is not None:
+            p[target].wait()
+            if p[target].returncode == 0:
+                for line in p[target].stdout:
+                    searchres = packet_count.search(line)
+                    if searchres:
+                        good = int(searchres.group(1))
+                        break
+                if good >= good_required:
+                    logging.info("ping target %s received %d of %d" \
+                        % (target, good, count))
+                    continue
+            failcount += 1
+            logging.info("ping target %s received %d of %d and thus failed"
+                % (target, good, count))
+        else:
+            failcount += 1
+            logging.error("ping target %s failed on OS level" % target)
+
+    if failcount > maxfail:
+        exitcode = False
+
+    return exitcode
+
+
+def define_new_opts():
+    """Register the ping-specific options in all_opt (imported from fencing)."""
+    all_opt["ping_count"] = {
+        "getopt" : ":",
+        "longopt" : "ping-count",
+        "required" : "0",
+        "help" : "--ping-count=[number] Number of ping-probes to send",
+        "shortdesc" : "The number of ping-probes that is being sent per target",
+        "default" : "10",
+        "order" : 1
+        }
+
+    all_opt["ping_good_count"] = {
+        "getopt" : ":",
+        "longopt" : "ping-good-count",
+        "required" : "0",
+        "help" : "--ping-good-count=[number] Number of positive ping-probes required",
+        "shortdesc" : "The number of positive ping-probes required to account a target as available",
+        "default" : "8",
+        "order" : 1
+        }
+
+    all_opt["ping_interval"] = {
+        "getopt" : ":",
+        "longopt" : "ping-interval",
+        "required" : "0",
+        "help" : "--ping-interval=[seconds] Seconds between ping-probes",
+        "shortdesc" : "The interval in seconds between ping-probes",
+        "default" : "1",
+        "order" : 1
+        }
+
+    all_opt["ping_timeout"] = {
+        "getopt" : ":",
+        "longopt" : "ping-timeout",
+        "required" : "0",
+        "help" : "--ping-timeout=[seconds] Timeout for individual ping-probes",
+        "shortdesc" : "The timeout in seconds till an individual ping-probe is accounted as lost",
+        "default" : "2",
+        "order" : 1
+        }
+
+    all_opt["ping_maxfail"] = {
+        "getopt" : ":",
+        "longopt" : "ping-maxfail",
+        "required" : "0",
+        "help" : "--ping-maxfail=[number] Number of failed ping-targets allowed",
+        "shortdesc" : "The number of failed ping-targets to still account as overall success",
+        "default" : "0",
+        "order" : 1
+        }
+
+    all_opt["ping_targets"] = {
+        "getopt" : ":",
+        "longopt" : "ping-targets",
+        "required" : "1",
+        "help" : "--ping-targets=tgt1,[inet6:]tgt2 Comma separated list of ping-targets",
+        "shortdesc" : "A comma separated list of ping-targets (optionally prepended by 'inet:' or 'inet6:') to be probed",
+        "default" : "",
+        "order" : 1
+        }
+
+
+def main():
+    """Parse the options, run the ping-test via fence_action() and exit with its result."""
+    device_opt = ["no_status", "no_password", "ping_count", "ping_good_count",
+        "ping_interval", "ping_timeout", "ping_maxfail", "ping_targets", "method"]
+    define_new_opts()
+    atexit.register(atexit_handler)
+
+    all_opt["method"]["default"] = "cycle"
+    all_opt["method"]["help"] = "-m, --method=[method] Method to fence (cycle|onoff) (Default: cycle)"
+
+    options = check_input(device_opt, process_input(device_opt))
+
+    docs = {}
+    docs["shortdesc"] = "Fence agent for ping-heuristic based fencing"
+    docs["longdesc"] = "fence_heuristics_ping uses ping-heuristics to control execution of another fence agent on the same fencing level.\
+\n.P\n\
+This is not a fence agent by itself! \
+Its only purpose is to enable/disable another fence agent that lives on the same fencing level but after fence_heuristics_ping.\
+\n.P\n\
+fence_heuristics_ping is currently provided as tech preview in RHEL-7.5."
+    docs["vendorurl"] = ""
+    show_docs(options, docs)
+
+    # move ping-test to the end of the time-window set via --delay
+    # as to give the network time to settle after the incident that has
+    # caused fencing and have the results as current as possible
+    max_pingcheck = (int(options["--ping-count"]) - 1) * \
+        int(options["--ping-interval"]) + int(options["--ping-timeout"])
+    run_delay(options, reserve=max_pingcheck)
+
+    result = fence_action(\
+        None, \
+        options, \
+        None, \
+        None, \
+        reboot_cycle_fn = ping_test,
+        sync_set_power_fn = ping_test)
+
+    # execute the remaining delay
+    run_delay(options, result=result)
+    sys.exit(result)
+
+if __name__ == "__main__":
+    main()
diff --git a/make/fencebuild.mk b/make/fencebuild.mk index 0a1f2bc..25bb0f1 100644 --- a/make/fencebuild.mk +++ b/make/fencebuild.mk @@ -28,5 +28,8 @@ define gen_agent_from_py -e 's#@''STORE_PATH@#${CLUSTERVARRUN}#g' \ -e 's#@''SUDO_PATH@#${SUDO_PATH}#g' \ + -e 's#@''PING_CMD@#${PING_CMD}#g' \ + -e 's#@''PING6_CMD@#${PING6_CMD}#g' \ + -e 's#@''PING4_CMD@#${PING4_CMD}#g' \ > $@ if [ 0 -eq `echo "$(SRC)" | grep fence_ &> /dev/null; echo $$?` ]; then \ --- /dev/null 2017-10-08 13:42:59.634387493 +0200 +++ fence-agents-4.0.11/tests/data/metadata/fence_heuristics_ping.xml 2017-10-18 20:55:23.978815450 +0200 @@ -0,0 +1,117 @@ + + +fence_heuristics_ping uses ping-heuristics to control execution of another fence agent on the same fencing level. +.P +This is not a fence agent by itself! Its only purpose is to enable/disable another fence agent that lives on the same fencing level but after fence_heuristics_ping. +.P +fence_heuristics_ping is currently provided as tech preview in RHEL-7.5. 
+ + + + + + The interval in seconds between ping-probes + + + + + The number of failed ping-targets to still account as overall success + + + + + A comma separated list of ping-targets (optionally prepended by 'inet:' or 'inet6:') to be probed + + + + + Fencing Action + + + + + The number of positive ping-probes required to account a target as available + + + + + The timeout in seconds till an individual ping-probe is accounted as lost + + + + + The number of ping-probes that is being sent per target + + + + + + Method to fence (onoff|cycle) + + + + + Verbose mode + + + + + Write debug information to given file + + + + + Write debug information to given file + + + + + Display version information and exit + + + + + Display help and exit + + + + + Wait X seconds for cmd prompt after issuing command + + + + + Wait X seconds before fencing is started + + + + + Test X seconds for status change after ON/OFF + + + + + Wait X seconds after issuing ON/OFF + + + + + Wait X seconds for cmd prompt after login + + + + + Count of attempts to retry power on + + + + + + + + + + +