From 563c93016b0581c2f6beea1f3f18e76e25491895 Mon Sep 17 00:00:00 2001 From: Marek 'marx' Grac Date: Fri, 5 Jun 2015 18:03:00 +0200 Subject: [PATCH 1/4] fence_compute: Fence agent for Nova compute machines Author: Andrew Beekhof --- configure.ac | 2 + fence/agents/compute/Makefile.am | 17 +++ fence/agents/compute/fence_compute.py | 218 ++++++++++++++++++++++++++++++++++ make/fencebuild.mk | 1 + tests/data/metadata/fence_compute.xml | 121 +++++++++++++++++++ 5 files changed, 359 insertions(+) create mode 100644 fence/agents/compute/Makefile.am create mode 100644 fence/agents/compute/fence_compute.py create mode 100644 tests/data/metadata/fence_compute.xml diff --git a/configure.ac b/configure.ac index b603878..9d996d3 100644 --- a/configure.ac +++ b/configure.ac @@ -167,6 +167,7 @@ AC_PATH_PROG([COROSYNC_CMAPCTL_PATH], [corosync-cmapctl], [/usr/sbin/corosync-cm AC_PATH_PROG([SG_PERSIST_PATH], [sg_persist], [/usr/bin/sg_persist]) AC_PATH_PROG([SG_TURS_PATH], [sg_turs], [/usr/bin/sg_turs]) AC_PATH_PROG([VGS_PATH], [vgs], [/usr/sbin/vgs]) +AC_PATH_PROG([NOVA_PATH], [nova], [/usr/bin/nova]) ## do subst AC_SUBST([DEFAULT_CONFIG_DIR]) @@ -265,6 +266,7 @@ AC_CONFIG_FILES([Makefile fence/agents/brocade/Makefile fence/agents/cisco_mds/Makefile fence/agents/cisco_ucs/Makefile + fence/agents/compute/Makefile fence/agents/docker/Makefile fence/agents/drac/Makefile fence/agents/drac5/Makefile diff --git a/fence/agents/compute/Makefile.am b/fence/agents/compute/Makefile.am new file mode 100644 index 0000000..ab21272 --- /dev/null +++ b/fence/agents/compute/Makefile.am @@ -0,0 +1,17 @@ +MAINTAINERCLEANFILES = Makefile.in + +TARGET = fence_compute + +SRC = $(TARGET).py + +EXTRA_DIST = $(SRC) + +sbin_SCRIPTS = $(TARGET) + +man_MANS = $(TARGET).8 + +FENCE_TEST_ARGS = -l test -p test -n 1 + +include $(top_srcdir)/make/fencebuild.mk +include $(top_srcdir)/make/fenceman.mk +include $(top_srcdir)/make/agentpycheck.mk diff --git a/fence/agents/compute/fence_compute.py 
b/fence/agents/compute/fence_compute.py
new file mode 100644
index 0000000..2b37de7
--- /dev/null
+++ b/fence/agents/compute/fence_compute.py
@@ -0,0 +1,260 @@
+#!/usr/bin/python -tt
+
+import sys
+import time
+import atexit
+import logging
+
+sys.path.append("@FENCEAGENTSLIBDIR@")
+from fencing import *
+from fencing import fail_usage, is_executable, run_command, run_delay
+from novaclient import client as nova_client
+
+#BEGIN_VERSION_GENERATION
+RELEASE_VERSION="4.0.11"
+BUILD_DATE="(built Wed Nov 12 06:33:38 EST 2014)"
+REDHAT_COPYRIGHT="Copyright (C) Red Hat, Inc. 2004-2010 All rights reserved."
+#END_VERSION_GENERATION
+
+# When non-empty, get_power_status() returns this value instead of querying nova
+override_status = ""
+# novaclient handle; assigned in main() and None until then
+nova = None
+
+def get_power_status(_, options):
+    """Return the power status of the compute host named by options["--plug"].
+
+    Returns override_status verbatim when it is set.  Otherwise the state of
+    the host's nova-compute service is mapped to "on" (up) or "off" (down);
+    "unknown" is returned when nova is not initialised, no nova-compute
+    service is listed for the host, or the state is neither up nor down.
+    """
+    global override_status
+
+    status = "unknown"
+    logging.debug("get action: " + options["--action"])
+
+    if len(override_status):
+        logging.debug("Pretending we're " + override_status)
+        return override_status
+
+    if nova:
+        try:
+            services = nova.services.list(host=options["--plug"])
+        except Exception as e:
+            fail_usage(str(e))
+
+        for service in services:
+            if service.binary == "nova-compute":
+                if service.state == "up":
+                    status = "on"
+                elif service.state == "down":
+                    # Must be "off": set_power_status() polls until this
+                    # function returns "off" before evacuating the host
+                    status = "off"
+                else:
+                    logging.debug("Unknown status detected from nova: " + service.state)
+                break
+    return status
+
+# NOTE(sbauza); We mimic the host-evacuate module since it's only a contrib
+# module which is not stable
+def _server_evacuate(server, on_shared_storage):
+    """Ask nova to evacuate one server and report whether it was accepted.
+
+    Returns a dict with the keys "server_uuid", "evacuate_accepted" and
+    "error_message"; exceptions raised by nova are captured in the latter.
+    """
+    success = True
+    error_message = ""
+    try:
+        nova.servers.evacuate(server=server['uuid'], on_shared_storage=on_shared_storage)
+    except Exception as e:
+        success = False
+        error_message = "Error while evacuating instance: %s" % e
+
+    return {
+        "server_uuid": server['uuid'],
+        "evacuate_accepted": success,
+        "error_message": error_message,
+        }
+
+def _host_evacuate(host, on_shared_storage):
+    """Evacuate every server on the hypervisors matching host.
+
+    Returns the list of per-server result dicts built by _server_evacuate().
+    """
+    hypervisors = nova.hypervisors.search(host, servers=True)
+    response = []
+    for hyper in hypervisors:
+        if hasattr(hyper, 'servers'):
+            for server in hyper.servers:
+                response.append(_server_evacuate(server, on_shared_storage))
+    return response
+
+def set_power_status(_, options):
+    """Perform the "on" or "off" action for the host in options["--plug"].
+
+    "on" re-enables the nova-compute service when nova already reports it
+    as up, otherwise it only sets override_status to "on".  Any other action
+    waits for nova to report the host as "off" and then evacuates it.
+    """
+    global override_status
+
+    override_status = ""
+    logging.debug("set action: " + options["--action"])
+
+    if not nova:
+        return
+
+    if options["--action"] == "on":
+        if get_power_status(_, options) == "on":
+            nova.services.enable(options["--plug"], 'nova-compute')
+        else:
+            # Pretend we're 'on' so that the fencing library doesn't loop forever waiting for the node to boot
+            override_status = "on"
+        return
+
+    # need to wait for nova to update its internal status or we
+    # cannot call host-evacuate
+    while get_power_status(_, options) != "off":
+        # Loop forever if need be.
+        #
+        # Some callers (such as Pacemaker) will have a timer
+        # running and kill us if necessary
+        logging.debug("Waiting for nova to update it's internal state")
+        time.sleep(1)
+
+    # NOTE(review): this tests only for the presence of the key; the option
+    # is declared with "default": "False", so if the fencing library copies
+    # defaults into options the key is always present -- confirm
+    if "--no-shared-storage" not in options:
+        # If the admin sets this when they DO have shared
+        # storage in use, then they get what they asked for
+        on_shared_storage = True
+    else:
+        on_shared_storage = False
+
+    _host_evacuate(options["--plug"], on_shared_storage)
+    return
+
+def get_plugs_list(_, options):
+    """Return a dict mapping each nova hypervisor hostname to ("", None).
+
+    For the "list" action with a non-empty --domain, a trailing
+    ".<domain>" is removed from the hostname used as the key.
+    """
+    result = {}
+
+    if nova:
+        hypervisors = nova.hypervisors.list()
+        for hypervisor in hypervisors:
+            longhost = hypervisor.hypervisor_hostname
+            if options["--action"] == "list" and options["--domain"] != "":
+                # Strip the domain only where it is a suffix; a plain
+                # replace() would also remove it from the middle of a name
+                suffix = "." + options["--domain"]
+                if longhost.endswith(suffix):
+                    shorthost = longhost[:-len(suffix)]
+                else:
+                    shorthost = longhost
+                result[shorthost] = ("", None)
+            else:
+                result[longhost] = ("", None)
+    return result
+
+
+def define_new_opts():
+    """Register the nova-specific command line options in all_opt."""
+    all_opt["tenant-name"] = {
+        "getopt" : "t:",
+        "longopt" : "tenant-name",
+        "help" : "-t, --tenant-name=[tenant] Keystone Admin Tenant",
+        "required" : "0",
+        "shortdesc" : "Keystone Admin Tenant",
+        "default" : "",
+        "order": 1,
+    }
+    all_opt["auth-url"] = {
+        "getopt" : "k:",
+        "longopt" : "auth-url",
+        "help" : "-k, --auth-url=[url] Keystone Admin Auth URL",
+        "required" : "0",
+        "shortdesc" : "Keystone Admin Auth URL",
+        "default" : "",
+        "order": 1,
+    }
+    all_opt["novatool-path"] = {
+        "getopt" : "i:",
+        "longopt" : "novatool-path",
+        "help" : "-i, --novatool-path=[path] Path to nova binary",
+        "required" : "0",
+        "shortdesc" : "Path to nova binary",
+        "default" : "@NOVA_PATH@",
+        "order": 6,
+    }
+    all_opt["domain"] = {
+        "getopt" : "d:",
+        "longopt" : "domain",
+        "help" : "-d, --domain=[string] DNS domain in which hosts live, useful when the cluster uses short names and nova uses FQDN",
+        "required" : "0",
+        "shortdesc" : "DNS domain in which hosts live",
+        "default" : "",
+        "order": 5,
+    }
+    all_opt["no-shared-storage"] = {
+        "getopt" : "",
+        "longopt" : "no-shared-storage",
+        "help" : "--no-shared-storage Disable functionality for shared storage",
+        "required" : "0",
+        "shortdesc" : "Disable functionality for dealing with shared storage",
+        "default" : "False",
+        "order": 5,
+    }
+
+def main():
+    """Parse options, connect to nova and run the requested fence action."""
+    global override_status
+    global nova
+    atexit.register(atexit_handler)
+
+    device_opt = ["login", "passwd", "tenant-name", "auth-url",
+        "novatool-path", "no_login", "no_password", "port", "domain", "no-shared-storage"]
+    define_new_opts()
+    all_opt["shell_timeout"]["default"] = "180"
+
+    options = check_input(device_opt, process_input(device_opt))
+
+    docs = {}
+    docs["shortdesc"] = "Fence agent for nova compute nodes"
+    docs["longdesc"] = "fence_nova_host is a Nova fencing notification agent"
+    docs["vendorurl"] = ""
+
+    show_docs(options, docs)
+
+    run_delay(options)
+
+    # The first argument is the Nova client version
+    nova = nova_client.Client('2',
+        options["--username"],
+        options["--password"],
+        options["--tenant-name"],
+        options["--auth-url"])
+
+    if options["--action"] in ["off", "reboot"]:
+        # Pretend we're 'on' so that the fencing library will always call set_power_status(off)
+        override_status = "on"
+
+    if options["--action"] == "on":
+        # Pretend we're 'off' so that the fencing library will always call set_power_status(on)
+        override_status = "off"
+
+    # Potentially we should make this a pacemaker feature
+    if options["--action"] != "list" and options["--domain"] != "" and "--plug" in options:
+        options["--plug"] = options["--plug"]+"."+options["--domain"]
+
+    result = fence_action(None, options, set_power_status, get_power_status, get_plugs_list, None)
+    sys.exit(result)
+
+if __name__ == "__main__":
+    main()
diff --git a/make/fencebuild.mk b/make/fencebuild.mk index 1c4be6b..b59c069 100644 --- a/make/fencebuild.mk +++ b/make/fencebuild.mk @@ -16,6 +16,7 @@ $(TARGET): $(SRC) -e 's#@''SG_PERSIST_PATH@#${SG_PERSIST_PATH}#g' \ -e 's#@''SG_TURS_PATH@#${SG_TURS_PATH}#g' \ -e 's#@''VGS_PATH@#${VGS_PATH}#g' \ + -e 's#@''NOVA_PATH@#${NOVA_PATH}#g' \ > $@ if [ 0 -eq `echo "$(SRC)" | grep fence_ &> /dev/null; echo $$?` ]; then \ diff --git a/tests/data/metadata/fence_compute.xml b/tests/data/metadata/fence_compute.xml new file mode 100644 index 0000000..ff7c06c --- /dev/null +++ b/tests/data/metadata/fence_compute.xml @@ -0,0 +1,121 @@ + + +fence_nova_host is a Nova fencing notification agent + + + + + + Physical plug number, name of virtual machine or UUID + + + + + Script to retrieve password + + + + + Keystone Admin Auth URL + + + + + Login password or passphrase + + + + + Keystone Admin Tenant + + + + + Fencing Action + + + + + Login Name + + + + + DNS domain in which hosts live + + + + + Disable functionality for dealing with shared storage + + + + + Path to
nova binary + + + + + Verbose mode + + + + + Write debug information to given file + + + + + Display version information and exit + + + + + Display help and exit + + + + + Separator for CSV created by operation list + + + + + Wait X seconds after issuing ON/OFF + + + + + Wait X seconds for cmd prompt after login + + + + + Test X seconds for status change after ON/OFF + + + + + Wait X seconds before fencing is started + + + + + Wait X seconds for cmd prompt after issuing command + + + + + Count of attempts to retry power on + + + + + + + + + + + + -- 1.9.3