From 21c2154bedceb6ebef1cae369768280d4b0f8652 Mon Sep 17 00:00:00 2001
From: Marek 'marx' Grac <mgrac@redhat.com>
Date: Fri, 5 Jun 2015 18:07:55 +0200
Subject: [PATCH 2/4] fence_compute: Improvement of fence agent
---
fence/agents/compute/fence_compute.py | 73 ++++++++++++++++++++++-------------
fence/agents/lib/fencing.py.py | 6 ++-
2 files changed, 50 insertions(+), 29 deletions(-)
diff --git a/fence/agents/compute/fence_compute.py b/fence/agents/compute/fence_compute.py
index 2b37de7..66cf08f 100644
--- a/fence/agents/compute/fence_compute.py
+++ b/fence/agents/compute/fence_compute.py
@@ -4,11 +4,11 @@ import sys
import time
import atexit
import logging
+import requests.exceptions
sys.path.append("@FENCEAGENTSLIBDIR@")
from fencing import *
from fencing import fail_usage, is_executable, run_command, run_delay
-from novaclient import client as nova_client
#BEGIN_VERSION_GENERATION
RELEASE_VERSION="4.0.11"
@@ -32,18 +32,18 @@ def get_power_status(_, options):
if nova:
try:
services = nova.services.list(host=options["--plug"])
- except Exception, e:
- fail_usage(str(e))
-
- for service in services:
- if service.binary == "nova-compute":
- if service.state == "up":
- status = "on"
- elif service.state == "down":
- status = "down"
- else:
- logging.debug("Unknown status detected from nova: " + service.state)
- break
+
+ for service in services:
+ if service.binary == "nova-compute":
+ if service.state == "up":
+ status = "on"
+ elif service.state == "down":
+ status = "off"
+ else:
+ logging.debug("Unknown status detected from nova: " + service.state)
+ break
+ except ConnectionError as (err):
+ logging.warning("Nova connection failed: " + str(err))
return status
# NOTE(sbauza); We mimic the host-evacuate module since it's only a contrib
@@ -143,15 +143,6 @@ def define_new_opts():
"default" : "",
"order": 1,
}
- all_opt["novatool-path"] = {
- "getopt" : "i:",
- "longopt" : "novatool-path",
- "help" : "-i, --novatool-path=[path] Path to nova binary",
- "required" : "0",
- "shortdesc" : "Path to nova binary",
- "default" : "@NOVA_PATH@",
- "order": 6,
- }
all_opt["domain"] = {
"getopt" : "d:",
"longopt" : "domain",
@@ -177,7 +168,7 @@ def main():
atexit.register(atexit_handler)
device_opt = ["login", "passwd", "tenant-name", "auth-url",
- "novatool-path", "no_login", "no_password", "port", "domain", "no-shared-storage"]
+ "no_login", "no_password", "port", "domain", "no-shared-storage"]
define_new_opts()
all_opt["shell_timeout"]["default"] = "180"
@@ -192,6 +183,15 @@ def main():
run_delay(options)
+ try:
+ from novaclient import client as nova_client
+ except ImportError:
+ fail_usage("nova not found or not accessible")
+
+ # Potentially we should make this a pacemaker feature
+ if options["--action"] != "list" and options["--domain"] != "" and options.has_key("--plug"):
+ options["--plug"] = options["--plug"] + "." + options["--domain"]
+
# The first argument is the Nova client version
nova = nova_client.Client('2',
options["--username"],
@@ -199,6 +199,29 @@ def main():
options["--tenant-name"],
options["--auth-url"])
+ if options["--action"] in ["on", "off", "reboot" ]:
+ try:
+ nova.services.list(host=options["--plug"])
+ except ConnectionError as (err):
+ # Yes, exit(0)
+ #
+ # It's possible that the control plane on which this
+ # agent depends is not functional
+ #
+ # In this situation, fencing is waiting for resource
+ # recovery and resource recovery is waiting for
+ # fencing.
+ #
+ # To break the cycle, we allow the fencing agent to
+ # return 'done' immediately so that we can recover the
+ # control plane. We then rely on the NovaCompute RA
+ # to call this agent directly once the control plane
+ # is up.
+ #
+ # Yes it's horrible, but still better than nova itself.
+ logging.warning("Nova connection failed: %s " % str(err))
+ sys.exit(0)
+
if options["--action"] in ["off", "reboot"]:
# Pretend we're 'on' so that the fencing library will always call set_power_status(off)
override_status = "on"
@@ -207,10 +230,6 @@ def main():
# Pretend we're 'off' so that the fencing library will always call set_power_status(on)
override_status = "off"
- # Potentially we should make this a pacemaker feature
- if options["--action"] != "list" and options["--domain"] != "" and options.has_key("--plug"):
- options["--plug"] = options["--plug"]+"."+options["--domain"]
-
result = fence_action(None, options, set_power_status, get_power_status, get_plugs_list, None)
sys.exit(result)
diff --git a/fence/agents/lib/fencing.py.py b/fence/agents/lib/fencing.py.py
index f893082..29b3a94 100644
--- a/fence/agents/lib/fencing.py.py
+++ b/fence/agents/lib/fencing.py.py
@@ -1109,9 +1109,11 @@ def fence_login(options, re_login_string=r"(login\s*: )|(Login Name: )|(usernam
conn.log_expect(options, options["--command-prompt"], int(options["--login-timeout"]))
except KeyError:
fail(EC_PASSWORD_MISSING)
- except pexpect.EOF:
+ except pexpect.EOF, exception:
+ logging.debug("%s", str(exception))
fail(EC_LOGIN_DENIED)
- except pexpect.TIMEOUT:
+ except pexpect.TIMEOUT, exception:
+ logging.debug("%s", str(exception))
fail(EC_LOGIN_DENIED)
return conn
--
1.9.3