Blob Blame History Raw
From 21c2154bedceb6ebef1cae369768280d4b0f8652 Mon Sep 17 00:00:00 2001
From: Marek 'marx' Grac <mgrac@redhat.com>
Date: Fri, 5 Jun 2015 18:07:55 +0200
Subject: [PATCH 2/4] fence_compute: Improvement of fence agent

---
 fence/agents/compute/fence_compute.py | 73 ++++++++++++++++++++++-------------
 fence/agents/lib/fencing.py.py        |  6 ++-
 2 files changed, 50 insertions(+), 29 deletions(-)

diff --git a/fence/agents/compute/fence_compute.py b/fence/agents/compute/fence_compute.py
index 2b37de7..66cf08f 100644
--- a/fence/agents/compute/fence_compute.py
+++ b/fence/agents/compute/fence_compute.py
@@ -4,11 +4,11 @@ import sys
 import time
 import atexit
 import logging
+import requests.exceptions
 
 sys.path.append("@FENCEAGENTSLIBDIR@")
 from fencing import *
 from fencing import fail_usage, is_executable, run_command, run_delay
-from novaclient import client as nova_client
 
 #BEGIN_VERSION_GENERATION
 RELEASE_VERSION="4.0.11"
@@ -32,18 +32,18 @@ def get_power_status(_, options):
 	if nova:
 		try:
 			services = nova.services.list(host=options["--plug"])
-		except Exception, e:
-			fail_usage(str(e))
-
-		for service in services:
-			if service.binary == "nova-compute":
-				if service.state == "up":
-					status = "on"
-				elif service.state == "down":
-					status = "down"
-				else:
-					logging.debug("Unknown status detected from nova: " + service.state)
-				break
+
+			for service in services:
+				if service.binary == "nova-compute":
+					if service.state == "up":
+						status = "on"
+					elif service.state == "down":
+						status = "off"
+					else:
+						logging.debug("Unknown status detected from nova: " + service.state)
+					break
+		except ConnectionError as (err):
+			logging.warning("Nova connection failed: " + str(err))
 	return status
 
 # NOTE(sbauza); We mimic the host-evacuate module since it's only a contrib
@@ -143,15 +143,6 @@ def define_new_opts():
 		"default" : "",
 		"order": 1,
 	}
-	all_opt["novatool-path"] = {
-		"getopt" : "i:",
-		"longopt" : "novatool-path",
-		"help" : "-i, --novatool-path=[path]     Path to nova binary",
-		"required" : "0",
-		"shortdesc" : "Path to nova binary",
-		"default" : "@NOVA_PATH@",
-		"order": 6,
-	}
 	all_opt["domain"] = {
 		"getopt" : "d:",
 		"longopt" : "domain",
@@ -177,7 +168,7 @@ def main():
 	atexit.register(atexit_handler)
 
 	device_opt = ["login", "passwd", "tenant-name", "auth-url",
-		"novatool-path", "no_login", "no_password", "port", "domain", "no-shared-storage"]
+		"no_login", "no_password", "port", "domain", "no-shared-storage"]
 	define_new_opts()
 	all_opt["shell_timeout"]["default"] = "180"
 
@@ -192,6 +183,15 @@ def main():
 
 	run_delay(options)
 
+	try:
+		from novaclient import client as nova_client
+	except ImportError:
+		fail_usage("nova not found or not accessible")
+
+	# Potentially we should make this a pacemaker feature
+	if options["--action"] != "list" and options["--domain"] != "" and options.has_key("--plug"):
+		options["--plug"] = options["--plug"] + "." + options["--domain"]
+
 	# The first argument is the Nova client version
 	nova = nova_client.Client('2',
 		options["--username"],
@@ -199,6 +199,29 @@ def main():
 		options["--tenant-name"],
 		options["--auth-url"])
 
+	if options["--action"] in ["on", "off", "reboot" ]:
+		try:
+			nova.services.list(host=options["--plug"])
+		except ConnectionError as (err):
+			# Yes, exit(0)
+			#
+			# It's possible that the control plane on which this
+			# agent depends is not functional
+			#
+			# In this situation, fencing is waiting for resource
+			# recovery and resource recovery is waiting for
+			# fencing.
+			#
+			# To break the cycle, we allow the fencing agent to
+			# return 'done' immediately so that we can recover the
+			# control plane. We then rely on the NovaCompute RA
+			# to call this agent directly once the control plane
+			# is up.
+			#
+			# Yes it's horrible, but still better than nova itself.
+			logging.warning("Nova connection failed: %s " % str(err))
+			sys.exit(0)
+
 	if options["--action"] in ["off", "reboot"]:
 		# Pretend we're 'on' so that the fencing library will always call set_power_status(off)
 		override_status = "on"
@@ -207,10 +230,6 @@ def main():
 		# Pretend we're 'off' so that the fencing library will always call set_power_status(on)
 		override_status = "off"
 
-	# Potentially we should make this a pacemaker feature
-	if options["--action"] != "list" and options["--domain"] != "" and options.has_key("--plug"):
-		options["--plug"] = options["--plug"]+"."+options["--domain"]
-
 	result = fence_action(None, options, set_power_status, get_power_status, get_plugs_list, None)
 	sys.exit(result)
 
diff --git a/fence/agents/lib/fencing.py.py b/fence/agents/lib/fencing.py.py
index f893082..29b3a94 100644
--- a/fence/agents/lib/fencing.py.py
+++ b/fence/agents/lib/fencing.py.py
@@ -1109,9 +1109,11 @@ def fence_login(options, re_login_string=r"(login\s*: )|(Login Name:  )|(usernam
 					conn.log_expect(options, options["--command-prompt"], int(options["--login-timeout"]))
 			except KeyError:
 				fail(EC_PASSWORD_MISSING)
-	except pexpect.EOF:
+	except pexpect.EOF, exception:
+		logging.debug("%s", str(exception))
 		fail(EC_LOGIN_DENIED)
-	except pexpect.TIMEOUT:
+	except pexpect.TIMEOUT, exception:
+		logging.debug("%s", str(exception))
 		fail(EC_LOGIN_DENIED)
 	return conn
 
-- 
1.9.3