Blame SOURCES/bz2078244-fence_gce-update.patch

3f9bc7
--- fence-agents-4.2.1/agents/gce/fence_gce.py	2022-04-28 15:58:42.949723547 +0200
3f9bc7
+++ fence-agents-4.2.1/agents/gce/fence_gce.py2	2022-04-28 15:59:21.054915265 +0200
3f9bc7
@@ -1,10 +1,22 @@
3f9bc7
 #!@PYTHON@ -tt
3f9bc7
 
3f9bc7
+#
3f9bc7
+# Requires the googleapiclient and oauth2client
3f9bc7
+# RHEL 7.x: google-api-python-client==1.6.7 python-gflags==2.0 pyasn1==0.4.8 rsa==3.4.2 pysocks==1.7.1 httplib2==0.19.0
3f9bc7
+# RHEL 8.x: pysocks==1.7.1 httplib2==0.19.0
3f9bc7
+# SLES 12.x: python-google-api-python-client python-oauth2client python-oauth2client-gce pysocks==1.7.1 httplib2==0.19.0
3f9bc7
+# SLES 15.x: python3-google-api-python-client python3-oauth2client pysocks==1.7.1 httplib2==0.19.0
3f9bc7
+#
3f9bc7
+
3f9bc7
 import atexit
3f9bc7
 import logging
3f9bc7
+import json
3f9bc7
+import re
3f9bc7
 import os
3f9bc7
+import socket
3f9bc7
 import sys
3f9bc7
 import time
3f9bc7
+
3f9bc7
 if sys.version_info >= (3, 0):
3f9bc7
   # Python 3 imports.
3f9bc7
   import urllib.parse as urlparse
3f9bc7
@@ -15,7 +27,7 @@
3f9bc7
   import urllib2 as urlrequest
3f9bc7
 sys.path.append("@FENCEAGENTSLIBDIR@")
3f9bc7
 
3f9bc7
-from fencing import fail_usage, run_delay, all_opt, atexit_handler, check_input, process_input, show_docs, fence_action
3f9bc7
+from fencing import fail_usage, run_delay, all_opt, atexit_handler, check_input, process_input, show_docs, fence_action, run_command
3f9bc7
 try:
3f9bc7
   sys.path.insert(0, '/usr/lib/fence-agents/bundled/google')
3f9bc7
   import httplib2
3f9bc7
@@ -30,6 +42,85 @@
3f9bc7
 
3f9bc7
 METADATA_SERVER = 'http://metadata.google.internal/computeMetadata/v1/'
3f9bc7
 METADATA_HEADERS = {'Metadata-Flavor': 'Google'}
3f9bc7
+INSTANCE_LINK = 'https://www.googleapis.com/compute/v1/projects/{}/zones/{}/instances/{}'
3f9bc7
+
3f9bc7
+def run_on_fail(options):
3f9bc7
+	if "--runonfail" in options:
3f9bc7
+		run_command(options, options["--runonfail"])
3f9bc7
+
3f9bc7
+def fail_fence_agent(options, message):
3f9bc7
+	run_on_fail(options)
3f9bc7
+	fail_usage(message)
3f9bc7
+
3f9bc7
+def raise_fence_agent(options, message):
3f9bc7
+	run_on_fail(options)
3f9bc7
+	raise Exception(message)
3f9bc7
+
3f9bc7
+#
3f9bc7
+# Will use baremetalsolution setting or the environment variable
3f9bc7
+# FENCE_GCE_URI_REPLACEMENTS to replace the uri for calls to *.googleapis.com.
3f9bc7
+#
3f9bc7
+def replace_api_uri(options, http_request):
3f9bc7
+	uri_replacements = []
3f9bc7
+	# put any env var replacements first, then baremetalsolution if in options
3f9bc7
+	if "FENCE_GCE_URI_REPLACEMENTS" in os.environ:
3f9bc7
+		logging.debug("FENCE_GCE_URI_REPLACEMENTS environment variable exists")
3f9bc7
+		env_uri_replacements = os.environ["FENCE_GCE_URI_REPLACEMENTS"]
3f9bc7
+		try:
3f9bc7
+			uri_replacements_json = json.loads(env_uri_replacements)
3f9bc7
+			if isinstance(uri_replacements_json, list):
3f9bc7
+				uri_replacements = uri_replacements_json
3f9bc7
+			else:
3f9bc7
+				logging.warning("FENCE_GCE_URI_REPLACEMENTS exists, but is not a JSON List")
3f9bc7
+		except ValueError as e:
3f9bc7
+			logging.warning("FENCE_GCE_URI_REPLACEMENTS exists but is not valid JSON")
3f9bc7
+	if "--baremetalsolution" in options:
3f9bc7
+		uri_replacements.append(
3f9bc7
+			{
3f9bc7
+				"matchlength": 4,
3f9bc7
+				"match": "https://compute.googleapis.com/compute/v1/projects/(.*)/zones/(.*)/instances/(.*)/reset(.*)",
3f9bc7
+				"replace": "https://baremetalsolution.googleapis.com/v1/projects/\\1/locations/\\2/instances/\\3:resetInstance\\4"
3f9bc7
+			})
3f9bc7
+	for uri_replacement in uri_replacements:
3f9bc7
+		# each uri_replacement should have matchlength, match, and replace
3f9bc7
+		if "matchlength" not in uri_replacement or "match" not in uri_replacement or "replace" not in uri_replacement:
3f9bc7
+			logging.warning("FENCE_GCE_URI_REPLACEMENTS missing matchlength, match, or replace in %s" % uri_replacement)
3f9bc7
+			continue
3f9bc7
+		match = re.match(uri_replacement["match"], http_request.uri)
3f9bc7
+		if match is None or len(match.groups()) != uri_replacement["matchlength"]:
3f9bc7
+			continue
3f9bc7
+		replaced_uri = re.sub(uri_replacement["match"], uri_replacement["replace"], http_request.uri)
3f9bc7
+		match = re.match("https:\/\/.*.googleapis.com", replaced_uri)
3f9bc7
+		if match is None or match.start() != 0:
3f9bc7
+			logging.warning("FENCE_GCE_URI_REPLACEMENTS replace is not "
3f9bc7
+				"targeting googleapis.com, ignoring it: %s" % replaced_uri)
3f9bc7
+			continue
3f9bc7
+		logging.debug("Replacing googleapis uri %s with %s" % (http_request.uri, replaced_uri))
3f9bc7
+		http_request.uri = replaced_uri
3f9bc7
+		break
3f9bc7
+	return http_request
3f9bc7
+
3f9bc7
+def retry_api_execute(options, http_request):
3f9bc7
+	replaced_http_request = replace_api_uri(options, http_request)
3f9bc7
+	retries = 3
3f9bc7
+	if options.get("--retries"):
3f9bc7
+		retries = int(options.get("--retries"))
3f9bc7
+	retry_sleep = 5
3f9bc7
+	if options.get("--retrysleep"):
3f9bc7
+		retry_sleep = int(options.get("--retrysleep"))
3f9bc7
+	retry = 0
3f9bc7
+	current_err = None
3f9bc7
+	while retry <= retries:
3f9bc7
+		if retry > 0:
3f9bc7
+			time.sleep(retry_sleep)
3f9bc7
+		try:
3f9bc7
+			return replaced_http_request.execute()
3f9bc7
+		except Exception as err:
3f9bc7
+			current_err = err
3f9bc7
+			logging.warning("Could not execute api call to: %s, retry: %s, "
3f9bc7
+				"err: %s" % (replaced_http_request.uri, retry, str(err)))
3f9bc7
+		retry += 1
3f9bc7
+	raise current_err
3f9bc7
 
3f9bc7
 
3f9bc7
 def translate_status(instance_status):
3f9bc7
@@ -43,86 +134,174 @@
3f9bc7
 
3f9bc7
 def get_nodes_list(conn, options):
3f9bc7
 	result = {}
3f9bc7
+	if "--zone" not in options:
3f9bc7
+		fail_fence_agent(options, "Failed: get_nodes_list: Please specify the --zone in the command")
3f9bc7
 	try:
3f9bc7
-		instanceList = conn.instances().list(project=options["--project"], zone=options["--zone"]).execute()
3f9bc7
-		for instance in instanceList["items"]:
3f9bc7
-			result[instance["id"]] = (instance["name"], translate_status(instance["status"]))
3f9bc7
+		for zone in options["--zone"].split(","):
3f9bc7
+			instanceList = retry_api_execute(options, conn.instances().list(
3f9bc7
+				project=options["--project"],
3f9bc7
+				zone=zone))
3f9bc7
+			for instance in instanceList["items"]:
3f9bc7
+				result[instance["id"]] = (instance["name"], translate_status(instance["status"]))
3f9bc7
 	except Exception as err:
3f9bc7
-		fail_usage("Failed: get_nodes_list: {}".format(str(err)))
3f9bc7
+		fail_fence_agent(options, "Failed: get_nodes_list: {}".format(str(err)))
3f9bc7
 
3f9bc7
 	return result
3f9bc7
 
3f9bc7
 
3f9bc7
 def get_power_status(conn, options):
3f9bc7
+	logging.debug("get_power_status")
3f9bc7
+	# if this is bare metal we need to just send back the opposite of the
3f9bc7
+	# requested action: if on send off, if off send on
3f9bc7
+	if "--baremetalsolution" in options:
3f9bc7
+		if options.get("--action") == "on":
3f9bc7
+			return "off"
3f9bc7
+		else:
3f9bc7
+			return "on"
3f9bc7
+	# If zone is not listed for an entry we attempt to get it automatically
3f9bc7
+	instance = options["--plug"]
3f9bc7
+	zone = get_zone(conn, options, instance) if "--plugzonemap" not in options else options["--plugzonemap"][instance]
3f9bc7
+	instance_status = get_instance_power_status(conn, options, instance, zone)
3f9bc7
+	# If any of the instances do not match the intended status we return the
3f9bc7
+	# opposite status so that the fence agent can change it.
3f9bc7
+	if instance_status != options.get("--action"):
3f9bc7
+		return instance_status
3f9bc7
+
3f9bc7
+	return options.get("--action")
3f9bc7
+
3f9bc7
+
3f9bc7
+def get_instance_power_status(conn, options, instance, zone):
3f9bc7
 	try:
3f9bc7
-		instance = conn.instances().get(
3f9bc7
-				project=options["--project"],
3f9bc7
-				zone=options["--zone"],
3f9bc7
-				instance=options["--plug"]).execute()
3f9bc7
+		instance = retry_api_execute(
3f9bc7
+				options,
3f9bc7
+				conn.instances().get(project=options["--project"], zone=zone, instance=instance))
3f9bc7
 		return translate_status(instance["status"])
3f9bc7
 	except Exception as err:
3f9bc7
-		fail_usage("Failed: get_power_status: {}".format(str(err)))
3f9bc7
+		fail_fence_agent(options, "Failed: get_instance_power_status: {}".format(str(err)))
3f9bc7
 
3f9bc7
 
3f9bc7
-def wait_for_operation(conn, project, zone, operation):
3f9bc7
+def check_for_existing_operation(conn, options, instance, zone, operation_type):
3f9bc7
+	logging.debug("check_for_existing_operation")
3f9bc7
+	if "--baremetalsolution" in options:
3f9bc7
+		# There is no API for checking in progress operations
3f9bc7
+		return False
3f9bc7
+
3f9bc7
+	project = options["--project"]
3f9bc7
+	target_link = INSTANCE_LINK.format(project, zone, instance)
3f9bc7
+	query_filter = '(targetLink = "{}") AND (operationType = "{}") AND (status = "RUNNING")'.format(target_link, operation_type)
3f9bc7
+	result = retry_api_execute(
3f9bc7
+			options,
3f9bc7
+			conn.zoneOperations().list(project=project, zone=zone, filter=query_filter, maxResults=1))
3f9bc7
+
3f9bc7
+	if "items" in result and result["items"]:
3f9bc7
+		logging.info("Existing %s operation found", operation_type)
3f9bc7
+		return result["items"][0]
3f9bc7
+
3f9bc7
+
3f9bc7
+def wait_for_operation(conn, options, zone, operation):
3f9bc7
+	if 'name' not in operation:
3f9bc7
+		logging.warning('Cannot wait for operation to complete, the'
3f9bc7
+		' requested operation will continue asynchronously')
3f9bc7
+		return False
3f9bc7
+
3f9bc7
+	wait_time = 0
3f9bc7
+	project = options["--project"]
3f9bc7
 	while True:
3f9bc7
-		result = conn.zoneOperations().get(
3f9bc7
+		result = retry_api_execute(options, conn.zoneOperations().get(
3f9bc7
 			project=project,
3f9bc7
 			zone=zone,
3f9bc7
-			operation=operation['name']).execute()
3f9bc7
+			operation=operation['name']))
3f9bc7
 		if result['status'] == 'DONE':
3f9bc7
 			if 'error' in result:
3f9bc7
-				raise Exception(result['error'])
3f9bc7
-			return
3f9bc7
+				raise_fence_agent(options, result['error'])
3f9bc7
+			return True
3f9bc7
+
3f9bc7
+		if "--errortimeout" in options and wait_time > int(options["--errortimeout"]):
3f9bc7
+			raise_fence_agent(options, "Operation did not complete before the timeout.")
3f9bc7
+
3f9bc7
+		if "--warntimeout" in options and wait_time > int(options["--warntimeout"]):
3f9bc7
+			logging.warning("Operation did not complete before the timeout.")
3f9bc7
+			if "--runonwarn" in options:
3f9bc7
+				run_command(options, options["--runonwarn"])
3f9bc7
+			return False
3f9bc7
+
3f9bc7
+		wait_time = wait_time + 1
3f9bc7
 		time.sleep(1)
3f9bc7
 
3f9bc7
 
3f9bc7
 def set_power_status(conn, options):
3f9bc7
+	logging.debug("set_power_status")
3f9bc7
+	instance = options["--plug"]
3f9bc7
+	# If zone is not listed for an entry we attempt to get it automatically
3f9bc7
+	zone = get_zone(conn, options, instance) if "--plugzonemap" not in options else options["--plugzonemap"][instance]
3f9bc7
+	set_instance_power_status(conn, options, instance, zone, options["--action"])
3f9bc7
+
3f9bc7
+
3f9bc7
+def set_instance_power_status(conn, options, instance, zone, action):
3f9bc7
+	logging.info("Setting power status of %s in zone %s", instance, zone)
3f9bc7
+	project = options["--project"]
3f9bc7
+
3f9bc7
 	try:
3f9bc7
-		if options["--action"] == "off":
3f9bc7
-			logging.info("Issuing poweroff of %s in zone %s" % (options["--plug"], options["--zone"]))
3f9bc7
-			operation = conn.instances().stop(
3f9bc7
-					project=options["--project"],
3f9bc7
-					zone=options["--zone"],
3f9bc7
-					instance=options["--plug"]).execute()
3f9bc7
-			wait_for_operation(conn, options["--project"], options["--zone"], operation)
3f9bc7
-			logging.info("Poweroff of %s in zone %s complete" % (options["--plug"], options["--zone"]))
3f9bc7
-		elif options["--action"] == "on":
3f9bc7
-			logging.info("Issuing poweron of %s in zone %s" % (options["--plug"], options["--zone"]))
3f9bc7
-			operation = conn.instances().start(
3f9bc7
-					project=options["--project"],
3f9bc7
-					zone=options["--zone"],
3f9bc7
-					instance=options["--plug"]).execute()
3f9bc7
-			wait_for_operation(conn, options["--project"], options["--zone"], operation)
3f9bc7
-			logging.info("Poweron of %s in zone %s complete" % (options["--plug"], options["--zone"]))
3f9bc7
+		if action == "off":
3f9bc7
+			logging.info("Issuing poweroff of %s in zone %s", instance, zone)
3f9bc7
+			operation = check_for_existing_operation(conn, options, instance, zone, "stop")
3f9bc7
+			if operation and "--earlyexit" in options:
3f9bc7
+				return
3f9bc7
+			if not operation:
3f9bc7
+				operation = retry_api_execute(
3f9bc7
+						options,
3f9bc7
+						conn.instances().stop(project=project, zone=zone, instance=instance))
3f9bc7
+			logging.info("Poweroff command completed, waiting for the operation to complete")
3f9bc7
+			if wait_for_operation(conn, options, zone, operation):
3f9bc7
+				logging.info("Poweroff of %s in zone %s complete", instance, zone)
3f9bc7
+		elif action == "on":
3f9bc7
+			logging.info("Issuing poweron of %s in zone %s", instance, zone)
3f9bc7
+			operation = check_for_existing_operation(conn, options, instance, zone, "start")
3f9bc7
+			if operation and "--earlyexit" in options:
3f9bc7
+				return
3f9bc7
+			if not operation:
3f9bc7
+				operation = retry_api_execute(
3f9bc7
+						options,
3f9bc7
+						conn.instances().start(project=project, zone=zone, instance=instance))
3f9bc7
+			if wait_for_operation(conn, options, zone, operation):
3f9bc7
+				logging.info("Poweron of %s in zone %s complete", instance, zone)
3f9bc7
 	except Exception as err:
3f9bc7
-		fail_usage("Failed: set_power_status: {}".format(str(err)))
3f9bc7
-
3f9bc7
+		fail_fence_agent(options, "Failed: set_instance_power_status: {}".format(str(err)))
3f9bc7
 
3f9bc7
 def power_cycle(conn, options):
3f9bc7
+	logging.debug("power_cycle")
3f9bc7
+	instance = options["--plug"]
3f9bc7
+	# If zone is not listed for an entry we attempt to get it automatically
3f9bc7
+	zone = get_zone(conn, options, instance) if "--plugzonemap" not in options else options["--plugzonemap"][instance]
3f9bc7
+	return power_cycle_instance(conn, options, instance, zone)
3f9bc7
+
3f9bc7
+
3f9bc7
+def power_cycle_instance(conn, options, instance, zone):
3f9bc7
+	logging.info("Issuing reset of %s in zone %s", instance, zone)
3f9bc7
+	project = options["--project"]
3f9bc7
+
3f9bc7
 	try:
3f9bc7
-		logging.info('Issuing reset of %s in zone %s' % (options["--plug"], options["--zone"]))
3f9bc7
-		operation = conn.instances().reset(
3f9bc7
-				project=options["--project"],
3f9bc7
-				zone=options["--zone"],
3f9bc7
-				instance=options["--plug"]).execute()
3f9bc7
-		wait_for_operation(conn, options["--project"], options["--zone"], operation)
3f9bc7
-		logging.info('Reset of %s in zone %s complete' % (options["--plug"], options["--zone"]))
3f9bc7
+		operation = check_for_existing_operation(conn, options, instance, zone, "reset")
3f9bc7
+		if operation and "--earlyexit" in options:
3f9bc7
+			return True
3f9bc7
+		if not operation:
3f9bc7
+			operation = retry_api_execute(
3f9bc7
+					options,
3f9bc7
+					conn.instances().reset(project=project, zone=zone, instance=instance))
3f9bc7
+		logging.info("Reset command sent, waiting for the operation to complete")
3f9bc7
+		if wait_for_operation(conn, options, zone, operation):
3f9bc7
+			logging.info("Reset of %s in zone %s complete", instance, zone)
3f9bc7
 		return True
3f9bc7
 	except Exception as err:
3f9bc7
-		logging.error("Failed: power_cycle: {}".format(str(err)))
3f9bc7
-		return False
3f9bc7
-
3f9bc7
-
3f9bc7
-def get_instance(conn, project, zone, instance):
3f9bc7
-	request = conn.instances().get(
3f9bc7
-			project=project, zone=zone, instance=instance)
3f9bc7
-	return request.execute()
3f9bc7
+		logging.exception("Failed: power_cycle")
3f9bc7
+		raise err
3f9bc7
 
3f9bc7
 
3f9bc7
-def get_zone(conn, project, instance):
3f9bc7
+def get_zone(conn, options, instance):
3f9bc7
+	logging.debug("get_zone");
3f9bc7
+	project = options['--project']
3f9bc7
 	fl = 'name="%s"' % instance
3f9bc7
-	request = conn.instances().aggregatedList(project=project, filter=fl)
3f9bc7
+	request = replace_api_uri(options, conn.instances().aggregatedList(project=project, filter=fl))
3f9bc7
 	while request is not None:
3f9bc7
 		response = request.execute()
3f9bc7
 		zones = response.get('items', {})
3f9bc7
@@ -130,9 +309,9 @@
3f9bc7
 			for inst in zone.get('instances', []):
3f9bc7
 				if inst['name'] == instance:
3f9bc7
 					return inst['zone'].split("/")[-1]
3f9bc7
-		request = conn.instances().aggregatedList_next(
3f9bc7
-				previous_request=request, previous_response=response)
3f9bc7
-	raise Exception("Unable to find instance %s" % (instance))
3f9bc7
+		request = replace_api_uri(options, conn.instances().aggregatedList_next(
3f9bc7
+				previous_request=request, previous_response=response))
3f9bc7
+	raise_fence_agent(options, "Unable to find instance %s" % (instance))
3f9bc7
 
3f9bc7
 
3f9bc7
 def get_metadata(metadata_key, params=None, timeout=None):
3f9bc7
@@ -149,6 +328,7 @@
3f9bc7
 	Raises:
3f9bc7
 		urlerror.HTTPError: raises when the GET request fails.
3f9bc7
 	"""
3f9bc7
+	logging.debug("get_metadata");
3f9bc7
 	timeout = timeout or 60
3f9bc7
 	metadata_url = os.path.join(METADATA_SERVER, metadata_key)
3f9bc7
 	params = urlparse.urlencode(params or {})
3f9bc7
@@ -178,12 +358,50 @@
3f9bc7
 	all_opt["stackdriver-logging"] = {
3f9bc7
 		"getopt" : "",
3f9bc7
 		"longopt" : "stackdriver-logging",
3f9bc7
-		"help" : "--stackdriver-logging		Enable Logging to Stackdriver. Using stackdriver logging requires additional libraries (google-cloud-logging).",
3f9bc7
-		"shortdesc" : "Stackdriver-logging support. Requires additional libraries (google-cloud-logging).",
3f9bc7
-		"longdesc" : "If enabled IP failover logs will be posted to stackdriver logging. Using stackdriver logging requires additional libraries (google-cloud-logging).",
3f9bc7
+		"help" : "--stackdriver-logging          Enable Logging to Stackdriver",
3f9bc7
+		"shortdesc" : "Stackdriver-logging support.",
3f9bc7
+		"longdesc" : "If enabled IP failover logs will be posted to stackdriver logging.",
3f9bc7
 		"required" : "0",
3f9bc7
 		"order" : 4
3f9bc7
 	}
3f9bc7
+	all_opt["baremetalsolution"] = {
3f9bc7
+		"getopt" : "",
3f9bc7
+		"longopt" : "baremetalsolution",
3f9bc7
+		"help" : "--baremetalsolution            Enable on bare metal",
3f9bc7
+		"shortdesc" : "If enabled this is a bare metal offering from google.",
3f9bc7
+		"required" : "0",
3f9bc7
+		"order" : 5
3f9bc7
+	}
3f9bc7
+	all_opt["apitimeout"] = {
3f9bc7
+		"getopt" : ":",
3f9bc7
+		"type" : "second",
3f9bc7
+		"longopt" : "apitimeout",
3f9bc7
+		"help" : "--apitimeout=[seconds]         Timeout to use for API calls",
3f9bc7
+		"shortdesc" : "Timeout in seconds to use for API calls, default is 60.",
3f9bc7
+		"required" : "0",
3f9bc7
+		"default" : 60,
3f9bc7
+		"order" : 6
3f9bc7
+	}
3f9bc7
+	all_opt["retries"] = {
3f9bc7
+		"getopt" : ":",
3f9bc7
+		"type" : "integer",
3f9bc7
+		"longopt" : "retries",
3f9bc7
+		"help" : "--retries=[retries]            Number of retries on failure for API calls",
3f9bc7
+		"shortdesc" : "Number of retries on failure for API calls, default is 3.",
3f9bc7
+		"required" : "0",
3f9bc7
+		"default" : 3,
3f9bc7
+		"order" : 7
3f9bc7
+	}
3f9bc7
+	all_opt["retrysleep"] = {
3f9bc7
+		"getopt" : ":",
3f9bc7
+		"type" : "second",
3f9bc7
+		"longopt" : "retrysleep",
3f9bc7
+		"help" : "--retrysleep=[seconds]         Time to sleep between API retries",
3f9bc7
+		"shortdesc" : "Time to sleep in seconds between API retries, default is 5.",
3f9bc7
+		"required" : "0",
3f9bc7
+		"default" : 5,
3f9bc7
+		"order" : 8
3f9bc7
+	}
3f9bc7
 	all_opt["serviceaccount"] = {
3f9bc7
 		"getopt" : ":",
3f9bc7
 		"longopt" : "serviceaccount",
3f9bc7
@@ -192,13 +410,21 @@
3f9bc7
 		"required" : "0",
3f9bc7
 		"order" : 9
3f9bc7
 	}
3f9bc7
+	all_opt["plugzonemap"] = {
3f9bc7
+		"getopt" : ":",
3f9bc7
+		"longopt" : "plugzonemap",
3f9bc7
+		"help" : "--plugzonemap=[plugzonemap]    Comma separated zone map when fencing multiple plugs",
3f9bc7
+		"shortdesc" : "Comma separated zone map when fencing multiple plugs.",
3f9bc7
+		"required" : "0",
3f9bc7
+		"order" : 10
3f9bc7
+	}
3f9bc7
 	all_opt["proxyhost"] = {
3f9bc7
 		"getopt" : ":",
3f9bc7
 		"longopt" : "proxyhost",
3f9bc7
 		"help" : "--proxyhost=[proxy_host]       The proxy host to use, if one is needed to access the internet (Example: 10.122.0.33)",
3f9bc7
 		"shortdesc" : "If a proxy is used for internet access, the proxy host should be specified.",
3f9bc7
 		"required" : "0",
3f9bc7
-		"order" : 10
3f9bc7
+		"order" : 11
3f9bc7
 	}
3f9bc7
 	all_opt["proxyport"] = {
3f9bc7
 		"getopt" : ":",
3f9bc7
@@ -207,7 +433,49 @@
3f9bc7
 		"help" : "--proxyport=[proxy_port]       The proxy port to use, if one is needed to access the internet (Example: 3127)",
3f9bc7
 		"shortdesc" : "If a proxy is used for internet access, the proxy port should be specified.",
3f9bc7
 		"required" : "0",
3f9bc7
-		"order" : 11
3f9bc7
+		"order" : 12
3f9bc7
+	}
3f9bc7
+	all_opt["earlyexit"] = {
3f9bc7
+		"getopt" : "",
3f9bc7
+		"longopt" : "earlyexit",
3f9bc7
+		"help" : "--earlyexit                    Return early if reset is already in progress",
3f9bc7
+		"shortdesc" : "If an existing reset operation is detected, the fence agent will return before the operation completes with a 0 return code.",
3f9bc7
+		"required" : "0",
3f9bc7
+		"order" : 13
3f9bc7
+	}
3f9bc7
+	all_opt["warntimeout"] = {
3f9bc7
+		"getopt" : ":",
3f9bc7
+		"type" : "second",
3f9bc7
+		"longopt" : "warntimeout",
3f9bc7
+		"help" : "--warntimeout=[warn_timeout]   Timeout seconds before logging a warning and returning a 0 status code",
3f9bc7
+		"shortdesc" : "If the operation is not completed within the timeout, the cluster operations are allowed to continue.",
3f9bc7
+		"required" : "0",
3f9bc7
+		"order" : 14
3f9bc7
+	}
3f9bc7
+	all_opt["errortimeout"] = {
3f9bc7
+		"getopt" : ":",
3f9bc7
+		"type" : "second",
3f9bc7
+		"longopt" : "errortimeout",
3f9bc7
+		"help" : "--errortimeout=[error_timeout] Timeout seconds before failing and returning a non-zero status code",
3f9bc7
+		"shortdesc" : "If the operation is not completed within the timeout, cluster is notified of the operation failure.",
3f9bc7
+		"required" : "0",
3f9bc7
+		"order" : 15
3f9bc7
+	}
3f9bc7
+	all_opt["runonwarn"] = {
3f9bc7
+		"getopt" : ":",
3f9bc7
+		"longopt" : "runonwarn",
3f9bc7
+		"help" : "--runonwarn=[run_on_warn]      If a timeout occurs and warning is generated, run the supplied command",
3f9bc7
+		"shortdesc" : "If a timeout would occur while running the agent, then the supplied command is run.",
3f9bc7
+		"required" : "0",
3f9bc7
+		"order" : 16
3f9bc7
+	}
3f9bc7
+	all_opt["runonfail"] = {
3f9bc7
+		"getopt" : ":",
3f9bc7
+		"longopt" : "runonfail",
3f9bc7
+		"help" : "--runonfail=[run_on_fail]      If a failure occurs, run the supplied command",
3f9bc7
+		"shortdesc" : "If a failure would occur while running the agent, then the supplied command is run.",
3f9bc7
+		"required" : "0",
3f9bc7
+		"order" : 17
3f9bc7
 	}
3f9bc7
 
3f9bc7
 
3f9bc7
@@ -215,7 +483,9 @@
3f9bc7
 	conn = None
3f9bc7
 
3f9bc7
 	device_opt = ["port", "no_password", "zone", "project", "stackdriver-logging",
3f9bc7
-		"method", "serviceaccount", "proxyhost", "proxyport"]
3f9bc7
+		"method", "baremetalsolution", "apitimeout", "retries", "retrysleep",
3f9bc7
+		"serviceaccount", "plugzonemap", "proxyhost", "proxyport", "earlyexit",
3f9bc7
+		"warntimeout", "errortimeout", "runonwarn", "runonfail"]
3f9bc7
 
3f9bc7
 	atexit.register(atexit_handler)
3f9bc7
 
3f9bc7
@@ -259,6 +529,11 @@
3f9bc7
 			logging.error('Couldn\'t import google.cloud.logging, '
3f9bc7
 				'disabling Stackdriver-logging support')
3f9bc7
 
3f9bc7
+  # if apitimeout is defined we set the socket timeout, if not we keep the
3f9bc7
+  # socket default which is 60s
3f9bc7
+	if options.get("--apitimeout"):
3f9bc7
+		socket.setdefaulttimeout(options["--apitimeout"])
3f9bc7
+
3f9bc7
 	# Prepare cli
3f9bc7
 	try:
3f9bc7
 		serviceaccount = options.get("--serviceaccount")
3f9bc7
@@ -291,20 +566,39 @@
3f9bc7
 			conn = googleapiclient.discovery.build(
3f9bc7
 				'compute', 'v1', credentials=credentials, cache_discovery=False)
3f9bc7
 	except Exception as err:
3f9bc7
-		fail_usage("Failed: Create GCE compute v1 connection: {}".format(str(err)))
3f9bc7
+		fail_fence_agent(options, "Failed: Create GCE compute v1 connection: {}".format(str(err)))
3f9bc7
 
3f9bc7
 	# Get project and zone
3f9bc7
 	if not options.get("--project"):
3f9bc7
 		try:
3f9bc7
 			options["--project"] = get_metadata('project/project-id')
3f9bc7
 		except Exception as err:
3f9bc7
-			fail_usage("Failed retrieving GCE project. Please provide --project option: {}".format(str(err)))
3f9bc7
+			fail_fence_agent(options, "Failed retrieving GCE project. Please provide --project option: {}".format(str(err)))
3f9bc7
 
3f9bc7
-	if not options.get("--zone"):
3f9bc7
-		try:
3f9bc7
-			options["--zone"] = get_zone(conn, options['--project'], options['--plug'])
3f9bc7
-		except Exception as err:
3f9bc7
-			fail_usage("Failed retrieving GCE zone. Please provide --zone option: {}".format(str(err)))
3f9bc7
+	if "--baremetalsolution" in options:
3f9bc7
+		options["--zone"] = "none"
3f9bc7
+
3f9bc7
+	# Populates zone automatically if missing from the command
3f9bc7
+	zones = [] if not "--zone" in options else options["--zone"].split(",")
3f9bc7
+	options["--plugzonemap"] = {}
3f9bc7
+	if "--plug" in options:
3f9bc7
+		for i, instance in enumerate(options["--plug"].split(",")):
3f9bc7
+			if len(zones) == 1:
3f9bc7
+				# If only one zone is specified, use it across all plugs
3f9bc7
+				options["--plugzonemap"][instance] = zones[0]
3f9bc7
+				continue
3f9bc7
+
3f9bc7
+			if len(zones) - 1 >= i:
3f9bc7
+				# If we have enough zones specified with the --zone flag use the zone at
3f9bc7
+				# the same index as the plug
3f9bc7
+				options["--plugzonemap"][instance] = zones[i]
3f9bc7
+				continue
3f9bc7
+
3f9bc7
+			try:
3f9bc7
+				# In this case we do not have a zone specified so we attempt to detect it
3f9bc7
+				options["--plugzonemap"][instance] = get_zone(conn, options, instance)
3f9bc7
+			except Exception as err:
3f9bc7
+				fail_fence_agent(options, "Failed retrieving GCE zone. Please provide --zone option: {}".format(str(err)))
3f9bc7
 
3f9bc7
 	# Operate the fencing device
3f9bc7
 	result = fence_action(conn, options, set_power_status, get_power_status, get_nodes_list, power_cycle)