Blame SOURCES/bz1816203-fence_aws-1-fix-race-condition.patch

b7eff9
From 1742baf17954c58a84b9c668a617bac78303ce95 Mon Sep 17 00:00:00 2001
b7eff9
From: Guilherme Felix <fguilher@amazon.com>
b7eff9
Date: Tue, 17 Mar 2020 13:18:38 +0000
b7eff9
Subject: [PATCH 1/9] fence_aws: Fix fence race condition by checking local
b7eff9
 instance status
b7eff9
b7eff9
---
b7eff9
 agents/aws/fence_aws.py | 37 ++++++++++++++++++++++++++++++++++++-
b7eff9
 1 file changed, 36 insertions(+), 1 deletion(-)
b7eff9
b7eff9
diff --git a/agents/aws/fence_aws.py b/agents/aws/fence_aws.py
b7eff9
index 4a4d9de2..f37f68d6 100644
b7eff9
--- a/agents/aws/fence_aws.py
b7eff9
+++ b/agents/aws/fence_aws.py
b7eff9
@@ -3,6 +3,7 @@
b7eff9
 import sys, re
b7eff9
 import logging
b7eff9
 import atexit
b7eff9
+import requests
b7eff9
 sys.path.append("@FENCEAGENTSLIBDIR@")
b7eff9
 from fencing import *
b7eff9
 from fencing import fail, fail_usage, run_delay, EC_STATUS
b7eff9
@@ -10,6 +11,17 @@
b7eff9
 import boto3
b7eff9
 from botocore.exceptions import ClientError, EndpointConnectionError, NoRegionError
b7eff9
 
b7eff9
+def get_instance_id():
b7eff9
+	try:
b7eff9
+		r = requests.get('http://169.254.169.254/latest/meta-data/instance-id')
b7eff9
+		return r.content
b7eff9
+	except HTTPError as http_err:
b7eff9
+		logging.error('HTTP error occurred while trying to access EC2 metadata server: %s', http_err)
b7eff9
+	except Exception as err:
b7eff9
+		logging.error('A fatal error occurred while trying to access EC2 metadata server: %s', err)
b7eff9
+	return None
b7eff9
+	
b7eff9
+
b7eff9
 def get_nodes_list(conn, options):
b7eff9
 	result = {}
b7eff9
 	try:
b7eff9
@@ -45,10 +57,33 @@ def get_power_status(conn, options):
b7eff9
 		logging.error("Failed to get power status: %s", e)
b7eff9
 		fail(EC_STATUS)
b7eff9
 
b7eff9
+def get_self_power_status(conn, options):
b7eff9
+	try:
b7eff9
+		instance = conn.instances.filter(Filters=[{"Name": "instance-id", "Values": [instance_id]}])
b7eff9
+		state = list(instance)[0].state["Name"]
b7eff9
+		if state == "running":
b7eff9
+			logging.debug("Captured my (%s) state and it %s - returning OK - Proceeding with fencing",instance_id,state.upper())
b7eff9
+			return "ok"
b7eff9
+		else:
b7eff9
+			logging.debug("Captured my (%s) state it is %s - returning Alert - Unable to fence other nodes",instance_id,state.upper())
b7eff9
+			return "alert"
b7eff9
+	
b7eff9
+	except ClientError:
b7eff9
+		fail_usage("Failed: Incorrect Access Key or Secret Key.")
b7eff9
+	except EndpointConnectionError:
b7eff9
+		fail_usage("Failed: Incorrect Region.")
b7eff9
+	except IndexError:
b7eff9
+		return "fail"
b7eff9
+
b7eff9
 def set_power_status(conn, options):
b7eff9
+	my_instance = get_instance_id()
b7eff9
 	try:
b7eff9
 		if (options["--action"]=="off"):
b7eff9
-			conn.instances.filter(InstanceIds=[options["--plug"]]).stop(Force=True)
b7eff9
+			if (get_self_power_status(conn,myinstance) == "ok"):
b7eff9
+				logging.info("Called StopInstance API call for %s", options["--plug"])
b7eff9
+				conn.instances.filter(InstanceIds=[options["--plug"]]).stop(Force=True)
b7eff9
+			else:
b7eff9
+				logging.info("Skipping fencing as instance is not in running status")
b7eff9
 		elif (options["--action"]=="on"):
b7eff9
 			conn.instances.filter(InstanceIds=[options["--plug"]]).start()
b7eff9
 	except Exception as e:
b7eff9
b7eff9
From 45e429b3132ebc9e78121c3fbb15f0bf46845a59 Mon Sep 17 00:00:00 2001
b7eff9
From: Guilherme Felix <fguilher@amazon.com>
b7eff9
Date: Tue, 17 Mar 2020 13:28:34 +0000
b7eff9
Subject: [PATCH 2/9] fence_aws: Use local logger and improve logging
b7eff9
 experience
b7eff9
b7eff9
---
b7eff9
 agents/aws/fence_aws.py | 34 ++++++++++++++++++++++++++--------
b7eff9
 1 file changed, 26 insertions(+), 8 deletions(-)
b7eff9
b7eff9
diff --git a/agents/aws/fence_aws.py b/agents/aws/fence_aws.py
b7eff9
index f37f68d6..b0b6685a 100644
b7eff9
--- a/agents/aws/fence_aws.py
b7eff9
+++ b/agents/aws/fence_aws.py
b7eff9
@@ -6,7 +6,7 @@
b7eff9
 import requests
b7eff9
 sys.path.append("@FENCEAGENTSLIBDIR@")
b7eff9
 from fencing import *
b7eff9
-from fencing import fail, fail_usage, run_delay, EC_STATUS
b7eff9
+from fencing import fail, fail_usage, run_delay, EC_STATUS, SyslogLibHandler
b7eff9
 
b7eff9
 import boto3
b7eff9
 from botocore.exceptions import ClientError, EndpointConnectionError, NoRegionError
b7eff9
@@ -16,13 +16,14 @@ def get_instance_id():
b7eff9
 		r = requests.get('http://169.254.169.254/latest/meta-data/instance-id')
b7eff9
 		return r.content
b7eff9
 	except HTTPError as http_err:
b7eff9
-		logging.error('HTTP error occurred while trying to access EC2 metadata server: %s', http_err)
b7eff9
+		logger.error('HTTP error occurred while trying to access EC2 metadata server: %s', http_err)
b7eff9
 	except Exception as err:
b7eff9
-		logging.error('A fatal error occurred while trying to access EC2 metadata server: %s', err)
b7eff9
+		logger.error('A fatal error occurred while trying to access EC2 metadata server: %s', err)
b7eff9
 	return None
b7eff9
 	
b7eff9
 
b7eff9
 def get_nodes_list(conn, options):
b7eff9
+	logger.info("Starting monitor operation")
b7eff9
 	result = {}
b7eff9
 	try:
b7eff9
 		for instance in conn.instances.all():
b7eff9
@@ -32,14 +33,16 @@ def get_nodes_list(conn, options):
b7eff9
 	except EndpointConnectionError:
b7eff9
 		fail_usage("Failed: Incorrect Region.")
b7eff9
 	except Exception as e:
b7eff9
-		logging.error("Failed to get node list: %s", e)
b7eff9
-
b7eff9
+		logger.error("Failed to get node list: %s", e)
b7eff9
+	logger.debug("Monitor operation OK: %s",result)
b7eff9
 	return result
b7eff9
 
b7eff9
 def get_power_status(conn, options):
b7eff9
+	logger.debug("Starting status operation")
b7eff9
 	try:
b7eff9
 		instance = conn.instances.filter(Filters=[{"Name": "instance-id", "Values": [options["--plug"]]}])
b7eff9
 		state = list(instance)[0].state["Name"]
b7eff9
+		logger.info("Status operation for EC2 instance %s returned state: %s",options["--plug"],state.upper())
b7eff9
 		if state == "running":
b7eff9
 			return "on"
b7eff9
 		elif state == "stopped":
b7eff9
@@ -80,14 +83,14 @@ def set_power_status(conn, options):
b7eff9
 	try:
b7eff9
 		if (options["--action"]=="off"):
b7eff9
 			if (get_self_power_status(conn,myinstance) == "ok"):
b7eff9
-				logging.info("Called StopInstance API call for %s", options["--plug"])
b7eff9
 				conn.instances.filter(InstanceIds=[options["--plug"]]).stop(Force=True)
b7eff9
+				logger.info("Called StopInstance API call for %s", options["--plug"])
b7eff9
 			else:
b7eff9
-				logging.info("Skipping fencing as instance is not in running status")
b7eff9
+				logger.info("Skipping fencing as instance is not in running status")
b7eff9
 		elif (options["--action"]=="on"):
b7eff9
 			conn.instances.filter(InstanceIds=[options["--plug"]]).start()
b7eff9
 	except Exception as e:
b7eff9
-		logging.error("Failed to power %s %s: %s", \
b7eff9
+		logger.error("Failed to power %s %s: %s", \
b7eff9
 				options["--action"], options["--plug"], e)
b7eff9
 
b7eff9
 def define_new_opts():
b7eff9
@@ -142,6 +145,13 @@ def main():
b7eff9
 
b7eff9
 	run_delay(options)
b7eff9
 
b7eff9
+	if options.get("--verbose") is not None:
b7eff9
+		lh = logging.FileHandler('/var/log/fence_aws_debug.log')
b7eff9
+		logger.addHandler(lh)
b7eff9
+		lhf = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
b7eff9
+		lh.setFormatter(lhf)
b7eff9
+		logger.setLevel(logging.DEBUG)
b7eff9
+
b7eff9
 	region = options.get("--region")
b7eff9
 	access_key = options.get("--access-key")
b7eff9
 	secret_key = options.get("--secret-key")
b7eff9
@@ -157,4 +167,12 @@ def main():
b7eff9
 	sys.exit(result)
b7eff9
 
b7eff9
 if __name__ == "__main__":
b7eff9
+
b7eff9
+	logger = logging.getLogger("fence_aws")
b7eff9
+	logger.propagate = False
b7eff9
+	logger.setLevel(logging.INFO)
b7eff9
+	logger.addHandler(SyslogLibHandler())
b7eff9
+	logger.getLogger('botocore.vendored').propagate = False
b7eff9
+	
b7eff9
+
b7eff9
 	main()
b7eff9
b7eff9
From 00569921597b8007c67296ab8332747baf1e6fae Mon Sep 17 00:00:00 2001
b7eff9
From: Guilherme Felix <fguilher@amazon.com>
b7eff9
Date: Tue, 17 Mar 2020 13:33:02 +0000
b7eff9
Subject: [PATCH 3/9] fence_aws: Decouple boto3 and botocore debug logging from
b7eff9
 local logging
b7eff9
b7eff9
---
b7eff9
 agents/aws/fence_aws.py | 26 +++++++++++++++++++++++++-
b7eff9
 1 file changed, 25 insertions(+), 1 deletion(-)
b7eff9
b7eff9
diff --git a/agents/aws/fence_aws.py b/agents/aws/fence_aws.py
b7eff9
index b0b6685a..11714315 100644
b7eff9
--- a/agents/aws/fence_aws.py
b7eff9
+++ b/agents/aws/fence_aws.py
b7eff9
@@ -118,18 +118,27 @@ def define_new_opts():
b7eff9
 		"required" : "0",
b7eff9
 		"order" : 4
b7eff9
 	}
b7eff9
+	all_opt["boto3_debug"] = {
b7eff9
+		"getopt" : "b:",
b7eff9
+		"longopt" : "boto3_debug",
b7eff9
+		"help" : "-b, --boto3_debug=on|off      Boto3 and Botocore library debug logging",
b7eff9
+		"shortdesc": "Boto Lib debug",
b7eff9
+		"required": "0",
b7eff9
+		"order": 5
b7eff9
+	}
b7eff9
 
b7eff9
 # Main agent method
b7eff9
 def main():
b7eff9
 	conn = None
b7eff9
 
b7eff9
-	device_opt = ["port", "no_password", "region", "access_key", "secret_key"]
b7eff9
+	device_opt = ["port", "no_password", "region", "access_key", "secret_key", "boto3_debug"]
b7eff9
 
b7eff9
 	atexit.register(atexit_handler)
b7eff9
 
b7eff9
 	define_new_opts()
b7eff9
 
b7eff9
 	all_opt["power_timeout"]["default"] = "60"
b7eff9
+	all_opt["boto3_debug"]["default"] = "off"
b7eff9
 
b7eff9
 	options = check_input(device_opt, process_input(device_opt))
b7eff9
 
b7eff9
@@ -151,6 +160,21 @@ def main():
b7eff9
 		lhf = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
b7eff9
 		lh.setFormatter(lhf)
b7eff9
 		logger.setLevel(logging.DEBUG)
b7eff9
+	
b7eff9
+	if options["--boto3_debug"] != "on":
b7eff9
+		boto3.set_stream_logger('boto3',logging.INFO)
b7eff9
+		boto3.set_stream_logger('botocore',logging.INFO)
b7eff9
+		logging.getLogger('botocore').propagate = False
b7eff9
+		logging.getLogger('boto3').propagate = False
b7eff9
+	else:
b7eff9
+		log_format = logging.Formatter('%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
b7eff9
+		logging.getLogger('botocore').propagate = False
b7eff9
+		logging.getLogger('boto3').propagate = False
b7eff9
+		fdh = logging.FileHandler('/var/log/fence_aws_boto3.log')
b7eff9
+		fdh.setFormatter(log_format)
b7eff9
+		logging.getLogger('boto3').addHandler(fdh)
b7eff9
+		logging.getLogger('botocore').addHandler(fdh)
b7eff9
+		logging.debug("Boto debug level is %s and sending debug info to /var/log/fence_aws_boto3.log", options["--boto3_debug"])
b7eff9
 
b7eff9
 	region = options.get("--region")
b7eff9
 	access_key = options.get("--access-key")
b7eff9
b7eff9
From ed309bd51dfd5e0fed30156e7a312d5b5a8f4bd4 Mon Sep 17 00:00:00 2001
b7eff9
From: Guilherme Felix <fguilher@amazon.com>
b7eff9
Date: Thu, 19 Mar 2020 16:02:47 +0000
b7eff9
Subject: [PATCH 4/9] fence_aws: Fix typos and variable names
b7eff9
b7eff9
---
b7eff9
 agents/aws/fence_aws.py | 6 +++---
b7eff9
 1 file changed, 3 insertions(+), 3 deletions(-)
b7eff9
b7eff9
diff --git a/agents/aws/fence_aws.py b/agents/aws/fence_aws.py
b7eff9
index 11714315..207631e8 100644
b7eff9
--- a/agents/aws/fence_aws.py
b7eff9
+++ b/agents/aws/fence_aws.py
b7eff9
@@ -60,7 +60,7 @@ def get_power_status(conn, options):
b7eff9
 		logging.error("Failed to get power status: %s", e)
b7eff9
 		fail(EC_STATUS)
b7eff9
 
b7eff9
-def get_self_power_status(conn, options):
b7eff9
+def get_self_power_status(conn, instance_id):
b7eff9
 	try:
b7eff9
 		instance = conn.instances.filter(Filters=[{"Name": "instance-id", "Values": [instance_id]}])
b7eff9
 		state = list(instance)[0].state["Name"]
b7eff9
@@ -82,7 +82,7 @@ def set_power_status(conn, options):
b7eff9
 	my_instance = get_instance_id()
b7eff9
 	try:
b7eff9
 		if (options["--action"]=="off"):
b7eff9
-			if (get_self_power_status(conn,myinstance) == "ok"):
b7eff9
+			if (get_self_power_status(conn,my_instance) == "ok"):
b7eff9
 				conn.instances.filter(InstanceIds=[options["--plug"]]).stop(Force=True)
b7eff9
 				logger.info("Called StopInstance API call for %s", options["--plug"])
b7eff9
 			else:
b7eff9
@@ -196,7 +196,7 @@ def main():
b7eff9
 	logger.propagate = False
b7eff9
 	logger.setLevel(logging.INFO)
b7eff9
 	logger.addHandler(SyslogLibHandler())
b7eff9
-	logger.getLogger('botocore.vendored').propagate = False
b7eff9
+	logging.getLogger('botocore.vendored').propagate = False
b7eff9
 	
b7eff9
 
b7eff9
 	main()
b7eff9
b7eff9
From 624c652a95a676286af408898186186b7d7fcf55 Mon Sep 17 00:00:00 2001
b7eff9
From: Guilherme Felix <fguilher@amazon.com>
b7eff9
Date: Thu, 19 Mar 2020 16:58:45 +0000
b7eff9
Subject: [PATCH 5/9] fence_aws: Missing brackets on boto3_debug metadata
b7eff9
b7eff9
---
b7eff9
 agents/aws/fence_aws.py | 2 +-
b7eff9
 1 file changed, 1 insertion(+), 1 deletion(-)
b7eff9
b7eff9
diff --git a/agents/aws/fence_aws.py b/agents/aws/fence_aws.py
b7eff9
index 207631e8..8916f4a0 100644
b7eff9
--- a/agents/aws/fence_aws.py
b7eff9
+++ b/agents/aws/fence_aws.py
b7eff9
@@ -121,7 +121,7 @@ def define_new_opts():
b7eff9
 	all_opt["boto3_debug"] = {
b7eff9
 		"getopt" : "b:",
b7eff9
 		"longopt" : "boto3_debug",
b7eff9
-		"help" : "-b, --boto3_debug=on|off      Boto3 and Botocore library debug logging",
b7eff9
+		"help" : "-b, --boto3_debug=[on|off]      Boto3 and Botocore library debug logging",
b7eff9
 		"shortdesc": "Boto Lib debug",
b7eff9
 		"required": "0",
b7eff9
 		"order": 5
b7eff9
b7eff9
From 7c641a6885c4ab67b7739a43892d92d95a6f566c Mon Sep 17 00:00:00 2001
b7eff9
From: Guilherme Felix <fguilher@amazon.com>
b7eff9
Date: Thu, 19 Mar 2020 17:04:31 +0000
b7eff9
Subject: [PATCH 6/9] fence_aws: Fix travis build #1
b7eff9
b7eff9
---
b7eff9
 agents/aws/fence_aws.py | 2 +-
b7eff9
 1 file changed, 1 insertion(+), 1 deletion(-)
b7eff9
b7eff9
diff --git a/agents/aws/fence_aws.py b/agents/aws/fence_aws.py
b7eff9
index 8916f4a0..f41a47e4 100644
b7eff9
--- a/agents/aws/fence_aws.py
b7eff9
+++ b/agents/aws/fence_aws.py
b7eff9
@@ -121,7 +121,7 @@ def define_new_opts():
b7eff9
 	all_opt["boto3_debug"] = {
b7eff9
 		"getopt" : "b:",
b7eff9
 		"longopt" : "boto3_debug",
b7eff9
-		"help" : "-b, --boto3_debug=[on|off]      Boto3 and Botocore library debug logging",
b7eff9
+		"help" : "-b, --boto3_debug=[option]      Boto3 and Botocore library debug logging",
b7eff9
 		"shortdesc": "Boto Lib debug",
b7eff9
 		"required": "0",
b7eff9
 		"order": 5
b7eff9
b7eff9
From 257af7ccc9789646adc7abf1e7dbac744b756071 Mon Sep 17 00:00:00 2001
b7eff9
From: Guilherme Felix <fguilher@amazon.com>
b7eff9
Date: Fri, 20 Mar 2020 10:59:56 +0000
b7eff9
Subject: [PATCH 7/9] fence_aws: Updated metadata XML file
b7eff9
b7eff9
---
b7eff9
 tests/data/metadata/fence_aws.xml | 5 +++++
b7eff9
 1 file changed, 5 insertions(+)
b7eff9
b7eff9
diff --git a/tests/data/metadata/fence_aws.xml b/tests/data/metadata/fence_aws.xml
b7eff9
index 5e5d5d99..acfebb61 100644
b7eff9
--- a/tests/data/metadata/fence_aws.xml
b7eff9
+++ b/tests/data/metadata/fence_aws.xml
b7eff9
@@ -36,6 +36,11 @@ For instructions see: https://boto3.readthedocs.io/en/latest/guide/quickstart.ht
b7eff9
 		<content type="string"  />
b7eff9
 		<shortdesc lang="en">Secret Key.</shortdesc>
b7eff9
 	</parameter>
b7eff9
+	<parameter name="boto3_debug" unique="0" required="0">
b7eff9
+		<getopt mixed="-b, --boto3_debug=[option]" />
b7eff9
+		<content type="string" default="off"  />
b7eff9
+		<shortdesc lang="en">Boto Lib debug</shortdesc>
b7eff9
+	</parameter>
b7eff9
 	<parameter name="quiet" unique="0" required="0">
b7eff9
 		<getopt mixed="-q, --quiet" />
b7eff9
 		<content type="boolean"  />
b7eff9
b7eff9
From 8f78bc19356b5e07d0021aaf7da3fc4e712e00f0 Mon Sep 17 00:00:00 2001
b7eff9
From: Guilherme Felix <fguilher@amazon.com>
b7eff9
Date: Fri, 20 Mar 2020 12:13:16 +0000
b7eff9
Subject: [PATCH 8/9] fence_aws: Moving logger config next to import statements
b7eff9
 for visibility
b7eff9
b7eff9
---
b7eff9
 agents/aws/fence_aws.py | 13 ++++++-------
b7eff9
 1 file changed, 6 insertions(+), 7 deletions(-)
b7eff9
b7eff9
diff --git a/agents/aws/fence_aws.py b/agents/aws/fence_aws.py
b7eff9
index f41a47e4..72fb8843 100644
b7eff9
--- a/agents/aws/fence_aws.py
b7eff9
+++ b/agents/aws/fence_aws.py
b7eff9
@@ -11,6 +11,12 @@
b7eff9
 import boto3
b7eff9
 from botocore.exceptions import ClientError, EndpointConnectionError, NoRegionError
b7eff9
 
b7eff9
+logger = logging.getLogger("fence_aws")
b7eff9
+logger.propagate = False
b7eff9
+logger.setLevel(logging.INFO)
b7eff9
+logger.addHandler(SyslogLibHandler())
b7eff9
+logging.getLogger('botocore.vendored').propagate = False
b7eff9
+	
b7eff9
 def get_instance_id():
b7eff9
 	try:
b7eff9
 		r = requests.get('http://169.254.169.254/latest/meta-data/instance-id')
b7eff9
@@ -192,11 +198,4 @@ def main():
b7eff9
 
b7eff9
 if __name__ == "__main__":
b7eff9
 
b7eff9
-	logger = logging.getLogger("fence_aws")
b7eff9
-	logger.propagate = False
b7eff9
-	logger.setLevel(logging.INFO)
b7eff9
-	logger.addHandler(SyslogLibHandler())
b7eff9
-	logging.getLogger('botocore.vendored').propagate = False
b7eff9
-	
b7eff9
-
b7eff9
 	main()
b7eff9
b7eff9
From 570a05c425fe55008c8892ebaad8a73d36143909 Mon Sep 17 00:00:00 2001
b7eff9
From: Guilherme Felix <fguilher@amazon.com>
b7eff9
Date: Fri, 20 Mar 2020 14:17:55 +0000
b7eff9
Subject: [PATCH 9/9] fence_aws: Remove empty line
b7eff9
b7eff9
---
b7eff9
 agents/aws/fence_aws.py | 3 +--
b7eff9
 1 file changed, 1 insertion(+), 2 deletions(-)
b7eff9
b7eff9
diff --git a/agents/aws/fence_aws.py b/agents/aws/fence_aws.py
b7eff9
index 72fb8843..ed55f390 100644
b7eff9
--- a/agents/aws/fence_aws.py
b7eff9
+++ b/agents/aws/fence_aws.py
b7eff9
@@ -197,5 +197,4 @@ def main():
b7eff9
 	sys.exit(result)
b7eff9
 
b7eff9
 if __name__ == "__main__":
b7eff9
-
b7eff9
-	main()
b7eff9
+	main()
b7eff9
\ No newline at end of file