Blame SOURCES/bz2042070-2-gcp-vpc-move-vip-add-retries.patch

3717f3
From 3ae6d8f0a34d099945d9bf005ed45dbfe9452202 Mon Sep 17 00:00:00 2001
3717f3
From: kj1724 <78624900+kj1724@users.noreply.github.com>
3717f3
Date: Wed, 28 Apr 2021 10:22:38 -0400
3717f3
Subject: [PATCH] gcp-vpc-move-vip.in: Adds retries
3717f3
3717f3
If the cluster fails a monitoring event, it will try to restart the resource. If the resource agent makes an API/metadata call that fails at that time, the resource will be considered "failed", but in certain cases also "unconfigured", which prevents further operations.
3717f3
3717f3
These changes can help the agent recover from certain intermittent failures.
3717f3
---
3717f3
 heartbeat/gcp-vpc-move-vip.in | 62 ++++++++++++++++++++---------------
3717f3
 1 file changed, 35 insertions(+), 27 deletions(-)
3717f3
3717f3
diff --git a/heartbeat/gcp-vpc-move-vip.in b/heartbeat/gcp-vpc-move-vip.in
3717f3
index bbbd87b7a9..c411555110 100755
3717f3
--- a/heartbeat/gcp-vpc-move-vip.in
3717f3
+++ b/heartbeat/gcp-vpc-move-vip.in
3717f3
@@ -50,6 +50,8 @@ REMOVE = 1
3717f3
 CONN = None
3717f3
 THIS_VM = None
3717f3
 ALIAS = None
3717f3
+MAX_RETRIES = 3
3717f3
+RETRY_BACKOFF_SECS = 1
3717f3
 METADATA_SERVER = 'http://metadata.google.internal/computeMetadata/v1/'
3717f3
 METADATA_HEADERS = {'Metadata-Flavor': 'Google'}
3717f3
 METADATA = \
3717f3
@@ -111,18 +113,37 @@ def get_metadata(metadata_key, params=None, timeout=None):
3717f3
 
3717f3
   Returns:
3717f3
     HTTP response from the GET request.
3717f3
-
3717f3
-  Raises:
3717f3
-    urlerror.HTTPError: raises when the GET request fails.
3717f3
   """
3717f3
-  timeout = timeout or 60
3717f3
-  metadata_url = os.path.join(METADATA_SERVER, metadata_key)
3717f3
-  params = urlparse.urlencode(params or {})
3717f3
-  url = '%s?%s' % (metadata_url, params)
3717f3
-  request = urlrequest.Request(url, headers=METADATA_HEADERS)
3717f3
-  request_opener = urlrequest.build_opener(urlrequest.ProxyHandler({}))
3717f3
-  return request_opener.open(
3717f3
-      request, timeout=timeout * 1.1).read().decode("utf-8")
3717f3
+  for i in range(MAX_RETRIES):
3717f3
+    try:
3717f3
+      timeout = timeout or 60
3717f3
+      metadata_url = os.path.join(METADATA_SERVER, metadata_key)
3717f3
+      params = urlparse.urlencode(params or {})
3717f3
+      url = '%s?%s' % (metadata_url, params)
3717f3
+      request = urlrequest.Request(url, headers=METADATA_HEADERS)
3717f3
+      request_opener = urlrequest.build_opener(urlrequest.ProxyHandler({}))
3717f3
+      return request_opener.open(
3717f3
+          request, timeout=timeout * 1.1).read().decode("utf-8")
3717f3
+    except Exception as e:
3717f3
+      logger.error('Couldn\'t get instance name, is this running inside GCE?: '
3717f3
+                   + str(e))
3717f3
+      time.sleep(RETRY_BACKOFF_SECS * (i + 1))
3717f3
+
3717f3
+  # If the retries are exhausted we exit with a generic error.
3717f3
+  sys.exit(OCF_ERR_GENERIC)
3717f3
+
3717f3
+
3717f3
+def create_api_connection():
3717f3
+  for i in range(MAX_RETRIES):
3717f3
+    try:
3717f3
+      return googleapiclient.discovery.build('compute', 'v1',
3717f3
+                                             cache_discovery=False)
3717f3
+    except Exception as e:
3717f3
+      logger.error('Couldn\'t connect with google api: ' + str(e))
3717f3
+      time.sleep(RETRY_BACKOFF_SECS * (i + 1))
3717f3
+
3717f3
+  # If the retries are exhausted we exit with a generic error.
3717f3
+  sys.exit(OCF_ERR_GENERIC)
3717f3
 
3717f3
 
3717f3
 def get_instance(project, zone, instance):
3717f3
@@ -358,24 +379,11 @@ def gcp_alias_status(alias):
3717f3
 
3717f3
 def validate():
3717f3
   global ALIAS
3717f3
-  global CONN
3717f3
   global THIS_VM
3717f3
+  global CONN
3717f3
 
3717f3
-  # Populate global vars
3717f3
-  try:
3717f3
-    CONN = googleapiclient.discovery.build('compute', 'v1',
3717f3
-                                           cache_discovery=False)
3717f3
-  except Exception as e:
3717f3
-    logger.error('Couldn\'t connect with google api: ' + str(e))
3717f3
-    sys.exit(OCF_ERR_CONFIGURED)
3717f3
-
3717f3
-  try:
3717f3
-    THIS_VM = get_metadata('instance/name')
3717f3
-  except Exception as e:
3717f3
-    logger.error('Couldn\'t get instance name, is this running inside GCE?: '
3717f3
-                 + str(e))
3717f3
-    sys.exit(OCF_ERR_CONFIGURED)
3717f3
-
3717f3
+  CONN = create_api_connection()
3717f3
+  THIS_VM = get_metadata('instance/name')
3717f3
   ALIAS = os.environ.get('OCF_RESKEY_alias_ip')
3717f3
   if not ALIAS:
3717f3
     logger.error('Missing alias_ip parameter')