|
|
4dce39 |
From 3ae6d8f0a34d099945d9bf005ed45dbfe9452202 Mon Sep 17 00:00:00 2001
|
|
|
4dce39 |
From: kj1724 <78624900+kj1724@users.noreply.github.com>
|
|
|
4dce39 |
Date: Wed, 28 Apr 2021 10:22:38 -0400
|
|
|
4dce39 |
Subject: [PATCH] gcp-vpc-move-vip.in: Adds retries
|
|
|
4dce39 |
|
|
|
4dce39 |
If the cluster fails a monitoring event, it will try to restart the resource. If the resource agent makes an API/metadata call that fails at that time, the resource will be considered "failed", but in certain cases also "unconfigured", which prevents further operations.
|
|
|
4dce39 |
|
|
|
4dce39 |
These changes can help the agent recover from certain intermittent failures.
|
|
|
4dce39 |
---
|
|
|
4dce39 |
heartbeat/gcp-vpc-move-vip.in | 62 ++++++++++++++++++++---------------
|
|
|
4dce39 |
1 file changed, 35 insertions(+), 27 deletions(-)
|
|
|
4dce39 |
|
|
|
4dce39 |
diff --git a/heartbeat/gcp-vpc-move-vip.in b/heartbeat/gcp-vpc-move-vip.in
|
|
|
4dce39 |
index bbbd87b7a9..c411555110 100755
|
|
|
4dce39 |
--- a/heartbeat/gcp-vpc-move-vip.in
|
|
|
4dce39 |
+++ b/heartbeat/gcp-vpc-move-vip.in
|
|
|
4dce39 |
@@ -50,6 +50,8 @@ REMOVE = 1
|
|
|
4dce39 |
CONN = None
|
|
|
4dce39 |
THIS_VM = None
|
|
|
4dce39 |
ALIAS = None
|
|
|
4dce39 |
+MAX_RETRIES = 3
|
|
|
4dce39 |
+RETRY_BACKOFF_SECS = 1
|
|
|
4dce39 |
METADATA_SERVER = 'http://metadata.google.internal/computeMetadata/v1/'
|
|
|
4dce39 |
METADATA_HEADERS = {'Metadata-Flavor': 'Google'}
|
|
|
4dce39 |
METADATA = \
|
|
|
4dce39 |
@@ -111,18 +113,37 @@ def get_metadata(metadata_key, params=None, timeout=None):
|
|
|
4dce39 |
|
|
|
4dce39 |
Returns:
|
|
|
4dce39 |
HTTP response from the GET request.
|
|
|
4dce39 |
-
|
|
|
4dce39 |
- Raises:
|
|
|
4dce39 |
- urlerror.HTTPError: raises when the GET request fails.
|
|
|
4dce39 |
"""
|
|
|
4dce39 |
- timeout = timeout or 60
|
|
|
4dce39 |
- metadata_url = os.path.join(METADATA_SERVER, metadata_key)
|
|
|
4dce39 |
- params = urlparse.urlencode(params or {})
|
|
|
4dce39 |
- url = '%s?%s' % (metadata_url, params)
|
|
|
4dce39 |
- request = urlrequest.Request(url, headers=METADATA_HEADERS)
|
|
|
4dce39 |
- request_opener = urlrequest.build_opener(urlrequest.ProxyHandler({}))
|
|
|
4dce39 |
- return request_opener.open(
|
|
|
4dce39 |
- request, timeout=timeout * 1.1).read().decode("utf-8")
|
|
|
4dce39 |
+ for i in range(MAX_RETRIES):
|
|
|
4dce39 |
+ try:
|
|
|
4dce39 |
+ timeout = timeout or 60
|
|
|
4dce39 |
+ metadata_url = os.path.join(METADATA_SERVER, metadata_key)
|
|
|
4dce39 |
+ params = urlparse.urlencode(params or {})
|
|
|
4dce39 |
+ url = '%s?%s' % (metadata_url, params)
|
|
|
4dce39 |
+ request = urlrequest.Request(url, headers=METADATA_HEADERS)
|
|
|
4dce39 |
+ request_opener = urlrequest.build_opener(urlrequest.ProxyHandler({}))
|
|
|
4dce39 |
+ return request_opener.open(
|
|
|
4dce39 |
+ request, timeout=timeout * 1.1).read().decode("utf-8")
|
|
|
4dce39 |
+ except Exception as e:
|
|
|
4dce39 |
+ logger.error('Couldn\'t get instance name, is this running inside GCE?: '
|
|
|
4dce39 |
+ + str(e))
|
|
|
4dce39 |
+ time.sleep(RETRY_BACKOFF_SECS * (i + 1))
|
|
|
4dce39 |
+
|
|
|
4dce39 |
+ # If the retries are exhausted we exit with a generic error.
|
|
|
4dce39 |
+ sys.exit(OCF_ERR_GENERIC)
|
|
|
4dce39 |
+
|
|
|
4dce39 |
+
|
|
|
4dce39 |
+def create_api_connection():
|
|
|
4dce39 |
+ for i in range(MAX_RETRIES):
|
|
|
4dce39 |
+ try:
|
|
|
4dce39 |
+ return googleapiclient.discovery.build('compute', 'v1',
|
|
|
4dce39 |
+ cache_discovery=False)
|
|
|
4dce39 |
+ except Exception as e:
|
|
|
4dce39 |
+ logger.error('Couldn\'t connect with google api: ' + str(e))
|
|
|
4dce39 |
+ time.sleep(RETRY_BACKOFF_SECS * (i + 1))
|
|
|
4dce39 |
+
|
|
|
4dce39 |
+ # If the retries are exhausted we exit with a generic error.
|
|
|
4dce39 |
+ sys.exit(OCF_ERR_GENERIC)
|
|
|
4dce39 |
|
|
|
4dce39 |
|
|
|
4dce39 |
def get_instance(project, zone, instance):
|
|
|
4dce39 |
@@ -358,24 +379,11 @@ def gcp_alias_status(alias):
|
|
|
4dce39 |
|
|
|
4dce39 |
def validate():
|
|
|
4dce39 |
global ALIAS
|
|
|
4dce39 |
- global CONN
|
|
|
4dce39 |
global THIS_VM
|
|
|
4dce39 |
+ global CONN
|
|
|
4dce39 |
|
|
|
4dce39 |
- # Populate global vars
|
|
|
4dce39 |
- try:
|
|
|
4dce39 |
- CONN = googleapiclient.discovery.build('compute', 'v1',
|
|
|
4dce39 |
- cache_discovery=False)
|
|
|
4dce39 |
- except Exception as e:
|
|
|
4dce39 |
- logger.error('Couldn\'t connect with google api: ' + str(e))
|
|
|
4dce39 |
- sys.exit(OCF_ERR_CONFIGURED)
|
|
|
4dce39 |
-
|
|
|
4dce39 |
- try:
|
|
|
4dce39 |
- THIS_VM = get_metadata('instance/name')
|
|
|
4dce39 |
- except Exception as e:
|
|
|
4dce39 |
- logger.error('Couldn\'t get instance name, is this running inside GCE?: '
|
|
|
4dce39 |
- + str(e))
|
|
|
4dce39 |
- sys.exit(OCF_ERR_CONFIGURED)
|
|
|
4dce39 |
-
|
|
|
4dce39 |
+ CONN = create_api_connection()
|
|
|
4dce39 |
+ THIS_VM = get_metadata('instance/name')
|
|
|
4dce39 |
ALIAS = os.environ.get('OCF_RESKEY_alias_ip')
|
|
|
4dce39 |
if not ALIAS:
|
|
|
4dce39 |
logger.error('Missing alias_ip parameter')
|