From 4dce396645fdd29051b0c2fdc46244a2a82bcb6e Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: May 14 2021 04:17:31 +0000 Subject: import resource-agents-4.1.1-94.el8 --- diff --git a/SOURCES/bz1957765-gcp-vpc-move-vip-retry.patch b/SOURCES/bz1957765-gcp-vpc-move-vip-retry.patch new file mode 100644 index 0000000..2350f1a --- /dev/null +++ b/SOURCES/bz1957765-gcp-vpc-move-vip-retry.patch @@ -0,0 +1,102 @@ +From 3ae6d8f0a34d099945d9bf005ed45dbfe9452202 Mon Sep 17 00:00:00 2001 +From: kj1724 <78624900+kj1724@users.noreply.github.com> +Date: Wed, 28 Apr 2021 10:22:38 -0400 +Subject: [PATCH] gcp-vpc-move-vip.in: Adds retries + +If the cluster fails a monitoring event, it will try to restart the resource. If the resource agent makes an API/metadata call that fails at that time, the resource will be considered "failed", but in certain case also "unconfigured", which prevents further operations. + +These changes can help the agent recover on certain intermittent failures. +--- + heartbeat/gcp-vpc-move-vip.in | 62 ++++++++++++++++++++--------------- + 1 file changed, 35 insertions(+), 27 deletions(-) + +diff --git a/heartbeat/gcp-vpc-move-vip.in b/heartbeat/gcp-vpc-move-vip.in +index bbbd87b7a9..c411555110 100755 +--- a/heartbeat/gcp-vpc-move-vip.in ++++ b/heartbeat/gcp-vpc-move-vip.in +@@ -50,6 +50,8 @@ REMOVE = 1 + CONN = None + THIS_VM = None + ALIAS = None ++MAX_RETRIES = 3 ++RETRY_BACKOFF_SECS = 1 + METADATA_SERVER = 'http://metadata.google.internal/computeMetadata/v1/' + METADATA_HEADERS = {'Metadata-Flavor': 'Google'} + METADATA = \ +@@ -111,18 +113,37 @@ def get_metadata(metadata_key, params=None, timeout=None): + + Returns: + HTTP response from the GET request. +- +- Raises: +- urlerror.HTTPError: raises when the GET request fails. + """ +- timeout = timeout or 60 +- metadata_url = os.path.join(METADATA_SERVER, metadata_key) +- params = urlparse.urlencode(params or {}) +- url = '%s?%s' % (metadata_url, params) +- request = urlrequest.Request(url, headers=METADATA_HEADERS) +- request_opener = urlrequest.build_opener(urlrequest.ProxyHandler({})) +- return request_opener.open( +- request, timeout=timeout * 1.1).read().decode("utf-8") ++ for i in range(MAX_RETRIES): ++ try: ++ timeout = timeout or 60 ++ metadata_url = os.path.join(METADATA_SERVER, metadata_key) ++ params = urlparse.urlencode(params or {}) ++ url = '%s?%s' % (metadata_url, params) ++ request = urlrequest.Request(url, headers=METADATA_HEADERS) ++ request_opener = urlrequest.build_opener(urlrequest.ProxyHandler({})) ++ return request_opener.open( ++ request, timeout=timeout * 1.1).read().decode("utf-8") ++ except Exception as e: ++ logger.error('Couldn\'t get instance name, is this running inside GCE?: ' ++ + str(e)) ++ time.sleep(RETRY_BACKOFF_SECS * (i + 1)) ++ ++ # If the retries are exhausted we exit with a generic error. ++ sys.exit(OCF_ERR_GENERIC) ++ ++ ++def create_api_connection(): ++ for i in range(MAX_RETRIES): ++ try: ++ return googleapiclient.discovery.build('compute', 'v1', ++ cache_discovery=False) ++ except Exception as e: ++ logger.error('Couldn\'t connect with google api: ' + str(e)) ++ time.sleep(RETRY_BACKOFF_SECS * (i + 1)) ++ ++ # If the retries are exhausted we exit with a generic error. ++ sys.exit(OCF_ERR_GENERIC) + + + def get_instance(project, zone, instance): +@@ -358,24 +379,11 @@ def gcp_alias_status(alias): + + def validate(): + global ALIAS +- global CONN + global THIS_VM ++ global CONN + +- # Populate global vars +- try: +- CONN = googleapiclient.discovery.build('compute', 'v1', +- cache_discovery=False) +- except Exception as e: +- logger.error('Couldn\'t connect with google api: ' + str(e)) +- sys.exit(OCF_ERR_CONFIGURED) +- +- try: +- THIS_VM = get_metadata('instance/name') +- except Exception as e: +- logger.error('Couldn\'t get instance name, is this running inside GCE?: ' +- + str(e)) +- sys.exit(OCF_ERR_CONFIGURED) +- ++ CONN = create_api_connection() ++ THIS_VM = get_metadata('instance/name') + ALIAS = os.environ.get('OCF_RESKEY_alias_ip') + if not ALIAS: + logger.error('Missing alias_ip parameter') diff --git a/SPECS/resource-agents.spec b/SPECS/resource-agents.spec index 056c1e3..baa5133 100644 --- a/SPECS/resource-agents.spec +++ b/SPECS/resource-agents.spec @@ -70,7 +70,7 @@ Name: resource-agents Summary: Open Source HA Reusable Cluster Resource Scripts Version: 4.1.1 -Release: 93%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist} +Release: 94%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist} License: GPLv2+ and LGPLv2+ URL: https://github.com/ClusterLabs/resource-agents %if 0%{?fedora} || 0%{?centos_version} || 0%{?rhel} @@ -278,6 +278,7 @@ Patch186: bz1940094-aws-agents-dont-spam-logs.patch Patch187: bz1939281-aws-vpc-move-ip-add-ENI-lookup.patch Patch188: bz1934651-db2-add-PRIMARY-REMOTE_CATCHUP_PENDING-CONNECTED.patch Patch189: bz1872754-pgsqlms-new-ra.patch +Patch190: bz1957765-gcp-vpc-move-vip-retry.patch # bundle patches Patch1000: 7-gcp-bundled.patch @@ -640,6 +641,7 @@ exit 1 %patch187 -p1 -F2 %patch188 -p1 %patch189 -p1 +%patch190 -p1 chmod 755 heartbeat/nova-compute-wait chmod 755 heartbeat/NovaEvacuate @@ -1221,6 +1223,11 @@ ccs_update_schema > /dev/null 2>&1 ||: %{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm %changelog +* Wed May 12 2021 Oyvind Albrigtsen - 4.1.1-94 +- gcp-vpc-move-vip: add retry logic + + Resolves: rhbz#1957765 + * Wed Apr 28 2021 Oyvind Albrigtsen - 4.1.1-93 - db2: add PRIMARY/REMOTE_CATCHUP_PENDING/CONNECTED status to promote-check - pgsqlms: new resource agent