diff --git a/SOURCES/bz1633251-gcp-pd-move-1.patch b/SOURCES/bz1633251-gcp-pd-move-1.patch new file mode 100644 index 0000000..c7cbe8e --- /dev/null +++ b/SOURCES/bz1633251-gcp-pd-move-1.patch @@ -0,0 +1,425 @@ +From dedf420b8aa7e7e64fa56eeda2d7aeb5b2a5fcd9 Mon Sep 17 00:00:00 2001 +From: Gustavo Serra Scalet +Date: Mon, 17 Sep 2018 12:29:51 -0300 +Subject: [PATCH] Add gcp-pd-move python script + +--- + configure.ac | 1 + + doc/man/Makefile.am | 1 + + heartbeat/Makefile.am | 1 + + heartbeat/gcp-pd-move.in | 370 +++++++++++++++++++++++++++++++++++++++ + 4 files changed, 373 insertions(+) + create mode 100755 heartbeat/gcp-pd-move.in + +diff --git a/configure.ac b/configure.ac +index 10f5314da..b7ffb99f3 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -958,6 +958,7 @@ AC_CONFIG_FILES([heartbeat/conntrackd], [chmod +x heartbeat/conntrackd]) + AC_CONFIG_FILES([heartbeat/dnsupdate], [chmod +x heartbeat/dnsupdate]) + AC_CONFIG_FILES([heartbeat/eDir88], [chmod +x heartbeat/eDir88]) + AC_CONFIG_FILES([heartbeat/fio], [chmod +x heartbeat/fio]) ++AC_CONFIG_FILES([heartbeat/gcp-pd-move], [chmod +x heartbeat/gcp-pd-move]) + AC_CONFIG_FILES([heartbeat/gcp-vpc-move-ip], [chmod +x heartbeat/gcp-vpc-move-ip]) + AC_CONFIG_FILES([heartbeat/gcp-vpc-move-vip], [chmod +x heartbeat/gcp-vpc-move-vip]) + AC_CONFIG_FILES([heartbeat/gcp-vpc-move-route], [chmod +x heartbeat/gcp-vpc-move-route]) +diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am +index 0bef88740..0235c9af6 100644 +--- a/doc/man/Makefile.am ++++ b/doc/man/Makefile.am +@@ -115,6 +115,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \ + ocf_heartbeat_fio.7 \ + ocf_heartbeat_galera.7 \ + ocf_heartbeat_garbd.7 \ ++ ocf_heartbeat_gcp-pd-move.7 \ + ocf_heartbeat_gcp-vpc-move-ip.7 \ + ocf_heartbeat_gcp-vpc-move-vip.7 \ + ocf_heartbeat_gcp-vpc-move-route.7 \ +diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am +index 993bff042..843186c98 100644 +--- a/heartbeat/Makefile.am ++++ b/heartbeat/Makefile.am +@@ -111,6 +111,7 @@ ocf_SCRIPTS = AoEtarget \ + fio \ + galera \ + garbd \ ++ gcp-pd-move \ + gcp-vpc-move-ip \ + gcp-vpc-move-vip \ + gcp-vpc-move-route \ +diff --git a/heartbeat/gcp-pd-move.in b/heartbeat/gcp-pd-move.in +new file mode 100755 +index 000000000..f9f6c3163 +--- /dev/null ++++ b/heartbeat/gcp-pd-move.in +@@ -0,0 +1,370 @@ ++#!@PYTHON@ -tt ++# - *- coding: utf- 8 - *- ++# ++# --------------------------------------------------------------------- ++# Copyright 2018 Google Inc. ++# ++# Licensed under the Apache License, Version 2.0 (the "License"); ++# you may not use this file except in compliance with the License. ++# You may obtain a copy of the License at ++# ++# http://www.apache.org/licenses/LICENSE-2.0 ++# Unless required by applicable law or agreed to in writing, software ++# distributed under the License is distributed on an "AS IS" BASIS, ++# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ++# See the License for the specific language governing permissions and ++# limitations under the License. 
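The configure.ac and Makefile.am hunks above register the new agent with the build system: AC_CONFIG_FILES substitutes @PYTHON@ into heartbeat/gcp-pd-move.in and marks the generated script executable, and the man-page and ocf_SCRIPTS lists pick it up. The agent itself implements the usual OCF contract: Pacemaker invokes it with a single action argument and interprets its exit code. A minimal sketch of that contract — separate from the patch, with hypothetical no-op handlers; the agent's real dispatch is in main() further down:

# Illustrative OCF action dispatch (not part of the patch).
import sys

OCF_SUCCESS = 0
OCF_ERR_UNIMPLEMENTED = 3
OCF_NOT_RUNNING = 7

def main():
    action = sys.argv[1] if len(sys.argv) > 1 else ""
    handlers = {
        "start": lambda: OCF_SUCCESS,        # would attach the disk
        "stop": lambda: OCF_SUCCESS,         # would detach the disk
        "monitor": lambda: OCF_NOT_RUNNING,  # would report attachment state
    }
    sys.exit(handlers.get(action, lambda: OCF_ERR_UNIMPLEMENTED)())

if __name__ == "__main__":
    main()
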
++# --------------------------------------------------------------------- ++# Description: Google Cloud Platform - Disk attach ++# --------------------------------------------------------------------- ++ ++import json ++import logging ++import os ++import re ++import sys ++import time ++ ++OCF_FUNCTIONS_DIR = "%s/lib/heartbeat" % os.environ.get("OCF_ROOT") ++sys.path.append(OCF_FUNCTIONS_DIR) ++ ++import ocf ++ ++try: ++ import googleapiclient.discovery ++except ImportError: ++ pass ++ ++if sys.version_info >= (3, 0): ++ # Python 3 imports. ++ import urllib.parse as urlparse ++ import urllib.request as urlrequest ++else: ++ # Python 2 imports. ++ import urllib as urlparse ++ import urllib2 as urlrequest ++ ++ ++CONN = None ++PROJECT = None ++ZONE = None ++REGION = None ++LIST_DISK_ATTACHED_INSTANCES = None ++INSTANCE_NAME = None ++ ++PARAMETERS = { ++ 'disk_name': None, ++ 'disk_scope': None, ++ 'disk_csek_file': None, ++ 'mode': None, ++ 'device_name': None, ++} ++ ++MANDATORY_PARAMETERS = ['disk_name', 'disk_scope'] ++ ++METADATA_SERVER = 'http://metadata.google.internal/computeMetadata/v1/' ++METADATA_HEADERS = {'Metadata-Flavor': 'Google'} ++METADATA = ''' ++ ++ ++1.0 ++ ++Resource Agent that can attach or detach a regional/zonal disk on current GCP ++instance. ++Requirements : ++- Disk has to be properly created as regional/zonal in order to be used ++correctly. ++ ++Attach/Detach a persistent disk on current GCP instance ++ ++ ++The name of the GCP disk. ++Disk name ++ ++ ++ ++Disk scope ++Network name ++ ++ ++ ++Path to a Customer-Supplied Encryption Key (CSEK) key file ++Customer-Supplied Encryption Key file ++ ++ ++ ++Attachment mode (rw, ro) ++Attachment mode ++ ++ ++ ++An optional name that indicates the disk name the guest operating system will see. ++Optional device name ++ ++ ++ ++ ++ ++ ++ ++ ++ ++''' ++ ++ ++def get_metadata(metadata_key, params=None, timeout=None): ++ """Performs a GET request with the metadata headers. ++ ++ Args: ++ metadata_key: string, the metadata to perform a GET request on. ++ params: dictionary, the query parameters in the GET request. ++ timeout: int, timeout in seconds for metadata requests. ++ ++ Returns: ++ HTTP response from the GET request. ++ ++ Raises: ++ urlerror.HTTPError: raises when the GET request fails. 
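The get_metadata() helper documented above talks to the GCE metadata service. A standalone sketch of the same request pattern, in Python 3 syntax only (the agent itself keeps the Python 2 urllib2 fallback shown earlier); this only works from inside a GCE instance:

import urllib.request

METADATA_SERVER = 'http://metadata.google.internal/computeMetadata/v1/'

def fetch(key, timeout=60):
    # The Metadata-Flavor header is mandatory; the server rejects
    # requests that lack it.
    req = urllib.request.Request(METADATA_SERVER + key,
                                 headers={'Metadata-Flavor': 'Google'})
    return urllib.request.urlopen(req, timeout=timeout).read().decode()

# e.g. fetch('instance/name'), fetch('instance/zone'), fetch('project/project-id')
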
++ """ ++ timeout = timeout or 60 ++ metadata_url = os.path.join(METADATA_SERVER, metadata_key) ++ params = urlparse.urlencode(params or {}) ++ url = '%s?%s' % (metadata_url, params) ++ request = urlrequest.Request(url, headers=METADATA_HEADERS) ++ request_opener = urlrequest.build_opener(urlrequest.ProxyHandler({})) ++ return request_opener.open(request, timeout=timeout * 1.1).read() ++ ++ ++def populate_vars(): ++ global CONN ++ global INSTANCE_NAME ++ global PROJECT ++ global ZONE ++ global REGION ++ global LIST_DISK_ATTACHED_INSTANCES ++ ++ global PARAMETERS ++ ++ # Populate global vars ++ try: ++ CONN = googleapiclient.discovery.build('compute', 'v1') ++ except Exception as e: ++ logger.error('Couldn\'t connect with google api: ' + str(e)) ++ sys.exit(ocf.OCF_ERR_CONFIGURED) ++ ++ for param in PARAMETERS: ++ value = os.environ.get('OCF_RESKEY_%s' % param, None) ++ if not value and param in MANDATORY_PARAMETERS: ++ logger.error('Missing %s mandatory parameter' % param) ++ sys.exit(ocf.OCF_ERR_CONFIGURED) ++ PARAMETERS[param] = value ++ ++ try: ++ INSTANCE_NAME = get_metadata('instance/name') ++ except Exception as e: ++ logger.error( ++ 'Couldn\'t get instance name, is this running inside GCE?: ' + str(e)) ++ sys.exit(ocf.OCF_ERR_CONFIGURED) ++ ++ PROJECT = get_metadata('project/project-id') ++ ZONE = get_metadata('instance/zone').split('/')[-1] ++ REGION = ZONE[:-2] ++ LIST_DISK_ATTACHED_INSTANCES = get_disk_attached_instances( ++ PARAMETERS['disk_name']) ++ ++ ++def configure_logs(): ++ # Prepare logging ++ global logger ++ logging.getLogger('googleapiclient').setLevel(logging.WARN) ++ logging_env = os.environ.get('OCF_RESKEY_stackdriver_logging') ++ if logging_env: ++ logging_env = logging_env.lower() ++ if any(x in logging_env for x in ['yes', 'true', 'enabled']): ++ try: ++ import google.cloud.logging.handlers ++ client = google.cloud.logging.Client() ++ handler = google.cloud.logging.handlers.CloudLoggingHandler( ++ client, name=INSTANCE_NAME) ++ handler.setLevel(logging.INFO) ++ formatter = logging.Formatter('gcp:alias "%(message)s"') ++ handler.setFormatter(formatter) ++ ocf.log.addHandler(handler) ++ logger = logging.LoggerAdapter( ++ ocf.log, {'OCF_RESOURCE_INSTANCE': ocf.OCF_RESOURCE_INSTANCE}) ++ except ImportError: ++ logger.error('Couldn\'t import google.cloud.logging, ' ++ 'disabling Stackdriver-logging support') ++ ++ ++def wait_for_operation(operation): ++ while True: ++ result = CONN.zoneOperations().get( ++ project=PROJECT, ++ zone=ZONE, ++ operation=operation['name']).execute() ++ ++ if result['status'] == 'DONE': ++ if 'error' in result: ++ raise Exception(result['error']) ++ return ++ time.sleep(1) ++ ++ ++def get_disk_attached_instances(disk): ++ def get_users_list(): ++ fl = 'name="%s"' % disk ++ request = CONN.disks().aggregatedList(project=PROJECT, filter=fl) ++ while request is not None: ++ response = request.execute() ++ locations = response.get('items', {}) ++ for location in locations.values(): ++ for d in location.get('disks', []): ++ if d['name'] == disk: ++ return d.get('users', []) ++ request = CONN.instances().aggregatedList_next( ++ previous_request=request, previous_response=response) ++ raise Exception("Unable to find disk %s" % disk) ++ ++ def get_only_instance_name(user): ++ return re.sub('.*/instances/', '', user) ++ ++ return map(get_only_instance_name, get_users_list()) ++ ++ ++def is_disk_attached(instance): ++ return instance in LIST_DISK_ATTACHED_INSTANCES ++ ++ ++def detach_disk(instance, disk_name): ++ # Python API misses disk-scope 
argument. ++ ++ # Detaching a disk is only possible by using deviceName, which is retrieved ++ # as a disk parameter when listing the instance information ++ request = CONN.instances().get( ++ project=PROJECT, zone=ZONE, instance=instance) ++ response = request.execute() ++ ++ device_name = None ++ for disk in response['disks']: ++ if disk_name in disk['source']: ++ device_name = disk['deviceName'] ++ break ++ ++ if not device_name: ++ logger.error("Didn't find %(d)s deviceName attached to %(i)s" % { ++ 'd': disk_name, ++ 'i': instance, ++ }) ++ return ++ ++ request = CONN.instances().detachDisk( ++ project=PROJECT, zone=ZONE, instance=instance, deviceName=device_name) ++ wait_for_operation(request.execute()) ++ ++ ++def attach_disk(instance, disk_name): ++ location = 'zones/%s' % ZONE ++ if PARAMETERS['disk_scope'] == 'regional': ++ location = 'regions/%s' % REGION ++ prefix = 'https://www.googleapis.com/compute/v1' ++ body = { ++ 'source': '%(prefix)s/projects/%(project)s/%(location)s/disks/%(disk)s' % { ++ 'prefix': prefix, ++ 'project': PROJECT, ++ 'location': location, ++ 'disk': disk_name, ++ }, ++ } ++ ++ # Customer-Supplied Encryption Key (CSEK) ++ if PARAMETERS['disk_csek_file']: ++ with open(PARAMETERS['disk_csek_file']) as csek_file: ++ body['diskEncryptionKey'] = { ++ 'rawKey': csek_file.read(), ++ } ++ ++ if PARAMETERS['device_name']: ++ body['deviceName'] = PARAMETERS['device_name'] ++ ++ if PARAMETERS['mode']: ++ body['mode'] = PARAMETERS['mode'] ++ ++ force_attach = None ++ if PARAMETERS['disk_scope'] == 'regional': ++ # Python API misses disk-scope argument. ++ force_attach = True ++ else: ++ # If this disk is attached to some instance, detach it first. ++ for other_instance in LIST_DISK_ATTACHED_INSTANCES: ++ logger.info("Detaching disk %(disk_name)s from other instance %(i)s" % { ++ 'disk_name': PARAMETERS['disk_name'], ++ 'i': other_instance, ++ }) ++ detach_disk(other_instance, PARAMETERS['disk_name']) ++ ++ request = CONN.instances().attachDisk( ++ project=PROJECT, zone=ZONE, instance=instance, body=body, ++ forceAttach=force_attach) ++ wait_for_operation(request.execute()) ++ ++ ++def fetch_data(): ++ configure_logs() ++ populate_vars() ++ ++ ++def gcp_pd_move_start(): ++ fetch_data() ++ if not is_disk_attached(INSTANCE_NAME): ++ logger.info("Attaching disk %(disk_name)s to %(instance)s" % { ++ 'disk_name': PARAMETERS['disk_name'], ++ 'instance': INSTANCE_NAME, ++ }) ++ attach_disk(INSTANCE_NAME, PARAMETERS['disk_name']) ++ ++ ++def gcp_pd_move_stop(): ++ fetch_data() ++ if is_disk_attached(INSTANCE_NAME): ++ logger.info("Detaching disk %(disk_name)s to %(instance)s" % { ++ 'disk_name': PARAMETERS['disk_name'], ++ 'instance': INSTANCE_NAME, ++ }) ++ detach_disk(INSTANCE_NAME, PARAMETERS['disk_name']) ++ ++ ++def gcp_pd_move_status(): ++ fetch_data() ++ if is_disk_attached(INSTANCE_NAME): ++ logger.info("Disk %(disk_name)s is correctly attached to %(instance)s" % { ++ 'disk_name': PARAMETERS['disk_name'], ++ 'instance': INSTANCE_NAME, ++ }) ++ else: ++ sys.exit(ocf.OCF_NOT_RUNNING) ++ ++ ++def main(): ++ if len(sys.argv) < 2: ++ logger.error('Missing argument') ++ return ++ ++ command = sys.argv[1] ++ if 'meta-data' in command: ++ print(METADATA) ++ return ++ ++ if command in 'start': ++ gcp_pd_move_start() ++ elif command in 'stop': ++ gcp_pd_move_stop() ++ elif command in ('monitor', 'status'): ++ gcp_pd_move_status() ++ else: ++ configure_logs() ++ logger.error('no such function %s' % str(command)) ++ ++ ++if __name__ == "__main__": ++ main() diff --git 
a/SOURCES/bz1633251-gcp-pd-move-2-use-OCF_FUNCTIONS_DIR.patch b/SOURCES/bz1633251-gcp-pd-move-2-use-OCF_FUNCTIONS_DIR.patch new file mode 100644 index 0000000..9a9681c --- /dev/null +++ b/SOURCES/bz1633251-gcp-pd-move-2-use-OCF_FUNCTIONS_DIR.patch @@ -0,0 +1,18 @@ +commit cbe0e6507992b50afbaebc46dfaf8955cc02e5ec +Author: Oyvind Albrigtsen + + Python agents: use OCF_FUNCTIONS_DIR env variable when available + +diff --git a/heartbeat/gcp-pd-move.in b/heartbeat/gcp-pd-move.in +index f9f6c316..c5007a43 100755 +--- a/heartbeat/gcp-pd-move.in ++++ b/heartbeat/gcp-pd-move.in +@@ -25,7 +25,7 @@ import re + import sys + import time + +-OCF_FUNCTIONS_DIR = "%s/lib/heartbeat" % os.environ.get("OCF_ROOT") ++OCF_FUNCTIONS_DIR = os.environ.get("OCF_FUNCTIONS_DIR", "%s/lib/heartbeat" % os.environ.get("OCF_ROOT")) + sys.path.append(OCF_FUNCTIONS_DIR) + + import ocf diff --git a/SOURCES/bz1633251-gcp-pd-move-3-add-stackdriver_logging-to-metadata.patch b/SOURCES/bz1633251-gcp-pd-move-3-add-stackdriver_logging-to-metadata.patch new file mode 100644 index 0000000..5819b94 --- /dev/null +++ b/SOURCES/bz1633251-gcp-pd-move-3-add-stackdriver_logging-to-metadata.patch @@ -0,0 +1,48 @@ +From 4fa41a1d7b4bee31526649c40cc4c58bc6333917 Mon Sep 17 00:00:00 2001 +From: masaki-tamura +Date: Wed, 2 Oct 2019 17:12:42 +0900 +Subject: [PATCH 1/2] add parameter stackdriver_logging + +--- + heartbeat/gcp-pd-move.in | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/heartbeat/gcp-pd-move.in b/heartbeat/gcp-pd-move.in +index c5007a43c..fac5c9744 100755 +--- a/heartbeat/gcp-pd-move.in ++++ b/heartbeat/gcp-pd-move.in +@@ -102,6 +102,11 @@ correctly. + Optional device name + + ++ ++Use stackdriver_logging output to global resource (yes, true, enabled) ++Use stackdriver_logging ++ ++ + + + + +From f762ce3da00e1775587a04751a8828ba004fb534 Mon Sep 17 00:00:00 2001 +From: masaki-tamura +Date: Wed, 2 Oct 2019 17:44:30 +0900 +Subject: [PATCH 2/2] defautl no + +--- + heartbeat/gcp-pd-move.in | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/heartbeat/gcp-pd-move.in b/heartbeat/gcp-pd-move.in +index fac5c9744..7fabc80dc 100755 +--- a/heartbeat/gcp-pd-move.in ++++ b/heartbeat/gcp-pd-move.in +@@ -105,7 +105,7 @@ correctly. + + Use stackdriver_logging output to global resource (yes, true, enabled) + Use stackdriver_logging +- ++ + + + diff --git a/SOURCES/bz1759115-aws-vpc-route53-1-update.patch b/SOURCES/bz1759115-aws-vpc-route53-1-update.patch new file mode 100644 index 0000000..9c689b1 --- /dev/null +++ b/SOURCES/bz1759115-aws-vpc-route53-1-update.patch @@ -0,0 +1,273 @@ +--- ClusterLabs-resource-agents-e711383f/heartbeat/aws-vpc-route53.in 2018-06-29 14:05:02.000000000 +0200 ++++ /home/oalbrigt/src/resource-agents/heartbeat/aws-vpc-route53.in 2019-11-07 12:24:18.822111495 +0100 +@@ -152,9 +152,15 @@ + END + } + +-ec2ip_validate() { ++r53_validate() { + ocf_log debug "function: validate" + ++ # Check for required binaries ++ ocf_log debug "Checking for required binaries" ++ for command in curl dig; do ++ check_binary "$command" ++ done ++ + # Full name + [[ -z "$OCF_RESKEY_fullname" ]] && ocf_log error "Full name parameter not set $OCF_RESKEY_fullname!" 
&& exit $OCF_ERR_CONFIGURED + +@@ -175,32 +181,111 @@ + ocf_log debug "ok" + + if [ -n "$OCF_RESKEY_profile" ]; then +- AWS_PROFILE_OPT="--profile $OCF_RESKEY_profile" ++ AWS_PROFILE_OPT="--profile $OCF_RESKEY_profile --cli-connect-timeout 10" + else +- AWS_PROFILE_OPT="--profile default" ++ AWS_PROFILE_OPT="--profile default --cli-connect-timeout 10" + fi + + return $OCF_SUCCESS + } + +-ec2ip_monitor() { +- ec2ip_validate ++r53_monitor() { ++ # ++ # For every start action the agent will call Route53 API to check for DNS record ++ # otherwise it will try to get results directly bu querying the DNS using "dig". ++ # Due to complexity in some DNS architectures "dig" can fail, and if this happens ++ # the monitor will fallback to the Route53 API call. ++ # ++ # There will be no failure, failover or restart of the agent if the monitor operation fails ++ # hence we only return $OCF_SUCESS in this function ++ # ++ # In case of the monitor operation detects a wrong or non-existent Route53 DNS entry ++ # it will try to fix the existing one, or create it again ++ # ++ # ++ ARECORD="" ++ IPREGEX="^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$" ++ r53_validate + ocf_log debug "Checking Route53 record sets" +- IPADDRESS="$(ec2metadata aws ip | grep local-ipv4 | /usr/bin/awk '{ print $2 }')" +- ARECORD="$(aws $AWS_PROFILE_OPT route53 list-resource-record-sets --hosted-zone-id $OCF_RESKEY_hostedzoneid --query "ResourceRecordSets[?Name=='$OCF_RESKEY_fullname']" | grep RESOURCERECORDS | /usr/bin/awk '{ print $2 }' )" +- ocf_log debug "Found IP address: $ARECORD ." +- if [ "${ARECORD}" == "${IPADDRESS}" ]; then +- ocf_log debug "ARECORD $ARECORD found" ++ # ++ IPADDRESS="$(curl -s http://169.254.169.254/latest/meta-data/local-ipv4)" ++ # ++ if [ "$__OCF_ACTION" = "start" ] || ocf_is_probe ; then ++ # ++ cmd="aws $AWS_PROFILE_OPT route53 list-resource-record-sets --hosted-zone-id $OCF_RESKEY_hostedzoneid --query ResourceRecordSets[?Name=='$OCF_RESKEY_fullname']" ++ ocf_log info "Route53 Agent Starting or probing - executing monitoring API call: $cmd" ++ CLIRES="$($cmd 2>&1)" ++ rc=$? ++ ocf_log debug "awscli returned code: $rc" ++ if [ $rc -ne 0 ]; then ++ CLIRES=$(echo $CLIRES | grep -v '^$') ++ ocf_log warn "Route53 API returned an error: $CLIRES" ++ ocf_log warn "Skipping cluster action due to API call error" ++ return $OCF_ERR_GENERIC ++ fi ++ ARECORD=$(echo $CLIRES | grep RESOURCERECORDS | awk '{ print $5 }') ++ # ++ if ocf_is_probe; then ++ # ++ # Prevent R53 record change during probe ++ # ++ if [[ $ARECORD =~ $IPREGEX ]] && [ "$ARECORD" != "$IPADDRESS" ]; then ++ ocf_log debug "Route53 DNS record $ARECORD found at probing, disregarding" ++ return $OCF_NOT_RUNNING ++ fi ++ fi ++ else ++ # ++ cmd="dig +retries=3 +time=5 +short $OCF_RESKEY_fullname 2>/dev/null" ++ ocf_log info "executing monitoring command : $cmd" ++ ARECORD="$($cmd)" ++ rc=$? ++ ocf_log debug "dig return code: $rc" ++ # ++ if [[ ! $ARECORD =~ $IPREGEX ]] || [ $rc -ne 0 ]; then ++ ocf_log info "Fallback to Route53 API query due to DNS resolution failure" ++ cmd="aws $AWS_PROFILE_OPT route53 list-resource-record-sets --hosted-zone-id $OCF_RESKEY_hostedzoneid --query ResourceRecordSets[?Name=='$OCF_RESKEY_fullname']" ++ ocf_log debug "executing monitoring API call: $cmd" ++ CLIRES="$($cmd 2>&1)" ++ rc=$? 
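The monitor logic in this hunk checks the Route53 record through the aws CLI on start/probe and falls back to dig (and then back to the API) otherwise. For readers more comfortable with Python, the same record lookup could be expressed with boto3 — an assumption on my part, since the agent only shells out to `aws route53 list-resource-record-sets`:

import boto3

def current_a_record(zone_id, fullname):
    # fullname must carry the trailing dot, as the agent's metadata notes.
    r53 = boto3.client('route53')
    resp = r53.list_resource_record_sets(
        HostedZoneId=zone_id,
        StartRecordName=fullname,
        StartRecordType='A',
        MaxItems='1')
    for rrset in resp['ResourceRecordSets']:
        if rrset['Name'] == fullname and rrset['Type'] == 'A':
            return rrset['ResourceRecords'][0]['Value']
    return None
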
++ ocf_log debug "awscli return code: $rc" ++ if [ $rc -ne 0 ]; then ++ CLIRES=$(echo $CLIRES | grep -v '^$') ++ ocf_log warn "Route53 API returned an error: $CLIRES" ++ ocf_log warn "Monitor skipping cluster action due to API call error" ++ return $OCF_SUCCESS ++ fi ++ ARECORD=$(echo $CLIRES | grep RESOURCERECORDS | awk '{ print $5 }') ++ fi ++ # ++ fi ++ ocf_log info "Route53 DNS record pointing $OCF_RESKEY_fullname to IP address $ARECORD" ++ # ++ if [ "$ARECORD" == "$IPADDRESS" ]; then ++ ocf_log info "Route53 DNS record $ARECORD found" ++ return $OCF_SUCCESS ++ elif [[ $ARECORD =~ $IPREGEX ]] && [ "$ARECORD" != "$IPADDRESS" ]; then ++ ocf_log info "Route53 DNS record points to a different host, setting DNS record on Route53 to this host" ++ _update_record "UPSERT" "$IPADDRESS" + return $OCF_SUCCESS + else +- ocf_log debug "No ARECORD found" +- return $OCF_NOT_RUNNING ++ ocf_log info "No Route53 DNS record found, setting DNS record on Route53 to this host" ++ _update_record "UPSERT" "$IPADDRESS" ++ return $OCF_SUCCESS + fi + + return $OCF_SUCCESS + } + + _update_record() { ++ # ++ # This function is the one that will actually execute Route53's API call ++ # and configure the DNS record using the correct API calls and parameters ++ # ++ # It creates a temporary JSON file under /tmp with the required API payload ++ # ++ # Failures in this function are critical and will cause the agent to fail ++ # + update_action="$1" + IPADDRESS="$2" + ocf_log info "Updating Route53 $OCF_RESKEY_hostedzoneid with $IPADDRESS for $OCF_RESKEY_fullname" +@@ -209,19 +294,19 @@ + ocf_exit_reason "Failed to create temporary file for record update" + exit $OCF_ERR_GENERIC + fi +- cat >>"${ROUTE53RECORD}" <<-EOF ++ cat >>"$ROUTE53RECORD" <<-EOF + { + "Comment": "Update record to reflect new IP address for a system ", + "Changes": [ + { +- "Action": "${update_action}", ++ "Action": "$update_action", + "ResourceRecordSet": { +- "Name": "${OCF_RESKEY_fullname}", ++ "Name": "$OCF_RESKEY_fullname", + "Type": "A", +- "TTL": ${OCF_RESKEY_ttl}, ++ "TTL": $OCF_RESKEY_ttl, + "ResourceRecords": [ + { +- "Value": "${IPADDRESS}" ++ "Value": "$IPADDRESS" + } + ] + } +@@ -229,46 +314,53 @@ + ] + } + EOF +- cmd="aws --profile ${OCF_RESKEY_profile} route53 change-resource-record-sets --hosted-zone-id ${OCF_RESKEY_hostedzoneid} \ +- --change-batch file://${ROUTE53RECORD} " ++ cmd="aws --profile $OCF_RESKEY_profile route53 change-resource-record-sets --hosted-zone-id $OCF_RESKEY_hostedzoneid --change-batch file://$ROUTE53RECORD " + ocf_log debug "Executing command: $cmd" +- CHANGEID=$($cmd | grep CHANGEINFO | /usr/bin/awk -F'\t' '{ print $3 }' ) +- ocf_log debug "Change id: ${CHANGEID}" +- rmtempfile ${ROUTE53RECORD} +- CHANGEID=$(echo $CHANGEID |cut -d'/' -f 3 |cut -d'"' -f 1 ) +- ocf_log debug "Change id: ${CHANGEID}" ++ CLIRES="$($cmd 2>&1)" ++ rc=$? 
++ ocf_log debug "awscli returned code: $rc" ++ if [ $rc -ne 0 ]; then ++ CLIRES=$(echo $CLIRES | grep -v '^$') ++ ocf_log warn "Route53 API returned an error: $CLIRES" ++ ocf_log warn "Skipping cluster action due to API call error" ++ return $OCF_ERR_GENERIC ++ fi ++ CHANGEID=$(echo $CLIRES | awk '{ print $12 }') ++ ocf_log debug "Change id: $CHANGEID" ++ rmtempfile $ROUTE53RECORD ++ CHANGEID=$(echo $CHANGEID | cut -d'/' -f 3 | cut -d'"' -f 1 ) ++ ocf_log debug "Change id: $CHANGEID" + STATUS="PENDING" +- MYSECONDS=2 ++ MYSECONDS=20 + while [ "$STATUS" = 'PENDING' ]; do +- sleep ${MYSECONDS} +- STATUS="$(aws --profile ${OCF_RESKEY_profile} route53 get-change --id $CHANGEID | grep CHANGEINFO | /usr/bin/awk -F'\t' '{ print $4 }' |cut -d'"' -f 2 )" +- ocf_log debug "Waited for ${MYSECONDS} seconds and checked execution of Route 53 update status: ${STATUS} " ++ sleep $MYSECONDS ++ STATUS="$(aws --profile $OCF_RESKEY_profile route53 get-change --id $CHANGEID | grep CHANGEINFO | awk -F'\t' '{ print $4 }' |cut -d'"' -f 2 )" ++ ocf_log debug "Waited for $MYSECONDS seconds and checked execution of Route 53 update status: $STATUS " + done + } + +-ec2ip_stop() { +- ocf_log info "Bringing down Route53 agent. (Will remove ARECORD)" +- IPADDRESS="$(ec2metadata aws ip | grep local-ipv4 | /usr/bin/awk '{ print $2 }')" +- ARECORD="$(aws $AWS_PROFILE_OPT route53 list-resource-record-sets --hosted-zone-id $OCF_RESKEY_hostedzoneid --query "ResourceRecordSets[?Name=='$OCF_RESKEY_fullname']" | grep RESOURCERECORDS | /usr/bin/awk '{ print $2 }' )" +- ocf_log debug "Found IP address: $ARECORD ." +- if [ ${ARECORD} != ${IPADDRESS} ]; then +- ocf_log debug "No ARECORD found" +- return $OCF_SUCCESS +- else +- # determine IP address +- IPADDRESS="$(ec2metadata aws ip | grep local-ipv4 | /usr/bin/awk '{ print $2 }')" +- # Patch file +- ocf_log debug "Deleting IP address to ${IPADDRESS}" +- return $OCF_SUCCESS +- fi +- +- _update_record "DELETE" "$IPADDRESS" ++r53_stop() { ++ # ++ # Stop operation doesn't perform any API call or try to remove the DNS record ++ # this mostly because this is not necessarily mandatory or desired ++ # the start and monitor functions will take care of changing the DNS record ++ # if the agent starts in a different cluster node ++ # ++ ocf_log info "Bringing down Route53 agent. (Will NOT remove Route53 DNS record)" + return $OCF_SUCCESS + } + +-ec2ip_start() { +- IPADDRESS="$(ec2metadata aws ip | grep local-ipv4 | /usr/bin/awk '{ print $2 }')" +- _update_record "UPSERT" "$IPADDRESS" ++r53_start() { ++ # ++ # Start agent and config DNS in Route53 ++ # ++ ocf_log info "Starting Route53 DNS update...." ++ IPADDRESS="$(curl -s http://169.254.169.254/latest/meta-data/local-ipv4)" ++ r53_monitor ++ if [ $? 
!= $OCF_SUCCESS ]; then ++ ocf_log info "Could not start agent - check configurations" ++ return $OCF_ERR_GENERIC ++ fi + return $OCF_SUCCESS + } + +@@ -284,16 +376,16 @@ + exit $OCF_SUCCESS + ;; + monitor) +- ec2ip_monitor ++ r53_monitor + ;; + stop) +- ec2ip_stop ++ r53_stop + ;; + validate-all) +- ec2ip_validate ++ r53_validate + ;; + start) +- ec2ip_start ++ r53_start + ;; + *) + usage diff --git a/SOURCES/bz1759115-aws-vpc-route53-2-add-public-and-secondary-ip-support.patch b/SOURCES/bz1759115-aws-vpc-route53-2-add-public-and-secondary-ip-support.patch new file mode 100644 index 0000000..afb8bb6 --- /dev/null +++ b/SOURCES/bz1759115-aws-vpc-route53-2-add-public-and-secondary-ip-support.patch @@ -0,0 +1,220 @@ +From 9b77d06bfe3308692946b8ac08bc7ec3399a762b Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Thu, 2 Apr 2020 13:38:30 +0200 +Subject: [PATCH 1/2] aws-vpc-route53: cleanup and improvements + +--- + heartbeat/aws-vpc-route53.in | 73 ++++++++++++++++++++---------------- + 1 file changed, 41 insertions(+), 32 deletions(-) + +diff --git a/heartbeat/aws-vpc-route53.in b/heartbeat/aws-vpc-route53.in +index b276dfb3c..1cfc2b01f 100644 +--- a/heartbeat/aws-vpc-route53.in ++++ b/heartbeat/aws-vpc-route53.in +@@ -43,8 +43,14 @@ + : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} + . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs + ++OCF_RESKEY_hostedzoneid_default="" ++OCF_RESKEY_fullname_default="" ++OCF_RESKEY_ip_default="local" + OCF_RESKEY_ttl_default=10 + ++: ${OCF_RESKEY_hostedzoneid:=${OCF_RESKEY_hostedzoneid_default}} ++: ${OCF_RESKEY_fullname:=${OCF_RESKEY_fullname_default}} ++: ${OCF_RESKEY_ip:=${OCF_RESKEY_ip_default}} + : ${OCF_RESKEY_ttl:=${OCF_RESKEY_ttl_default}} + + ####################################################################### +@@ -104,7 +110,7 @@ Hosted zone ID of Route 53. This is the table of + the Route 53 record. + + AWS hosted zone ID +- ++ + + + +@@ -113,7 +119,7 @@ Example: service.cloud.example.corp. + Note: The trailing dot is important to Route53! + + Full service name +- ++ + + + +@@ -189,6 +195,31 @@ r53_validate() { + return $OCF_SUCCESS + } + ++r53_start() { ++ # ++ # Start agent and config DNS in Route53 ++ # ++ ocf_log info "Starting Route53 DNS update...." ++ IPADDRESS="$(curl -s http://169.254.169.254/latest/meta-data/local-ipv4)" ++ r53_monitor ++ if [ $? != $OCF_SUCCESS ]; then ++ ocf_log info "Could not start agent - check configurations" ++ return $OCF_ERR_GENERIC ++ fi ++ return $OCF_SUCCESS ++} ++ ++r53_stop() { ++ # ++ # Stop operation doesn't perform any API call or try to remove the DNS record ++ # this mostly because this is not necessarily mandatory or desired ++ # the start and monitor functions will take care of changing the DNS record ++ # if the agent starts in a different cluster node ++ # ++ ocf_log info "Bringing down Route53 agent. (Will NOT remove Route53 DNS record)" ++ return $OCF_SUCCESS ++} ++ + r53_monitor() { + # + # For every start action the agent will call Route53 API to check for DNS record +@@ -339,31 +370,6 @@ _update_record() { + done + } + +-r53_stop() { +- # +- # Stop operation doesn't perform any API call or try to remove the DNS record +- # this mostly because this is not necessarily mandatory or desired +- # the start and monitor functions will take care of changing the DNS record +- # if the agent starts in a different cluster node +- # +- ocf_log info "Bringing down Route53 agent. 
(Will NOT remove Route53 DNS record)" +- return $OCF_SUCCESS +-} +- +-r53_start() { +- # +- # Start agent and config DNS in Route53 +- # +- ocf_log info "Starting Route53 DNS update...." +- IPADDRESS="$(curl -s http://169.254.169.254/latest/meta-data/local-ipv4)" +- r53_monitor +- if [ $? != $OCF_SUCCESS ]; then +- ocf_log info "Could not start agent - check configurations" +- return $OCF_ERR_GENERIC +- fi +- return $OCF_SUCCESS +-} +- + ############################################################################### + + case $__OCF_ACTION in +@@ -375,20 +381,23 @@ case $__OCF_ACTION in + metadata + exit $OCF_SUCCESS + ;; +- monitor) +- r53_monitor ++ start) ++ r53_validate || exit $? ++ r53_start + ;; + stop) + r53_stop + ;; ++ monitor) ++ r53_monitor ++ ;; + validate-all) + r53_validate + ;; +- start) +- r53_start +- ;; + *) + usage + exit $OCF_ERR_UNIMPLEMENTED + ;; + esac ++ ++exit $? + +From 745c6b9b3e331ed3705a641f1ec03a2604de3a1d Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Thu, 2 Apr 2020 13:40:33 +0200 +Subject: [PATCH 2/2] aws-vpc-route53: add support for public and secondary + private IPs + +--- + heartbeat/aws-vpc-route53.in | 31 +++++++++++++++++++++++++++++-- + 1 file changed, 29 insertions(+), 2 deletions(-) + +diff --git a/heartbeat/aws-vpc-route53.in b/heartbeat/aws-vpc-route53.in +index 1cfc2b01f..ca6556951 100644 +--- a/heartbeat/aws-vpc-route53.in ++++ b/heartbeat/aws-vpc-route53.in +@@ -121,6 +121,15 @@ Note: The trailing dot is important to Route53! + Full service name + + ++ ++ ++IP (local (default), public or secondary private IP address (e.g. 10.0.0.1). ++ ++A secondary private IP can be setup with the awsvip agent. ++ ++Type of IP or secondary private IP address (local, public or e.g. 10.0.0.1) ++ ++ + + + Time to live for Route53 ARECORD +@@ -173,6 +182,15 @@ r53_validate() { + # Hosted Zone ID + [[ -z "$OCF_RESKEY_hostedzoneid" ]] && ocf_log error "Hosted Zone ID parameter not set $OCF_RESKEY_hostedzoneid!" && exit $OCF_ERR_CONFIGURED + ++ # Type of IP/secondary IP address ++ case $OCF_RESKEY_ip in ++ local|public|*.*.*.*) ++ ;; ++ *) ++ ocf_exit_reason "Invalid value for ip: ${OCF_RESKEY_ip}" ++ exit $OCF_ERR_CONFIGURED ++ esac ++ + # profile + [[ -z "$OCF_RESKEY_profile" ]] && ocf_log error "AWS CLI profile not set $OCF_RESKEY_profile!" && exit $OCF_ERR_CONFIGURED + +@@ -200,7 +218,7 @@ r53_start() { + # Start agent and config DNS in Route53 + # + ocf_log info "Starting Route53 DNS update...." +- IPADDRESS="$(curl -s http://169.254.169.254/latest/meta-data/local-ipv4)" ++ _get_ip + r53_monitor + if [ $? 
!= $OCF_SUCCESS ]; then + ocf_log info "Could not start agent - check configurations" +@@ -239,7 +257,7 @@ r53_monitor() { + r53_validate + ocf_log debug "Checking Route53 record sets" + # +- IPADDRESS="$(curl -s http://169.254.169.254/latest/meta-data/local-ipv4)" ++ _get_ip + # + if [ "$__OCF_ACTION" = "start" ] || ocf_is_probe ; then + # +@@ -308,6 +326,15 @@ r53_monitor() { + return $OCF_SUCCESS + } + ++_get_ip() { ++ case $OCF_RESKEY_ip in ++ local|public) ++ IPADDRESS="$(curl -s http://169.254.169.254/latest/meta-data/${OCF_RESKEY_ip}-ipv4)";; ++ *.*.*.*) ++ IPADDRESS="${OCF_RESKEY_ip}";; ++ esac ++} ++ + _update_record() { + # + # This function is the one that will actually execute Route53's API call diff --git a/SOURCES/bz1792237-redis-1-fix-validate-all.patch b/SOURCES/bz1792237-redis-1-fix-validate-all.patch new file mode 100644 index 0000000..1d94852 --- /dev/null +++ b/SOURCES/bz1792237-redis-1-fix-validate-all.patch @@ -0,0 +1,32 @@ +From 617adbf651e9e1767f8f52218beb0a572be7bc50 Mon Sep 17 00:00:00 2001 +From: zaenk +Date: Fri, 17 Jan 2020 09:23:28 +0100 +Subject: [PATCH] redis: validate_all: fixes file status tests + +--- + heartbeat/redis.in | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/heartbeat/redis.in b/heartbeat/redis.in +index 1486e48b0..b030a8223 100755 +--- a/heartbeat/redis.in ++++ b/heartbeat/redis.in +@@ -698,15 +698,15 @@ redis_notify() { + } + + redis_validate() { +- if [[ -x "$REDIS_SERVER" ]]; then ++ if [[ ! -x "$REDIS_SERVER" ]]; then + ocf_log err "validate: $REDIS_SERVER does not exist or is not executable" + return $OCF_ERR_INSTALLED + fi +- if [[ -x "$REDIS_CLIENT" ]]; then ++ if [[ ! -x "$REDIS_CLIENT" ]]; then + ocf_log err "validate: $REDIS_CLIENT does not exist or is not executable" + return $OCF_ERR_INSTALLED + fi +- if [[ -f "$REDIS_CONFIG" ]]; then ++ if [[ ! -f "$REDIS_CONFIG" ]]; then + ocf_log err "validate: $REDIS_CONFIG does not exist" + return $OCF_ERR_CONFIGURED + fi diff --git a/SOURCES/bz1792237-redis-2-run-validate-during-start.patch b/SOURCES/bz1792237-redis-2-run-validate-during-start.patch new file mode 100644 index 0000000..c5d3107 --- /dev/null +++ b/SOURCES/bz1792237-redis-2-run-validate-during-start.patch @@ -0,0 +1,24 @@ +From 10b94d052f7f0fd710af7da8a46270b7548efbdf Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Thu, 19 Mar 2020 10:55:35 +0100 +Subject: [PATCH] redis: run validate-action during start + +--- + heartbeat/redis.in | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/heartbeat/redis.in b/heartbeat/redis.in +index b030a8223..da7230a49 100755 +--- a/heartbeat/redis.in ++++ b/heartbeat/redis.in +@@ -673,6 +673,10 @@ + clientpasswd="$(sed -n -e 's/^\s*requirepass\s*\(.*\)\s*$/\1/p' < $REDIS_CONFIG | tail -n 1)" + fi + ++if [ "$__OCF_ACTION" = "start" ]; then ++ redis_validate || exit $? 
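The two redis patches above first fix inverted file tests in redis_validate() — as originally written, validation succeeded exactly when the binaries or config were missing — and then run that validation on every start. A Python rendering of the corrected checks, purely illustrative since redis.in is a shell agent:

import os

OCF_SUCCESS = 0
OCF_ERR_INSTALLED = 5
OCF_ERR_CONFIGURED = 6

def redis_validate(server, client, config):
    # Mirrors the fixed [[ ! -x ... ]] / [[ ! -f ... ]] tests.
    if not (os.path.isfile(server) and os.access(server, os.X_OK)):
        return OCF_ERR_INSTALLED
    if not (os.path.isfile(client) and os.access(client, os.X_OK)):
        return OCF_ERR_INSTALLED
    if not os.path.isfile(config):
        return OCF_ERR_CONFIGURED
    return OCF_SUCCESS
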
++fi ++ + ocf_log debug "action=${1:-$__OCF_ACTION} notify_type=${OCF_RESKEY_CRM_meta_notify_type} notify_operation=${OCF_RESKEY_CRM_meta_notify_operation} master_host=${OCF_RESKEY_CRM_meta_notify_master_uname} slave_host=${OCF_RESKEY_CRM_meta_notify_slave_uname} promote_host=${OCF_RESKEY_CRM_meta_notify_promote_uname} demote_host=${OCF_RESKEY_CRM_meta_notify_demote_uname}; params: bin=${OCF_RESKEY_bin} client_bin=${OCF_RESKEY_client_bin} config=${OCF_RESKEY_config} user=${OCF_RESKEY_user} rundir=${OCF_RESKEY_rundir} port=${OCF_RESKEY_port}" + + case "${1:-$__OCF_ACTION}" in diff --git a/SOURCES/bz1804658-azure-lb-1-remove-status-metadata.patch b/SOURCES/bz1804658-azure-lb-1-remove-status-metadata.patch new file mode 100644 index 0000000..f77788b --- /dev/null +++ b/SOURCES/bz1804658-azure-lb-1-remove-status-metadata.patch @@ -0,0 +1,21 @@ +From c5e465fc1e51d0c59c3427cad918a507f8424b99 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Thu, 24 Jan 2019 13:04:18 +0100 +Subject: [PATCH] azure-lb: remove reference to status from usage + +--- + heartbeat/azure-lb | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/heartbeat/azure-lb b/heartbeat/azure-lb +index 3129648d4..35d364be7 100755 +--- a/heartbeat/azure-lb ++++ b/heartbeat/azure-lb +@@ -33,7 +33,6 @@ lb_usage() { + $0 manages service that answers Azure Load Balancer health probe requests as a OCF HA resource. + The 'start' operation starts the instance. + The 'stop' operation stops the instance. +- The 'status' operation reports whether the instance is running + The 'monitor' operation reports whether the instance seems to be working + The 'validate-all' operation reports whether the parameters are valid + END diff --git a/SOURCES/bz1804658-azure-lb-2-add-socat-support.patch b/SOURCES/bz1804658-azure-lb-2-add-socat-support.patch new file mode 100644 index 0000000..204b1d4 --- /dev/null +++ b/SOURCES/bz1804658-azure-lb-2-add-socat-support.patch @@ -0,0 +1,38 @@ +From 8bc6c37607c3908976e502c1b6a3d9b1a79c010a Mon Sep 17 00:00:00 2001 +From: Peter Varkoly +Date: Mon, 30 Sep 2019 14:31:39 +0200 +Subject: [PATCH] Adapt azure-lb Resource Agent to support socat usage too + +--- + heartbeat/azure-lb | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/heartbeat/azure-lb b/heartbeat/azure-lb +index 35d364be7..5ee767474 100755 +--- a/heartbeat/azure-lb ++++ b/heartbeat/azure-lb +@@ -53,9 +53,11 @@ Resource agent to answer Azure Load Balancer health probe requests + + + +-The full name of the nc binary. ++The full path of the used binary. This can be nc or socat path. ++The default is /usr/bin/nc. ++If you need /usr/bin/socat this parameter should be set. + +-Full path name of the nc binary ++Full path of the used binary (nc or socat are allowed) + + + +@@ -100,6 +102,10 @@ lb_monitor() { + + lb_start() { + cmd="$OCF_RESKEY_nc -l -k $OCF_RESKEY_port" ++ if [ $( basename $OCF_RESKEY_nc ) = 'socat' ]; then ++ #socat has different parameters ++ cmd="$OCF_RESKEY_nc -U TCP-LISTEN:$OCF_RESKEY_port,backlog=10,fork,reuseaddr /dev/null" ++ fi + if ! 
lb_monitor; then + ocf_log debug "Starting $process: $cmd" + # Execute the command as created above diff --git a/SOURCES/bz1810466-aws-vpc-move-ip-1-add-routing_table_role.patch b/SOURCES/bz1810466-aws-vpc-move-ip-1-add-routing_table_role.patch new file mode 100644 index 0000000..190f522 --- /dev/null +++ b/SOURCES/bz1810466-aws-vpc-move-ip-1-add-routing_table_role.patch @@ -0,0 +1,78 @@ +From 4bc84bccec88abcd9bd0b840532b23ed0934c0d4 Mon Sep 17 00:00:00 2001 +From: Jason McCloskey +Date: Mon, 10 Feb 2020 15:18:30 -0800 +Subject: [PATCH] Allow user to specify role with which to query/update route + table + +Allow user to specify role with which to query/update route table + +Allow user to specify role with which to query/update route table +--- + heartbeat/aws-vpc-move-ip | 40 +++++++++++++++++++++++++++++++++------ + 1 file changed, 34 insertions(+), 6 deletions(-) + +diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip +index b27d0bfcd..23eec18cc 100755 +--- a/heartbeat/aws-vpc-move-ip ++++ b/heartbeat/aws-vpc-move-ip +@@ -145,6 +145,24 @@ Enable enhanced monitoring using AWS API calls to check route table entry + END + } + ++ ++execute_cmd_as_role(){ ++ cmd=$1 ++ role=$2 ++ output="$(aws sts assume-role --role-arn $role --role-session-name AWSCLI-RouteTableUpdate --profile $OCF_RESKEY_profile --output=text)" ++ export AWS_ACCESS_KEY_ID="$(echo $output | awk -F" " '$4=="CREDENTIALS" {print $5}')" ++ export AWS_SECRET_ACCESS_KEY="$(echo $output | awk -F" " '$4=="CREDENTIALS" {print $7}')" ++ export AWS_SESSION_TOKEN="$(echo $output | awk -F" " '$4=="CREDENTIALS" {print $8}')" ++ ++ #Execute command ++ ocf_log debug "Assumed Role ${role}" ++ ocf_log debug "$(aws sts get-caller-identity)" ++ ocf_log debug "executing command: $cmd" ++ response="$($cmd)" ++ unset output AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN ++ echo $response ++} ++ + ec2ip_set_address_param_compat(){ + # Include backward compatibility for the deprecated address parameter + if [ -z "$OCF_RESKEY_ip" ] && [ -n "$OCF_RESKEY_address" ]; then +@@ -177,9 +195,14 @@ ec2ip_monitor() { + if ocf_is_true ${OCF_RESKEY_monapi} || [ "$__OCF_ACTION" = "start" ] || ocf_is_probe; then + for rtb in $(echo $OCF_RESKEY_routing_table | sed -e 's/,/ /g'); do + ocf_log info "monitor: check routing table (API call) - $rtb" +- cmd="$OCF_RESKEY_awscli --profile $OCF_RESKEY_profile --output text ec2 describe-route-tables --route-table-ids $rtb --query RouteTables[*].Routes[?DestinationCidrBlock=='$OCF_RESKEY_ip/32'].InstanceId" +- ocf_log debug "executing command: $cmd" +- ROUTE_TO_INSTANCE="$($cmd)" ++ if [[ -z "${OCF_RESKEY_routing_table_role}" ]]; then ++ cmd="$OCF_RESKEY_awscli --profile $OCF_RESKEY_profile --output text ec2 describe-route-tables --route-table-ids $rtb --query RouteTables[*].Routes[?DestinationCidrBlock=='$OCF_RESKEY_ip/32'].InstanceId" ++ ocf_log debug "executing command: $cmd" ++ ROUTE_TO_INSTANCE="$($cmd)" ++ else ++ cmd="$OCF_RESKEY_awscli --output text ec2 describe-route-tables --route-table-ids $rtb --query RouteTables[*].Routes[?DestinationCidrBlock=='$OCF_RESKEY_ip/32'].InstanceId" ++ ROUTE_TO_INSTANCE="$(execute_cmd_as_role "$cmd" $OCF_RESKEY_routing_table_role)" ++ fi + ocf_log debug "Overlay IP is currently routed to ${ROUTE_TO_INSTANCE}" + if [ -z "$ROUTE_TO_INSTANCE" ]; then + ROUTE_TO_INSTANCE="" +@@ -253,9 +276,14 @@ ec2ip_get_and_configure() { + ocf_log debug "network interface id associated MAC address ${MAC_ADDR}: ${EC2_NETWORK_INTERFACE_ID}" + + for rtb in $(echo 
$OCF_RESKEY_routing_table | sed -e 's/,/ /g'); do +- cmd="$OCF_RESKEY_awscli --profile $OCF_RESKEY_profile --output text ec2 replace-route --route-table-id $rtb --destination-cidr-block ${OCF_RESKEY_ip}/32 --network-interface-id $EC2_NETWORK_INTERFACE_ID" +- ocf_log debug "executing command: $cmd" +- $cmd ++ if [[ -z "${OCF_RESKEY_routing_table_role}" ]]; then ++ cmd="$OCF_RESKEY_awscli --profile $OCF_RESKEY_profile --output text ec2 replace-route --route-table-id $rtb --destination-cidr-block ${OCF_RESKEY_ip}/32 --network-interface-id $EC2_NETWORK_INTERFACE_ID" ++ ocf_log debug "executing command: $cmd" ++ $cmd ++ else ++ cmd="$OCF_RESKEY_awscli --output text ec2 replace-route --route-table-id $rtb --destination-cidr-block ${OCF_RESKEY_ip}/32 --network-interface-id $EC2_NETWORK_INTERFACE_ID" ++ update_response="$(execute_cmd_as_role "$cmd" $OCF_RESKEY_routing_table_role)" ++ fi + rc=$? + if [ "$rc" != 0 ]; then + ocf_log warn "command failed, rc: $rc" diff --git a/SOURCES/bz1810466-aws-vpc-move-ip-2-update-metadata.patch b/SOURCES/bz1810466-aws-vpc-move-ip-2-update-metadata.patch new file mode 100644 index 0000000..241494f --- /dev/null +++ b/SOURCES/bz1810466-aws-vpc-move-ip-2-update-metadata.patch @@ -0,0 +1,30 @@ +--- ClusterLabs-resource-agents-e711383f/heartbeat/aws-vpc-move-ip 2020-03-18 14:03:22.060153078 +0100 ++++ aws-vpc-move-ip 2020-03-18 14:03:14.770281256 +0100 +@@ -37,10 +37,12 @@ + # Defaults + OCF_RESKEY_awscli_default="/usr/bin/aws" + OCF_RESKEY_profile_default="default" ++OCF_RESKEY_routing_table_role_default="" + OCF_RESKEY_monapi_default="false" + + : ${OCF_RESKEY_awscli=${OCF_RESKEY_awscli_default}} + : ${OCF_RESKEY_profile=${OCF_RESKEY_profile_default}} ++: ${OCF_RESKEY_routing_table_role=${OCF_RESKEY_routing_table_role_default}} + : ${OCF_RESKEY_monapi=${OCF_RESKEY_monapi_default}} + ####################################################################### + +@@ -109,6 +111,14 @@ + + + ++ ++ ++Role to use to query/update the route table ++ ++route table query/update role ++ ++ ++ + + + Name of the network interface, i.e. eth0 diff --git a/SOURCES/bz1817432-use-safe-temp-file-location.patch b/SOURCES/bz1817432-use-safe-temp-file-location.patch new file mode 100644 index 0000000..0149d72 --- /dev/null +++ b/SOURCES/bz1817432-use-safe-temp-file-location.patch @@ -0,0 +1,44 @@ +diff -uNr a/heartbeat/ClusterMon b/heartbeat/ClusterMon +--- a/heartbeat/ClusterMon 2018-06-29 14:05:02.000000000 +0200 ++++ b/heartbeat/ClusterMon 2020-03-27 12:09:23.636845893 +0100 +@@ -86,7 +86,7 @@ + PID file location to ensure only one instance is running + + PID file +- ++ + + + +@@ -94,7 +94,7 @@ + Location to write HTML output to. 
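Stepping back to the aws-vpc-move-ip hunks above: execute_cmd_as_role() assumes an IAM role by parsing `aws sts assume-role` text output with awk and exporting the temporary credentials into the environment. A sketch of the same flow with boto3 — an assumption, as the agent only drives the CLI:

import boto3

def route_table_client(role_arn):
    sts = boto3.client('sts')
    creds = sts.assume_role(
        RoleArn=role_arn,
        RoleSessionName='AWSCLI-RouteTableUpdate')['Credentials']
    # Build an EC2 client from the temporary credentials instead of
    # exporting AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY / AWS_SESSION_TOKEN.
    return boto3.client(
        'ec2',
        aws_access_key_id=creds['AccessKeyId'],
        aws_secret_access_key=creds['SecretAccessKey'],
        aws_session_token=creds['SessionToken'])
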
+ + HTML output +- ++ + + + +@@ -233,8 +233,8 @@ + fi + + : ${OCF_RESKEY_update:="15000"} +-: ${OCF_RESKEY_pidfile:="/tmp/ClusterMon_${OCF_RESOURCE_INSTANCE}.pid"} +-: ${OCF_RESKEY_htmlfile:="/tmp/ClusterMon_${OCF_RESOURCE_INSTANCE}.html"} ++: ${OCF_RESKEY_pidfile:="${HA_RSCTMP}/ClusterMon_${OCF_RESOURCE_INSTANCE}.pid"} ++: ${OCF_RESKEY_htmlfile:="${HA_RSCTMP}/ClusterMon_${OCF_RESOURCE_INSTANCE}.html"} + + OCF_RESKEY_update=`expr $OCF_RESKEY_update / 1000` + +diff -uNr a/heartbeat/sapdb-nosha.sh b/heartbeat/sapdb-nosha.sh +--- a/heartbeat/sapdb-nosha.sh 2018-06-29 14:05:02.000000000 +0200 ++++ b/heartbeat/sapdb-nosha.sh 2020-03-27 12:07:16.183958164 +0100 +@@ -740,5 +740,5 @@ + } + + # Set a tempfile and make sure to clean it up again +-TEMPFILE="/tmp/SAPDatabase.$$.tmp" +-trap trap_handler INT TERM +\ No newline at end of file ++TEMPFILE="${HA_RSCTMP}/SAPDatabase.$$.tmp" ++trap trap_handler INT TERM diff --git a/SOURCES/bz1817598-ocf_is_clone-1-fix-clone-max-can-be-0.patch b/SOURCES/bz1817598-ocf_is_clone-1-fix-clone-max-can-be-0.patch new file mode 100644 index 0000000..2b025c5 --- /dev/null +++ b/SOURCES/bz1817598-ocf_is_clone-1-fix-clone-max-can-be-0.patch @@ -0,0 +1,23 @@ +From bb9e54cdac71a1f26aa626d234e38c8ae8417e9f Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Thu, 26 Mar 2020 16:26:14 +0100 +Subject: [PATCH] ocf-shellfuncs: fix ocf_is_clone() (clone_max can be 0 with + cloned resources) + +--- + heartbeat/ocf-shellfuncs.in | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/heartbeat/ocf-shellfuncs.in b/heartbeat/ocf-shellfuncs.in +index 7a97558a5..e0eaae1d5 100644 +--- a/heartbeat/ocf-shellfuncs.in ++++ b/heartbeat/ocf-shellfuncs.in +@@ -557,7 +557,7 @@ ocf_is_probe() { + # defined as a resource where the clone-max meta attribute is present, + # and set to greater than zero. + ocf_is_clone() { +- [ ! -z "${OCF_RESKEY_CRM_meta_clone_max}" ] && [ "${OCF_RESKEY_CRM_meta_clone_max}" -gt 0 ] ++ [ ! -z "${OCF_RESKEY_CRM_meta_clone_max}" ] + } + + # returns true if the resource is configured as a multistate diff --git a/SOURCES/bz1817598-ocf_is_clone-2-update-comment.patch b/SOURCES/bz1817598-ocf_is_clone-2-update-comment.patch new file mode 100644 index 0000000..4b9be99 --- /dev/null +++ b/SOURCES/bz1817598-ocf_is_clone-2-update-comment.patch @@ -0,0 +1,24 @@ +From 420e55da2eb542b35fe8af5d05496b129cd190d5 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Fri, 27 Mar 2020 08:44:12 +0100 +Subject: [PATCH] ocf-shellfuncs: ocf_is_clone: update comment based on + clone-max fix in previous commit + +--- + heartbeat/ocf-shellfuncs.in | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/heartbeat/ocf-shellfuncs.in b/heartbeat/ocf-shellfuncs.in +index e0eaae1d5..c4d40e382 100644 +--- a/heartbeat/ocf-shellfuncs.in ++++ b/heartbeat/ocf-shellfuncs.in +@@ -554,8 +554,7 @@ ocf_is_probe() { + } + + # returns true if the resource is configured as a clone. This is +-# defined as a resource where the clone-max meta attribute is present, +-# and set to greater than zero. ++# defined as a resource where the clone-max meta attribute is present. + ocf_is_clone() { + [ ! 
-z "${OCF_RESKEY_CRM_meta_clone_max}" ] + } diff --git a/SOURCES/bz1819021-aws-vpc-move-ip-delete-remaining-route-entries.patch b/SOURCES/bz1819021-aws-vpc-move-ip-delete-remaining-route-entries.patch new file mode 100644 index 0000000..85355b3 --- /dev/null +++ b/SOURCES/bz1819021-aws-vpc-move-ip-delete-remaining-route-entries.patch @@ -0,0 +1,24 @@ +From 390d1cb8b057ef0e6869fb57dc1e6b6997af49f0 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Fri, 3 Apr 2020 16:10:04 +0200 +Subject: [PATCH] aws-vpc-move-ip: delete remaining route entries + +--- + heartbeat/aws-vpc-move-ip | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip +index 97a467217..26ca6007d 100755 +--- a/heartbeat/aws-vpc-move-ip ++++ b/heartbeat/aws-vpc-move-ip +@@ -256,6 +256,10 @@ ec2ip_drop() { + return $OCF_ERR_GENERIC + fi + ++ # delete remaining route-entries if any ++ ip route show to exact ${OCF_RESKEY_ip}/32 dev $OCF_RESKEY_interface | xargs -r ip route delete ++ ip route show table local to exact ${OCF_RESKEY_ip}/32 dev $OCF_RESKEY_interface | xargs -r ip route delete ++ + return $OCF_SUCCESS + } + diff --git a/SOURCES/bz1819965-1-ocf.py-update.patch b/SOURCES/bz1819965-1-ocf.py-update.patch new file mode 100644 index 0000000..e94deb7 --- /dev/null +++ b/SOURCES/bz1819965-1-ocf.py-update.patch @@ -0,0 +1,357 @@ +--- a/heartbeat/ocf.py 2020-04-08 13:03:20.543477544 +0200 ++++ b/heartbeat/ocf.py 2020-04-06 10:23:45.950913519 +0200 +@@ -88,6 +88,10 @@ + + OCF_RESOURCE_INSTANCE = env.get("OCF_RESOURCE_INSTANCE") + ++OCF_ACTION = env.get("__OCF_ACTION") ++if OCF_ACTION is None and len(argv) == 2: ++ OCF_ACTION = argv[1] ++ + HA_DEBUG = env.get("HA_debug", 0) + HA_DATEFMT = env.get("HA_DATEFMT", "%b %d %T ") + HA_LOGFACILITY = env.get("HA_LOGFACILITY") +@@ -135,3 +139,343 @@ + log.addHandler(dfh) + + logger = logging.LoggerAdapter(log, {'OCF_RESOURCE_INSTANCE': OCF_RESOURCE_INSTANCE}) ++ ++ ++_exit_reason_set = False ++ ++def ocf_exit_reason(msg): ++ """ ++ Print exit error string to stderr. ++ ++ Allows the OCF agent to provide a string describing ++ why the exit code was returned. ++ """ ++ global _exit_reason_set ++ cookie = env.get("OCF_EXIT_REASON_PREFIX", "ocf-exit-reason:") ++ sys.stderr.write("{}{}\n".format(cookie, msg)) ++ sys.stderr.flush() ++ logger.error(msg) ++ _exit_reason_set = True ++ ++ ++def have_binary(name): ++ """ ++ True if binary exists, False otherwise. ++ """ ++ def _access_check(fn): ++ return (os.path.exists(fn) and ++ os.access(fn, os.F_OK | os.X_OK) and ++ not os.path.isdir(fn)) ++ if _access_check(name): ++ return True ++ path = env.get("PATH", os.defpath).split(os.pathsep) ++ seen = set() ++ for dir in path: ++ dir = os.path.normcase(dir) ++ if dir not in seen: ++ seen.add(dir) ++ name2 = os.path.join(dir, name) ++ if _access_check(name2): ++ return True ++ return False ++ ++ ++def is_true(val): ++ """ ++ Convert an OCF truth value to a ++ Python boolean. ++ """ ++ return val in ("yes", "true", "1", 1, "YES", "TRUE", "ja", "on", "ON", True) ++ ++ ++def is_probe(): ++ """ ++ A probe is defined as a monitor operation ++ with an interval of zero. This is called ++ by Pacemaker to check the status of a possibly ++ not running resource. 
++ """ ++ return (OCF_ACTION == "monitor" and ++ env.get("OCF_RESKEY_CRM_meta_interval", "") == "0") ++ ++ ++def get_parameter(name, default=None): ++ """ ++ Extract the parameter value from the environment ++ """ ++ return env.get("OCF_RESKEY_{}".format(name), default) ++ ++ ++def distro(): ++ """ ++ Return name of distribution/platform. ++ ++ If possible, returns "name/version", else ++ just "name". ++ """ ++ import subprocess ++ import platform ++ try: ++ ret = subprocess.check_output(["lsb_release", "-si"]) ++ if type(ret) != str: ++ ret = ret.decode() ++ distro = ret.strip() ++ ret = subprocess.check_output(["lsb_release", "-sr"]) ++ if type(ret) != str: ++ ret = ret.decode() ++ version = ret.strip() ++ return "{}/{}".format(distro, version) ++ except Exception: ++ if os.path.exists("/etc/debian_version"): ++ return "Debian" ++ if os.path.exists("/etc/SuSE-release"): ++ return "SUSE" ++ if os.path.exists("/etc/redhat-release"): ++ return "Redhat" ++ return platform.system() ++ ++ ++class Parameter(object): ++ def __init__(self, name, shortdesc, longdesc, content_type, unique, required, default): ++ self.name = name ++ self.shortdesc = shortdesc ++ self.longdesc = longdesc ++ self.content_type = content_type ++ self.unique = unique ++ self.required = required ++ self.default = default ++ ++ def __str__(self): ++ return self.to_xml() ++ ++ def to_xml(self): ++ ret = '' + "\n" ++ ret += '' + self.shortdesc + '' + "\n" ++ ret += ' ++ ++ ++1.0 ++ ++{longdesc} ++ ++{shortdesc} ++ ++ ++{parameters} ++ ++ ++ ++{actions} ++ ++ ++ ++""".format(name=self.name, ++ longdesc=self.longdesc, ++ shortdesc=self.shortdesc, ++ parameters="".join(p.to_xml() for p in self.parameters), ++ actions="".join(a.to_xml() for a in self.actions)) ++ ++ def run(self): ++ run(self) ++ ++ ++def run(agent, handlers=None): ++ """ ++ Main loop implementation for resource agents. ++ Does not return. ++ ++ Arguments: ++ ++ agent: Agent object. ++ ++ handlers: Dict of action name to handler function. ++ ++ Handler functions can take parameters as arguments, ++ the run loop will read parameter values from the ++ environment and pass to the handler. 
++ """ ++ import inspect ++ ++ agent._handlers.update(handlers or {}) ++ handlers = agent._handlers ++ ++ def check_required_params(): ++ for p in agent.parameters: ++ if p.required and get_parameter(p.name) is None: ++ ocf_exit_reason("{}: Required parameter not set".format(p.name)) ++ sys.exit(OCF_ERR_CONFIGURED) ++ ++ def call_handler(func): ++ if hasattr(inspect, 'signature'): ++ params = inspect.signature(func).parameters.keys() ++ else: ++ params = inspect.getargspec(func).args ++ def value_for_parameter(param): ++ val = get_parameter(param) ++ if val is not None: ++ return val ++ for p in agent.parameters: ++ if p.name == param: ++ return p.default ++ arglist = [value_for_parameter(p) for p in params] ++ try: ++ rc = func(*arglist) ++ if rc is None: ++ rc = OCF_SUCCESS ++ return rc ++ except Exception as err: ++ if not _exit_reason_set: ++ ocf_exit_reason(str(err)) ++ else: ++ logger.error(str(err)) ++ return OCF_ERR_GENERIC ++ ++ meta_data_action = False ++ for action in agent.actions: ++ if action.name == "meta-data": ++ meta_data_action = True ++ break ++ if not meta_data_action: ++ agent.add_action("meta-data", timeout=10) ++ ++ if len(sys.argv) == 2 and sys.argv[1] in ("-h", "--help"): ++ sys.stdout.write("usage: %s {%s}\n\n" % (sys.argv[0], "|".join(sorted(handlers.keys()))) + ++ "Expects to have a fully populated OCF RA compliant environment set.\n") ++ sys.exit(OCF_SUCCESS) ++ ++ if OCF_ACTION is None: ++ ocf_exit_reason("No action argument set") ++ sys.exit(OCF_ERR_UNIMPLEMENTED) ++ if OCF_ACTION in ('meta-data', 'usage', 'methods'): ++ sys.stdout.write(agent.to_xml() + "\n") ++ sys.exit(OCF_SUCCESS) ++ ++ check_required_params() ++ if OCF_ACTION in handlers: ++ rc = call_handler(handlers[OCF_ACTION]) ++ sys.exit(rc) ++ sys.exit(OCF_ERR_UNIMPLEMENTED) ++ ++ ++if __name__ == "__main__": ++ import unittest ++ ++ class TestMetadata(unittest.TestCase): ++ def test_noparams_noactions(self): ++ m = Agent("foo", shortdesc="shortdesc", longdesc="longdesc") ++ self.assertEqual(""" ++ ++ ++1.0 ++ ++longdesc ++ ++shortdesc ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++""", str(m)) ++ ++ def test_params_actions(self): ++ m = Agent("foo", shortdesc="shortdesc", longdesc="longdesc") ++ m.add_parameter("testparam") ++ m.add_action("start") ++ self.assertEqual(str(m.actions[0]), '\n') ++ ++ unittest.main() diff --git a/SOURCES/bz1819965-2-azure-events.patch b/SOURCES/bz1819965-2-azure-events.patch new file mode 100644 index 0000000..220d2ba --- /dev/null +++ b/SOURCES/bz1819965-2-azure-events.patch @@ -0,0 +1,1060 @@ +diff -uNr a/configure.ac b/configure.ac +--- a/configure.ac 2020-04-16 11:54:08.466619607 +0200 ++++ b/configure.ac 2020-04-16 12:05:17.241352586 +0200 +@@ -30,6 +30,8 @@ + PKG_FEATURES="" + + AC_CONFIG_AUX_DIR(.) ++AC_CONFIG_MACRO_DIR([m4]) ++ + AC_CANONICAL_HOST + + dnl Where #defines go (e.g. 
`AC_CHECK_HEADERS' below) +@@ -72,6 +74,11 @@ + [AC_MSG_ERROR([systemd support requested but pkg-config unable to query systemd package])]) + with_systemdsystemunitdir=no], + [with_systemdsystemunitdir="$def_systemdsystemunitdir"])]) ++if test "x$with_systemdsystemunitdir" != "xno" && \ ++ test "x${prefix}" != "xNONE" && \ ++ test "x${prefix}" != "x/usr"; then ++ with_systemdsystemunitdir="${prefix}/$with_systemdsystemunitdir" ++fi + AS_IF([test "x$with_systemdsystemunitdir" != "xno"], + [AC_SUBST([systemdsystemunitdir], [$with_systemdsystemunitdir])]) + AM_CONDITIONAL([HAVE_SYSTEMD], [test "x$with_systemdsystemunitdir" != "xno"]) +@@ -79,6 +86,11 @@ + AC_ARG_WITH([systemdtmpfilesdir], + AS_HELP_STRING([--with-systemdtmpfilesdir=DIR], [Directory for systemd tmp files]), + [], [with_systemdtmpfilesdir=$($PKGCONFIG --variable=tmpfilesdir systemd)]) ++ if test "x$with_systemdtmpfilesdir" != xno && \ ++ test "x${prefix}" != "xNONE" && \ ++ test "x${prefix}" != "x/usr"; then ++ with_systemdtmpfilesdir="${prefix}/$with_systemdtmpfilesdir" ++ fi + if test "x$with_systemdtmpfilesdir" != xno; then + AC_SUBST([systemdtmpfilesdir], [$with_systemdtmpfilesdir]) + fi +@@ -501,12 +513,35 @@ + AC_SUBST(RM) + AC_SUBST(TEST) + ++dnl Ensure PYTHON is an absolute path ++AC_PATH_PROG([PYTHON], [$PYTHON]) ++ + AM_PATH_PYTHON + if test -z "$PYTHON"; then + echo "*** Essential program python not found" 1>&2 +- exit 1 + fi + ++AC_PYTHON_MODULE(googleapiclient) ++AC_PYTHON_MODULE(pyroute2) ++ ++AS_VERSION_COMPARE([$PYTHON_VERSION], [2.7], [BUILD_OCF_PY=0], [BUILD_OCF_PY=1], [BUILD_OCF_PY=1]) ++ ++BUILD_AZURE_EVENTS=1 ++if test -z "$PYTHON" || test $BUILD_OCF_PY -eq 0; then ++ BUILD_AZURE_EVENTS=0 ++ AC_MSG_WARN("Not building azure-events") ++fi ++AM_CONDITIONAL(BUILD_AZURE_EVENTS, test $BUILD_AZURE_EVENTS -eq 1) ++ ++BUILD_GCP_PD_MOVE=1 ++AM_CONDITIONAL(BUILD_GCP_PD_MOVE, test $BUILD_GCP_PD_MOVE -eq 1) ++ ++BUILD_GCP_VPC_MOVE_ROUTE=1 ++AM_CONDITIONAL(BUILD_GCP_VPC_MOVE_ROUTE, test $BUILD_GCP_VPC_MOVE_ROUTE -eq 1) ++ ++BUILD_GCP_VPC_MOVE_VIP=1 ++AM_CONDITIONAL(BUILD_GCP_VPC_MOVE_VIP, test $BUILD_GCP_VPC_MOVE_VIP -eq 1) ++ + AC_PATH_PROGS(ROUTE, route) + AC_DEFINE_UNQUOTED(ROUTE, "$ROUTE", path to route command) + +@@ -541,6 +576,12 @@ + if test x"${STYLESHEET_PREFIX}" = x""; then + DIRS=$(find "${datadir}" -name $(basename $(dirname ${DOCBOOK_XSL_PATH})) \ + -type d | LC_ALL=C sort) ++ if test x"${DIRS}" = x""; then ++ # when datadir is not standard OS path, we cannot find docbook.xsl ++ # use standard OS path as backup ++ DIRS=$(find "/usr/share" -name $(basename $(dirname ${DOCBOOK_XSL_PATH})) \ ++ -type d | LC_ALL=C sort) ++ fi + XSLT=$(basename ${DOCBOOK_XSL_PATH}) + for d in ${DIRS}; do + if test -f "${d}/${XSLT}"; then +@@ -948,6 +989,7 @@ + ) + + dnl Files we output that need to be executable ++AC_CONFIG_FILES([heartbeat/azure-events], [chmod +x heartbeat/azure-events]) + AC_CONFIG_FILES([heartbeat/AoEtarget], [chmod +x heartbeat/AoEtarget]) + AC_CONFIG_FILES([heartbeat/ManageRAID], [chmod +x heartbeat/ManageRAID]) + AC_CONFIG_FILES([heartbeat/ManageVE], [chmod +x heartbeat/ManageVE]) +@@ -1021,7 +1063,7 @@ + AC_MSG_RESULT([]) + AC_MSG_RESULT([$PACKAGE configuration:]) + AC_MSG_RESULT([ Version = ${VERSION}]) +-AC_MSG_RESULT([ Build Version = e711383fd5c7bef9c24ff6bc85465e59f91080f9]) ++AC_MSG_RESULT([ Build Version = $Format:%H$]) + AC_MSG_RESULT([ Features =${PKG_FEATURES}]) + AC_MSG_RESULT([]) + AC_MSG_RESULT([ Prefix = ${prefix}]) +diff -uNr a/doc/man/Makefile.am b/doc/man/Makefile.am +--- 
++++ b/doc/man/Makefile.am	2020-04-16 12:08:34.913726440 +0200
+@@ -55,7 +55,7 @@
+ # 12126 on savannah.gnu.org. But, maybe it gets fixed soon, it was
+ # first reported in 1995 and added to Savannah in in 2005...
+ if BUILD_DOC
+-man_MANS	       = ocf_heartbeat_AoEtarget.7 \
++man_MANS	      = ocf_heartbeat_AoEtarget.7 \
+                          ocf_heartbeat_AudibleAlarm.7 \
+                          ocf_heartbeat_ClusterMon.7 \
+                          ocf_heartbeat_CTDB.7 \
+@@ -183,6 +183,22 @@
+ man_MANS	         += ocf_heartbeat_IPv6addr.7
+ endif
+ 
++if BUILD_AZURE_EVENTS
++man_MANS	         += ocf_heartbeat_azure-events.7
++endif
++
++if BUILD_GCP_PD_MOVE
++man_MANS	         += ocf_heartbeat_gcp-pd-move.7
++endif
++
++if BUILD_GCP_VPC_MOVE_ROUTE
++man_MANS	         += ocf_heartbeat_gcp-vpc-move-route.7
++endif
++
++if BUILD_GCP_VPC_MOVE_VIP
++man_MANS	         += ocf_heartbeat_gcp-vpc-move-vip.7
++endif
++
+ xmlfiles	= $(man_MANS:.7=.xml)
+ 
+ %.1 %.5 %.7 %.8: %.xml
+diff -uNr a/heartbeat/azure-events.in b/heartbeat/azure-events.in
+--- a/heartbeat/azure-events.in	1970-01-01 01:00:00.000000000 +0100
++++ b/heartbeat/azure-events.in	2020-04-16 12:02:15.114693551 +0200
+@@ -0,0 +1,824 @@
++#!@PYTHON@ -tt
++#
++#	Resource agent for monitoring Azure Scheduled Events
++#
++#	License:	GNU General Public License (GPL)
++#	(c) 2018	Tobias Niekamp, Microsoft Corp.
++#			and Linux-HA contributors
++
++import os
++import sys
++import time
++import subprocess
++import json
++try:
++    import urllib2
++except ImportError:
++    import urllib.request as urllib2
++import socket
++from collections import defaultdict
++
++OCF_FUNCTIONS_DIR = os.environ.get("OCF_FUNCTIONS_DIR", "%s/lib/heartbeat" % os.environ.get("OCF_ROOT"))
++sys.path.append(OCF_FUNCTIONS_DIR)
++import ocf
++
++##############################################################################
++
++
++VERSION = "0.10"
++USER_AGENT = "Pacemaker-ResourceAgent/%s %s" % (VERSION, ocf.distro())
++
++attr_globalPullState = "azure-events_globalPullState"
++attr_lastDocVersion  = "azure-events_lastDocVersion"
++attr_curNodeState = "azure-events_curNodeState"
++attr_pendingEventIDs = "azure-events_pendingEventIDs"
++
++default_loglevel = ocf.logging.INFO
++default_relevantEventTypes = set(["Reboot", "Redeploy"])
++
++global_pullMaxAttempts = 3
++global_pullDelaySecs = 1
++
++##############################################################################
++
++class attrDict(defaultdict):
++    """
++    A wrapper for accessing dict keys like an attribute
++    """
++    def __init__(self, data):
++        super(attrDict, self).__init__(attrDict)
++        for d in data.keys():
++            self.__setattr__(d, data[d])
++
++    def __getattr__(self, key):
++        try:
++            return self[key]
++        except KeyError:
++            raise AttributeError(key)
++
++    def __setattr__(self, key, value):
++        self[key] = value
++
++##############################################################################
++
++class azHelper:
++    """
++    Helper class for Azure's metadata API (including Scheduled Events)
++    """
++    metadata_host = "http://169.254.169.254/metadata"
++    instance_api  = "instance"
++    events_api    = "scheduledevents"
++    api_version   = "2017-08-01"
++
++    @staticmethod
++    def _sendMetadataRequest(endpoint, postData=None):
++        """
++        Send a request to Azure's Azure Metadata Service API
++        """
++        url = "%s/%s?api-version=%s" % (azHelper.metadata_host, endpoint, azHelper.api_version)
++        ocf.logger.debug("_sendMetadataRequest: begin; endpoint = %s, postData = %s" % (endpoint, postData))
++        ocf.logger.debug("_sendMetadataRequest: url = %s" % url)
++
++        req = urllib2.Request(url, postData)
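++        # Azure's instance metadata service only answers requests that
++        # carry the "Metadata: true" header, so it is set on every
++        # request before sending.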
++        req.add_header("Metadata", "true")
++        req.add_header("User-Agent", USER_AGENT)
++        resp = urllib2.urlopen(req)
++        data = resp.read()
++        ocf.logger.debug("_sendMetadataRequest: response = %s" % data)
++        if data:
++            data = json.loads(data)
++
++        ocf.logger.debug("_sendMetadataRequest: finished")
++        return data
++
++    @staticmethod
++    def getInstanceInfo():
++        """
++        Fetch details about the current VM from Azure's Azure Metadata Service API
++        """
++        ocf.logger.debug("getInstanceInfo: begin")
++
++        jsondata = azHelper._sendMetadataRequest(azHelper.instance_api)
++        ocf.logger.debug("getInstanceInfo: json = %s" % jsondata)
++
++        ocf.logger.debug("getInstanceInfo: finished, returning {}".format(jsondata["compute"]))
++        return attrDict(jsondata["compute"])
++
++    @staticmethod
++    def pullScheduledEvents():
++        """
++        Retrieve all currently scheduled events via Azure Metadata Service API
++        """
++        ocf.logger.debug("pullScheduledEvents: begin")
++
++        jsondata = azHelper._sendMetadataRequest(azHelper.events_api)
++        ocf.logger.debug("pullScheduledEvents: json = %s" % jsondata)
++
++        ocf.logger.debug("pullScheduledEvents: finished")
++        return attrDict(jsondata)
++
++    @staticmethod
++    def forceEvents(eventIDs):
++        """
++        Force a set of events to start immediately
++        """
++        ocf.logger.debug("forceEvents: begin")
++
++        events = []
++        for e in eventIDs:
++            events.append({
++                "EventId": e,
++            })
++        postData = {
++            "StartRequests" : events
++        }
++        ocf.logger.info("forceEvents: postData = %s" % postData)
++        resp = azHelper._sendMetadataRequest(azHelper.events_api, postData=json.dumps(postData))
++
++        ocf.logger.debug("forceEvents: finished")
++        return
++
++##############################################################################
++
++class clusterHelper:
++    """
++    Helper functions for Pacemaker control via crm
++    """
++    @staticmethod
++    def _getLocation(node):
++        """
++        Helper function to retrieve local/global attributes
++        """
++        if node:
++            return ["--node", node]
++        else:
++            return ["--type", "crm_config"]
++
++    @staticmethod
++    def _exec(command, *args):
++        """
++        Helper function to execute a UNIX command
++        """
++        args = list(args)
++        ocf.logger.debug("_exec: begin; command = %s, args = %s" % (command, str(args)))
++
++        def flatten(*n):
++            return (str(e) for a in n
++                for e in (flatten(*a) if isinstance(a, (tuple, list)) else (str(a),)))
++        command = list(flatten([command] + args))
++        ocf.logger.debug("_exec: cmd = %s" % " ".join(command))
++        try:
++            ret = subprocess.check_output(command)
++            ocf.logger.debug("_exec: return = %s" % ret)
++            return ret.rstrip()
++        except Exception as err:
++            ocf.logger.exception(err)
++            return None
++
++    @staticmethod
++    def setAttr(key, value, node=None):
++        """
++        Set the value of a specific global/local attribute in the Pacemaker cluster
++        """
++        ocf.logger.debug("setAttr: begin; key = %s, value = %s, node = %s" % (key, value, node))
++
++        if value:
++            ret = clusterHelper._exec("crm_attribute",
++                "--name", key,
++                "--update", value,
++                clusterHelper._getLocation(node))
++        else:
++            ret = clusterHelper._exec("crm_attribute",
++                "--name", key,
++                "--delete",
++                clusterHelper._getLocation(node))
++
++        ocf.logger.debug("setAttr: finished")
++        return len(ret) == 0
++
++    @staticmethod
++    def getAttr(key, node=None):
++        """
++        Retrieve a global/local attribute from the Pacemaker cluster
++        """
++        ocf.logger.debug("getAttr: begin; key = %s, node = %s" % (key, node))
++
++        val = clusterHelper._exec("crm_attribute",
++            "--name", key,
++            "--query", "--quiet",
++            "--default", "",
++            clusterHelper._getLocation(node))
++        ocf.logger.debug("getAttr: finished")
++        if not val:
++            return None
++        return val if not val.isdigit() else int(val)
++
++    @staticmethod
++    def getAllNodes():
++        """
++        Get a list of hostnames for all nodes in the Pacemaker cluster
++        """
++        ocf.logger.debug("getAllNodes: begin")
++
++        nodes = []
++        nodeList = clusterHelper._exec("crm_node", "--list")
++        for n in nodeList.decode().split("\n"):
++            nodes.append(n.split()[1])
++        ocf.logger.debug("getAllNodes: finished; return %s" % str(nodes))
++
++        return nodes
++
++    @staticmethod
++    def getHostNameFromAzName(azName):
++        """
++        Helper function to get the actual host name from an Azure node name
++        """
++        return clusterHelper.getAttr("hostName_%s" % azName)
++
++    @staticmethod
++    def removeHoldFromNodes():
++        """
++        Remove the ON_HOLD state from all nodes in the Pacemaker cluster
++        """
++        ocf.logger.debug("removeHoldFromNodes: begin")
++
++        for n in clusterHelper.getAllNodes():
++            if clusterHelper.getAttr(attr_curNodeState, node=n) == "ON_HOLD":
++                clusterHelper.setAttr(attr_curNodeState, "AVAILABLE", node=n)
++                ocf.logger.info("removeHoldFromNodes: removed ON_HOLD from node %s" % n)
++
++        ocf.logger.debug("removeHoldFromNodes: finished")
++        return False
++
++    @staticmethod
++    def otherNodesAvailable(exceptNode):
++        """
++        Check if there are any nodes (except a given node) in the Pacemaker cluster that have state AVAILABLE
++        """
++        ocf.logger.debug("otherNodesAvailable: begin; exceptNode = %s" % exceptNode)
++
++        for n in clusterHelper.getAllNodes():
++            state = clusterHelper.getAttr(attr_curNodeState, node=n)
++            state = stringToNodeState(state) if state else AVAILABLE
++            if state == AVAILABLE and n != exceptNode.hostName:
++                ocf.logger.info("otherNodesAvailable: at least %s is available" % n)
++                ocf.logger.debug("otherNodesAvailable: finished")
++                return True
++        ocf.logger.info("otherNodesAvailable: no other nodes are available")
++        ocf.logger.debug("otherNodesAvailable: finished")
++
++        return False
++
++    @staticmethod
++    def transitionSummary():
++        """
++        Get the current Pacemaker transition summary (used to check if all resources are stopped when putting a node into standby)
++        """
++        # Is a global crm_simulate "too much"? Or would it be sufficient if there are no planned transitions for a particular node?
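++        # The sample output below illustrates both cases: a summary that
++        # still lists pending actions, and the empty summary expected once
++        # all resources on the node have been stopped.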
++        # # crm_simulate -Ls
++        # 	Transition Summary:
++        # 	 * Promote rsc_SAPHana_HN1_HDB03:0      (Slave -> Master hsr3-db1)
++        # 	 * Stop    rsc_SAPHana_HN1_HDB03:1      (hsr3-db0)
++        # 	 * Move    rsc_ip_HN1_HDB03        (Started hsr3-db0 -> hsr3-db1)
++        # 	 * Start   rsc_nc_HN1_HDB03        (hsr3-db1)
++        # # Expected result when there are no pending actions:
++        # 	Transition Summary:
++        ocf.logger.debug("transitionSummary: begin")
++
++        summary = clusterHelper._exec("crm_simulate", "-Ls")
++        if not summary:
++            ocf.logger.warning("transitionSummary: could not load transition summary")
++            return False
++        if summary.find("Transition Summary:") < 0:
++            ocf.logger.warning("transitionSummary: received unexpected transition summary: %s" % summary)
++            return False
++        summary = summary.split("Transition Summary:")[1]
++        ret = summary.decode().split("\n").pop(0)
++
++        ocf.logger.debug("transitionSummary: finished; return = %s" % str(ret))
++        return ret
++
++    @staticmethod
++    def listOperationsOnNode(node):
++        """
++        Get a list of all current operations for a given node (used to check if any resources are pending)
++        """
++        # hsr3-db1:/home/tniek # crm_resource --list-operations -N hsr3-db0
++        # rsc_azure-events    (ocf::heartbeat:azure-events): Started: rsc_azure-events_start_0 (node=hsr3-db0, call=91, rc=0, last-rc-change=Fri Jun  8 22:37:46 2018, exec=115ms): complete
++        # rsc_azure-events    (ocf::heartbeat:azure-events): Started: rsc_azure-events_monitor_10000 (node=hsr3-db0, call=93, rc=0, last-rc-change=Fri Jun  8 22:37:47 2018, exec=197ms): complete
++        # rsc_SAPHana_HN1_HDB03    (ocf::suse:SAPHana): Master: rsc_SAPHana_HN1_HDB03_start_0 (node=hsr3-db0, call=-1, rc=193, last-rc-change=Fri Jun  8 22:37:46 2018, exec=0ms): pending
++        # rsc_SAPHanaTopology_HN1_HDB03    (ocf::suse:SAPHanaTopology): Started: rsc_SAPHanaTopology_HN1_HDB03_start_0 (node=hsr3-db0, call=90, rc=0, last-rc-change=Fri Jun  8 22:37:46 2018, exec=3214ms): complete
++        ocf.logger.debug("listOperationsOnNode: begin; node = %s" % node)
++
++        resources = clusterHelper._exec("crm_resource", "--list-operations", "-N", node)
++        if len(resources) == 0:
++            ret = []
++        else:
++            ret = resources.decode().split("\n")
++
++        ocf.logger.debug("listOperationsOnNode: finished; return = %s" % str(ret))
++        return ret
++
++    @staticmethod
++    def noPendingResourcesOnNode(node):
++        """
++        Check that there are no pending resources on a given node
++        """
++        ocf.logger.debug("noPendingResourcesOnNode: begin; node = %s" % node)
++
++        for r in clusterHelper.listOperationsOnNode(node):
++            ocf.logger.debug("noPendingResourcesOnNode: * %s" % r)
++            resource = r.split()[-1]
++            if resource == "pending":
++                ocf.logger.info("noPendingResourcesOnNode: found resource %s that is still pending" % resource)
++                ocf.logger.debug("noPendingResourcesOnNode: finished; return = False")
++                return False
++        ocf.logger.info("noPendingResourcesOnNode: no pending resources on node %s" % node)
++        ocf.logger.debug("noPendingResourcesOnNode: finished; return = True")
++
++        return True
++
++    @staticmethod
++    def allResourcesStoppedOnNode(node):
++        """
++        Check that all resources on a given node are stopped
++        """
++        ocf.logger.debug("allResourcesStoppedOnNode: begin; node = %s" % node)
++
++        if clusterHelper.noPendingResourcesOnNode(node):
++            if len(clusterHelper.transitionSummary()) == 0:
++                ocf.logger.info("allResourcesStoppedOnNode: no pending resources on node %s and empty transition summary" % node)
++                ocf.logger.debug("allResourcesStoppedOnNode: finished; return = True")
++                return True
++            ocf.logger.info("allResourcesStoppedOnNode: transition summary is not empty")
++            ocf.logger.debug("allResourcesStoppedOnNode: finished; return = False")
++            return False
++
++        ocf.logger.info("allResourcesStoppedOnNode: still pending resources on node %s" % node)
++        ocf.logger.debug("allResourcesStoppedOnNode: finished; return = False")
++        return False
++
++##############################################################################
++
++AVAILABLE = 0	# Node is online and ready to handle events
++STOPPING = 1	# Standby has been triggered, but some resources are still running
++IN_EVENT = 2	# All resources are stopped, and an event has been initiated via Azure Metadata Service
++ON_HOLD = 3	# Node has a pending event that cannot be started because there are no other nodes available
++
++def stringToNodeState(name):
++    if type(name) == int: return name
++    if name == "STOPPING": return STOPPING
++    if name == "IN_EVENT": return IN_EVENT
++    if name == "ON_HOLD": return ON_HOLD
++    return AVAILABLE
++
++def nodeStateToString(state):
++    if state == STOPPING: return "STOPPING"
++    if state == IN_EVENT: return "IN_EVENT"
++    if state == ON_HOLD: return "ON_HOLD"
++    return "AVAILABLE"
++
++##############################################################################
++
++class Node:
++    """
++    Core class implementing logic for a cluster node
++    """
++    def __init__(self, ra):
++        self.raOwner  = ra
++        self.azInfo   = azHelper.getInstanceInfo()
++        self.azName   = self.azInfo.name
++        self.hostName = socket.gethostname()
++        self.setAttr("azName", self.azName)
++        clusterHelper.setAttr("hostName_%s" % self.azName, self.hostName)
++
++    def getAttr(self, key):
++        """
++        Get a local attribute
++        """
++        return clusterHelper.getAttr(key, node=self.hostName)
++
++    def setAttr(self, key, value):
++        """
++        Set a local attribute
++        """
++        return clusterHelper.setAttr(key, value, node=self.hostName)
++
++    def selfOrOtherNode(self, node):
++        """
++        Helper function to distinguish self/other node
++        """
++        return node if node else self.hostName
++
++    def setState(self, state, node=None):
++        """
++        Set the state for a given node (or self)
++        """
++        node = self.selfOrOtherNode(node)
++        ocf.logger.debug("setState: begin; node = %s, state = %s" % (node, nodeStateToString(state)))
++
++        clusterHelper.setAttr(attr_curNodeState, nodeStateToString(state), node=node)
++
++        ocf.logger.debug("setState: finished")
++
++    def getState(self, node=None):
++        """
++        Get the state for a given node (or self)
++        """
++        node = self.selfOrOtherNode(node)
++        ocf.logger.debug("getState: begin; node = %s" % node)
++
++        state = clusterHelper.getAttr(attr_curNodeState, node=node)
++        ocf.logger.debug("getState: state = %s" % state)
++        ocf.logger.debug("getState: finished")
++        if not state:
++            return AVAILABLE
++        return stringToNodeState(state)
++
++    def setEventIDs(self, eventIDs, node=None):
++        """
++        Set pending EventIDs for a given node (or self)
++        """
++        node = self.selfOrOtherNode(node)
++        ocf.logger.debug("setEventIDs: begin; node = %s, eventIDs = %s" % (node, str(eventIDs)))
++
++        if eventIDs:
++            eventIDStr = ",".join(eventIDs)
++        else:
++            eventIDStr = None
++        clusterHelper.setAttr(attr_pendingEventIDs, eventIDStr, node=node)
++
++        ocf.logger.debug("setEventIDs: finished")
++        return
++
++    def getEventIDs(self, node=None):
++        """
++        Get pending EventIDs for a given node (or self)
++        """
++        node = self.selfOrOtherNode(node)
++        ocf.logger.debug("getEventIDs: begin; node = %s" % node)
++
++        eventIDStr = clusterHelper.getAttr(attr_pendingEventIDs, node=node)
++        if eventIDStr:
++            eventIDs = eventIDStr.decode().split(",")
++        else:
++            eventIDs = None
++
++        ocf.logger.debug("getEventIDs: finished; eventIDs = %s" % str(eventIDs))
++        return eventIDs
++
++    def updateNodeStateAndEvents(self, state, eventIDs, node=None):
++        """
++        Set the state and pending EventIDs for a given node (or self)
++        """
++        ocf.logger.debug("updateNodeStateAndEvents: begin; node = %s, state = %s, eventIDs = %s" % (node, nodeStateToString(state), str(eventIDs)))
++
++        self.setState(state, node=node)
++        self.setEventIDs(eventIDs, node=node)
++
++        ocf.logger.debug("updateNodeStateAndEvents: finished")
++        return state
++
++    def putNodeStandby(self, node=None):
++        """
++        Put self to standby
++        """
++        node = self.selfOrOtherNode(node)
++        ocf.logger.debug("putNodeStandby: begin; node = %s" % node)
++
++        clusterHelper._exec("crm_attribute",
++            "-t", "nodes",
++            "-N", node,
++            "-n", "standby",
++            "-v", "on",
++            "--lifetime=forever")
++
++        ocf.logger.debug("putNodeStandby: finished")
++
++    def putNodeOnline(self, node=None):
++        """
++        Put self back online
++        """
++        node = self.selfOrOtherNode(node)
++        ocf.logger.debug("putNodeOnline: begin; node = %s" % node)
++
++        clusterHelper._exec("crm_attribute",
++            "-t", "nodes",
++            "-N", node,
++            "-n", "standby",
++            "-v", "off",
++            "--lifetime=forever")
++
++        ocf.logger.debug("putNodeOnline: finished")
++
++    def separateEvents(self, events):
++        """
++        Split own/other nodes' events
++        """
++        ocf.logger.debug("separateEvents: begin; events = %s" % str(events))
++
++        localEvents = []
++        remoteEvents = []
++        for e in events:
++            e = attrDict(e)
++            if e.EventType not in self.raOwner.relevantEventTypes:
++                continue
++            if self.azName in e.Resources:
++                localEvents.append(e)
++            else:
++                remoteEvents.append(e)
++        ocf.logger.debug("separateEvents: finished; localEvents = %s, remoteEvents = %s" % (str(localEvents), str(remoteEvents)))
++        return (localEvents, remoteEvents)
++
++    def removeOrphanedEvents(self, azEvents):
++        """
++        Remove remote events that are already finished
++        """
++        ocf.logger.debug("removeOrphanedEvents: begin; azEvents = %s" % str(azEvents))
++
++        azEventIDs = set()
++        for e in azEvents:
++            azEventIDs.add(e.EventId)
++        # for all nodes except self ...
++        for n in clusterHelper.getAllNodes():
++            if n == self.hostName:
++                continue
++            curState = self.getState(node=n)
++            # ... that are still in an event or are shutting down resources ...
++            if curState in (STOPPING, IN_EVENT):
++                ocf.logger.info("removeOrphanedEvents: node %s has state %s" % (n, curState))
++                clusterEventIDs = self.getEventIDs(node=n)
++                stillActive = False
++                # ... but don't have any more events running according to Azure, ...
++                for p in clusterEventIDs:
++                    if p in azEventIDs:
++                        ocf.logger.info("removeOrphanedEvents: (at least) event %s on node %s has not yet finished" % (str(p), n))
++                        stillActive = True
++                        break
++                if not stillActive:
++                    # ... put them back online.
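++                    # putNodeOnline() clears the standby attribute that was
++                    # set while the node was waiting for its event, so
++                    # Pacemaker can move resources back onto it.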
++                    ocf.logger.info("removeOrphanedEvents: clusterEvents %s on node %s are not in azEvents %s -> bring node back online" % (str(clusterEventIDs), n, str(azEventIDs)))
++                    self.putNodeOnline(node=n)
++
++        ocf.logger.debug("removeOrphanedEvents: finished")
++
++    def handleRemoteEvents(self, azEvents):
++        """
++        Handle a list of events (as provided by Azure Metadata Service) for other nodes
++        """
++        ocf.logger.debug("handleRemoteEvents: begin; hostName = %s, events = %s" % (self.hostName, str(azEvents)))
++
++        if len(azEvents) == 0:
++            ocf.logger.debug("handleRemoteEvents: no remote events to handle")
++            ocf.logger.debug("handleRemoteEvents: finished")
++            return
++        eventIDsForNode = {}
++
++        # iterate through all current events as per Azure
++        for e in azEvents:
++            ocf.logger.info("handleRemoteEvents: handling remote event %s (%s; nodes = %s)" % (e.EventId, e.EventType, str(e.Resources)))
++            # before we can force an event to start, we need to ensure all nodes involved have stopped their resources
++            if e.EventStatus == "Scheduled":
++                allNodesStopped = True
++                for azName in e.Resources:
++                    hostName = clusterHelper.getHostNameFromAzName(azName)
++                    state = self.getState(node=hostName)
++                    if state == STOPPING:
++                        # the only way we can continue is when node state is STOPPING, but all resources have been stopped
++                        if not clusterHelper.allResourcesStoppedOnNode(hostName):
++                            ocf.logger.info("handleRemoteEvents: (at least) node %s has still resources running -> wait" % hostName)
++                            allNodesStopped = False
++                            break
++                    elif state in (AVAILABLE, IN_EVENT, ON_HOLD):
++                        ocf.logger.info("handleRemoteEvents: node %s is still %s -> remote event needs to be picked up locally" % (hostName, nodeStateToString(state)))
++                        allNodesStopped = False
++                        break
++                if allNodesStopped:
++                    ocf.logger.info("handleRemoteEvents: nodes %s are stopped -> add remote event %s to force list" % (str(e.Resources), e.EventId))
++                    for n in e.Resources:
++                        hostName = clusterHelper.getHostNameFromAzName(n)
++                        if hostName in eventIDsForNode:
++                            eventIDsForNode[hostName].append(e.EventId)
++                        else:
++                            eventIDsForNode[hostName] = [e.EventId]
++            elif e.EventStatus == "Started":
++                ocf.logger.info("handleRemoteEvents: remote event already started")
++
++        # force the start of all events whose nodes are ready (i.e. have no more resources running)
++        if len(eventIDsForNode.keys()) > 0:
++            eventIDsToForce = set([item for sublist in eventIDsForNode.values() for item in sublist])
++            ocf.logger.info("handleRemoteEvents: set nodes %s to IN_EVENT; force remote events %s" % (str(eventIDsForNode.keys()), str(eventIDsToForce)))
++            for node, eventId in eventIDsForNode.items():
++                self.updateNodeStateAndEvents(IN_EVENT, eventId, node=node)
++            azHelper.forceEvents(eventIDsToForce)
++
++        ocf.logger.debug("handleRemoteEvents: finished")
++
++    def handleLocalEvents(self, azEvents):
++        """
++        Handle a list of own events (as provided by Azure Metadata Service)
++        """
++        ocf.logger.debug("handleLocalEvents: begin; hostName = %s, azEvents = %s" % (self.hostName, str(azEvents)))
++
++        azEventIDs = set()
++        for e in azEvents:
++            azEventIDs.add(e.EventId)
++
++        curState = self.getState()
++        clusterEventIDs = self.getEventIDs()
++        mayUpdateDocVersion = False
++        ocf.logger.info("handleLocalEvents: current state = %s; pending local clusterEvents = %s" % (nodeStateToString(curState), str(clusterEventIDs)))
++
++        # check if there are currently/still events set for the node
++        if clusterEventIDs:
++            # there are pending events set, so our state must be STOPPING or IN_EVENT
++            i = 0; touchedEventIDs = False
++            while i < len(clusterEventIDs):
++                # clean up pending events that are already finished according to AZ
++                if clusterEventIDs[i] not in azEventIDs:
++                    ocf.logger.info("handleLocalEvents: remove finished local clusterEvent %s" % (clusterEventIDs[i]))
++                    clusterEventIDs.pop(i)
++                    touchedEventIDs = True
++                else:
++                    i += 1
++            if len(clusterEventIDs) > 0:
++                # there are still pending events (either because we're still stopping, or because the event is still in place)
++                # either way, we need to wait
++                if touchedEventIDs:
++                    ocf.logger.info("handleLocalEvents: added new local clusterEvent %s" % str(clusterEventIDs))
++                    self.setEventIDs(clusterEventIDs)
++                else:
++                    ocf.logger.info("handleLocalEvents: no local clusterEvents were updated")
++            else:
++                # there are no more pending events left after cleanup
++                if clusterHelper.noPendingResourcesOnNode(self.hostName):
++                    # and no pending resources on the node -> set it back online
++                    ocf.logger.info("handleLocalEvents: all local events finished -> clean up, put node online and AVAILABLE")
++                    curState = self.updateNodeStateAndEvents(AVAILABLE, None)
++                    self.putNodeOnline()
++                    clusterHelper.removeHoldFromNodes()
++                    # repeat handleLocalEvents() since we changed status to AVAILABLE
++                else:
++                    ocf.logger.info("handleLocalEvents: all local events finished, but some resources have not completed startup yet -> wait")
++        else:
++            # there are no pending events set for us (yet)
++            if curState == AVAILABLE:
++                if len(azEventIDs) > 0:
++                    if clusterHelper.otherNodesAvailable(self):
++                        ocf.logger.info("handleLocalEvents: can handle local events %s -> set state STOPPING" % (str(azEventIDs)))
++                        # this will also set mayUpdateDocVersion = True
++                        curState = self.updateNodeStateAndEvents(STOPPING, azEventIDs)
++                    else:
++                        ocf.logger.info("handleLocalEvents: cannot handle azEvents %s (only node available) -> set state ON_HOLD" % str(azEventIDs))
++                        self.setState(ON_HOLD)
++                else:
++                    ocf.logger.debug("handleLocalEvents: no local azEvents to handle")
++            if curState == STOPPING:
++                if clusterHelper.noPendingResourcesOnNode(self.hostName):
++                    ocf.logger.info("handleLocalEvents: all local resources are started properly -> put node standby")
++                    self.putNodeStandby()
++                    mayUpdateDocVersion = True
++                else:
++                    ocf.logger.info("handleLocalEvents: some local resources are not clean yet -> wait")
++
++        ocf.logger.debug("handleLocalEvents: finished; mayUpdateDocVersion = %s" % str(mayUpdateDocVersion))
++        return mayUpdateDocVersion
++
++##############################################################################
++
++class raAzEvents:
++    """
++    Main class for resource agent
++    """
++    def __init__(self, relevantEventTypes):
++        self.node = Node(self)
++        self.relevantEventTypes = relevantEventTypes
++
++    def monitor(self):
++        ocf.logger.debug("monitor: begin")
++
++        pullFailedAttemps = 0
++        while True:
++            # check if another node is pulling at the same time;
++            # this should only be a concern for the first pull, as setting up Scheduled Events may take up to 2 minutes.
++            if clusterHelper.getAttr(attr_globalPullState) == "PULLING":
++                pullFailedAttemps += 1
++                if pullFailedAttemps == global_pullMaxAttempts:
++                    ocf.logger.warning("monitor: exceeded maximum number of attempts (%d) to pull events" % global_pullMaxAttempts)
++                    ocf.logger.debug("monitor: finished")
++                    return ocf.OCF_SUCCESS
++                else:
++                    ocf.logger.info("monitor: another node is pulling; retry in %d seconds" % global_pullDelaySecs)
++                    time.sleep(global_pullDelaySecs)
++                    continue
++
++            # we can pull safely from Azure Metadata Service
++            clusterHelper.setAttr(attr_globalPullState, "PULLING")
++            events = azHelper.pullScheduledEvents()
++            clusterHelper.setAttr(attr_globalPullState, "IDLE")
++
++            # get current document version
++            curDocVersion  = events.DocumentIncarnation
++            lastDocVersion = self.node.getAttr(attr_lastDocVersion)
++            ocf.logger.debug("monitor: lastDocVersion = %s; curDocVersion = %s" % (lastDocVersion, curDocVersion))
++
++            # split events local/remote
++            (localEvents, remoteEvents) = self.node.separateEvents(events.Events)
++
++            # ensure local events are only executing once
++            if curDocVersion != lastDocVersion:
++                ocf.logger.debug("monitor: curDocVersion has not been handled yet")
++                # handleLocalEvents() returns True if mayUpdateDocVersion is True;
++                # this is only the case if we can ensure there are no pending events
++                if self.node.handleLocalEvents(localEvents):
++                    ocf.logger.info("monitor: handleLocalEvents completed successfully -> update curDocVersion")
++                    self.node.setAttr(attr_lastDocVersion, curDocVersion)
++                else:
++                    ocf.logger.debug("monitor: handleLocalEvents still waiting -> keep curDocVersion")
++            else:
++                ocf.logger.info("monitor: already handled curDocVersion, skip")
++
++            # remove orphaned remote events and then handle the remaining remote events
++            self.node.removeOrphanedEvents(remoteEvents)
++            self.node.handleRemoteEvents(remoteEvents)
++            break
++
++        ocf.logger.debug("monitor: finished")
++        return ocf.OCF_SUCCESS
++
++##############################################################################
++
++def setLoglevel(verbose):
++    # set up writing into syslog
++    loglevel = default_loglevel
++    if verbose:
++        opener = urllib2.build_opener(urllib2.HTTPHandler(debuglevel=1))
++        urllib2.install_opener(opener)
++        loglevel = ocf.logging.DEBUG
++    ocf.log.setLevel(loglevel)
++
++description = (
++    "Microsoft Azure Scheduled Events monitoring agent",
++    """This resource agent implements a monitor for scheduled
++(maintenance) events for a Microsoft Azure VM.
++
++If any relevant events are found, it moves all Pacemaker resources
++away from the affected node to allow for a graceful shutdown.
++
++	Usage:
++		[OCF_RESKEY_eventTypes=VAL] [OCF_RESKEY_verbose=VAL] azure-events ACTION
++
++		action (required): Supported values: monitor, help, meta-data
++		eventTypes (optional): List of event types to be considered
++				relevant by the resource agent (comma-separated).
++				Supported values: Freeze,Reboot,Redeploy
++				Default = Reboot,Redeploy
++		verbose (optional): If set to true, displays debug info.
++				Default = false
++
++	Deployment:
++		crm configure primitive rsc_azure-events ocf:heartbeat:azure-events \
++			op monitor interval=10s
++		crm configure clone cln_azure-events rsc_azure-events
++
++For further information on Microsoft Azure Scheduled Events, please
++refer to the following documentation:
++https://docs.microsoft.com/en-us/azure/virtual-machines/linux/scheduled-events
++""")
++
++def monitor_action(eventTypes):
++    relevantEventTypes = set(eventTypes.split(",") if eventTypes else [])
++    ra = raAzEvents(relevantEventTypes)
++    return ra.monitor()
++
++def validate_action(eventTypes):
++    if eventTypes:
++        for event in eventTypes.split(","):
++            if event not in ("Freeze", "Reboot", "Redeploy"):
++                ocf.ocf_exit_reason("Event type not one of Freeze, Reboot, Redeploy: " + eventTypes)
++                return ocf.OCF_ERR_CONFIGURED
++    return ocf.OCF_SUCCESS
++
++def main():
++    agent = ocf.Agent("azure-events", shortdesc=description[0], longdesc=description[1])
++    agent.add_parameter(
++        "eventTypes",
++        shortdesc="List of resources to be considered",
++        longdesc="A comma-separated list of event types that will be handled by this resource agent. (Possible values: Freeze,Reboot,Redeploy)",
++        content_type="string",
++        default="Reboot,Redeploy")
++    agent.add_parameter(
++        "verbose",
++        shortdesc="Enable verbose agent logging",
++        longdesc="Set to true to enable verbose logging",
++        content_type="boolean",
++        default="false")
++    agent.add_action("start", timeout=10, handler=lambda: ocf.OCF_SUCCESS)
++    agent.add_action("stop", timeout=10, handler=lambda: ocf.OCF_SUCCESS)
++    agent.add_action("validate-all", timeout=20, handler=validate_action)
++    agent.add_action("monitor", timeout=240, interval=10, handler=monitor_action)
++    setLoglevel(ocf.is_true(ocf.get_parameter("verbose", "false")))
++    agent.run()
++
++if __name__ == '__main__':
++    main()
+diff -uNr a/heartbeat/Makefile.am b/heartbeat/Makefile.am
+--- a/heartbeat/Makefile.am	2020-04-16 11:54:08.467619588 +0200
++++ b/heartbeat/Makefile.am	2020-04-16 12:08:07.788224036 +0200
+@@ -55,7 +55,7 @@
+ osp_SCRIPTS	     =  nova-compute-wait	\
+ 			NovaEvacuate
+ 
+-ocf_SCRIPTS	     = AoEtarget		\
++ocf_SCRIPTS	     =  AoEtarget		\
+ 			AudibleAlarm		\
+ 			ClusterMon		\
+ 			CTDB			\
+@@ -116,10 +116,7 @@
+ 			fio			\
+ 			galera			\
+ 			garbd			\
+-			gcp-pd-move		\
+ 			gcp-vpc-move-ip		\
+-			gcp-vpc-move-vip	\
+-			gcp-vpc-move-route	\
+ 			iSCSILogicalUnit	\
+ 			iSCSITarget		\
+ 			ids			\
+@@ -177,6 +174,22 @@
+ 			vsftpd			\
+ 			zabbixserver
+ 
++if BUILD_AZURE_EVENTS
++ocf_SCRIPTS	    += azure-events
++endif
++
++if BUILD_GCP_PD_MOVE
++ocf_SCRIPTS	    += gcp-pd-move
++endif
++
++if BUILD_GCP_VPC_MOVE_ROUTE
++ocf_SCRIPTS	    += gcp-vpc-move-route
++endif
++
++if BUILD_GCP_VPC_MOVE_VIP
++ocf_SCRIPTS	    += gcp-vpc-move-vip
++endif
++
+ ocfcommondir		= $(OCF_LIB_DIR_PREFIX)/heartbeat
+ ocfcommon_DATA		= ocf-shellfuncs	\
+ 			  ocf-binaries		\
+@@ -205,3 +218,13 @@
+ 
+ %.check: %
+ 	OCF_ROOT=$(abs_srcdir) OCF_FUNCTIONS_DIR=$(abs_srcdir)	./$< meta-data | xmllint --path $(abs_srcdir) --noout --relaxng $(abs_srcdir)/metadata.rng -
++
++do_spellcheck = printf '[%s]\n' "$(agent)"; \
++		OCF_ROOT=$(abs_srcdir) OCF_FUNCTIONS_DIR=$(abs_srcdir) \
++		./$(agent) meta-data 2>/dev/null \
++		| xsltproc $(top_srcdir)/make/extract_text.xsl - \
++		| aspell pipe list -d en_US --ignore-case \
++		  --home-dir=$(top_srcdir)/make -p spellcheck-ignore \
++		| sed -n 's|^&\([^:]*\):.*|\1|p';
++spellcheck:
++	@$(foreach agent,$(ocf_SCRIPTS), $(do_spellcheck))
+diff -uNr a/m4/ac_python_module.m4 b/m4/ac_python_module.m4
+--- a/m4/ac_python_module.m4	1970-01-01 01:00:00.000000000 +0100
++++ b/m4/ac_python_module.m4	2020-04-14 11:11:26.325806378 +0200
+@@ -0,0 +1,30 @@
++dnl @synopsis AC_PYTHON_MODULE(modname[, fatal])
++dnl
++dnl Checks for Python module.
++dnl
++dnl If fatal is non-empty then absence of a module will trigger an
++dnl error.
++dnl
++dnl @category InstalledPackages
++dnl @author Andrew Collier <colliera@nu.ac.za>.
++dnl @version 2004-07-14
++dnl @license AllPermissive
++
++AC_DEFUN([AC_PYTHON_MODULE],[
++	AC_MSG_CHECKING(python module: $1)
++	$PYTHON -c "import $1" 2>/dev/null
++	if test $? -eq 0;
++	then
++		AC_MSG_RESULT(yes)
++		eval AS_TR_CPP(HAVE_PYMOD_$1)=yes
++	else
++		AC_MSG_RESULT(no)
++		eval AS_TR_CPP(HAVE_PYMOD_$1)=no
++		#
++		if test -n "$2"
++		then
++			AC_MSG_ERROR(failed to find required module $1)
++			exit 1
++		fi
++	fi
++])
diff --git a/SPECS/resource-agents.spec b/SPECS/resource-agents.spec
index 70c7b75..1255d56 100644
--- a/SPECS/resource-agents.spec
+++ b/SPECS/resource-agents.spec
@@ -66,7 +66,7 @@ Name: resource-agents
 Summary: Open Source HA Reusable Cluster Resource Scripts
 Version: 4.1.1
-Release: 44%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
+Release: 48%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
 License: GPLv2+ and LGPLv2+
 URL: https://github.com/ClusterLabs/resource-agents
 %if 0%{?fedora} || 0%{?centos_version} || 0%{?rhel}
@@ -188,6 +188,23 @@
 Patch101: bz1744224-IPsrcaddr-4-fix-hardcoded-device.patch
 Patch102: bz1792196-rabbitmq-cluster-delete-nodename-when-stop-fails.patch
 Patch103: bz1808468-1-lvmlockd-fix-conditionals.patch
 Patch104: bz1808468-2-remove-locking_type.patch
+Patch105: bz1759115-aws-vpc-route53-1-update.patch
+Patch106: bz1804658-azure-lb-1-remove-status-metadata.patch
+Patch107: bz1804658-azure-lb-2-add-socat-support.patch
+Patch108: bz1810466-aws-vpc-move-ip-1-add-routing_table_role.patch
+Patch109: bz1810466-aws-vpc-move-ip-2-update-metadata.patch
+Patch110: bz1792237-redis-1-fix-validate-all.patch
+Patch111: bz1792237-redis-2-run-validate-during-start.patch
+Patch112: bz1817432-use-safe-temp-file-location.patch
+Patch113: bz1817598-ocf_is_clone-1-fix-clone-max-can-be-0.patch
+Patch114: bz1817598-ocf_is_clone-2-update-comment.patch
+Patch115: bz1819021-aws-vpc-move-ip-delete-remaining-route-entries.patch
+Patch116: bz1759115-aws-vpc-route53-2-add-public-and-secondary-ip-support.patch
+Patch117: bz1633251-gcp-pd-move-1.patch
+Patch118: bz1633251-gcp-pd-move-2-use-OCF_FUNCTIONS_DIR.patch
+Patch119: bz1633251-gcp-pd-move-3-add-stackdriver_logging-to-metadata.patch
+Patch120: bz1819965-1-ocf.py-update.patch
+Patch121: bz1819965-2-azure-events.patch
 
 # bundle patches
 Patch1000: 7-gcp-bundled.patch
@@ -441,6 +458,23 @@ exit 1
 %patch102 -p1
 %patch103 -p1
 %patch104 -p1
+%patch105 -p1
+%patch106 -p1
+%patch107 -p1
+%patch108 -p1
+%patch109 -p1
+%patch110 -p1
+%patch111 -p1
+%patch112 -p1
+%patch113 -p1
+%patch114 -p1
+%patch115 -p1
+%patch116 -p1
+%patch117 -p1
+%patch118 -p1
+%patch119 -p1
+%patch120 -p1
+%patch121 -p1
 
 chmod 755 heartbeat/nova-compute-wait
 chmod 755 heartbeat/NovaEvacuate
@@ -827,7 +861,6 @@ rm -rf %{buildroot}
 %exclude /usr/lib/ocf/resource.d/heartbeat/Xen
 %exclude /usr/lib/ocf/resource.d/heartbeat/anything
 %exclude /usr/lib/ocf/resource.d/heartbeat/asterisk
-%exclude /usr/lib/ocf/resource.d/heartbeat/aws-vpc-route53
 %exclude /usr/lib/ocf/resource.d/heartbeat/dnsupdate
 %exclude /usr/lib/ocf/resource.d/heartbeat/eDir88
 %exclude /usr/lib/ocf/resource.d/heartbeat/fio
@@ -890,7 +923,6 @@ rm -rf %{buildroot}
 %exclude %{_mandir}/man7/ocf_heartbeat_Xen.7.gz
 %exclude %{_mandir}/man7/ocf_heartbeat_anything.7.gz
 %exclude %{_mandir}/man7/ocf_heartbeat_asterisk.7.gz
-%exclude %{_mandir}/man7/ocf_heartbeat_aws-vpc-route53.7.gz
 %exclude %{_mandir}/man7/ocf_heartbeat_dnsupdate.7.gz
 %exclude %{_mandir}/man7/ocf_heartbeat_eDir88.7.gz
 %exclude %{_mandir}/man7/ocf_heartbeat_fio.7.gz
@@ -985,6 +1017,37 @@ ccs_update_schema > /dev/null 2>&1 ||:
 %endif
 
 %changelog
+* Thu Apr 16 2020 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.1.1-48
+- gcp-pd-move: new resource agent for Google Cloud
+- azure-events: new resource agent for Azure
+
+  Resolves: rhbz#1633251
+  Resolves: rhbz#1819965
+
+* Mon Apr 6 2020 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.1.1-47
+- Add aws-vpc-route53 agent
+- aws-vpc-move-ip: delete remaining route entries
+
+  Resolves: rhbz#1759115
+  Resolves: rhbz#1819021
+
+* Fri Mar 27 2020 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.1.1-46
+- use safe temp file location
+- ocf-shellfuncs: ocf_is_clone(): fix to return true when clone-max
+  is set to 0
+
+  Resolves: rhbz#1817432
+  Resolves: rhbz#1817598
+
+* Wed Mar 18 2020 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.1.1-45
+- azure-lb: support using socat instead of nc
+- aws-vpc-move-ip: add "routing_table_role" parameter
+- redis: fix validate-all action and run it during start
+
+  Resolves: rhbz#1804658
+  Resolves: rhbz#1810466
+  Resolves: rhbz#1792237
+
 * Fri Mar 6 2020 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.1.1-44
 - lvmlockd: automatically remove locking_type from lvm.conf for LVM
   v2.03+