diff --git a/SOURCES/bz1886262-podman-recover-from-killed-conmon.patch b/SOURCES/bz1886262-podman-recover-from-killed-conmon.patch new file mode 100644 index 0000000..3fa5934 --- /dev/null +++ b/SOURCES/bz1886262-podman-recover-from-killed-conmon.patch @@ -0,0 +1,63 @@ +From 3aa0dda4e0c2a3b801d65aeacc4fdfd713a604f2 Mon Sep 17 00:00:00 2001 +From: Damien Ciabrini +Date: Tue, 27 Oct 2020 18:01:36 +0100 +Subject: [PATCH] podman: recover from killed conmon side process + +When podman containers are created by the resource-agent, the podman +runtime spawns a side process (conmon) to monitor the container and +record the exit status. + +If the conmon process dies unexpectedly (e.g. kill -9), the podman +container can still be stopped, even if the cli returns a generic +error. + +Try to distinguish this specific failure condition and make the stop +operation resilient; when it happens, just log a warning and finish +the usual stop actions. +--- + heartbeat/podman | 18 +++++++++++++++--- + 1 file changed, 15 insertions(+), 3 deletions(-) + +diff --git a/heartbeat/podman b/heartbeat/podman +index 81b00ee6f..9f8c2a091 100755 +--- a/heartbeat/podman ++++ b/heartbeat/podman +@@ -419,6 +419,7 @@ podman_start() + podman_stop() + { + local timeout=60 ++ local rc + podman_simple_status + if [ $? -eq $OCF_NOT_RUNNING ]; then + remove_container +@@ -434,16 +435,27 @@ podman_stop() + + if ocf_is_true "$OCF_RESKEY_force_kill"; then + ocf_run podman kill $CONTAINER ++ rc=$? + else + ocf_log debug "waiting $timeout second[s] before killing container" + ocf_run podman stop -t=$timeout $CONTAINER ++ rc=$? + # on stop, systemd will automatically delete any transient + # drop-in conf that has been created earlier + fi + +- if [ $? -ne 0 ]; then +- ocf_exit_reason "Failed to stop container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}." +- return $OCF_ERR_GENERIC ++ if [ $rc -ne 0 ]; then ++ # If the stop failed, it could be because the controlling conmon ++ # process died unexpectedly. 
If so, a generic error code is returned ++ # but the associated container exit code is -1. If that's the case, ++ # assume there's no failure and continue with the rm as usual. ++ if [ $rc -eq 125 ] && \ ++ podman inspect --format '{{.State.Status}}:{{.State.ExitCode}}' $CONTAINER | grep -wq "stopped:-1"; then ++ ocf_log warn "Container ${CONTAINER} had an unexpected stop outcome. Trying to remove it anyway." ++ else ++ ocf_exit_reason "Failed to stop container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}." ++ return $OCF_ERR_GENERIC ++ fi + fi + + remove_container diff --git a/SOURCES/bz1897570-aws-add-imdsv2-support.patch b/SOURCES/bz1897570-aws-add-imdsv2-support.patch new file mode 100644 index 0000000..09772cc --- /dev/null +++ b/SOURCES/bz1897570-aws-add-imdsv2-support.patch @@ -0,0 +1,97 @@ +From 8f10d0eb1e33d38ab6e89015a903620c54edd7c1 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Fri, 13 Nov 2020 16:36:20 +0100 +Subject: [PATCH] AWS agents: add support for IMDSv2 + +--- + heartbeat/aws-vpc-move-ip | 5 +++-- + heartbeat/aws-vpc-route53.in | 3 ++- + heartbeat/awseip | 9 +++++---- + heartbeat/awsvip | 7 ++++--- + 4 files changed, 14 insertions(+), 10 deletions(-) + +diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip +index 72a89ecb1..cbb629b00 100755 +--- a/heartbeat/aws-vpc-move-ip ++++ b/heartbeat/aws-vpc-move-ip +@@ -215,7 +215,8 @@ ec2ip_validate() { + return $OCF_ERR_CONFIGURED + fi + +- EC2_INSTANCE_ID="$(curl -s http://169.254.169.254/latest/meta-data/instance-id)" ++ TOKEN=$(curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600") ++ EC2_INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id -H "X-aws-ec2-metadata-token: $TOKEN") + + if [ -z "${EC2_INSTANCE_ID}" ]; then + ocf_exit_reason "Instance ID not found. Is this a EC2 instance?" 
+@@ -329,7 +330,7 @@ ec2ip_get_instance_eni() { + fi + ocf_log debug "MAC address associated with interface ${OCF_RESKEY_interface}: ${MAC_ADDR}" + +- cmd="curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/${MAC_ADDR}/interface-id" ++ cmd="curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/${MAC_ADDR}/interface-id -H \"X-aws-ec2-metadata-token: $TOKEN\"" + ocf_log debug "executing command: $cmd" + EC2_NETWORK_INTERFACE_ID="$(eval $cmd)" + rc=$? +diff --git a/heartbeat/aws-vpc-route53.in b/heartbeat/aws-vpc-route53.in +index b06b93726..4fb17019b 100644 +--- a/heartbeat/aws-vpc-route53.in ++++ b/heartbeat/aws-vpc-route53.in +@@ -347,7 +347,8 @@ r53_monitor() { + _get_ip() { + case $OCF_RESKEY_ip in + local|public) +- IPADDRESS="$(curl -s http://169.254.169.254/latest/meta-data/${OCF_RESKEY_ip}-ipv4)";; ++ TOKEN=$(curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600") ++ IPADDRESS=$(curl -s http://169.254.169.254/latest/meta-data/${OCF_RESKEY_ip}-ipv4 -H "X-aws-ec2-metadata-token: $TOKEN");; + *.*.*.*) + IPADDRESS="${OCF_RESKEY_ip}";; + esac +diff --git a/heartbeat/awseip b/heartbeat/awseip +index 445a03666..de1967774 100755 +--- a/heartbeat/awseip ++++ b/heartbeat/awseip +@@ -149,12 +149,12 @@ awseip_start() { + awseip_monitor && return $OCF_SUCCESS + + if [ -n "${PRIVATE_IP_ADDRESS}" ]; then +- NETWORK_INTERFACES_MACS="$(curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/)" ++ NETWORK_INTERFACES_MACS=$(curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/ -H "X-aws-ec2-metadata-token: $TOKEN") + for MAC in ${NETWORK_INTERFACES_MACS}; do +- curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/${MAC}/local-ipv4s | ++ curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/${MAC}/local-ipv4s -H "X-aws-ec2-metadata-token: $TOKEN" | + grep -q "^${PRIVATE_IP_ADDRESS}$" + if [ $? 
-eq 0 ]; then +- NETWORK_ID="$(curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/${MAC}/interface-id)" ++ NETWORK_ID=$(curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/${MAC}/interface-id -H "X-aws-ec2-metadata-token: $TOKEN") + fi + done + $AWSCLI --profile $OCF_RESKEY_profile ec2 associate-address \ +@@ -244,7 +244,8 @@ AWSCLI="${OCF_RESKEY_awscli}" + ELASTIC_IP="${OCF_RESKEY_elastic_ip}" + ALLOCATION_ID="${OCF_RESKEY_allocation_id}" + PRIVATE_IP_ADDRESS="${OCF_RESKEY_private_ip_address}" +-INSTANCE_ID="$(curl -s http://169.254.169.254/latest/meta-data/instance-id)" ++TOKEN=$(curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600") ++INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id -H "X-aws-ec2-metadata-token: $TOKEN") + + case $__OCF_ACTION in + start) +diff --git a/heartbeat/awsvip b/heartbeat/awsvip +index 3eb31e6ae..8050107e8 100755 +--- a/heartbeat/awsvip ++++ b/heartbeat/awsvip +@@ -206,9 +206,10 @@ esac + + AWSCLI="${OCF_RESKEY_awscli}" + SECONDARY_PRIVATE_IP="${OCF_RESKEY_secondary_private_ip}" +-INSTANCE_ID="$(curl -s http://169.254.169.254/latest/meta-data/instance-id)" +-MAC_ADDRESS="$(curl -s http://169.254.169.254/latest/meta-data/mac)" +-NETWORK_ID="$(curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/${MAC_ADDRESS}/interface-id)" ++TOKEN=$(curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600") ++INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id -H "X-aws-ec2-metadata-token: $TOKEN") ++MAC_ADDRESS=$(curl -s http://169.254.169.254/latest/meta-data/mac -H "X-aws-ec2-metadata-token: $TOKEN") ++NETWORK_ID=$(curl -s http://169.254.169.254/latest/meta-data/network/interfaces/macs/${MAC_ADDRESS}/interface-id -H "X-aws-ec2-metadata-token: $TOKEN") + + case $__OCF_ACTION in + start) diff --git 
a/SOURCES/bz1898690-crypt-make-key_file-crypt_type_not-unique.patch b/SOURCES/bz1898690-crypt-make-key_file-crypt_type_not-unique.patch new file mode 100644 index 0000000..8cecc16 --- /dev/null +++ b/SOURCES/bz1898690-crypt-make-key_file-crypt_type_not-unique.patch @@ -0,0 +1,31 @@ +From 16236f76d086187f6ae6202153519c1eb2fe4f87 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Tue, 24 Nov 2020 10:49:14 +0100 +Subject: [PATCH] crypt: make key_file and crypt_type parameters not unique + +--- + heartbeat/crypt | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/heartbeat/crypt b/heartbeat/crypt +index 0e49b6c2d..7d0a5607c 100755 +--- a/heartbeat/crypt ++++ b/heartbeat/crypt +@@ -86,7 +86,7 @@ The resulting block device path is /dev/mapper/name. + + + +- ++ + + Key file path containing the encryption passphrase + (aka key; see cryptsetup(8)). For LUKS, the passphrase as of the key_file +@@ -96,7 +96,7 @@ parameter is used to decrypt a randomly selected key when the device was created + + + +- ++ + + Encryption (device) type (e.g. "luks" or "luks2"). + diff --git a/SOURCES/bz1899551-NovaEvacuate-fix-delay_evacuate-unset.patch b/SOURCES/bz1899551-NovaEvacuate-fix-delay_evacuate-unset.patch new file mode 100644 index 0000000..7af35cc --- /dev/null +++ b/SOURCES/bz1899551-NovaEvacuate-fix-delay_evacuate-unset.patch @@ -0,0 +1,33 @@ +From 11ac2db8f55aa3e6858d6c1b2ab29ee36b612f03 Mon Sep 17 00:00:00 2001 +From: Michele Baldessari +Date: Tue, 17 Nov 2020 15:16:29 +0100 +Subject: [PATCH] Fix delay_evacuate being unset + +In Ie2fe784202d754eda38092479b1ab3ff4d02136a we added an additional +parameter to allow for setting a delay on the evacuation. +While it was tested with a specific delay, the case with a delay +being unset was missed. +Since OCF does not set the defaults from the metadata specification +for a parameter, we need to manually set it ourselves. 
+ +This fixes the following error: +Nov 17 13:00:21 database-1.foo.local pacemaker-execd [185805] (log_op_output) notice: nova-evacuate_monitor_10000[1038417] error output [ /usr/lib/ocf/resource.d/openstack/NovaEvacuate: line 228: [: !=: unary operator expected ] + +Change-Id: I0b7aacd67b77bc44c67fe7da4c494807abbbb4f3 +--- + +diff --git a/heartbeat/NovaEvacuate b/heartbeat/NovaEvacuate +index 596f520..8aa778c 100644 +--- a/heartbeat/NovaEvacuate ++++ b/heartbeat/NovaEvacuate +@@ -359,6 +359,10 @@ + fence_options="${fence_options} -e ${OCF_RESKEY_endpoint_type}" + fi + ++ if [ -z "${OCF_RESKEY_evacuate_delay}" ]; then ++ OCF_RESKEY_evacuate_delay=0 ++ fi ++ + if [ $rc != $OCF_SUCCESS ]; then + exit $rc + fi diff --git a/SOURCES/bz1900015-podman-recover-from-storage-out-of-sync.patch b/SOURCES/bz1900015-podman-recover-from-storage-out-of-sync.patch new file mode 100644 index 0000000..e022612 --- /dev/null +++ b/SOURCES/bz1900015-podman-recover-from-storage-out-of-sync.patch @@ -0,0 +1,64 @@ +From 52d09b57a499ed7b3757e0e2954c2783198d5b23 Mon Sep 17 00:00:00 2001 +From: Damien Ciabrini +Date: Mon, 9 Nov 2020 20:42:19 +0100 +Subject: [PATCH] podman: recover from podman's storage being out of sync + +If a system crashes while podman is stopping a container (e.g. a fencing action +took place), it might happen that on reboot, podman is not able to recreate +a container as requested by the resource agent. + +When such a start operation fails, it might be because the internal storage +layer still references an old container with the same name, even though podman +itself thinks there is no such container. If so, purge the storage layer to try +to clean the corruption and try recreating the container.
+--- + heartbeat/podman | 29 +++++++++++++++++++++++++++-- + 1 file changed, 27 insertions(+), 2 deletions(-) + +diff --git a/heartbeat/podman b/heartbeat/podman +index 81b00ee6f..d4d608ca3 100755 +--- a/heartbeat/podman ++++ b/heartbeat/podman +@@ -345,6 +345,32 @@ create_transient_drop_in_dependency() + } + + ++run_new_container() ++{ ++ local opts=$1 ++ local image=$2 ++ local cmd=$3 ++ local rc ++ ++ ocf_log info "running container $CONTAINER for the first time" ++ ocf_run podman run $opts $image $cmd ++ rc=$? ++ if [ $rc -eq 125 ]; then ++ # If an internal podman error occurred, it might be because ++ # the internal storage layer still references an old container ++ # with the same name, even though podman itself thinks there ++ # is no such container. If so, purge the storage layer to try ++ # to clean the corruption and try again. ++ ocf_log warn "Internal podman error while creating new container $CONTAINER. Retrying." ++ ocf_run podman rm --storage $CONTAINER ++ ocf_run podman run $opts $image $cmd ++ rc=$? ++ fi ++ ++ return $rc ++} ++ ++ + podman_start() + { + local cid +@@ -378,8 +404,7 @@ podman_start() + # make sure any previous container matching our container name is cleaned up first. + # we already know at this point it wouldn't be running + remove_container +- ocf_log info "running container $CONTAINER for the first time" +- ocf_run podman run $run_opts $OCF_RESKEY_image $OCF_RESKEY_run_cmd ++ run_new_container "$run_opts" $OCF_RESKEY_image "$OCF_RESKEY_run_cmd" + fi + rc=$? 
+ diff --git a/SOURCES/bz1901357-crypt-support-symlink-devices.patch b/SOURCES/bz1901357-crypt-support-symlink-devices.patch new file mode 100644 index 0000000..6b4f385 --- /dev/null +++ b/SOURCES/bz1901357-crypt-support-symlink-devices.patch @@ -0,0 +1,23 @@ +From 4ded33d34505af19ddf19bfa125b5e6c243ebd94 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Thu, 26 Nov 2020 12:56:03 +0100 +Subject: [PATCH] crypt: allow encrypted_dev to be symlink to support using + devices in /dev/disk/... or UUID + +--- + heartbeat/crypt | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/heartbeat/crypt b/heartbeat/crypt +index 7d0a5607c..3ca28b92d 100755 +--- a/heartbeat/crypt ++++ b/heartbeat/crypt +@@ -177,7 +177,7 @@ crypt_validate_all() { + esac + esac + fi +- if [ ! -b "$encrypted_dev" ]; then ++ if [ ! -b "$encrypted_dev" ] && [ ! -L "$encrypted_dev" ]; then + ocf_exit_reason "Encrypted device $encrypted_dev not accessible" + return $OCF_ERR_ARGS + fi diff --git a/SOURCES/bz1902208-LVM-activate-stop-before-storage-service.patch b/SOURCES/bz1902208-LVM-activate-stop-before-storage-service.patch new file mode 100644 index 0000000..1486b29 --- /dev/null +++ b/SOURCES/bz1902208-LVM-activate-stop-before-storage-service.patch @@ -0,0 +1,60 @@ +From 79fb4b2d3d862f4e83b1df72107b6322b420ea34 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Sat, 28 Nov 2020 18:10:03 -0800 +Subject: [PATCH] LVM-activate: Stop before blk-availability.service + +If storage services (e.g., iscsi-shutdown.service) stop before an +LVM-activate resource stops, the managed VG may become unavailable. Then +the LVM-activate resource may fail to deactivate the volume group and +thus fail its stop operation. + +This commit adds a systemd drop-in "After=blk-availability.service" +directive for resource-agents-deps.target during the LVM-activate start +op. blk-availability includes "After=" directives for other storage +services and thus serves as a convenient wrapper. 
+ +blk-availability is not enabled by default, and a "Wants=" drop-in +that's created after Pacemaker starts would not be able to start +blk-availability automatically. So here we also start blk-availability +during LVM_start(). + +Resolves RHBZ#1902208 + +Signed-off-by: Reid Wahl +--- + heartbeat/LVM-activate | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate +index 94f9e5813..b8abd7579 100755 +--- a/heartbeat/LVM-activate ++++ b/heartbeat/LVM-activate +@@ -830,6 +830,28 @@ lvm_start() { + local rc + local vol + ++ if systemd_is_running ; then ++ # Create drop-in to deactivate VG before stopping ++ # storage services during shutdown/reboot. ++ after=$(systemctl show resource-agents-deps.target.d \ ++ --property=After | cut -d'=' -f2) ++ ++ case "$after" in ++ *" blk-availability.service "*) ++ ;; ++ *) ++ systemd_drop_in "99-LVM-activate" "After" \ ++ "blk-availability.service" ++ ;; ++ esac ++ ++ # If blk-availability isn't started, the "After=" ++ # directive has no effect. ++ if ! systemctl is-active blk-availability.service ; then ++ systemctl start blk-availability.service ++ fi ++ fi ++ + if lvm_status ; then + ocf_log info "${vol}: is already active." 
+ return $OCF_SUCCESS diff --git a/SPECS/resource-agents.spec b/SPECS/resource-agents.spec index 0e656e8..1ca9a61 100644 --- a/SPECS/resource-agents.spec +++ b/SPECS/resource-agents.spec @@ -70,7 +70,7 @@ Name: resource-agents Summary: Open Source HA Reusable Cluster Resource Scripts Version: 4.1.1 -Release: 74%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist} +Release: 79%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist} License: GPLv2+ and LGPLv2+ URL: https://github.com/ClusterLabs/resource-agents %if 0%{?fedora} || 0%{?centos_version} || 0%{?rhel} @@ -253,6 +253,13 @@ Patch161: bz1471182-crypt-1-new-ra.patch Patch162: bz1471182-crypt-2-fix-bashism.patch Patch163: bz1471182-crypt-3-fix-missing-and.patch Patch164: bz1895811-aws-vpc-move-ip-dont-warn-for-expected-scenarios.patch +Patch165: bz1897570-aws-add-imdsv2-support.patch +Patch166: bz1886262-podman-recover-from-killed-conmon.patch +Patch167: bz1900015-podman-recover-from-storage-out-of-sync.patch +Patch168: bz1898690-crypt-make-key_file-crypt_type_not-unique.patch +Patch169: bz1899551-NovaEvacuate-fix-delay_evacuate-unset.patch +Patch170: bz1901357-crypt-support-symlink-devices.patch +Patch171: bz1902208-LVM-activate-stop-before-storage-service.patch # bundle patches Patch1000: 7-gcp-bundled.patch @@ -573,6 +580,13 @@ exit 1 %patch162 -p1 %patch163 -p1 %patch164 -p1 +%patch165 -p1 +%patch166 -p1 +%patch167 -p1 +%patch168 -p1 +%patch169 -p1 -F2 +%patch170 -p1 +%patch171 -p1 chmod 755 heartbeat/nova-compute-wait chmod 755 heartbeat/NovaEvacuate @@ -1136,6 +1150,36 @@ ccs_update_schema > /dev/null 2>&1 ||: %endif %changelog +* Mon Nov 30 2020 Oyvind Albrigtsen - 4.1.1-79 +- LVM-activate: add drop-in during start-action to avoid getting + fenced during reboot + + Resolves: rhbz#1902208 + +* Thu Nov 26 2020 Oyvind Albrigtsen - 4.1.1-78 +- crypt: support symlink devices + + Resolves: rhbz#1901357 + +* Wed Nov 25 2020 Oyvind 
Albrigtsen - 4.1.1-77 +- NovaEvacuate: set evacuate_delay to 0 when it's not set + + Resolves: rhbz#1899551 + +* Tue Nov 24 2020 Oyvind Albrigtsen - 4.1.1-76 +- podman: recover from killed conmon process +- podman: recover from podman's storage being out of sync +- crypt: make key_file and crypt_type parameters not unique + + Resolves: rhbz#1886262 + Resolves: rhbz#1900015 + Resolves: rhbz#1898690 + +* Fri Nov 13 2020 Oyvind Albrigtsen - 4.1.1-75 +- AWS agents: add support for IMDSv2 + + Resolves: rhbz#1897570 + * Wed Nov 11 2020 Oyvind Albrigtsen - 4.1.1-74 - aws-vpc-move-ip: don't warn for expected scenarios