From a606c335246babf71c94952118bcbfcc730eeaf1 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Jun 19 2021 04:26:02 +0000 Subject: import resource-agents-4.1.1-97.el8 --- diff --git a/SOURCES/bz1972743-podman-fix-container-creation-race.patch b/SOURCES/bz1972743-podman-fix-container-creation-race.patch new file mode 100644 index 0000000..561e0a2 --- /dev/null +++ b/SOURCES/bz1972743-podman-fix-container-creation-race.patch @@ -0,0 +1,74 @@ +From 7850aea1600389beb16c7aad40bba1b76ae694c4 Mon Sep 17 00:00:00 2001 +From: Damien Ciabrini +Date: Tue, 15 Jun 2021 20:03:20 +0200 +Subject: [PATCH] podman: workaround race during container creation + +podman and OCI runtime have a race that sometimes causes +a container to fail to be created and run [1] if the +cgroup to be used is not available yet. When that happens, +try to recreate it until it succeeds or the start +timeout is reached. + +[1] https://bugzilla.redhat.com/show_bug.cgi?id=1972209 +--- + heartbeat/podman | 32 ++++++++++++++++++++++++++++++-- + 1 file changed, 30 insertions(+), 2 deletions(-) + +diff --git a/heartbeat/podman b/heartbeat/podman +index 5b707f3f5..034dfff76 100755 +--- a/heartbeat/podman ++++ b/heartbeat/podman +@@ -358,8 +358,18 @@ run_new_container() + local rc + + ocf_log info "running container $CONTAINER for the first time" +- ocf_run podman run $opts $image $cmd ++ out=$(podman run $opts $image $cmd 2>&1) + rc=$? ++ ++ if [ -n "$out" ]; then ++ out="$(echo "$out" | tr -s ' \t\r\n' ' ')" ++ if [ $rc -eq 0 ]; then ++ ocf_log info "$out" ++ else ++ ocf_log err "$out" ++ fi ++ fi ++ + if [ $rc -eq 125 ]; then + # If an internal podman error occurred, it might be because + # the internal storage layer still references an old container +@@ -370,6 +380,24 @@ run_new_container() + ocf_run podman rm --storage $CONTAINER + ocf_run podman run $opts $image $cmd + rc=$? ++ elif [ $rc -eq 127 ]; then ++ # rhbz#1972209: podman 3.0.x seems to be hit by a race ++ # where the cgroup is not yet set up properly when the OCI ++ # runtime configures the container. If that happens, recreate ++ # the container as long as we get the same error code or ++ # until start timeout preempts us. ++ while [ $rc -eq 127 ] && (echo "$out" | grep -q "cgroup.*scope not found") ; do ++ ocf_log warn "Internal podman error while assigning cgroup. Retrying." ++ # Arbitrary sleep to prevent consuming all CPU while looping ++ sleep 1 ++ podman rm -f "$CONTAINER" ++ out=$(podman run $opts $image $cmd 2>&1) ++ rc=$? ++ done ++ # Log the created container ID if it succeeded ++ if [ $rc -eq 0 ]; then ++ ocf_log info "$out" ++ fi + fi + + return $rc +@@ -422,7 +450,7 @@ podman_start() + fi + + if [ $rc -ne 0 ]; then +- ocf_exit_reason "podman failed to launch container" ++ ocf_exit_reason "podman failed to launch container (rc: $rc)" + return $OCF_ERR_GENERIC + fi + diff --git a/SPECS/resource-agents.spec b/SPECS/resource-agents.spec index 6787b4f..66f2005 100644 --- a/SPECS/resource-agents.spec +++ b/SPECS/resource-agents.spec @@ -70,7 +70,7 @@ Name: resource-agents Summary: Open Source HA Reusable Cluster Resource Scripts Version: 4.1.1 -Release: 96%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist} +Release: 97%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist} License: GPLv2+ and LGPLv2+ URL: https://github.com/ClusterLabs/resource-agents %if 0%{?fedora} || 0%{?centos_version} || 0%{?rhel} @@ -281,6 +281,7 @@ Patch189: bz1872754-pgsqlms-new-ra.patch Patch190: bz1957765-gcp-vpc-move-vip-retry.patch Patch191: bz1969968-lvmlockd-remove-with_cmirrord.patch Patch192: bz1972035-LVM-activate-fix-drop-in.patch +Patch193: bz1972743-podman-fix-container-creation-race.patch # bundle patches Patch1000: 7-gcp-bundled.patch @@ -646,6 +647,7 @@ exit 1 %patch190 -p1 %patch191 -p1 %patch192 -p1 +%patch193 -p1 chmod 755 heartbeat/nova-compute-wait chmod 755 heartbeat/NovaEvacuate @@ -1227,6 +1229,11 @@ ccs_update_schema > /dev/null 2>&1 ||: %{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm %changelog +* Thu Jun 17 2021 Oyvind Albrigtsen - 4.1.1-97 +- podman: fix possible race during container creation + + Resolves: rhbz#1972743 + * Tue Jun 15 2021 Oyvind Albrigtsen - 4.1.1-96 - LVM-activate: fix drop-in check to avoid re-creating drop-in