Blame SOURCES/bz1900015-podman-recover-from-storage-out-of-sync.patch

184fb6
From 52d09b57a499ed7b3757e0e2954c2783198d5b23 Mon Sep 17 00:00:00 2001
184fb6
From: Damien Ciabrini <damien.ciabrini@gmail.com>
184fb6
Date: Mon, 9 Nov 2020 20:42:19 +0100
184fb6
Subject: [PATCH] podman: recover from podman's storage being out of sync
184fb6
184fb6
If a system crashes while podman is stopping a container (e.g. a fencing action
184fb6
took place), it might happen that on reboot, podman is not able to recreate
184fb6
a container as requested by the resource agent.
184fb6
184fb6
When such a start operation fails, it might be because the internal storage
184fb6
layer still references an old container with the same name, even though podman
184fb6
itself thinks there is no such container. If so, purge the storage layer to try
184fb6
to clean the corruption and try recreating the container.
184fb6
---
184fb6
 heartbeat/podman | 29 +++++++++++++++++++++++++++--
184fb6
 1 file changed, 27 insertions(+), 2 deletions(-)
184fb6
184fb6
diff --git a/heartbeat/podman b/heartbeat/podman
184fb6
index 81b00ee6f..d4d608ca3 100755
184fb6
--- a/heartbeat/podman
184fb6
+++ b/heartbeat/podman
184fb6
@@ -345,6 +345,32 @@ create_transient_drop_in_dependency()
184fb6
 }
184fb6
 
184fb6
 
184fb6
+run_new_container()
184fb6
+{
184fb6
+	local opts=$1
184fb6
+	local image=$2
184fb6
+	local cmd=$3
184fb6
+	local rc
184fb6
+	
184fb6
+	ocf_log info "running container $CONTAINER for the first time"
184fb6
+	ocf_run podman run $opts $image $cmd
184fb6
+	rc=$?
184fb6
+	if [ $rc -eq 125 ]; then
184fb6
+		# If an internal podman error occurred, it might be because
184fb6
+		# the internal storage layer still references an old container
184fb6
+		# with the same name, even though podman itself thinks there
184fb6
+		# is no such container. If so, purge the storage layer to try
184fb6
+		# to clean the corruption and try again.
184fb6
+		ocf_log warn "Internal podman error while creating new container $CONTAINER. Retrying."
184fb6
+		ocf_run podman rm --storage $CONTAINER
184fb6
+		ocf_run podman run $opts $image $cmd
184fb6
+		rc=$?
184fb6
+	fi
184fb6
+	
184fb6
+	return $rc
184fb6
+}
184fb6
+
184fb6
+
184fb6
 podman_start()
184fb6
 {
184fb6
 	local cid
184fb6
@@ -378,8 +404,7 @@ podman_start()
184fb6
 		# make sure any previous container matching our container name is cleaned up first.
184fb6
 		# we already know at this point it wouldn't be running
184fb6
 		remove_container
184fb6
-		ocf_log info "running container $CONTAINER for the first time"
184fb6
-		ocf_run podman run $run_opts $OCF_RESKEY_image $OCF_RESKEY_run_cmd
184fb6
+		run_new_container "$run_opts" $OCF_RESKEY_image "$OCF_RESKEY_run_cmd"
184fb6
 	fi
184fb6
 	rc=$?
184fb6