Blame SOURCES/bz1900015-podman-recover-from-storage-out-of-sync.patch

bcdf71
From 52d09b57a499ed7b3757e0e2954c2783198d5b23 Mon Sep 17 00:00:00 2001
bcdf71
From: Damien Ciabrini <damien.ciabrini@gmail.com>
bcdf71
Date: Mon, 9 Nov 2020 20:42:19 +0100
bcdf71
Subject: [PATCH] podman: recover from podman's storage being out of sync
bcdf71
bcdf71
If a system crashes while podman is stopping a container (e.g. a fencing action
bcdf71
took place), it might happen that on reboot, podman is not able to recreate
bcdf71
a container as requested by the resource agent.
bcdf71
bcdf71
When such a start operation fails, it might be because the internal storage
bcdf71
layer still references an old container with the same name, even though podman
bcdf71
itself thinks there is no such container. If so, purge the storage layer to try
bcdf71
to clean the corruption and try recreating the container.
bcdf71
---
bcdf71
 heartbeat/podman | 29 +++++++++++++++++++++++++++--
bcdf71
 1 file changed, 27 insertions(+), 2 deletions(-)
bcdf71
bcdf71
diff --git a/heartbeat/podman b/heartbeat/podman
bcdf71
index 81b00ee6f..d4d608ca3 100755
bcdf71
--- a/heartbeat/podman
bcdf71
+++ b/heartbeat/podman
bcdf71
@@ -345,6 +345,32 @@ create_transient_drop_in_dependency()
bcdf71
 }
bcdf71
 
bcdf71
 
bcdf71
+run_new_container()
bcdf71
+{
bcdf71
+	local opts=$1
bcdf71
+	local image=$2
bcdf71
+	local cmd=$3
bcdf71
+	local rc
bcdf71
+	
bcdf71
+	ocf_log info "running container $CONTAINER for the first time"
bcdf71
+	ocf_run podman run $opts $image $cmd
bcdf71
+	rc=$?
bcdf71
+	if [ $rc -eq 125 ]; then
bcdf71
+		# If an internal podman error occurred, it might be because
bcdf71
+		# the internal storage layer still references an old container
bcdf71
+		# with the same name, even though podman itself thinks there
bcdf71
+		# is no such container. If so, purge the storage layer to try
bcdf71
+		# to clean the corruption and try again.
bcdf71
+		ocf_log warn "Internal podman error while creating new container $CONTAINER. Retrying."
bcdf71
+		ocf_run podman rm --storage $CONTAINER
bcdf71
+		ocf_run podman run $opts $image $cmd
bcdf71
+		rc=$?
bcdf71
+	fi
bcdf71
+	
bcdf71
+	return $rc
bcdf71
+}
bcdf71
+
bcdf71
+
bcdf71
 podman_start()
bcdf71
 {
bcdf71
 	local cid
bcdf71
@@ -378,8 +404,7 @@ podman_start()
bcdf71
 		# make sure any previous container matching our container name is cleaned up first.
bcdf71
 		# we already know at this point it wouldn't be running
bcdf71
 		remove_container
bcdf71
-		ocf_log info "running container $CONTAINER for the first time"
bcdf71
-		ocf_run podman run $run_opts $OCF_RESKEY_image $OCF_RESKEY_run_cmd
bcdf71
+		run_new_container "$run_opts" $OCF_RESKEY_image "$OCF_RESKEY_run_cmd"
bcdf71
 	fi
bcdf71
 	rc=$?
bcdf71