From 52d09b57a499ed7b3757e0e2954c2783198d5b23 Mon Sep 17 00:00:00 2001
From: Damien Ciabrini <damien.ciabrini@gmail.com>
Date: Mon, 9 Nov 2020 20:42:19 +0100
Subject: [PATCH] podman: recover from podman's storage being out of sync
If a system crashes while podman is stopping a container (e.g. because a
fencing action took place), podman might be unable, after reboot, to recreate
the container as requested by the resource agent.
When such a start operation fails, it might be because the internal storage
layer still references an old container with the same name, even though podman
itself thinks there is no such container. If so, purge the storage layer to try
to clean the corruption and try recreating the container.
---
heartbeat/podman | 29 +++++++++++++++++++++++++++--
1 file changed, 27 insertions(+), 2 deletions(-)
diff --git a/heartbeat/podman b/heartbeat/podman
index 81b00ee6f..d4d608ca3 100755
--- a/heartbeat/podman
+++ b/heartbeat/podman
@@ -345,6 +345,32 @@ create_transient_drop_in_dependency()
}
+run_new_container()
+{
+ local opts=$1 # options placed right after "podman run"
+ local image=$2 # image argument given to "podman run"
+ local cmd=$3 # command appended after the image
+ local rc # exit status of the most recent "podman run" attempt
+
+ ocf_log info "running container $CONTAINER for the first time"
+ ocf_run podman run $opts $image $cmd
+ rc=$?
+ if [ $rc -eq 125 ]; then
+ # Exit status 125 is treated as an internal podman error. It might
+ # be because the internal storage layer still references an old
+ # container with the same name, even though podman itself thinks
+ # there is no such container. If so, purge the storage layer to
+ # try to clean the corruption, then run the container once more.
+ ocf_log warn "Internal podman error while creating new container $CONTAINER. Retrying."
+ ocf_run podman rm --storage $CONTAINER
+ ocf_run podman run $opts $image $cmd
+ rc=$? # the retry's status replaces the first attempt's status
+ fi
+
+ return $rc
+}
+
+
podman_start()
{
local cid
@@ -378,8 +404,7 @@ podman_start()
# make sure any previous container matching our container name is cleaned up first.
# we already know at this point it wouldn't be running
remove_container
- ocf_log info "running container $CONTAINER for the first time"
- ocf_run podman run $run_opts $OCF_RESKEY_image $OCF_RESKEY_run_cmd
+ run_new_container "$run_opts" $OCF_RESKEY_image "$OCF_RESKEY_run_cmd"
fi
rc=$?