From 52d09b57a499ed7b3757e0e2954c2783198d5b23 Mon Sep 17 00:00:00 2001
From: Damien Ciabrini <damien.ciabrini@gmail.com>
Date: Mon, 9 Nov 2020 20:42:19 +0100
Subject: [PATCH] podman: recover from podman's storage being out of sync

If the system crashes while podman is stopping a container (e.g. because
a fencing action took place), it can happen that on reboot podman is not
able to recreate the container as requested by the resource agent.

When such a start operation fails, it may be because podman's internal
storage layer still references an old container with the same name, even
though podman itself reports that no such container exists. In that case,
purge the storage layer to clear the corruption and retry creating the
container.
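
For reference, the recovery performed by the agent boils down to the
shell sequence below. This is only a sketch: the container name "test"
and the image are hypothetical placeholders. Note that podman reserves
exit status 125 for errors coming from podman itself, as opposed to
errors from the containerized command:

    # podman reports no container by that name...
    podman ps -a --filter name=test
    # ...yet creating one fails with status 125, because the storage
    # layer still holds a leftover container entry with the same name
    podman run --name test docker.io/library/alpine sleep 60
    # purge the stale entry directly from the storage layer, then retry
    podman rm --storage test
    podman run --name test docker.io/library/alpine sleep 60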
---
 heartbeat/podman | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/heartbeat/podman b/heartbeat/podman
index 81b00ee6f..d4d608ca3 100755
--- a/heartbeat/podman
+++ b/heartbeat/podman
@@ -345,6 +345,32 @@ create_transient_drop_in_dependency()
 }
 
 
+run_new_container()
+{
+	local opts=$1
+	local image=$2
+	local cmd=$3
+	local rc
+
+	ocf_log info "running container $CONTAINER for the first time"
+	ocf_run podman run $opts $image $cmd
+	rc=$?
+	if [ $rc -eq 125 ]; then
+		# If an internal podman error occurred, it might be because
+		# the internal storage layer still references an old container
+		# with the same name, even though podman itself thinks there
+		# is no such container. If so, purge the storage layer to try
+		# to clean the corruption and try again.
+		ocf_log warn "Internal podman error while creating new container $CONTAINER. Retrying."
+		ocf_run podman rm --storage $CONTAINER
+		ocf_run podman run $opts $image $cmd
+		rc=$?
+	fi
+
+	return $rc
+}
+
+
 podman_start()
 {
 	local cid
@@ -378,8 +404,7 @@ podman_start()
 		# make sure any previous container matching our container name is cleaned up first.
 		# we already know at this point it wouldn't be running
 		remove_container
-		ocf_log info "running container $CONTAINER for the first time"
-		ocf_run podman run $run_opts $OCF_RESKEY_image $OCF_RESKEY_run_cmd
+		run_new_container "$run_opts" $OCF_RESKEY_image "$OCF_RESKEY_run_cmd"
 	fi
 	rc=$?
 