Blame SOURCES/bz1736746-podman-drop-in-support.patch

a89620
From 462ada6164cb77c81f5291d88287d68506d38056 Mon Sep 17 00:00:00 2001
a89620
From: Damien Ciabrini <dciabrin@redhat.com>
a89620
Date: Tue, 9 Jul 2019 23:14:21 +0200
a89620
Subject: [PATCH] Generate additional drop-in dependencies for podman containers
a89620
a89620
When podman creates a container, it creates two additional systemd
a89620
scope files dynamically:
a89620
a89620
  - libpod-conmon-<CONTAINERID>.scope - runs a conmon process that
a89620
    tracks a container's pid1 into a dedicated pidfile.
a89620
  - libpod-<CONTAINERID>.scope - created dynamically by runc,
a89620
    for cgroups accounting
a89620
a89620
On shutdown, it can happen that systemd stops those scopes early,
a89620
which in turn sends a SIGTERM to pacemaker-managed containers
a89620
before pacemaker has scheduled any stop operation. That
a89620
confuses the cluster and may break shutdown.
a89620
a89620
Add a new option in the resource-agent to inject additional
a89620
dependencies into the dynamically created scope files, so that
a89620
systemd is not allowed to stop scopes before the pacemaker
a89620
service itself is stopped.
a89620
a89620
When that option is enabled, the scopes look like:
a89620
a89620
    # podman ps | grep galera
a89620
    c329819a1227  192.168.122.8:8787/rhosp15/openstack-mariadb:latest                     dumb-init -- /bin...  About an hour ago  Up About an hour ago         galera-bundle-podman-0
a89620
a89620
    # systemctl cat libpod*c329819a1227*
a89620
    # /run/systemd/transient/libpod-conmon-c329819a1227ec548d678861994ef755b1fde9a244e1e4d966d17674df88ce7b.scope
a89620
    # This is a transient unit file, created programmatically via the systemd API. Do not edit.
a89620
    [Scope]
a89620
    Slice=machine.slice
a89620
    Delegate=yes
a89620
a89620
    [Unit]
a89620
    DefaultDependencies=no
a89620
a89620
    # /run/systemd/transient/libpod-conmon-c329819a1227ec548d678861994ef755b1fde9a244e1e4d966d17674df88ce7b.scope.d/dep.conf
a89620
    [Unit]
a89620
    Before=pacemaker.service
a89620
a89620
    # /run/systemd/transient/libpod-c329819a1227ec548d678861994ef755b1fde9a244e1e4d966d17674df88ce7b.scope
a89620
    # This is a transient unit file, created programmatically via the systemd API. Do not edit.
a89620
    [Unit]
a89620
    Description=libcontainer container c329819a1227ec548d678861994ef755b1fde9a244e1e4d966d17674df88ce7b
a89620
a89620
    [Scope]
a89620
    Slice=machine.slice
a89620
    Delegate=yes
a89620
    MemoryAccounting=yes
a89620
    CPUAccounting=yes
a89620
    BlockIOAccounting=yes
a89620
a89620
    [Unit]
a89620
    DefaultDependencies=no
a89620
a89620
    # /run/systemd/transient/libpod-c329819a1227ec548d678861994ef755b1fde9a244e1e4d966d17674df88ce7b.scope.d/dep.conf
a89620
    [Unit]
a89620
    Before=pacemaker.service
a89620
a89620
Effectively, this prevents systemd from managing the shutdown of any
a89620
pacemaker-managed podman container.
a89620
a89620
Related: rhbz#1726442
a89620
---
a89620
 heartbeat/podman | 82 +++++++++++++++++++++++++++++++++++++++++++++++-
a89620
 1 file changed, 81 insertions(+), 1 deletion(-)
a89620
a89620
diff --git a/heartbeat/podman b/heartbeat/podman
a89620
index 8fc2c4695..8a916eb8c 100755
a89620
--- a/heartbeat/podman
a89620
+++ b/heartbeat/podman
a89620
@@ -158,6 +158,16 @@ to have the particular one persist when this happens.
a89620
 <shortdesc lang="en">reuse container</shortdesc>
a89620
 <content type="boolean" default="${OCF_RESKEY_reuse_default}"/>
a89620
 </parameter>
a89620
+
a89620
+<parameter name="drop_in_dependency" required="0" unique="0">
a89620
+<longdesc lang="en">
a89620
+Use transient drop-in files to add extra dependencies to the systemd
a89620
+scopes associated to the container. During reboot, this prevents systemd
a89620
+to stop the container before pacemaker.
a89620
+</longdesc>
a89620
+<shortdesc lang="en">drop-in dependency</shortdesc>
a89620
+<content type="boolean"/>
a89620
+</parameter>
a89620
 </parameters>
a89620
 
a89620
 <actions>
a89620
@@ -273,8 +283,57 @@ podman_create_mounts() {
a89620
 	IFS="$oldIFS"
a89620
 }
a89620
 
a89620
+podman_container_id()
a89620
+{
a89620
+	# Retrieve the container ID by doing a "podman ps" rather than
a89620
+	# a "podman inspect", because the latter has performance issues
a89620
+	# under IO load.
a89620
+	# We could have run "podman start $CONTAINER" to get the ID back
a89620
+	# but if the container is stopped, the command will return a
a89620
+	# name instead of a container ID. This would break us.
a89620
+	podman ps --no-trunc --format '{{.ID}} {{.Names}}' | grep -F -w -m1 "$CONTAINER" | cut -d' ' -f1
a89620
+}
a89620
+
a89620
+
a89620
+create_transient_drop_in_dependency()
a89620
+{
a89620
+	local cid=$1
a89620
+	local rc=$OCF_SUCCESS
a89620
+
a89620
+	if [ -z "$cid" ]; then
a89620
+		ocf_log error "Container ID not found for \"$CONTAINER\". Not creating drop-in dependency"
a89620
+		return $OCF_ERR_GENERIC
a89620
+	fi
a89620
+
a89620
+	ocf_log info "Creating drop-in dependency for \"$CONTAINER\" ($cid)"
a89620
+	for scope in "libpod-$cid.scope.d" "libpod-conmon-$cid.scope.d"; do
a89620
+		if [ $rc -eq $OCF_SUCCESS ] && [ ! -d /run/systemd/transient/"$scope" ]; then
a89620
+			mkdir -p /run/systemd/transient/"$scope" && \
a89620
+			echo -e "[Unit]\nBefore=pacemaker.service" > /run/systemd/transient/"$scope"/dep.conf && \
a89620
+			chmod ago+r /run/systemd/transient/"$scope" /run/systemd/transient/"$scope"/dep.conf
a89620
+			rc=$?
a89620
+		fi
a89620
+	done
a89620
+
a89620
+	if [ $rc -ne $OCF_SUCCESS ]; then
a89620
+		ocf_log error "Could not create drop-in dependency for \"$CONTAINER\" ($cid)"
a89620
+	else
a89620
+		systemctl daemon-reload
a89620
+		rc=$?
a89620
+		if [ $rc -ne $OCF_SUCCESS ]; then
a89620
+			ocf_log error "Could not refresh service definition after creating drop-in for \"$CONTAINER\""
a89620
+		fi
a89620
+	fi
a89620
+
a89620
+	return $rc
a89620
+}
a89620
+
a89620
+
a89620
 podman_start()
a89620
 {
a89620
+	local cid
a89620
+	local rc
a89620
+
a89620
 	podman_create_mounts
a89620
 	local run_opts="-d --name=${CONTAINER}"
a89620
 	# check to see if the container has already started
a89620
@@ -306,8 +365,17 @@ podman_start()
a89620
 		ocf_log info "running container $CONTAINER for the first time"
a89620
 		ocf_run podman run $run_opts $OCF_RESKEY_image $OCF_RESKEY_run_cmd
a89620
 	fi
a89620
+	rc=$?
a89620
 
a89620
-	if [ $? -ne 0 ]; then
a89620
+	# if the container was stopped or didn't exist before, systemd
a89620
+	# removed the libpod* scopes. So always try to recreate the drop-ins
a89620
+	if [ $rc -eq 0 ] && ocf_is_true "$OCF_RESKEY_drop_in_dependency"; then
a89620
+		cid=$(podman_container_id)
a89620
+		create_transient_drop_in_dependency "$cid"
a89620
+		rc=$?
a89620
+	fi
a89620
+
a89620
+	if [ $rc -ne 0 ]; then
a89620
 		ocf_exit_reason "podman failed to launch container"
a89620
 		return $OCF_ERR_GENERIC
a89620
 	fi
a89620
@@ -353,6 +421,8 @@ podman_stop()
a89620
 	else
a89620
 		ocf_log debug "waiting $timeout second[s] before killing container"
a89620
 		ocf_run podman stop -t=$timeout $CONTAINER
a89620
+		# on stop, systemd will automatically delete any transient
a89620
+		# drop-in conf that has been created earlier
a89620
 	fi
a89620
 
a89620
 	if [ $? -ne 0 ]; then
a89620
@@ -456,6 +526,16 @@ CONTAINER=$OCF_RESKEY_name
a89620
 # exec command to be non-empty
a89620
 : ${OCF_RESKEY_monitor_cmd:=/bin/true}
a89620
 
a89620
+# When OCF_RESKEY_drop_in_dependency is not populated, we
a89620
+# look at another file-based way of enabling the option.
a89620
+# Otherwise, consider it disabled.
a89620
+if [ -z "$OCF_RESKEY_drop_in_dependency" ]; then
a89620
+	if [ -f "/etc/sysconfig/podman_drop_in" ] || \
a89620
+	   [ -f "/etc/default/podman_drop_in" ]; then
a89620
+		OCF_RESKEY_drop_in_dependency=yes
a89620
+	fi
a89620
+fi
a89620
+
a89620
 case $__OCF_ACTION in
a89620
 meta-data) meta_data
a89620
 		exit $OCF_SUCCESS;;