Blame SOURCES/bz1736746-podman-drop-in-support.patch

b4b3ce
From 462ada6164cb77c81f5291d88287d68506d38056 Mon Sep 17 00:00:00 2001
b4b3ce
From: Damien Ciabrini <dciabrin@redhat.com>
b4b3ce
Date: Tue, 9 Jul 2019 23:14:21 +0200
b4b3ce
Subject: [PATCH] Generate additional drop-in dependencies for podman containers
b4b3ce
b4b3ce
When podman creates a container, it creates two additional systemd
b4b3ce
scope files dynamically:
b4b3ce
b4b3ce
  - libpod-conmon-<CONTAINERID>.scope - runs a conmon process that
b4b3ce
    tracks a container's pid1 into a dedicated pidfile.
b4b3ce
  - libpod-<CONTAINERID>.scope - created dynamically by runc,
b4b3ce
    for cgroups accounting
b4b3ce
b4b3ce
On shutdown, it can happen that systemd stops those scopes early,
b4b3ce
which in turn sends a SIGTERM to pacemaker-managed containers
b4b3ce
before pacemaker has scheduled any stop operation. That
b4b3ce
confuses the cluster and may break shutdown.
b4b3ce
b4b3ce
Add a new option in the resource-agent to inject additional
b4b3ce
dependencies into the dynamically created scope files, so that
b4b3ce
systemd is not allowed to stop scopes before the pacemaker
b4b3ce
service itself is stopped.
b4b3ce
b4b3ce
When that option is enabled, the scopes look like:
b4b3ce
b4b3ce
    # podman ps | grep galera
b4b3ce
    c329819a1227  192.168.122.8:8787/rhosp15/openstack-mariadb:latest                     dumb-init -- /bin...  About an hour ago  Up About an hour ago         galera-bundle-podman-0
b4b3ce
b4b3ce
    # systemctl cat libpod*c329819a1227*
b4b3ce
    # /run/systemd/transient/libpod-conmon-c329819a1227ec548d678861994ef755b1fde9a244e1e4d966d17674df88ce7b.scope
b4b3ce
    # This is a transient unit file, created programmatically via the systemd API. Do not edit.
b4b3ce
    [Scope]
b4b3ce
    Slice=machine.slice
b4b3ce
    Delegate=yes
b4b3ce
b4b3ce
    [Unit]
b4b3ce
    DefaultDependencies=no
b4b3ce
b4b3ce
    # /run/systemd/transient/libpod-conmon-c329819a1227ec548d678861994ef755b1fde9a244e1e4d966d17674df88ce7b.scope.d/dep.conf
b4b3ce
    [Unit]
b4b3ce
    Before=pacemaker.service
b4b3ce
b4b3ce
    # /run/systemd/transient/libpod-c329819a1227ec548d678861994ef755b1fde9a244e1e4d966d17674df88ce7b.scope
b4b3ce
    # This is a transient unit file, created programmatically via the systemd API. Do not edit.
b4b3ce
    [Unit]
b4b3ce
    Description=libcontainer container c329819a1227ec548d678861994ef755b1fde9a244e1e4d966d17674df88ce7b
b4b3ce
b4b3ce
    [Scope]
b4b3ce
    Slice=machine.slice
b4b3ce
    Delegate=yes
b4b3ce
    MemoryAccounting=yes
b4b3ce
    CPUAccounting=yes
b4b3ce
    BlockIOAccounting=yes
b4b3ce
b4b3ce
    [Unit]
b4b3ce
    DefaultDependencies=no
b4b3ce
b4b3ce
    # /run/systemd/transient/libpod-c329819a1227ec548d678861994ef755b1fde9a244e1e4d966d17674df88ce7b.scope.d/dep.conf
b4b3ce
    [Unit]
b4b3ce
    Before=pacemaker.service
b4b3ce
b4b3ce
Effectively, this prevents systemd from managing the shutdown of any
b4b3ce
pacemaker-managed podman container.
b4b3ce
b4b3ce
Related: rhbz#1726442
b4b3ce
---
b4b3ce
 heartbeat/podman | 82 +++++++++++++++++++++++++++++++++++++++++++++++-
b4b3ce
 1 file changed, 81 insertions(+), 1 deletion(-)
b4b3ce
b4b3ce
diff --git a/heartbeat/podman b/heartbeat/podman
b4b3ce
index 8fc2c4695..8a916eb8c 100755
b4b3ce
--- a/heartbeat/podman
b4b3ce
+++ b/heartbeat/podman
b4b3ce
@@ -158,6 +158,16 @@ to have the particular one persist when this happens.
b4b3ce
 <shortdesc lang="en">reuse container</shortdesc>
b4b3ce
 <content type="boolean" default="${OCF_RESKEY_reuse_default}"/>
b4b3ce
 </parameter>
b4b3ce
+
b4b3ce
+<parameter name="drop_in_dependency" required="0" unique="0">
b4b3ce
+<longdesc lang="en">
b4b3ce
+Use transient drop-in files to add extra dependencies to the systemd
b4b3ce
+scopes associated to the container. During reboot, this prevents systemd
b4b3ce
+to stop the container before pacemaker.
b4b3ce
+</longdesc>
b4b3ce
+<shortdesc lang="en">drop-in dependency</shortdesc>
b4b3ce
+<content type="boolean"/>
b4b3ce
+</parameter>
b4b3ce
 </parameters>
b4b3ce
 
b4b3ce
 <actions>
b4b3ce
@@ -273,8 +283,57 @@ podman_create_mounts() {
b4b3ce
 	IFS="$oldIFS"
b4b3ce
 }
b4b3ce
 
b4b3ce
+podman_container_id()
b4b3ce
+{
b4b3ce
+	# Retrieve the container ID by doing a "podman ps" rather than
b4b3ce
+	# a "podman inspect", because the latter has performance issues
b4b3ce
+	# under IO load.
b4b3ce
+	# We could have run "podman start $CONTAINER" to get the ID back
b4b3ce
+	# but if the container is stopped, the command will return a
b4b3ce
+	# name instead of a container ID. This would break us.
b4b3ce
+	podman ps --no-trunc --format '{{.ID}} {{.Names}}' | grep -F -w -m1 "$CONTAINER" | cut -d' ' -f1
b4b3ce
+}
b4b3ce
+
b4b3ce
+
b4b3ce
+create_transient_drop_in_dependency()
b4b3ce
+{
b4b3ce
+	local cid=$1
b4b3ce
+	local rc=$OCF_SUCCESS
b4b3ce
+
b4b3ce
+	if [ -z "$cid" ]; then
b4b3ce
+		ocf_log error "Container ID not found for \"$CONTAINER\". Not creating drop-in dependency"
b4b3ce
+		return $OCF_ERR_GENERIC
b4b3ce
+	fi
b4b3ce
+
b4b3ce
+	ocf_log info "Creating drop-in dependency for \"$CONTAINER\" ($cid)"
b4b3ce
+	for scope in "libpod-$cid.scope.d" "libpod-conmon-$cid.scope.d"; do
b4b3ce
+		if [ $rc -eq $OCF_SUCCESS ] && [ ! -d /run/systemd/transient/"$scope" ]; then
b4b3ce
+			mkdir -p /run/systemd/transient/"$scope" && \
b4b3ce
+			echo -e "[Unit]\nBefore=pacemaker.service" > /run/systemd/transient/"$scope"/dep.conf && \
b4b3ce
+			chmod ago+r /run/systemd/transient/"$scope" /run/systemd/transient/"$scope"/dep.conf
b4b3ce
+			rc=$?
b4b3ce
+		fi
b4b3ce
+	done
b4b3ce
+
b4b3ce
+	if [ $rc -ne $OCF_SUCCESS ]; then
b4b3ce
+		ocf_log error "Could not create drop-in dependency for \"$CONTAINER\" ($cid)"
b4b3ce
+	else
b4b3ce
+		systemctl daemon-reload
b4b3ce
+		rc=$?
b4b3ce
+		if [ $rc -ne $OCF_SUCCESS ]; then
b4b3ce
+			ocf_log error "Could not refresh service definition after creating drop-in for \"$CONTAINER\""
b4b3ce
+		fi
b4b3ce
+	fi
b4b3ce
+
b4b3ce
+	return $rc
b4b3ce
+}
b4b3ce
+
b4b3ce
+
b4b3ce
 podman_start()
b4b3ce
 {
b4b3ce
+	local cid
b4b3ce
+	local rc
b4b3ce
+
b4b3ce
 	podman_create_mounts
b4b3ce
 	local run_opts="-d --name=${CONTAINER}"
b4b3ce
 	# check to see if the container has already started
b4b3ce
@@ -306,8 +365,17 @@ podman_start()
b4b3ce
 		ocf_log info "running container $CONTAINER for the first time"
b4b3ce
 		ocf_run podman run $run_opts $OCF_RESKEY_image $OCF_RESKEY_run_cmd
b4b3ce
 	fi
b4b3ce
+	rc=$?
b4b3ce
 
b4b3ce
-	if [ $? -ne 0 ]; then
b4b3ce
+	# if the container was stopped or didn't exist before, systemd
b4b3ce
+	# removed the libpod* scopes. So always try to recreate the drop-ins
b4b3ce
+	if [ $rc -eq 0 ] && ocf_is_true "$OCF_RESKEY_drop_in_dependency"; then
b4b3ce
+		cid=$(podman_container_id)
b4b3ce
+		create_transient_drop_in_dependency "$cid"
b4b3ce
+		rc=$?
b4b3ce
+	fi
b4b3ce
+
b4b3ce
+	if [ $rc -ne 0 ]; then
b4b3ce
 		ocf_exit_reason "podman failed to launch container"
b4b3ce
 		return $OCF_ERR_GENERIC
b4b3ce
 	fi
b4b3ce
@@ -353,6 +421,8 @@ podman_stop()
b4b3ce
 	else
b4b3ce
 		ocf_log debug "waiting $timeout second[s] before killing container"
b4b3ce
 		ocf_run podman stop -t=$timeout $CONTAINER
b4b3ce
+		# on stop, systemd will automatically delete any transient
b4b3ce
+		# drop-in conf that has been created earlier
b4b3ce
 	fi
b4b3ce
 
b4b3ce
 	if [ $? -ne 0 ]; then
b4b3ce
@@ -456,6 +526,16 @@ CONTAINER=$OCF_RESKEY_name
b4b3ce
 # exec command to be non-empty
b4b3ce
 : ${OCF_RESKEY_monitor_cmd:=/bin/true}
b4b3ce
 
b4b3ce
+# When OCF_RESKEY_drop_in_dependency is not populated, we
b4b3ce
+# look at another file-based way of enabling the option.
b4b3ce
+# Otherwise, consider it disabled.
b4b3ce
+if [ -z "$OCF_RESKEY_drop_in_dependency" ]; then
b4b3ce
+	if [ -f "/etc/sysconfig/podman_drop_in" ] || \
b4b3ce
+	   [ -f "/etc/default/podman_drop_in" ]; then
b4b3ce
+		OCF_RESKEY_drop_in_dependency=yes
b4b3ce
+	fi
b4b3ce
+fi
b4b3ce
+
b4b3ce
 case $__OCF_ACTION in
b4b3ce
 meta-data) meta_data
b4b3ce
 		exit $OCF_SUCCESS;;