Blame SOURCES/bz2014415-nfsserver-add-nfs_server_scope-parameter.patch

b7d26e
From 764dacb6195f8940f13b9c322b1bc8189c5619fc Mon Sep 17 00:00:00 2001
b7d26e
From: Lars Ellenberg <lars.ellenberg@linbit.com>
b7d26e
Date: Mon, 6 Sep 2021 12:13:42 +0200
b7d26e
Subject: [PATCH 1/6] Fix NFSv4 lock failover: set NFS Server Scope
b7d26e
b7d26e
Problem: https://github.com/ClusterLabs/resource-agents/issues/1644
b7d26e
RFC8881, 8.4.2.1 State Reclaim:
b7d26e
b7d26e
| If the server scope is different, the client should not attempt to
b7d26e
| reclaim locks. In this situation, no lock reclaim is possible.
b7d26e
| Any attempt to re-obtain the locks with non-reclaim operations is
b7d26e
| problematic since there is no guarantee that the existing
b7d26e
| filehandles will be recognized by the new server, or that if
b7d26e
| recognized, they denote the same objects. It is best to treat the
b7d26e
| locks as having been revoked by the reconfiguration event.
b7d26e
b7d26e
That's why for lock reclaim to even be attempted, we have to define and set
b7d26e
the same server scope for NFSD on all cluster nodes in the NFS failover
b7d26e
cluster. And in Linux, that is done by setting the UTS nodename for the
b7d26e
command that starts the nfsd kernel threads.
b7d26e
b7d26e
For "init scripts", just set it directly using unshare --uts.
b7d26e
For systemd units, add NFS_SERVER_SCOPE to some environment files
b7d26e
and inject the "unshare --uts" into the ExecStart command lines
b7d26e
using override drop-in files.
b7d26e
---
b7d26e
 heartbeat/nfsserver | 120 +++++++++++++++++++++++++++++++++++++++++++-
b7d26e
 1 file changed, 119 insertions(+), 1 deletion(-)
b7d26e
b7d26e
diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver
b7d26e
index 96b19abe36..0888378645 100755
b7d26e
--- a/heartbeat/nfsserver
b7d26e
+++ b/heartbeat/nfsserver
b7d26e
@@ -5,6 +5,18 @@
b7d26e
 # by hxinwei@gmail.com
b7d26e
 # License: GNU General Public License v2 (GPLv2) and later
b7d26e
 
b7d26e
+
b7d26e
+# I don't know for certain whether all services actually _need_ this,
b7d26e
+# I know that at least nfs-server needs it.
b7d26e
+# The rgmanager resource agent in rgmanager/src/resources/nfsserver.sh.in
b7d26e
+# did the unshare for gssd and idmapd as well, even though it seems unclear why.
b7d26e
+# Let's start with just the nfs-server, and add others if/when we have clear
b7d26e
+# indication they need it.
b7d26e
+#NFSD_RELATED_SYSTEMD_SERVICE_FOR_UNSHARE_UTS_NAMESPACE="nfs-idmapd.service nfs-mountd.service nfs-server.service nfsdcld.service rpc-gssd.service rpc-statd.service rpcbind.service"
b7d26e
+NFSD_RELATED_SYSTEMD_SERVICE_FOR_UNSHARE_UTS_NAMESPACE="nfs-server.service"
b7d26e
+SYSTEMD_ENVIRONMENT_FILE_NFS_SERVER_SCOPE=/run/sysconfig/nfs-server-scope
b7d26e
+SYSTEMD_UNSHARE_UTS_DROPIN=51-resource-agents-unshare-uts.conf
b7d26e
+
b7d26e
 if [ -n "$OCF_DEBUG_LIBRARY" ]; then
b7d26e
     . $OCF_DEBUG_LIBRARY
b7d26e
 else
b7d26e
@@ -99,6 +111,31 @@ Specifies the length of sm-notify retry time (minutes).
b7d26e
 <content type="integer" default="" />
b7d26e
 </parameter>
b7d26e
 
b7d26e
+<parameter name="nfs_server_scope" unique="0" required="0">
b7d26e
+<longdesc lang="en">
b7d26e
+RFC8881, 8.4.2.1 State Reclaim:
b7d26e
+
b7d26e
+If the server scope is different, the client should not attempt to
b7d26e
+reclaim locks. In this situation, no lock reclaim is possible.
b7d26e
+Any attempt to re-obtain the locks with non-reclaim operations is
b7d26e
+problematic since there is no guarantee that the existing
b7d26e
+filehandles will be recognized by the new server, or that if
b7d26e
+recognized, they denote the same objects. It is best to treat the
b7d26e
+locks as having been revoked by the reconfiguration event.
b7d26e
+
b7d26e
+For lock reclaim to even be attempted, we have to define and set the same
b7d26e
+server scope for NFSD on all cluster nodes in the NFS failover cluster.
b7d26e
+
b7d26e
+This agent won't "guess" a suitable server scope name for you, you need to
b7d26e
+explicitly specify this. But without it, NFSv4 lock reclaim after failover
b7d26e
+won't work properly.  Suggested value: the failover "service IP".
b7d26e
+</longdesc>
b7d26e
+<shortdesc lang="en">
b7d26e
+RFC8881 NFS server scope for (lock) state reclaim after failover.
b7d26e
+</shortdesc>
b7d26e
+<content type="string"/>
b7d26e
+</parameter>
b7d26e
+
b7d26e
 <parameter name="nfs_ip" unique="0" required="0">
b7d26e
 <longdesc lang="en">
b7d26e
 Comma separated list of floating IP addresses used to access the nfs service
b7d26e
@@ -269,7 +306,11 @@ nfs_exec()
b7d26e
 	set_exec_mode
b7d26e
 
b7d26e
 	case $EXEC_MODE in 
b7d26e
-		1) ${OCF_RESKEY_nfs_init_script} $cmd;;
b7d26e
+		1) if [ -z "$OCF_RESKEY_nfs_server_scope" ] ; then
b7d26e
+			${OCF_RESKEY_nfs_init_script} $cmd
b7d26e
+		   else
b7d26e
+			unshare -u /bin/sh -c "hostname ${OCF_RESKEY_nfs_server_scope}; exec ${OCF_RESKEY_nfs_init_script} $cmd"
b7d26e
+		   fi ;;
b7d26e
 		2) if ! echo $svc | grep -q "\."; then
b7d26e
 			svc="${svc}.service"
b7d26e
 		   fi
b7d26e
@@ -623,6 +664,74 @@ notify_locks()
b7d26e
 	fi
b7d26e
 }
b7d26e
 
b7d26e
+# Problem: https://github.com/ClusterLabs/resource-agents/issues/1644
b7d26e
+# RFC8881, 8.4.2.1 State Reclaim:
b7d26e
+#
b7d26e
+# | If the server scope is different, the client should not attempt to
b7d26e
+# | reclaim locks. In this situation, no lock reclaim is possible.
b7d26e
+# | Any attempt to re-obtain the locks with non-reclaim operations is
b7d26e
+# | problematic since there is no guarantee that the existing
b7d26e
+# | filehandles will be recognized by the new server, or that if
b7d26e
+# | recognized, they denote the same objects. It is best to treat the
b7d26e
+# | locks as having been revoked by the reconfiguration event.
b7d26e
+#
b7d26e
+# That's why for lock reclaim to even be attempted, we have to define and set
b7d26e
+# the same server scope for NFSD on all cluster nodes in the NFS failover
b7d26e
+# cluster. And in linux, that is done by setting the uts nodename for the
b7d26e
+# command that starts the nfsd kernel threads.
b7d26e
+#
b7d26e
+inject_unshare_uts_name_into_systemd_units ()
b7d26e
+{
b7d26e
+	local END_TAG="# END OF DROP-IN FOR NFS SERVER SCOPE"
b7d26e
+	local services
b7d26e
+	services=$(systemctl list-unit-files --no-legend $NFSD_RELATED_SYSTEMD_SERVICE_FOR_UNSHARE_UTS_NAMESPACE | cut -d ' ' -f1)
b7d26e
+
b7d26e
+	local svc dir dropin edited_exec_start do_reload=false
b7d26e
+	for svc in $services ; do
b7d26e
+		dir=/run/systemd/system/$svc.d
b7d26e
+		dropin=$dir/$SYSTEMD_UNSHARE_UTS_DROPIN
b7d26e
+		grep -sqF "$END_TAG" "$dropin" && continue
b7d26e
+
b7d26e
+		test -d "$dir" || mkdir -p "$dir"
b7d26e
+		test -e "$dropin" && rm -f "$dropin"
b7d26e
+
b7d26e
+		edited_exec_start=$(systemctl cat $svc | sed -ne "s#^ExecStart=\\(.*\\)#ExecStart=/usr/bin/unshare --uts /bin/sh -ec 'hostname \${NFS_SERVER_SCOPE}; exec \"\$@\"' -- \\1#p")
b7d26e
+		cat > "$dropin" <<___
b7d26e
+[Service]
b7d26e
+EnvironmentFile=$SYSTEMD_ENVIRONMENT_FILE_NFS_SERVER_SCOPE
b7d26e
+# reset list of exec start, then re-populate with unshared uts namespace
b7d26e
+ExecStart=
b7d26e
+$edited_exec_start
b7d26e
+$END_TAG
b7d26e
+___
b7d26e
+		do_reload=true
b7d26e
+		ocf_log debug "injected unshare --uts into $dropin"
b7d26e
+	done
b7d26e
+
b7d26e
+	mkdir -p "${SYSTEMD_ENVIRONMENT_FILE_NFS_SERVER_SCOPE%/*}"
b7d26e
+	echo "NFS_SERVER_SCOPE=$OCF_RESKEY_nfs_server_scope" > "$SYSTEMD_ENVIRONMENT_FILE_NFS_SERVER_SCOPE"
b7d26e
+
b7d26e
+	$do_reload && systemctl daemon-reload
b7d26e
+}
b7d26e
+
b7d26e
+remove_unshare_uts_dropins ()
b7d26e
+{
b7d26e
+	local services
b7d26e
+	services=$(systemctl list-unit-files --no-legend $NFSD_RELATED_SYSTEMD_SERVICE_FOR_UNSHARE_UTS_NAMESPACE)
b7d26e
+
b7d26e
+	local svc dir dropin do_reload=false
b7d26e
+	for svc in $services ; do
b7d26e
+		dir=/run/systemd/system/$svc.d
b7d26e
+		dropin=$dir/$SYSTEMD_UNSHARE_UTS_DROPIN
b7d26e
+		test -e "$dropin" || continue
b7d26e
+		rm -f "$dropin"
b7d26e
+		do_reload=true
b7d26e
+		ocf_log debug "removed unshare --uts from $svc"
b7d26e
+	done
b7d26e
+	rm -f "${SYSTEMD_ENVIRONMENT_FILE_NFS_SERVER_SCOPE}"
b7d26e
+	$do_reload && systemctl daemon-reload
b7d26e
+}
b7d26e
+
b7d26e
 nfsserver_start ()
b7d26e
 {
b7d26e
 	local rc;
b7d26e
@@ -636,6 +745,13 @@ nfsserver_start ()
b7d26e
 	is_redhat_based && set_env_args
b7d26e
 	bind_tree
b7d26e
 	prepare_directory
b7d26e
+	case $EXEC_MODE in [23])
b7d26e
+		if [ -z "$OCF_RESKEY_nfs_server_scope" ] ; then
b7d26e
+			remove_unshare_uts_dropins
b7d26e
+		else
b7d26e
+			inject_unshare_uts_name_into_systemd_units
b7d26e
+		fi ;;
b7d26e
+	esac
b7d26e
 
b7d26e
 	if ! `mount | grep -q " on $OCF_RESKEY_rpcpipefs_dir "`; then
b7d26e
 		mount -t rpc_pipefs sunrpc $OCF_RESKEY_rpcpipefs_dir
b7d26e
@@ -854,6 +970,8 @@ nfsserver_stop ()
b7d26e
 		ocf_log info "NFS server stopped"
b7d26e
 	fi
b7d26e
 
b7d26e
+	case $EXEC_MODE in [23]) remove_unshare_uts_dropins;; esac
b7d26e
+
b7d26e
 	return $rc
b7d26e
 }
b7d26e
 
b7d26e
b7d26e
From 515697b53c1614d05d39491c9af83e8d8b844b17 Mon Sep 17 00:00:00 2001
b7d26e
From: Lars Ellenberg <lars.ellenberg@linbit.com>
b7d26e
Date: Fri, 8 Oct 2021 12:01:41 +0200
b7d26e
Subject: [PATCH 2/6] Fix NFSv4 lock failover: set NFS Server Scope, regardless
b7d26e
 of EXEC_MODE
b7d26e
b7d26e
Debian (and other systems) may provide "init scripts",
b7d26e
which will only redirect back to systemd.
b7d26e
b7d26e
If we just unshare --uts the init script invocation,
b7d26e
the uts namespace is useless in that case.
b7d26e
b7d26e
If systemd is running, mangle the nfs-server.service unit,
b7d26e
independent of the "EXEC_MODE".
b7d26e
---
b7d26e
 heartbeat/nfsserver | 18 ++++++++++++++----
b7d26e
 1 file changed, 14 insertions(+), 4 deletions(-)
b7d26e
b7d26e
diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver
b7d26e
index 0888378645..054aabbaf6 100755
b7d26e
--- a/heartbeat/nfsserver
b7d26e
+++ b/heartbeat/nfsserver
b7d26e
@@ -745,13 +745,20 @@ nfsserver_start ()
b7d26e
 	is_redhat_based && set_env_args
b7d26e
 	bind_tree
b7d26e
 	prepare_directory
b7d26e
-	case $EXEC_MODE in [23])
b7d26e
+
b7d26e
+	# Debian (and other systems) may provide "init scripts",
b7d26e
+	# which will only redirect back to systemd.
b7d26e
+	# If we just unshare --uts the init script invocation,
b7d26e
+	# the uts namespace is useless in that case.
b7d26e
+	# If systemd is running, mangle the nfs-server.service unit,
b7d26e
+	# independent of the "EXEC_MODE" we detected.
b7d26e
+	if $systemd_is_running ; then
b7d26e
 		if [ -z "$OCF_RESKEY_nfs_server_scope" ] ; then
b7d26e
 			remove_unshare_uts_dropins
b7d26e
 		else
b7d26e
 			inject_unshare_uts_name_into_systemd_units
b7d26e
-		fi ;;
b7d26e
-	esac
b7d26e
+		fi
b7d26e
+	fi
b7d26e
 
b7d26e
 	if ! `mount | grep -q " on $OCF_RESKEY_rpcpipefs_dir "`; then
b7d26e
 		mount -t rpc_pipefs sunrpc $OCF_RESKEY_rpcpipefs_dir
b7d26e
@@ -970,7 +977,9 @@ nfsserver_stop ()
b7d26e
 		ocf_log info "NFS server stopped"
b7d26e
 	fi
b7d26e
 
b7d26e
-	case $EXEC_MODE in [23]) remove_unshare_uts_dropins;; esac
b7d26e
+	if $systemd_is_running; then
b7d26e
+		remove_unshare_uts_dropins
b7d26e
+	fi
b7d26e
 
b7d26e
 	return $rc
b7d26e
 }
b7d26e
@@ -1008,6 +1017,7 @@ nfsserver_validate ()
b7d26e
 }
b7d26e
 
b7d26e
 nfsserver_validate
b7d26e
+systemd_is_running && systemd_is_running=true || systemd_is_running=false
b7d26e
 
b7d26e
 case $__OCF_ACTION in
b7d26e
 	start)      nfsserver_start
b7d26e
b7d26e
From e83c20d88f404f9f9d829c654883d60eb6cc9ff3 Mon Sep 17 00:00:00 2001
b7d26e
From: Lars Ellenberg <lars.ellenberg@linbit.com>
b7d26e
Date: Fri, 8 Oct 2021 17:06:18 +0200
b7d26e
Subject: [PATCH 3/6] Fix NFSv4 lock failover: add missing "|cut -f1" in
b7d26e
 remove_unshare_uts_dropins
b7d26e
b7d26e
---
b7d26e
 heartbeat/nfsserver | 2 +-
b7d26e
 1 file changed, 1 insertion(+), 1 deletion(-)
b7d26e
b7d26e
diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver
b7d26e
index 054aabbaf6..d3db89a537 100755
b7d26e
--- a/heartbeat/nfsserver
b7d26e
+++ b/heartbeat/nfsserver
b7d26e
@@ -717,7 +717,7 @@ ___
b7d26e
 remove_unshare_uts_dropins ()
b7d26e
 {
b7d26e
 	local services
b7d26e
-	services=$(systemctl list-unit-files --no-legend $NFSD_RELATED_SYSTEMD_SERVICE_FOR_UNSHARE_UTS_NAMESPACE)
b7d26e
+	services=$(systemctl list-unit-files --no-legend $NFSD_RELATED_SYSTEMD_SERVICE_FOR_UNSHARE_UTS_NAMESPACE | cut -d ' ' -f1)
b7d26e
 
b7d26e
 	local svc dir dropin do_reload=false
b7d26e
 	for svc in $services ; do
b7d26e
b7d26e
From b5b0e4a0b60d285af576b2d8ecfbe95e5a177a87 Mon Sep 17 00:00:00 2001
b7d26e
From: Lars Ellenberg <lars.ellenberg@linbit.com>
b7d26e
Date: Fri, 8 Oct 2021 17:07:13 +0200
b7d26e
Subject: [PATCH 4/6] Fix NFSv4 lock failover: get rid of "world-inaccessible"
b7d26e
 warning
b7d26e
b7d26e
by temporarily changing the umask before generating the dropins
b7d26e
---
b7d26e
 heartbeat/nfsserver | 3 +++
b7d26e
 1 file changed, 3 insertions(+)
b7d26e
b7d26e
diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver
b7d26e
index d3db89a537..447e0302b2 100755
b7d26e
--- a/heartbeat/nfsserver
b7d26e
+++ b/heartbeat/nfsserver
b7d26e
@@ -687,6 +687,8 @@ inject_unshare_uts_name_into_systemd_units ()
b7d26e
 	services=$(systemctl list-unit-files --no-legend $NFSD_RELATED_SYSTEMD_SERVICE_FOR_UNSHARE_UTS_NAMESPACE | cut -d ' ' -f1)
b7d26e
 
b7d26e
 	local svc dir dropin edited_exec_start do_reload=false
b7d26e
+	local old_umask=$(umask)
b7d26e
+	umask 0022
b7d26e
 	for svc in $services ; do
b7d26e
 		dir=/run/systemd/system/$svc.d
b7d26e
 		dropin=$dir/$SYSTEMD_UNSHARE_UTS_DROPIN
b7d26e
@@ -710,6 +712,7 @@ ___
b7d26e
 
b7d26e
 	mkdir -p "${SYSTEMD_ENVIRONMENT_FILE_NFS_SERVER_SCOPE%/*}"
b7d26e
 	echo "NFS_SERVER_SCOPE=$OCF_RESKEY_nfs_server_scope" > "$SYSTEMD_ENVIRONMENT_FILE_NFS_SERVER_SCOPE"
b7d26e
+	umask $old_umask
b7d26e
 
b7d26e
 	$do_reload && systemctl daemon-reload
b7d26e
 }
b7d26e
b7d26e
From 3c6c91ce5a00eeef9cd766389d73a0b42580a1e6 Mon Sep 17 00:00:00 2001
b7d26e
From: Lars Ellenberg <lars.ellenberg@linbit.com>
b7d26e
Date: Fri, 8 Oct 2021 17:08:09 +0200
b7d26e
Subject: [PATCH 5/6] Fix NFSv4 lock failover: deal with "special executable
b7d26e
 prefix" chars in ExecStart
b7d26e
b7d26e
---
b7d26e
 heartbeat/nfsserver | 2 +-
b7d26e
 1 file changed, 1 insertion(+), 1 deletion(-)
b7d26e
b7d26e
diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver
b7d26e
index 447e0302b2..5326bd2c6e 100755
b7d26e
--- a/heartbeat/nfsserver
b7d26e
+++ b/heartbeat/nfsserver
b7d26e
@@ -697,7 +697,7 @@ inject_unshare_uts_name_into_systemd_units ()
b7d26e
 		test -d "$dir" || mkdir -p "$dir"
b7d26e
 		test -e "$dropin" && rm -f "$dropin"
b7d26e
 
b7d26e
-		edited_exec_start=$(systemctl cat $svc | sed -ne "s#^ExecStart=\\(.*\\)#ExecStart=/usr/bin/unshare --uts /bin/sh -ec 'hostname \${NFS_SERVER_SCOPE}; exec \"\$@\"' -- \\1#p")
b7d26e
+		edited_exec_start=$(systemctl cat $svc | sed -ne "s#^ExecStart=\\([-+:!@]*\\)\\(.*\\)#ExecStart=\\1/usr/bin/unshare --uts /bin/sh -c 'hostname \${NFS_SERVER_SCOPE}; exec \"\$@\"' -- \\2#p")
b7d26e
 		cat > "$dropin" <<___
b7d26e
 [Service]
b7d26e
 EnvironmentFile=$SYSTEMD_ENVIRONMENT_FILE_NFS_SERVER_SCOPE
b7d26e
b7d26e
From 512fbaf61e6d24a1236ef50e323ea17a62485c36 Mon Sep 17 00:00:00 2001
b7d26e
From: Lars Ellenberg <lars.ellenberg@linbit.com>
b7d26e
Date: Fri, 8 Oct 2021 17:08:59 +0200
b7d26e
Subject: [PATCH 6/6] Fix NFSv4 lock failover: add rpc-statd-notify to the
b7d26e
 comment list of potentially interesting services
b7d26e
b7d26e
---
b7d26e
 heartbeat/nfsserver | 2 +-
b7d26e
 1 file changed, 1 insertion(+), 1 deletion(-)
b7d26e
b7d26e
diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver
b7d26e
index 5326bd2c6e..240dd1a76c 100755
b7d26e
--- a/heartbeat/nfsserver
b7d26e
+++ b/heartbeat/nfsserver
b7d26e
@@ -12,7 +12,7 @@
b7d26e
 # did the unshare for gssd and idmapd as well, even though it seems unclear why.
b7d26e
 # Let's start with just the nfs-server, and add others if/when we have clear
b7d26e
 # indication they need it.
b7d26e
-#NFSD_RELATED_SYSTEMD_SERVICE_FOR_UNSHARE_UTS_NAMESPACE="nfs-idmapd.service nfs-mountd.service nfs-server.service nfsdcld.service rpc-gssd.service rpc-statd.service rpcbind.service"
b7d26e
+#NFSD_RELATED_SYSTEMD_SERVICE_FOR_UNSHARE_UTS_NAMESPACE="nfs-idmapd.service nfs-mountd.service nfs-server.service nfsdcld.service rpc-gssd.service rpc-statd.service rpc-statd-notify.service rpcbind.service"
b7d26e
 NFSD_RELATED_SYSTEMD_SERVICE_FOR_UNSHARE_UTS_NAMESPACE="nfs-server.service"
b7d26e
 SYSTEMD_ENVIRONMENT_FILE_NFS_SERVER_SCOPE=/run/sysconfig/nfs-server-scope
b7d26e
 SYSTEMD_UNSHARE_UTS_DROPIN=51-resource-agents-unshare-uts.conf