d2787b
From 053bb9c7356eae82b1089582bb2844388ae4df57 Mon Sep 17 00:00:00 2001
d2787b
From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
d2787b
Date: Wed, 2 Jun 2021 07:49:12 -0400
d2787b
Subject: [PATCH 550/584] common-ha: stability fixes for ganesha_grace and
d2787b
 ganesha_mon RAs
d2787b
d2787b
Include fixes suggested by ClusterHA devs.
d2787b
d2787b
1) It turns out that crm_attribute attrs and attrd_updater attrs really
d2787b
are one and the same, despite what I was told years ago.
d2787b
d2787b
attrs created with crm_attribute ... --lifetime=reboot ... or
d2787b
attrd_updater are one and the same. As per ClusterHA devs, having an attr
d2787b
created with crm_attribute ... --lifetime=forever and also
d2787b
creating/updating the same attr with attrd_updater is a recipe for
d2787b
weird things to happen that will be difficult to debug.
d2787b
d2787b
2) Using hostname -s or hostname for node names in crm_attribute and
d2787b
attrd_updater potentially could use the wrong name if the host has
d2787b
been renamed; use ocf_local_nodename() (in ocf-shellfuncs) instead.
d2787b
d2787b
https://github.com/gluster/glusterfs/issues/2276
d2787b
https://github.com/gluster/glusterfs/pull/2283
d2787b
commit 9bd2c697686ec40e2c4f711df961860c8a735baa
d2787b
d2787b
Change-Id:If572d396fae9206628714fb2ce00f72e94f2258f
d2787b
BUG: 1945143
d2787b
Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
d2787b
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244593
d2787b
Tested-by: RHGS Build Bot <nigelb@redhat.com>
d2787b
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
d2787b
---
d2787b
 extras/ganesha/ocf/ganesha_grace | 28 +++++++++---------------
d2787b
 extras/ganesha/ocf/ganesha_mon   | 47 ++++++++++++++--------------------------
d2787b
 2 files changed, 26 insertions(+), 49 deletions(-)
d2787b
d2787b
diff --git a/extras/ganesha/ocf/ganesha_grace b/extras/ganesha/ocf/ganesha_grace
d2787b
index 825f716..edc6fa2 100644
d2787b
--- a/extras/ganesha/ocf/ganesha_grace
d2787b
+++ b/extras/ganesha/ocf/ganesha_grace
d2787b
@@ -94,25 +94,21 @@ esac
d2787b
 ganesha_grace_start()
d2787b
 {
d2787b
 	local rc=${OCF_ERR_GENERIC}
d2787b
-	local host=$(hostname -s)
d2787b
+	local host=$(ocf_local_nodename)
d2787b
 
d2787b
-	ocf_log debug "ganesha_grace_start()"
d2787b
-	# give ganesha_mon RA a chance to set the crm_attr first
d2787b
+	ocf_log debug "ganesha_grace_start ${host}"
d2787b
+	# give ganesha_mon RA a chance to set the attr first
d2787b
 	# I mislike the sleep, but it's not clear that looping
d2787b
 	# with a small sleep is necessarily better
d2787b
 	# start has a 40sec timeout, so a 5sec sleep here is okay
d2787b
         sleep 5
d2787b
-	attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
d2787b
+	attr=$(attrd_updater --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
d2787b
         if [ $? -ne 0 ]; then
d2787b
-		host=$(hostname)
d2787b
-		attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null )
d2787b
-                if [ $? -ne 0 ]; then
d2787b
-	                ocf_log info "grace start: crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed"
d2787b
-                fi
d2787b
+	        ocf_log info "grace start: attrd_updater --query --node=${host} --name=${OCF_RESKEY_grace_active} failed"
d2787b
         fi
d2787b
 
d2787b
 	# Three possibilities:
d2787b
-	# 1. There is no attribute at all and attr_updater returns
d2787b
+	# 1. There is no attribute at all and attrd_updater returns
d2787b
 	#    a zero length string. This happens when
d2787b
 	#    ganesha_mon::monitor hasn't run at least once to set
d2787b
 	#    the attribute. The assumption here is that the system
d2787b
@@ -164,17 +160,13 @@ ganesha_grace_notify()
d2787b
 
d2787b
 ganesha_grace_monitor()
d2787b
 {
d2787b
-	local host=$(hostname -s)
d2787b
+	local host=$(ocf_local_nodename)
d2787b
 
d2787b
-	ocf_log debug "monitor"
d2787b
+	ocf_log debug "ganesha_grace monitor ${host}"
d2787b
 
d2787b
-	attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
d2787b
+	attr=$(attrd_updater --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
d2787b
         if [ $? -ne 0 ]; then
d2787b
-		host=$(hostname)
d2787b
-	        attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
d2787b
-                if [ $? -ne 0 ]; then
d2787b
-	                ocf_log info "crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed"
d2787b
-                fi
d2787b
+	        ocf_log info "attrd_updater --query --node=${host} --name=${OCF_RESKEY_grace_active} failed"
d2787b
         fi
d2787b
 
d2787b
 	# if there is no attribute (yet), maybe it's because
d2787b
diff --git a/extras/ganesha/ocf/ganesha_mon b/extras/ganesha/ocf/ganesha_mon
d2787b
index 2b4a9d6..7fbbf70 100644
d2787b
--- a/extras/ganesha/ocf/ganesha_mon
d2787b
+++ b/extras/ganesha/ocf/ganesha_mon
d2787b
@@ -124,7 +124,6 @@ ganesha_mon_stop()
d2787b
 
d2787b
 ganesha_mon_monitor()
d2787b
 {
d2787b
-	local host=$(hostname -s)
d2787b
 	local pid_file="/var/run/ganesha.pid"
d2787b
 	local rhel6_pid_file="/var/run/ganesha.nfsd.pid"
d2787b
 	local proc_pid="/proc/"
d2787b
@@ -141,31 +140,27 @@ ganesha_mon_monitor()
d2787b
 
d2787b
 	if [ "x${proc_pid}" != "x/proc/" -a -d ${proc_pid} ]; then
d2787b
 
d2787b
-		attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1
d2787b
+		attrd_updater --name ${OCF_RESKEY_ganesha_active} -v 1
d2787b
 		if [ $? -ne 0 ]; then
d2787b
-			ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1 failed"
d2787b
+			ocf_log info "warning: attrd_updater --name ${OCF_RESKEY_ganesha_active} -v 1 failed"
d2787b
 		fi
d2787b
 
d2787b
 		# ganesha_grace (nfs-grace) RA follows grace-active attr
d2787b
 		# w/ constraint location
d2787b
-		attrd_updater -n ${OCF_RESKEY_grace_active} -v 1
d2787b
+		attrd_updater --name ${OCF_RESKEY_grace_active} -v 1
d2787b
 		if [ $? -ne 0 ]; then
d2787b
-			ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_grace_active} -v 1 failed"
d2787b
+			ocf_log info "warning: attrd_updater --name ${OCF_RESKEY_grace_active} -v 1 failed"
d2787b
 		fi
d2787b
 
d2787b
 		# ganesha_mon (nfs-mon) and ganesha_grace (nfs-grace)
d2787b
-		# track grace-active crm_attr (attr != crm_attr)
d2787b
-		# we can't just use the attr as there's no way to query
d2787b
-		# its value in RHEL6 pacemaker
d2787b
-
d2787b
-		crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null
d2787b
-		if [ $? -ne 0 ]; then
d2787b
-			host=$(hostname)
d2787b
-			crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null
d2787b
-			if [ $? -ne 0 ]; then
d2787b
-				ocf_log info "mon monitor warning: crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 failed"
d2787b
-			fi
d2787b
-		fi
d2787b
+		# track grace-active attr.
d2787b
+		#
d2787b
+		# Originally we were told that attrs set with attrd_updater
d2787b
+		# are different/distinct than attrs set with crm_attribute.
d2787b
+		# Now, years later, we are told that they are the same and
d2787b
+		# that the values of attrs set with attrd_updater can be
d2787b
+		# retrieved with crm_attribute. Or with attrd_updater -Q
d2787b
+		# now that we no longer have to deal with rhel6.
d2787b
 
d2787b
 		return ${OCF_SUCCESS}
d2787b
 	fi
d2787b
@@ -182,26 +177,16 @@ ganesha_mon_monitor()
d2787b
 	# the remaining ganesha.nfsds into grace before
d2787b
 	# initiating the VIP fail-over.
d2787b
 
d2787b
-	attrd_updater -D -n ${OCF_RESKEY_grace_active}
d2787b
-	if [ $? -ne 0 ]; then
d2787b
-		ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_grace_active} failed"
d2787b
-	fi
d2787b
-
d2787b
-	host=$(hostname -s)
d2787b
-	crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null
d2787b
+	attrd_updater --delete --name ${OCF_RESKEY_grace_active}
d2787b
 	if [ $? -ne 0 ]; then
d2787b
-		host=$(hostname)
d2787b
-		crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null
d2787b
-		if [ $? -ne 0 ]; then
d2787b
-			ocf_log info "mon monitor warning: crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 failed"
d2787b
-		fi
d2787b
+		ocf_log info "warning: attrd_updater --delete --name ${OCF_RESKEY_grace_active} failed"
d2787b
 	fi
d2787b
 
d2787b
 	sleep ${OCF_RESKEY_grace_delay}
d2787b
 
d2787b
-	attrd_updater -D -n ${OCF_RESKEY_ganesha_active}
d2787b
+	attrd_updater --delete --name ${OCF_RESKEY_ganesha_active}
d2787b
 	if [ $? -ne 0 ]; then
d2787b
-		ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_ganesha_active} failed"
d2787b
+		ocf_log info "warning: attrd_updater --delete --name ${OCF_RESKEY_ganesha_active} failed"
d2787b
 	fi
d2787b
 
d2787b
 	return ${OCF_SUCCESS}
d2787b
-- 
d2787b
1.8.3.1
d2787b