Blob Blame History Raw
From c8d1751014bc34d33dfe6db1820174ef7e733404 Mon Sep 17 00:00:00 2001
From: Kaleb S. KEITHLEY <kkeithle@redhat.com>
Date: Fri, 5 Jun 2015 10:56:25 -0400
Subject: [PATCH 42/57]  nfs-ganesha: HA, fix race between setting grace and virt IP fail-over

upstream BZs: 1227028 (release-3.7), 1219485 (main), 1216039

Change-Id: I17eb2f7a5eae677123f12ab7eaf650fbf4d07682
BUG: 1224618
Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/50134
Reviewed-by: Soumya Koduri <skoduri@redhat.com>
Reviewed-by: Niels de Vos <ndevos@redhat.com>
Tested-by: Niels de Vos <ndevos@redhat.com>
---
 extras/ganesha/ocf/ganesha_mon |   24 ++++++++++++++----------
 1 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/extras/ganesha/ocf/ganesha_mon b/extras/ganesha/ocf/ganesha_mon
index 6ba7178..47943f8 100644
--- a/extras/ganesha/ocf/ganesha_mon
+++ b/extras/ganesha/ocf/ganesha_mon
@@ -36,6 +36,8 @@ else
 . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
 fi
 
+GRACE_DELAY=7
+
 ganesha_meta_data() {
         cat <<END
 <?xml version="1.0"?>
@@ -98,13 +100,7 @@ ganesha_mon_monitor()
 
 	if [ -e /var/run/ganesha.nfsd.pid -a \
 	     -d /proc/$(cat /var/run/ganesha.nfsd.pid) ]; then
-		# logger "note: ganesha_mon_monitor() pcs resource delete ${short_host}-dead_ip-1"
-		pcs resource delete ${short_host}-dead_ip-1
-		# if [ $? -ne 0 ]; then
-		# 	logger "warning: pcs resource delete ${short_host}-dead_ip-1"
-		# fi
-
-		sleep 1
+		( pcs resource delete ${short_host}-dead_ip-1 > /dev/null 2>&1 )
 
 		attrd_updater -n ganesha-active -v 1
 		if [ $? -ne 0 ]; then
@@ -112,13 +108,21 @@ ganesha_mon_monitor()
 		fi
 
 	else
-		# logger "note: ganesha_mon_montor(), pcs resource create ${short_host}-dead_ip-1 ocf:heartbeat:Dummy"
-		pcs resource create ${short_host}-dead_ip-1 ocf:heartbeat:Dummy
+		( pcs resource create ${short_host}-dead_ip-1 ocf:heartbeat:Dummy > /dev/null 2>&1 )
 		if [ $? -ne 0 ]; then
 			logger "warning: pcs resource create ${short_host}-dead_ip-1 ocf:heartbeat:Dummy failed"
 		fi
 
-		sleep 1
+		# The ${short_host}-dead_ip-1 resource is used to indicate
+		# that the ganesha.nfsd on this node has died.
+		# VIP fail-over is then triggered by clearing the
+		# ganesha-active node attribute on this node.
+		#
+		# Meanwhile the ganesha_grace monitor() runs every 5
+		# seconds. We need to allow time for it to run and put
+		# the remaining ganesha.nfsds into grace before initiating
+		# the VIP fail-over.
+		sleep ${GRACE_DELAY}
 
 		attrd_updater -D -n ganesha-active
 		if [ $? -ne 0 ]; then
-- 
1.7.1