Blob Blame History Raw
From ed81e2ab50d4c7c0e473cede975dfa6a1d0dc19b Mon Sep 17 00:00:00 2001
From: Soumya Koduri <skoduri@redhat.com>
Date: Fri, 8 Jul 2016 12:30:25 +0530
Subject: [PATCH 160/162] common-HA: Add portblock RA to tickle packets post failover(/back)

Portblock resource-agents are used to send tickle ACKs so as to
reset the outstanding TCP connections. This can be used to reduce
the time taken by the NFS clients to reconnect post IP
failover/failback.

Two new resource agents (nfs_block and nfs_unblock) of type
ocf:portblock with action block & unblock are created for each
Virtual-IP (cluster_ip-1). These resource agents, along with the
cluster_ip-1 RA, are grouped in the order block->IP->unblock, and the
entire group maintains the same colocation rules so that they reside on
the same node at any given point of time.

The contents of tickle_dir are of the following format -
* A file is created for each of the VIPs used in the ganesha cluster.
* Each of those files contains entries about the connected clients,
  as below:
SourceIP:port_num       DestinationIP:port_num

Hence, when one server fails over, connections of the clients connected
to other VIPs are not affected.

Note: During testing I observed that tickle ACKs are sent during
failback but not during failover, though I/O successfully
resumed post failover.

Also added a dependency on portblock RA for glusterfs-ganesha package
as it may not be available (as part of resource-agents package) in
all the distributions.

This is a backport of the below mainline bug -
  http://review.gluster.org/14878

> Change-Id: Icad6169449535f210d9abe302c2a6971a0a96d6f
> BUG: 1354439
> Signed-off-by: Soumya Koduri <skoduri@redhat.com>
> Reviewed-on: http://review.gluster.org/14878
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com>
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Niels de Vos <ndevos@redhat.com>

BUG: 1278336
Change-Id: Ia9f05b5fc6b6bf1ebf08d6402e9840b044c8c795
Signed-off-by: Soumya Koduri <skoduri@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/89512
Reviewed-by: Milind Changire <mchangir@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
 extras/ganesha/scripts/ganesha-ha.sh |   38 +++++++++++++++++++++++++---------
 glusterfs.spec.in                    |    5 ++++
 2 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
index de7c425..5162eb5 100644
--- a/extras/ganesha/scripts/ganesha-ha.sh
+++ b/extras/ganesha/scripts/ganesha-ha.sh
@@ -364,17 +364,17 @@ do_create_virt_ip_constraints()
 
     # first a constraint location rule that says the VIP must be where
     # there's a ganesha.nfsd running
-    pcs -f ${cibfile} constraint location ${primary}-cluster_ip-1 rule score=-INFINITY ganesha-active ne 1
+    pcs -f ${cibfile} constraint location ${primary}-group rule score=-INFINITY ganesha-active ne 1
     if [ $? -ne 0 ]; then
-        logger "warning: pcs constraint location ${primary}-cluster_ip-1 rule score=-INFINITY ganesha-active ne 1 failed"
+        logger "warning: pcs constraint location ${primary}-group rule score=-INFINITY ganesha-active ne 1 failed"
     fi
 
     # then a set of constraint location prefers to set the prefered order
     # for where a VIP should move
     while [[ ${1} ]]; do
-        pcs -f ${cibfile} constraint location ${primary}-cluster_ip-1 prefers ${1}=${weight}
+        pcs -f ${cibfile} constraint location ${primary}-group prefers ${1}=${weight}
         if [ $? -ne 0 ]; then
-            logger "warning: pcs constraint location ${primary}-cluster_ip-1 prefers ${1}=${weight} failed"
+            logger "warning: pcs constraint location ${primary}-group prefers ${1}=${weight} failed"
         fi
         weight=$(expr ${weight} + 1000)
         shift
@@ -384,9 +384,9 @@ do_create_virt_ip_constraints()
     # on Fedora setting appears to be additive, so to get the desired
     # value we adjust the weight
     # weight=$(expr ${weight} - 100)
-    pcs -f ${cibfile} constraint location ${primary}-cluster_ip-1 prefers ${primary}=${weight}
+    pcs -f ${cibfile} constraint location ${primary}-group prefers ${primary}=${weight}
     if [ $? -ne 0 ]; then
-        logger "warning: pcs constraint location ${primary}-cluster_ip-1 prefers ${primary}=${weight} failed"
+        logger "warning: pcs constraint location ${primary}-group prefers ${primary}=${weight} failed"
     fi
 }
 
@@ -481,9 +481,16 @@ setup_create_resources()
         eval tmp_ipaddr=\$${clean_name}
         ipaddr=${tmp_ipaddr//_/.}
 
-        pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} cidr_netmask=32 op monitor interval=15s
+        pcs -f ${cibfile} resource create ${1}-nfs_block ocf:heartbeat:portblock protocol=tcp \
+        portno=2049 action=block ip=${ipaddr} --group ${1}-group
+        if [ $? -ne 0 ]; then
+            logger "warning pcs resource create ${1}-nfs_block failed"
+        fi
+        pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \
+        cidr_netmask=32 op monitor interval=15s --group ${1}-group --after ${1}-nfs_block
         if [ $? -ne 0 ]; then
-            logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} cidr_netmask=32 op monitor interval=15s failed"
+            logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \
+            cidr_netmask=32 op monitor interval=15s failed"
         fi
 
         pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1
@@ -491,6 +498,14 @@ setup_create_resources()
             logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed"
         fi
 
+        pcs -f ${cibfile} resource create ${1}-nfs_unblock ocf:heartbeat:portblock protocol=tcp \
+        portno=2049 action=unblock ip=${ipaddr} reset_local_on_unblock_stop=true \
+        tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1
+        if [ $? -ne 0 ]; then
+            logger "warning pcs resource create ${1}-nfs_unblock failed"
+        fi
+
+
         shift
     done
 
@@ -529,9 +544,9 @@ teardown_resources()
     fi
 
     while [[ ${1} ]]; do
-        pcs resource delete ${1}-cluster_ip-1
+        pcs resource delete ${1}-group
         if [ $? -ne 0 ]; then
-            logger "warning: pcs resource delete ${1}-cluster_ip-1 failed"
+            logger "warning: pcs resource delete ${1}-group failed"
         fi
         shift
     done
@@ -697,6 +712,9 @@ setup_state_volume()
         fi
 
 
+        if [ ! -d ${mnt}/nfs-ganesha/tickle_dir ]; then
+            mkdir ${mnt}/nfs-ganesha/tickle_dir
+        fi
         if [ ! -d ${mnt}/nfs-ganesha/${dirname} ]; then
             mkdir ${mnt}/nfs-ganesha/${dirname}
         fi
diff --git a/glusterfs.spec.in b/glusterfs.spec.in
index 28517b9..29cc7ba 100644
--- a/glusterfs.spec.in
+++ b/glusterfs.spec.in
@@ -397,6 +397,8 @@ Requires:         nfs-ganesha-gluster, pcs, dbus
 %if ( 0%{?rhel} && 0%{?rhel} == 6 )
 Requires:         cman, pacemaker, corosync
 %endif
+# we need portblock resource-agent
+Requires:         %{_prefix}/lib/ocf/resource.d/portblock
 
 %description ganesha
 GlusterFS is a distributed file-system capable of scaling to several
@@ -2041,6 +2043,9 @@ end
 %endif
 
 %changelog
+* Fri Nov 11 2016 Soumya Koduri <skoduri@redhat.com>
+- Add dependency on portblock resource agent for ganesha package (#1278336)
+
 * Fri Oct 14 2016 Milind Changire <mchangir@redhat.com>
 - Changed pretrans scripts to use os.tmpname() for enhanced security
   for server builds only (#1362044)
-- 
1.7.1