From 7cb9ed49baefdb4aa8ba5a8ca004eac84780766e Mon Sep 17 00:00:00 2001
From: Pranith Kumar K <pkarampu@redhat.com>
Date: Thu, 31 Mar 2016 14:40:09 +0530
Subject: [PATCH 084/104] cluster/afr: Fix spurious entries in heal info

Problem:
The locking scheme in afr-v1 locked the directory/file completely during
self-heal. The newer locking scheme does not require full directory or file
locks, but afr-v2 still carries compatibility code to work well with older
clients: entry self-heal takes a lock on a special 256-character name which
cannot be created on the fs, and data self-heal used to take a lock on the
range (LLONG_MAX-2, 1). With the old locking scheme, heal info has to take
sh-domain locks before examining the heal state; if it does not, heal info
may hang until self-heal completes because of these compatibility locks.
The problem with heal info taking sh-domain locks is that when two heal
info processes, or shd and heal info, inspect the heal state in parallel
using trylocks on the sh-domain, each of them may conclude that a heal is
in progress. This was leading to spurious entries being shown in heal info.
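
To illustrate the race, here is a standalone model using flock(1); it is
not the actual GlusterFS locking code, and the lock file path and names
are made up. Two "heal info" inspectors probe the same lock with a
non-blocking trylock; whichever one loses the race wrongly reports that a
heal is in progress:

    #!/bin/bash
    # Model of the heal-info trylock race (illustrative only).
    LOCK=/tmp/sh-domain.lock

    inspect () {
            if flock -n 9; then
                    # Got the lock: nothing else holds it, entry is clean.
                    sleep 1        # hold the lock while "inspecting"
                    echo "$1: entry is clean"
            else
                    # Trylock failed: could be a real heal, or just the
                    # other inspector holding the lock -> spurious entry.
                    echo "$1: possibly undergoing heal (spurious)"
            fi 9>"$LOCK"
    }

    inspect heal-info-1 &
    inspect heal-info-2 &
    wait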

Fix:
As long as the afr-v1 style of locking is in use, this problem cannot be
fixed with a simple solution. But if we know that the cluster is running
the newer locking scheme, heal info can report accurate information. So
introduce a new option called 'locking-scheme': when it is set to
'granular', heal info gives correct information. In addition, the extra
network hops for taking compatibility locks and sh-domain locks in heal
info are no longer needed, which improves performance.
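
For example, the new scheme would be enabled per volume with the usual
volume-set CLI (the volume name below is a placeholder):

    # gluster volume set <VOLNAME> cluster.locking-scheme granular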

 >BUG: 1322850
 >Change-Id: Ia563c5f096b5922009ff0ec1c42d969d55d827a3
 >Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
 >Reviewed-on: http://review.gluster.org/13873
 >Smoke: Gluster Build System <jenkins@build.gluster.com>
 >NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
 >CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
 >Reviewed-by: Ashish Pandey <aspandey@redhat.com>
 >Reviewed-by: Anuradha Talur <atalur@redhat.com>
 >Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com>
 >(cherry picked from commit b6a0780d86e7c6afe7ae0d9a87e6fe5c62b4d792)

 >Change-Id: If7eee18843b48bbeff4c1355c102aa572b2c155a
 >BUG: 1294675
 >Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
 >Reviewed-on: http://review.gluster.org/14039
 >Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com>
 >Smoke: Gluster Build System <jenkins@build.gluster.com>
 >NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
 >CentOS-regression: Gluster Build System <jenkins@build.gluster.com>

BUG: 1311839
Change-Id: Ia7cc336a3dfced088b1a767cab4e03874e219876
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/72775
---
 tests/basic/afr/heal-info.t                     |   36 +++++++++++++++++++++++
 xlators/cluster/afr/src/afr-common.c            |   32 ++++++++++++++------
 xlators/cluster/afr/src/afr-self-heal-entry.c   |   17 +++++++---
 xlators/cluster/afr/src/afr.c                   |   14 ++++++++-
 xlators/cluster/afr/src/afr.h                   |    1 +
 xlators/mgmt/glusterd/src/glusterd-volume-set.c |    7 ++++-
 6 files changed, 90 insertions(+), 17 deletions(-)
 create mode 100644 tests/basic/afr/heal-info.t

diff --git a/tests/basic/afr/heal-info.t b/tests/basic/afr/heal-info.t
new file mode 100644
index 0000000..b4da50b
--- /dev/null
+++ b/tests/basic/afr/heal-info.t
@@ -0,0 +1,36 @@
+#!/bin/bash
+#Test that parallel heal-info command execution doesn't result in spurious
+#entries with locking-scheme granular
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function heal_info_to_file {
+        while [ -f $M0/a.txt ]; do
+                $CLI volume heal $V0 info | grep -i number | grep -v 0 >> $1
+        done
+}
+
+function write_and_del_file {
+        dd of=$M0/a.txt if=/dev/zero bs=1024k count=100
+        rm -f $M0/a.txt
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 locking-scheme granular
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST touch $M0/a.txt
+write_and_del_file &
+touch $B0/f1 $B0/f2
+heal_info_to_file $B0/f1 &
+heal_info_to_file $B0/f2 &
+wait
+EXPECT "^0$" echo $(wc -l $B0/f1 | awk '{print $1}')
+EXPECT "^0$" echo $(wc -l $B0/f2 | awk '{print $1}')
+
+cleanup;
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index b799bff..c79ec06 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -4554,8 +4554,11 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,
         afr_private_t   *priv = NULL;
         fd_t          *fd = NULL;
         struct afr_reply *locked_replies = NULL;
+        gf_boolean_t granular_locks = _gf_false;
 
         priv = this->private;
+	if (strcmp ("granular", priv->locking_scheme) == 0)
+	        granular_locks = _gf_true;
         locked_on = alloca0 (priv->child_count);
         data_lock = alloca0 (priv->child_count);
         sources = alloca0 (priv->child_count);
@@ -4576,10 +4579,12 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,
 
         locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
 
-        ret = afr_selfheal_tryinodelk (frame, this, inode, priv->sh_domain,
-                                       0, 0, locked_on);
+        if (!granular_locks) {
+                ret = afr_selfheal_tryinodelk (frame, this, inode,
+                                              priv->sh_domain, 0, 0, locked_on);
+        }
         {
-                if (ret == 0) {
+                if (!granular_locks && (ret == 0)) {
                         ret = -afr_final_errno (frame->local, priv);
                         if (ret == 0)
                                 ret = -ENOTCONN;/* all invalid responses */
@@ -4606,8 +4611,9 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,
                                         data_lock);
         }
 unlock:
-        afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain, 0, 0,
-                                locked_on);
+        if (!granular_locks)
+                afr_selfheal_uninodelk (frame, this, inode, priv->sh_domain, 0,
+                                        0, locked_on);
 out:
         if (locked_replies)
                 afr_replies_wipe (locked_replies, priv->child_count);
@@ -4630,8 +4636,11 @@ afr_selfheal_locked_entry_inspect (call_frame_t *frame, xlator_t *this,
         unsigned char *sinks = NULL;
         unsigned char *healed_sinks = NULL;
         struct afr_reply *locked_replies = NULL;
+        gf_boolean_t granular_locks = _gf_false;
 
         priv = this->private;
+	if (strcmp ("granular", priv->locking_scheme) == 0)
+	        granular_locks = _gf_true;
         locked_on = alloca0 (priv->child_count);
         data_lock = alloca0 (priv->child_count);
         sources = alloca0 (priv->child_count);
@@ -4640,10 +4649,12 @@ afr_selfheal_locked_entry_inspect (call_frame_t *frame, xlator_t *this,
 
         locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
 
-        ret = afr_selfheal_tryentrylk (frame, this, inode, priv->sh_domain,
-                                       NULL, locked_on);
+        if (!granular_locks) {
+                ret = afr_selfheal_tryentrylk (frame, this, inode,
+                                              priv->sh_domain, NULL, locked_on);
+        }
         {
-                if (ret == 0) {
+                if (!granular_locks && ret == 0) {
                         ret = -afr_final_errno (frame->local, priv);
                         if (ret == 0)
                                 ret = -ENOTCONN;/* all invalid responses */
@@ -4673,8 +4684,9 @@ afr_selfheal_locked_entry_inspect (call_frame_t *frame, xlator_t *this,
                                         data_lock);
         }
 unlock:
-        afr_selfheal_unentrylk (frame, this, inode, priv->sh_domain, NULL,
-                                locked_on);
+        if (!granular_locks)
+                afr_selfheal_unentrylk (frame, this, inode, priv->sh_domain,
+                                        NULL, locked_on);
 out:
         if (locked_replies)
                 afr_replies_wipe (locked_replies, priv->child_count);
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index a93c640..30db66b 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -749,8 +749,11 @@ afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode)
         unsigned char *long_name_locked = NULL;
 	fd_t *fd = NULL;
 	int ret = 0;
+	gf_boolean_t granular_locks = _gf_false;
 
 	priv = this->private;
+	if (strcmp ("granular", priv->locking_scheme) == 0)
+	        granular_locks = _gf_true;
 
 	fd = afr_selfheal_data_opendir (this, inode);
 	if (!fd)
@@ -777,10 +780,13 @@ afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode)
 			goto unlock;
 		}
 
-                ret = afr_selfheal_tryentrylk (frame, this, inode, this->name,
-                                               LONG_FILENAME, long_name_locked);
+                if (!granular_locks) {
+                        ret = afr_selfheal_tryentrylk (frame, this, inode,
+                                                      this->name, LONG_FILENAME,
+                                                      long_name_locked);
+                }
                 {
-                        if (ret < 1) {
+                        if (!granular_locks && ret < 1) {
                                 gf_msg_debug (this->name, 0, "%s: Skipping"
                                               " entry self-heal as only %d "
                                               "sub-volumes could be "
@@ -793,8 +799,9 @@ afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode)
                         }
                         ret = __afr_selfheal_entry (frame, this, fd, locked_on);
                 }
-                afr_selfheal_unentrylk (frame, this, inode, this->name,
-                                        LONG_FILENAME, long_name_locked);
+                if (!granular_locks)
+                        afr_selfheal_unentrylk (frame, this, inode, this->name,
+                                               LONG_FILENAME, long_name_locked);
 	}
 unlock:
 	afr_selfheal_unentrylk (frame, this, inode, priv->sh_domain, NULL, locked_on);
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index c47e637..ad92eaf 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -186,7 +186,10 @@ reconfigure (xlator_t *this, dict_t *options)
                 priv->read_child = index;
         }
 
-        GF_OPTION_RECONF ("pre-op-compat", priv->pre_op_compat, options, bool, out);
+        GF_OPTION_RECONF ("pre-op-compat", priv->pre_op_compat, options, bool,
+                          out);
+        GF_OPTION_RECONF ("locking-scheme", priv->locking_scheme, options, str,
+                          out);
 
         GF_OPTION_RECONF ("eager-lock", priv->eager_lock, options, bool, out);
         GF_OPTION_RECONF ("quorum-type", qtype, options, str, out);
@@ -377,6 +380,7 @@ init (xlator_t *this)
         GF_OPTION_INIT ("entrylk-trace", priv->entrylk_trace, bool, out);
 
         GF_OPTION_INIT ("pre-op-compat", priv->pre_op_compat, bool, out);
+        GF_OPTION_INIT ("locking-scheme", priv->locking_scheme, str, out);
 
         GF_OPTION_INIT ("eager-lock", priv->eager_lock, bool, out);
         GF_OPTION_INIT ("quorum-type", qtype, str, out);
@@ -862,5 +866,13 @@ struct volume_options options[] = {
            .description = "This option can be used to control number of heals"
                           " that can wait in SHD per subvolume",
         },
+        { .key = {"locking-scheme"},
+          .type = GF_OPTION_TYPE_STR,
+          .value = { "full", "granular"},
+          .default_value = "full",
+          .description = "If this option is set to granular, self-heal will "
+                         "stop being compatible with afr-v1, which helps afr "
+                         "be more granular while self-healing",
+        },
         { .key  = {NULL} },
 };
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 1a08ff5..d3c1e10 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -144,6 +144,7 @@ typedef struct _afr_private {
 	/* pump dependencies */
 	void                   *pump_private;
 	gf_boolean_t           use_afr_in_pump;
+	char                   *locking_scheme;
 } afr_private_t;
 
 
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index 466e5cb..84bb441 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -2717,7 +2717,12 @@ struct volopt_map_entry glusterd_volopt_map[] = {
           .op_version = GD_OP_VERSION_3_7_12,
           .flags      = OPT_FLAG_CLIENT_OPT
         },
-
+        { .key        = "cluster.locking-scheme",
+          .voltype    = "cluster/replicate",
+          .type       = DOC,
+          .op_version = GD_OP_VERSION_3_7_12,
+          .flags      = OPT_FLAG_CLIENT_OPT
+        },
         { .key         = NULL
         }
 };
-- 
1.7.1