Blob Blame History Raw
From ede515d765e55744bcbaa199a9ed703d265aa88b Mon Sep 17 00:00:00 2001
From: Shyam <srangana@redhat.com>
Date: Fri, 15 May 2015 15:50:42 -0400
Subject: [PATCH 19/57] dht: Add lookup-optimize configuration option for DHT

Currently, with commit 4eaaf5, a mixed version cluster would
have issues if lookup-unhashed is set to auto, as older clients
would fail to validate the layouts if newer clients (i.e. 3.7 or
upwards) create directories. Also, in a mixed version cluster the
rebalance daemon would set the commit hash for some subvolumes and
not for the others.

This commit fixes this problem by moving the enabling of the
functionality introduced in the above mentioned commit to a
new dht option. This option also has an op_version of 3_7_1,
thereby preventing it from being set in a mixed version
cluster. It brings in the following changes:
- Option can be set only if min version of the cluster is
3.7.1 or more
- Rebalance and mkdir update the layout with the commit hashes
only if this option is set, hence ensuring rebalance works in a
mixed version cluster, and also directories created by newer
clients do not cause layout errors when read by older clients
- This option also supersedes lookup-unhashed, to make enabling the
lookup optimization more deterministic and to avoid conflicts
with lookup-unhashed settings.

Option added is cluster.lookup-optimize, which is a boolean.

Usage: # gluster volume set VOLNAME cluster.lookup-optimize on

Change-Id: Ifd1d4ce3f6438fcbcd60ffbfdbfb647355ea1ae0
BUG: 1222053
Signed-off-by: Shyam <srangana@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/50238
Reviewed-by: Nithya Balachandran <nbalacha@redhat.com>
Reviewed-by: Raghavendra Gowdappa <rgowdapp@redhat.com>
Tested-by: Raghavendra Gowdappa <rgowdapp@redhat.com>
---
 tests/features/unhashed-auto.t                  |   28 ++++++++++-
 xlators/cluster/dht/src/dht-common.c            |   62 +++++++++++++++++-----
 xlators/cluster/dht/src/dht-common.h            |    1 +
 xlators/cluster/dht/src/dht-rebalance.c         |    6 ++-
 xlators/cluster/dht/src/dht-shared.c            |   13 +++++
 xlators/mgmt/glusterd/src/glusterd-volume-set.c |    5 ++
 6 files changed, 98 insertions(+), 17 deletions(-)

diff --git a/tests/features/unhashed-auto.t b/tests/features/unhashed-auto.t
index 97663c2..cba5b77 100755
--- a/tests/features/unhashed-auto.t
+++ b/tests/features/unhashed-auto.t
@@ -39,6 +39,11 @@ get_xattr () {
 	$cmd $1 | od -tx1 -An | tr -d ' '
 }
 
+get_xattr_hash () {
+        cmd="getfattr --absolute-names --only-values -n trusted.glusterfs.dht"
+        $cmd $1 | od -tx1 -An | awk '{printf("%s%s%s%s\n", $1, $2, $3, $4);}'
+}
+
 cleanup
 
 TEST glusterd
@@ -49,7 +54,7 @@ TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
 EXPECT "$V0" volinfo_field $V0 'Volume Name'
 EXPECT 'Created' volinfo_field $V0 'Status'
 
-TEST $CLI volume set $V0 cluster.lookup-unhashed auto
+TEST $CLI volume set $V0 cluster.lookup-optimize ON
 
 TEST $CLI volume start $V0
 EXPECT 'Started' volinfo_field $V0 'Status'
@@ -96,4 +101,25 @@ TEST wait_for_rebalance
 new_val=$(get_xattr $B0/${V0}1/dir)
 TEST [ ! x"$old_val" = x"$new_val" ]
 
+# Force an anomaly on an existing layout and heal it
+## The healed layout should not carry a commit-hash (or should carry 1 in the
+## commit-hash)
+TEST setfattr -x trusted.glusterfs.dht $B0/${V0}1/dir
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+TEST [ -d $M0/dir ]
+new_hash=$(get_xattr_hash $B0/${V0}1/dir)
+TEST [ x"$new_hash" = x"00000001" ]
+new_hash=$(get_xattr_hash $B0/${V0}2/dir)
+TEST [ x"$new_hash" = x"00000001" ]
+
+# Unset the option and check that newly created directories get 1 in the
+# disk layout
+TEST $CLI volume reset $V0 cluster.lookup-optimize
+TEST mkdir $M0/dir1
+new_hash=$(get_xattr_hash $B0/${V0}1/dir1)
+TEST [ x"$new_hash" = x"00000001" ]
+new_hash=$(get_xattr_hash $B0/${V0}2/dir1)
+TEST [ x"$new_hash" = x"00000001" ]
+
+
 cleanup
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 8e78746..48a003c 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -1923,25 +1923,51 @@ dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                               "Entry %s missing on subvol %s",
                               loc->path, prev->this->name);
 
-                if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_ON) {
-                        local->op_errno = ENOENT;
-                        dht_lookup_everywhere (frame, this, loc);
-                        return 0;
-                }
-                if ((conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) &&
-                    (loc->parent)) {
+                /* lookup-optimize supersedes lookup-unhashed settings,
+                 *   - so if it is set, do not process search_unhashed
+                 *   - except, in the case of rebalance daemon, we want to
+                 *     force the lookup_everywhere behavior */
+                if (!conf->defrag && conf->lookup_optimize && loc->parent) {
                         ret = dht_inode_ctx_layout_get (loc->parent, this,
                                                         &parent_layout);
-                        if (ret || !parent_layout)
-                                goto out;
-                        if (parent_layout->commit_hash
-                                  != conf->vol_commit_hash) {
-                                gf_log (this->name, GF_LOG_DEBUG,
-                                        "hashes don't match, do global lookup");
+                        if (ret || !parent_layout ||
+                            (parent_layout->commit_hash !=
+                             conf->vol_commit_hash)) {
+                                gf_msg_debug (this->name, 0,
+                                        "hashes don't match (ret - %d,"
+                                        " parent_layout - %p, parent_hash - %x,"
+                                        " vol_hash - %x), do global lookup",
+                                        ret, parent_layout,
+                                        (parent_layout ?
+                                         parent_layout->commit_hash : -1),
+                                        conf->vol_commit_hash);
+                                local->op_errno = ENOENT;
+                                dht_lookup_everywhere (frame, this, loc);
+                                return 0;
+                        }
+                } else {
+                        if (conf->search_unhashed ==
+                            GF_DHT_LOOKUP_UNHASHED_ON) {
                                 local->op_errno = ENOENT;
                                 dht_lookup_everywhere (frame, this, loc);
                                 return 0;
                         }
+
+                        if ((conf->search_unhashed ==
+                            GF_DHT_LOOKUP_UNHASHED_AUTO) &&
+                            (loc->parent)) {
+                                ret = dht_inode_ctx_layout_get (loc->parent,
+                                                                this,
+                                                                &parent_layout);
+                                if (ret || !parent_layout)
+                                        goto out;
+                                if (parent_layout->search_unhashed) {
+                                        local->op_errno = ENOENT;
+                                        dht_lookup_everywhere (frame, this,
+                                                               loc);
+                                        return 0;
+                                }
+                        }
                 }
         }
 
@@ -5800,7 +5826,15 @@ dht_mkdir (call_frame_t *frame, xlator_t *this,
                 goto err;
         }
 
-        local->layout->commit_hash = conf->vol_commit_hash;
+        /* set the newly created directory hash to the commit hash
+         * if the configuration option is set. If configuration option
+         * is not set, the older clients may still be connecting to the
+         * volume and hence we need to preserve the 1 in disk[0] part of the
+         * layout xattr */
+        if (conf->lookup_optimize)
+                local->layout->commit_hash = conf->vol_commit_hash;
+        else
+                local->layout->commit_hash = DHT_LAYOUT_HASH_INVALID;
 
         STACK_WIND (frame, dht_mkdir_hashed_cbk,
                     hashed_subvol,
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 7a5d40f..4b6531c 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -398,6 +398,7 @@ struct dht_conf {
         dht_layout_t **file_layouts;
         dht_layout_t **dir_layouts;
         gf_boolean_t   search_unhashed;
+        gf_boolean_t   lookup_optimize;
         int            gen;
         dht_du_t      *du_stats;
         double         min_free_disk;
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 3ab73d4..89cc3a8 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -2362,8 +2362,10 @@ gf_defrag_settle_hash (xlator_t *this, gf_defrag_info_t *defrag,
                 return -1;
         }
 
-        if (conf->local_subvols_cnt == 0) {
-                /* Commit hash updates are only done on local subvolumes
+        if (conf->local_subvols_cnt == 0 || !conf->lookup_optimize) {
+                /* Commit hash updates are only done on local subvolumes and
+                 * only when lookup optimization is needed (for older client
+                 * support)
                  */
                 return 0;
         }
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
index a1f72a8..456d831 100644
--- a/xlators/cluster/dht/src/dht-shared.c
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -431,6 +431,9 @@ dht_reconfigure (xlator_t *this, dict_t *options)
                 }
         }
 
+        GF_OPTION_RECONF ("lookup-optimize", conf->lookup_optimize, options,
+                          bool, out);
+
 	GF_OPTION_RECONF ("min-free-disk", conf->min_free_disk, options,
                           percent_or_size, out);
         /* option can be any one of percent or bytes */
@@ -667,6 +670,8 @@ dht_init (xlator_t *this)
                         conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
         }
 
+        GF_OPTION_INIT ("lookup-optimize", conf->lookup_optimize, bool, err);
+
         GF_OPTION_INIT ("unhashed-sticky-bit", conf->unhashed_sticky_bit, bool,
                         err);
 
@@ -838,6 +843,14 @@ struct volume_options options[] = {
           "from the hash subvolume. If set to OFF, it does not do a lookup "
           "on the remaining subvolumes."
         },
+        { .key = {"lookup-optimize"},
+          .type = GF_OPTION_TYPE_BOOL,
+          .default_value = "off",
+          .description = "This option if set to ON enables the optimization "
+          "of -ve lookups, by not doing a lookup on non-hashed subvolumes for "
+          "files, in case the hashed subvolume does not return any result. "
+          "This option disregards the lookup-unhashed setting, when enabled."
+        },
         { .key  = {"min-free-disk"},
           .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
           .default_value = "10%",
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index b8ca6be..65cbfc0 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -333,6 +333,11 @@ struct volopt_map_entry glusterd_volopt_map[] = {
           .op_version = 1,
           .flags      = OPT_FLAG_CLIENT_OPT
         },
+        { .key        = "cluster.lookup-optimize",
+          .voltype    = "cluster/distribute",
+          .op_version  = GD_OP_VERSION_3_7_2,
+          .flags      = OPT_FLAG_CLIENT_OPT
+        },
         { .key        = "cluster.min-free-disk",
           .voltype    = "cluster/distribute",
           .op_version = 1,
-- 
1.7.1