Blob Blame History Raw
From 3ca2971df5aeb8f67a39f5fa2866e68a54fdd9ce Mon Sep 17 00:00:00 2001
From: Dan Lambright <dlambrig@redhat.com>
Date: Thu, 11 Jun 2015 10:50:41 -0400
Subject: [PATCH 54/57] cluster/tier: account for reordered layouts

This is a backport of fix 11092.

> For a tiered volume the cold subvolume is always at a fixed
> position in the graph. DHT's layout array, on the other hand,
> may have the cold subvolume in either the first or second
> index, therefore code cannot make any assumptions. The fix
> searches the layout for the correct position dynamically
> rather than statically.

> The bug manifested itself in NFS, in which a newly attached
> subvolume had not received an existing directory. This case
> is a "stale entry" and marked as such in the layout for
> that directory.  The code did not see this, because it
> looked at the wrong index in the layout array.

> The fix also adds the check for decommissioned bricks, and
> fixes a problem in detach tier related to starting the
> rebalance process: we never received the right defrag
> command and it did not get directed to the tier translator.

> Change-Id: I77cdf9fbb0a777640c98003188565a79be9d0b56
> BUG: 1214289
> Signed-off-by: Dan Lambright <dlambrig@redhat.com>
> Signed-off-by: Dan Lambright <dlambrig@redhat.com>

Change-Id: I402105623c8fe0af416c4b7e22ed77f1b95d9847
BUG: 1228643
Signed-off-by: Dan Lambright <dlambrig@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/50560
Tested-by: Joseph Fernandes <josferna@redhat.com>
Reviewed-by: Joseph Fernandes <josferna@redhat.com>
Reviewed-by: Shyam Ranganathan <srangana@redhat.com>
---
 xlators/cluster/dht/src/dht-common.c           |    3 +-
 xlators/cluster/dht/src/tier.c                 |   43 ++++++++++++++++-------
 xlators/mgmt/glusterd/src/glusterd-brick-ops.c |    6 +++-
 3 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 6d70f82..8870a30 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -7089,7 +7089,8 @@ int32_t dht_migration_needed(xlator_t *this)
 
         defrag = conf->defrag;
 
-        if (defrag->cmd != GF_DEFRAG_CMD_START_TIER)
+        if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) &&
+            (defrag->cmd != GF_DEFRAG_CMD_START_DETACH_TIER))
                 ret = 1;
 
 out:
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c
index 0a9c073..cef4f5c 100644
--- a/xlators/cluster/dht/src/tier.c
+++ b/xlators/cluster/dht/src/tier.c
@@ -916,7 +916,8 @@ tier_migration_needed (xlator_t *this)
 
         defrag = conf->defrag;
 
-        if (defrag->cmd == GF_DEFRAG_CMD_START_TIER)
+        if ((defrag->cmd == GF_DEFRAG_CMD_START_TIER) ||
+            (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER))
                 ret = 1;
 out:
         return ret;
@@ -958,9 +959,11 @@ tier_search (xlator_t *this, dht_layout_t *layout, const char *name)
 {
         xlator_t                *subvol = NULL;
         void                    *value;
-        int                      search_first_subvol = 0;
+        int                      search_subvol = 0;
         dht_conf_t              *conf   = NULL;
         gf_defrag_info_t        *defrag = NULL;
+        int                      layout_cold = 0;
+        int                      layout_hot = 1;
 
         GF_VALIDATE_OR_GOTO("tier", this, out);
         GF_VALIDATE_OR_GOTO(this->name, layout, out);
@@ -969,28 +972,42 @@ tier_search (xlator_t *this, dht_layout_t *layout, const char *name)
 
         conf = this->private;
 
+        /* The first subvolume in the graph is always cold. */
+        /* Find the position of the cold subvolume in the layout. */
+        layout_cold = 0;
+        layout_hot = 1;
+        if (conf->subvolumes[0] != layout->list[0].xlator) {
+                layout_cold = 1;
+                layout_hot = 0;
+        }
+
+        search_subvol = layout_hot;
+
         defrag = conf->defrag;
         if (defrag && defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER)
-                search_first_subvol = 1;
+                search_subvol = layout_cold;
 
+        /* "decommission_subvols_cnt" can only be non-zero on detach. */
+        /* This will change once brick add/remove is supported for */
+        /* tiered volumes. */
+        else if (conf->decommission_subvols_cnt) {
+                search_subvol = layout_cold;
+        }
         else if (!dict_get_ptr (this->options, "rule", &value) &&
-                 !strcmp(layout->list[0].xlator->name, value)) {
-                search_first_subvol = 1;
+                 !strcmp(layout->list[layout_cold].xlator->name, value)) {
+                search_subvol = layout_cold;
         }
 
-        if ((layout->list[0].err > 0) && (layout->list[0].err != ENOTCONN))
-                search_first_subvol = 0;
+        if ((layout->list[search_subvol].err > 0) &&
+            (layout->list[search_subvol].err != ENOTCONN))
+                search_subvol = layout_cold;
 
-        if (search_first_subvol)
-                subvol = layout->list[0].xlator;
-        else
-                subvol = layout->list[1].xlator;
+        subvol = layout->list[search_subvol].xlator;
+ out:
 
-out:
         return subvol;
 }
 
-
 dht_methods_t tier_methods = {
         .migration_get_dst_subvol = tier_migration_get_dst,
         .migration_other = tier_start,
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index 0af86f5..aa3a6c9 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -2039,6 +2039,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
         char                    *brick_tmpstr  = NULL;
         int                      start_remove  = 0;
         uint32_t                 commit_hash   = 0;
+        int                      defrag_cmd    = 0;
 
         this = THIS;
         GF_ASSERT (this);
@@ -2309,9 +2310,12 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
                         volinfo->rebal.commit_hash = commit_hash;
                 }
                 /* perform the rebalance operations */
+                defrag_cmd = GF_DEFRAG_CMD_START_FORCE;
+                if (cmd == GF_OP_CMD_DETACH_START)
+                        defrag_cmd = GF_DEFRAG_CMD_START_DETACH_TIER;
                 ret = glusterd_handle_defrag_start
                         (volinfo, err_str, sizeof (err_str),
-                         GF_DEFRAG_CMD_START_FORCE,
+                         defrag_cmd,
                          glusterd_remove_brick_migrate_cbk, GD_OP_REMOVE_BRICK);
 
                 if (!ret)
-- 
1.7.1