From 3ca2971df5aeb8f67a39f5fa2866e68a54fdd9ce Mon Sep 17 00:00:00 2001
From: Dan Lambright <dlambrig@redhat.com>
Date: Thu, 11 Jun 2015 10:50:41 -0400
Subject: [PATCH 54/57] cluster/tier: account for reordered layouts
This is a backport of fix 11092.
> For a tiered volume the cold subvolume is always at a fixed
> position in the graph. DHT's layout array, on the other hand,
> may have the cold subvolume in either the first or second
> index, therefore code cannot make any assumptions. The fix
> searches the layout for the correct position dynamically
> rather than statically.
> The bug manifested itself in NFS, in which a newly attached
> subvolume had not received an existing directory. This case
> is a "stale entry" and is marked as such in the layout for
> that directory. The code did not see this, because it
> looked at the wrong index in the layout array.
> The fix also adds the check for decommissioned bricks, and
> fixes a problem in detach tier related to starting the
> rebalance process: we never received the right defrag
> command and it did not get directed to the tier translator.
> Change-Id: I77cdf9fbb0a777640c98003188565a79be9d0b56
> BUG: 1214289
> Signed-off-by: Dan Lambright <dlambrig@redhat.com>
> Signed-off-by: Dan Lambright <dlambrig@redhat.com>
Change-Id: I402105623c8fe0af416c4b7e22ed77f1b95d9847
BUG: 1228643
Signed-off-by: Dan Lambright <dlambrig@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/50560
Tested-by: Joseph Fernandes <josferna@redhat.com>
Reviewed-by: Joseph Fernandes <josferna@redhat.com>
Reviewed-by: Shyam Ranganathan <srangana@redhat.com>
---
xlators/cluster/dht/src/dht-common.c | 3 +-
xlators/cluster/dht/src/tier.c | 43 ++++++++++++++++-------
xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 6 +++-
3 files changed, 37 insertions(+), 15 deletions(-)
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 6d70f82..8870a30 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -7089,7 +7089,8 @@ int32_t dht_migration_needed(xlator_t *this)
defrag = conf->defrag;
- if (defrag->cmd != GF_DEFRAG_CMD_START_TIER)
+ if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) &&
+ (defrag->cmd != GF_DEFRAG_CMD_START_DETACH_TIER))
ret = 1;
out:
diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c
index 0a9c073..cef4f5c 100644
--- a/xlators/cluster/dht/src/tier.c
+++ b/xlators/cluster/dht/src/tier.c
@@ -916,7 +916,8 @@ tier_migration_needed (xlator_t *this)
defrag = conf->defrag;
- if (defrag->cmd == GF_DEFRAG_CMD_START_TIER)
+ if ((defrag->cmd == GF_DEFRAG_CMD_START_TIER) ||
+ (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER))
ret = 1;
out:
return ret;
@@ -958,9 +959,11 @@ tier_search (xlator_t *this, dht_layout_t *layout, const char *name)
{
xlator_t *subvol = NULL;
void *value;
- int search_first_subvol = 0;
+ int search_subvol = 0;
dht_conf_t *conf = NULL;
gf_defrag_info_t *defrag = NULL;
+ int layout_cold = 0;
+ int layout_hot = 1;
GF_VALIDATE_OR_GOTO("tier", this, out);
GF_VALIDATE_OR_GOTO(this->name, layout, out);
@@ -969,28 +972,42 @@ tier_search (xlator_t *this, dht_layout_t *layout, const char *name)
conf = this->private;
+ /* The first subvolume in the graph is always cold. */
+ /* Find the position of the cold subvolume in the layout. */
+ layout_cold = 0;
+ layout_hot = 1;
+ if (conf->subvolumes[0] != layout->list[0].xlator) {
+ layout_cold = 1;
+ layout_hot = 0;
+ }
+
+ search_subvol = layout_hot;
+
defrag = conf->defrag;
if (defrag && defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER)
- search_first_subvol = 1;
+ search_subvol = layout_cold;
+ /* "decommission_subvols_cnt" can only be non-zero on detach. */
+ /* This will change once brick add/remove is supported for */
+ /* tiered volumes. */
+ else if (conf->decommission_subvols_cnt) {
+ search_subvol = layout_cold;
+ }
else if (!dict_get_ptr (this->options, "rule", &value) &&
- !strcmp(layout->list[0].xlator->name, value)) {
- search_first_subvol = 1;
+ !strcmp(layout->list[layout_cold].xlator->name, value)) {
+ search_subvol = layout_cold;
}
- if ((layout->list[0].err > 0) && (layout->list[0].err != ENOTCONN))
- search_first_subvol = 0;
+ if ((layout->list[search_subvol].err > 0) &&
+ (layout->list[search_subvol].err != ENOTCONN))
+ search_subvol = layout_cold;
- if (search_first_subvol)
- subvol = layout->list[0].xlator;
- else
- subvol = layout->list[1].xlator;
+ subvol = layout->list[search_subvol].xlator;
+ out:
-out:
return subvol;
}
-
dht_methods_t tier_methods = {
.migration_get_dst_subvol = tier_migration_get_dst,
.migration_other = tier_start,
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index 0af86f5..aa3a6c9 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -2039,6 +2039,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
char *brick_tmpstr = NULL;
int start_remove = 0;
uint32_t commit_hash = 0;
+ int defrag_cmd = 0;
this = THIS;
GF_ASSERT (this);
@@ -2309,9 +2310,12 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
volinfo->rebal.commit_hash = commit_hash;
}
/* perform the rebalance operations */
+ defrag_cmd = GF_DEFRAG_CMD_START_FORCE;
+ if (cmd == GF_OP_CMD_DETACH_START)
+ defrag_cmd = GF_DEFRAG_CMD_START_DETACH_TIER;
ret = glusterd_handle_defrag_start
(volinfo, err_str, sizeof (err_str),
- GF_DEFRAG_CMD_START_FORCE,
+ defrag_cmd,
glusterd_remove_brick_migrate_cbk, GD_OP_REMOVE_BRICK);
if (!ret)
--
1.7.1