From 3ca2971df5aeb8f67a39f5fa2866e68a54fdd9ce Mon Sep 17 00:00:00 2001 From: Dan Lambright Date: Thu, 11 Jun 2015 10:50:41 -0400 Subject: [PATCH 54/57] cluster/tier: account for reordered layouts This is a backport of fix 11092. > For a tiered volume the cold subvolume is always at a fixed > position in the graph. DHT's layout array, on the other hand, > may have the cold subvolume in either the first or second > index, therefore code cannot make any assumptions. The fix > searches the layout for the correct position dynamically > rather than statically. > The bug manifested itself in NFS, in which a newly attached > subvolume had not received an existing directory. This case > is a "stale entry" and marked as such in the layout for > that directory. The code did not see this, because it > looked at the wrong index in the layout array. > The fix also adds the check for decommissioned bricks, and > fixes a problem in detach tier related to starting the > rebalance process: we never received the right defrag > command and it did not get directed to the tier translator. 
> Change-Id: I77cdf9fbb0a777640c98003188565a79be9d0b56 > BUG: 1214289 > Signed-off-by: Dan Lambright > Signed-off-by: Dan Lambright Change-Id: I402105623c8fe0af416c4b7e22ed77f1b95d9847 BUG: 1228643 Signed-off-by: Dan Lambright Reviewed-on: https://code.engineering.redhat.com/gerrit/50560 Tested-by: Joseph Fernandes Reviewed-by: Joseph Fernandes Reviewed-by: Shyam Ranganathan --- xlators/cluster/dht/src/dht-common.c | 3 +- xlators/cluster/dht/src/tier.c | 43 ++++++++++++++++------- xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 6 +++- 3 files changed, 37 insertions(+), 15 deletions(-) diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 6d70f82..8870a30 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -7089,7 +7089,8 @@ int32_t dht_migration_needed(xlator_t *this) defrag = conf->defrag; - if (defrag->cmd != GF_DEFRAG_CMD_START_TIER) + if ((defrag->cmd != GF_DEFRAG_CMD_START_TIER) && + (defrag->cmd != GF_DEFRAG_CMD_START_DETACH_TIER)) ret = 1; out: diff --git a/xlators/cluster/dht/src/tier.c b/xlators/cluster/dht/src/tier.c index 0a9c073..cef4f5c 100644 --- a/xlators/cluster/dht/src/tier.c +++ b/xlators/cluster/dht/src/tier.c @@ -916,7 +916,8 @@ tier_migration_needed (xlator_t *this) defrag = conf->defrag; - if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) + if ((defrag->cmd == GF_DEFRAG_CMD_START_TIER) || + (defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER)) ret = 1; out: return ret; @@ -958,9 +959,11 @@ tier_search (xlator_t *this, dht_layout_t *layout, const char *name) { xlator_t *subvol = NULL; void *value; - int search_first_subvol = 0; + int search_subvol = 0; dht_conf_t *conf = NULL; gf_defrag_info_t *defrag = NULL; + int layout_cold = 0; + int layout_hot = 1; GF_VALIDATE_OR_GOTO("tier", this, out); GF_VALIDATE_OR_GOTO(this->name, layout, out); @@ -969,28 +972,42 @@ tier_search (xlator_t *this, dht_layout_t *layout, const char *name) conf = this->private; + /* The first 
subvolume in the graph is always cold. */ + /* Find the position of the cold subvolume in the layout. */ + layout_cold = 0; + layout_hot = 1; + if (conf->subvolumes[0] != layout->list[0].xlator) { + layout_cold = 1; + layout_hot = 0; + } + + search_subvol = layout_hot; + defrag = conf->defrag; if (defrag && defrag->cmd == GF_DEFRAG_CMD_START_DETACH_TIER) - search_first_subvol = 1; + search_subvol = layout_cold; + /* "decommission_subvols_cnt" can only be non-zero on detach. */ + /* This will change once brick add/remove is supported for */ + /* tiered volumes. */ + else if (conf->decommission_subvols_cnt) { + search_subvol = layout_cold; + } else if (!dict_get_ptr (this->options, "rule", &value) && - !strcmp(layout->list[0].xlator->name, value)) { - search_first_subvol = 1; + !strcmp(layout->list[layout_cold].xlator->name, value)) { + search_subvol = layout_cold; } - if ((layout->list[0].err > 0) && (layout->list[0].err != ENOTCONN)) - search_first_subvol = 0; + if ((layout->list[search_subvol].err > 0) && + (layout->list[search_subvol].err != ENOTCONN)) + search_subvol = layout_cold; - if (search_first_subvol) - subvol = layout->list[0].xlator; - else - subvol = layout->list[1].xlator; + subvol = layout->list[search_subvol].xlator; + out: -out: return subvol; } - dht_methods_t tier_methods = { .migration_get_dst_subvol = tier_migration_get_dst, .migration_other = tier_start, diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index 0af86f5..aa3a6c9 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -2039,6 +2039,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) char *brick_tmpstr = NULL; int start_remove = 0; uint32_t commit_hash = 0; + int defrag_cmd = 0; this = THIS; GF_ASSERT (this); @@ -2309,9 +2310,12 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) volinfo->rebal.commit_hash = commit_hash; } /* perform the 
rebalance operations */ + defrag_cmd = GF_DEFRAG_CMD_START_FORCE; + if (cmd == GF_OP_CMD_DETACH_START) + defrag_cmd = GF_DEFRAG_CMD_START_DETACH_TIER; ret = glusterd_handle_defrag_start (volinfo, err_str, sizeof (err_str), - GF_DEFRAG_CMD_START_FORCE, + defrag_cmd, glusterd_remove_brick_migrate_cbk, GD_OP_REMOVE_BRICK); if (!ret) -- 1.7.1