From 8b3ff01d3fa96dddee783e840d331d788ab08d77 Mon Sep 17 00:00:00 2001
From: karthik-us
Date: Wed, 19 Apr 2017 18:04:46 +0530
Subject: [PATCH 459/473] cluster/afr: Return the list of node_uuids for the subvolume

Backport of: https://review.gluster.org/17084

Problem:
AFR was returning the node uuid of the first node for every file if
the replica set was healthy, which was resulting in only one node
migrating all the files.

Fix:
With this patch AFR returns the list of node_uuids to the upper layer,
so that they can decide on which node to migrate which files, resulting
in improved performance. Ordering of node uuids will be maintained based
on the ordering of the bricks. If a brick is down, then the node uuid
for that will be set to all zeros.

Change-Id: I2f2c13879c33c7b8baf392433c72686242360765
BUG: 1315781
Signed-off-by: karthik-us
Signed-off-by: Ravishankar N
Reviewed-on: https://code.engineering.redhat.com/gerrit/106641
---
 xlators/cluster/afr/src/afr-common.c     |  69 +++++++++++++++
 xlators/cluster/afr/src/afr-inode-read.c | 154 ++++++++++++++++++++++-----------
 xlators/cluster/afr/src/afr.h            |   5 ++
 3 files changed, 178 insertions(+), 50 deletions(-)

diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index e9117b8..316cc40 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -5967,3 +5967,72 @@ mdata_unlock:
 
         return ret;
 }
+
+/*
+ * Concatenates the xattrs in local->replies separated by a delimiter.
+ *
+ * Children are visited in index order. A child whose reply is not valid or
+ * whose op_ret is non-zero contributes default_str; every other child
+ * contributes the value stored under local->cont.getxattr.name in its reply.
+ * The delimiter after the last value is dropped.
+ *
+ * buf must hold an empty string on entry and be large enough for every value
+ * plus one delimiter each; its size is not passed in, so sizing it is the
+ * caller's job. If serz_len is non-NULL it receives the serialized length
+ * including the terminating NUL.
+ *
+ * Returns 0 on success, or the non-zero dict_get_str() result when a usable
+ * reply does not carry the key.
+ */
+int
+afr_serialize_xattrs_with_delimiter (call_frame_t *frame, xlator_t *this,
+                                     char *buf, const char *default_str,
+                                     int32_t *serz_len, char delimiter)
+{
+        afr_private_t *priv      = NULL;
+        afr_local_t   *local     = NULL;
+        const char    *value     = NULL;
+        char          *xattr     = NULL;
+        size_t         value_len = 0;
+        int            i         = 0;
+        int            len       = 0;
+        int            ret       = -1;
+
+        priv = this->private;
+        local = frame->local;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (!local->replies[i].valid || local->replies[i].op_ret) {
+                        value = default_str;
+                } else {
+                        ret = dict_get_str (local->replies[i].xattr,
+                                            local->cont.getxattr.name, &xattr);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, -ret,
+                                        AFR_MSG_DICT_GET_FAILED,
+                                        "Failed to get the node_uuid of brick "
+                                        "%d", i);
+                                goto out;
+                        }
+                        value = xattr;
+                }
+
+                /* Append at the tracked offset instead of rescanning buf. */
+                value_len = strlen (value);
+                memcpy (buf + len, value, value_len);
+                len += (int) value_len;
+                buf[len++] = delimiter;
+                buf[len] = '\0';
+        }
+
+        /* Remove the last delimiter; nothing to strip if there were no
+         * children, so never index before the start of buf. */
+        if (len > 0)
+                buf[--len] = '\0';
+        if (serz_len)
+                *serz_len = len + 1;
+        ret = 0;
+
+out:
+        return ret;
+}
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index 2b369ca..20446d8 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -713,57 +713,123 @@ unwind:
         return ret;
 }
 
+
 /**
- * node-uuid cbk uses next child querying mechanism
+ * node-uuid cbk returns the list of node_uuids for the subvolume.
+ *
+ * Each invocation records the replying child's result in
+ * local->replies[cookie] and decrements local->call_count. The invocation
+ * that brings the count to zero serializes the node uuids of all children,
+ * space separated and in child order (UUID0_STR stands in for children that
+ * failed or did not reply), stores the string in local->dict under
+ * local->cont.getxattr.name and unwinds the getxattr. If every child
+ * failed, the unwind carries the errno picked by afr_final_errno().
  */
 int32_t
-afr_getxattr_node_uuid_cbk (call_frame_t *frame, void *cookie,
-                            xlator_t *this, int32_t op_ret, int32_t op_errno,
-                            dict_t *dict, dict_t *xdata)
+afr_getxattr_node_uuid_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                            int32_t op_ret, int32_t op_errno, dict_t *dict,
+                            dict_t *xdata)
 {
-        afr_private_t  *priv            = NULL;
-        afr_local_t    *local           = NULL;
-        xlator_t      **children        = NULL;
-        int             unwind          = 1;
-        int             curr_call_child = 0;
+        afr_local_t   *local      = NULL;
+        afr_private_t *priv       = NULL;
+        int32_t        callcnt    = 0;
+        int            ret        = 0;
+        char          *xattr_serz = NULL;
+        long           cky        = 0;
+        int32_t        tlen       = 0;
 
+        local = frame->local;
         priv = this->private;
-        children = priv->children;
+        cky = (long) cookie;
 
-        local = frame->local;
+        LOCK (&frame->lock);
+        {
+                callcnt = --local->call_count;
+                local->replies[cky].valid = 1;
+                local->replies[cky].op_ret = op_ret;
+                local->replies[cky].op_errno = op_errno;
 
-        if (op_ret == -1) { /** query the _next_ child */
+                if (op_ret < 0)
+                        goto unlock;
 
-                /**
-                 * _current_ becomes _next_
-                 * If done with all childs and yet no success; give up !
-                 */
-                curr_call_child = (int) ((long)cookie);
-                if (++curr_call_child == priv->child_count)
-                        goto unwind;
+                local->op_ret = 0;
 
-                gf_msg_debug (this->name, op_errno,
-                              "op_ret (-1): Re-querying afr-child (%d/%d)",
-                              curr_call_child, priv->child_count);
-
-                unwind = 0;
-                STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
-                                   (void *) (long) curr_call_child,
-                                   children[curr_call_child],
-                                   children[curr_call_child]->fops->getxattr,
-                                   &local->loc,
-                                   local->cont.getxattr.name,
-                                   NULL);
+                if (!local->xdata_rsp && xdata)
+                        local->xdata_rsp = dict_ref (xdata);
+                local->replies[cky].xattr = dict_ref (dict);
         }
 
- unwind:
-        if (unwind)
-                AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict,
-                                  NULL);
+unlock:
+        UNLOCK (&frame->lock);
 
-        return 0;
+        if (!callcnt) {
+
+                if (local->op_ret != 0) {
+                        /* All bricks gave an error. */
+                        local->op_errno = afr_final_errno (local, priv);
+                        goto unwind;
+                }
+
+                /* Down bricks and bricks with a non-zero op_ret are reported
+                 * as UUID0_STR, so size the buffer for priv->child_count
+                 * uuids of that length plus two extra bytes each, which
+                 * covers the delimiter and the terminating NUL. */
+                local->cont.getxattr.xattr_len = (strlen (UUID0_STR) + 2) *
+                                                 priv->child_count;
+
+                if (!local->dict)
+                        local->dict = dict_new ();
+                if (!local->dict) {
+                        local->op_ret = -1;
+                        local->op_errno = ENOMEM;
+                        goto unwind;
+                }
+
+                xattr_serz = GF_CALLOC (local->cont.getxattr.xattr_len,
+                                        sizeof (char), gf_common_mt_char);
+
+                if (!xattr_serz) {
+                        local->op_ret = -1;
+                        local->op_errno = ENOMEM;
+                        goto unwind;
+                }
+
+                ret = afr_serialize_xattrs_with_delimiter (frame, this,
+                                                           xattr_serz,
+                                                           UUID0_STR, &tlen,
+                                                           ' ');
+                if (ret) {
+                        /* The buffer was never handed to the dict, so it
+                         * is still ours to release. */
+                        GF_FREE (xattr_serz);
+                        xattr_serz = NULL;
+                        local->op_ret = -1;
+                        local->op_errno = ENOMEM;
+                        goto unwind;
+                }
+                ret = dict_set_dynstr (local->dict, local->cont.getxattr.name,
+                                       xattr_serz);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR,
+                                -ret, AFR_MSG_DICT_SET_FAILED,
+                                "Cannot set node_uuid key in dict");
+                        local->op_ret = -1;
+                        local->op_errno = ENOMEM;
+                } else {
+                        local->op_ret = local->cont.getxattr.xattr_len - 1;
+                        local->op_errno = 0;
+                }
+
+unwind:
+                AFR_STACK_UNWIND (getxattr, frame, local->op_ret,
+                                  local->op_errno, local->dict,
+                                  local->xdata_rsp);
+        }
+
+        return ret;
 }
 
+
 int32_t
 afr_getxattr_quota_size_cbk (call_frame_t *frame, void *cookie,
                              xlator_t *this, int32_t op_ret, int32_t op_errno,
@@ -1374,6 +1440,8 @@ afr_is_special_xattr (const char *name, fop_getxattr_cbk_t *cbk,
                 *cbk = afr_common_getxattr_stime_cbk;
         } else if (strcmp (name, QUOTA_SIZE_KEY) == 0) {
                 *cbk = afr_getxattr_quota_size_cbk;
+        } else if (!strcmp (name, GF_XATTR_NODE_UUID_KEY)) {
+                *cbk = afr_getxattr_node_uuid_cbk;
         } else {
                 is_spl = _gf_false;
         }
@@ -1489,9 +1557,7 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
               loc_t *loc, const char *name, dict_t *xdata)
 {
         afr_private_t           *priv         = NULL;
-        xlator_t                **children    = NULL;
         afr_local_t             *local        = NULL;
-        int                     i             = 0;
         int32_t                 op_errno      = 0;
         int                     ret           = -1;
         fop_getxattr_cbk_t      cbk           = NULL;
@@ -1503,8 +1569,6 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
 
         priv = this->private;
 
-        children = priv->children;
-
         loc_copy (&local->loc, loc);
 
         local->op = GF_FOP_GETXATTR;
@@ -1545,16 +1609,6 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
                 return 0;
         }
 
-        if (XATTR_IS_NODE_UUID (name)) {
-                i = 0;
-                STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
-                                   (void *) (long) i,
-                                   children[i],
-                                   children[i]->fops->getxattr,
-                                   loc, name, xdata);
-                return 0;
-        }
-
 no_name:
         afr_read_txn (frame, this, local->loc.inode,
                       afr_getxattr_wind,
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 92b54f8..05f8249 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -1232,4 +1232,9 @@ afr_quorum_errno (afr_private_t *priv);
 gf_boolean_t
 afr_is_inode_refresh_reqd (inode_t *inode, xlator_t *this,
                            int event_gen1, int event_gen2);
+
+int
+afr_serialize_xattrs_with_delimiter (call_frame_t *frame, xlator_t *this,
+                                     char *buf, const char *default_str,
+                                     int32_t *serz_len, char delimiter);
 #endif /* __AFR_H__ */
-- 
1.8.3.1