Blob Blame History Raw
From 8b3ff01d3fa96dddee783e840d331d788ab08d77 Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Wed, 19 Apr 2017 18:04:46 +0530
Subject: [PATCH 459/473] cluster/afr: Return the list of node_uuids for the
 subvolume

Backport of: https://review.gluster.org/17084

Problem:
AFR returned the node uuid of the first node for every file when the
replica set was healthy, so a single node ended up migrating all the
files.

Fix:
With this patch AFR returns the list of node_uuids to the upper layer,
so that it can decide which node migrates which files, resulting in
improved performance. The node uuids are ordered the same way as the
bricks. If a brick is down, its node uuid is set to all zeros.

Change-Id: I2f2c13879c33c7b8baf392433c72686242360765
BUG: 1315781
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/106641
---
 xlators/cluster/afr/src/afr-common.c     |  49 +++++++++++
 xlators/cluster/afr/src/afr-inode-read.c | 141 ++++++++++++++++++++-----------
 xlators/cluster/afr/src/afr.h            |   5 ++
 3 files changed, 145 insertions(+), 50 deletions(-)

diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index e9117b8..316cc40 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -5967,3 +5967,52 @@ mdata_unlock:
         return ret;
 
 }
+
+/*
+ * Concatenates the xattrs in local->replies separated by a delimiter. A child
+ * whose reply is invalid or has a non-zero op_ret contributes @default_str.
+ * @buf is caller-sized and is written from offset 0. Returns 0 and stores the
+ * length incl. the NUL in @serz_len (if set), or the dict_get_str() error.
+ */
+int
+afr_serialize_xattrs_with_delimiter (call_frame_t *frame, xlator_t *this,
+                                     char *buf, const char *default_str,
+                                     int32_t *serz_len, char delimiter)
+{
+        afr_private_t *priv      = this->private;
+        afr_local_t   *local     = frame->local;
+        const char    *str       = NULL;
+        char          *xattr     = NULL;
+        int            i         = 0;
+        int            len       = 0;
+        int            str_len   = 0;
+        int            ret       = -1;
+
+        for (i = 0; i < priv->child_count; i++) {
+                str = default_str;
+                if (local->replies[i].valid && !local->replies[i].op_ret) {
+                        ret = dict_get_str (local->replies[i].xattr,
+                                            local->cont.getxattr.name, &xattr);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, -ret,
+                                        AFR_MSG_DICT_GET_FAILED, "Failed to "
+                                        "get the node_uuid of brick %d", i);
+                                goto out;
+                        }
+                        str = xattr;
+                }
+                str_len = strlen (str);
+                memcpy (buf + len, str, str_len);
+                len += str_len;
+                buf[len++] = delimiter;
+                buf[len] = '\0';
+        }
+        if (len > 0) /* nothing to strip when there are no children */
+                buf[--len] = '\0'; /*remove the last delimiter*/
+        if (serz_len)
+                *serz_len = len + 1;
+        ret = 0;
+
+out:
+        return ret;
+}
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index 2b369ca..20446d8 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -713,57 +713,110 @@ unwind:
         return ret;
 }
 
+
 /**
- * node-uuid cbk uses next child querying mechanism
+ * node-uuid cbk returns the list of node_uuids for the subvolume.
  */
 int32_t
-afr_getxattr_node_uuid_cbk (call_frame_t *frame, void *cookie,
-                            xlator_t *this, int32_t op_ret, int32_t op_errno,
-                            dict_t *dict, dict_t *xdata)
+afr_getxattr_node_uuid_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                            int32_t op_ret, int32_t op_errno, dict_t *dict,
+                            dict_t *xdata)
 {
-        afr_private_t  *priv            = NULL;
-        afr_local_t    *local           = NULL;
-        xlator_t      **children        = NULL;
-        int             unwind          = 1;
-        int             curr_call_child = 0;
+        afr_local_t    *local          = NULL;
+        afr_private_t  *priv           = NULL;
+        int32_t         callcnt        = 0;
+        int             ret            = 0;
+        char           *xattr_serz     = NULL;
+        long            cky            = 0;
+        int32_t         tlen           = 0;
 
+        local = frame->local;
         priv = this->private;
-        children = priv->children;
+        cky = (long) cookie;
 
-        local = frame->local;
+        LOCK (&frame->lock);
+        {
+                callcnt = --local->call_count;
+                local->replies[cky].valid = 1;
+                local->replies[cky].op_ret = op_ret;
+                local->replies[cky].op_errno = op_errno;
 
-        if (op_ret == -1) { /** query the _next_ child */
+                if (op_ret < 0)
+                        goto unlock;
 
-                /**
-                 * _current_ becomes _next_
-                 * If done with all childs and yet no success; give up !
-                 */
-                curr_call_child = (int) ((long)cookie);
-                if (++curr_call_child == priv->child_count)
-                        goto unwind;
+                local->op_ret = 0;
 
-                gf_msg_debug (this->name, op_errno,
-                              "op_ret (-1): Re-querying afr-child (%d/%d)",
-                              curr_call_child, priv->child_count);
-
-                unwind = 0;
-                STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
-                                   (void *) (long) curr_call_child,
-                                   children[curr_call_child],
-                                   children[curr_call_child]->fops->getxattr,
-                                   &local->loc,
-                                   local->cont.getxattr.name,
-                                   NULL);
+                if (!local->xdata_rsp && xdata)
+                        local->xdata_rsp = dict_ref (xdata);
+                local->replies[cky].xattr = dict_ref (dict);
         }
 
- unwind:
-        if (unwind)
-                AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict,
-                                  NULL);
+unlock:
+        UNLOCK (&frame->lock);
 
-        return 0;
+        if (!callcnt) {
+                if (local->op_ret != 0) {
+                        /* All bricks gave an error. */
+                        local->op_errno = afr_final_errno (local, priv);
+                        goto unwind;
+                }
+
+                /* Down bricks and replies with a non zero op_ret are stored
+                 * as UUID0_STR, so the buffer is sized for priv->child_count
+                 * uuids, with two extra bytes reserved per uuid. */
+                local->cont.getxattr.xattr_len = (strlen (UUID0_STR) + 2) *
+                                                  priv->child_count;
+
+                if (!local->dict)
+                        local->dict = dict_new ();
+                if (!local->dict) {
+                        local->op_ret = -1;
+                        local->op_errno = ENOMEM;
+                        goto unwind;
+                }
+
+                xattr_serz = GF_CALLOC (local->cont.getxattr.xattr_len,
+                                        sizeof (char), gf_common_mt_char);
+                if (!xattr_serz) {
+                        local->op_ret = -1;
+                        local->op_errno = ENOMEM;
+                        goto unwind;
+                }
+
+                ret = afr_serialize_xattrs_with_delimiter (frame, this,
+                                                           xattr_serz,
+                                                           UUID0_STR, &tlen,
+                                                           ' ');
+                if (ret) {
+                        /* Not handed to local->dict yet, so free it here. */
+                        GF_FREE (xattr_serz);
+                        local->op_ret = -1;
+                        local->op_errno = ENOMEM;
+                        goto unwind;
+                }
+                ret = dict_set_dynstr (local->dict, local->cont.getxattr.name,
+                                       xattr_serz);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR,
+                                -ret, AFR_MSG_DICT_SET_FAILED,
+                                "Cannot set node_uuid key in dict");
+                        local->op_ret = -1;
+                        local->op_errno = ENOMEM;
+                } else {
+                        local->op_ret = local->cont.getxattr.xattr_len - 1;
+                        local->op_errno = 0;
+                }
+
+unwind:
+                AFR_STACK_UNWIND (getxattr, frame, local->op_ret,
+                                  local->op_errno, local->dict,
+                                  local->xdata_rsp);
+        }
+
+        return ret;
 }
 
+
 int32_t
 afr_getxattr_quota_size_cbk (call_frame_t *frame, void *cookie,
                              xlator_t *this, int32_t op_ret, int32_t op_errno,
@@ -1374,6 +1427,8 @@ afr_is_special_xattr (const char *name, fop_getxattr_cbk_t *cbk,
                 *cbk = afr_common_getxattr_stime_cbk;
         } else if (strcmp (name, QUOTA_SIZE_KEY) == 0) {
                 *cbk = afr_getxattr_quota_size_cbk;
+        } else if (!strcmp (name, GF_XATTR_NODE_UUID_KEY)) {
+                *cbk = afr_getxattr_node_uuid_cbk;
         } else {
                 is_spl = _gf_false;
         }
@@ -1489,9 +1544,7 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
               loc_t *loc, const char *name, dict_t *xdata)
 {
         afr_private_t           *priv         = NULL;
-        xlator_t                **children    = NULL;
         afr_local_t             *local        = NULL;
-        int                     i             = 0;
         int32_t                 op_errno      = 0;
         int                     ret           = -1;
         fop_getxattr_cbk_t      cbk           = NULL;
@@ -1503,8 +1556,6 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
 
         priv     = this->private;
 
-        children = priv->children;
-
         loc_copy (&local->loc, loc);
 
 	local->op = GF_FOP_GETXATTR;
@@ -1545,16 +1596,6 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
                 return 0;
         }
 
-        if (XATTR_IS_NODE_UUID (name)) {
-                i = 0;
-                STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
-                                   (void *) (long) i,
-                                   children[i],
-                                   children[i]->fops->getxattr,
-                                   loc, name, xdata);
-                return 0;
-        }
-
 no_name:
 
 	afr_read_txn (frame, this, local->loc.inode, afr_getxattr_wind,
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 92b54f8..05f8249 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -1232,4 +1232,9 @@ afr_quorum_errno (afr_private_t *priv);
 gf_boolean_t
 afr_is_inode_refresh_reqd (inode_t *inode, xlator_t *this,
                            int event_gen1, int event_gen2);
+
+int
+afr_serialize_xattrs_with_delimiter (call_frame_t *frame, xlator_t *this,
+                                     char *buf, const char *default_str,
+                                     int32_t *serz_len, char delimiter);
 #endif /* __AFR_H__ */
-- 
1.8.3.1