21ab4e
From 8b3ff01d3fa96dddee783e840d331d788ab08d77 Mon Sep 17 00:00:00 2001
21ab4e
From: karthik-us <ksubrahm@redhat.com>
21ab4e
Date: Wed, 19 Apr 2017 18:04:46 +0530
21ab4e
Subject: [PATCH 459/473] cluster/afr: Return the list of node_uuids for the
21ab4e
 subvolume
21ab4e
21ab4e
Backport of: https://review.gluster.org/17084
21ab4e
21ab4e
Problem:
21ab4e
AFR was returning the node uuid of the first node for every file if
21ab4e
the replica set was healthy, which was resulting in only one node
21ab4e
migrating all the files.
21ab4e
21ab4e
Fix:
21ab4e
With this patch AFR returns the list of node_uuids to the upper layer,
21ab4e
so that they can decide on which node to migrate which files, resulting
21ab4e
in improved performance. Ordering of node uuids will be maintained based
21ab4e
on the ordering of the bricks. If a brick is down, then the node uuid
21ab4e
for that brick will be set to all zeros.
21ab4e
21ab4e
Change-Id: I2f2c13879c33c7b8baf392433c72686242360765
21ab4e
BUG: 1315781
21ab4e
Signed-off-by: karthik-us <ksubrahm@redhat.com>
21ab4e
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
21ab4e
Reviewed-on: https://code.engineering.redhat.com/gerrit/106641
21ab4e
---
21ab4e
 xlators/cluster/afr/src/afr-common.c     |  49 +++++++++++
21ab4e
 xlators/cluster/afr/src/afr-inode-read.c | 141 ++++++++++++++++++++-----------
21ab4e
 xlators/cluster/afr/src/afr.h            |   5 ++
21ab4e
 3 files changed, 145 insertions(+), 50 deletions(-)
21ab4e
21ab4e
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
21ab4e
index e9117b8..316cc40 100644
21ab4e
--- a/xlators/cluster/afr/src/afr-common.c
21ab4e
+++ b/xlators/cluster/afr/src/afr-common.c
21ab4e
@@ -5967,3 +5967,52 @@ mdata_unlock:
21ab4e
         return ret;
21ab4e
 
21ab4e
 }
21ab4e
+
21ab4e
+/* Serializes into buf, in brick order and separated by delimiter, every
21ab4e
+ * brick's value for local->cont.getxattr.name (default_str for a brick whose
21ab4e
+ * reply is invalid or failed). buf must be large enough; *serz_len, if given,
21ab4e
+ * gets the length including the NUL. Returns 0, or the dict_get_str error. */
21ab4e
+int
21ab4e
+afr_serialize_xattrs_with_delimiter (call_frame_t *frame, xlator_t *this,
21ab4e
+                                     char *buf, const char *default_str,
21ab4e
+                                     int32_t *serz_len, char delimiter)
21ab4e
+{
21ab4e
+        afr_private_t *priv      = NULL;
21ab4e
+        afr_local_t   *local     = NULL;
21ab4e
+        const char    *value     = NULL;
21ab4e
+        char          *xattr     = NULL;
21ab4e
+        size_t         value_len = 0;
21ab4e
+        int            i         = 0;
21ab4e
+        int            len       = 0;
21ab4e
+        int            ret       = -1;
21ab4e
+
21ab4e
+        priv = this->private;
21ab4e
+        local = frame->local;
21ab4e
+
21ab4e
+        for (i = 0; i < priv->child_count; i++) {
21ab4e
+                value = default_str; /* kept unless the brick replied ok */
21ab4e
+                if (local->replies[i].valid && !local->replies[i].op_ret) {
21ab4e
+                        ret = dict_get_str (local->replies[i].xattr,
21ab4e
+                                            local->cont.getxattr.name, &xattr);
21ab4e
+                        if (ret) {
21ab4e
+                                gf_msg (this->name, GF_LOG_ERROR, -ret,
21ab4e
+                                        AFR_MSG_DICT_GET_FAILED, "Failed to "
21ab4e
+                                        "get the node_uuid of brick %d", i);
21ab4e
+                                goto out;
21ab4e
+                        }
21ab4e
+                        value = xattr;
21ab4e
+                }
21ab4e
+                value_len = strlen (value);
21ab4e
+                memcpy (buf + len, value, value_len); /* append at offset */
21ab4e
+                len += value_len;
21ab4e
+                buf[len++] = delimiter;
21ab4e
+                buf[len] = '\0';
21ab4e
+        }
21ab4e
+        buf[len ? --len : 0] = '\0'; /* drop the trailing delimiter */
21ab4e
+        if (serz_len)
21ab4e
+                *serz_len = len + 1;
21ab4e
+        ret = 0;
21ab4e
+
21ab4e
+out:
21ab4e
+        return ret;
21ab4e
+}
21ab4e
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
21ab4e
index 2b369ca..20446d8 100644
21ab4e
--- a/xlators/cluster/afr/src/afr-inode-read.c
21ab4e
+++ b/xlators/cluster/afr/src/afr-inode-read.c
21ab4e
@@ -713,57 +713,110 @@ unwind:
21ab4e
         return ret;
21ab4e
 }
21ab4e
 
21ab4e
+
21ab4e
 /**
21ab4e
- * node-uuid cbk uses next child querying mechanism
21ab4e
+ * node-uuid cbk: stores each brick's reply in local->replies and, once the
21ab4e
+ * last reply is in, unwinds with the list of node_uuids for the subvolume
21ab4e
+ * (UUID0_STR stands in for every brick without a successful reply).
21ab4e
  */
21ab4e
 int32_t
21ab4e
-afr_getxattr_node_uuid_cbk (call_frame_t *frame, void *cookie,
21ab4e
-                            xlator_t *this, int32_t op_ret, int32_t op_errno,
21ab4e
-                            dict_t *dict, dict_t *xdata)
21ab4e
+afr_getxattr_node_uuid_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
21ab4e
+                            int32_t op_ret, int32_t op_errno, dict_t *dict,
21ab4e
+                            dict_t *xdata)
21ab4e
 {
21ab4e
-        afr_private_t  *priv            = NULL;
21ab4e
-        afr_local_t    *local           = NULL;
21ab4e
-        xlator_t      **children        = NULL;
21ab4e
-        int             unwind          = 1;
21ab4e
-        int             curr_call_child = 0;
21ab4e
+        afr_local_t    *local          = NULL;
21ab4e
+        afr_private_t  *priv           = NULL;
21ab4e
+        int32_t         callcnt        = 0;
21ab4e
+        int             ret            = 0;
21ab4e
+        char           *xattr_serz     = NULL;
21ab4e
+        long            cky            = 0;
21ab4e
 
21ab4e
+        local = frame->local;
21ab4e
         priv = this->private;
21ab4e
-        children = priv->children;
21ab4e
+        cky = (long) cookie; /* index of the replying brick */
21ab4e
 
21ab4e
-        local = frame->local;
21ab4e
+        LOCK (&frame->lock);
21ab4e
+        {
21ab4e
+                callcnt = --local->call_count;
21ab4e
+                local->replies[cky].valid = 1;
21ab4e
+                local->replies[cky].op_ret = op_ret;
21ab4e
+                local->replies[cky].op_errno = op_errno;
21ab4e
 
21ab4e
-        if (op_ret == -1) { /** query the _next_ child */
21ab4e
+                if (op_ret < 0)
21ab4e
+                        goto unlock;
21ab4e
 
21ab4e
-                /**
21ab4e
-                 * _current_ becomes _next_
21ab4e
-                 * If done with all childs and yet no success; give up !
21ab4e
-                 */
21ab4e
-                curr_call_child = (int) ((long)cookie);
21ab4e
-                if (++curr_call_child == priv->child_count)
21ab4e
-                        goto unwind;
21ab4e
+                local->op_ret = 0;
21ab4e
 
21ab4e
-                gf_msg_debug (this->name, op_errno,
21ab4e
-                              "op_ret (-1): Re-querying afr-child (%d/%d)",
21ab4e
-                              curr_call_child, priv->child_count);
21ab4e
-
21ab4e
-                unwind = 0;
21ab4e
-                STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
21ab4e
-                                   (void *) (long) curr_call_child,
21ab4e
-                                   children[curr_call_child],
21ab4e
-                                   children[curr_call_child]->fops->getxattr,
21ab4e
-                                   &local->loc,
21ab4e
-                                   local->cont.getxattr.name,
21ab4e
-                                   NULL);
21ab4e
+                if (!local->xdata_rsp && xdata)
21ab4e
+                        local->xdata_rsp = dict_ref (xdata);
21ab4e
+                local->replies[cky].xattr = dict_ref (dict);
21ab4e
         }
21ab4e
 
21ab4e
- unwind:
21ab4e
-        if (unwind)
21ab4e
-                AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict,
21ab4e
-                                  NULL);
21ab4e
+unlock:
21ab4e
+        UNLOCK (&frame->lock);
21ab4e
 
21ab4e
-        return 0;
21ab4e
+        if (!callcnt) {
21ab4e
+                if (local->op_ret != 0) {
21ab4e
+                        /* All bricks gave an error. */
21ab4e
+                        local->op_errno = afr_final_errno (local, priv);
21ab4e
+                        goto unwind;
21ab4e
+                }
21ab4e
+
21ab4e
+                /* Bricks without a successful reply are reported as
21ab4e
+                 * UUID0_STR, so size the buffer for priv->child_count
21ab4e
+                 * uuids of that length plus two spare bytes each. */
21ab4e
+                local->cont.getxattr.xattr_len = (strlen (UUID0_STR) + 2) *
21ab4e
+                                                  priv->child_count;
21ab4e
+
21ab4e
+                if (!local->dict)
21ab4e
+                        local->dict = dict_new ();
21ab4e
+                if (!local->dict) {
21ab4e
+                        local->op_ret = -1;
21ab4e
+                        local->op_errno = ENOMEM;
21ab4e
+                        goto unwind;
21ab4e
+                }
21ab4e
+
21ab4e
+                xattr_serz = GF_CALLOC (local->cont.getxattr.xattr_len,
21ab4e
+                                        sizeof (char), gf_common_mt_char);
21ab4e
+                if (!xattr_serz) {
21ab4e
+                        local->op_ret = -1;
21ab4e
+                        local->op_errno = ENOMEM;
21ab4e
+                        goto unwind;
21ab4e
+                }
21ab4e
+
21ab4e
+                ret = afr_serialize_xattrs_with_delimiter (frame, this,
21ab4e
+                                                           xattr_serz,
21ab4e
+                                                           UUID0_STR, NULL,
21ab4e
+                                                           ' ');
21ab4e
+                if (ret) {
21ab4e
+                        GF_FREE (xattr_serz); /* not owned by a dict yet */
21ab4e
+                        local->op_ret = -1;
21ab4e
+                        local->op_errno = ENOMEM;
21ab4e
+                        goto unwind;
21ab4e
+                }
21ab4e
+                ret = dict_set_dynstr (local->dict, local->cont.getxattr.name,
21ab4e
+                                       xattr_serz);
21ab4e
+                if (ret) {
21ab4e
+                        gf_msg (this->name, GF_LOG_ERROR,
21ab4e
+                                -ret, AFR_MSG_DICT_SET_FAILED,
21ab4e
+                                "Cannot set node_uuid key in dict");
21ab4e
+                        local->op_ret = -1;
21ab4e
+                        local->op_errno = ENOMEM;
21ab4e
+                } else {
21ab4e
+                        local->op_ret = local->cont.getxattr.xattr_len - 1;
21ab4e
+                        local->op_errno = 0;
21ab4e
+                }
21ab4e
+
21ab4e
+unwind:
21ab4e
+                AFR_STACK_UNWIND (getxattr, frame, local->op_ret,
21ab4e
+                                  local->op_errno, local->dict,
21ab4e
+                                  local->xdata_rsp);
21ab4e
+        }
21ab4e
+
21ab4e
+        return ret;
21ab4e
 }
21ab4e
 
21ab4e
+
21ab4e
 int32_t
21ab4e
 afr_getxattr_quota_size_cbk (call_frame_t *frame, void *cookie,
21ab4e
                              xlator_t *this, int32_t op_ret, int32_t op_errno,
21ab4e
@@ -1374,6 +1427,8 @@ afr_is_special_xattr (const char *name, fop_getxattr_cbk_t *cbk,
21ab4e
                 *cbk = afr_common_getxattr_stime_cbk;
21ab4e
         } else if (strcmp (name, QUOTA_SIZE_KEY) == 0) {
21ab4e
                 *cbk = afr_getxattr_quota_size_cbk;
21ab4e
+        } else if (!strcmp (name, GF_XATTR_NODE_UUID_KEY)) {
21ab4e
+                *cbk = afr_getxattr_node_uuid_cbk;
21ab4e
         } else {
21ab4e
                 is_spl = _gf_false;
21ab4e
         }
21ab4e
@@ -1489,9 +1544,7 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
21ab4e
               loc_t *loc, const char *name, dict_t *xdata)
21ab4e
 {
21ab4e
         afr_private_t           *priv         = NULL;
21ab4e
-        xlator_t                **children    = NULL;
21ab4e
         afr_local_t             *local        = NULL;
21ab4e
-        int                     i             = 0;
21ab4e
         int32_t                 op_errno      = 0;
21ab4e
         int                     ret           = -1;
21ab4e
         fop_getxattr_cbk_t      cbk           = NULL;
21ab4e
@@ -1503,8 +1556,6 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
21ab4e
 
21ab4e
         priv     = this->private;
21ab4e
 
21ab4e
-        children = priv->children;
21ab4e
-
21ab4e
         loc_copy (&local->loc, loc);
21ab4e
 
21ab4e
 	local->op = GF_FOP_GETXATTR;
21ab4e
@@ -1545,16 +1596,6 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
21ab4e
                 return 0;
21ab4e
         }
21ab4e
 
21ab4e
-        if (XATTR_IS_NODE_UUID (name)) {
21ab4e
-                i = 0;
21ab4e
-                STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
21ab4e
-                                   (void *) (long) i,
21ab4e
-                                   children[i],
21ab4e
-                                   children[i]->fops->getxattr,
21ab4e
-                                   loc, name, xdata);
21ab4e
-                return 0;
21ab4e
-        }
21ab4e
-
21ab4e
 no_name:
21ab4e
 
21ab4e
 	afr_read_txn (frame, this, local->loc.inode, afr_getxattr_wind,
21ab4e
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
21ab4e
index 92b54f8..05f8249 100644
21ab4e
--- a/xlators/cluster/afr/src/afr.h
21ab4e
+++ b/xlators/cluster/afr/src/afr.h
21ab4e
@@ -1232,4 +1232,9 @@ afr_quorum_errno (afr_private_t *priv);
21ab4e
 gf_boolean_t
21ab4e
 afr_is_inode_refresh_reqd (inode_t *inode, xlator_t *this,
21ab4e
                            int event_gen1, int event_gen2);
21ab4e
+
21ab4e
+int
21ab4e
+afr_serialize_xattrs_with_delimiter (call_frame_t *frame, xlator_t *this,
21ab4e
+                                     char *buf, const char *default_str,
21ab4e
+                                     int32_t *serz_len, char delimiter);
21ab4e
 #endif /* __AFR_H__ */
21ab4e
-- 
21ab4e
1.8.3.1
21ab4e