Blob Blame History Raw
From 2b95b8fad1c35aaa995c664cf668a9be0d8c621b Mon Sep 17 00:00:00 2001
From: Anuradha Talur <atalur@redhat.com>
Date: Wed, 16 Mar 2016 10:55:09 +0530
Subject: [PATCH 44/80] glusterd / afr : Enable auto heal when replica count increases

        Backport of http://review.gluster.org/13806

In replicate volumes, when a brick is added to a replicate
group, heal to the new brick should be triggered.
Also, the new brick should not be considered as source for
healing till it is up to date.

Previously, extended attributes had to be set manually on
the bricks for this to happen. This patch is the first part
of the work to automate this process.

        >Change-Id: Ica83592aab8edbe49e2bb9d8d4824cf5c76324b7
        >BUG: 1320020
        >Reviewed-on: http://review.gluster.org/13806
        >Smoke: Gluster Build System <jenkins@build.gluster.com>
        >Tested-by: Anuradha Talur <atalur@redhat.com>
        >NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
        >CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
        >Reviewed-by: Atin Mukherjee <amukherj@redhat.com>

Change-Id: I183fd491d94534cf72e0bcc691911555b350fa17
BUG: 1248998
Signed-off-by: Anuradha Talur <atalur@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/71424
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
---
 libglusterfs/src/globals.h                         |    2 +-
 libglusterfs/src/glusterfs.h                       |    1 +
 xlators/mgmt/glusterd/src/glusterd-brick-ops.c     |   86 ++++++++++---
 xlators/mgmt/glusterd/src/glusterd-replace-brick.c |   76 +----------
 xlators/mgmt/glusterd/src/glusterd-utils.c         |  141 ++++++++++++++++++++
 xlators/mgmt/glusterd/src/glusterd-utils.h         |   10 ++
 xlators/mgmt/glusterd/src/glusterd-volgen.c        |   52 +++++++
 xlators/mgmt/glusterd/src/glusterd-volgen.h        |    3 +
 8 files changed, 280 insertions(+), 91 deletions(-)

diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h
index ad0aef8..4fe4bcb 100644
--- a/libglusterfs/src/globals.h
+++ b/libglusterfs/src/globals.h
@@ -42,7 +42,7 @@
  */
 #define GD_OP_VERSION_MIN  1 /* MIN is the fresh start op-version, mostly
                                 should not change */
-#define GD_OP_VERSION_MAX  GD_OP_VERSION_3_7_7 /* MAX VERSION is the maximum
+#define GD_OP_VERSION_MAX  GD_OP_VERSION_3_7_10 /* MAX VERSION is the maximum
                                                   count in VME table, should
                                                   keep changing with
                                                   introduction of newer
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index 81caf12..d1db087 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -176,6 +176,7 @@
 #define GF_AFR_SBRAIN_CHOICE "replica.split-brain-choice"
 #define GF_AFR_SPB_CHOICE_TIMEOUT "replica.split-brain-choice-timeout"
 #define GF_AFR_SBRAIN_RESOLVE "replica.split-brain-heal-finalize"
+#define GF_AFR_ADD_BRICK "trusted.add-brick"
 #define GF_AFR_REPLACE_BRICK "trusted.replace-brick"
 #define GF_AFR_DIRTY "trusted.afr.dirty"
 
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index 859795b..540f8f7 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -26,6 +26,7 @@
 #include "glusterd-messages.h"
 #include "glusterd-server-quorum.h"
 #include "run.h"
+#include "glusterd-volgen.h"
 #include <sys/signal.h>
 
 /* misc */
@@ -1238,6 +1239,7 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
         char                         *brick_mount_dir  = NULL;
         xlator_t                     *this           = NULL;
         glusterd_conf_t              *conf           = NULL;
+        gf_boolean_t                  is_valid_add_brick = _gf_false;
 
         this = THIS;
         GF_ASSERT (this);
@@ -1325,6 +1327,7 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
         /* Gets changed only if the options are given in add-brick cli */
         if (type)
                 volinfo->type = type;
+
         if (replica_count) {
                 volinfo->replica_count = replica_count;
         }
@@ -1360,6 +1363,27 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
                         CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT;
 #endif
 
+        /* This check needs to be added to distinguish between
+         * attach-tier commands and add-brick commands.
+         * When a tier is attached, adding is done via add-brick
+         * and setting of pending xattrs shouldn't be done for
+         * attach-tiers as they are virtually new volumes.
+         */
+        if (glusterd_is_volume_replicate (volinfo)) {
+                if (replica_count &&
+                    !dict_get (dict, "attach-tier") &&
+                    conf->op_version >= GD_OP_VERSION_3_7_10) {
+                        is_valid_add_brick = _gf_true;
+                        ret = generate_dummy_client_volfiles (volinfo);
+                        if (ret) {
+                                gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_VOLFILE_CREATE_FAIL,
+                                        "Failed to create volfile.");
+                                goto out;
+                                }
+                        }
+        }
+
         while (i <= count) {
                 ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo,
                                                               &brickinfo);
@@ -1391,6 +1415,16 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
                         }
                 }
 
+                /* if the volume is a replicate volume, do: */
+                if (is_valid_add_brick) {
+                        if (!gf_uuid_compare (brickinfo->uuid, MY_UUID)) {
+                                ret = glusterd_handle_replicate_brick_ops (
+                                                           volinfo, brickinfo,
+                                                           GD_OP_ADD_BRICK);
+                                if (ret < 0)
+                                        goto out;
+                        }
+                }
                 ret = glusterd_brick_start (volinfo, brickinfo,
                                             _gf_true);
                 if (ret)
@@ -1519,22 +1553,6 @@ glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
         conf = this->private;
         GF_ASSERT (conf);
 
-        ret = dict_get_int32 (dict, "replica-count", &replica_count);
-        if (ret) {
-                gf_msg_debug (THIS->name, 0,
-                        "Unable to get replica count");
-        }
-
-        if (replica_count > 0) {
-                ret = op_version_check (this, GD_OP_VER_PERSISTENT_AFR_XATTRS,
-                                        msg, sizeof(msg));
-                if (ret) {
-                        gf_msg (this->name, GF_LOG_ERROR, 0,
-                                GD_MSG_OP_VERSION_MISMATCH, "%s", msg);
-                        *op_errstr = gf_strdup (msg);
-                        goto out;
-                }
-        }
         ret = dict_get_str (dict, "volname", &volname);
         if (ret) {
                 gf_msg (THIS->name, GF_LOG_ERROR, errno,
@@ -1555,6 +1573,42 @@ glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
         if (ret)
                 goto out;
 
+        ret = dict_get_int32 (dict, "replica-count", &replica_count);
+        if (ret) {
+                gf_msg_debug (THIS->name, 0,
+                        "Unable to get replica count");
+        }
+
+        if (replica_count > 0) {
+                ret = op_version_check (this, GD_OP_VER_PERSISTENT_AFR_XATTRS,
+                                        msg, sizeof(msg));
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_OP_VERSION_MISMATCH, "%s", msg);
+                        *op_errstr = gf_strdup (msg);
+                        goto out;
+                }
+        }
+
+        /* Do not allow add-brick for stopped volumes when replica-count
+         * is being increased.
+         */
+        if (glusterd_is_volume_replicate (volinfo)) {
+                if (conf->op_version >= GD_OP_VERSION_3_7_10 &&
+                    !dict_get (dict, "attach-tier") &&
+                    replica_count &&
+                    GLUSTERD_STATUS_STOPPED == volinfo->status) {
+                        ret = -1;
+                        snprintf (msg, sizeof (msg), " Volume must not be in"
+                                  " stopped state when replica-count needs to "
+                                  " be increased.");
+                        gf_msg (THIS->name, GF_LOG_ERROR, 0,
+                                GD_MSG_BRICK_ADD_FAIL, "%s", msg);
+                        *op_errstr = gf_strdup (msg);
+                        goto out;
+                }
+        }
+
         if (conf->op_version > GD_OP_VERSION_3_7_5 &&
             is_origin_glusterd (dict)) {
                 ret = glusterd_validate_quorum (this, GD_OP_ADD_BRICK, dict,
diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
index d0a1126..ecfdc0a 100644
--- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
+++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
@@ -44,78 +44,6 @@ glusterd_mgmt_v3_initiate_replace_brick_cmd_phases (rpcsvc_request_t *req,
                                                     glusterd_op_t op,
                                                     dict_t *dict);
 int
-glusterd_handle_replicate_replace_brick (glusterd_volinfo_t *volinfo,
-                                         glusterd_brickinfo_t *brickinfo)
-{
-        int32_t                    ret               = -1;
-        char                       tmpmount[]        = "/tmp/mntXXXXXX";
-        char                       logfile[PATH_MAX] = {0,};
-        int                        dirty[3]          = {0,};
-        runner_t                   runner            = {0};
-        glusterd_conf_t           *priv              = NULL;
-        char                      *pid               = NULL;
-
-        priv = THIS->private;
-
-        dirty[2] = hton32(1);
-
-        ret = sys_lsetxattr (brickinfo->path, GF_AFR_DIRTY, dirty,
-                             sizeof (dirty), 0);
-        if (ret == -1) {
-                gf_msg (THIS->name, GF_LOG_ERROR, errno,
-                        GD_MSG_SETXATTR_FAIL, "Failed to set extended"
-                        " attribute %s : %s.", GF_AFR_DIRTY, strerror (errno));
-                goto out;
-        }
-
-        if (mkdtemp (tmpmount) == NULL) {
-                gf_msg (THIS->name, GF_LOG_ERROR, errno,
-                        GD_MSG_DIR_OP_FAILED,
-                        "failed to create a temporary mount directory.");
-                ret = -1;
-                goto out;
-        }
-        snprintf (logfile, sizeof (logfile),
-                  DEFAULT_LOG_FILE_DIRECTORY"/%s-replace-brick-mount.log",
-                  volinfo->volname);
-
-        ret = gf_asprintf (&pid, "%d", GF_CLIENT_PID_SELF_HEALD);
-        if (ret < 0)
-                goto out;
-
-        runinit (&runner);
-        runner_add_args (&runner, SBIN_DIR"/glusterfs",
-                         "-s", "localhost",
-                         "--volfile-id", volinfo->volname,
-                         "--client-pid", pid,
-                         "-l", logfile, tmpmount, NULL);
-        synclock_unlock (&priv->big_lock);
-        ret = runner_run (&runner);
-
-        if (ret) {
-                runner_log (&runner, THIS->name, GF_LOG_ERROR, "mount command"
-                            "failed.");
-                goto lock;
-        }
-        ret = sys_lsetxattr (tmpmount, GF_AFR_REPLACE_BRICK,
-                             brickinfo->brick_id, sizeof (brickinfo->brick_id),
-                             0);
-        if (ret == -1)
-                gf_msg (THIS->name, GF_LOG_ERROR, errno,
-                        GD_MSG_SETXATTR_FAIL, "Failed to set extended"
-                        " attribute %s : %s", GF_AFR_REPLACE_BRICK,
-                        strerror (errno));
-        gf_umount_lazy (THIS->name, tmpmount, 1);
-lock:
-        synclock_lock (&priv->big_lock);
-out:
-        if (pid)
-                GF_FREE (pid);
-        gf_msg_debug ("glusterd", 0, "Returning with ret");
-        return ret;
-}
-
-int
 __glusterd_handle_replace_brick (rpcsvc_request_t *req)
 {
         int32_t                         ret = -1;
@@ -659,8 +587,8 @@ glusterd_op_perform_replace_brick (glusterd_volinfo_t  *volinfo,
         /* if the volume is a replicate volume, do: */
         if (glusterd_is_volume_replicate (volinfo)) {
                 if (!gf_uuid_compare (new_brickinfo->uuid, MY_UUID)) {
-                        ret = glusterd_handle_replicate_replace_brick
-                                  (volinfo, new_brickinfo);
+                        ret = glusterd_handle_replicate_brick_ops (volinfo,
+                                        new_brickinfo, GD_OP_REPLACE_BRICK);
                         if (ret < 0)
                                 goto out;
                 }
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index bc4f7ae..639404a 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -7102,6 +7102,33 @@ void glusterd_update_tier_status (glusterd_volinfo_t *volinfo) {
 }
 
 int
+glusterd_get_dummy_client_filepath (char *filepath,
+                                    glusterd_volinfo_t *volinfo,
+                                    gf_transport_type type)
+{
+        int   ret             = 0;
+        char  path[PATH_MAX]  = {0,};
+
+        switch (type) {
+        case GF_TRANSPORT_TCP:
+        case GF_TRANSPORT_BOTH_TCP_RDMA:
+                snprintf (filepath, PATH_MAX,
+                          "/tmp/%s.tcp-fuse.vol", volinfo->volname);
+                break;
+
+        case GF_TRANSPORT_RDMA:
+                snprintf (filepath, PATH_MAX,
+                          "/tmp/%s.rdma-fuse.vol", volinfo->volname);
+                break;
+        default:
+                ret = -1;
+                break;
+        }
+
+        return ret;
+}
+
+int
 glusterd_volume_defrag_restart (glusterd_volinfo_t *volinfo, char *op_errstr,
                               size_t len, int cmd, defrag_cbk_fn_t cbk)
 {
@@ -11057,3 +11084,117 @@ gd_get_shd_key (int type)
         }
         return key;
 }
+
+/* Prepare a brick of a replicate volume for healing after add-brick or
+ * replace-brick: set the AFR dirty xattr on the brick path, mount the
+ * volume on a temporary directory using the self-heal daemon client pid,
+ * and set the op-specific xattr (its value is the brick id) on that mount.
+ * priv->big_lock is released around the mount command and that xattr call.
+ * Returns 0 on success and non-zero on failure or for an unsupported op.
+ */
+int
+glusterd_handle_replicate_brick_ops (glusterd_volinfo_t *volinfo,
+                                     glusterd_brickinfo_t *brickinfo,
+                                     glusterd_op_t op)
+{
+        int32_t                    ret               = -1;
+        char                       tmpmount[]        = "/tmp/mntXXXXXX";
+        char                       logfile[PATH_MAX] = {0,};
+        int                        dirty[3]          = {0,};
+        runner_t                   runner            = {0};
+        glusterd_conf_t           *priv              = NULL;
+        char                      *pid               = NULL;
+        char                       vpath[PATH_MAX]   = {0,};
+        char                      *volfileserver     = NULL;
+        const char                *xattr_key         = NULL;
+
+        priv = THIS->private;
+        GF_VALIDATE_OR_GOTO (THIS->name, priv, out);
+
+        dirty[2] = hton32(1);
+
+        ret = sys_lsetxattr (brickinfo->path, GF_AFR_DIRTY, dirty,
+                             sizeof (dirty), 0);
+        if (ret == -1) {
+                gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                        GD_MSG_SETXATTR_FAIL, "Failed to set extended"
+                        " attribute %s : %s.", GF_AFR_DIRTY, strerror (errno));
+                goto out;
+        }
+
+        if (mkdtemp (tmpmount) == NULL) {
+                gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                        GD_MSG_DIR_OP_FAILED,
+                        "failed to create a temporary mount directory.");
+                ret = -1;
+                goto out;
+        }
+
+        ret = gf_asprintf (&pid, "%d", GF_CLIENT_PID_SELF_HEALD);
+        if (ret < 0)
+                goto out;
+
+        switch (op) {
+        case GD_OP_REPLACE_BRICK:
+                xattr_key = GF_AFR_REPLACE_BRICK;
+                if (dict_get_str (THIS->options,
+                                  "transport.socket.bind-address",
+                                  &volfileserver) != 0)
+                        volfileserver = "localhost";
+
+                snprintf (logfile, sizeof (logfile),
+                          DEFAULT_LOG_FILE_DIRECTORY"/%s-replace-brick-mount.log",
+                          volinfo->volname);
+                runinit (&runner);
+                runner_add_args (&runner, SBIN_DIR"/glusterfs",
+                                 "-s", volfileserver,
+                                 "--volfile-id", volinfo->volname,
+                                 "--client-pid", pid,
+                                 "-l", logfile, tmpmount, NULL);
+                break;
+
+        case GD_OP_ADD_BRICK:
+                xattr_key = GF_AFR_ADD_BRICK;
+                snprintf (logfile, sizeof (logfile),
+                          DEFAULT_LOG_FILE_DIRECTORY"/%s-add-brick-mount.log",
+                          volinfo->volname);
+                ret = glusterd_get_dummy_client_filepath (vpath, volinfo,
+                                                    volinfo->transport_type);
+                if (ret) {
+                        gf_log (THIS->name, GF_LOG_ERROR,
+                                "Failed to get volfile path");
+                        goto out;
+                }
+                runinit (&runner);
+                runner_add_args (&runner, SBIN_DIR"/glusterfs",
+                                 "--volfile", vpath,
+                                 "--client-pid", pid,
+                                 "-l", logfile, tmpmount, NULL);
+                break;
+        default:
+                /* No mount command was built for this op; do not run one. */
+                ret = -1;
+                goto out;
+        }
+        synclock_unlock (&priv->big_lock);
+        ret = runner_run (&runner);
+
+        if (ret) {
+                gf_log (THIS->name, GF_LOG_ERROR, "mount command failed.");
+                goto lock;
+        }
+        ret = sys_lsetxattr (tmpmount, xattr_key, brickinfo->brick_id,
+                             sizeof (brickinfo->brick_id), 0);
+        if (ret == -1)
+                gf_msg (THIS->name, GF_LOG_ERROR, errno,
+                        GD_MSG_SETXATTR_FAIL, "Failed to set extended"
+                        " attribute %s : %s", xattr_key, strerror (errno));
+        gf_umount_lazy (THIS->name, tmpmount, 1);
+lock:
+        synclock_lock (&priv->big_lock);
+out:
+        if (pid)
+                GF_FREE (pid);
+        gf_msg_debug ("glusterd", 0, "Returning with ret %d", ret);
+        return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index e6380f5..6b74e90 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -695,4 +695,14 @@ int
 glusterd_volume_brick_for_each (glusterd_volinfo_t *volinfo, void *data,
                int (*fn) (glusterd_volinfo_t *, glusterd_brickinfo_t *,
                           dict_t *mod_dict, void *));
+
+int
+glusterd_get_dummy_client_filepath (char *filepath,
+                                    glusterd_volinfo_t *volinfo,
+                                    gf_transport_type type);
+
+int
+glusterd_handle_replicate_brick_ops (glusterd_volinfo_t *volinfo,
+                                     glusterd_brickinfo_t *brickinfo,
+                                     glusterd_op_t op);
 #endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 8a86ab5..086f053 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -5167,6 +5167,58 @@ enumerate_transport_reqs (gf_transport_type type, char **types)
 }
 
 int
+generate_dummy_client_volfiles (glusterd_volinfo_t *volinfo)
+{
+        int                i                  = 0;
+        int                ret                = -1;
+        char               filepath[PATH_MAX] = {0,};
+        char               *types[]           = {NULL, NULL, NULL};
+        dict_t             *dict              = NULL;
+        xlator_t           *this              = NULL;
+        gf_transport_type  type               = GF_TRANSPORT_TCP;
+
+        this = THIS;
+
+        enumerate_transport_reqs (volinfo->transport_type, types);
+        dict = dict_new ();
+        if (!dict)
+                goto out;
+        for (i = 0; types[i]; i++) {
+                memset (filepath, 0, sizeof (filepath));
+                ret = dict_set_str (dict, "client-transport-type", types[i]);
+                if (ret)
+                        goto out;
+                type = transport_str_to_type (types[i]);
+
+                ret = dict_set_uint32 (dict, "trusted-client", GF_CLIENT_OTHER);
+                if (ret)
+                        goto out;
+
+                ret = glusterd_get_dummy_client_filepath (filepath,
+                                                          volinfo, type);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, EINVAL,
+                                GD_MSG_INVALID_ENTRY,
+                                "Received invalid transport-type.");
+                        goto out;
+                }
+
+                ret = generate_single_transport_client_volfile (volinfo,
+                                                                filepath,
+                                                                dict);
+                if (ret)
+                        goto out;
+        }
+
+out:
+        if (dict)
+                dict_unref (dict);
+
+        gf_msg_trace ("glusterd", 0, "Returning %d", ret);
+        return ret;
+}
+
+int
 generate_client_volfiles (glusterd_volinfo_t *volinfo,
                           glusterd_client_type_t client_type)
 {
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.h b/xlators/mgmt/glusterd/src/glusterd-volgen.h
index f1dc823..c86a87b 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.h
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.h
@@ -293,4 +293,7 @@ glusterd_volopt_validate (glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
 gf_boolean_t
 gd_is_self_heal_enabled (glusterd_volinfo_t *volinfo, dict_t *dict);
 
+int
+generate_dummy_client_volfiles (glusterd_volinfo_t *volinfo);
+
 #endif
-- 
1.7.1