7c2869
From 359b99fc520d16bc2a2013b555fda774db03aa90 Mon Sep 17 00:00:00 2001
7c2869
From: Atin Mukherjee <amukherj@redhat.com>
7c2869
Date: Thu, 8 Feb 2018 09:09:00 +0530
7c2869
Subject: [PATCH 648/649] glusterd: import volumes in separate synctask
7c2869
7c2869
With brick multiplexing, to attach a brick to an existing brick process
7c2869
the prerequisite is to have the compatible brick to finish its
7c2869
initialization and portmap sign in and hence the thread might have to go
7c2869
to a sleep and context switch the synctask to allow the brick process to
7c2869
communicate with glusterd. In normal code path, this works fine as
7c2869
glusterd_restart_bricks () is launched through a separate synctask.
7c2869
7c2869
In case there's a mismatch of the volume when glusterd restarts,
7c2869
glusterd_import_friend_volume is invoked and then it tries to call
7c2869
glusterd_start_bricks () from the main thread which eventually may land
7c2869
into the similar situation. Now since this is not done through a
7c2869
separate synctask, the 1st brick will never be able to get its turn to
7c2869
finish all of its handshaking and as a consequence to it, all the bricks
7c2869
will fail to get attached to it.
7c2869
7c2869
Solution : Execute import volume and glusterd restart bricks in separate
7c2869
synctask. Importing snaps had to be also done through synctask as
7c2869
there's a dependency that the parent volume needs to be available for the
7c2869
importing snap functionality to work.
7c2869
7c2869
>upstream mainline patch : https://review.gluster.org/#/c/19357
7c2869
7c2869
Change-Id: I290b244d456afcc9b913ab30be4af040d340428c
7c2869
BUG: 1556670
7c2869
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
7c2869
Reviewed-on: https://code.engineering.redhat.com/gerrit/132724
7c2869
Tested-by: RHGS Build Bot <nigelb@redhat.com>
7c2869
---
7c2869
 xlators/mgmt/glusterd/src/glusterd-op-sm.c         |   9 +-
7c2869
 xlators/mgmt/glusterd/src/glusterd-op-sm.h         |   2 +
7c2869
 .../mgmt/glusterd/src/glusterd-snapshot-utils.c    | 226 +++++++++++++++++----
7c2869
 xlators/mgmt/glusterd/src/glusterd-utils.c         | 166 ++++++++++++---
7c2869
 xlators/mgmt/glusterd/src/glusterd-utils.h         |   4 +
7c2869
 xlators/mgmt/glusterd/src/glusterd.h               |   3 +-
7c2869
 6 files changed, 340 insertions(+), 70 deletions(-)
7c2869
7c2869
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
7c2869
index f034ae8..ab2886e 100644
7c2869
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
7c2869
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
7c2869
@@ -2339,6 +2339,7 @@ glusterd_stop_bricks (glusterd_volinfo_t *volinfo)
7c2869
 
7c2869
 int
7c2869
 glusterd_start_bricks (glusterd_volinfo_t *volinfo)
7c2869
+
7c2869
 {
7c2869
         int                      ret            = -1;
7c2869
         glusterd_brickinfo_t    *brickinfo      = NULL;
7c2869
@@ -2366,14 +2367,6 @@ glusterd_start_bricks (glusterd_volinfo_t *volinfo)
7c2869
                                 goto out;
7c2869
                         }
7c2869
                 }
7c2869
-
7c2869
-        }
7c2869
-        ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
7c2869
-        if (ret) {
7c2869
-                gf_msg (THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL,
7c2869
-                        "Failed to write volinfo for volume %s",
7c2869
-                        volinfo->volname);
7c2869
-                goto out;
7c2869
         }
7c2869
         ret = 0;
7c2869
 out:
7c2869
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
7c2869
index 571905f..9f857b6 100644
7c2869
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h
7c2869
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
7c2869
@@ -269,8 +269,10 @@ glusterd_volume_stats_write_perf (char *brick_path, int32_t blk_size,
7c2869
                 int32_t blk_count, double *throughput, double *time);
7c2869
 gf_boolean_t
7c2869
 glusterd_is_volume_started (glusterd_volinfo_t  *volinfo);
7c2869
+
7c2869
 int
7c2869
 glusterd_start_bricks (glusterd_volinfo_t *volinfo);
7c2869
+
7c2869
 gf_boolean_t
7c2869
 glusterd_are_all_volumes_stopped ();
7c2869
 int
7c2869
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
7c2869
index 3fe424a..e32fb29 100644
7c2869
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
7c2869
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
7c2869
@@ -1738,8 +1738,11 @@ out:
7c2869
  * state, i.e either both would be hosting bricks or both would not be hosting
7c2869
  * bricks, then a decision can't be taken and a peer-reject will happen.
7c2869
  *
7c2869
- * glusterd_compare_and_update_snap() implements the following algorithm to
7c2869
- * perform the above task:
7c2869
+ * glusterd_compare_snap()  & glusterd_update_snaps () implement the following
7c2869
+ * algorithm to perform the above task. Please note the former function tries to
7c2869
+ * iterate over the snaps one at a time and updating the relevant fields in the
7c2869
+ * dictionary and then glusterd_update_snaps () go over all the snaps and update
7c2869
+ * them at one go as part of a synctask.
7c2869
  * Step  1: Start.
7c2869
  * Step  2: Check if the peer is missing a delete or restore on the said snap.
7c2869
  *          If yes, goto step 6.
7c2869
@@ -1764,21 +1767,18 @@ out:
7c2869
  *
7c2869
  */
7c2869
 int32_t
7c2869
-glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
7c2869
-                                  char *peername, uuid_t peerid)
7c2869
+glusterd_compare_snap (dict_t *peer_data, int32_t snap_count,
7c2869
+                       char *peername, uuid_t peerid)
7c2869
 {
7c2869
         char              buf[NAME_MAX]    = "";
7c2869
         char              prefix[NAME_MAX] = "";
7c2869
         char             *peer_snap_name   = NULL;
7c2869
         char             *peer_snap_id     = NULL;
7c2869
-        dict_t           *dict             = NULL;
7c2869
         glusterd_snap_t  *snap             = NULL;
7c2869
         gf_boolean_t      conflict         = _gf_false;
7c2869
         gf_boolean_t      is_local         = _gf_false;
7c2869
         gf_boolean_t      is_hosted        = _gf_false;
7c2869
         gf_boolean_t      missed_delete    = _gf_false;
7c2869
-        gf_boolean_t      remove_lvm       = _gf_true;
7c2869
-
7c2869
         int32_t           ret              = -1;
7c2869
         int32_t           volcount         = 0;
7c2869
         xlator_t         *this             = NULL;
7c2869
@@ -1790,6 +1790,14 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
7c2869
 
7c2869
         snprintf (prefix, sizeof(prefix), "snap%d", snap_count);
7c2869
 
7c2869
+        ret = dict_set_uint32 (peer_data, buf, 0);
7c2869
+        snprintf (buf, sizeof(buf), "%s.accept_peer_data", prefix);
7c2869
+        ret = dict_set_uint32 (peer_data, buf, 0);
7c2869
+        snprintf (buf, sizeof(buf), "%s.remove_lvm", prefix);
7c2869
+        ret = dict_set_uint32 (peer_data, buf, 0);
7c2869
+        snprintf (buf, sizeof(buf), "%s.remove_my_data", prefix);
7c2869
+        ret = dict_set_uint32 (peer_data, buf, 0);
7c2869
+
7c2869
         /* Fetch the peer's snapname */
7c2869
         snprintf (buf, sizeof(buf), "%s.snapname", prefix);
7c2869
         ret = dict_get_str (peer_data, buf, &peer_snap_name);
7c2869
@@ -1846,7 +1854,10 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
7c2869
                         /* Peer has snap with the same snapname
7c2869
                         * and snap_id, which local node doesn't have.
7c2869
                         */
7c2869
-                        goto accept_peer_data;
7c2869
+                        snprintf (buf, sizeof(buf), "%s.accept_peer_data",
7c2869
+                                  prefix);
7c2869
+                        ret = dict_set_uint32 (peer_data, buf, 1);
7c2869
+                        goto out;
7c2869
                 }
7c2869
                 /* Peer has snap with the same snapname
7c2869
                  * and snap_id. Now check if peer has a
7c2869
@@ -1873,12 +1884,15 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
7c2869
                          * When removing data from local node, make sure
7c2869
                          * we are not removing backend lvm of the snap.
7c2869
                          */
7c2869
-                        remove_lvm = _gf_false;
7c2869
-                        goto remove_my_data;
7c2869
+                        snprintf (buf, sizeof(buf), "%s.remove_lvm", prefix);
7c2869
+                        ret = dict_set_uint32 (peer_data, buf, 0);
7c2869
+                        snprintf (buf, sizeof(buf), "%s.remove_my_data",
7c2869
+                                  prefix);
7c2869
+                        ret = dict_set_uint32 (peer_data, buf, 1);
7c2869
                 } else {
7c2869
                         ret = 0;
7c2869
-                        goto out;
7c2869
                 }
7c2869
+                goto out;
7c2869
         }
7c2869
 
7c2869
         /* There is a conflict. Check if the current node is
7c2869
@@ -1930,50 +1944,176 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
7c2869
          * And local node isn't. Hence remove local node's
7c2869
          * data and accept peer data
7c2869
          */
7c2869
-
7c2869
         gf_msg_debug (this->name, 0, "Peer hosts bricks for conflicting "
7c2869
                 "snap(%s). Removing local data. Accepting peer data.",
7c2869
                 peer_snap_name);
7c2869
-        remove_lvm = _gf_true;
7c2869
+        snprintf (buf, sizeof(buf), "%s.remove_lvm", prefix);
7c2869
+        ret = dict_set_uint32 (peer_data, buf, 1);
7c2869
+        snprintf (buf, sizeof(buf), "%s.remove_my_data",
7c2869
+                  prefix);
7c2869
+        ret = dict_set_uint32 (peer_data, buf, 1);
7c2869
+        snprintf (buf, sizeof(buf), "%s.accept_peer_data", prefix);
7c2869
+        ret = dict_set_uint32 (peer_data, buf, 1);
7c2869
 
7c2869
-remove_my_data:
7c2869
+out:
7c2869
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
7c2869
+        return ret;
7c2869
+}
7c2869
 
7c2869
-        dict = dict_new();
7c2869
-        if (!dict) {
7c2869
-                gf_msg (this->name, GF_LOG_ERROR, 0,
7c2869
-                        GD_MSG_DICT_CREATE_FAIL,
7c2869
-                        "Unable to create dict");
7c2869
-                ret = -1;
7c2869
-                goto out;
7c2869
+int32_t
7c2869
+glusterd_update_snaps_synctask (void *opaque)
7c2869
+{
7c2869
+        int32_t           ret              = -1;
7c2869
+        int32_t           snap_count       = 0;
7c2869
+        int               i                = 1;
7c2869
+        xlator_t         *this             = NULL;
7c2869
+        dict_t           *peer_data        = NULL;
7c2869
+        char              buf[NAME_MAX]    = "";
7c2869
+        char              prefix[NAME_MAX] = "";
7c2869
+        char             *peer_snap_name   = NULL;
7c2869
+        char             *peer_snap_id     = NULL;
7c2869
+        char             *peername         = NULL;
7c2869
+        gf_boolean_t      remove_lvm       = _gf_false;
7c2869
+        gf_boolean_t      remove_my_data   = _gf_false;
7c2869
+        gf_boolean_t      accept_peer_data = _gf_false;
7c2869
+        int32_t           val              = 0;
7c2869
+        glusterd_snap_t  *snap             = NULL;
7c2869
+        dict_t           *dict             = NULL;
7c2869
+        glusterd_conf_t  *conf             = NULL;
7c2869
+
7c2869
+        this = THIS;
7c2869
+        GF_ASSERT (this);
7c2869
+
7c2869
+        conf = this->private;
7c2869
+        GF_ASSERT (conf);
7c2869
+
7c2869
+        peer_data = (dict_t *)opaque;
7c2869
+        GF_ASSERT (peer_data);
7c2869
+
7c2869
+        synclock_lock (&conf->big_lock);
7c2869
+
7c2869
+        while (conf->restart_bricks) {
7c2869
+                synclock_unlock (&conf->big_lock);
7c2869
+                sleep (2);
7c2869
+                synclock_lock (&conf->big_lock);
7c2869
         }
7c2869
+        conf->restart_bricks = _gf_true;
7c2869
 
7c2869
-        ret = glusterd_snap_remove (dict, snap, remove_lvm, _gf_false,
7c2869
-                                    _gf_false);
7c2869
+        ret = dict_get_int32 (peer_data, "snap_count", &snap_count);
7c2869
         if (ret) {
7c2869
                 gf_msg (this->name, GF_LOG_ERROR, 0,
7c2869
-                        GD_MSG_SNAP_REMOVE_FAIL,
7c2869
-                        "Failed to remove snap %s", snap->snapname);
7c2869
+                        GD_MSG_DICT_GET_FAILED, "Failed to fetch snap_count");
7c2869
                 goto out;
7c2869
         }
7c2869
-
7c2869
-accept_peer_data:
7c2869
-
7c2869
-        /* Accept Peer Data */
7c2869
-        ret = glusterd_import_friend_snap (peer_data, snap_count,
7c2869
-                                           peer_snap_name, peer_snap_id);
7c2869
+        ret = dict_get_str (peer_data, "peername", &peername);
7c2869
         if (ret) {
7c2869
                 gf_msg (this->name, GF_LOG_ERROR, 0,
7c2869
-                        GD_MSG_SNAP_IMPORT_FAIL,
7c2869
-                        "Failed to import snap %s from peer %s",
7c2869
-                        peer_snap_name, peername);
7c2869
+                        GD_MSG_DICT_GET_FAILED, "Failed to fetch peername");
7c2869
                 goto out;
7c2869
         }
7c2869
 
7c2869
+        for (i = 1; i <= snap_count; i++) {
7c2869
+                snprintf (prefix, sizeof(prefix), "snap%d", i);
7c2869
+
7c2869
+                /* Fetch the peer's snapname */
7c2869
+                snprintf (buf, sizeof(buf), "%s.snapname", prefix);
7c2869
+                ret = dict_get_str (peer_data, buf, &peer_snap_name);
7c2869
+                if (ret) {
7c2869
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
7c2869
+                                GD_MSG_DICT_GET_FAILED,
7c2869
+                                "Unable to fetch snapname from peer: %s",
7c2869
+                                peername);
7c2869
+                        goto out;
7c2869
+                }
7c2869
+
7c2869
+                /* Fetch the peer's snap_id */
7c2869
+                snprintf (buf, sizeof(buf), "%s.snap_id", prefix);
7c2869
+                ret = dict_get_str (peer_data, buf, &peer_snap_id);
7c2869
+                if (ret) {
7c2869
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
7c2869
+                                GD_MSG_DICT_GET_FAILED,
7c2869
+                                "Unable to fetch snap_id from peer: %s",
7c2869
+                                peername);
7c2869
+                        goto out;
7c2869
+                }
7c2869
+
7c2869
+                /* remove_my_data */
7c2869
+                snprintf (buf, sizeof(buf), "%s.remove_my_data", prefix);
7c2869
+                ret = dict_get_int32 (peer_data, buf, &val);
7c2869
+                if (val)
7c2869
+                        remove_my_data = _gf_true;
7c2869
+                else
7c2869
+                        remove_my_data = _gf_false;
7c2869
+
7c2869
+                if (remove_my_data) {
7c2869
+                        snprintf (buf, sizeof(buf), "%s.remove_lvm", prefix);
7c2869
+                        ret = dict_get_int32 (peer_data, buf, &val);
7c2869
+                        if (val)
7c2869
+                                remove_lvm = _gf_true;
7c2869
+                        else
7c2869
+                                remove_lvm = _gf_false;
7c2869
+
7c2869
+                        dict = dict_new();
7c2869
+                        if (!dict) {
7c2869
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
7c2869
+                                        GD_MSG_DICT_CREATE_FAIL,
7c2869
+                                        "Unable to create dict");
7c2869
+                                ret = -1;
7c2869
+                                goto out;
7c2869
+                        }
7c2869
+                        snap = glusterd_find_snap_by_name (peer_snap_name);
7c2869
+                        if (!snap) {
7c2869
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
7c2869
+                                        GD_MSG_MISSED_SNAP_PRESENT,
7c2869
+                                        "Snapshot %s from peer %s missing on "
7c2869
+                                        "localhost", peer_snap_name,
7c2869
+                                        peername);
7c2869
+                                ret = -1;
7c2869
+                                goto out;
7c2869
+                        }
7c2869
+
7c2869
+                        ret = glusterd_snap_remove (dict, snap, remove_lvm,
7c2869
+                                                    _gf_false, _gf_false);
7c2869
+                        if (ret) {
7c2869
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
7c2869
+                                        GD_MSG_SNAP_REMOVE_FAIL,
7c2869
+                                        "Failed to remove snap %s",
7c2869
+                                        snap->snapname);
7c2869
+                                goto out;
7c2869
+                        }
7c2869
+                        if (dict)
7c2869
+                                dict_unref (dict);
7c2869
+                }
7c2869
+                snprintf (buf, sizeof(buf), "%s.accept_peer_data", prefix);
7c2869
+                ret = dict_get_int32 (peer_data, buf, &val);
7c2869
+                if (val)
7c2869
+                        accept_peer_data = _gf_true;
7c2869
+                else
7c2869
+                        accept_peer_data = _gf_false;
7c2869
+
7c2869
+                if (accept_peer_data) {
7c2869
+                        /* Accept Peer Data */
7c2869
+                        ret = glusterd_import_friend_snap (peer_data,
7c2869
+                                                           i,
7c2869
+                                                           peer_snap_name,
7c2869
+                                                           peer_snap_id);
7c2869
+                        if (ret) {
7c2869
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
7c2869
+                                        GD_MSG_SNAP_IMPORT_FAIL,
7c2869
+                                        "Failed to import snap %s from peer %s",
7c2869
+                                        peer_snap_name, peername);
7c2869
+                                goto out;
7c2869
+                        }
7c2869
+                }
7c2869
+        }
7c2869
+
7c2869
 out:
7c2869
+        if (peer_data)
7c2869
+                dict_unref (peer_data);
7c2869
         if (dict)
7c2869
                 dict_unref (dict);
7c2869
+        conf->restart_bricks = _gf_false;
7c2869
 
7c2869
-        gf_msg_trace (this->name, 0, "Returning %d", ret);
7c2869
         return ret;
7c2869
 }
7c2869
 
7c2869
@@ -1988,6 +2128,7 @@ glusterd_compare_friend_snapshots (dict_t *peer_data, char *peername,
7c2869
         int32_t          snap_count   = 0;
7c2869
         int              i            = 1;
7c2869
         xlator_t        *this         = NULL;
7c2869
+        dict_t          *peer_data_copy = NULL;
7c2869
 
7c2869
         this = THIS;
7c2869
         GF_ASSERT (this);
7c2869
@@ -2003,8 +2144,7 @@ glusterd_compare_friend_snapshots (dict_t *peer_data, char *peername,
7c2869
 
7c2869
         for (i = 1; i <= snap_count; i++) {
7c2869
                 /* Compare one snapshot from peer_data at a time */
7c2869
-                ret = glusterd_compare_and_update_snap (peer_data, i, peername,
7c2869
-                                                        peerid);
7c2869
+                ret = glusterd_compare_snap (peer_data, i, peername, peerid);
7c2869
                 if (ret) {
7c2869
                         gf_msg (this->name, GF_LOG_ERROR, 0,
7c2869
                                 GD_MSG_SNAPSHOT_OP_FAILED,
7c2869
@@ -2013,6 +2153,18 @@ glusterd_compare_friend_snapshots (dict_t *peer_data, char *peername,
7c2869
                         goto out;
7c2869
                 }
7c2869
         }
7c2869
+        /* Update the snaps at one go */
7c2869
+        peer_data_copy = dict_copy_with_ref (peer_data, NULL);
7c2869
+        ret = dict_set_str (peer_data_copy, "peername", peername);
7c2869
+        if (ret) {
7c2869
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
7c2869
+                        "Failed to set peername into the dict");
7c2869
+                if (peer_data_copy)
7c2869
+                        dict_unref (peer_data_copy);
7c2869
+                goto out;
7c2869
+        }
7c2869
+        glusterd_launch_synctask (glusterd_update_snaps_synctask,
7c2869
+                                  peer_data_copy);
7c2869
 
7c2869
 out:
7c2869
         gf_msg_trace (this->name, 0, "Returning %d", ret);
7c2869
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
7c2869
index 59ef282..a04ed99 100644
7c2869
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
7c2869
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
7c2869
@@ -3378,6 +3378,14 @@ glusterd_compare_friend_volume (dict_t *peer_data, int32_t count,
7c2869
         *status = GLUSTERD_VOL_COMP_SCS;
7c2869
 
7c2869
 out:
7c2869
+        memset (key, 0, sizeof (key));
7c2869
+        snprintf (key, sizeof (key), "volume%d.update", count);
7c2869
+
7c2869
+        if (*status == GLUSTERD_VOL_COMP_UPDATE_REQ) {
7c2869
+                ret = dict_set_int32 (peer_data, key, 1);
7c2869
+        } else {
7c2869
+                ret = dict_set_int32 (peer_data, key, 0);
7c2869
+        }
7c2869
         if (*status == GLUSTERD_VOL_COMP_RJT) {
7c2869
                 gf_event (EVENT_COMPARE_FRIEND_VOLUME_FAILED, "volume=%s",
7c2869
                           volinfo->volname);
7c2869
@@ -3450,11 +3458,10 @@ glusterd_spawn_daemons (void *opaque)
7c2869
         int             ret     = -1;
7c2869
 
7c2869
         synclock_lock (&conf->big_lock);
7c2869
-        glusterd_restart_bricks (conf);
7c2869
+        glusterd_restart_bricks ();
7c2869
         glusterd_restart_gsyncds (conf);
7c2869
         glusterd_restart_rebalance (conf);
7c2869
         ret = glusterd_snapdsvc_restart ();
7c2869
-
7c2869
         return ret;
7c2869
 }
7c2869
 
7c2869
@@ -4224,20 +4231,35 @@ out:
7c2869
 int32_t
7c2869
 glusterd_volume_disconnect_all_bricks (glusterd_volinfo_t *volinfo)
7c2869
 {
7c2869
-        int                  ret = 0;
7c2869
-        glusterd_brickinfo_t *brickinfo = NULL;
7c2869
+        int                      ret        = 0;
7c2869
+        glusterd_brickinfo_t    *brickinfo  = NULL;
7c2869
+        glusterd_brick_proc_t   *brick_proc = NULL;
7c2869
+        int                      brick_count = 0;
7c2869
+
7c2869
         GF_ASSERT (volinfo);
7c2869
 
7c2869
         cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
7c2869
                 if (glusterd_is_brick_started (brickinfo)) {
7c2869
-                        ret = glusterd_brick_disconnect (brickinfo);
7c2869
-                        if (ret) {
7c2869
-                                gf_msg ("glusterd", GF_LOG_ERROR, 0,
7c2869
-                                        GD_MSD_BRICK_DISCONNECT_FAIL,
7c2869
-                                        "Failed to "
7c2869
-                                        "disconnect %s:%s", brickinfo->hostname,
7c2869
-                                        brickinfo->path);
7c2869
-                                break;
7c2869
+                        /* If brick multiplexing is enabled then we can't
7c2869
+                         * blindly set brickinfo->rpc to NULL as it might impact
7c2869
+                         * the other attached bricks.
7c2869
+                         */
7c2869
+                        ret = glusterd_brick_proc_for_port (brickinfo->port,
7c2869
+                                                            &brick_proc);
7c2869
+                        if (!ret) {
7c2869
+                                brick_count = brick_proc->brick_count;
7c2869
+                        }
7c2869
+                        if (!is_brick_mx_enabled () || brick_count == 0) {
7c2869
+                                ret = glusterd_brick_disconnect (brickinfo);
7c2869
+                                if (ret) {
7c2869
+                                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
7c2869
+                                                GD_MSD_BRICK_DISCONNECT_FAIL,
7c2869
+                                                "Failed to "
7c2869
+                                                "disconnect %s:%s",
7c2869
+                                                brickinfo->hostname,
7c2869
+                                                brickinfo->path);
7c2869
+                                        break;
7c2869
+                                }
7c2869
                         }
7c2869
                 }
7c2869
         }
7c2869
@@ -4477,7 +4499,7 @@ out:
7c2869
 }
7c2869
 
7c2869
 int32_t
7c2869
-glusterd_import_friend_volume (dict_t *peer_data, size_t count)
7c2869
+glusterd_import_friend_volume (dict_t *peer_data, int count)
7c2869
 {
7c2869
 
7c2869
         int32_t                 ret = -1;
7c2869
@@ -4486,6 +4508,8 @@ glusterd_import_friend_volume (dict_t *peer_data, size_t count)
7c2869
         glusterd_volinfo_t      *old_volinfo = NULL;
7c2869
         glusterd_volinfo_t      *new_volinfo = NULL;
7c2869
         glusterd_svc_t          *svc         = NULL;
7c2869
+        int32_t                  update      = 0;
7c2869
+        char                     key[512]    = {0,};
7c2869
 
7c2869
         GF_ASSERT (peer_data);
7c2869
 
7c2869
@@ -4493,6 +4517,15 @@ glusterd_import_friend_volume (dict_t *peer_data, size_t count)
7c2869
         GF_ASSERT (this);
7c2869
         priv = this->private;
7c2869
         GF_ASSERT (priv);
7c2869
+
7c2869
+        memset (key, 0, sizeof (key));
7c2869
+        snprintf (key, sizeof (key), "volume%d.update", count);
7c2869
+        ret = dict_get_int32 (peer_data, key, &update);
7c2869
+        if (ret || !update) {
7c2869
+                /* if update is 0 that means the volume is not imported */
7c2869
+                goto out;
7c2869
+        }
7c2869
+
7c2869
         ret = glusterd_import_volinfo (peer_data, count,
7c2869
                                        &new_volinfo, "volume");
7c2869
         if (ret)
7c2869
@@ -4506,6 +4539,14 @@ glusterd_import_friend_volume (dict_t *peer_data, size_t count)
7c2869
 
7c2869
         ret = glusterd_volinfo_find (new_volinfo->volname, &old_volinfo);
7c2869
         if (0 == ret) {
7c2869
+                if (new_volinfo->version <= old_volinfo->version) {
7c2869
+                        /* When this condition is true, it already means that
7c2869
+                         * the other synctask thread of import volume has
7c2869
+                         * already up to date volume, so just ignore this volume
7c2869
+                         * now
7c2869
+                         */
7c2869
+                        goto out;
7c2869
+                }
7c2869
                 /* Ref count the old_volinfo such that deleting it doesn't crash
7c2869
                  * if its been already in use by other thread
7c2869
                  */
7c2869
@@ -4536,7 +4577,8 @@ glusterd_import_friend_volume (dict_t *peer_data, size_t count)
7c2869
                 }
7c2869
         }
7c2869
 
7c2869
-        ret = glusterd_store_volinfo (new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
7c2869
+        ret = glusterd_store_volinfo (new_volinfo,
7c2869
+                                      GLUSTERD_VOLINFO_VER_AC_NONE);
7c2869
         if (ret) {
7c2869
                 gf_msg (this->name, GF_LOG_ERROR, 0,
7c2869
                         GD_MSG_VOLINFO_STORE_FAIL, "Failed to store "
7c2869
@@ -4564,6 +4606,60 @@ out:
7c2869
 }
7c2869
 
7c2869
 int32_t
7c2869
+glusterd_import_friend_volumes_synctask (void *opaque)
7c2869
+{
7c2869
+        int32_t                 ret = -1;
7c2869
+        int32_t                 count = 0;
7c2869
+        int                     i = 1;
7c2869
+        xlator_t                *this = NULL;
7c2869
+        glusterd_conf_t         *conf = NULL;
7c2869
+        dict_t *peer_data         = NULL;
7c2869
+
7c2869
+        this = THIS;
7c2869
+        GF_ASSERT (this);
7c2869
+
7c2869
+        conf = this->private;
7c2869
+        GF_ASSERT (conf);
7c2869
+
7c2869
+        peer_data = (dict_t *)opaque;
7c2869
+        GF_ASSERT (peer_data);
7c2869
+
7c2869
+        ret = dict_get_int32 (peer_data, "count", &count);
7c2869
+        if (ret)
7c2869
+                goto out;
7c2869
+
7c2869
+        synclock_lock (&conf->big_lock);
7c2869
+
7c2869
+        /* We need to ensure that importing a volume shouldn't race with an
7c2869
+         * other thread where as part of restarting glusterd, bricks are
7c2869
+         * restarted (refer glusterd_restart_bricks ())
7c2869
+         */
7c2869
+        while (conf->restart_bricks) {
7c2869
+                synclock_unlock (&conf->big_lock);
7c2869
+                sleep (2);
7c2869
+                synclock_lock (&conf->big_lock);
7c2869
+        }
7c2869
+        conf->restart_bricks = _gf_true;
7c2869
+
7c2869
+        while (i <= count) {
7c2869
+                ret = glusterd_import_friend_volume (peer_data, i);
7c2869
+                if (ret) {
7c2869
+                        conf->restart_bricks = _gf_false;
7c2869
+                        goto out;
7c2869
+                }
7c2869
+                i++;
7c2869
+        }
7c2869
+        glusterd_svcs_manager (NULL);
7c2869
+        conf->restart_bricks = _gf_false;
7c2869
+out:
7c2869
+        if (peer_data)
7c2869
+                dict_unref (peer_data);
7c2869
+
7c2869
+        gf_msg_debug ("glusterd", 0, "Returning with %d", ret);
7c2869
+        return ret;
7c2869
+}
7c2869
+
7c2869
+int32_t
7c2869
 glusterd_import_friend_volumes (dict_t *peer_data)
7c2869
 {
7c2869
         int32_t                 ret = -1;
7c2869
@@ -4702,8 +4798,10 @@ glusterd_import_global_opts (dict_t *friend_data)
7c2869
                  * recompute if quorum is met. If quorum is not met bricks are
7c2869
                  * not started and those already running are stopped
7c2869
                  */
7c2869
-                if (old_quorum != new_quorum)
7c2869
-                        glusterd_restart_bricks (conf);
7c2869
+                if (old_quorum != new_quorum) {
7c2869
+                        glusterd_launch_synctask (glusterd_restart_bricks,
7c2869
+                                                  NULL);
7c2869
+                }
7c2869
         }
7c2869
 
7c2869
         ret = 0;
7c2869
@@ -4723,6 +4821,7 @@ glusterd_compare_friend_data (dict_t *peer_data, int32_t *status,
7c2869
         gf_boolean_t     update    = _gf_false;
7c2869
         xlator_t        *this      = NULL;
7c2869
         glusterd_conf_t *priv      = NULL;
7c2869
+        dict_t          *peer_data_copy = NULL;
7c2869
 
7c2869
         this = THIS;
7c2869
         GF_ASSERT (this);
7c2869
@@ -4754,18 +4853,23 @@ glusterd_compare_friend_data (dict_t *peer_data, int32_t *status,
7c2869
                         goto out;
7c2869
                 }
7c2869
                 if (GLUSTERD_VOL_COMP_UPDATE_REQ == *status) {
7c2869
-                        ret = glusterd_import_friend_volume (peer_data, i);
7c2869
-                        if (ret) {
7c2869
-                                goto out;
7c2869
-                        }
7c2869
                         update = _gf_true;
7c2869
-                        *status = GLUSTERD_VOL_COMP_NONE;
7c2869
                 }
7c2869
                 i++;
7c2869
         }
7c2869
 
7c2869
         if (update) {
7c2869
-                glusterd_svcs_manager (NULL);
7c2869
+                /* Launch the import friend volume as a separate synctask as it
7c2869
+                 * has to trigger start bricks where we may need to wait for the
7c2869
+                 * first brick to come up before attaching the subsequent bricks
7c2869
+                 * in case brick multiplexing is enabled
7c2869
+                 */
7c2869
+                peer_data_copy = dict_copy_with_ref (peer_data, NULL);
7c2869
+                glusterd_launch_synctask
7c2869
+                        (glusterd_import_friend_volumes_synctask,
7c2869
+                         peer_data_copy);
7c2869
+                if (ret)
7c2869
+                        goto out;
7c2869
         }
7c2869
 
7c2869
 out:
7c2869
@@ -5897,7 +6001,7 @@ out:
7c2869
 }
7c2869
 
7c2869
 int
7c2869
-glusterd_restart_bricks (glusterd_conf_t *conf)
7c2869
+glusterd_restart_bricks (void *opaque)
7c2869
 {
7c2869
         int                   ret            = 0;
7c2869
         glusterd_volinfo_t   *volinfo        = NULL;
7c2869
@@ -5905,6 +6009,7 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
7c2869
         glusterd_snap_t      *snap           = NULL;
7c2869
         gf_boolean_t          start_svcs     = _gf_false;
7c2869
         xlator_t             *this           = NULL;
7c2869
+        glusterd_conf_t      *conf           = NULL;
7c2869
         int                   active_count   = 0;
7c2869
         int                   quorum_count   = 0;
7c2869
         gf_boolean_t          node_quorum    = _gf_false;
7c2869
@@ -5915,6 +6020,17 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
7c2869
         conf = this->private;
7c2869
         GF_VALIDATE_OR_GOTO (this->name, conf, return_block);
7c2869
 
7c2869
+        /* We need to ensure that restarting the bricks during glusterd restart
7c2869
+         * shouldn't race with the import volume thread (refer
7c2869
+         * glusterd_compare_friend_data ())
7c2869
+         */
7c2869
+        while (conf->restart_bricks) {
7c2869
+                synclock_unlock (&conf->big_lock);
7c2869
+                sleep (2);
7c2869
+                synclock_lock (&conf->big_lock);
7c2869
+        }
7c2869
+        conf->restart_bricks = _gf_true;
7c2869
+
7c2869
         ++(conf->blockers);
7c2869
         ret = glusterd_get_quorum_cluster_counts (this, &active_count,
7c2869
                                                   &quorum_count);
7c2869
@@ -5925,8 +6041,9 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
7c2869
                 node_quorum = _gf_true;
7c2869
 
7c2869
         cds_list_for_each_entry (volinfo, &conf->volumes, vol_list) {
7c2869
-                if (volinfo->status != GLUSTERD_STATUS_STARTED)
7c2869
+                if (volinfo->status != GLUSTERD_STATUS_STARTED) {
7c2869
                         continue;
7c2869
+                }
7c2869
                 gf_msg_debug (this->name, 0, "starting the volume %s",
7c2869
                         volinfo->volname);
7c2869
 
7c2869
@@ -6033,6 +6150,7 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
7c2869
 out:
7c2869
         --(conf->blockers);
7c2869
         conf->restart_done = _gf_true;
7c2869
+        conf->restart_bricks = _gf_false;
7c2869
 
7c2869
 return_block:
7c2869
         return ret;
7c2869
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
7c2869
index adc3cb1..7a5bfd9 100644
7c2869
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
7c2869
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
7c2869
@@ -243,6 +243,10 @@ glusterd_pending_node_put_rpc (glusterd_pending_node_t *pending_node);
7c2869
 int
7c2869
 glusterd_remote_hostname_get (rpcsvc_request_t *req,
7c2869
                               char *remote_host, int len);
7c2869
+
7c2869
+int32_t
7c2869
+glusterd_import_friend_volumes_synctask (void *opaque);
7c2869
+
7c2869
 int32_t
7c2869
 glusterd_import_friend_volumes (dict_t *peer_data);
7c2869
 void
7c2869
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
7c2869
index d8a0a6f..b94ccc9 100644
7c2869
--- a/xlators/mgmt/glusterd/src/glusterd.h
7c2869
+++ b/xlators/mgmt/glusterd/src/glusterd.h
7c2869
@@ -190,6 +190,7 @@ typedef struct {
7c2869
         int32_t                    workers;
7c2869
         uint32_t                   blockers;
7c2869
         uint32_t                   mgmt_v3_lock_timeout;
7c2869
+        gf_boolean_t               restart_bricks;
7c2869
 } glusterd_conf_t;
7c2869
 
7c2869
 
7c2869
@@ -1033,7 +1034,7 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
7c2869
                                     dict_t  *volumes, int   count);
7c2869
 
7c2869
 int
7c2869
-glusterd_restart_bricks (glusterd_conf_t *conf);
7c2869
+glusterd_restart_bricks ();
7c2869
 
7c2869
 int32_t
7c2869
 glusterd_volume_txn (rpcsvc_request_t *req, char *volname, int flags,
7c2869
-- 
7c2869
1.8.3.1
7c2869