From 2f5d6b2923a7f9fe74cf820e5a4cdf894eb0a2bd Mon Sep 17 00:00:00 2001
From: Atin Mukherjee <amukherj@redhat.com>
Date: Thu, 8 Feb 2018 09:09:00 +0530
Subject: [PATCH 147/148] glusterd: import volumes in separate synctask

With brick multiplexing, to attach a brick to an existing brick process
the prerequisite is to have the compatible brick finish its
initialization and portmap sign in, and hence the thread might have to
go to sleep and context switch the synctask to allow the brick process
to communicate with glusterd. In the normal code path this works fine,
as glusterd_restart_bricks () is launched through a separate synctask.

In case there is a volume mismatch when glusterd restarts,
glusterd_import_friend_volume is invoked and then tries to call
glusterd_start_bricks () from the main thread, which may eventually
land in a similar situation. Since this is not done through a separate
synctask, the 1st brick will never get its turn to finish all of its
handshaking and, as a consequence, all the bricks will fail to get
attached to it.

Solution: Execute import volume and glusterd restart bricks in separate
synctasks. Importing snaps also had to be done through a synctask, as
the parent volume needs to be available for the snap import
functionality to work.

>upstream mainline patch : https://review.gluster.org/#/c/19357
                           https://review.gluster.org/#/c/19536/
                           https://review.gluster.org/#/c/19539/

Change-Id: I290b244d456afcc9b913ab30be4af040d340428c
BUG: 1540600
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/129937
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 ...e-with-other-processes-accessing-mounted-path.t |  13 ++
 xlators/mgmt/glusterd/src/glusterd-op-sm.c         |   9 +-
 xlators/mgmt/glusterd/src/glusterd-op-sm.h         |   2 +
 .../mgmt/glusterd/src/glusterd-snapshot-utils.c    | 229 +++++++++++++++++----
 xlators/mgmt/glusterd/src/glusterd-utils.c         | 166 ++++++++++++---
 xlators/mgmt/glusterd/src/glusterd-utils.h         |   4 +
 xlators/mgmt/glusterd/src/glusterd.h               |   3 +-
 7 files changed, 356 insertions(+), 70 deletions(-)

diff --git a/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t b/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t
index c5a0088..22f98d2 100644
--- a/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t
+++ b/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t
@@ -92,20 +92,33 @@ EXPECT "0" mounted_snaps ${V1}
 # handled during handshake.
 
 activate_snapshots
+
+EXPECT 'Started' snapshot_status ${V0}_snap;
+EXPECT 'Started' snapshot_status ${V1}_snap;
+
 kill_glusterd 2
+
 deactivate_snapshots
+EXPECT 'Stopped' snapshot_status ${V0}_snap;
+EXPECT 'Stopped' snapshot_status ${V1}_snap;
+
 TEST start_glusterd 2
 
 # Updates form friend should reflect as snap was deactivated while glusterd
 # process was inactive and mount point should also not exist.
 
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count;
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" mounted_snaps ${V0}
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" mounted_snaps ${V1}
 
 kill_glusterd 2
 activate_snapshots
+EXPECT 'Started' snapshot_status ${V0}_snap;
+EXPECT 'Started' snapshot_status ${V1}_snap;
 TEST start_glusterd 2
 
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count;
+
 # Updates form friend should reflect as snap was activated while glusterd
 # process was inactive and mount point should exist.
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" mounted_snaps ${V0}
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index 2fc2e3b..81cde21 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -2426,6 +2426,7 @@ glusterd_stop_bricks (glusterd_volinfo_t *volinfo)
 
 int
 glusterd_start_bricks (glusterd_volinfo_t *volinfo)
+
 {
         int                      ret            = -1;
         glusterd_brickinfo_t    *brickinfo      = NULL;
@@ -2454,14 +2455,6 @@ glusterd_start_bricks (glusterd_volinfo_t *volinfo)
                                 goto out;
                         }
                 }
-
-        }
-        ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
-        if (ret) {
-                gf_msg (THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL,
-                        "Failed to write volinfo for volume %s",
-                        volinfo->volname);
-                goto out;
         }
         ret = 0;
 out:
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
index 48275c5..24b1944 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
@@ -275,8 +275,10 @@ glusterd_volume_stats_write_perf (char *brick_path, int32_t blk_size,
                 int32_t blk_count, double *throughput, double *time);
 gf_boolean_t
 glusterd_is_volume_started (glusterd_volinfo_t  *volinfo);
+
 int
 glusterd_start_bricks (glusterd_volinfo_t *volinfo);
+
 gf_boolean_t
 glusterd_are_all_volumes_stopped ();
 int
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
index 3f03d2b..ad206f6 100644
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
@@ -1758,8 +1758,11 @@ out:
  * state, i.e either both would be hosting bricks or both would not be hosting
  * bricks, then a decision can't be taken and a peer-reject will happen.
  *
- * glusterd_compare_and_update_snap() implements the following algorithm to
- * perform the above task:
+ * glusterd_compare_snap()  & glusterd_update_snaps () implement the following
+ * algorithm to perform the above task. Please note the former function tries to
+ * iterate over the snaps one at a time and updating the relevant fields in the
+ * dictionary and then glusterd_update_snaps () go over all the snaps and update
+ * them at one go as part of a synctask.
  * Step  1: Start.
  * Step  2: Check if the peer is missing a delete or restore on the said snap.
  *          If yes, goto step 6.
@@ -1784,21 +1787,18 @@ out:
  *
  */
 int32_t
-glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
-                                  char *peername, uuid_t peerid)
+glusterd_compare_snap (dict_t *peer_data, int32_t snap_count,
+                       char *peername, uuid_t peerid)
 {
         char              buf[NAME_MAX]    = "";
         char              prefix[NAME_MAX] = "";
         char             *peer_snap_name   = NULL;
         char             *peer_snap_id     = NULL;
-        dict_t           *dict             = NULL;
         glusterd_snap_t  *snap             = NULL;
         gf_boolean_t      conflict         = _gf_false;
         gf_boolean_t      is_local         = _gf_false;
         gf_boolean_t      is_hosted        = _gf_false;
         gf_boolean_t      missed_delete    = _gf_false;
-        gf_boolean_t      remove_lvm       = _gf_true;
-
         int32_t           ret              = -1;
         int32_t           volcount         = 0;
         xlator_t         *this             = NULL;
@@ -1810,6 +1810,14 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
 
         snprintf (prefix, sizeof(prefix), "snap%d", snap_count);
 
+        ret = dict_set_uint32 (peer_data, buf, 0);
+        snprintf (buf, sizeof(buf), "%s.accept_peer_data", prefix);
+        ret = dict_set_uint32 (peer_data, buf, 0);
+        snprintf (buf, sizeof(buf), "%s.remove_lvm", prefix);
+        ret = dict_set_uint32 (peer_data, buf, 0);
+        snprintf (buf, sizeof(buf), "%s.remove_my_data", prefix);
+        ret = dict_set_uint32 (peer_data, buf, 0);
+
         /* Fetch the peer's snapname */
         snprintf (buf, sizeof(buf), "%s.snapname", prefix);
         ret = dict_get_str (peer_data, buf, &peer_snap_name);
@@ -1866,7 +1874,10 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
                         /* Peer has snap with the same snapname
                        * and snap_id, which local node doesn't have.
                        */
-                        goto accept_peer_data;
+                        snprintf (buf, sizeof(buf), "%s.accept_peer_data",
+                                  prefix);
+                        ret = dict_set_uint32 (peer_data, buf, 1);
+                        goto out;
                 }
                 /* Peer has snap with the same snapname
                  * and snap_id. Now check if peer has a
@@ -1893,12 +1904,18 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
                          * When removing data from local node, make sure
                          * we are not removing backend lvm of the snap.
                          */
-                        remove_lvm = _gf_false;
-                        goto remove_my_data;
+                        snprintf (buf, sizeof(buf), "%s.remove_lvm", prefix);
+                        ret = dict_set_uint32 (peer_data, buf, 0);
+                        snprintf (buf, sizeof(buf), "%s.remove_my_data",
+                                  prefix);
+                        ret = dict_set_uint32 (peer_data, buf, 1);
+                        snprintf (buf, sizeof(buf), "%s.accept_peer_data",
+                                  prefix);
+                        ret = dict_set_uint32 (peer_data, buf, 1);
                 } else {
                         ret = 0;
-                        goto out;
                 }
+                goto out;
         }
 
         /* There is a conflict. Check if the current node is
@@ -1950,50 +1967,176 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
          * And local node isn't. Hence remove local node's
          * data and accept peer data
          */
-
         gf_msg_debug (this->name, 0, "Peer hosts bricks for conflicting "
                 "snap(%s). Removing local data. Accepting peer data.",
                 peer_snap_name);
-        remove_lvm = _gf_true;
+        snprintf (buf, sizeof(buf), "%s.remove_lvm", prefix);
+        ret = dict_set_uint32 (peer_data, buf, 1);
+        snprintf (buf, sizeof(buf), "%s.remove_my_data",
+                  prefix);
+        ret = dict_set_uint32 (peer_data, buf, 1);
+        snprintf (buf, sizeof(buf), "%s.accept_peer_data", prefix);
+        ret = dict_set_uint32 (peer_data, buf, 1);
 
-remove_my_data:
+out:
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
+        return ret;
+}
 
-        dict = dict_new();
-        if (!dict) {
-                gf_msg (this->name, GF_LOG_ERROR, 0,
-                        GD_MSG_DICT_CREATE_FAIL,
-                        "Unable to create dict");
-                ret = -1;
-                goto out;
+int32_t
+glusterd_update_snaps_synctask (void *opaque)
+{
+        int32_t           ret              = -1;
+        int32_t           snap_count       = 0;
+        int               i                = 1;
+        xlator_t         *this             = NULL;
+        dict_t           *peer_data        = NULL;
+        char              buf[NAME_MAX]    = "";
+        char              prefix[NAME_MAX] = "";
+        char             *peer_snap_name   = NULL;
+        char             *peer_snap_id     = NULL;
+        char             *peername         = NULL;
+        gf_boolean_t      remove_lvm       = _gf_false;
+        gf_boolean_t      remove_my_data   = _gf_false;
+        gf_boolean_t      accept_peer_data = _gf_false;
+        int32_t           val              = 0;
+        glusterd_snap_t  *snap             = NULL;
+        dict_t           *dict             = NULL;
+        glusterd_conf_t  *conf             = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        peer_data = (dict_t *)opaque;
+        GF_ASSERT (peer_data);
+
+        synclock_lock (&conf->big_lock);
+
+        while (conf->restart_bricks) {
+                synclock_unlock (&conf->big_lock);
+                sleep (2);
+                synclock_lock (&conf->big_lock);
         }
+        conf->restart_bricks = _gf_true;
 
-        ret = glusterd_snap_remove (dict, snap, remove_lvm, _gf_false,
-                                    _gf_false);
+        ret = dict_get_int32 (peer_data, "snap_count", &snap_count);
         if (ret) {
                 gf_msg (this->name, GF_LOG_ERROR, 0,
-                        GD_MSG_SNAP_REMOVE_FAIL,
-                        "Failed to remove snap %s", snap->snapname);
+                        GD_MSG_DICT_GET_FAILED, "Failed to fetch snap_count");
                 goto out;
         }
-
-accept_peer_data:
-
-        /* Accept Peer Data */
-        ret = glusterd_import_friend_snap (peer_data, snap_count,
-                                           peer_snap_name, peer_snap_id);
+        ret = dict_get_str (peer_data, "peername", &peername);
         if (ret) {
                 gf_msg (this->name, GF_LOG_ERROR, 0,
-                        GD_MSG_SNAP_IMPORT_FAIL,
-                        "Failed to import snap %s from peer %s",
-                        peer_snap_name, peername);
+                        GD_MSG_DICT_GET_FAILED, "Failed to fetch peername");
                 goto out;
         }
 
+        for (i = 1; i <= snap_count; i++) {
+                snprintf (prefix, sizeof(prefix), "snap%d", i);
+
+                /* Fetch the peer's snapname */
+                snprintf (buf, sizeof(buf), "%s.snapname", prefix);
+                ret = dict_get_str (peer_data, buf, &peer_snap_name);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Unable to fetch snapname from peer: %s",
+                                peername);
+                        goto out;
+                }
+
+                /* Fetch the peer's snap_id */
+                snprintf (buf, sizeof(buf), "%s.snap_id", prefix);
+                ret = dict_get_str (peer_data, buf, &peer_snap_id);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                GD_MSG_DICT_GET_FAILED,
+                                "Unable to fetch snap_id from peer: %s",
+                                peername);
+                        goto out;
+                }
+
+                /* remove_my_data */
+                snprintf (buf, sizeof(buf), "%s.remove_my_data", prefix);
+                ret = dict_get_int32 (peer_data, buf, &val);
+                if (val)
+                        remove_my_data = _gf_true;
+                else
+                        remove_my_data = _gf_false;
+
+                if (remove_my_data) {
+                        snprintf (buf, sizeof(buf), "%s.remove_lvm", prefix);
+                        ret = dict_get_int32 (peer_data, buf, &val);
+                        if (val)
+                                remove_lvm = _gf_true;
+                        else
+                                remove_lvm = _gf_false;
+
+                        dict = dict_new();
+                        if (!dict) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_DICT_CREATE_FAIL,
+                                        "Unable to create dict");
+                                ret = -1;
+                                goto out;
+                        }
+                        snap = glusterd_find_snap_by_name (peer_snap_name);
+                        if (!snap) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_MISSED_SNAP_PRESENT,
+                                        "Snapshot %s from peer %s missing on "
+                                        "localhost", peer_snap_name,
+                                        peername);
+                                ret = -1;
+                                goto out;
+                        }
+
+                        ret = glusterd_snap_remove (dict, snap, remove_lvm,
+                                                    _gf_false, _gf_false);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_SNAP_REMOVE_FAIL,
+                                        "Failed to remove snap %s",
+                                        snap->snapname);
+                                goto out;
+                        }
+                        if (dict)
+                                dict_unref (dict);
+                }
+                snprintf (buf, sizeof(buf), "%s.accept_peer_data", prefix);
+                ret = dict_get_int32 (peer_data, buf, &val);
+                if (val)
+                        accept_peer_data = _gf_true;
+                else
+                        accept_peer_data = _gf_false;
+
+                if (accept_peer_data) {
+                        /* Accept Peer Data */
+                        ret = glusterd_import_friend_snap (peer_data,
+                                                           i,
+                                                           peer_snap_name,
+                                                           peer_snap_id);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
+                                        GD_MSG_SNAP_IMPORT_FAIL,
+                                        "Failed to import snap %s from peer %s",
+                                        peer_snap_name, peername);
+                                goto out;
+                        }
+                }
+        }
+
 out:
+        if (peer_data)
+                dict_unref (peer_data);
         if (dict)
                 dict_unref (dict);
+        conf->restart_bricks = _gf_false;
 
-        gf_msg_trace (this->name, 0, "Returning %d", ret);
         return ret;
 }
 
@@ -2008,6 +2151,7 @@ glusterd_compare_friend_snapshots (dict_t *peer_data, char *peername,
         int32_t          snap_count   = 0;
         int              i            = 1;
         xlator_t        *this         = NULL;
+        dict_t          *peer_data_copy = NULL;
 
         this = THIS;
         GF_ASSERT (this);
@@ -2023,8 +2167,7 @@ glusterd_compare_friend_snapshots (dict_t *peer_data, char *peername,
 
         for (i = 1; i <= snap_count; i++) {
                 /* Compare one snapshot from peer_data at a time */
-                ret = glusterd_compare_and_update_snap (peer_data, i, peername,
-                                                        peerid);
+                ret = glusterd_compare_snap (peer_data, i, peername, peerid);
                 if (ret) {
                         gf_msg (this->name, GF_LOG_ERROR, 0,
                                 GD_MSG_SNAPSHOT_OP_FAILED,
@@ -2033,6 +2176,18 @@ glusterd_compare_friend_snapshots (dict_t *peer_data, char *peername,
                         goto out;
                 }
         }
+        /* Update the snaps at one go */
+        peer_data_copy = dict_copy_with_ref (peer_data, NULL);
+        ret = dict_set_str (peer_data_copy, "peername", peername);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+                        "Failed to set peername into the dict");
+                if (peer_data_copy)
+                        dict_unref (peer_data_copy);
+                goto out;
+        }
+        glusterd_launch_synctask (glusterd_update_snaps_synctask,
+                                  peer_data_copy);
 
 out:
         gf_msg_trace (this->name, 0, "Returning %d", ret);
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index d991a9f..5deacde 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -3448,6 +3448,14 @@ glusterd_compare_friend_volume (dict_t *peer_data, int32_t count,
         *status = GLUSTERD_VOL_COMP_SCS;
 
 out:
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "volume%d.update", count);
+
+        if (*status == GLUSTERD_VOL_COMP_UPDATE_REQ) {
+                ret = dict_set_int32 (peer_data, key, 1);
+        } else {
+                ret = dict_set_int32 (peer_data, key, 0);
+        }
         if (*status == GLUSTERD_VOL_COMP_RJT) {
                 gf_event (EVENT_COMPARE_FRIEND_VOLUME_FAILED, "volume=%s",
                           volinfo->volname);
@@ -3520,12 +3528,11 @@ glusterd_spawn_daemons (void *opaque)
         int             ret     = -1;
 
         synclock_lock (&conf->big_lock);
-        glusterd_restart_bricks (conf);
+        glusterd_restart_bricks ();
         glusterd_restart_gsyncds (conf);
         glusterd_restart_rebalance (conf);
         ret = glusterd_snapdsvc_restart ();
         ret = glusterd_tierdsvc_restart ();
-
         return ret;
 }
 
@@ -4291,20 +4298,35 @@ out:
 int32_t
 glusterd_volume_disconnect_all_bricks (glusterd_volinfo_t *volinfo)
 {
-        int                  ret = 0;
-        glusterd_brickinfo_t *brickinfo = NULL;
+        int                      ret        = 0;
+        glusterd_brickinfo_t    *brickinfo  = NULL;
+        glusterd_brick_proc_t   *brick_proc = NULL;
+        int                      brick_count = 0;
+
         GF_ASSERT (volinfo);
 
         cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
                 if (glusterd_is_brick_started (brickinfo)) {
-                        ret = glusterd_brick_disconnect (brickinfo);
-                        if (ret) {
-                                gf_msg ("glusterd", GF_LOG_ERROR, 0,
-                                        GD_MSD_BRICK_DISCONNECT_FAIL,
-                                        "Failed to "
-                                        "disconnect %s:%s", brickinfo->hostname,
-                                        brickinfo->path);
-                                break;
+                        /* If brick multiplexing is enabled then we can't
+                         * blindly set brickinfo->rpc to NULL as it might impact
+                         * the other attached bricks.
+                         */
+                        ret = glusterd_brick_proc_for_port (brickinfo->port,
+                                                            &brick_proc);
+                        if (!ret) {
+                                brick_count = brick_proc->brick_count;
+                        }
+                        if (!is_brick_mx_enabled () || brick_count == 0) {
+                                ret = glusterd_brick_disconnect (brickinfo);
+                                if (ret) {
+                                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
+                                                GD_MSD_BRICK_DISCONNECT_FAIL,
+                                                "Failed to "
+                                                "disconnect %s:%s",
+                                                brickinfo->hostname,
+                                                brickinfo->path);
+                                        break;
+                                }
                         }
                 }
         }
@@ -4543,7 +4565,7 @@ out:
 }
 
 int32_t
-glusterd_import_friend_volume (dict_t *peer_data, size_t count)
+glusterd_import_friend_volume (dict_t *peer_data, int count)
 {
 
         int32_t                 ret = -1;
@@ -4552,6 +4574,8 @@ glusterd_import_friend_volume (dict_t *peer_data, size_t count)
         glusterd_volinfo_t      *old_volinfo = NULL;
         glusterd_volinfo_t      *new_volinfo = NULL;
         glusterd_svc_t          *svc         = NULL;
+        int32_t                  update      = 0;
+        char                     key[512]    = {0,};
 
         GF_ASSERT (peer_data);
 
@@ -4559,6 +4583,15 @@ glusterd_import_friend_volume (dict_t *peer_data, size_t count)
         GF_ASSERT (this);
         priv = this->private;
         GF_ASSERT (priv);
+
+        memset (key, 0, sizeof (key));
+        snprintf (key, sizeof (key), "volume%d.update", count);
+        ret = dict_get_int32 (peer_data, key, &update);
+        if (ret || !update) {
+                /* if update is 0 that means the volume is not imported */
+                goto out;
+        }
+
         ret = glusterd_import_volinfo (peer_data, count,
                                        &new_volinfo, "volume");
         if (ret)
@@ -4572,6 +4605,14 @@ glusterd_import_friend_volume (dict_t *peer_data, size_t count)
 
         ret = glusterd_volinfo_find (new_volinfo->volname, &old_volinfo);
         if (0 == ret) {
+                if (new_volinfo->version <= old_volinfo->version) {
+                        /* When this condition is true, it already means that
+                         * the other synctask thread of import volume has
+                         * already up to date volume, so just ignore this volume
+                         * now
+                         */
+                        goto out;
+                }
                 /* Ref count the old_volinfo such that deleting it doesn't crash
                  * if its been already in use by other thread
                  */
@@ -4602,7 +4643,8 @@ glusterd_import_friend_volume (dict_t *peer_data, size_t count)
                 }
         }
 
-        ret = glusterd_store_volinfo (new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
+        ret = glusterd_store_volinfo (new_volinfo,
+                                      GLUSTERD_VOLINFO_VER_AC_NONE);
         if (ret) {
                 gf_msg (this->name, GF_LOG_ERROR, 0,
                         GD_MSG_VOLINFO_STORE_FAIL, "Failed to store "
@@ -4630,6 +4672,60 @@ out:
 }
 
 int32_t
+glusterd_import_friend_volumes_synctask (void *opaque)
+{
+        int32_t                 ret = -1;
+        int32_t                 count = 0;
+        int                     i = 1;
+        xlator_t                *this = NULL;
+        glusterd_conf_t         *conf = NULL;
+        dict_t *peer_data         = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        conf = this->private;
+        GF_ASSERT (conf);
+
+        peer_data = (dict_t *)opaque;
+        GF_ASSERT (peer_data);
+
+        ret = dict_get_int32 (peer_data, "count", &count);
+        if (ret)
+                goto out;
+
+        synclock_lock (&conf->big_lock);
+
+        /* We need to ensure that importing a volume shouldn't race with an
+         * other thread where as part of restarting glusterd, bricks are
+         * restarted (refer glusterd_restart_bricks ())
+         */
+        while (conf->restart_bricks) {
+                synclock_unlock (&conf->big_lock);
+                sleep (2);
+                synclock_lock (&conf->big_lock);
+        }
+        conf->restart_bricks = _gf_true;
+
+        while (i <= count) {
+                ret = glusterd_import_friend_volume (peer_data, i);
+                if (ret) {
+                        conf->restart_bricks = _gf_false;
+                        goto out;
+                }
+                i++;
+        }
+        glusterd_svcs_manager (NULL);
+        conf->restart_bricks = _gf_false;
+out:
+        if (peer_data)
+                dict_unref (peer_data);
+
+        gf_msg_debug ("glusterd", 0, "Returning with %d", ret);
+        return ret;
+}
+
+int32_t
 glusterd_import_friend_volumes (dict_t *peer_data)
 {
         int32_t                 ret = -1;
@@ -4768,8 +4864,10 @@ glusterd_import_global_opts (dict_t *friend_data)
                  * recompute if quorum is met. If quorum is not met bricks are
                  * not started and those already running are stopped
                  */
-                if (old_quorum != new_quorum)
-                        glusterd_restart_bricks (conf);
+                if (old_quorum != new_quorum) {
+                        glusterd_launch_synctask (glusterd_restart_bricks,
+                                                  NULL);
+                }
         }
 
         ret = 0;
@@ -4789,6 +4887,7 @@ glusterd_compare_friend_data (dict_t *peer_data, int32_t *status,
         gf_boolean_t     update    = _gf_false;
         xlator_t        *this      = NULL;
         glusterd_conf_t *priv      = NULL;
+        dict_t          *peer_data_copy = NULL;
 
         this = THIS;
         GF_ASSERT (this);
@@ -4820,18 +4919,23 @@ glusterd_compare_friend_data (dict_t *peer_data, int32_t *status,
                         goto out;
                 }
                 if (GLUSTERD_VOL_COMP_UPDATE_REQ == *status) {
-                        ret = glusterd_import_friend_volume (peer_data, i);
-                        if (ret) {
-                                goto out;
-                        }
                         update = _gf_true;
-                        *status = GLUSTERD_VOL_COMP_NONE;
                 }
                 i++;
         }
 
         if (update) {
-                glusterd_svcs_manager (NULL);
+                /* Launch the import friend volume as a separate synctask as it
+                 * has to trigger start bricks where we may need to wait for the
+                 * first brick to come up before attaching the subsequent bricks
+                 * in case brick multiplexing is enabled
+                 */
+                peer_data_copy = dict_copy_with_ref (peer_data, NULL);
+                glusterd_launch_synctask
+                        (glusterd_import_friend_volumes_synctask,
+                         peer_data_copy);
+                if (ret)
+                        goto out;
         }
 
 out:
@@ -5975,7 +6079,7 @@ out:
 }
 
 int
-glusterd_restart_bricks (glusterd_conf_t *conf)
+glusterd_restart_bricks (void *opaque)
 {
         int                   ret            = 0;
         glusterd_volinfo_t   *volinfo        = NULL;
@@ -5983,6 +6087,7 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
         glusterd_snap_t      *snap           = NULL;
         gf_boolean_t          start_svcs     = _gf_false;
         xlator_t             *this           = NULL;
+        glusterd_conf_t      *conf           = NULL;
         int                   active_count   = 0;
         int                   quorum_count   = 0;
         gf_boolean_t          node_quorum    = _gf_false;
@@ -5993,6 +6098,17 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
         conf = this->private;
         GF_VALIDATE_OR_GOTO (this->name, conf, return_block);
 
+        /* We need to ensure that restarting the bricks during glusterd restart
+         * shouldn't race with the import volume thread (refer
+         * glusterd_compare_friend_data ())
+         */
+        while (conf->restart_bricks) {
+                synclock_unlock (&conf->big_lock);
+                sleep (2);
+                synclock_lock (&conf->big_lock);
+        }
+        conf->restart_bricks = _gf_true;
+
         ++(conf->blockers);
         ret = glusterd_get_quorum_cluster_counts (this, &active_count,
                                                   &quorum_count);
@@ -6003,8 +6119,9 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
                 node_quorum = _gf_true;
 
         cds_list_for_each_entry (volinfo, &conf->volumes, vol_list) {
-                if (volinfo->status != GLUSTERD_STATUS_STARTED)
+                if (volinfo->status != GLUSTERD_STATUS_STARTED) {
                         continue;
+                }
                 gf_msg_debug (this->name, 0, "starting the volume %s",
                         volinfo->volname);
 
@@ -6111,6 +6228,7 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
 out:
         --(conf->blockers);
         conf->restart_done = _gf_true;
+        conf->restart_bricks = _gf_false;
 
 return_block:
         return ret;
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
index 9194da0..3b82b1e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
@@ -245,6 +245,10 @@ glusterd_pending_node_put_rpc (glusterd_pending_node_t *pending_node);
 int
 glusterd_remote_hostname_get (rpcsvc_request_t *req,
                               char *remote_host, int len);
+
+int32_t
+glusterd_import_friend_volumes_synctask (void *opaque);
+
 int32_t
 glusterd_import_friend_volumes (dict_t *peer_data);
 void
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index 3ad5ed6..b0656e6 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -199,6 +199,7 @@ typedef struct {
         int32_t                    workers;
         uint32_t                   blockers;
         uint32_t                   mgmt_v3_lock_timeout;
+        gf_boolean_t               restart_bricks;
 } glusterd_conf_t;
 
 
@@ -1077,7 +1078,7 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
                                     dict_t  *volumes, int   count);
 
 int
-glusterd_restart_bricks (glusterd_conf_t *conf);
+glusterd_restart_bricks ();
 
 int32_t
 glusterd_volume_txn (rpcsvc_request_t *req, char *volname, int flags,
-- 
1.8.3.1