d1681e
From 2f5d6b2923a7f9fe74cf820e5a4cdf894eb0a2bd Mon Sep 17 00:00:00 2001
d1681e
From: Atin Mukherjee <amukherj@redhat.com>
d1681e
Date: Thu, 8 Feb 2018 09:09:00 +0530
d1681e
Subject: [PATCH 147/148] glusterd: import volumes in separate synctask
d1681e
d1681e
With brick multiplexing, to attach a brick to an existing brick process
d1681e
the prerequisite is to have the compatible brick to finish its
d1681e
initialization and portmap sign in and hence the thread might have to go
d1681e
to a sleep and context switch the synctask to allow the brick process to
d1681e
communicate with glusterd. In normal code path, this works fine as
d1681e
glusterd_restart_bricks () is launched through a separate synctask.
d1681e
d1681e
In case there's a mismatch of the volume when glusterd restarts,
d1681e
glusterd_import_friend_volume is invoked and then it tries to call
d1681e
glusterd_start_bricks () from the main thread which eventually may land
d1681e
into the similar situation. Now since this is not done through a
d1681e
separate synctask, the 1st brick will never be able to get its turn to
d1681e
finish all of its handshaking and as a consequence to it, all the bricks
d1681e
will fail to get attached to it.
d1681e
d1681e
Solution : Execute import volume and glusterd restart bricks in separate
d1681e
synctask. Importing snaps had to be also done through synctask as
d1681e
there's a dependency of the parent volume need to be available for the
d1681e
importing snap functionality to work.
d1681e
d1681e
>upstream mainline patch : https://review.gluster.org/#/c/19357
d1681e
                           https://review.gluster.org/#/c/19536/
d1681e
                           https://review.gluster.org/#/c/19539/
d1681e
d1681e
Change-Id: I290b244d456afcc9b913ab30be4af040d340428c
d1681e
BUG: 1540600
d1681e
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
d1681e
Reviewed-on: https://code.engineering.redhat.com/gerrit/129937
d1681e
Tested-by: RHGS Build Bot <nigelb@redhat.com>
d1681e
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
d1681e
---
d1681e
 ...e-with-other-processes-accessing-mounted-path.t |  13 ++
d1681e
 xlators/mgmt/glusterd/src/glusterd-op-sm.c         |   9 +-
d1681e
 xlators/mgmt/glusterd/src/glusterd-op-sm.h         |   2 +
d1681e
 .../mgmt/glusterd/src/glusterd-snapshot-utils.c    | 229 +++++++++++++++++----
d1681e
 xlators/mgmt/glusterd/src/glusterd-utils.c         | 166 ++++++++++++---
d1681e
 xlators/mgmt/glusterd/src/glusterd-utils.h         |   4 +
d1681e
 xlators/mgmt/glusterd/src/glusterd.h               |   3 +-
d1681e
 7 files changed, 356 insertions(+), 70 deletions(-)
d1681e
d1681e
diff --git a/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t b/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t
d1681e
index c5a0088..22f98d2 100644
d1681e
--- a/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t
d1681e
+++ b/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t
d1681e
@@ -92,20 +92,33 @@ EXPECT "0" mounted_snaps ${V1}
d1681e
 # handled during handshake.
d1681e
 
d1681e
 activate_snapshots
d1681e
+
d1681e
+EXPECT 'Started' snapshot_status ${V0}_snap;
d1681e
+EXPECT 'Started' snapshot_status ${V1}_snap;
d1681e
+
d1681e
 kill_glusterd 2
d1681e
+
d1681e
 deactivate_snapshots
d1681e
+EXPECT 'Stopped' snapshot_status ${V0}_snap;
d1681e
+EXPECT 'Stopped' snapshot_status ${V1}_snap;
d1681e
+
d1681e
 TEST start_glusterd 2
d1681e
 
d1681e
 # Updates form friend should reflect as snap was deactivated while glusterd
d1681e
 # process was inactive and mount point should also not exist.
d1681e
 
d1681e
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count;
d1681e
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" mounted_snaps ${V0}
d1681e
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" mounted_snaps ${V1}
d1681e
 
d1681e
 kill_glusterd 2
d1681e
 activate_snapshots
d1681e
+EXPECT 'Started' snapshot_status ${V0}_snap;
d1681e
+EXPECT 'Started' snapshot_status ${V1}_snap;
d1681e
 TEST start_glusterd 2
d1681e
 
d1681e
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count;
d1681e
+
d1681e
 # Updates form friend should reflect as snap was activated while glusterd
d1681e
 # process was inactive and mount point should exist.
d1681e
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" mounted_snaps ${V0}
d1681e
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
d1681e
index 2fc2e3b..81cde21 100644
d1681e
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
d1681e
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
d1681e
@@ -2426,6 +2426,7 @@ glusterd_stop_bricks (glusterd_volinfo_t *volinfo)
d1681e
 
d1681e
 int
d1681e
 glusterd_start_bricks (glusterd_volinfo_t *volinfo)
d1681e
+
d1681e
 {
d1681e
         int                      ret            = -1;
d1681e
         glusterd_brickinfo_t    *brickinfo      = NULL;
d1681e
@@ -2454,14 +2455,6 @@ glusterd_start_bricks (glusterd_volinfo_t *volinfo)
d1681e
                                 goto out;
d1681e
                         }
d1681e
                 }
d1681e
-
d1681e
-        }
d1681e
-        ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
d1681e
-        if (ret) {
d1681e
-                gf_msg (THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL,
d1681e
-                        "Failed to write volinfo for volume %s",
d1681e
-                        volinfo->volname);
d1681e
-                goto out;
d1681e
         }
d1681e
         ret = 0;
d1681e
 out:
d1681e
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
d1681e
index 48275c5..24b1944 100644
d1681e
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h
d1681e
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
d1681e
@@ -275,8 +275,10 @@ glusterd_volume_stats_write_perf (char *brick_path, int32_t blk_size,
d1681e
                 int32_t blk_count, double *throughput, double *time);
d1681e
 gf_boolean_t
d1681e
 glusterd_is_volume_started (glusterd_volinfo_t  *volinfo);
d1681e
+
d1681e
 int
d1681e
 glusterd_start_bricks (glusterd_volinfo_t *volinfo);
d1681e
+
d1681e
 gf_boolean_t
d1681e
 glusterd_are_all_volumes_stopped ();
d1681e
 int
d1681e
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
d1681e
index 3f03d2b..ad206f6 100644
d1681e
--- a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
d1681e
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
d1681e
@@ -1758,8 +1758,11 @@ out:
d1681e
  * state, i.e either both would be hosting bricks or both would not be hosting
d1681e
  * bricks, then a decision can't be taken and a peer-reject will happen.
d1681e
  *
d1681e
- * glusterd_compare_and_update_snap() implements the following algorithm to
d1681e
- * perform the above task:
d1681e
+ * glusterd_compare_snap()  & glusterd_update_snaps () implement the following
d1681e
+ * algorithm to perform the above task. Please note the former function tries to
d1681e
+ * iterate over the snaps one at a time and updating the relevant fields in the
d1681e
+ * dictionary and then glusterd_update_snaps () go over all the snaps and update
d1681e
+ * them at one go as part of a synctask.
d1681e
  * Step  1: Start.
d1681e
  * Step  2: Check if the peer is missing a delete or restore on the said snap.
d1681e
  *          If yes, goto step 6.
d1681e
@@ -1784,21 +1787,18 @@ out:
d1681e
  *
d1681e
  */
d1681e
 int32_t
d1681e
-glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
d1681e
-                                  char *peername, uuid_t peerid)
d1681e
+glusterd_compare_snap (dict_t *peer_data, int32_t snap_count,
d1681e
+                       char *peername, uuid_t peerid)
d1681e
 {
d1681e
         char              buf[NAME_MAX]    = "";
d1681e
         char              prefix[NAME_MAX] = "";
d1681e
         char             *peer_snap_name   = NULL;
d1681e
         char             *peer_snap_id     = NULL;
d1681e
-        dict_t           *dict             = NULL;
d1681e
         glusterd_snap_t  *snap             = NULL;
d1681e
         gf_boolean_t      conflict         = _gf_false;
d1681e
         gf_boolean_t      is_local         = _gf_false;
d1681e
         gf_boolean_t      is_hosted        = _gf_false;
d1681e
         gf_boolean_t      missed_delete    = _gf_false;
d1681e
-        gf_boolean_t      remove_lvm       = _gf_true;
d1681e
-
d1681e
         int32_t           ret              = -1;
d1681e
         int32_t           volcount         = 0;
d1681e
         xlator_t         *this             = NULL;
d1681e
@@ -1810,6 +1810,14 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
d1681e
 
d1681e
         snprintf (prefix, sizeof(prefix), "snap%d", snap_count);
d1681e
 
d1681e
+        ret = dict_set_uint32 (peer_data, buf, 0);
d1681e
+        snprintf (buf, sizeof(buf), "%s.accept_peer_data", prefix);
d1681e
+        ret = dict_set_uint32 (peer_data, buf, 0);
d1681e
+        snprintf (buf, sizeof(buf), "%s.remove_lvm", prefix);
d1681e
+        ret = dict_set_uint32 (peer_data, buf, 0);
d1681e
+        snprintf (buf, sizeof(buf), "%s.remove_my_data", prefix);
d1681e
+        ret = dict_set_uint32 (peer_data, buf, 0);
d1681e
+
d1681e
         /* Fetch the peer's snapname */
d1681e
         snprintf (buf, sizeof(buf), "%s.snapname", prefix);
d1681e
         ret = dict_get_str (peer_data, buf, &peer_snap_name);
d1681e
@@ -1866,7 +1874,10 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
d1681e
                         /* Peer has snap with the same snapname
d1681e
                         * and snap_id, which local node doesn't have.
d1681e
                         */
d1681e
-                        goto accept_peer_data;
d1681e
+                        snprintf (buf, sizeof(buf), "%s.accept_peer_data",
d1681e
+                                  prefix);
d1681e
+                        ret = dict_set_uint32 (peer_data, buf, 1);
d1681e
+                        goto out;
d1681e
                 }
d1681e
                 /* Peer has snap with the same snapname
d1681e
                  * and snap_id. Now check if peer has a
d1681e
@@ -1893,12 +1904,18 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
d1681e
                          * When removing data from local node, make sure
d1681e
                          * we are not removing backend lvm of the snap.
d1681e
                          */
d1681e
-                        remove_lvm = _gf_false;
d1681e
-                        goto remove_my_data;
d1681e
+                        snprintf (buf, sizeof(buf), "%s.remove_lvm", prefix);
d1681e
+                        ret = dict_set_uint32 (peer_data, buf, 0);
d1681e
+                        snprintf (buf, sizeof(buf), "%s.remove_my_data",
d1681e
+                                  prefix);
d1681e
+                        ret = dict_set_uint32 (peer_data, buf, 1);
d1681e
+                        snprintf (buf, sizeof(buf), "%s.accept_peer_data",
d1681e
+                                  prefix);
d1681e
+                        ret = dict_set_uint32 (peer_data, buf, 1);
d1681e
                 } else {
d1681e
                         ret = 0;
d1681e
-                        goto out;
d1681e
                 }
d1681e
+                goto out;
d1681e
         }
d1681e
 
d1681e
         /* There is a conflict. Check if the current node is
d1681e
@@ -1950,50 +1967,176 @@ glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count,
d1681e
          * And local node isn't. Hence remove local node's
d1681e
          * data and accept peer data
d1681e
          */
d1681e
-
d1681e
         gf_msg_debug (this->name, 0, "Peer hosts bricks for conflicting "
d1681e
                 "snap(%s). Removing local data. Accepting peer data.",
d1681e
                 peer_snap_name);
d1681e
-        remove_lvm = _gf_true;
d1681e
+        snprintf (buf, sizeof(buf), "%s.remove_lvm", prefix);
d1681e
+        ret = dict_set_uint32 (peer_data, buf, 1);
d1681e
+        snprintf (buf, sizeof(buf), "%s.remove_my_data",
d1681e
+                  prefix);
d1681e
+        ret = dict_set_uint32 (peer_data, buf, 1);
d1681e
+        snprintf (buf, sizeof(buf), "%s.accept_peer_data", prefix);
d1681e
+        ret = dict_set_uint32 (peer_data, buf, 1);
d1681e
 
d1681e
-remove_my_data:
d1681e
+out:
d1681e
+        gf_msg_trace (this->name, 0, "Returning %d", ret);
d1681e
+        return ret;
d1681e
+}
d1681e
 
d1681e
-        dict = dict_new();
d1681e
-        if (!dict) {
d1681e
-                gf_msg (this->name, GF_LOG_ERROR, 0,
d1681e
-                        GD_MSG_DICT_CREATE_FAIL,
d1681e
-                        "Unable to create dict");
d1681e
-                ret = -1;
d1681e
-                goto out;
d1681e
+int32_t
d1681e
+glusterd_update_snaps_synctask (void *opaque)
d1681e
+{
d1681e
+        int32_t           ret              = -1;
d1681e
+        int32_t           snap_count       = 0;
d1681e
+        int               i                = 1;
d1681e
+        xlator_t         *this             = NULL;
d1681e
+        dict_t           *peer_data        = NULL;
d1681e
+        char              buf[NAME_MAX]    = "";
d1681e
+        char              prefix[NAME_MAX] = "";
d1681e
+        char             *peer_snap_name   = NULL;
d1681e
+        char             *peer_snap_id     = NULL;
d1681e
+        char             *peername         = NULL;
d1681e
+        gf_boolean_t      remove_lvm       = _gf_false;
d1681e
+        gf_boolean_t      remove_my_data   = _gf_false;
d1681e
+        gf_boolean_t      accept_peer_data = _gf_false;
d1681e
+        int32_t           val              = 0;
d1681e
+        glusterd_snap_t  *snap             = NULL;
d1681e
+        dict_t           *dict             = NULL;
d1681e
+        glusterd_conf_t  *conf             = NULL;
d1681e
+
d1681e
+        this = THIS;
d1681e
+        GF_ASSERT (this);
d1681e
+
d1681e
+        conf = this->private;
d1681e
+        GF_ASSERT (conf);
d1681e
+
d1681e
+        peer_data = (dict_t *)opaque;
d1681e
+        GF_ASSERT (peer_data);
d1681e
+
d1681e
+        synclock_lock (&conf->big_lock);
d1681e
+
d1681e
+        while (conf->restart_bricks) {
d1681e
+                synclock_unlock (&conf->big_lock);
d1681e
+                sleep (2);
d1681e
+                synclock_lock (&conf->big_lock);
d1681e
         }
d1681e
+        conf->restart_bricks = _gf_true;
d1681e
 
d1681e
-        ret = glusterd_snap_remove (dict, snap, remove_lvm, _gf_false,
d1681e
-                                    _gf_false);
d1681e
+        ret = dict_get_int32 (peer_data, "snap_count", &snap_count);
d1681e
         if (ret) {
d1681e
                 gf_msg (this->name, GF_LOG_ERROR, 0,
d1681e
-                        GD_MSG_SNAP_REMOVE_FAIL,
d1681e
-                        "Failed to remove snap %s", snap->snapname);
d1681e
+                        GD_MSG_DICT_GET_FAILED, "Failed to fetch snap_count");
d1681e
                 goto out;
d1681e
         }
d1681e
-
d1681e
-accept_peer_data:
d1681e
-
d1681e
-        /* Accept Peer Data */
d1681e
-        ret = glusterd_import_friend_snap (peer_data, snap_count,
d1681e
-                                           peer_snap_name, peer_snap_id);
d1681e
+        ret = dict_get_str (peer_data, "peername", &peername);
d1681e
         if (ret) {
d1681e
                 gf_msg (this->name, GF_LOG_ERROR, 0,
d1681e
-                        GD_MSG_SNAP_IMPORT_FAIL,
d1681e
-                        "Failed to import snap %s from peer %s",
d1681e
-                        peer_snap_name, peername);
d1681e
+                        GD_MSG_DICT_GET_FAILED, "Failed to fetch peername");
d1681e
                 goto out;
d1681e
         }
d1681e
 
d1681e
+        for (i = 1; i <= snap_count; i++) {
d1681e
+                snprintf (prefix, sizeof(prefix), "snap%d", i);
d1681e
+
d1681e
+                /* Fetch the peer's snapname */
d1681e
+                snprintf (buf, sizeof(buf), "%s.snapname", prefix);
d1681e
+                ret = dict_get_str (peer_data, buf, &peer_snap_name);
d1681e
+                if (ret) {
d1681e
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
d1681e
+                                GD_MSG_DICT_GET_FAILED,
d1681e
+                                "Unable to fetch snapname from peer: %s",
d1681e
+                                peername);
d1681e
+                        goto out;
d1681e
+                }
d1681e
+
d1681e
+                /* Fetch the peer's snap_id */
d1681e
+                snprintf (buf, sizeof(buf), "%s.snap_id", prefix);
d1681e
+                ret = dict_get_str (peer_data, buf, &peer_snap_id);
d1681e
+                if (ret) {
d1681e
+                        gf_msg (this->name, GF_LOG_ERROR, 0,
d1681e
+                                GD_MSG_DICT_GET_FAILED,
d1681e
+                                "Unable to fetch snap_id from peer: %s",
d1681e
+                                peername);
d1681e
+                        goto out;
d1681e
+                }
d1681e
+
d1681e
+                /* remove_my_data */
d1681e
+                snprintf (buf, sizeof(buf), "%s.remove_my_data", prefix);
d1681e
+                ret = dict_get_int32 (peer_data, buf, &val);
d1681e
+                if (val)
d1681e
+                        remove_my_data = _gf_true;
d1681e
+                else
d1681e
+                        remove_my_data = _gf_false;
d1681e
+
d1681e
+                if (remove_my_data) {
d1681e
+                        snprintf (buf, sizeof(buf), "%s.remove_lvm", prefix);
d1681e
+                        ret = dict_get_int32 (peer_data, buf, &val);
d1681e
+                        if (val)
d1681e
+                                remove_lvm = _gf_true;
d1681e
+                        else
d1681e
+                                remove_lvm = _gf_false;
d1681e
+
d1681e
+                        dict = dict_new();
d1681e
+                        if (!dict) {
d1681e
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
d1681e
+                                        GD_MSG_DICT_CREATE_FAIL,
d1681e
+                                        "Unable to create dict");
d1681e
+                                ret = -1;
d1681e
+                                goto out;
d1681e
+                        }
d1681e
+                        snap = glusterd_find_snap_by_name (peer_snap_name);
d1681e
+                        if (!snap) {
d1681e
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
d1681e
+                                        GD_MSG_MISSED_SNAP_PRESENT,
d1681e
+                                        "Snapshot %s from peer %s missing on "
d1681e
+                                        "localhost", peer_snap_name,
d1681e
+                                        peername);
d1681e
+                                ret = -1;
d1681e
+                                goto out;
d1681e
+                        }
d1681e
+
d1681e
+                        ret = glusterd_snap_remove (dict, snap, remove_lvm,
d1681e
+                                                    _gf_false, _gf_false);
d1681e
+                        if (ret) {
d1681e
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
d1681e
+                                        GD_MSG_SNAP_REMOVE_FAIL,
d1681e
+                                        "Failed to remove snap %s",
d1681e
+                                        snap->snapname);
d1681e
+                                goto out;
d1681e
+                        }
d1681e
+                        if (dict)
d1681e
+                                dict_unref (dict);
d1681e
+                }
d1681e
+                snprintf (buf, sizeof(buf), "%s.accept_peer_data", prefix);
d1681e
+                ret = dict_get_int32 (peer_data, buf, &val);
d1681e
+                if (val)
d1681e
+                        accept_peer_data = _gf_true;
d1681e
+                else
d1681e
+                        accept_peer_data = _gf_false;
d1681e
+
d1681e
+                if (accept_peer_data) {
d1681e
+                        /* Accept Peer Data */
d1681e
+                        ret = glusterd_import_friend_snap (peer_data,
d1681e
+                                                           i,
d1681e
+                                                           peer_snap_name,
d1681e
+                                                           peer_snap_id);
d1681e
+                        if (ret) {
d1681e
+                                gf_msg (this->name, GF_LOG_ERROR, 0,
d1681e
+                                        GD_MSG_SNAP_IMPORT_FAIL,
d1681e
+                                        "Failed to import snap %s from peer %s",
d1681e
+                                        peer_snap_name, peername);
d1681e
+                                goto out;
d1681e
+                        }
d1681e
+                }
d1681e
+        }
d1681e
+
d1681e
 out:
d1681e
+        if (peer_data)
d1681e
+                dict_unref (peer_data);
d1681e
         if (dict)
d1681e
                 dict_unref (dict);
d1681e
+        conf->restart_bricks = _gf_false;
d1681e
 
d1681e
-        gf_msg_trace (this->name, 0, "Returning %d", ret);
d1681e
         return ret;
d1681e
 }
d1681e
 
d1681e
@@ -2008,6 +2151,7 @@ glusterd_compare_friend_snapshots (dict_t *peer_data, char *peername,
d1681e
         int32_t          snap_count   = 0;
d1681e
         int              i            = 1;
d1681e
         xlator_t        *this         = NULL;
d1681e
+        dict_t          *peer_data_copy = NULL;
d1681e
 
d1681e
         this = THIS;
d1681e
         GF_ASSERT (this);
d1681e
@@ -2023,8 +2167,7 @@ glusterd_compare_friend_snapshots (dict_t *peer_data, char *peername,
d1681e
 
d1681e
         for (i = 1; i <= snap_count; i++) {
d1681e
                 /* Compare one snapshot from peer_data at a time */
d1681e
-                ret = glusterd_compare_and_update_snap (peer_data, i, peername,
d1681e
-                                                        peerid);
d1681e
+                ret = glusterd_compare_snap (peer_data, i, peername, peerid);
d1681e
                 if (ret) {
d1681e
                         gf_msg (this->name, GF_LOG_ERROR, 0,
d1681e
                                 GD_MSG_SNAPSHOT_OP_FAILED,
d1681e
@@ -2033,6 +2176,18 @@ glusterd_compare_friend_snapshots (dict_t *peer_data, char *peername,
d1681e
                         goto out;
d1681e
                 }
d1681e
         }
d1681e
+        /* Update the snaps at one go */
d1681e
+        peer_data_copy = dict_copy_with_ref (peer_data, NULL);
d1681e
+        ret = dict_set_str (peer_data_copy, "peername", peername);
d1681e
+        if (ret) {
d1681e
+                gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
d1681e
+                        "Failed to set peername into the dict");
d1681e
+                if (peer_data_copy)
d1681e
+                        dict_unref (peer_data_copy);
d1681e
+                goto out;
d1681e
+        }
d1681e
+        glusterd_launch_synctask (glusterd_update_snaps_synctask,
d1681e
+                                  peer_data_copy);
d1681e
 
d1681e
 out:
d1681e
         gf_msg_trace (this->name, 0, "Returning %d", ret);
d1681e
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
d1681e
index d991a9f..5deacde 100644
d1681e
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
d1681e
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
d1681e
@@ -3448,6 +3448,14 @@ glusterd_compare_friend_volume (dict_t *peer_data, int32_t count,
d1681e
         *status = GLUSTERD_VOL_COMP_SCS;
d1681e
 
d1681e
 out:
d1681e
+        memset (key, 0, sizeof (key));
d1681e
+        snprintf (key, sizeof (key), "volume%d.update", count);
d1681e
+
d1681e
+        if (*status == GLUSTERD_VOL_COMP_UPDATE_REQ) {
d1681e
+                ret = dict_set_int32 (peer_data, key, 1);
d1681e
+        } else {
d1681e
+                ret = dict_set_int32 (peer_data, key, 0);
d1681e
+        }
d1681e
         if (*status == GLUSTERD_VOL_COMP_RJT) {
d1681e
                 gf_event (EVENT_COMPARE_FRIEND_VOLUME_FAILED, "volume=%s",
d1681e
                           volinfo->volname);
d1681e
@@ -3520,12 +3528,11 @@ glusterd_spawn_daemons (void *opaque)
d1681e
         int             ret     = -1;
d1681e
 
d1681e
         synclock_lock (&conf->big_lock);
d1681e
-        glusterd_restart_bricks (conf);
d1681e
+        glusterd_restart_bricks ();
d1681e
         glusterd_restart_gsyncds (conf);
d1681e
         glusterd_restart_rebalance (conf);
d1681e
         ret = glusterd_snapdsvc_restart ();
d1681e
         ret = glusterd_tierdsvc_restart ();
d1681e
-
d1681e
         return ret;
d1681e
 }
d1681e
 
d1681e
@@ -4291,20 +4298,35 @@ out:
d1681e
 int32_t
d1681e
 glusterd_volume_disconnect_all_bricks (glusterd_volinfo_t *volinfo)
d1681e
 {
d1681e
-        int                  ret = 0;
d1681e
-        glusterd_brickinfo_t *brickinfo = NULL;
d1681e
+        int                      ret        = 0;
d1681e
+        glusterd_brickinfo_t    *brickinfo  = NULL;
d1681e
+        glusterd_brick_proc_t   *brick_proc = NULL;
d1681e
+        int                      brick_count = 0;
d1681e
+
d1681e
         GF_ASSERT (volinfo);
d1681e
 
d1681e
         cds_list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
d1681e
                 if (glusterd_is_brick_started (brickinfo)) {
d1681e
-                        ret = glusterd_brick_disconnect (brickinfo);
d1681e
-                        if (ret) {
d1681e
-                                gf_msg ("glusterd", GF_LOG_ERROR, 0,
d1681e
-                                        GD_MSD_BRICK_DISCONNECT_FAIL,
d1681e
-                                        "Failed to "
d1681e
-                                        "disconnect %s:%s", brickinfo->hostname,
d1681e
-                                        brickinfo->path);
d1681e
-                                break;
d1681e
+                        /* If brick multiplexing is enabled then we can't
d1681e
+                         * blindly set brickinfo->rpc to NULL as it might impact
d1681e
+                         * the other attached bricks.
d1681e
+                         */
d1681e
+                        ret = glusterd_brick_proc_for_port (brickinfo->port,
d1681e
+                                                            &brick_proc);
d1681e
+                        if (!ret) {
d1681e
+                                brick_count = brick_proc->brick_count;
d1681e
+                        }
d1681e
+                        if (!is_brick_mx_enabled () || brick_count == 0) {
d1681e
+                                ret = glusterd_brick_disconnect (brickinfo);
d1681e
+                                if (ret) {
d1681e
+                                        gf_msg ("glusterd", GF_LOG_ERROR, 0,
d1681e
+                                                GD_MSD_BRICK_DISCONNECT_FAIL,
d1681e
+                                                "Failed to "
d1681e
+                                                "disconnect %s:%s",
d1681e
+                                                brickinfo->hostname,
d1681e
+                                                brickinfo->path);
d1681e
+                                        break;
d1681e
+                                }
d1681e
                         }
d1681e
                 }
d1681e
         }
d1681e
@@ -4543,7 +4565,7 @@ out:
d1681e
 }
d1681e
 
d1681e
 int32_t
d1681e
-glusterd_import_friend_volume (dict_t *peer_data, size_t count)
d1681e
+glusterd_import_friend_volume (dict_t *peer_data, int count)
d1681e
 {
d1681e
 
d1681e
         int32_t                 ret = -1;
d1681e
@@ -4552,6 +4574,8 @@ glusterd_import_friend_volume (dict_t *peer_data, size_t count)
d1681e
         glusterd_volinfo_t      *old_volinfo = NULL;
d1681e
         glusterd_volinfo_t      *new_volinfo = NULL;
d1681e
         glusterd_svc_t          *svc         = NULL;
d1681e
+        int32_t                  update      = 0;
d1681e
+        char                     key[512]    = {0,};
d1681e
 
d1681e
         GF_ASSERT (peer_data);
d1681e
 
d1681e
@@ -4559,6 +4583,15 @@ glusterd_import_friend_volume (dict_t *peer_data, size_t count)
d1681e
         GF_ASSERT (this);
d1681e
         priv = this->private;
d1681e
         GF_ASSERT (priv);
d1681e
+
d1681e
+        memset (key, 0, sizeof (key));
d1681e
+        snprintf (key, sizeof (key), "volume%d.update", count);
d1681e
+        ret = dict_get_int32 (peer_data, key, &update);
d1681e
+        if (ret || !update) {
d1681e
+                /* if update is 0 that means the volume is not imported */
d1681e
+                goto out;
d1681e
+        }
d1681e
+
d1681e
         ret = glusterd_import_volinfo (peer_data, count,
d1681e
                                        &new_volinfo, "volume");
d1681e
         if (ret)
d1681e
@@ -4572,6 +4605,14 @@ glusterd_import_friend_volume (dict_t *peer_data, size_t count)
d1681e
 
d1681e
         ret = glusterd_volinfo_find (new_volinfo->volname, &old_volinfo);
d1681e
         if (0 == ret) {
d1681e
+                if (new_volinfo->version <= old_volinfo->version) {
d1681e
+                        /* When this condition is true, it already means that
d1681e
+                         * the other synctask thread of import volume has
d1681e
+                         * already up to date volume, so just ignore this volume
d1681e
+                         * now
d1681e
+                         */
d1681e
+                        goto out;
d1681e
+                }
d1681e
                 /* Ref count the old_volinfo such that deleting it doesn't crash
d1681e
                  * if its been already in use by other thread
d1681e
                  */
d1681e
@@ -4602,7 +4643,8 @@ glusterd_import_friend_volume (dict_t *peer_data, size_t count)
d1681e
                 }
d1681e
         }
d1681e
 
d1681e
-        ret = glusterd_store_volinfo (new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
d1681e
+        ret = glusterd_store_volinfo (new_volinfo,
d1681e
+                                      GLUSTERD_VOLINFO_VER_AC_NONE);
d1681e
         if (ret) {
d1681e
                 gf_msg (this->name, GF_LOG_ERROR, 0,
d1681e
                         GD_MSG_VOLINFO_STORE_FAIL, "Failed to store "
d1681e
@@ -4630,6 +4672,60 @@ out:
d1681e
 }
d1681e
 
d1681e
 int32_t
d1681e
+glusterd_import_friend_volumes_synctask (void *opaque)
d1681e
+{
d1681e
+        int32_t                 ret = -1;
d1681e
+        int32_t                 count = 0;
d1681e
+        int                     i = 1;
d1681e
+        xlator_t                *this = NULL;
d1681e
+        glusterd_conf_t         *conf = NULL;
d1681e
+        dict_t *peer_data         = NULL;
d1681e
+
d1681e
+        this = THIS;
d1681e
+        GF_ASSERT (this);
d1681e
+
d1681e
+        conf = this->private;
d1681e
+        GF_ASSERT (conf);
d1681e
+
d1681e
+        peer_data = (dict_t *)opaque;
d1681e
+        GF_ASSERT (peer_data);
d1681e
+
d1681e
+        ret = dict_get_int32 (peer_data, "count", &count);
d1681e
+        if (ret)
d1681e
+                goto out;
d1681e
+
d1681e
+        synclock_lock (&conf->big_lock);
d1681e
+
d1681e
+        /* We need to ensure that importing a volume shouldn't race with an
d1681e
+         * other thread where as part of restarting glusterd, bricks are
d1681e
+         * restarted (refer glusterd_restart_bricks ())
d1681e
+         */
d1681e
+        while (conf->restart_bricks) {
d1681e
+                synclock_unlock (&conf->big_lock);
d1681e
+                sleep (2);
d1681e
+                synclock_lock (&conf->big_lock);
d1681e
+        }
d1681e
+        conf->restart_bricks = _gf_true;
d1681e
+
d1681e
+        while (i <= count) {
d1681e
+                ret = glusterd_import_friend_volume (peer_data, i);
d1681e
+                if (ret) {
d1681e
+                        conf->restart_bricks = _gf_false;
d1681e
+                        goto out;
d1681e
+                }
d1681e
+                i++;
d1681e
+        }
d1681e
+        glusterd_svcs_manager (NULL);
d1681e
+        conf->restart_bricks = _gf_false;
d1681e
+out:
d1681e
+        if (peer_data)
d1681e
+                dict_unref (peer_data);
d1681e
+
d1681e
+        gf_msg_debug ("glusterd", 0, "Returning with %d", ret);
d1681e
+        return ret;
d1681e
+}
d1681e
+
d1681e
+int32_t
d1681e
 glusterd_import_friend_volumes (dict_t *peer_data)
d1681e
 {
d1681e
         int32_t                 ret = -1;
d1681e
@@ -4768,8 +4864,10 @@ glusterd_import_global_opts (dict_t *friend_data)
d1681e
                  * recompute if quorum is met. If quorum is not met bricks are
d1681e
                  * not started and those already running are stopped
d1681e
                  */
d1681e
-                if (old_quorum != new_quorum)
d1681e
-                        glusterd_restart_bricks (conf);
d1681e
+                if (old_quorum != new_quorum) {
d1681e
+                        glusterd_launch_synctask (glusterd_restart_bricks,
d1681e
+                                                  NULL);
d1681e
+                }
d1681e
         }
d1681e
 
d1681e
         ret = 0;
d1681e
@@ -4789,6 +4887,7 @@ glusterd_compare_friend_data (dict_t *peer_data, int32_t *status,
d1681e
         gf_boolean_t     update    = _gf_false;
d1681e
         xlator_t        *this      = NULL;
d1681e
         glusterd_conf_t *priv      = NULL;
d1681e
+        dict_t          *peer_data_copy = NULL;
d1681e
 
d1681e
         this = THIS;
d1681e
         GF_ASSERT (this);
d1681e
@@ -4820,18 +4919,23 @@ glusterd_compare_friend_data (dict_t *peer_data, int32_t *status,
d1681e
                         goto out;
d1681e
                 }
d1681e
                 if (GLUSTERD_VOL_COMP_UPDATE_REQ == *status) {
d1681e
-                        ret = glusterd_import_friend_volume (peer_data, i);
d1681e
-                        if (ret) {
d1681e
-                                goto out;
d1681e
-                        }
d1681e
                         update = _gf_true;
d1681e
-                        *status = GLUSTERD_VOL_COMP_NONE;
d1681e
                 }
d1681e
                 i++;
d1681e
         }
d1681e
 
d1681e
         if (update) {
d1681e
-                glusterd_svcs_manager (NULL);
d1681e
+                /* Launch the import friend volume as a separate synctask as it
d1681e
+                 * has to trigger start bricks where we may need to wait for the
d1681e
+                 * first brick to come up before attaching the subsequent bricks
d1681e
+                 * in case brick multiplexing is enabled
d1681e
+                 */
d1681e
+                peer_data_copy = dict_copy_with_ref (peer_data, NULL);
d1681e
+                glusterd_launch_synctask
d1681e
+                        (glusterd_import_friend_volumes_synctask,
d1681e
+                         peer_data_copy);
d1681e
+                if (ret)
d1681e
+                        goto out;
d1681e
         }
d1681e
 
d1681e
 out:
d1681e
@@ -5975,7 +6079,7 @@ out:
d1681e
 }
d1681e
 
d1681e
 int
d1681e
-glusterd_restart_bricks (glusterd_conf_t *conf)
d1681e
+glusterd_restart_bricks (void *opaque)
d1681e
 {
d1681e
         int                   ret            = 0;
d1681e
         glusterd_volinfo_t   *volinfo        = NULL;
d1681e
@@ -5983,6 +6087,7 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
d1681e
         glusterd_snap_t      *snap           = NULL;
d1681e
         gf_boolean_t          start_svcs     = _gf_false;
d1681e
         xlator_t             *this           = NULL;
d1681e
+        glusterd_conf_t      *conf           = NULL;
d1681e
         int                   active_count   = 0;
d1681e
         int                   quorum_count   = 0;
d1681e
         gf_boolean_t          node_quorum    = _gf_false;
d1681e
@@ -5993,6 +6098,17 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
d1681e
         conf = this->private;
d1681e
         GF_VALIDATE_OR_GOTO (this->name, conf, return_block);
d1681e
 
d1681e
+        /* We need to ensure that restarting the bricks during glusterd restart
d1681e
+         * shouldn't race with the import volume thread (refer
d1681e
+         * glusterd_compare_friend_data ())
d1681e
+         */
d1681e
+        while (conf->restart_bricks) {
d1681e
+                synclock_unlock (&conf->big_lock);
d1681e
+                sleep (2);
d1681e
+                synclock_lock (&conf->big_lock);
d1681e
+        }
d1681e
+        conf->restart_bricks = _gf_true;
d1681e
+
d1681e
         ++(conf->blockers);
d1681e
         ret = glusterd_get_quorum_cluster_counts (this, &active_count,
d1681e
                                                   &quorum_count);
d1681e
@@ -6003,8 +6119,9 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
d1681e
                 node_quorum = _gf_true;
d1681e
 
d1681e
         cds_list_for_each_entry (volinfo, &conf->volumes, vol_list) {
d1681e
-                if (volinfo->status != GLUSTERD_STATUS_STARTED)
d1681e
+                if (volinfo->status != GLUSTERD_STATUS_STARTED) {
d1681e
                         continue;
d1681e
+                }
d1681e
                 gf_msg_debug (this->name, 0, "starting the volume %s",
d1681e
                         volinfo->volname);
d1681e
 
d1681e
@@ -6111,6 +6228,7 @@ glusterd_restart_bricks (glusterd_conf_t *conf)
d1681e
 out:
d1681e
         --(conf->blockers);
d1681e
         conf->restart_done = _gf_true;
d1681e
+        conf->restart_bricks = _gf_false;
d1681e
 
d1681e
 return_block:
d1681e
         return ret;
d1681e
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h
d1681e
index 9194da0..3b82b1e 100644
d1681e
--- a/xlators/mgmt/glusterd/src/glusterd-utils.h
d1681e
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.h
d1681e
@@ -245,6 +245,10 @@ glusterd_pending_node_put_rpc (glusterd_pending_node_t *pending_node);
d1681e
 int
d1681e
 glusterd_remote_hostname_get (rpcsvc_request_t *req,
d1681e
                               char *remote_host, int len);
d1681e
+
d1681e
+int32_t
d1681e
+glusterd_import_friend_volumes_synctask (void *opaque);
d1681e
+
d1681e
 int32_t
d1681e
 glusterd_import_friend_volumes (dict_t *peer_data);
d1681e
 void
d1681e
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
d1681e
index 3ad5ed6..b0656e6 100644
d1681e
--- a/xlators/mgmt/glusterd/src/glusterd.h
d1681e
+++ b/xlators/mgmt/glusterd/src/glusterd.h
d1681e
@@ -199,6 +199,7 @@ typedef struct {
d1681e
         int32_t                    workers;
d1681e
         uint32_t                   blockers;
d1681e
         uint32_t                   mgmt_v3_lock_timeout;
d1681e
+        gf_boolean_t               restart_bricks;
d1681e
 } glusterd_conf_t;
d1681e
 
d1681e
 
d1681e
@@ -1077,7 +1078,7 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
d1681e
                                     dict_t  *volumes, int   count);
d1681e
 
d1681e
 int
d1681e
-glusterd_restart_bricks (glusterd_conf_t *conf);
d1681e
+glusterd_restart_bricks ();
d1681e
 
d1681e
 int32_t
d1681e
 glusterd_volume_txn (rpcsvc_request_t *req, char *volname, int flags,
d1681e
-- 
d1681e
1.8.3.1
d1681e