21ab4e
From 91add881bca659d456c0738c611c001babb62cbb Mon Sep 17 00:00:00 2001
21ab4e
From: Jeff Darcy <jdarcy@redhat.com>
21ab4e
Date: Fri, 3 Feb 2017 10:51:21 -0500
21ab4e
Subject: [PATCH 332/361] glusterd: keep snapshot bricks separate from regular
21ab4e
 ones
21ab4e
21ab4e
The problem here is that a volume's transport options can change, but
21ab4e
any snapshots' bricks don't follow along even though they're now
21ab4e
incompatible (with respect to multiplexing).  This was causing the
21ab4e
USS+SSL test to fail.  By keeping the snapshot bricks separate
21ab4e
(though still potentially multiplexed with other snapshot bricks
21ab4e
including those for other volumes) we can ensure that they remain
21ab4e
unaffected by changes to their parent volumes.
21ab4e
21ab4e
Also fixed various issues with how the test waits (or more precisely
21ab4e
didn't) for various events to complete before it continues.
21ab4e
21ab4e
mainline:
21ab4e
> BUG: 1385758
21ab4e
> Reviewed-on: https://review.gluster.org/16544
21ab4e
> Smoke: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
21ab4e
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
21ab4e
> Reviewed-by: Avra Sengupta <asengupt@redhat.com>
21ab4e
> Tested-by: Avra Sengupta <asengupt@redhat.com>
21ab4e
(cherry picked from commit 45c3d3f3c505c772446bb63e57ec7e3477ad4042)
21ab4e
21ab4e
BUG: 1417815
21ab4e
Change-Id: Iab4a8a44fac5760373fac36956a3bcc27cf969da
21ab4e
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
21ab4e
Reviewed-on: https://code.engineering.redhat.com/gerrit/101313
21ab4e
Tested-by: Milind Changire <mchangir@redhat.com>
21ab4e
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
21ab4e
---
21ab4e
 tests/bugs/snapshot/bug-1399598-uss-with-ssl.t |  36 +++--
21ab4e
 tests/volume.rc                                |   1 +
21ab4e
 xlators/mgmt/glusterd/src/glusterd-utils.c     | 206 +++++++++++++++----------
21ab4e
 3 files changed, 148 insertions(+), 95 deletions(-)
21ab4e
21ab4e
diff --git a/tests/bugs/snapshot/bug-1399598-uss-with-ssl.t b/tests/bugs/snapshot/bug-1399598-uss-with-ssl.t
21ab4e
index 1c50f74..7d62526 100755
21ab4e
--- a/tests/bugs/snapshot/bug-1399598-uss-with-ssl.t
21ab4e
+++ b/tests/bugs/snapshot/bug-1399598-uss-with-ssl.t
21ab4e
@@ -16,6 +16,13 @@ function volume_online_brick_count
21ab4e
         $CLI volume status $V0 | awk '$1 == "Brick" &&  $6 != "N/A" { print $6}' | wc -l;
21ab4e
 }
21ab4e
 
21ab4e
+function total_online_bricks
21ab4e
+{
21ab4e
+        # This will count snapd, which isn't really a brick, but callers can
21ab4e
+        # account for that so it's OK.
21ab4e
+        find $GLUSTERD_WORKDIR -name '*.pid' | wc -l
21ab4e
+}
21ab4e
+
21ab4e
 cleanup;
21ab4e
 
21ab4e
 # Initialize the test setup
21ab4e
@@ -26,15 +33,17 @@ TEST create_self_signed_certs
21ab4e
 # Start glusterd
21ab4e
 TEST glusterd
21ab4e
 TEST pidof glusterd;
21ab4e
+#EST $CLI volume set all cluster.brick-multiplex on
21ab4e
 
21ab4e
 # Create and start the volume
21ab4e
 TEST $CLI volume create $V0 $H0:$L1/b1;
21ab4e
 
21ab4e
 TEST $CLI volume start $V0;
21ab4e
 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" volume_online_brick_count
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" total_online_bricks
21ab4e
 
21ab4e
 # Mount the volume and create some files
21ab4e
-TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
21ab4e
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
21ab4e
 
21ab4e
 TEST touch $M0/file;
21ab4e
 
21ab4e
@@ -43,12 +52,13 @@ TEST $CLI snapshot config activate-on-create enable;
21ab4e
 
21ab4e
 # Create a snapshot
21ab4e
 TEST $CLI snapshot create snap1 $V0 no-timestamp;
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" total_online_bricks
21ab4e
 
21ab4e
 TEST $CLI volume set $V0 features.uss enable;
21ab4e
-
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" total_online_bricks
21ab4e
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist
21ab4e
 
21ab4e
-EXPECT "Y" file_exists $M0/file
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" file_exists $M0/file
21ab4e
 # Volume set can trigger graph switch therefore chances are we send this
21ab4e
 # req to old graph. Old graph will not have .snaps. Therefore we should
21ab4e
 # wait for some time.
21ab4e
@@ -63,14 +73,14 @@ killall_gluster
21ab4e
 TEST glusterd
21ab4e
 TEST pidof glusterd;
21ab4e
 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" volume_online_brick_count
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" total_online_bricks
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist
21ab4e
 
21ab4e
 # Mount the volume
21ab4e
-TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
21ab4e
-
21ab4e
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist
21ab4e
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
21ab4e
 
21ab4e
-EXPECT "Y" file_exists $M0/file
21ab4e
-EXPECT "Y" file_exists $M0/.snaps/snap1/file
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" file_exists $M0/file
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" file_exists $M0/.snaps/snap1/file
21ab4e
 
21ab4e
 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
21ab4e
 
21ab4e
@@ -82,14 +92,14 @@ killall_gluster
21ab4e
 
21ab4e
 TEST glusterd
21ab4e
 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" volume_online_brick_count
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" total_online_bricks
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist
21ab4e
 
21ab4e
 # Mount the volume
21ab4e
-TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
21ab4e
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
21ab4e
 
21ab4e
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist
21ab4e
-
21ab4e
-EXPECT "Y" file_exists $M0/file
21ab4e
-EXPECT "Y" file_exists $M0/.snaps/snap1/file
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" file_exists $M0/file
21ab4e
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" file_exists $M0/.snaps/snap1/file
21ab4e
 
21ab4e
 TEST $CLI snapshot delete all
21ab4e
 TEST $CLI volume stop $V0
21ab4e
diff --git a/tests/volume.rc b/tests/volume.rc
21ab4e
index 25918a4..7b13e13 100644
21ab4e
--- a/tests/volume.rc
21ab4e
+++ b/tests/volume.rc
21ab4e
@@ -476,6 +476,7 @@ function volume_exists() {
21ab4e
 
21ab4e
 function killall_gluster() {
21ab4e
         pkill gluster
21ab4e
+        find $GLUSTERD_WORKDIR -name '*.pid' | xargs rm -f
21ab4e
         sleep 1
21ab4e
 }
21ab4e
 
21ab4e
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
21ab4e
index 3539b54..9a294a8 100644
21ab4e
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
21ab4e
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
21ab4e
@@ -5056,43 +5056,6 @@ attach_brick (xlator_t *this,
21ab4e
         return ret;
21ab4e
 }
21ab4e
 
21ab4e
-static glusterd_brickinfo_t *
21ab4e
-find_compatible_brick_in_volume (glusterd_conf_t *conf,
21ab4e
-                                 glusterd_volinfo_t *volinfo,
21ab4e
-                                 glusterd_brickinfo_t *brickinfo)
21ab4e
-{
21ab4e
-        xlator_t                *this                   = THIS;
21ab4e
-        glusterd_brickinfo_t    *other_brick;
21ab4e
-        char                    pidfile2[PATH_MAX]      = {0};
21ab4e
-        int32_t                 pid2                    = -1;
21ab4e
-
21ab4e
-        cds_list_for_each_entry (other_brick, &volinfo->bricks,
21ab4e
-                                 brick_list) {
21ab4e
-                if (other_brick == brickinfo) {
21ab4e
-                        continue;
21ab4e
-                }
21ab4e
-                if (!other_brick->started_here) {
21ab4e
-                        continue;
21ab4e
-                }
21ab4e
-                if (strcmp (brickinfo->hostname, other_brick->hostname) != 0) {
21ab4e
-                        continue;
21ab4e
-                }
21ab4e
-                GLUSTERD_GET_BRICK_PIDFILE (pidfile2, volinfo, other_brick,
21ab4e
-                                            conf);
21ab4e
-                if (!gf_is_service_running (pidfile2, &pid2)) {
21ab4e
-                        gf_log (this->name, GF_LOG_INFO,
21ab4e
-                                "cleaning up dead brick %s:%s",
21ab4e
-                                other_brick->hostname, other_brick->path);
21ab4e
-                        other_brick->started_here = _gf_false;
21ab4e
-                        sys_unlink (pidfile2);
21ab4e
-                        continue;
21ab4e
-                }
21ab4e
-                return other_brick;
21ab4e
-        }
21ab4e
-
21ab4e
-        return NULL;
21ab4e
-}
21ab4e
-
21ab4e
 static gf_boolean_t
21ab4e
 unsafe_option (dict_t *this, char *key, data_t *value, void *arg)
21ab4e
 {
21ab4e
@@ -5143,69 +5106,148 @@ opts_mismatch (dict_t *dict1, char *key, data_t *value1, void *dict2)
21ab4e
         return 0;
21ab4e
 }
21ab4e
 
21ab4e
+/* This name was just getting too long, hence the abbreviations. */
21ab4e
+static glusterd_brickinfo_t *
21ab4e
+find_compat_brick_in_vol (glusterd_conf_t *conf,
21ab4e
+                          glusterd_volinfo_t *srch_vol, /* volume to search */
21ab4e
+                          glusterd_volinfo_t *comp_vol, /* volume to compare */
21ab4e
+                          glusterd_brickinfo_t *brickinfo)
21ab4e
+{
21ab4e
+        xlator_t                *this                   = THIS;
21ab4e
+        glusterd_brickinfo_t    *other_brick;
21ab4e
+        char                    pidfile2[PATH_MAX]      = {0};
21ab4e
+        int32_t                 pid2                    = -1;
21ab4e
+
21ab4e
+        /*
21ab4e
+         * If comp_vol is provided, we have to check *volume* compatibility
21ab4e
+         * before we can check *brick* compatibility.
21ab4e
+         */
21ab4e
+        if (comp_vol) {
21ab4e
+                /*
21ab4e
+                 * It's kind of a shame that we have to do this check in both
21ab4e
+                 * directions, but an option might only exist on one of the two
21ab4e
+                 * dictionaries and dict_foreach_match will only find that one.
21ab4e
+                 */
21ab4e
+
21ab4e
+                gf_log (THIS->name, GF_LOG_DEBUG,
21ab4e
+                        "comparing options for %s and %s",
21ab4e
+                         comp_vol->volname, srch_vol->volname);
21ab4e
+
21ab4e
+                if (dict_foreach_match (comp_vol->dict, unsafe_option, NULL,
21ab4e
+                                        opts_mismatch, srch_vol->dict) < 0) {
21ab4e
+                        gf_log (THIS->name, GF_LOG_DEBUG, "failure forward");
21ab4e
+                        return NULL;
21ab4e
+                }
21ab4e
+
21ab4e
+                if (dict_foreach_match (srch_vol->dict, unsafe_option, NULL,
21ab4e
+                                        opts_mismatch, comp_vol->dict) < 0) {
21ab4e
+                        gf_log (THIS->name, GF_LOG_DEBUG, "failure backward");
21ab4e
+                        return NULL;
21ab4e
+                }
21ab4e
+
21ab4e
+                gf_log (THIS->name, GF_LOG_DEBUG, "all options match");
21ab4e
+        }
21ab4e
+
21ab4e
+        cds_list_for_each_entry (other_brick, &srch_vol->bricks,
21ab4e
+                                 brick_list) {
21ab4e
+                if (other_brick == brickinfo) {
21ab4e
+                        continue;
21ab4e
+                }
21ab4e
+                if (!other_brick->started_here) {
21ab4e
+                        continue;
21ab4e
+                }
21ab4e
+                if (strcmp (brickinfo->hostname, other_brick->hostname) != 0) {
21ab4e
+                        continue;
21ab4e
+                }
21ab4e
+                GLUSTERD_GET_BRICK_PIDFILE (pidfile2, srch_vol, other_brick,
21ab4e
+                                            conf);
21ab4e
+                if (!gf_is_service_running (pidfile2, &pid2)) {
21ab4e
+                        gf_log (this->name, GF_LOG_INFO,
21ab4e
+                                "cleaning up dead brick %s:%s",
21ab4e
+                                other_brick->hostname, other_brick->path);
21ab4e
+                        other_brick->started_here = _gf_false;
21ab4e
+                        sys_unlink (pidfile2);
21ab4e
+                        continue;
21ab4e
+                }
21ab4e
+                return other_brick;
21ab4e
+        }
21ab4e
+
21ab4e
+        return NULL;
21ab4e
+}
21ab4e
+
21ab4e
 static glusterd_brickinfo_t *
21ab4e
 find_compatible_brick (glusterd_conf_t *conf,
21ab4e
                        glusterd_volinfo_t *volinfo,
21ab4e
                        glusterd_brickinfo_t *brickinfo,
21ab4e
                        glusterd_volinfo_t **other_vol_p)
21ab4e
 {
21ab4e
-        glusterd_brickinfo_t    *other_brick;
21ab4e
-        glusterd_volinfo_t      *other_vol;
21ab4e
+        glusterd_brickinfo_t    *other_brick = NULL;
21ab4e
+        glusterd_volinfo_t      *other_vol   = NULL;
21ab4e
+        glusterd_snap_t         *snap        = NULL;
21ab4e
 
21ab4e
         /* Just return NULL here if multiplexing is disabled. */
21ab4e
         if (!is_brick_mx_enabled ()) {
21ab4e
                 return NULL;
21ab4e
         }
21ab4e
 
21ab4e
-        other_brick = find_compatible_brick_in_volume (conf, volinfo,
21ab4e
-                                                       brickinfo);
21ab4e
+        other_brick = find_compat_brick_in_vol (conf, volinfo, NULL, brickinfo);
21ab4e
         if (other_brick) {
21ab4e
                 *other_vol_p = volinfo;
21ab4e
                 return other_brick;
21ab4e
         }
21ab4e
 
21ab4e
-        cds_list_for_each_entry (other_vol, &conf->volumes, vol_list) {
21ab4e
-                if (other_vol == volinfo) {
21ab4e
-                        continue;
21ab4e
-                }
21ab4e
-                if (volinfo->is_snap_volume) {
21ab4e
-                        /*
21ab4e
-                         * Snap volumes do have different options than their
21ab4e
-                         * parents, but are nonetheless generally compatible.
21ab4e
-                         * Skip the option comparison for now, until we figure
21ab4e
-                         * out how to handle this (e.g. compare at the brick
21ab4e
-                         * level instead of the volume level for this case).
21ab4e
-                         *
21ab4e
-                         * TBD: figure out compatibility for snap bricks
21ab4e
-                         */
21ab4e
-                        goto no_opt_compare;
21ab4e
-                }
21ab4e
-                /*
21ab4e
-                 * It's kind of a shame that we have to do this check in both
21ab4e
-                 * directions, but an option might only exist on one of the two
21ab4e
-                 * dictionaries and dict_foreach_match will only find that one.
21ab4e
-                 */
21ab4e
-                gf_log (THIS->name, GF_LOG_DEBUG,
21ab4e
-                        "comparing options for %s and %s",
21ab4e
-                        volinfo->volname, other_vol->volname);
21ab4e
-                if (dict_foreach_match (volinfo->dict, unsafe_option, NULL,
21ab4e
-                                        opts_mismatch, other_vol->dict) < 0) {
21ab4e
-                        gf_log (THIS->name, GF_LOG_DEBUG, "failure forward");
21ab4e
-                        continue;
21ab4e
-                }
21ab4e
-                if (dict_foreach_match (other_vol->dict, unsafe_option, NULL,
21ab4e
-                                        opts_mismatch, volinfo->dict) < 0) {
21ab4e
-                        gf_log (THIS->name, GF_LOG_DEBUG, "failure backward");
21ab4e
-                        continue;
21ab4e
+        /*
21ab4e
+         * This check is necessary because changes to a volume's
21ab4e
+         * transport options aren't propagated to snapshots.  Such a
21ab4e
+         * change might break compatibility between the two, but we
21ab4e
+         * have no way to "evict" a brick from the process it's
21ab4e
+         * currently in.  If we keep it separate from the start, we
21ab4e
+         * avoid the problem.  Note that snapshot bricks can still be
21ab4e
+         * colocated with one another, even if they're for different
21ab4e
+         * volumes, because the only thing likely to differ is their
21ab4e
+         * auth options and those are not a factor in determining
21ab4e
+         * compatibility.
21ab4e
+         *
21ab4e
+         * The very same immutability of snapshot bricks' transport
21ab4e
+         * options, which can make them incompatible with their parent
21ab4e
+         * volumes, ensures that once-compatible snapshot bricks will
21ab4e
+         * remain compatible.  However, the same is not true for bricks
21ab4e
+         * belonging to two non-snapshot volumes.  In that case, a
21ab4e
+         * change to one might break compatibility and require them to
21ab4e
+         * be separated, which is not yet done.
21ab4e
+         *
21ab4e
+         * TBD: address the option-change issue for non-snapshot bricks
21ab4e
+         */
21ab4e
+        if (!volinfo->is_snap_volume) {
21ab4e
+                cds_list_for_each_entry (other_vol, &conf->volumes, vol_list) {
21ab4e
+                        if (other_vol == volinfo) {
21ab4e
+                                continue;
21ab4e
+                        }
21ab4e
+                        other_brick = find_compat_brick_in_vol (conf,
21ab4e
+                                                                other_vol,
21ab4e
+                                                                volinfo,
21ab4e
+                                                                brickinfo);
21ab4e
+                        if (other_brick) {
21ab4e
+                                *other_vol_p = other_vol;
21ab4e
+                                return other_brick;
21ab4e
+                        }
21ab4e
                 }
21ab4e
-                gf_log (THIS->name, GF_LOG_DEBUG, "all options match");
21ab4e
-no_opt_compare:
21ab4e
-                other_brick = find_compatible_brick_in_volume (conf,
21ab4e
-                                                               other_vol,
21ab4e
-                                                               brickinfo);
21ab4e
-                if (other_brick) {
21ab4e
-                        *other_vol_p = other_vol;
21ab4e
-                        return other_brick;
21ab4e
+        } else {
21ab4e
+                cds_list_for_each_entry (snap, &conf->snapshots, snap_list) {
21ab4e
+                    cds_list_for_each_entry (other_vol, &snap->volumes,
21ab4e
+                                             vol_list) {
21ab4e
+                        if (other_vol == volinfo) {
21ab4e
+                                continue;
21ab4e
+                        }
21ab4e
+                        other_brick = find_compat_brick_in_vol (conf,
21ab4e
+                                                                other_vol,
21ab4e
+                                                                volinfo,
21ab4e
+                                                                brickinfo);
21ab4e
+                        if (other_brick) {
21ab4e
+                                *other_vol_p = other_vol;
21ab4e
+                                return other_brick;
21ab4e
+                        }
21ab4e
+                    }
21ab4e
                 }
21ab4e
         }
21ab4e
 
21ab4e
-- 
21ab4e
1.8.3.1
21ab4e