Blob Blame History Raw
From 91add881bca659d456c0738c611c001babb62cbb Mon Sep 17 00:00:00 2001
From: Jeff Darcy <jdarcy@redhat.com>
Date: Fri, 3 Feb 2017 10:51:21 -0500
Subject: [PATCH 332/361] glusterd: keep snapshot bricks separate from regular
 ones

The problem here is that a volume's transport options can change, but
any snapshots' bricks don't follow along even though they're now
incompatible (with respect to multiplexing).  This was causing the
USS+SSL test to fail.  By keeping the snapshot bricks separate
(though still potentially multiplexed with other snapshot bricks
including those for other volumes) we can ensure that they remain
unaffected by changes to their parent volumes.

Also fixed various issues with how the test waits (or more precisely
didn't) for various events to complete before it continues.

mainline:
> BUG: 1385758
> Reviewed-on: https://review.gluster.org/16544
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Avra Sengupta <asengupt@redhat.com>
> Tested-by: Avra Sengupta <asengupt@redhat.com>
(cherry picked from commit 45c3d3f3c505c772446bb63e57ec7e3477ad4042)

BUG: 1417815
Change-Id: Iab4a8a44fac5760373fac36956a3bcc27cf969da
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/101313
Tested-by: Milind Changire <mchangir@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
 tests/bugs/snapshot/bug-1399598-uss-with-ssl.t |  36 +++--
 tests/volume.rc                                |   1 +
 xlators/mgmt/glusterd/src/glusterd-utils.c     | 206 +++++++++++++++----------
 3 files changed, 148 insertions(+), 95 deletions(-)

diff --git a/tests/bugs/snapshot/bug-1399598-uss-with-ssl.t b/tests/bugs/snapshot/bug-1399598-uss-with-ssl.t
index 1c50f74..7d62526 100755
--- a/tests/bugs/snapshot/bug-1399598-uss-with-ssl.t
+++ b/tests/bugs/snapshot/bug-1399598-uss-with-ssl.t
@@ -16,6 +16,13 @@ function volume_online_brick_count
         $CLI volume status $V0 | awk '$1 == "Brick" &&  $6 != "N/A" { print $6}' | wc -l;
 }
 
+function total_online_bricks
+{
+        # This will count snapd, which isn't really a brick, but callers can
+        # account for that so it's OK.
+        find $GLUSTERD_WORKDIR -name '*.pid' | wc -l
+}
+
 cleanup;
 
 # Initialize the test setup
@@ -26,15 +33,17 @@ TEST create_self_signed_certs
 # Start glusterd
 TEST glusterd
 TEST pidof glusterd;
+#EST $CLI volume set all cluster.brick-multiplex on
 
 # Create and start the volume
 TEST $CLI volume create $V0 $H0:$L1/b1;
 
 TEST $CLI volume start $V0;
 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" volume_online_brick_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" total_online_bricks
 
 # Mount the volume and create some files
-TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
 
 TEST touch $M0/file;
 
@@ -43,12 +52,13 @@ TEST $CLI snapshot config activate-on-create enable;
 
 # Create a snapshot
 TEST $CLI snapshot create snap1 $V0 no-timestamp;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" total_online_bricks
 
 TEST $CLI volume set $V0 features.uss enable;
-
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" total_online_bricks
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist
 
-EXPECT "Y" file_exists $M0/file
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" file_exists $M0/file
 # Volume set can trigger graph switch therefore chances are we send this
 # req to old graph. Old graph will not have .snaps. Therefore we should
 # wait for some time.
@@ -63,14 +73,14 @@ killall_gluster
 TEST glusterd
 TEST pidof glusterd;
 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" volume_online_brick_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" total_online_bricks
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist
 
 # Mount the volume
-TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
-
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
 
-EXPECT "Y" file_exists $M0/file
-EXPECT "Y" file_exists $M0/.snaps/snap1/file
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" file_exists $M0/file
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" file_exists $M0/.snaps/snap1/file
 
 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
 
@@ -82,14 +92,14 @@ killall_gluster
 
 TEST glusterd
 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" volume_online_brick_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" total_online_bricks
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist
 
 # Mount the volume
-TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
 
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist
-
-EXPECT "Y" file_exists $M0/file
-EXPECT "Y" file_exists $M0/.snaps/snap1/file
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" file_exists $M0/file
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" file_exists $M0/.snaps/snap1/file
 
 TEST $CLI snapshot delete all
 TEST $CLI volume stop $V0
diff --git a/tests/volume.rc b/tests/volume.rc
index 25918a4..7b13e13 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -476,6 +476,7 @@ function volume_exists() {
 
 function killall_gluster() {
         pkill gluster
+        find $GLUSTERD_WORKDIR -name '*.pid' | xargs rm -f
         sleep 1
 }
 
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 3539b54..9a294a8 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -5056,43 +5056,6 @@ attach_brick (xlator_t *this,
         return ret;
 }
 
-static glusterd_brickinfo_t *
-find_compatible_brick_in_volume (glusterd_conf_t *conf,
-                                 glusterd_volinfo_t *volinfo,
-                                 glusterd_brickinfo_t *brickinfo)
-{
-        xlator_t                *this                   = THIS;
-        glusterd_brickinfo_t    *other_brick;
-        char                    pidfile2[PATH_MAX]      = {0};
-        int32_t                 pid2                    = -1;
-
-        cds_list_for_each_entry (other_brick, &volinfo->bricks,
-                                 brick_list) {
-                if (other_brick == brickinfo) {
-                        continue;
-                }
-                if (!other_brick->started_here) {
-                        continue;
-                }
-                if (strcmp (brickinfo->hostname, other_brick->hostname) != 0) {
-                        continue;
-                }
-                GLUSTERD_GET_BRICK_PIDFILE (pidfile2, volinfo, other_brick,
-                                            conf);
-                if (!gf_is_service_running (pidfile2, &pid2)) {
-                        gf_log (this->name, GF_LOG_INFO,
-                                "cleaning up dead brick %s:%s",
-                                other_brick->hostname, other_brick->path);
-                        other_brick->started_here = _gf_false;
-                        sys_unlink (pidfile2);
-                        continue;
-                }
-                return other_brick;
-        }
-
-        return NULL;
-}
-
 static gf_boolean_t
 unsafe_option (dict_t *this, char *key, data_t *value, void *arg)
 {
@@ -5143,69 +5106,148 @@ opts_mismatch (dict_t *dict1, char *key, data_t *value1, void *dict2)
         return 0;
 }
 
+/* This name was just getting too long, hence the abbreviations. */
+static glusterd_brickinfo_t *
+find_compat_brick_in_vol (glusterd_conf_t *conf,
+                          glusterd_volinfo_t *srch_vol, /* volume to search */
+                          glusterd_volinfo_t *comp_vol, /* volume to compare */
+                          glusterd_brickinfo_t *brickinfo)
+{
+        xlator_t                *this                   = THIS;
+        glusterd_brickinfo_t    *other_brick;
+        char                    pidfile2[PATH_MAX]      = {0};
+        int32_t                 pid2                    = -1;
+
+        /*
+         * If comp_vol is provided, we have to check *volume* compatibility
+         * before we can check *brick* compatibility.
+         */
+        if (comp_vol) {
+                /*
+                 * It's kind of a shame that we have to do this check in both
+                 * directions, but an option might only exist on one of the two
+                 * dictionaries and dict_foreach_match will only find that one.
+                 */
+
+                gf_log (THIS->name, GF_LOG_DEBUG,
+                        "comparing options for %s and %s",
+                         comp_vol->volname, srch_vol->volname);
+
+                if (dict_foreach_match (comp_vol->dict, unsafe_option, NULL,
+                                        opts_mismatch, srch_vol->dict) < 0) {
+                        gf_log (THIS->name, GF_LOG_DEBUG, "failure forward");
+                        return NULL;
+                }
+
+                if (dict_foreach_match (srch_vol->dict, unsafe_option, NULL,
+                                        opts_mismatch, comp_vol->dict) < 0) {
+                        gf_log (THIS->name, GF_LOG_DEBUG, "failure backward");
+                        return NULL;
+                }
+
+                gf_log (THIS->name, GF_LOG_DEBUG, "all options match");
+        }
+
+        cds_list_for_each_entry (other_brick, &srch_vol->bricks,
+                                 brick_list) {
+                if (other_brick == brickinfo) {
+                        continue;
+                }
+                if (!other_brick->started_here) {
+                        continue;
+                }
+                if (strcmp (brickinfo->hostname, other_brick->hostname) != 0) {
+                        continue;
+                }
+                GLUSTERD_GET_BRICK_PIDFILE (pidfile2, srch_vol, other_brick,
+                                            conf);
+                if (!gf_is_service_running (pidfile2, &pid2)) {
+                        gf_log (this->name, GF_LOG_INFO,
+                                "cleaning up dead brick %s:%s",
+                                other_brick->hostname, other_brick->path);
+                        other_brick->started_here = _gf_false;
+                        sys_unlink (pidfile2);
+                        continue;
+                }
+                return other_brick;
+        }
+
+        return NULL;
+}
+
 static glusterd_brickinfo_t *
 find_compatible_brick (glusterd_conf_t *conf,
                        glusterd_volinfo_t *volinfo,
                        glusterd_brickinfo_t *brickinfo,
                        glusterd_volinfo_t **other_vol_p)
 {
-        glusterd_brickinfo_t    *other_brick;
-        glusterd_volinfo_t      *other_vol;
+        glusterd_brickinfo_t    *other_brick = NULL;
+        glusterd_volinfo_t      *other_vol   = NULL;
+        glusterd_snap_t         *snap        = NULL;
 
         /* Just return NULL here if multiplexing is disabled. */
         if (!is_brick_mx_enabled ()) {
                 return NULL;
         }
 
-        other_brick = find_compatible_brick_in_volume (conf, volinfo,
-                                                       brickinfo);
+        other_brick = find_compat_brick_in_vol (conf, volinfo, NULL, brickinfo);
         if (other_brick) {
                 *other_vol_p = volinfo;
                 return other_brick;
         }
 
-        cds_list_for_each_entry (other_vol, &conf->volumes, vol_list) {
-                if (other_vol == volinfo) {
-                        continue;
-                }
-                if (volinfo->is_snap_volume) {
-                        /*
-                         * Snap volumes do have different options than their
-                         * parents, but are nonetheless generally compatible.
-                         * Skip the option comparison for now, until we figure
-                         * out how to handle this (e.g. compare at the brick
-                         * level instead of the volume level for this case).
-                         *
-                         * TBD: figure out compatibility for snap bricks
-                         */
-                        goto no_opt_compare;
-                }
-                /*
-                 * It's kind of a shame that we have to do this check in both
-                 * directions, but an option might only exist on one of the two
-                 * dictionaries and dict_foreach_match will only find that one.
-                 */
-                gf_log (THIS->name, GF_LOG_DEBUG,
-                        "comparing options for %s and %s",
-                        volinfo->volname, other_vol->volname);
-                if (dict_foreach_match (volinfo->dict, unsafe_option, NULL,
-                                        opts_mismatch, other_vol->dict) < 0) {
-                        gf_log (THIS->name, GF_LOG_DEBUG, "failure forward");
-                        continue;
-                }
-                if (dict_foreach_match (other_vol->dict, unsafe_option, NULL,
-                                        opts_mismatch, volinfo->dict) < 0) {
-                        gf_log (THIS->name, GF_LOG_DEBUG, "failure backward");
-                        continue;
+        /*
+         * This check is necessary because changes to a volume's
+         * transport options aren't propagated to snapshots.  Such a
+         * change might break compatibility between the two, but we
+         * have no way to "evict" a brick from the process it's
+         * currently in.  If we keep it separate from the start, we
+         * avoid the problem.  Note that snapshot bricks can still be
+         * colocated with one another, even if they're for different
+         * volumes, because the only thing likely to differ is their
+         * auth options and those are not a factor in determining
+         * compatibility.
+         *
+         * The very same immutability of snapshot bricks' transport
+         * options, which can make them incompatible with their parent
+         * volumes, ensures that once-compatible snapshot bricks will
+         * remain compatible.  However, the same is not true for bricks
+         * belonging to two non-snapshot volumes.  In that case, a
+         * change to one might break compatibility and require them to
+         * be separated, which is not yet done.
+         *
+         * TBD: address the option-change issue for non-snapshot bricks
+         */
+        if (!volinfo->is_snap_volume) {
+                cds_list_for_each_entry (other_vol, &conf->volumes, vol_list) {
+                        if (other_vol == volinfo) {
+                                continue;
+                        }
+                        other_brick = find_compat_brick_in_vol (conf,
+                                                                other_vol,
+                                                                volinfo,
+                                                                brickinfo);
+                        if (other_brick) {
+                                *other_vol_p = other_vol;
+                                return other_brick;
+                        }
                 }
-                gf_log (THIS->name, GF_LOG_DEBUG, "all options match");
-no_opt_compare:
-                other_brick = find_compatible_brick_in_volume (conf,
-                                                               other_vol,
-                                                               brickinfo);
-                if (other_brick) {
-                        *other_vol_p = other_vol;
-                        return other_brick;
+        } else {
+                cds_list_for_each_entry (snap, &conf->snapshots, snap_list) {
+                    cds_list_for_each_entry (other_vol, &snap->volumes,
+                                             vol_list) {
+                        if (other_vol == volinfo) {
+                                continue;
+                        }
+                        other_brick = find_compat_brick_in_vol (conf,
+                                                                other_vol,
+                                                                volinfo,
+                                                                brickinfo);
+                        if (other_brick) {
+                                *other_vol_p = other_vol;
+                                return other_brick;
+                        }
+                    }
                 }
         }
 
-- 
1.8.3.1