From 01bb17a0910a638e89a44a6da4b1359123940498 Mon Sep 17 00:00:00 2001 From: Hari Gowtham Date: Wed, 17 Apr 2019 12:17:27 +0530 Subject: [PATCH 130/141] tier/test: new-tier-cmds.t fails after a glusterd restart Problem: new-tier-cmds.t does a restart of gluster processes and after the restart the bricks and the tier process takes more time than before to come online. This causes the detach start to fail. Fix: Give it enough time to come online after the restart. label: DOWNSTREAM ONLY Change-Id: I0f50b0bb77fe49ebd3a0292e190d0350d7994cfe Signed-off-by: Hari Gowtham Reviewed-on: https://code.engineering.redhat.com/gerrit/168130 Tested-by: RHGS Build Bot Reviewed-by: Atin Mukherjee --- tests/basic/tier/new-tier-cmds.t | 45 ++++++++++++++++++++++++++-------------- tests/volume.rc | 8 +++++++ 2 files changed, 37 insertions(+), 16 deletions(-) diff --git a/tests/basic/tier/new-tier-cmds.t b/tests/basic/tier/new-tier-cmds.t index b9c9390..92881ac 100644 --- a/tests/basic/tier/new-tier-cmds.t +++ b/tests/basic/tier/new-tier-cmds.t @@ -19,14 +19,6 @@ function create_dist_tier_vol () { TEST $CLI_1 volume tier $V0 attach replica 2 $H1:$B1/${V0}_h1 $H2:$B2/${V0}_h2 $H3:$B3/${V0}_h3 $H1:$B1/${V0}_h4 $H2:$B2/${V0}_h5 $H3:$B3/${V0}_h6 } -function tier_daemon_status { - local _VAR=CLI_$1 - local xpath_sel='//node[hostname="Tier Daemon"][path="localhost"]/status' - ${!_VAR} --xml volume status $V0 \ - | xmllint --xpath "$xpath_sel" - \ - | sed -n '/.*\([0-9]*\).*/s//\1/p' -} - function detach_xml_status { $CLI_1 volume tier $V0 detach status --xml | sed -n \ '/.*Detach tier status successful/p' | wc -l @@ -70,7 +62,20 @@ TEST $glusterd_2; EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers; #after starting detach tier the detach tier status should display the status -sleep 2 +EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_b1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_b4 +EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_h1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_h4 +EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_b2 +EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_b5 +EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_h2 +EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_h5 +EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_b3 +EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_b6 +EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_h3 +EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_h6 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" get_shd_count +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" get_tierd_count $CLI_1 volume status TEST $CLI_1 volume tier $V0 detach start @@ -91,13 +96,21 @@ EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers; EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H2 $B2/${V0}_b2 EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H2 $B2/${V0}_h2 -# Parsing normal output doesn't work because of line-wrap issues on our -# regression machines, and the version of xmllint there doesn't support --xpath -# so we can't do it that way either. In short, there's no way for us to detect -# when we can stop waiting, so we just have to wait the maximum time every time -# and hope any failures will show up later in the script. -sleep $PROCESS_UP_TIMEOUT -#XPECT_WITHIN $PROCESS_UP_TIMEOUT 1 tier_daemon_status 2 +EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_b1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_b4 +EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_h1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 1 $V0 $H1 $B1/${V0}_h4 +EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_b2 +EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_b5 +EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_h2 +EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 2 $V0 $H2 $B2/${V0}_h5 +EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_b3 +EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_b6 +EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_h3 +EXPECT_WITHIN $CHILD_UP_TIMEOUT 1 cluster_brick_up_status 3 $V0 $H3 $B3/${V0}_h6 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" get_shd_count +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" get_tierd_count +$CLI_1 volume status EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" tier_detach_status diff --git a/tests/volume.rc b/tests/volume.rc index 289b197..b326098 100644 --- a/tests/volume.rc +++ b/tests/volume.rc @@ -719,6 +719,14 @@ function get_snapd_count { ps auxww | grep glusterfs | grep snapd.pid | grep -v grep | wc -l } +function get_tierd_count { + ps auxww | grep glusterfs | grep tierd.pid | grep -v grep | wc -l +} + +function get_shd_count { + ps auxww | grep glusterfs | grep shd.pid | grep -v grep | wc -l +} + function drop_cache() { case $OSTYPE in Linux) -- 1.8.3.1