|
|
74096c |
From ecaa0f10820f4b6e803021919ce59a43aedf356b Mon Sep 17 00:00:00 2001
|
|
|
74096c |
From: Ravishankar N <ravishankar@redhat.com>
|
|
|
74096c |
Date: Thu, 4 Jun 2020 16:15:35 +0530
|
|
|
74096c |
Subject: [PATCH 402/449] afr: wake up index healer threads
|
|
|
74096c |
|
|
|
74096c |
...whenever shd is re-enabled after disabling or there is a change in
|
|
|
74096c |
`cluster.heal-timeout`, without needing to restart shd or wait for the
|
|
|
74096c |
current `cluster.heal-timeout` seconds to expire.
|
|
|
74096c |
|
|
|
74096c |
> Upstream patch link: https://review.gluster.org/#/c/glusterfs/+/23288/
|
|
|
74096c |
> Change-Id: Ia5ebd7c8e9f5b54cba3199c141fdd1af2f9b9bfe
|
|
|
74096c |
> fixes: bz#1744548
|
|
|
74096c |
> Reported-by: Glen Kiessling <glenk1973@hotmail.com>
|
|
|
74096c |
> Signed-off-by: Ravishankar N <ravishankar@redhat.com>
|
|
|
74096c |
|
|
|
74096c |
BUG: 1764091
|
|
|
74096c |
Change-Id: I42aa0807f09b5a09510fe9efb4a1697dad3410a3
|
|
|
74096c |
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
|
|
|
74096c |
Reviewed-on: https://code.engineering.redhat.com/gerrit/202368
|
|
|
74096c |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
74096c |
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
|
74096c |
---
|
|
|
74096c |
tests/bugs/replicate/bug-1744548-heal-timeout.t | 42 +++++++++++++++++++++++++
|
|
|
74096c |
xlators/cluster/afr/src/afr-common.c | 6 ++--
|
|
|
74096c |
xlators/cluster/afr/src/afr-self-heald.c | 14 ++++++---
|
|
|
74096c |
xlators/cluster/afr/src/afr-self-heald.h | 3 --
|
|
|
74096c |
xlators/cluster/afr/src/afr.c | 10 ++++++
|
|
|
74096c |
xlators/cluster/afr/src/afr.h | 2 ++
|
|
|
74096c |
6 files changed, 66 insertions(+), 11 deletions(-)
|
|
|
74096c |
create mode 100644 tests/bugs/replicate/bug-1744548-heal-timeout.t
|
|
|
74096c |
|
|
|
74096c |
diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t
|
|
|
74096c |
new file mode 100644
|
|
|
74096c |
index 0000000..3cb73bc
|
|
|
74096c |
--- /dev/null
|
|
|
74096c |
+++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t
|
|
|
74096c |
@@ -0,0 +1,42 @@
|
|
|
74096c |
+#!/bin/bash
|
|
|
74096c |
+
|
|
|
74096c |
+. $(dirname $0)/../../include.rc
|
|
|
74096c |
+. $(dirname $0)/../../volume.rc
|
|
|
74096c |
+. $(dirname $0)/../../afr.rc
|
|
|
74096c |
+
|
|
|
74096c |
+cleanup;
|
|
|
74096c |
+
|
|
|
74096c |
+TEST glusterd;
|
|
|
74096c |
+TEST pidof glusterd;
|
|
|
74096c |
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
|
|
|
74096c |
+TEST $CLI volume heal $V0 disable
|
|
|
74096c |
+TEST $CLI volume start $V0
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
|
|
|
74096c |
+TEST ! $CLI volume heal $V0
|
|
|
74096c |
+
|
|
|
74096c |
+# Enable shd and verify that index crawl is triggered immediately.
|
|
|
74096c |
+TEST $CLI volume profile $V0 start
|
|
|
74096c |
+TEST $CLI volume profile $V0 info clear
|
|
|
74096c |
+TEST $CLI volume heal $V0 enable
|
|
|
74096c |
+TEST $CLI volume heal $V0
|
|
|
74096c |
+# Each brick does 3 opendirs, corresponding to dirty, xattrop and entry-changes
|
|
|
74096c |
+COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'`
|
|
|
74096c |
+TEST [ "$COUNT" == "333" ]
|
|
|
74096c |
+
|
|
|
74096c |
+# Check that a change in heal-timeout is honoured immediately.
|
|
|
74096c |
+TEST $CLI volume set $V0 cluster.heal-timeout 5
|
|
|
74096c |
+sleep 10
|
|
|
74096c |
+COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'`
|
|
|
74096c |
+# Two crawls must have happened.
|
|
|
74096c |
+TEST [ "$COUNT" == "666" ]
|
|
|
74096c |
+
|
|
|
74096c |
+# shd must not heal if it is disabled and heal-timeout is changed.
|
|
|
74096c |
+TEST $CLI volume heal $V0 disable
|
|
|
74096c |
+TEST $CLI volume profile $V0 info clear
|
|
|
74096c |
+TEST $CLI volume set $V0 cluster.heal-timeout 6
|
|
|
74096c |
+sleep 6
|
|
|
74096c |
+COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'`
|
|
|
74096c |
+TEST [ -z $COUNT ]
|
|
|
74096c |
+cleanup;
|
|
|
74096c |
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
|
|
|
74096c |
index 3690b84..eef7fd2 100644
|
|
|
74096c |
--- a/xlators/cluster/afr/src/afr-common.c
|
|
|
74096c |
+++ b/xlators/cluster/afr/src/afr-common.c
|
|
|
74096c |
@@ -5613,10 +5613,8 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2)
|
|
|
74096c |
* b) Already heard from everyone, but we now got a child-up
|
|
|
74096c |
* event.
|
|
|
74096c |
*/
|
|
|
74096c |
- if (have_heard_from_all && priv->shd.iamshd) {
|
|
|
74096c |
- for (i = 0; i < priv->child_count; i++)
|
|
|
74096c |
- if (priv->child_up[i])
|
|
|
74096c |
- afr_selfheal_childup(this, i);
|
|
|
74096c |
+ if (have_heard_from_all) {
|
|
|
74096c |
+ afr_selfheal_childup(this, priv);
|
|
|
74096c |
}
|
|
|
74096c |
}
|
|
|
74096c |
out:
|
|
|
74096c |
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
|
|
|
74096c |
index 7eb1207..95ac5f2 100644
|
|
|
74096c |
--- a/xlators/cluster/afr/src/afr-self-heald.c
|
|
|
74096c |
+++ b/xlators/cluster/afr/src/afr-self-heald.c
|
|
|
74096c |
@@ -1258,12 +1258,18 @@ out:
|
|
|
74096c |
return ret;
|
|
|
74096c |
}
|
|
|
74096c |
|
|
|
74096c |
-int
|
|
|
74096c |
-afr_selfheal_childup(xlator_t *this, int subvol)
|
|
|
74096c |
+void
|
|
|
74096c |
+afr_selfheal_childup(xlator_t *this, afr_private_t *priv)
|
|
|
74096c |
{
|
|
|
74096c |
- afr_shd_index_healer_spawn(this, subvol);
|
|
|
74096c |
+ int subvol = 0;
|
|
|
74096c |
|
|
|
74096c |
- return 0;
|
|
|
74096c |
+ if (!priv->shd.iamshd)
|
|
|
74096c |
+ return;
|
|
|
74096c |
+ for (subvol = 0; subvol < priv->child_count; subvol++)
|
|
|
74096c |
+ if (priv->child_up[subvol])
|
|
|
74096c |
+ afr_shd_index_healer_spawn(this, subvol);
|
|
|
74096c |
+
|
|
|
74096c |
+ return;
|
|
|
74096c |
}
|
|
|
74096c |
|
|
|
74096c |
int
|
|
|
74096c |
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
|
|
|
74096c |
index 7de7c43..1990539 100644
|
|
|
74096c |
--- a/xlators/cluster/afr/src/afr-self-heald.h
|
|
|
74096c |
+++ b/xlators/cluster/afr/src/afr-self-heald.h
|
|
|
74096c |
@@ -60,9 +60,6 @@ typedef struct {
|
|
|
74096c |
} afr_self_heald_t;
|
|
|
74096c |
|
|
|
74096c |
int
|
|
|
74096c |
-afr_selfheal_childup(xlator_t *this, int subvol);
|
|
|
74096c |
-
|
|
|
74096c |
-int
|
|
|
74096c |
afr_selfheal_daemon_init(xlator_t *this);
|
|
|
74096c |
|
|
|
74096c |
int
|
|
|
74096c |
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
|
|
|
74096c |
index 33258a0..8f9e71f 100644
|
|
|
74096c |
--- a/xlators/cluster/afr/src/afr.c
|
|
|
74096c |
+++ b/xlators/cluster/afr/src/afr.c
|
|
|
74096c |
@@ -141,6 +141,7 @@ reconfigure(xlator_t *this, dict_t *options)
|
|
|
74096c |
afr_private_t *priv = NULL;
|
|
|
74096c |
xlator_t *read_subvol = NULL;
|
|
|
74096c |
int read_subvol_index = -1;
|
|
|
74096c |
+ int timeout_old = 0;
|
|
|
74096c |
int ret = -1;
|
|
|
74096c |
int index = -1;
|
|
|
74096c |
char *qtype = NULL;
|
|
|
74096c |
@@ -150,6 +151,7 @@ reconfigure(xlator_t *this, dict_t *options)
|
|
|
74096c |
char *locking_scheme = NULL;
|
|
|
74096c |
gf_boolean_t consistent_io = _gf_false;
|
|
|
74096c |
gf_boolean_t choose_local_old = _gf_false;
|
|
|
74096c |
+ gf_boolean_t enabled_old = _gf_false;
|
|
|
74096c |
|
|
|
74096c |
priv = this->private;
|
|
|
74096c |
|
|
|
74096c |
@@ -255,11 +257,13 @@ reconfigure(xlator_t *this, dict_t *options)
|
|
|
74096c |
GF_OPTION_RECONF("ensure-durability", priv->ensure_durability, options,
|
|
|
74096c |
bool, out);
|
|
|
74096c |
|
|
|
74096c |
+ enabled_old = priv->shd.enabled;
|
|
|
74096c |
GF_OPTION_RECONF("self-heal-daemon", priv->shd.enabled, options, bool, out);
|
|
|
74096c |
|
|
|
74096c |
GF_OPTION_RECONF("iam-self-heal-daemon", priv->shd.iamshd, options, bool,
|
|
|
74096c |
out);
|
|
|
74096c |
|
|
|
74096c |
+ timeout_old = priv->shd.timeout;
|
|
|
74096c |
GF_OPTION_RECONF("heal-timeout", priv->shd.timeout, options, int32, out);
|
|
|
74096c |
|
|
|
74096c |
GF_OPTION_RECONF("consistent-metadata", priv->consistent_metadata, options,
|
|
|
74096c |
@@ -283,6 +287,12 @@ reconfigure(xlator_t *this, dict_t *options)
|
|
|
74096c |
consistent_io = _gf_false;
|
|
|
74096c |
priv->consistent_io = consistent_io;
|
|
|
74096c |
|
|
|
74096c |
+ if (priv->shd.enabled) {
|
|
|
74096c |
+ if ((priv->shd.enabled != enabled_old) ||
|
|
|
74096c |
+ (timeout_old != priv->shd.timeout))
|
|
|
74096c |
+ afr_selfheal_childup(this, priv);
|
|
|
74096c |
+ }
|
|
|
74096c |
+
|
|
|
74096c |
ret = 0;
|
|
|
74096c |
out:
|
|
|
74096c |
return ret;
|
|
|
74096c |
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
|
|
|
74096c |
index e731cfa..18f1a6a 100644
|
|
|
74096c |
--- a/xlators/cluster/afr/src/afr.h
|
|
|
74096c |
+++ b/xlators/cluster/afr/src/afr.h
|
|
|
74096c |
@@ -1332,4 +1332,6 @@ afr_lookup_has_quorum(call_frame_t *frame, xlator_t *this,
|
|
|
74096c |
void
|
|
|
74096c |
afr_mark_new_entry_changelog(call_frame_t *frame, xlator_t *this);
|
|
|
74096c |
|
|
|
74096c |
+void
|
|
|
74096c |
+afr_selfheal_childup(xlator_t *this, afr_private_t *priv);
|
|
|
74096c |
#endif /* __AFR_H__ */
|
|
|
74096c |
--
|
|
|
74096c |
1.8.3.1
|
|
|
74096c |
|