|
|
190130 |
From b9b479de2a7fd1c5eefa7aa1142e0a39e0c96ca9 Mon Sep 17 00:00:00 2001
|
|
|
190130 |
From: Xavi Hernandez <xhernandez@redhat.com>
|
|
|
190130 |
Date: Sun, 1 Mar 2020 19:49:04 +0100
|
|
|
190130 |
Subject: [PATCH 419/449] cluster/afr: fix race when bricks come up
|
|
|
190130 |
|
|
|
190130 |
There was a problem when self-heal was sending lookups at the same time
|
|
|
190130 |
that one of the bricks was coming up. In this case there was a chance
|
|
|
190130 |
that the number of 'up' bricks changed in the middle of sending the
|
|
|
190130 |
requests to subvolumes, which caused a discrepancy in the expected
|
|
|
190130 |
number of replies and the actual number of sent requests.
|
|
|
190130 |
|
|
|
190130 |
This discrepancy caused AFR to continue executing requests before
|
|
|
190130 |
all requests were complete. Eventually, the frame of the pending
|
|
|
190130 |
request was destroyed when the operation terminated, causing a use-
|
|
|
190130 |
after-free issue when the answer was finally received.
|
|
|
190130 |
|
|
|
190130 |
In theory the same thing could happen in the reverse way, i.e. AFR
|
|
|
190130 |
tries to wait for more replies than sent requests, causing a hang.
|
|
|
190130 |
|
|
|
190130 |
Backport of:
|
|
|
190130 |
> Upstream-patch-link: https://review.gluster.org/24191
|
|
|
190130 |
> Change-Id: I7ed6108554ca379d532efb1a29b2de8085410b70
|
|
|
190130 |
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
|
|
|
190130 |
> Fixes: bz#1808875
|
|
|
190130 |
|
|
|
190130 |
BUG: 1794663
|
|
|
190130 |
Change-Id: I7ed6108554ca379d532efb1a29b2de8085410b70
|
|
|
190130 |
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
|
|
|
190130 |
Reviewed-on: https://code.engineering.redhat.com/gerrit/202489
|
|
|
190130 |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
190130 |
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
|
190130 |
---
|
|
|
190130 |
xlators/cluster/afr/src/afr-self-heal-common.c | 6 +++---
|
|
|
190130 |
xlators/cluster/afr/src/afr-self-heal-name.c | 4 +++-
|
|
|
190130 |
xlators/cluster/afr/src/afr-self-heal.h | 7 +++++--
|
|
|
190130 |
3 files changed, 11 insertions(+), 6 deletions(-)
|
|
|
190130 |
|
|
|
190130 |
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
|
|
|
190130 |
index ce1ea50..d942ccf 100644
|
|
|
190130 |
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
|
|
|
190130 |
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
|
|
|
190130 |
@@ -1869,12 +1869,12 @@ int
|
|
|
190130 |
afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid,
|
|
|
190130 |
struct afr_reply *replies)
|
|
|
190130 |
{
|
|
|
190130 |
- afr_private_t *priv = NULL;
|
|
|
190130 |
+ afr_local_t *local = NULL;
|
|
|
190130 |
|
|
|
190130 |
- priv = frame->this->private;
|
|
|
190130 |
+ local = frame->local;
|
|
|
190130 |
|
|
|
190130 |
return afr_selfheal_unlocked_discover_on(frame, inode, gfid, replies,
|
|
|
190130 |
- priv->child_up);
|
|
|
190130 |
+ local->child_up);
|
|
|
190130 |
}
|
|
|
190130 |
|
|
|
190130 |
unsigned int
|
|
|
190130 |
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
|
|
|
190130 |
index 7d4f208..dace071 100644
|
|
|
190130 |
--- a/xlators/cluster/afr/src/afr-self-heal-name.c
|
|
|
190130 |
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
|
|
|
190130 |
@@ -560,13 +560,15 @@ afr_selfheal_name_unlocked_inspect(call_frame_t *frame, xlator_t *this,
|
|
|
190130 |
struct afr_reply *replies = NULL;
|
|
|
190130 |
inode_t *inode = NULL;
|
|
|
190130 |
int first_idx = -1;
|
|
|
190130 |
+ afr_local_t *local = NULL;
|
|
|
190130 |
|
|
|
190130 |
priv = this->private;
|
|
|
190130 |
+ local = frame->local;
|
|
|
190130 |
|
|
|
190130 |
replies = alloca0(sizeof(*replies) * priv->child_count);
|
|
|
190130 |
|
|
|
190130 |
inode = afr_selfheal_unlocked_lookup_on(frame, parent, bname, replies,
|
|
|
190130 |
- priv->child_up, NULL);
|
|
|
190130 |
+ local->child_up, NULL);
|
|
|
190130 |
if (!inode)
|
|
|
190130 |
return -ENOMEM;
|
|
|
190130 |
|
|
|
190130 |
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
|
|
|
190130 |
index 8234cec..f7ecf5d 100644
|
|
|
190130 |
--- a/xlators/cluster/afr/src/afr-self-heal.h
|
|
|
190130 |
+++ b/xlators/cluster/afr/src/afr-self-heal.h
|
|
|
190130 |
@@ -46,13 +46,16 @@
|
|
|
190130 |
afr_local_t *__local = frame->local; \
|
|
|
190130 |
afr_private_t *__priv = frame->this->private; \
|
|
|
190130 |
int __i = 0; \
|
|
|
190130 |
- int __count = AFR_COUNT(list, __priv->child_count); \
|
|
|
190130 |
+ int __count = 0; \
|
|
|
190130 |
+ unsigned char *__list = alloca(__priv->child_count); \
|
|
|
190130 |
\
|
|
|
190130 |
+ memcpy(__list, list, sizeof(*__list) * __priv->child_count); \
|
|
|
190130 |
+ __count = AFR_COUNT(__list, __priv->child_count); \
|
|
|
190130 |
__local->barrier.waitfor = __count; \
|
|
|
190130 |
afr_local_replies_wipe(__local, __priv); \
|
|
|
190130 |
\
|
|
|
190130 |
for (__i = 0; __i < __priv->child_count; __i++) { \
|
|
|
190130 |
- if (!list[__i]) \
|
|
|
190130 |
+ if (!__list[__i]) \
|
|
|
190130 |
continue; \
|
|
|
190130 |
STACK_WIND_COOKIE(frame, rfn, (void *)(long)__i, \
|
|
|
190130 |
__priv->children[__i], \
|
|
|
190130 |
--
|
|
|
190130 |
1.8.3.1
|
|
|
190130 |
|