9ae3f9
From b9b479de2a7fd1c5eefa7aa1142e0a39e0c96ca9 Mon Sep 17 00:00:00 2001
9ae3f9
From: Xavi Hernandez <xhernandez@redhat.com>
9ae3f9
Date: Sun, 1 Mar 2020 19:49:04 +0100
9ae3f9
Subject: [PATCH 419/449] cluster/afr: fix race when bricks come up
9ae3f9
9ae3f9
There was a problem when self-heal was sending lookups at the same time
9ae3f9
that one of the bricks was coming up. In this case there was a chance
9ae3f9
that the number of 'up' bricks changed in the middle of sending the
9ae3f9
requests to subvolumes which caused a discrepancy in the expected
9ae3f9
number of replies and the actual number of sent requests.
9ae3f9
9ae3f9
This discrepancy caused AFR to continue executing requests before
9ae3f9
all requests were complete. Eventually, the frame of the pending
9ae3f9
request was destroyed when the operation terminated, causing a use-
9ae3f9
after-free issue when the answer was finally received.
9ae3f9
9ae3f9
In theory the same thing could happen in the reverse way, i.e. AFR
9ae3f9
tries to wait for more replies than sent requests, causing a hang.
9ae3f9
9ae3f9
Backport of:
9ae3f9
> Upstream-patch-link: https://review.gluster.org/24191
9ae3f9
> Change-Id: I7ed6108554ca379d532efb1a29b2de8085410b70
9ae3f9
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
9ae3f9
> Fixes: bz#1808875
9ae3f9
9ae3f9
BUG: 1794663
9ae3f9
Change-Id: I7ed6108554ca379d532efb1a29b2de8085410b70
9ae3f9
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
9ae3f9
Reviewed-on: https://code.engineering.redhat.com/gerrit/202489
9ae3f9
Tested-by: RHGS Build Bot <nigelb@redhat.com>
9ae3f9
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
9ae3f9
---
9ae3f9
 xlators/cluster/afr/src/afr-self-heal-common.c | 6 +++---
9ae3f9
 xlators/cluster/afr/src/afr-self-heal-name.c   | 4 +++-
9ae3f9
 xlators/cluster/afr/src/afr-self-heal.h        | 7 +++++--
9ae3f9
 3 files changed, 11 insertions(+), 6 deletions(-)
9ae3f9
9ae3f9
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
9ae3f9
index ce1ea50..d942ccf 100644
9ae3f9
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
9ae3f9
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
9ae3f9
@@ -1869,12 +1869,12 @@ int
9ae3f9
 afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid,
9ae3f9
                                struct afr_reply *replies)
9ae3f9
 {
9ae3f9
-    afr_private_t *priv = NULL;
9ae3f9
+    afr_local_t *local = NULL;
9ae3f9
 
9ae3f9
-    priv = frame->this->private;
9ae3f9
+    local = frame->local;
9ae3f9
 
9ae3f9
     return afr_selfheal_unlocked_discover_on(frame, inode, gfid, replies,
9ae3f9
-                                             priv->child_up);
9ae3f9
+                                             local->child_up);
9ae3f9
 }
9ae3f9
 
9ae3f9
 unsigned int
9ae3f9
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
9ae3f9
index 7d4f208..dace071 100644
9ae3f9
--- a/xlators/cluster/afr/src/afr-self-heal-name.c
9ae3f9
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
9ae3f9
@@ -560,13 +560,15 @@ afr_selfheal_name_unlocked_inspect(call_frame_t *frame, xlator_t *this,
9ae3f9
     struct afr_reply *replies = NULL;
9ae3f9
     inode_t *inode = NULL;
9ae3f9
     int first_idx = -1;
9ae3f9
+    afr_local_t *local = NULL;
9ae3f9
 
9ae3f9
     priv = this->private;
9ae3f9
+    local = frame->local;
9ae3f9
 
9ae3f9
     replies = alloca0(sizeof(*replies) * priv->child_count);
9ae3f9
 
9ae3f9
     inode = afr_selfheal_unlocked_lookup_on(frame, parent, bname, replies,
9ae3f9
-                                            priv->child_up, NULL);
9ae3f9
+                                            local->child_up, NULL);
9ae3f9
     if (!inode)
9ae3f9
         return -ENOMEM;
9ae3f9
 
9ae3f9
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
9ae3f9
index 8234cec..f7ecf5d 100644
9ae3f9
--- a/xlators/cluster/afr/src/afr-self-heal.h
9ae3f9
+++ b/xlators/cluster/afr/src/afr-self-heal.h
9ae3f9
@@ -46,13 +46,16 @@
9ae3f9
         afr_local_t *__local = frame->local;                                   \
9ae3f9
         afr_private_t *__priv = frame->this->private;                          \
9ae3f9
         int __i = 0;                                                           \
9ae3f9
-        int __count = AFR_COUNT(list, __priv->child_count);                    \
9ae3f9
+        int __count = 0;                                                       \
9ae3f9
+        unsigned char *__list = alloca(__priv->child_count);                   \
9ae3f9
                                                                                \
9ae3f9
+        memcpy(__list, list, sizeof(*__list) * __priv->child_count);           \
9ae3f9
+        __count = AFR_COUNT(__list, __priv->child_count);                      \
9ae3f9
         __local->barrier.waitfor = __count;                                    \
9ae3f9
         afr_local_replies_wipe(__local, __priv);                               \
9ae3f9
                                                                                \
9ae3f9
         for (__i = 0; __i < __priv->child_count; __i++) {                      \
9ae3f9
-            if (!list[__i])                                                    \
9ae3f9
+            if (!__list[__i])                                                  \
9ae3f9
                 continue;                                                      \
9ae3f9
             STACK_WIND_COOKIE(frame, rfn, (void *)(long)__i,                   \
9ae3f9
                               __priv->children[__i],                           \
9ae3f9
-- 
9ae3f9
1.8.3.1
9ae3f9