14f8ab
From b9b479de2a7fd1c5eefa7aa1142e0a39e0c96ca9 Mon Sep 17 00:00:00 2001
14f8ab
From: Xavi Hernandez <xhernandez@redhat.com>
14f8ab
Date: Sun, 1 Mar 2020 19:49:04 +0100
14f8ab
Subject: [PATCH 419/449] cluster/afr: fix race when bricks come up
14f8ab
14f8ab
There was a problem when self-heal was sending lookups at the same time
14f8ab
that one of the bricks was coming up. In this case there was a chance
14f8ab
that the number of 'up' bricks changed in the middle of sending the
14f8ab
requests to subvolumes which caused a discrepancy in the expected
14f8ab
number of replies and the actual number of sent requests.
14f8ab
14f8ab
This discrepancy caused AFR to continue executing requests before
14f8ab
all requests were complete. Eventually, the frame of the pending
14f8ab
request was destroyed when the operation terminated, causing a use-
14f8ab
after-free issue when the answer was finally received.
14f8ab
14f8ab
In theory the same thing could happen in the reverse way, i.e. AFR
14f8ab
tries to wait for more replies than sent requests, causing a hang.
14f8ab
14f8ab
Backport of:
14f8ab
> Upstream-patch-link: https://review.gluster.org/24191
14f8ab
> Change-Id: I7ed6108554ca379d532efb1a29b2de8085410b70
14f8ab
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
14f8ab
> Fixes: bz#1808875
14f8ab
14f8ab
BUG: 1794663
14f8ab
Change-Id: I7ed6108554ca379d532efb1a29b2de8085410b70
14f8ab
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
14f8ab
Reviewed-on: https://code.engineering.redhat.com/gerrit/202489
14f8ab
Tested-by: RHGS Build Bot <nigelb@redhat.com>
14f8ab
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
14f8ab
---
14f8ab
 xlators/cluster/afr/src/afr-self-heal-common.c | 6 +++---
14f8ab
 xlators/cluster/afr/src/afr-self-heal-name.c   | 4 +++-
14f8ab
 xlators/cluster/afr/src/afr-self-heal.h        | 7 +++++--
14f8ab
 3 files changed, 11 insertions(+), 6 deletions(-)
14f8ab
14f8ab
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
14f8ab
index ce1ea50..d942ccf 100644
14f8ab
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
14f8ab
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
14f8ab
@@ -1869,12 +1869,12 @@ int
14f8ab
 afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid,
14f8ab
                                struct afr_reply *replies)
14f8ab
 {
14f8ab
-    afr_private_t *priv = NULL;
14f8ab
+    afr_local_t *local = NULL;
14f8ab
 
14f8ab
-    priv = frame->this->private;
14f8ab
+    local = frame->local;
14f8ab
 
14f8ab
     return afr_selfheal_unlocked_discover_on(frame, inode, gfid, replies,
14f8ab
-                                             priv->child_up);
14f8ab
+                                             local->child_up);
14f8ab
 }
14f8ab
 
14f8ab
 unsigned int
14f8ab
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
14f8ab
index 7d4f208..dace071 100644
14f8ab
--- a/xlators/cluster/afr/src/afr-self-heal-name.c
14f8ab
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
14f8ab
@@ -560,13 +560,15 @@ afr_selfheal_name_unlocked_inspect(call_frame_t *frame, xlator_t *this,
14f8ab
     struct afr_reply *replies = NULL;
14f8ab
     inode_t *inode = NULL;
14f8ab
     int first_idx = -1;
14f8ab
+    afr_local_t *local = NULL;
14f8ab
 
14f8ab
     priv = this->private;
14f8ab
+    local = frame->local;
14f8ab
 
14f8ab
     replies = alloca0(sizeof(*replies) * priv->child_count);
14f8ab
 
14f8ab
     inode = afr_selfheal_unlocked_lookup_on(frame, parent, bname, replies,
14f8ab
-                                            priv->child_up, NULL);
14f8ab
+                                            local->child_up, NULL);
14f8ab
     if (!inode)
14f8ab
         return -ENOMEM;
14f8ab
 
14f8ab
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
14f8ab
index 8234cec..f7ecf5d 100644
14f8ab
--- a/xlators/cluster/afr/src/afr-self-heal.h
14f8ab
+++ b/xlators/cluster/afr/src/afr-self-heal.h
14f8ab
@@ -46,13 +46,16 @@
14f8ab
         afr_local_t *__local = frame->local;                                   \
14f8ab
         afr_private_t *__priv = frame->this->private;                          \
14f8ab
         int __i = 0;                                                           \
14f8ab
-        int __count = AFR_COUNT(list, __priv->child_count);                    \
14f8ab
+        int __count = 0;                                                       \
14f8ab
+        unsigned char *__list = alloca(__priv->child_count);                   \
14f8ab
                                                                                \
14f8ab
+        memcpy(__list, list, sizeof(*__list) * __priv->child_count);           \
14f8ab
+        __count = AFR_COUNT(__list, __priv->child_count);                      \
14f8ab
         __local->barrier.waitfor = __count;                                    \
14f8ab
         afr_local_replies_wipe(__local, __priv);                               \
14f8ab
                                                                                \
14f8ab
         for (__i = 0; __i < __priv->child_count; __i++) {                      \
14f8ab
-            if (!list[__i])                                                    \
14f8ab
+            if (!__list[__i])                                                  \
14f8ab
                 continue;                                                      \
14f8ab
             STACK_WIND_COOKIE(frame, rfn, (void *)(long)__i,                   \
14f8ab
                               __priv->children[__i],                           \
14f8ab
-- 
14f8ab
1.8.3.1
14f8ab