From e431321f1348b5d51733a6b6c5e046fd8c6e28cc Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Mon, 5 Jul 2021 10:52:10 +0530
Subject: [PATCH 586/586] protocol/client: Do not reopen fd post handshake if
 posix lock is held

Problem:
With client.strict-locks enabled, in some cases where a posix lock is
taken after a brick gets disconnected, the fd gets reopened when the
brick reconnects to the client as part of client_post_handshake,
because the saved fdctx's lock_list may not have the latest lock
information.

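That is, the pre-patch reopen check in client_post_handshake (taken
verbatim from the client-handshake.c hunk below) skipped the reopen
based on the saved lock_list alone:

    if (fdctx->remote_fd != -1 ||
        (!list_empty(&fdctx->lock_list) && conf->strict_locks))
        continue;

An fd whose locks were tracked only in fdctx->lk_ctx never hit the
continue, so it was reopened even though a lock was held on it.
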
Fix:
Post handshake, check the lock information in fdctx->lk_ctx as well,
since it holds the latest state of the locks. Check this field in the
other relevant places too, to prevent writes from going through an
anonymous fd when the fd has not been reopened on the restarted brick.

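The combined check is factored into a helper that reports an fd as
lock-free only when both tracking structures are empty (added in
client-helpers.c, full hunk below):

    gf_boolean_t
    fdctx_lock_lists_empty(clnt_fd_ctx_t *fdctx)
    {
        if (list_empty(&fdctx->lock_list) && fd_lk_ctx_empty(fdctx->lk_ctx))
            return _gf_true;

        return _gf_false;
    }

client_post_handshake(), client_get_remote_fd() and client_open() now
call this helper instead of checking list_empty(&fdctx->lock_list)
directly.
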
> Upstream patch: https://github.com/gluster/glusterfs/pull/2582
> Fixes: #2581
> Change-Id: I7a0799e242ce188c6597dec0a65b4dae7dcd815b
> Signed-off-by: karthik-us <ksubrahm@redhat.com>

BUG: 1689375
Change-Id: I7a0799e242ce188c6597dec0a65b4dae7dcd815b
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/252588
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 tests/bugs/replicate/do-not-reopen-fd.t        | 76 ++++++++++++++++++--------
 xlators/protocol/client/src/client-handshake.c |  2 +-
 xlators/protocol/client/src/client-helpers.c   | 11 +++-
 xlators/protocol/client/src/client.c           |  2 +-
 xlators/protocol/client/src/client.h           |  3 +
 5 files changed, 67 insertions(+), 27 deletions(-)

diff --git a/tests/bugs/replicate/do-not-reopen-fd.t b/tests/bugs/replicate/do-not-reopen-fd.t
index 13b5218..f346709 100644
--- a/tests/bugs/replicate/do-not-reopen-fd.t
+++ b/tests/bugs/replicate/do-not-reopen-fd.t
@@ -20,10 +20,41 @@ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
 TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M1
 
 TEST touch $M0/a
+gfid_a=$(gf_get_gfid_xattr $B0/${V0}0/a)
+gfid_str_a=$(gf_gfid_xattr_to_str $gfid_a)
+
+
+# Open fd from a client, check for open fd on all the bricks.
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'rw' $M0/a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+
+# Kill a brick and take lock on the fd
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0
+TEST flock -x $fd1
+
+# Restart the brick and check for no open fd on the restarted brick.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+
+# Write on the fd. It should fail on the restarted brick.
+TEST fd_write $fd1 "data-0"
+EXPECT "" cat $B0/${V0}0/a
+EXPECT "data-0" cat $B0/${V0}1/a
+EXPECT "data-0" cat $B0/${V0}2/a
+
+TEST fd_close $fd1
 
 # Kill one brick and take lock on the fd and do a write.
 TEST kill_brick $V0 $H0 $B0/${V0}0
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0
 TEST fd1=`fd_available`
 TEST fd_open $fd1 'rw' $M0/a
 
@@ -34,7 +65,7 @@ TEST fd_write $fd1 "data-1"
 # should still succeed as there were no quorum disconnects.
 TEST $CLI volume start $V0 force
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0
 TEST fd_write $fd1 "data-2"
 EXPECT "" cat $B0/${V0}0/a
 EXPECT "data-2" cat $B0/${V0}1/a
@@ -42,9 +73,6 @@ EXPECT "data-2" cat $B0/${V0}2/a
 
 # Check there is no fd opened on the 1st brick by checking for the gfid inside
 # /proc/pid-of-brick/fd/ directory
-gfid_a=$(gf_get_gfid_xattr $B0/${V0}0/a)
-gfid_str_a=$(gf_gfid_xattr_to_str $gfid_a)
-
 EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
@@ -59,7 +87,7 @@ EXPECT "^2$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
 # Kill 2nd brick and try writing to the file. The write should fail due to
 # quorum failure.
 TEST kill_brick $V0 $H0 $B0/${V0}1
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 1
 TEST ! fd_write $fd1 "data-3"
 TEST ! fd_cat $fd1
 
@@ -67,7 +95,7 @@ TEST ! fd_cat $fd1
 # which were down previously, will return EBADFD now.
 TEST $CLI volume start $V0 force
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 1
 TEST ! fd_write $fd1 "data-4"
 TEST ! fd_cat $fd1
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
@@ -79,9 +107,9 @@ EXPECT "^2$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
 EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
 TEST $CLI volume heal $V0 enable
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
 
 TEST $CLI volume heal $V0
 EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
@@ -103,7 +131,7 @@ TEST ! fd_write $fd1 "data-5"
 # Kill the only brick that is having lock and try taking lock on another client
 # which should succeed.
 TEST kill_brick $V0 $H0 $B0/${V0}2
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 2
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 2
 TEST flock -x $fd2
 TEST fd_write $fd2 "data-6"
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
@@ -114,17 +142,17 @@ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
 # fail and operations on the 2nd fd should succeed.
 TEST $CLI volume start $V0 force
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}2
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M1 $V0-replicate-0 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M1 $V0-replicate-0 2
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
-EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
 TEST ! fd_write $fd1 "data-7"
 
 TEST ! fd_cat $fd1
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
-EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+EXPECT "^0" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
 TEST fd_cat $fd2
 
 # Close both the fds which will release the locks and then re-open and take lock
@@ -159,9 +187,9 @@ EXPECT_WITHIN $REOPEN_TIMEOUT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0
 # Heal the volume
 TEST $CLI volume heal $V0 enable
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
 
 TEST $CLI volume heal $V0
 EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
@@ -169,7 +197,7 @@ TEST $CLI volume heal $V0 disable
 
 # Kill one brick and open a fd.
 TEST kill_brick $V0 $H0 $B0/${V0}0
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0
 TEST fd1=`fd_available`
 TEST fd_open $fd1 'rw' $M0/a
 
@@ -182,7 +210,7 @@ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
 # any of the bricks.
 TEST $CLI volume start $V0 force
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0
 TEST fd_write $fd1 "data-10"
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
 
@@ -193,7 +221,7 @@ TEST fd_close $fd1
 
 # Kill one brick, open and take lock on a fd.
 TEST kill_brick $V0 $H0 $B0/${V0}0
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0
 TEST fd1=`fd_available`
 TEST fd_open $fd1 'rw' $M0/a
 TEST flock -x $fd1
@@ -204,7 +232,7 @@ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
 
 # Kill & restart another brick so that it will return EBADFD
 TEST kill_brick $V0 $H0 $B0/${V0}1
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" brick_up_status $V0 $H0 $B0/${V0}1
 
 # Restart the bricks and then write. Now fd should not get re-opened since lock
 # is still held on one brick and write should also fail as there is no quorum.
@@ -212,8 +240,8 @@ EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1
 TEST $CLI volume start $V0 force
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 1
 TEST ! fd_write $fd1 "data-11"
 EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
 EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c
index a12472b..20e03d8 100644
--- a/xlators/protocol/client/src/client-handshake.c
+++ b/xlators/protocol/client/src/client-handshake.c
@@ -911,7 +911,7 @@ client_post_handshake(call_frame_t *frame, xlator_t *this)
         list_for_each_entry_safe(fdctx, tmp, &conf->saved_fds, sfd_pos)
         {
             if (fdctx->remote_fd != -1 ||
-                (!list_empty(&fdctx->lock_list) && conf->strict_locks))
+                (!fdctx_lock_lists_empty(fdctx) && conf->strict_locks))
                 continue;
 
             fdctx->reopen_done = client_child_up_reopen_done;
diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c
index a80f303..b4a7294 100644
--- a/xlators/protocol/client/src/client-helpers.c
+++ b/xlators/protocol/client/src/client-helpers.c
@@ -15,6 +15,15 @@
 #include <glusterfs/compat-errno.h>
 #include <glusterfs/common-utils.h>
 
+gf_boolean_t
+fdctx_lock_lists_empty(clnt_fd_ctx_t *fdctx)
+{
+    if (list_empty(&fdctx->lock_list) && fd_lk_ctx_empty(fdctx->lk_ctx))
+        return _gf_true;
+
+    return _gf_false;
+}
+
 int
 client_fd_lk_list_empty(fd_lk_ctx_t *lk_ctx, gf_boolean_t try_lock)
 {
@@ -441,7 +450,7 @@ client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd,
                 *remote_fd = fdctx->remote_fd;
             }
 
-            locks_involved = !list_empty(&fdctx->lock_list);
+            locks_involved = !fdctx_lock_lists_empty(fdctx);
         }
     }
     pthread_spin_unlock(&conf->fd_lock);
diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
index 35a5340..6df2ed1 100644
--- a/xlators/protocol/client/src/client.c
+++ b/xlators/protocol/client/src/client.c
@@ -881,7 +881,7 @@ client_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
         pthread_spin_lock(&conf->fd_lock);
         {
             fdctx = this_fd_get_ctx(fd, this);
-            if (fdctx && !list_empty(&fdctx->lock_list)) {
+            if (fdctx && !fdctx_lock_lists_empty(fdctx)) {
                 ret = -1;
                 op_errno = EBADFD;
             }
diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
index f952aea..799fe6e 100644
--- a/xlators/protocol/client/src/client.h
+++ b/xlators/protocol/client/src/client.h
@@ -535,4 +535,7 @@ client_add_lock_for_recovery(fd_t *fd, struct gf_flock *flock,
 int
 client_is_setlk(int32_t cmd);
 
+gf_boolean_t
+fdctx_lock_lists_empty(clnt_fd_ctx_t *fdctx);
+
 #endif /* !_CLIENT_H */
-- 
1.8.3.1