From e431321f1348b5d51733a6b6c5e046fd8c6e28cc Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Mon, 5 Jul 2021 10:52:10 +0530
Subject: [PATCH 586/586] protocol/client: Do not reopen fd post handshake if
 posix lock is held

Problem:
With client.strict-locks enabled, in some cases where a posix lock is
taken after a brick gets disconnected, the fd gets reopened when the
brick reconnects to the client as part of client_post_handshake. In
such cases the saved fdctx's lock_list may not have the latest lock
information.

Fix:
Post handshake, check the lock information in fdctx->lk_ctx as well,
since it holds the latest state of the locks. Also check this field in
the other relevant code paths, to prevent writes from happening over an
anonymous fd without re-opening the fd on the restarted brick.
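
For illustration, here is a condensed sketch of the combined check and
of how the post-handshake reopen path uses it (not the verbatim change;
see the client-helpers.c and client-handshake.c hunks below):

    /* Types and helpers come from xlators/protocol/client/src/client.h. */
    gf_boolean_t
    fdctx_lock_lists_empty(clnt_fd_ctx_t *fdctx)
    {
        /* lock_list alone may miss a lock taken while the brick was
         * down; fdctx->lk_ctx carries the latest lock information. */
        if (list_empty(&fdctx->lock_list) && fd_lk_ctx_empty(fdctx->lk_ctx))
            return _gf_true;

        return _gf_false;
    }

    /* client_post_handshake(): with client.strict-locks enabled, skip
     * reopening any fd that still holds locks on this client. */
    if (fdctx->remote_fd != -1 ||
        (!fdctx_lock_lists_empty(fdctx) && conf->strict_locks))
        continue;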

> Upstream patch: https://github.com/gluster/glusterfs/pull/2582
> Fixes: #2581
> Change-Id: I7a0799e242ce188c6597dec0a65b4dae7dcd815b
> Signed-off-by: karthik-us <ksubrahm@redhat.com>

BUG: 1689375
Change-Id: I7a0799e242ce188c6597dec0a65b4dae7dcd815b
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/252588
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 tests/bugs/replicate/do-not-reopen-fd.t        | 76 ++++++++++++++++++--------
 xlators/protocol/client/src/client-handshake.c |  2 +-
 xlators/protocol/client/src/client-helpers.c   | 11 +++-
 xlators/protocol/client/src/client.c           |  2 +-
 xlators/protocol/client/src/client.h           |  3 +
 5 files changed, 67 insertions(+), 27 deletions(-)

diff --git a/tests/bugs/replicate/do-not-reopen-fd.t b/tests/bugs/replicate/do-not-reopen-fd.t
index 13b5218..f346709 100644
--- a/tests/bugs/replicate/do-not-reopen-fd.t
+++ b/tests/bugs/replicate/do-not-reopen-fd.t
@@ -20,10 +20,41 @@ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
 TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M1
 
 TEST touch $M0/a
+gfid_a=$(gf_get_gfid_xattr $B0/${V0}0/a)
+gfid_str_a=$(gf_gfid_xattr_to_str $gfid_a)
+
+
+# Open fd from a client, check for open fd on all the bricks.
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'rw' $M0/a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+
+# Kill a brick and take lock on the fd
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0
+TEST flock -x $fd1
+
+# Restart the brick and check for no open fd on the restarted brick.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+
+# Write on the fd. It should fail on the restarted brick.
+TEST fd_write $fd1 "data-0"
+EXPECT "" cat $B0/${V0}0/a
+EXPECT "data-0" cat $B0/${V0}1/a
+EXPECT "data-0" cat $B0/${V0}2/a
+
+TEST fd_close $fd1
 
 # Kill one brick and take lock on the fd and do a write.
 TEST kill_brick $V0 $H0 $B0/${V0}0
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0
 TEST fd1=`fd_available`
 TEST fd_open $fd1 'rw' $M0/a
 
@@ -34,7 +65,7 @@ TEST fd_write $fd1 "data-1"
 # should still succeed as there were no quorum disconnects.
 TEST $CLI volume start $V0 force
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0
 TEST fd_write $fd1 "data-2"
 EXPECT "" cat $B0/${V0}0/a
 EXPECT "data-2" cat $B0/${V0}1/a
@@ -42,9 +73,6 @@ EXPECT "data-2" cat $B0/${V0}2/a
 
 # Check there is no fd opened on the 1st brick by checking for the gfid inside
 # /proc/pid-of-brick/fd/ directory
-gfid_a=$(gf_get_gfid_xattr $B0/${V0}0/a)
-gfid_str_a=$(gf_gfid_xattr_to_str $gfid_a)
-
 EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
@@ -59,7 +87,7 @@ EXPECT "^2$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
 # Kill 2nd brick and try writing to the file. The write should fail due to
 # quorum failure.
 TEST kill_brick $V0 $H0 $B0/${V0}1
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 1
 TEST ! fd_write $fd1 "data-3"
 TEST ! fd_cat $fd1
 
@@ -67,7 +95,7 @@ TEST ! fd_cat $fd1
 # which were down previously, will return EBADFD now.
 TEST $CLI volume start $V0 force
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 1
 TEST ! fd_write $fd1 "data-4"
 TEST ! fd_cat $fd1
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
@@ -79,9 +107,9 @@ EXPECT "^2$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
 EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
 TEST $CLI volume heal $V0 enable
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
 
 TEST $CLI volume heal $V0
 EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
@@ -103,7 +131,7 @@ TEST ! fd_write $fd1 "data-5"
 # Kill the only brick that is having lock and try taking lock on another client
 # which should succeed.
 TEST kill_brick $V0 $H0 $B0/${V0}2
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 2
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 2
 TEST flock -x $fd2
 TEST fd_write $fd2 "data-6"
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
@@ -114,17 +142,17 @@ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
 # fail and operations on the 2nd fd should succeed.
 TEST $CLI volume start $V0 force
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}2
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M1 $V0-replicate-0 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M1 $V0-replicate-0 2
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
-EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
 TEST ! fd_write $fd1 "data-7"
 
 TEST ! fd_cat $fd1
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
-EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
 TEST fd_cat $fd2
 
 # Close both the fds which will release the locks and then re-open and take lock
@@ -159,9 +187,9 @@ EXPECT_WITHIN $REOPEN_TIMEOUT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0
 # Heal the volume
 TEST $CLI volume heal $V0 enable
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
 
 TEST $CLI volume heal $V0
 EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
@@ -169,7 +197,7 @@ TEST $CLI volume heal $V0 disable
 
 # Kill one brick and open a fd.
 TEST kill_brick $V0 $H0 $B0/${V0}0
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0
 TEST fd1=`fd_available`
 TEST fd_open $fd1 'rw' $M0/a
 
@@ -182,7 +210,7 @@ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
 # any of the bricks.
 TEST $CLI volume start $V0 force
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0
 TEST fd_write $fd1 "data-10"
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
 
@@ -193,7 +221,7 @@ TEST fd_close $fd1
 
 # Kill one brick, open and take lock on a fd.
 TEST kill_brick $V0 $H0 $B0/${V0}0
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0
 TEST fd1=`fd_available`
 TEST fd_open $fd1 'rw' $M0/a
 TEST flock -x $fd1
@@ -204,7 +232,7 @@ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
 
 # Kill & restart another brick so that it will return EBADFD
 TEST kill_brick $V0 $H0 $B0/${V0}1
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" brick_up_status $V0 $H0 $B0/${V0}1
 
 # Restart the bricks and then write. Now fd should not get re-opened since lock
 # is still held on one brick and write should also fail as there is no quorum.
@@ -212,8 +240,8 @@ EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1
 TEST $CLI volume start $V0 force
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 1
 TEST ! fd_write $fd1 "data-11"
 EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
 EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c
index a12472b..20e03d8 100644
--- a/xlators/protocol/client/src/client-handshake.c
+++ b/xlators/protocol/client/src/client-handshake.c
@@ -911,7 +911,7 @@ client_post_handshake(call_frame_t *frame, xlator_t *this)
         list_for_each_entry_safe(fdctx, tmp, &conf->saved_fds, sfd_pos)
         {
             if (fdctx->remote_fd != -1 ||
-                (!list_empty(&fdctx->lock_list) && conf->strict_locks))
+                (!fdctx_lock_lists_empty(fdctx) && conf->strict_locks))
                 continue;
 
             fdctx->reopen_done = client_child_up_reopen_done;
diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c
index a80f303..b4a7294 100644
--- a/xlators/protocol/client/src/client-helpers.c
+++ b/xlators/protocol/client/src/client-helpers.c
@@ -15,6 +15,15 @@
 #include <glusterfs/compat-errno.h>
 #include <glusterfs/common-utils.h>
 
+gf_boolean_t
+fdctx_lock_lists_empty(clnt_fd_ctx_t *fdctx)
+{
+    if (list_empty(&fdctx->lock_list) && fd_lk_ctx_empty(fdctx->lk_ctx))
+        return _gf_true;
+
+    return _gf_false;
+}
+
 int
 client_fd_lk_list_empty(fd_lk_ctx_t *lk_ctx, gf_boolean_t try_lock)
 {
@@ -441,7 +450,7 @@ client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd,
                 *remote_fd = fdctx->remote_fd;
             }
 
-            locks_involved = !list_empty(&fdctx->lock_list);
+            locks_involved = !fdctx_lock_lists_empty(fdctx);
         }
     }
     pthread_spin_unlock(&conf->fd_lock);
diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
index 35a5340..6df2ed1 100644
--- a/xlators/protocol/client/src/client.c
+++ b/xlators/protocol/client/src/client.c
@@ -881,7 +881,7 @@ client_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
         pthread_spin_lock(&conf->fd_lock);
         {
             fdctx = this_fd_get_ctx(fd, this);
-            if (fdctx && !list_empty(&fdctx->lock_list)) {
+            if (fdctx && !fdctx_lock_lists_empty(fdctx)) {
                 ret = -1;
                 op_errno = EBADFD;
             }
diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
index f952aea..799fe6e 100644
--- a/xlators/protocol/client/src/client.h
+++ b/xlators/protocol/client/src/client.h
@@ -535,4 +535,7 @@ client_add_lock_for_recovery(fd_t *fd, struct gf_flock *flock,
 int
 client_is_setlk(int32_t cmd);
 
+gf_boolean_t
+fdctx_lock_lists_empty(clnt_fd_ctx_t *fdctx);
+
 #endif /* !_CLIENT_H */
-- 
1.8.3.1