From e431321f1348b5d51733a6b6c5e046fd8c6e28cc Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Mon, 5 Jul 2021 10:52:10 +0530
Subject: [PATCH 586/586] protocol/client: Do not reopen fd post handshake if
 posix lock is held

Problem:
With client.strict-locks enabled, in some cases where a posix lock is
taken after a brick gets disconnected, the fd gets reopened when the
brick reconnects to the client as part of client_post_handshake. In
such cases the saved fdctx's lock_list may not have the latest lock
information.

Fix:
Post handshake, also check the lock information in fdctx->lk_ctx,
which holds the latest state of the locks. Also check this field in
the other relevant places to prevent writes from happening over an
anonymous fd without re-opening the fd on the restarted brick.
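
For reference, the combined check introduced in client-helpers.c (quoted
here from the diff below, with an explanatory comment added) is:

    gf_boolean_t
    fdctx_lock_lists_empty(clnt_fd_ctx_t *fdctx)
    {
        /* A lock may be recorded either in the saved lock_list or in the
         * fd's lk_ctx; only when both are empty is it safe to reopen the
         * fd on a reconnected brick. */
        if (list_empty(&fdctx->lock_list) && fd_lk_ctx_empty(fdctx->lk_ctx))
            return _gf_true;

        return _gf_false;
    }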

> Upstream patch: https://github.com/gluster/glusterfs/pull/2582
> Fixes: #2581
> Change-Id: I7a0799e242ce188c6597dec0a65b4dae7dcd815b
> Signed-off-by: karthik-us <ksubrahm@redhat.com>

BUG: 1689375
Change-Id: I7a0799e242ce188c6597dec0a65b4dae7dcd815b
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/252588
Tested-by: RHGS Build Bot
Reviewed-by: Ravishankar Narayanankutty
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
---
 tests/bugs/replicate/do-not-reopen-fd.t        | 76 ++++++++++++++++++--------
 xlators/protocol/client/src/client-handshake.c |  2 +-
 xlators/protocol/client/src/client-helpers.c   | 11 +++-
 xlators/protocol/client/src/client.c           |  2 +-
 xlators/protocol/client/src/client.h           |  3 +
 5 files changed, 67 insertions(+), 27 deletions(-)

diff --git a/tests/bugs/replicate/do-not-reopen-fd.t b/tests/bugs/replicate/do-not-reopen-fd.t
index 13b5218..f346709 100644
--- a/tests/bugs/replicate/do-not-reopen-fd.t
+++ b/tests/bugs/replicate/do-not-reopen-fd.t
@@ -20,10 +20,41 @@ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
 TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M1
 
 TEST touch $M0/a
+gfid_a=$(gf_get_gfid_xattr $B0/${V0}0/a)
+gfid_str_a=$(gf_gfid_xattr_to_str $gfid_a)
+
+
+# Open fd from a client, check for open fd on all the bricks.
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'rw' $M0/a
+
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+
+# Kill a brick and take lock on the fd
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0
+TEST flock -x $fd1
+
+# Restart the brick and check for no open fd on the restarted brick.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+
+# Write on the fd. It should fail on the restarted brick.
+TEST fd_write $fd1 "data-0"
+EXPECT "" cat $B0/${V0}0/a
+EXPECT "data-0" cat $B0/${V0}1/a
+EXPECT "data-0" cat $B0/${V0}2/a
+
+TEST fd_close $fd1
 
 # Kill one brick and take lock on the fd and do a write.
 TEST kill_brick $V0 $H0 $B0/${V0}0
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0
 
 TEST fd1=`fd_available`
 TEST fd_open $fd1 'rw' $M0/a
@@ -34,7 +65,7 @@ TEST fd_write $fd1 "data-1"
 # should still succeed as there were no quorum disconnects.
 TEST $CLI volume start $V0 force
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0
 TEST fd_write $fd1 "data-2"
 EXPECT "" cat $B0/${V0}0/a
 EXPECT "data-2" cat $B0/${V0}1/a
@@ -42,9 +73,6 @@ EXPECT "data-2" cat $B0/${V0}2/a
 
 # Check there is no fd opened on the 1st brick by checking for the gfid inside
 # /proc/pid-of-brick/fd/ directory
-gfid_a=$(gf_get_gfid_xattr $B0/${V0}0/a)
-gfid_str_a=$(gf_gfid_xattr_to_str $gfid_a)
-
 EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
@@ -59,7 +87,7 @@ EXPECT "^2$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
 # Kill 2nd brick and try writing to the file. The write should fail due to
 # quorum failure.
 TEST kill_brick $V0 $H0 $B0/${V0}1
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 1
 TEST ! fd_write $fd1 "data-3"
 TEST ! fd_cat $fd1
 
@@ -67,7 +95,7 @@ TEST ! fd_cat $fd1
 # which were down previously, will return EBADFD now.
 TEST $CLI volume start $V0 force
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 1
 TEST ! fd_write $fd1 "data-4"
 TEST ! fd_cat $fd1
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
@@ -79,9 +107,9 @@ EXPECT "^2$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
 EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
 TEST $CLI volume heal $V0 enable
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
 
 TEST $CLI volume heal $V0
 EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
@@ -103,7 +131,7 @@ TEST ! fd_write $fd1 "data-5"
 # Kill the only brick that is having lock and try taking lock on another client
 # which should succeed.
 TEST kill_brick $V0 $H0 $B0/${V0}2
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 2
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 2
 TEST flock -x $fd2
 TEST fd_write $fd2 "data-6"
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
@@ -114,17 +142,17 @@ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
 # fail and operations on the 2nd fd should succeed.
 TEST $CLI volume start $V0 force
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}2
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M1 $V0-replicate-0 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M1 $V0-replicate-0 2
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
-EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
 
 TEST ! fd_write $fd1 "data-7"
 TEST ! fd_cat $fd1
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
-EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
 TEST fd_cat $fd2
 
 # Close both the fds which will release the locks and then re-open and take lock
@@ -159,9 +187,9 @@ EXPECT_WITHIN $REOPEN_TIMEOUT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0
 # Heal the volume
 TEST $CLI volume heal $V0 enable
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
 
 TEST $CLI volume heal $V0
 EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
@@ -169,7 +197,7 @@ TEST $CLI volume heal $V0 disable
 
 # Kill one brick and open a fd.
 TEST kill_brick $V0 $H0 $B0/${V0}0
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0
 
 TEST fd1=`fd_available`
 TEST fd_open $fd1 'rw' $M0/a
@@ -182,7 +210,7 @@ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
 # any of the bricks.
 TEST $CLI volume start $V0 force
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0
 TEST fd_write $fd1 "data-10"
 
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
@@ -193,7 +221,7 @@ TEST fd_close $fd1
 
 # Kill one brick, open and take lock on a fd.
 TEST kill_brick $V0 $H0 $B0/${V0}0
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0
 TEST fd1=`fd_available`
 TEST fd_open $fd1 'rw' $M0/a
 TEST flock -x $fd1
@@ -204,7 +232,7 @@ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
 
 # Kill & restart another brick so that it will return EBADFD
 TEST kill_brick $V0 $H0 $B0/${V0}1
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" brick_up_status $V0 $H0 $B0/${V0}1
 
 # Restart the bricks and then write. Now fd should not get re-opened since lock
 # is still held on one brick and write should also fail as there is no quorum.
@@ -212,8 +240,8 @@ EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1
 TEST $CLI volume start $V0 force
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 1
 TEST ! fd_write $fd1 "data-11"
 EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
 EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c
index a12472b..20e03d8 100644
--- a/xlators/protocol/client/src/client-handshake.c
+++ b/xlators/protocol/client/src/client-handshake.c
@@ -911,7 +911,7 @@ client_post_handshake(call_frame_t *frame, xlator_t *this)
         list_for_each_entry_safe(fdctx, tmp, &conf->saved_fds, sfd_pos)
         {
             if (fdctx->remote_fd != -1 ||
-                (!list_empty(&fdctx->lock_list) && conf->strict_locks))
+                (!fdctx_lock_lists_empty(fdctx) && conf->strict_locks))
                 continue;
 
             fdctx->reopen_done = client_child_up_reopen_done;
diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c
index a80f303..b4a7294 100644
--- a/xlators/protocol/client/src/client-helpers.c
+++ b/xlators/protocol/client/src/client-helpers.c
@@ -15,6 +15,15 @@
 #include
 #include
 
+gf_boolean_t
+fdctx_lock_lists_empty(clnt_fd_ctx_t *fdctx)
+{
+    if (list_empty(&fdctx->lock_list) && fd_lk_ctx_empty(fdctx->lk_ctx))
+        return _gf_true;
+
+    return _gf_false;
+}
+
 int
 client_fd_lk_list_empty(fd_lk_ctx_t *lk_ctx, gf_boolean_t try_lock)
 {
@@ -441,7 +450,7 @@ client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd,
                 *remote_fd = fdctx->remote_fd;
             }
 
-            locks_involved = !list_empty(&fdctx->lock_list);
+            locks_involved = !fdctx_lock_lists_empty(fdctx);
         }
     }
     pthread_spin_unlock(&conf->fd_lock);
diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
index 35a5340..6df2ed1 100644
--- a/xlators/protocol/client/src/client.c
+++ b/xlators/protocol/client/src/client.c
@@ -881,7 +881,7 @@ client_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
     pthread_spin_lock(&conf->fd_lock);
     {
         fdctx = this_fd_get_ctx(fd, this);
-        if (fdctx && !list_empty(&fdctx->lock_list)) {
+        if (fdctx && !fdctx_lock_lists_empty(fdctx)) {
             ret = -1;
             op_errno = EBADFD;
         }
diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
index f952aea..799fe6e 100644
--- a/xlators/protocol/client/src/client.h
+++ b/xlators/protocol/client/src/client.h
@@ -535,4 +535,7 @@ client_add_lock_for_recovery(fd_t *fd, struct gf_flock *flock,
 int
 client_is_setlk(int32_t cmd);
 
+gf_boolean_t
+fdctx_lock_lists_empty(clnt_fd_ctx_t *fdctx);
+
 #endif /* !_CLIENT_H */
-- 
1.8.3.1