From e431321f1348b5d51733a6b6c5e046fd8c6e28cc Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Mon, 5 Jul 2021 10:52:10 +0530
Subject: [PATCH 586/586] protocol/client: Do not reopen fd post handshake if
 posix lock is held

Problem:
With client.strict-locks enabled, in some cases where a posix lock is
taken after a brick gets disconnected, the fd gets reopened when the
brick reconnects to the client as part of client_post_handshake. In
such cases the saved fdctx's lock_list may not have the latest lock
information.

Fix:
Post handshake, also check the lock information in fdctx->lk_ctx,
which holds the latest state of the locks. Also check this field in
the other relevant places to prevent writes from happening over an
anonymous fd without re-opening the fd on the restarted brick.
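
For reference, the combined check introduced in client-helpers.c (quoted
here from the diff below, with an explanatory comment added) is:

    gf_boolean_t
    fdctx_lock_lists_empty(clnt_fd_ctx_t *fdctx)
    {
        /* A lock may be recorded either in the saved lock_list or in the
         * fd's lk_ctx; only when both are empty is it safe to reopen the
         * fd on a reconnected brick. */
        if (list_empty(&fdctx->lock_list) && fd_lk_ctx_empty(fdctx->lk_ctx))
            return _gf_true;

        return _gf_false;
    }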

> Upstream patch: https://github.com/gluster/glusterfs/pull/2582
> Fixes: #2581
> Change-Id: I7a0799e242ce188c6597dec0a65b4dae7dcd815b
> Signed-off-by: karthik-us <ksubrahm@redhat.com>

BUG: 1689375
Change-Id: I7a0799e242ce188c6597dec0a65b4dae7dcd815b
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/252588
Tested-by: RHGS Build Bot
Reviewed-by: Ravishankar Narayanankutty
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya
---
 tests/bugs/replicate/do-not-reopen-fd.t        | 76 ++++++++++++++++++--------
 xlators/protocol/client/src/client-handshake.c |  2 +-
 xlators/protocol/client/src/client-helpers.c   | 11 +++-
 xlators/protocol/client/src/client.c           |  2 +-
 xlators/protocol/client/src/client.h           |  3 +
 5 files changed, 67 insertions(+), 27 deletions(-)

diff --git a/tests/bugs/replicate/do-not-reopen-fd.t b/tests/bugs/replicate/do-not-reopen-fd.t
index 13b5218..f346709 100644
--- a/tests/bugs/replicate/do-not-reopen-fd.t
+++ b/tests/bugs/replicate/do-not-reopen-fd.t
@@ -20,10 +20,41 @@ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
 TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M1
 
 TEST touch $M0/a
+gfid_a=$(gf_get_gfid_xattr $B0/${V0}0/a)
+gfid_str_a=$(gf_gfid_xattr_to_str $gfid_a)
+
+
+# Open fd from a client, check for open fd on all the bricks.
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'rw' $M0/a
+
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+
+# Kill a brick and take lock on the fd
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0
+TEST flock -x $fd1
+
+# Restart the brick and check for no open fd on the restarted brick.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+
+# Write on the fd. It should fail on the restarted brick.
+TEST fd_write $fd1 "data-0"
+EXPECT "" cat $B0/${V0}0/a
+EXPECT "data-0" cat $B0/${V0}1/a
+EXPECT "data-0" cat $B0/${V0}2/a
+
+TEST fd_close $fd1
 
 # Kill one brick and take lock on the fd and do a write.
 TEST kill_brick $V0 $H0 $B0/${V0}0
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0
 
 TEST fd1=`fd_available`
 TEST fd_open $fd1 'rw' $M0/a
@@ -34,7 +65,7 @@ TEST fd_write $fd1 "data-1"
 # should still succeed as there were no quorum disconnects.
 TEST $CLI volume start $V0 force
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0
 TEST fd_write $fd1 "data-2"
 EXPECT "" cat $B0/${V0}0/a
 EXPECT "data-2" cat $B0/${V0}1/a
@@ -42,9 +73,6 @@ EXPECT "data-2" cat $B0/${V0}2/a
 
 # Check there is no fd opened on the 1st brick by checking for the gfid inside
 # /proc/pid-of-brick/fd/ directory
-gfid_a=$(gf_get_gfid_xattr $B0/${V0}0/a)
-gfid_str_a=$(gf_gfid_xattr_to_str $gfid_a)
-
 EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
@@ -59,7 +87,7 @@ EXPECT "^2$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
 # Kill 2nd brick and try writing to the file. The write should fail due to
 # quorum failure.
 TEST kill_brick $V0 $H0 $B0/${V0}1
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 1
 TEST ! fd_write $fd1 "data-3"
 TEST ! fd_cat $fd1
 
@@ -67,7 +95,7 @@ TEST ! fd_cat $fd1
 # which were down previously, will return EBADFD now.
 TEST $CLI volume start $V0 force
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 1
 TEST ! fd_write $fd1 "data-4"
 TEST ! fd_cat $fd1
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
@@ -79,9 +107,9 @@ EXPECT "^2$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
 EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
 TEST $CLI volume heal $V0 enable
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
 
 TEST $CLI volume heal $V0
 EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
@@ -103,7 +131,7 @@ TEST ! fd_write $fd1 "data-5"
 # Kill the only brick that is having lock and try taking lock on another client
 # which should succeed.
 TEST kill_brick $V0 $H0 $B0/${V0}2
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 2
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 2
 TEST flock -x $fd2
 TEST fd_write $fd2 "data-6"
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
@@ -114,17 +142,17 @@ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
 # fail and operations on the 2nd fd should succeed.
 TEST $CLI volume start $V0 force
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}2
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M1 $V0-replicate-0 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M1 $V0-replicate-0 2
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
-EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
 
 TEST ! fd_write $fd1 "data-7"
 TEST ! fd_cat $fd1
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
-EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
 TEST fd_cat $fd2
 
 # Close both the fds which will release the locks and then re-open and take lock
@@ -159,9 +187,9 @@ EXPECT_WITHIN $REOPEN_TIMEOUT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0
 # Heal the volume
 TEST $CLI volume heal $V0 enable
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
 
 TEST $CLI volume heal $V0
 EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
@@ -169,7 +197,7 @@ TEST $CLI volume heal $V0 disable
 
 # Kill one brick and open a fd.
 TEST kill_brick $V0 $H0 $B0/${V0}0
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0
 
 TEST fd1=`fd_available`
 TEST fd_open $fd1 'rw' $M0/a
@@ -182,7 +210,7 @@ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
 # any of the bricks.
 TEST $CLI volume start $V0 force
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0
 TEST fd_write $fd1 "data-10"
 
 EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
@@ -193,7 +221,7 @@ TEST fd_close $fd1
 
 # Kill one brick, open and take lock on a fd.
 TEST kill_brick $V0 $H0 $B0/${V0}0
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_child_up_status_meta $M0 $V0-replicate-0 0
 TEST fd1=`fd_available`
 TEST fd_open $fd1 'rw' $M0/a
 TEST flock -x $fd1
@@ -204,7 +232,7 @@ EXPECT "^1$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
 
 # Kill & restart another brick so that it will return EBADFD
 TEST kill_brick $V0 $H0 $B0/${V0}1
-EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" brick_up_status $V0 $H0 $B0/${V0}1
 
 # Restart the bricks and then write. Now fd should not get re-opened since lock
 # is still held on one brick and write should also fail as there is no quorum.
@@ -212,8 +240,8 @@ EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1
 TEST $CLI volume start $V0 force
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_child_up_status_meta $M0 $V0-replicate-0 1
 TEST ! fd_write $fd1 "data-11"
 EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
 EXPECT "^0$" gf_open_file_count_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c
index a12472b..20e03d8 100644
--- a/xlators/protocol/client/src/client-handshake.c
+++ b/xlators/protocol/client/src/client-handshake.c
@@ -911,7 +911,7 @@ client_post_handshake(call_frame_t *frame, xlator_t *this)
         list_for_each_entry_safe(fdctx, tmp, &conf->saved_fds, sfd_pos)
         {
             if (fdctx->remote_fd != -1 ||
-                (!list_empty(&fdctx->lock_list) && conf->strict_locks))
+                (!fdctx_lock_lists_empty(fdctx) && conf->strict_locks))
                 continue;
 
             fdctx->reopen_done = client_child_up_reopen_done;
diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c
index a80f303..b4a7294 100644
--- a/xlators/protocol/client/src/client-helpers.c
+++ b/xlators/protocol/client/src/client-helpers.c
@@ -15,6 +15,15 @@
 #include
 #include
 
+gf_boolean_t
+fdctx_lock_lists_empty(clnt_fd_ctx_t *fdctx)
+{
+    if (list_empty(&fdctx->lock_list) && fd_lk_ctx_empty(fdctx->lk_ctx))
+        return _gf_true;
+
+    return _gf_false;
+}
+
 int
 client_fd_lk_list_empty(fd_lk_ctx_t *lk_ctx, gf_boolean_t try_lock)
 {
@@ -441,7 +450,7 @@ client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd,
                 *remote_fd = fdctx->remote_fd;
             }
 
-            locks_involved = !list_empty(&fdctx->lock_list);
+            locks_involved = !fdctx_lock_lists_empty(fdctx);
         }
     }
     pthread_spin_unlock(&conf->fd_lock);
diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
index 35a5340..6df2ed1 100644
--- a/xlators/protocol/client/src/client.c
+++ b/xlators/protocol/client/src/client.c
@@ -881,7 +881,7 @@ client_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
     pthread_spin_lock(&conf->fd_lock);
     {
         fdctx = this_fd_get_ctx(fd, this);
-        if (fdctx && !list_empty(&fdctx->lock_list)) {
+        if (fdctx && !fdctx_lock_lists_empty(fdctx)) {
             ret = -1;
             op_errno = EBADFD;
         }
diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
index f952aea..799fe6e 100644
--- a/xlators/protocol/client/src/client.h
+++ b/xlators/protocol/client/src/client.h
@@ -535,4 +535,7 @@ client_add_lock_for_recovery(fd_t *fd, struct gf_flock *flock,
 int
 client_is_setlk(int32_t cmd);
 
+gf_boolean_t
+fdctx_lock_lists_empty(clnt_fd_ctx_t *fdctx);
+
 #endif /* !_CLIENT_H */
-- 
1.8.3.1