From 57c794e31c0333f508ada740227c9afa1889f8ae Mon Sep 17 00:00:00 2001 From: karthik-us Date: Thu, 15 Apr 2021 11:27:57 +0530 Subject: [PATCH 581/584] afr: don't reopen fds on which POSIX locks are held When client.strict-locks is enabled on a volume and there are POSIX locks held on the files, after disconnect and reconnection of the clients do not re-open such fds which might lead to multiple clients acquiring the locks and cause data corruption. > Upstream patch: https://github.com/gluster/glusterfs/pull/1980/commits/56bde56c2741c5eac59937a6cf951a14f2878460 > Change-Id: I8777ffbc2cc8d15ab57b58b72b56eb67521787c5 > Fixes: #1977 > Signed-off-by: karthik-us BUG: 1689375 Change-Id: I8777ffbc2cc8d15ab57b58b72b56eb67521787c5 Signed-off-by: karthik-us Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245414 Tested-by: RHGS Build Bot Reviewed-by: Sunil Kumar Heggodu Gopala Acharya Reviewed-by: Ravishankar Narayanankutty --- rpc/rpc-lib/src/protocol-common.h | 6 + tests/bugs/replicate/do-not-reopen-fd.t | 206 +++++++++++++++++ xlators/cluster/afr/src/afr-common.c | 15 +- xlators/cluster/afr/src/afr-open.c | 280 +++++++++++++++++++---- xlators/cluster/afr/src/afr.h | 3 + xlators/protocol/client/src/client-common.c | 148 ++++++++---- xlators/protocol/client/src/client-common.h | 4 + xlators/protocol/client/src/client-helpers.c | 22 +- xlators/protocol/client/src/client-rpc-fops.c | 23 +- xlators/protocol/client/src/client-rpc-fops_v2.c | 25 +- xlators/protocol/client/src/client.c | 21 +- xlators/protocol/client/src/client.h | 8 +- 12 files changed, 654 insertions(+), 107 deletions(-) create mode 100644 tests/bugs/replicate/do-not-reopen-fd.t diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h index 779878f..f56aaaa 100644 --- a/rpc/rpc-lib/src/protocol-common.h +++ b/rpc/rpc-lib/src/protocol-common.h @@ -312,6 +312,12 @@ enum glusterd_mgmt_v3_procnum { GLUSTERD_MGMT_V3_MAXVALUE, }; +enum gf_fd_reopen_status { + 
FD_REOPEN_ALLOWED = 0, + FD_REOPEN_NOT_ALLOWED, + FD_BAD, +}; + typedef struct gf_gsync_detailed_status_ gf_gsync_status_t; enum gf_get_volume_info_type { diff --git a/tests/bugs/replicate/do-not-reopen-fd.t b/tests/bugs/replicate/do-not-reopen-fd.t new file mode 100644 index 0000000..76d8e70 --- /dev/null +++ b/tests/bugs/replicate/do-not-reopen-fd.t @@ -0,0 +1,206 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../fileio.rc + +cleanup; + +TEST glusterd; +TEST pidof glusterd + +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume set $V0 performance.open-behind off +TEST $CLI volume set $V0 client.strict-locks on +TEST $CLI volume heal $V0 disable +TEST $CLI volume start $V0 +EXPECT 'Started' volinfo_field $V0 'Status'; +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M1 + +TEST touch $M0/a + +# Kill one brick and take lock on the fd and do a write. +TEST kill_brick $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0 +TEST fd1=`fd_available` +TEST fd_open $fd1 'rw' $M0/a + +TEST flock -x $fd1 +TEST fd_write $fd1 "data-1" + +# Restart the brick and then write. Now fd should not get re-opened but write +# should still succeed as there were no quorum disconnects. 
+TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 +TEST fd_write $fd1 "data-2" +EXPECT "" cat $B0/${V0}0/a +EXPECT "data-2" cat $B0/${V0}1/a +EXPECT "data-2" cat $B0/${V0}2/a + +# Check there is no fd opened on the 1st brick by checking for the gfid inside +# /proc/pid-of-brick/fd/ directory +gfid_a=$(gf_get_gfid_xattr $B0/${V0}0/a) +gfid_str_a=$(gf_gfid_xattr_to_str $gfid_a) + +EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a +EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a +EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + +TEST fd2=`fd_available` +TEST fd_open $fd2 'rw' $M1/a + +# Kill 2nd brick and try writing to the file. The write should fail due to +# quorum failure. +TEST kill_brick $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1 +TEST ! fd_write $fd1 "data-3" +TEST ! fd_cat $fd1 + +# Restart the bricks and try writing to the file. This should fail as two bricks +# which were down previously, will return EBADFD now. +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1 +TEST ! fd_write $fd1 "data-4" +TEST ! fd_cat $fd1 + +# Enable heal and check the files will have same content on all the bricks after +# the heal is completed. 
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 +TEST $CLI volume heal $V0 enable +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 + +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +EXPECT "data-4" cat $B0/${V0}0/a +EXPECT "data-4" cat $B0/${V0}1/a +EXPECT "data-4" cat $B0/${V0}2/a +TEST $CLI volume heal $V0 disable + +# Try writing to the file again on the same fd, which should fail again, since +# it is not yet re-opened. +TEST ! fd_write $fd1 "data-5" + +# At this point only one brick will have the lock. Try taking the lock again on +# the bad fd, which should also fail with EBADFD. +TEST ! flock -x $fd1 + +# Kill the only brick that is having lock and try taking lock on another client +# which should succeed. +TEST kill_brick $V0 $H0 $B0/${V0}2 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 2 +TEST flock -x $fd2 +TEST fd_write $fd2 "data-6" + +# Bring the brick up and try writing & reading on the old fd, which should still +# fail and operations on the 2nd fd should succeed. +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}2 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2 +TEST ! fd_write $fd1 "data-7" + +TEST ! fd_cat $fd1 +TEST fd_cat $fd2 + +# Close both the fds which will release the locks and then re-open and take lock +# on the old fd. Operations on that fd should succeed afterwards. +TEST fd_close $fd1 +TEST fd_close $fd2 + +TEST ! ls /proc/$$/fd/$fd1 +TEST ! 
ls /proc/$$/fd/$fd2 +EXPECT_WITHIN $REOPEN_TIMEOUT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a +EXPECT_WITHIN $REOPEN_TIMEOUT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a +EXPECT_WITHIN $REOPEN_TIMEOUT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + +TEST fd1=`fd_available` +TEST fd_open $fd1 'rw' $M0/a +EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a +EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a +EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + +TEST flock -x $fd1 +TEST fd_write $fd1 "data-8" +TEST fd_cat $fd1 + +EXPECT "data-8" head -n 1 $B0/${V0}0/a +EXPECT "data-8" head -n 1 $B0/${V0}1/a +EXPECT "data-8" head -n 1 $B0/${V0}2/a + +TEST fd_close $fd1 + +# Heal the volume +TEST $CLI volume heal $V0 enable +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 + +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +TEST $CLI volume heal $V0 disable + +# Kill one brick and open a fd. +TEST kill_brick $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0 +TEST fd1=`fd_available` +TEST fd_open $fd1 'rw' $M0/a + +EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a +EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a +EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + +# Restart the brick and then write. Now fd should get re-opened and write should +# succeed on the previously down brick as well since there are no locks held on +# any of the bricks. 
+TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 +TEST fd_write $fd1 "data-10" +EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a + +EXPECT "data-10" head -n 1 $B0/${V0}0/a +EXPECT "data-10" head -n 1 $B0/${V0}1/a +EXPECT "data-10" head -n 1 $B0/${V0}2/a +TEST fd_close $fd1 + +# Kill one brick, open and take lock on a fd. +TEST kill_brick $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0 +TEST fd1=`fd_available` +TEST fd_open $fd1 'rw' $M0/a +TEST flock -x $fd1 + +EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a +EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a +EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + +# Kill & restart another brick so that it will return EBADFD +TEST kill_brick $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1 + +# Restart the bricks and then write. Now fd should not get re-opened since lock +# is still held on one brick and write should also fail as there is no quorum. + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1 +TEST ! 
fd_write $fd1 "data-11" +EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a +EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a +EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a + +EXPECT "data-10" head -n 1 $B0/${V0}0/a +EXPECT "data-10" head -n 1 $B0/${V0}1/a +EXPECT "data-11" head -n 1 $B0/${V0}2/a + +TEST fd_close $fd1 +cleanup diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 416012c..bd46e59 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -2067,6 +2067,8 @@ afr_local_cleanup(afr_local_t *local, xlator_t *this) dict_unref(local->cont.entrylk.xdata); } + GF_FREE(local->need_open); + if (local->xdata_req) dict_unref(local->xdata_req); @@ -5689,6 +5691,14 @@ afr_local_init(afr_local_t *local, afr_private_t *priv, int32_t *op_errno) } local->is_new_entry = _gf_false; + local->need_open = GF_CALLOC(priv->child_count, sizeof(*local->need_open), + gf_afr_mt_char); + if (!local->need_open) { + if (op_errno) + *op_errno = ENOMEM; + goto out; + } + INIT_LIST_HEAD(&local->healer); return 0; out: @@ -6124,9 +6134,8 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc) char *substr = NULL; char *status = NULL; - ret = afr_lockless_inspect(frame, this, loc->gfid, &inode, - &entry_selfheal, &data_selfheal, - &metadata_selfheal, &pending); + ret = afr_lockless_inspect(frame, this, loc->gfid, &inode, &entry_selfheal, + &data_selfheal, &metadata_selfheal, &pending); if (ret == -ENOMEM) { ret = -1; diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c index ff72c73..73c1552 100644 --- a/xlators/cluster/afr/src/afr-open.c +++ b/xlators/cluster/afr/src/afr-open.c @@ -35,6 +35,8 @@ #include "afr-dir-read.h" #include "afr-dir-write.h" #include "afr-transaction.h" +#include "afr-self-heal.h" +#include "protocol-common.h" gf_boolean_t afr_is_fd_fixable(fd_t *fd) @@ -239,8 +241,32 @@ 
afr_openfd_fix_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, return 0; } +static void +afr_fd_ctx_reset_need_open(fd_t *fd, xlator_t *this, unsigned char *need_open) +{ + afr_fd_ctx_t *fd_ctx = NULL; + afr_private_t *priv = NULL; + int i = 0; + + priv = this->private; + fd_ctx = afr_fd_ctx_get(fd, this); + if (!fd_ctx) + return; + + LOCK(&fd->lock); + { + for (i = 0; i < priv->child_count; i++) { + if (fd_ctx->opened_on[i] == AFR_FD_OPENING && need_open[i]) { + fd_ctx->opened_on[i] = AFR_FD_NOT_OPENED; + need_open[i] = 0; + } + } + } + UNLOCK(&fd->lock); +} + static int -afr_fd_ctx_need_open(fd_t *fd, xlator_t *this, unsigned char *need_open) +afr_fd_ctx_set_need_open(fd_t *fd, xlator_t *this, unsigned char *need_open) { afr_fd_ctx_t *fd_ctx = NULL; afr_private_t *priv = NULL; @@ -248,7 +274,6 @@ afr_fd_ctx_need_open(fd_t *fd, xlator_t *this, unsigned char *need_open) int count = 0; priv = this->private; - fd_ctx = afr_fd_ctx_get(fd, this); if (!fd_ctx) return 0; @@ -271,21 +296,217 @@ afr_fd_ctx_need_open(fd_t *fd, xlator_t *this, unsigned char *need_open) return count; } +static int +afr_do_fix_open(call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = frame->local; + afr_private_t *priv = NULL; + int i = 0; + int need_open_count = 0; + + priv = this->private; + + need_open_count = AFR_COUNT(local->need_open, priv->child_count); + if (!need_open_count) { + goto out; + } + gf_msg_debug(this->name, 0, "need open count: %d", need_open_count); + local->call_count = need_open_count; + + for (i = 0; i < priv->child_count; i++) { + if (!local->need_open[i]) + continue; + + if (IA_IFDIR == local->fd->inode->ia_type) { + gf_msg_debug(this->name, 0, "opening fd for dir %s on subvolume %s", + local->loc.path, priv->children[i]->name); + STACK_WIND_COOKIE(frame, afr_openfd_fix_open_cbk, (void *)(long)i, + priv->children[i], + priv->children[i]->fops->opendir, &local->loc, + local->fd, NULL); + } else { + gf_msg_debug(this->name, 0, + "opening fd for 
file %s on subvolume %s", + local->loc.path, priv->children[i]->name); + + STACK_WIND_COOKIE( + frame, afr_openfd_fix_open_cbk, (void *)(long)i, + priv->children[i], priv->children[i]->fops->open, &local->loc, + local->fd_ctx->flags & ~(O_CREAT | O_EXCL | O_TRUNC), local->fd, + NULL); + } + if (!--need_open_count) + break; + } + return 0; + +out: + afr_fd_ctx_reset_need_open(local->fd, this, local->need_open); + AFR_STACK_DESTROY(frame); + return 0; +} + +static int +afr_is_reopen_allowed_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct gf_flock *lock, dict_t *xdata) +{ + afr_local_t *local = frame->local; + afr_private_t *priv = NULL; + int ret = -1; + int call_count = 0; + int i = (long)cookie; + int32_t fd_reopen_status = -1; + int32_t final_reopen_status = -1; + + priv = this->private; + local->replies[i].valid = 1; + local->replies[i].op_ret = op_ret; + local->replies[i].op_errno = op_errno; + if (op_ret != 0) { + gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_DICT_GET_FAILED, + "Failed getlk for %s", uuid_utoa(local->fd->inode->gfid)); + } + + if (xdata) + local->replies[i].xdata = dict_ref(xdata); + + call_count = afr_frame_return(frame); + + if (call_count) + return 0; + + /* Currently we get 3 values from the lower layer (protocol/client) in the + * getlk_cbk. + * FD_REOPEN_ALLOWED : No conflicting locks are held and reopen is allowed + * FD_REOPEN_NOT_ALLOWED : Conflicting locks are held and reopen is not + * allowed + * FD_BAD : FD is not valid + * + * - If we get FD_REOPEN_NOT_ALLOWED from any of the bricks, will block the + * reopen taking this as high priority. + * - If we get FD_BAD from all the replies, we will not reopen since we do + * not know the correct status. + * - If we get FD_BAD from few brick and FD_REOPEN_NOT_ALLOWED from one or + * more bricks, then we will block reopen. 
+ * - If we get FD_BAD from few bricks and FD_REOPEN_ALLOWED from one or + * more bricks, then we will allow the reopen. + * + * We will update the final_reopen_status only when the value returned + * from lower layer is >= FD_REOPEN_ALLOWED and < FD_BAD. We will not set + * FD_BAD in final_reopen_status, since it can lead to unexpected + * behaviours. + * + * At the end of this loop, if we still have final_reopen_status as -1 + * i.e., the init value, it means we failed to get the fd status from any + * of the bricks or we do not have a valid fd on any of the bricks. We + * will not reopen the fd in this case as well. + */ + + for (i = 0; i < priv->child_count; i++) { + if (final_reopen_status != FD_REOPEN_NOT_ALLOWED && + local->replies[i].xdata) { + ret = dict_get_int32(local->replies[i].xdata, "fd-reopen-status", &fd_reopen_status); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED, + "Failed to get whether reopen is allowed or not on fd " + "for file %s on subvolume %s.", + local->loc.path, priv->children[i]->name); + } else if (fd_reopen_status >= FD_REOPEN_ALLOWED && + fd_reopen_status < FD_BAD) { + final_reopen_status = fd_reopen_status; + } + } + + if (final_reopen_status == FD_REOPEN_NOT_ALLOWED) + break; + } + + if (final_reopen_status == FD_REOPEN_NOT_ALLOWED) { + gf_log(this->name, GF_LOG_INFO, + "Conflicting locks held on file %s. FD reopen is not allowed.", + local->loc.path); + } else if (final_reopen_status == -1) { + gf_log(this->name, GF_LOG_INFO, + "Failed to get the lock information " + "on file %s. 
FD reopen is not allowed.", + local->loc.path); + } else { + afr_local_replies_wipe(local, priv); + afr_do_fix_open(frame, this); + return 0; + } + + afr_fd_ctx_reset_need_open(local->fd, this, local->need_open); + AFR_STACK_DESTROY(frame); + return 0; +} + void -afr_fix_open(fd_t *fd, xlator_t *this) +afr_is_reopen_allowed(xlator_t *this, call_frame_t *frame) { afr_private_t *priv = NULL; + afr_local_t *local = NULL; + dict_t *xdata = NULL; int i = 0; + int call_count = 0; + struct gf_flock flock = { + 0, + }; + + local = frame->local; + priv = this->private; + + flock.l_type = F_WRLCK; + afr_set_lk_owner(frame, this, frame->root); + lk_owner_copy(&flock.l_owner, &frame->root->lk_owner); + + call_count = AFR_COUNT(local->child_up, priv->child_count); + if (!call_count) + goto out; + local->call_count = call_count; + + xdata = dict_new(); + if (xdata == NULL) + goto out; + + if (dict_set_int32(xdata, "fd-reopen-status", -1)) + goto out; + + for (i = 0; i < priv->child_count; i++) { + if (local->child_up[i]) { + STACK_WIND_COOKIE(frame, afr_is_reopen_allowed_cbk, (void *)(long)i, + priv->children[i], priv->children[i]->fops->lk, + local->fd, F_GETLK, &flock, xdata); + } else { + continue; + } + + if (!--call_count) + break; + } + + dict_unref(xdata); + return; + +out: + if (xdata) + dict_unref(xdata); + afr_fd_ctx_reset_need_open(local->fd, this, local->need_open); + AFR_STACK_DESTROY(frame); + return; +} + +void +afr_fix_open(fd_t *fd, xlator_t *this) +{ call_frame_t *frame = NULL; afr_local_t *local = NULL; int ret = -1; int32_t op_errno = 0; afr_fd_ctx_t *fd_ctx = NULL; - unsigned char *need_open = NULL; int call_count = 0; - priv = this->private; - if (!afr_is_fd_fixable(fd)) goto out; @@ -293,12 +514,6 @@ afr_fix_open(fd_t *fd, xlator_t *this) if (!fd_ctx) goto out; - need_open = alloca0(priv->child_count); - - call_count = afr_fd_ctx_need_open(fd, this, need_open); - if (!call_count) - goto out; - frame = create_frame(this, this->ctx->pool); if (!frame) goto 
out; @@ -307,47 +522,24 @@ afr_fix_open(fd_t *fd, xlator_t *this) if (!local) goto out; + call_count = afr_fd_ctx_set_need_open(fd, this, local->need_open); + if (!call_count) + goto out; + local->loc.inode = inode_ref(fd->inode); ret = loc_path(&local->loc, NULL); if (ret < 0) goto out; - local->fd = fd_ref(fd); local->fd_ctx = fd_ctx; - local->call_count = call_count; - - gf_msg_debug(this->name, 0, "need open count: %d", call_count); - - for (i = 0; i < priv->child_count; i++) { - if (!need_open[i]) - continue; - - if (IA_IFDIR == fd->inode->ia_type) { - gf_msg_debug(this->name, 0, "opening fd for dir %s on subvolume %s", - local->loc.path, priv->children[i]->name); - - STACK_WIND_COOKIE(frame, afr_openfd_fix_open_cbk, (void *)(long)i, - priv->children[i], - priv->children[i]->fops->opendir, &local->loc, - local->fd, NULL); - } else { - gf_msg_debug(this->name, 0, - "opening fd for file %s on subvolume %s", - local->loc.path, priv->children[i]->name); - - STACK_WIND_COOKIE(frame, afr_openfd_fix_open_cbk, (void *)(long)i, - priv->children[i], priv->children[i]->fops->open, - &local->loc, fd_ctx->flags & (~O_TRUNC), - local->fd, NULL); - } - - if (!--call_count) - break; - } - + afr_is_reopen_allowed(this, frame); return; + out: + if (call_count) + afr_fd_ctx_reset_need_open(fd, this, local->need_open); if (frame) AFR_STACK_DESTROY(frame); + return; } diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 6a9a763..ffc7317 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -895,6 +895,9 @@ typedef struct _afr_local { afr_ta_fop_state_t fop_state; int ta_failed_subvol; gf_boolean_t is_new_entry; + + /* For fix_open */ + unsigned char *need_open; } afr_local_t; typedef struct afr_spbc_timeout { diff --git a/xlators/protocol/client/src/client-common.c b/xlators/protocol/client/src/client-common.c index 1417a60..92cda12 100644 --- a/xlators/protocol/client/src/client-common.c +++ 
b/xlators/protocol/client/src/client-common.c @@ -343,7 +343,7 @@ client_pre_readv(xlator_t *this, gfs3_read_req *req, fd_t *fd, size_t size, int op_errno = ESTALE; CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno, - out); + GFS3_OP_READ, out); req->size = size; req->offset = offset; @@ -368,7 +368,7 @@ client_pre_writev(xlator_t *this, gfs3_write_req *req, fd_t *fd, size_t size, int op_errno = ESTALE; CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno, - out); + GFS3_OP_WRITE, out); req->size = size; req->offset = offset; @@ -429,7 +429,8 @@ client_pre_flush(xlator_t *this, gfs3_flush_req *req, fd_t *fd, dict_t *xdata) int64_t remote_fd = -1; int op_errno = ESTALE; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_FLUSH, out); req->fd = remote_fd; memcpy(req->gfid, fd->inode->gfid, 16); @@ -450,7 +451,7 @@ client_pre_fsync(xlator_t *this, gfs3_fsync_req *req, fd_t *fd, int32_t flags, int op_errno = 0; CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno, - out); + GFS3_OP_FSYNC, out); req->fd = remote_fd; req->data = flags; @@ -591,7 +592,8 @@ client_pre_fsyncdir(xlator_t *this, gfs3_fsyncdir_req *req, fd_t *fd, int32_t op_errno = ESTALE; int64_t remote_fd = -1; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_FSYNCDIR, out); req->fd = remote_fd; req->data = flags; @@ -668,7 +670,8 @@ client_pre_ftruncate(xlator_t *this, gfs3_ftruncate_req *req, fd_t *fd, int64_t remote_fd = -1; int op_errno = EINVAL; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_FTRUNCATE, out); req->offset = offset; req->fd = remote_fd; @@ -687,7 +690,8 @@ client_pre_fstat(xlator_t *this, 
gfs3_fstat_req *req, fd_t *fd, dict_t *xdata) int64_t remote_fd = -1; int op_errno = ESTALE; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_FSTAT, out); req->fd = remote_fd; memcpy(req->gfid, fd->inode->gfid, 16); @@ -710,7 +714,8 @@ client_pre_lk(xlator_t *this, gfs3_lk_req *req, int32_t cmd, int32_t gf_type = 0; int ret = 0; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_LK, out); ret = client_cmd_to_gf_cmd(cmd, &gf_cmd); if (ret) { @@ -787,7 +792,8 @@ client_pre_readdir(xlator_t *this, gfs3_readdir_req *req, fd_t *fd, size_t size, int64_t remote_fd = -1; int op_errno = ESTALE; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_READDIR, out); req->size = size; req->offset = offset; @@ -869,7 +875,7 @@ client_pre_finodelk(xlator_t *this, gfs3_finodelk_req *req, fd_t *fd, int cmd, int32_t gf_cmd = 0; CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno, - out); + GFS3_OP_FINODELK, out); if (cmd == F_GETLK || cmd == F_GETLK64) gf_cmd = GF_LK_GETLK; @@ -952,7 +958,8 @@ client_pre_fentrylk(xlator_t *this, gfs3_fentrylk_req *req, fd_t *fd, int64_t remote_fd = -1; int op_errno = ESTALE; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_FENTRYLK, out); req->fd = remote_fd; req->cmd = cmd_entrylk; @@ -1013,7 +1020,7 @@ client_pre_fxattrop(xlator_t *this, gfs3_fxattrop_req *req, fd_t *fd, int64_t remote_fd = -1; CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno, - out); + GFS3_OP_FXATTROP, out); req->fd = remote_fd; req->flags = flags; @@ -1039,7 +1046,8 @@ client_pre_fgetxattr(xlator_t 
*this, gfs3_fgetxattr_req *req, fd_t *fd, int64_t remote_fd = -1; int op_errno = ESTALE; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_FGETXATTR, out); req->namelen = 1; /* Use it as a flag */ req->fd = remote_fd; @@ -1065,7 +1073,8 @@ client_pre_fsetxattr(xlator_t *this, gfs3_fsetxattr_req *req, fd_t *fd, int64_t remote_fd = -1; int op_errno = ESTALE; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_FSETXATTR, out); req->fd = remote_fd; req->flags = flags; @@ -1091,7 +1100,8 @@ client_pre_rchecksum(xlator_t *this, gfs3_rchecksum_req *req, fd_t *fd, int64_t remote_fd = -1; int op_errno = ESTALE; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_RCHECKSUM, out); req->len = len; req->offset = offset; @@ -1141,7 +1151,8 @@ client_pre_fsetattr(xlator_t *this, gfs3_fsetattr_req *req, fd_t *fd, int op_errno = ESTALE; int64_t remote_fd = -1; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_FSETATTR, out); req->fd = remote_fd; req->valid = valid; @@ -1161,7 +1172,8 @@ client_pre_readdirp(xlator_t *this, gfs3_readdirp_req *req, fd_t *fd, int op_errno = ESTALE; int64_t remote_fd = -1; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_READDIRP, out); req->size = size; req->offset = offset; @@ -1187,7 +1199,8 @@ client_pre_fremovexattr(xlator_t *this, gfs3_fremovexattr_req *req, fd_t *fd, if (!(fd && fd->inode)) goto out; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + 
CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_FREMOVEXATTR, out); memcpy(req->gfid, fd->inode->gfid, 16); req->name = (char *)name; @@ -1208,7 +1221,8 @@ client_pre_fallocate(xlator_t *this, gfs3_fallocate_req *req, fd_t *fd, int op_errno = ESTALE; int64_t remote_fd = -1; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_FALLOCATE, out); req->fd = remote_fd; req->flags = flags; @@ -1230,7 +1244,8 @@ client_pre_discard(xlator_t *this, gfs3_discard_req *req, fd_t *fd, int op_errno = ESTALE; int64_t remote_fd = -1; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_DISCARD, out); req->fd = remote_fd; req->offset = offset; @@ -1251,7 +1266,8 @@ client_pre_zerofill(xlator_t *this, gfs3_zerofill_req *req, fd_t *fd, int op_errno = ESTALE; int64_t remote_fd = -1; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_ZEROFILL, out); req->fd = remote_fd; req->offset = offset; @@ -1286,7 +1302,8 @@ client_pre_seek(xlator_t *this, gfs3_seek_req *req, fd_t *fd, off_t offset, int64_t remote_fd = -1; int op_errno = ESTALE; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_SEEK, out); memcpy(req->gfid, fd->inode->gfid, 16); req->fd = remote_fd; @@ -2508,7 +2525,7 @@ client_pre_readv_v2(xlator_t *this, gfx_read_req *req, fd_t *fd, size_t size, int op_errno = ESTALE; CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno, - out); + GFS3_OP_READ, out); req->size = size; req->offset = offset; @@ -2532,7 +2549,7 @@ client_pre_writev_v2(xlator_t *this, gfx_write_req *req, fd_t *fd, size_t size, int 
op_errno = ESTALE; CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno, - out); + GFS3_OP_WRITE, out); req->size = size; req->offset = offset; @@ -2567,10 +2584,10 @@ client_pre_copy_file_range_v2(xlator_t *this, gfx_copy_file_range_req *req, int op_errno = ESTALE; CLIENT_GET_REMOTE_FD(this, fd_in, FALLBACK_TO_ANON_FD, remote_fd_in, - op_errno, out); + op_errno, GFS3_OP_COPY_FILE_RANGE, out); CLIENT_GET_REMOTE_FD(this, fd_out, FALLBACK_TO_ANON_FD, remote_fd_out, - op_errno, out); + op_errno, GFS3_OP_COPY_FILE_RANGE, out); req->size = size; req->off_in = off_in; req->off_out = off_out; @@ -2623,7 +2640,8 @@ client_pre_flush_v2(xlator_t *this, gfx_flush_req *req, fd_t *fd, dict_t *xdata) int64_t remote_fd = -1; int op_errno = ESTALE; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_FLUSH, out); req->fd = remote_fd; memcpy(req->gfid, fd->inode->gfid, 16); @@ -2643,7 +2661,7 @@ client_pre_fsync_v2(xlator_t *this, gfx_fsync_req *req, fd_t *fd, int32_t flags, int op_errno = 0; CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno, - out); + GFS3_OP_FSYNC, out); req->fd = remote_fd; req->data = flags; @@ -2778,7 +2796,8 @@ client_pre_fsyncdir_v2(xlator_t *this, gfx_fsyncdir_req *req, fd_t *fd, int32_t op_errno = ESTALE; int64_t remote_fd = -1; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_FSYNCDIR, out); req->fd = remote_fd; req->data = flags; @@ -2852,7 +2871,8 @@ client_pre_ftruncate_v2(xlator_t *this, gfx_ftruncate_req *req, fd_t *fd, int64_t remote_fd = -1; int op_errno = EINVAL; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_FTRUNCATE, out); req->offset = offset; req->fd = remote_fd; @@ 
-2870,7 +2890,8 @@ client_pre_fstat_v2(xlator_t *this, gfx_fstat_req *req, fd_t *fd, dict_t *xdata) int64_t remote_fd = -1; int op_errno = ESTALE; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_FSTAT, out); req->fd = remote_fd; memcpy(req->gfid, fd->inode->gfid, 16); @@ -2892,7 +2913,8 @@ client_pre_lk_v2(xlator_t *this, gfx_lk_req *req, int32_t cmd, int32_t gf_type = 0; int ret = 0; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_LK, out); ret = client_cmd_to_gf_cmd(cmd, &gf_cmd); if (ret) { @@ -2967,7 +2989,8 @@ client_pre_readdir_v2(xlator_t *this, gfx_readdir_req *req, fd_t *fd, int64_t remote_fd = -1; int op_errno = ESTALE; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_READDIR, out); req->size = size; req->offset = offset; @@ -3048,7 +3071,7 @@ client_pre_finodelk_v2(xlator_t *this, gfx_finodelk_req *req, fd_t *fd, int cmd, int32_t gf_cmd = 0; CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno, - out); + GFS3_OP_FINODELK, out); if (cmd == F_GETLK || cmd == F_GETLK64) gf_cmd = GF_LK_GETLK; @@ -3129,7 +3152,8 @@ client_pre_fentrylk_v2(xlator_t *this, gfx_fentrylk_req *req, fd_t *fd, int64_t remote_fd = -1; int op_errno = ESTALE; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_FENTRYLK, out); req->fd = remote_fd; req->cmd = cmd_entrylk; @@ -3185,7 +3209,7 @@ client_pre_fxattrop_v2(xlator_t *this, gfx_fxattrop_req *req, fd_t *fd, int64_t remote_fd = -1; CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno, - out); + GFS3_OP_FXATTROP, out); req->fd = remote_fd; req->flags = 
flags; @@ -3207,7 +3231,8 @@ client_pre_fgetxattr_v2(xlator_t *this, gfx_fgetxattr_req *req, fd_t *fd, int64_t remote_fd = -1; int op_errno = ESTALE; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_FGETXATTR, out); req->namelen = 1; /* Use it as a flag */ req->fd = remote_fd; @@ -3232,7 +3257,8 @@ client_pre_fsetxattr_v2(xlator_t *this, gfx_fsetxattr_req *req, fd_t *fd, int64_t remote_fd = -1; int op_errno = ESTALE; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_FSETXATTR, out); req->fd = remote_fd; req->flags = flags; @@ -3256,7 +3282,8 @@ client_pre_rchecksum_v2(xlator_t *this, gfx_rchecksum_req *req, fd_t *fd, int64_t remote_fd = -1; int op_errno = ESTALE; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_RCHECKSUM, out); req->len = len; req->offset = offset; @@ -3304,7 +3331,8 @@ client_pre_fsetattr_v2(xlator_t *this, gfx_fsetattr_req *req, fd_t *fd, int op_errno = ESTALE; int64_t remote_fd = -1; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_FSETATTR, out); memcpy(req->gfid, fd->inode->gfid, 16); req->fd = remote_fd; @@ -3324,7 +3352,8 @@ client_pre_readdirp_v2(xlator_t *this, gfx_readdirp_req *req, fd_t *fd, int op_errno = ESTALE; int64_t remote_fd = -1; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_READDIRP, out); req->size = size; req->offset = offset; @@ -3349,7 +3378,8 @@ client_pre_fremovexattr_v2(xlator_t *this, gfx_fremovexattr_req *req, fd_t *fd, if (!(fd && fd->inode)) goto out; - 
CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_FREMOVEXATTR, out); memcpy(req->gfid, fd->inode->gfid, 16); req->name = (char *)name; @@ -3369,7 +3399,8 @@ client_pre_fallocate_v2(xlator_t *this, gfx_fallocate_req *req, fd_t *fd, int op_errno = ESTALE; int64_t remote_fd = -1; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_FALLOCATE, out); req->fd = remote_fd; req->flags = flags; @@ -3390,7 +3421,8 @@ client_pre_discard_v2(xlator_t *this, gfx_discard_req *req, fd_t *fd, int op_errno = ESTALE; int64_t remote_fd = -1; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_DISCARD, out); req->fd = remote_fd; req->offset = offset; @@ -3410,7 +3442,8 @@ client_pre_zerofill_v2(xlator_t *this, gfx_zerofill_req *req, fd_t *fd, int op_errno = ESTALE; int64_t remote_fd = -1; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_ZEROFILL, out); req->fd = remote_fd; req->offset = offset; @@ -3439,7 +3472,8 @@ client_pre_seek_v2(xlator_t *this, gfx_seek_req *req, fd_t *fd, off_t offset, int64_t remote_fd = -1; int op_errno = ESTALE; - CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out); + CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, + GFS3_OP_SEEK, out); memcpy(req->gfid, fd->inode->gfid, 16); req->fd = remote_fd; @@ -3587,3 +3621,25 @@ client_post_rename_v2(xlator_t *this, gfx_rename_rsp *rsp, struct iatt *stbuf, return xdr_to_dict(&rsp->xdata, xdata); } + +void +set_fd_reopen_status(xlator_t *this, dict_t *xdata, + enum gf_fd_reopen_status fd_reopen_status) +{ + clnt_conf_t *conf = NULL; + + 
conf = this->private; + if (!conf) { + gf_msg_debug(this->name, ENOMEM, "Failed to get client conf"); + return; + } + + if (!conf->strict_locks) + fd_reopen_status = FD_REOPEN_ALLOWED; + + if (dict_set_int32(xdata, "fd-reopen-status", fd_reopen_status)) + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, PC_MSG_DICT_SET_FAILED, + NULL); + + return; +} diff --git a/xlators/protocol/client/src/client-common.h b/xlators/protocol/client/src/client-common.h index a2043d8..16fb167 100644 --- a/xlators/protocol/client/src/client-common.h +++ b/xlators/protocol/client/src/client-common.h @@ -627,4 +627,8 @@ client_pre_copy_file_range_v2(xlator_t *this, gfx_copy_file_range_req *req, off64_t off_out, size_t size, int32_t flags, dict_t **xdata); +void +set_fd_reopen_status(xlator_t *this, dict_t *xdata, + enum gf_fd_reopen_status fd_reopen_allowed); + #endif /* __CLIENT_COMMON_H__ */ diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c index 6543100..48b6448 100644 --- a/xlators/protocol/client/src/client-helpers.c +++ b/xlators/protocol/client/src/client-helpers.c @@ -406,11 +406,12 @@ clnt_readdir_rsp_cleanup_v2(gfx_readdir_rsp *rsp) } int -client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd) +client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd, + enum gf_fop_procnum fop) { clnt_fd_ctx_t *fdctx = NULL; clnt_conf_t *conf = NULL; - gf_boolean_t locks_held = _gf_false; + gf_boolean_t locks_involved = _gf_false; GF_VALIDATE_OR_GOTO(this->name, fd, out); GF_VALIDATE_OR_GOTO(this->name, remote_fd, out); @@ -423,23 +424,32 @@ client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd) if (fd->anonymous) { *remote_fd = GF_ANON_FD_NO; } else { + if (conf->strict_locks && + (fop == GFS3_OP_WRITE || fop == GFS3_OP_FTRUNCATE || + fop == GFS3_OP_FALLOCATE || fop == GFS3_OP_ZEROFILL || + fop == GFS3_OP_DISCARD)) { + locks_involved = _gf_true; + } *remote_fd = -1; 
gf_msg_debug(this->name, EBADF, "not a valid fd for gfid: %s", uuid_utoa(fd->inode->gfid)); } } else { - if (__is_fd_reopen_in_progress(fdctx)) + if (__is_fd_reopen_in_progress(fdctx)) { *remote_fd = -1; - else + } else { *remote_fd = fdctx->remote_fd; + } - locks_held = !list_empty(&fdctx->lock_list); + locks_involved = !list_empty(&fdctx->lock_list); } } pthread_spin_unlock(&conf->fd_lock); - if ((flags & FALLBACK_TO_ANON_FD) && (*remote_fd == -1) && (!locks_held)) + if ((flags & FALLBACK_TO_ANON_FD) && (*remote_fd == -1) && + (!locks_involved)) { *remote_fd = GF_ANON_FD_NO; + } return 0; out: diff --git a/xlators/protocol/client/src/client-rpc-fops.c b/xlators/protocol/client/src/client-rpc-fops.c index 3110c78..46ac544 100644 --- a/xlators/protocol/client/src/client-rpc-fops.c +++ b/xlators/protocol/client/src/client-rpc-fops.c @@ -2439,6 +2439,13 @@ client3_3_lk_cbk(struct rpc_req *req, struct iovec *iov, int count, } } + if (local->check_reopen) { + if (lock.l_type == F_WRLCK) + set_fd_reopen_status(this, xdata, FD_REOPEN_NOT_ALLOWED); + else + set_fd_reopen_status(this, xdata, FD_REOPEN_ALLOWED); + } + out: if ((rsp.op_ret == -1) && (EAGAIN != gf_error_to_errno(rsp.op_errno))) { gf_msg(this->name, GF_LOG_WARNING, gf_error_to_errno(rsp.op_errno), @@ -5198,6 +5205,7 @@ client3_3_lk(call_frame_t *frame, xlator_t *this, void *data) 0, }, }; + dict_t *xdata = NULL; int32_t gf_cmd = 0; clnt_local_t *local = NULL; clnt_conf_t *conf = NULL; @@ -5224,6 +5232,10 @@ client3_3_lk(call_frame_t *frame, xlator_t *this, void *data) goto unwind; } + ret = dict_get_int32(args->xdata, "fd-reopen-status", &local->check_reopen); + if (ret) + local->check_reopen = 0; + local->owner = frame->root->lk_owner; local->cmd = args->cmd; local->fd = fd_ref(args->fd); @@ -5237,6 +5249,13 @@ client3_3_lk(call_frame_t *frame, xlator_t *this, void *data) client_is_setlk(local->cmd)) { client_add_lock_for_recovery(local->fd, args->flock, &local->owner, local->cmd); + } else if 
(local->check_reopen) { + xdata = dict_new(); + if (xdata == NULL) { + op_errno = ENOMEM; + goto unwind; + } + set_fd_reopen_status(this, xdata, FD_BAD); } goto unwind; @@ -5254,8 +5273,10 @@ client3_3_lk(call_frame_t *frame, xlator_t *this, void *data) return 0; unwind: - CLIENT_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL); + CLIENT_STACK_UNWIND(lk, frame, -1, op_errno, NULL, xdata); GF_FREE(req.xdata.xdata_val); + if (xdata) + dict_unref(xdata); return 0; } diff --git a/xlators/protocol/client/src/client-rpc-fops_v2.c b/xlators/protocol/client/src/client-rpc-fops_v2.c index 954fc58..d0055e9 100644 --- a/xlators/protocol/client/src/client-rpc-fops_v2.c +++ b/xlators/protocol/client/src/client-rpc-fops_v2.c @@ -2234,6 +2234,13 @@ client4_0_lk_cbk(struct rpc_req *req, struct iovec *iov, int count, } } + if (local->check_reopen) { + if (lock.l_type == F_WRLCK) + set_fd_reopen_status(this, xdata, FD_REOPEN_NOT_ALLOWED); + else + set_fd_reopen_status(this, xdata, FD_REOPEN_ALLOWED); + } + out: if ((rsp.op_ret == -1) && (EAGAIN != gf_error_to_errno(rsp.op_errno))) { gf_msg(this->name, GF_LOG_WARNING, gf_error_to_errno(rsp.op_errno), @@ -4759,6 +4766,7 @@ client4_0_lk(call_frame_t *frame, xlator_t *this, void *data) 0, }, }; + dict_t *xdata = NULL; int32_t gf_cmd = 0; clnt_local_t *local = NULL; clnt_conf_t *conf = NULL; @@ -4785,6 +4793,10 @@ client4_0_lk(call_frame_t *frame, xlator_t *this, void *data) goto unwind; } + ret = dict_get_int32(args->xdata, "fd-reopen-status", &local->check_reopen); + if (ret) + local->check_reopen = 0; + local->owner = frame->root->lk_owner; local->cmd = args->cmd; local->fd = fd_ref(args->fd); @@ -4798,6 +4810,13 @@ client4_0_lk(call_frame_t *frame, xlator_t *this, void *data) client_is_setlk(local->cmd)) { client_add_lock_for_recovery(local->fd, args->flock, &local->owner, local->cmd); + } else if (local->check_reopen) { + xdata = dict_new(); + if (xdata == NULL) { + op_errno = ENOMEM; + goto unwind; + } + set_fd_reopen_status(this, 
xdata, FD_BAD); } goto unwind; @@ -4815,8 +4834,10 @@ client4_0_lk(call_frame_t *frame, xlator_t *this, void *data) return 0; unwind: - CLIENT_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL); + CLIENT_STACK_UNWIND(lk, frame, -1, op_errno, NULL, xdata); GF_FREE(req.xdata.pairs.pairs_val); + if (xdata) + dict_unref(xdata); return 0; } @@ -6094,7 +6115,7 @@ client4_0_rchecksum(call_frame_t *frame, xlator_t *this, void *data) conf = this->private; CLIENT_GET_REMOTE_FD(this, args->fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, - unwind); + GFS3_OP_RCHECKSUM, unwind); req.len = args->len; req.offset = args->offset; diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c index 63c90ea..35a5340 100644 --- a/xlators/protocol/client/src/client.c +++ b/xlators/protocol/client/src/client.c @@ -864,9 +864,11 @@ int32_t client_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, fd_t *fd, dict_t *xdata) { - int ret = -1; + int ret = 0; + int op_errno = ENOTCONN; clnt_conf_t *conf = NULL; rpc_clnt_procedure_t *proc = NULL; + clnt_fd_ctx_t *fdctx = NULL; clnt_args_t args = { 0, }; @@ -875,6 +877,21 @@ client_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, if (!conf || !conf->fops) goto out; + if (conf->strict_locks) { + pthread_spin_lock(&conf->fd_lock); + { + fdctx = this_fd_get_ctx(fd, this); + if (fdctx && !list_empty(&fdctx->lock_list)) { + ret = -1; + op_errno = EBADFD; + } + } + pthread_spin_unlock(&conf->fd_lock); + + if (ret) + goto out; + } + args.loc = loc; args.fd = fd; args.xdata = xdata; @@ -888,7 +905,7 @@ client_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, out: if (ret) - STACK_UNWIND_STRICT(open, frame, -1, ENOTCONN, NULL, NULL); + STACK_UNWIND_STRICT(open, frame, -1, op_errno, NULL, NULL); return 0; } diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h index bde3d1a..2a50625 100644 --- a/xlators/protocol/client/src/client.h +++ 
b/xlators/protocol/client/src/client.h @@ -98,10 +98,10 @@ typedef enum { free(_this_rsp->xdata.xdata_val); \ } while (0) -#define CLIENT_GET_REMOTE_FD(xl, fd, flags, remote_fd, op_errno, label) \ +#define CLIENT_GET_REMOTE_FD(xl, fd, flags, remote_fd, op_errno, fop, label) \ do { \ int _ret = 0; \ - _ret = client_get_remote_fd(xl, fd, flags, &remote_fd); \ + _ret = client_get_remote_fd(xl, fd, flags, &remote_fd, fop); \ if (_ret < 0) { \ op_errno = errno; \ goto label; \ @@ -286,6 +286,7 @@ typedef struct client_local { client_posix_lock_t *client_lock; gf_lkowner_t owner; int32_t cmd; + int32_t check_reopen; struct list_head lock_list; pthread_mutex_t mutex; char *name; @@ -435,7 +436,8 @@ client_default_reopen_done(clnt_fd_ctx_t *fdctx, int64_t rfd, xlator_t *this); void client_attempt_reopen(fd_t *fd, xlator_t *this); int -client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd); +client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd, + enum gf_fop_procnum fop); int client_fd_fop_prepare_local(call_frame_t *frame, fd_t *fd, int64_t remote_fd); gf_boolean_t -- 1.8.3.1