From 57c794e31c0333f508ada740227c9afa1889f8ae Mon Sep 17 00:00:00 2001
From: karthik-us <ksubrahm@redhat.com>
Date: Thu, 15 Apr 2021 11:27:57 +0530
Subject: [PATCH 581/584] afr: don't reopen fds on which POSIX locks are held
When client.strict-locks is enabled on a volume and POSIX locks are held
on files, do not re-open such fds after the clients disconnect and
reconnect. Re-opening them might lead to multiple clients acquiring the
locks and cause data corruption.
> Upstream patch: https://github.com/gluster/glusterfs/pull/1980/commits/56bde56c2741c5eac59937a6cf951a14f2878460
> Change-Id: I8777ffbc2cc8d15ab57b58b72b56eb67521787c5
> Fixes: #1977
> Signed-off-by: karthik-us <ksubrahm@redhat.com>
BUG: 1689375
Change-Id: I8777ffbc2cc8d15ab57b58b72b56eb67521787c5
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/245414
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
---
rpc/rpc-lib/src/protocol-common.h | 6 +
tests/bugs/replicate/do-not-reopen-fd.t | 206 +++++++++++++++++
xlators/cluster/afr/src/afr-common.c | 15 +-
xlators/cluster/afr/src/afr-open.c | 280 +++++++++++++++++++----
xlators/cluster/afr/src/afr.h | 3 +
xlators/protocol/client/src/client-common.c | 148 ++++++++----
xlators/protocol/client/src/client-common.h | 4 +
xlators/protocol/client/src/client-helpers.c | 22 +-
xlators/protocol/client/src/client-rpc-fops.c | 23 +-
xlators/protocol/client/src/client-rpc-fops_v2.c | 25 +-
xlators/protocol/client/src/client.c | 21 +-
xlators/protocol/client/src/client.h | 8 +-
12 files changed, 654 insertions(+), 107 deletions(-)
create mode 100644 tests/bugs/replicate/do-not-reopen-fd.t
diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h
index 779878f..f56aaaa 100644
--- a/rpc/rpc-lib/src/protocol-common.h
+++ b/rpc/rpc-lib/src/protocol-common.h
@@ -312,6 +312,12 @@ enum glusterd_mgmt_v3_procnum {
GLUSTERD_MGMT_V3_MAXVALUE,
};
+enum gf_fd_reopen_status {
+ FD_REOPEN_ALLOWED = 0, /* no conflicting locks held; reopen is allowed */
+ FD_REOPEN_NOT_ALLOWED, /* conflicting locks held; reopen is not allowed */
+ FD_BAD, /* fd is not valid */
+};
+
typedef struct gf_gsync_detailed_status_ gf_gsync_status_t;
enum gf_get_volume_info_type {
diff --git a/tests/bugs/replicate/do-not-reopen-fd.t b/tests/bugs/replicate/do-not-reopen-fd.t
new file mode 100644
index 0000000..76d8e70
--- /dev/null
+++ b/tests/bugs/replicate/do-not-reopen-fd.t
@@ -0,0 +1,206 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 client.strict-locks on
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M1
+
+TEST touch $M0/a
+
+# Kill one brick and take lock on the fd and do a write.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'rw' $M0/a
+
+TEST flock -x $fd1
+TEST fd_write $fd1 "data-1"
+
+# Restart the brick and then write. Now fd should not get re-opened but write
+# should still succeed as there were no quorum disconnects.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+TEST fd_write $fd1 "data-2"
+EXPECT "" cat $B0/${V0}0/a
+EXPECT "data-2" cat $B0/${V0}1/a
+EXPECT "data-2" cat $B0/${V0}2/a
+
+# Check there is no fd opened on the 1st brick by checking for the gfid inside
+# /proc/pid-of-brick/fd/ directory
+gfid_a=$(gf_get_gfid_xattr $B0/${V0}0/a)
+gfid_str_a=$(gf_gfid_xattr_to_str $gfid_a)
+
+EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+
+TEST fd2=`fd_available`
+TEST fd_open $fd2 'rw' $M1/a
+
+# Kill 2nd brick and try writing to the file. The write should fail due to
+# quorum failure.
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1
+TEST ! fd_write $fd1 "data-3"
+TEST ! fd_cat $fd1
+
+# Restart the bricks and try writing to the file. This should fail, as the two
+# bricks that were down previously will now return EBADFD.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+TEST ! fd_write $fd1 "data-4"
+TEST ! fd_cat $fd1
+
+# Enable heal and check that the file has the same content on all the bricks
+# after the heal is completed.
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+EXPECT "data-4" cat $B0/${V0}0/a
+EXPECT "data-4" cat $B0/${V0}1/a
+EXPECT "data-4" cat $B0/${V0}2/a
+TEST $CLI volume heal $V0 disable
+
+# Try writing to the file again on the same fd, which should fail again, since
+# it is not yet re-opened.
+TEST ! fd_write $fd1 "data-5"
+
+# At this point only one brick will have the lock. Try taking the lock again on
+# the bad fd, which should also fail with EBADFD.
+TEST ! flock -x $fd1
+
+# Kill the only brick that holds the lock and try taking the lock from another
+# client, which should succeed.
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 2
+TEST flock -x $fd2
+TEST fd_write $fd2 "data-6"
+
+# Bring the brick up and try writing & reading on the old fd, which should still
+# fail and operations on the 2nd fd should succeed.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2
+TEST ! fd_write $fd1 "data-7"
+
+TEST ! fd_cat $fd1
+TEST fd_cat $fd2
+
+# Close both the fds which will release the locks and then re-open and take lock
+# on the old fd. Operations on that fd should succeed afterwards.
+TEST fd_close $fd1
+TEST fd_close $fd2
+
+TEST ! ls /proc/$$/fd/$fd1
+TEST ! ls /proc/$$/fd/$fd2
+EXPECT_WITHIN $REOPEN_TIMEOUT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT_WITHIN $REOPEN_TIMEOUT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT_WITHIN $REOPEN_TIMEOUT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'rw' $M0/a
+EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+
+TEST flock -x $fd1
+TEST fd_write $fd1 "data-8"
+TEST fd_cat $fd1
+
+EXPECT "data-8" head -n 1 $B0/${V0}0/a
+EXPECT "data-8" head -n 1 $B0/${V0}1/a
+EXPECT "data-8" head -n 1 $B0/${V0}2/a
+
+TEST fd_close $fd1
+
+# Heal the volume
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+TEST $CLI volume heal $V0 disable
+
+# Kill one brick and open a fd.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'rw' $M0/a
+
+EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+
+# Restart the brick and then write. Now fd should get re-opened and write should
+# succeed on the previously down brick as well since there are no locks held on
+# any of the bricks.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+TEST fd_write $fd1 "data-10"
+EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+
+EXPECT "data-10" head -n 1 $B0/${V0}0/a
+EXPECT "data-10" head -n 1 $B0/${V0}1/a
+EXPECT "data-10" head -n 1 $B0/${V0}2/a
+TEST fd_close $fd1
+
+# Kill one brick, open and take lock on a fd.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'rw' $M0/a
+TEST flock -x $fd1
+
+EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+
+# Kill & restart another brick so that it will return EBADFD
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1
+
+# Restart the bricks and then write. Now fd should not get re-opened since lock
+# is still held on one brick and write should also fail as there is no quorum.
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+TEST ! fd_write $fd1 "data-11"
+EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $gfid_str_a
+EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}1 $gfid_str_a
+EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}2 $gfid_str_a
+
+EXPECT "data-10" head -n 1 $B0/${V0}0/a
+EXPECT "data-10" head -n 1 $B0/${V0}1/a
+EXPECT "data-11" head -n 1 $B0/${V0}2/a
+
+TEST fd_close $fd1
+cleanup
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 416012c..bd46e59 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -2067,6 +2067,8 @@ afr_local_cleanup(afr_local_t *local, xlator_t *this)
dict_unref(local->cont.entrylk.xdata);
}
+ GF_FREE(local->need_open);
+
if (local->xdata_req)
dict_unref(local->xdata_req);
@@ -5689,6 +5691,14 @@ afr_local_init(afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
}
local->is_new_entry = _gf_false;
+ local->need_open = GF_CALLOC(priv->child_count, sizeof(*local->need_open),
+ gf_afr_mt_char);
+ if (!local->need_open) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
INIT_LIST_HEAD(&local->healer);
return 0;
out:
@@ -6124,9 +6134,8 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
char *substr = NULL;
char *status = NULL;
- ret = afr_lockless_inspect(frame, this, loc->gfid, &inode,
- &entry_selfheal, &data_selfheal,
- &metadata_selfheal, &pending);
+ ret = afr_lockless_inspect(frame, this, loc->gfid, &inode, &entry_selfheal,
+ &data_selfheal, &metadata_selfheal, &pending);
if (ret == -ENOMEM) {
ret = -1;
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index ff72c73..73c1552 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -35,6 +35,8 @@
#include "afr-dir-read.h"
#include "afr-dir-write.h"
#include "afr-transaction.h"
+#include "afr-self-heal.h"
+#include "protocol-common.h"
gf_boolean_t
afr_is_fd_fixable(fd_t *fd)
@@ -239,8 +241,32 @@ afr_openfd_fix_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
+/* Children still AFR_FD_OPENING and flagged in need_open[] go back to
+ * AFR_FD_NOT_OPENED and lose their flag; no-op without an afr fd ctx. */
+static void
+afr_fd_ctx_reset_need_open(fd_t *fd, xlator_t *this, unsigned char *need_open)
+{
+    afr_private_t *conf = this->private;
+    afr_fd_ctx_t *ctx = afr_fd_ctx_get(fd, this);
+    int child = 0;
+
+    if (!ctx)
+        return;
+
+    LOCK(&fd->lock);
+    {
+        for (child = 0; child < conf->child_count; child++) {
+            if (!need_open[child] || ctx->opened_on[child] != AFR_FD_OPENING)
+                continue;
+            ctx->opened_on[child] = AFR_FD_NOT_OPENED;
+            need_open[child] = 0;
+        }
+    }
+    UNLOCK(&fd->lock);
+}
+
static int
-afr_fd_ctx_need_open(fd_t *fd, xlator_t *this, unsigned char *need_open)
+afr_fd_ctx_set_need_open(fd_t *fd, xlator_t *this, unsigned char *need_open)
{
afr_fd_ctx_t *fd_ctx = NULL;
afr_private_t *priv = NULL;
@@ -248,7 +274,6 @@ afr_fd_ctx_need_open(fd_t *fd, xlator_t *this, unsigned char *need_open)
int count = 0;
priv = this->private;
-
fd_ctx = afr_fd_ctx_get(fd, this);
if (!fd_ctx)
return 0;
@@ -271,21 +296,217 @@ afr_fd_ctx_need_open(fd_t *fd, xlator_t *this, unsigned char *need_open)
return count;
}
+static int
+afr_do_fix_open(call_frame_t *frame, xlator_t *this)
+{ /* Winds open()/opendir() to every child flagged in local->need_open. */
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int need_open_count = 0;
+
+ priv = this->private;
+
+ need_open_count = AFR_COUNT(local->need_open, priv->child_count);
+ if (!need_open_count) { /* AFR_COUNT gave 0: nothing to wind */
+ goto out;
+ }
+ gf_msg_debug(this->name, 0, "need open count: %d", need_open_count);
+ local->call_count = need_open_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->need_open[i])
+ continue;
+
+ if (IA_IFDIR == local->fd->inode->ia_type) { /* directory: opendir */
+ gf_msg_debug(this->name, 0, "opening fd for dir %s on subvolume %s",
+ local->loc.path, priv->children[i]->name);
+ STACK_WIND_COOKIE(frame, afr_openfd_fix_open_cbk, (void *)(long)i,
+ priv->children[i],
+ priv->children[i]->fops->opendir, &local->loc,
+ local->fd, NULL);
+ } else { /* any other inode type: open */
+ gf_msg_debug(this->name, 0,
+ "opening fd for file %s on subvolume %s",
+ local->loc.path, priv->children[i]->name);
+
+ STACK_WIND_COOKIE(
+ frame, afr_openfd_fix_open_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->open, &local->loc,
+ local->fd_ctx->flags & ~(O_CREAT | O_EXCL | O_TRUNC), local->fd,
+ NULL); /* flags are passed without O_CREAT, O_EXCL and O_TRUNC */
+ }
+ if (!--need_open_count) /* that was the last flagged child */
+ break;
+ }
+ return 0;
+
+out: /* reached only when no child is flagged in local->need_open */
+ afr_fd_ctx_reset_need_open(local->fd, this, local->need_open);
+ AFR_STACK_DESTROY(frame);
+ return 0;
+}
+
+/* F_GETLK callback of afr_is_reopen_allowed(). Stores the reply of child
+ * 'cookie' and, once afr_frame_return() returns 0, combines the
+ * "fd-reopen-status" of all the stored replies: the fd is re-opened via
+ * afr_do_fix_open() only if no child returned FD_REOPEN_NOT_ALLOWED and at
+ * least one returned FD_REOPEN_ALLOWED. Otherwise the need_open state is
+ * reset and the frame is destroyed. Always returns 0. */
+static int
+afr_is_reopen_allowed_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                          int32_t op_ret, int32_t op_errno,
+                          struct gf_flock *lock, dict_t *xdata)
+{
+    afr_local_t *local = frame->local;
+    afr_private_t *priv = this->private;
+    int ret = -1;
+    int call_count = 0;
+    int i = (long)cookie;
+    int32_t fd_reopen_status = -1;
+    int32_t final_reopen_status = -1;
+
+    local->replies[i].valid = 1;
+    local->replies[i].op_ret = op_ret;
+    local->replies[i].op_errno = op_errno;
+    if (op_ret != 0) {
+        gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_DICT_GET_FAILED,
+               "Failed getlk for %s", uuid_utoa(local->fd->inode->gfid));
+    }
+
+    if (xdata)
+        local->replies[i].xdata = dict_ref(xdata);
+
+    call_count = afr_frame_return(frame);
+
+    if (call_count)
+        return 0;
+
+    /* Each reply's xdata carries one of 3 values from the lower layer
+     * (protocol/client) under "fd-reopen-status":
+     * FD_REOPEN_ALLOWED : No conflicting locks are held and reopen is allowed
+     * FD_REOPEN_NOT_ALLOWED : Conflicting locks are held and reopen is not
+     * allowed
+     * FD_BAD : FD is not valid
+     *
+     * - FD_REOPEN_NOT_ALLOWED from any of the bricks blocks the reopen; it
+     *   takes priority over every other value.
+     * - FD_BAD from a few bricks and FD_REOPEN_ALLOWED from one or more bricks
+     *   allows the reopen.
+     * - FD_BAD from all the bricks blocks the reopen, since the correct status
+     *   is not known. FD_BAD is never stored in final_reopen_status.
+     *
+     * If final_reopen_status is still -1 (the init value) after the loop, no
+     * brick returned a usable status and the fd is not reopened either.
+     *
+     * The status must be read from each child's stored reply: the xdata
+     * argument belongs only to the reply handled by this invocation.
+     */
+
+    for (i = 0; i < priv->child_count; i++) {
+        if (final_reopen_status != FD_REOPEN_NOT_ALLOWED &&
+            local->replies[i].xdata) {
+            ret = dict_get_int32(local->replies[i].xdata, "fd-reopen-status",
+                                 &fd_reopen_status);
+            if (ret) {
+                gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+                       "Failed to get whether reopen is allowed or not on fd "
+                       "for file %s on subvolume %s.",
+                       local->loc.path, priv->children[i]->name);
+            } else if (fd_reopen_status >= FD_REOPEN_ALLOWED &&
+                       fd_reopen_status < FD_BAD) {
+                final_reopen_status = fd_reopen_status;
+            }
+        }
+
+        if (final_reopen_status == FD_REOPEN_NOT_ALLOWED)
+            break;
+    }
+
+    if (final_reopen_status == FD_REOPEN_NOT_ALLOWED) {
+        gf_log(this->name, GF_LOG_INFO,
+               "Conflicting locks held on file %s. FD reopen is not allowed.",
+               local->loc.path);
+    } else if (final_reopen_status == -1) {
+        gf_log(this->name, GF_LOG_INFO,
+               "Failed to get the lock information "
+               "on file %s. FD reopen is not allowed.",
+               local->loc.path);
+    } else {
+        afr_local_replies_wipe(local, priv);
+        afr_do_fix_open(frame, this);
+        return 0;
+    }
+
+    afr_fd_ctx_reset_need_open(local->fd, this, local->need_open);
+    AFR_STACK_DESTROY(frame);
+    return 0;
+}
+
void
-afr_fix_open(fd_t *fd, xlator_t *this)
+afr_is_reopen_allowed(xlator_t *this, call_frame_t *frame)
{
afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ dict_t *xdata = NULL;
int i = 0;
+ int call_count = 0;
+ struct gf_flock flock = {
+ 0,
+ };
+
+ local = frame->local;
+ priv = this->private;
+
+ flock.l_type = F_WRLCK;
+ afr_set_lk_owner(frame, this, frame->root);
+ lk_owner_copy(&flock.l_owner, &frame->root->lk_owner);
+
+ call_count = AFR_COUNT(local->child_up, priv->child_count);
+ if (!call_count)
+ goto out;
+ local->call_count = call_count;
+
+ xdata = dict_new();
+ if (xdata == NULL)
+ goto out;
+
+ if (dict_set_int32(xdata, "fd-reopen-status", -1))
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE(frame, afr_is_reopen_allowed_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->lk,
+ local->fd, F_GETLK, &flock, xdata);
+ } else {
+ continue;
+ }
+
+ if (!--call_count)
+ break;
+ }
+
+ dict_unref(xdata);
+ return;
+
+out:
+ if (xdata)
+ dict_unref(xdata);
+ afr_fd_ctx_reset_need_open(local->fd, this, local->need_open);
+ AFR_STACK_DESTROY(frame);
+ return;
+}
+
+void
+afr_fix_open(fd_t *fd, xlator_t *this)
+{
call_frame_t *frame = NULL;
afr_local_t *local = NULL;
int ret = -1;
int32_t op_errno = 0;
afr_fd_ctx_t *fd_ctx = NULL;
- unsigned char *need_open = NULL;
int call_count = 0;
- priv = this->private;
-
if (!afr_is_fd_fixable(fd))
goto out;
@@ -293,12 +514,6 @@ afr_fix_open(fd_t *fd, xlator_t *this)
if (!fd_ctx)
goto out;
- need_open = alloca0(priv->child_count);
-
- call_count = afr_fd_ctx_need_open(fd, this, need_open);
- if (!call_count)
- goto out;
-
frame = create_frame(this, this->ctx->pool);
if (!frame)
goto out;
@@ -307,47 +522,24 @@ afr_fix_open(fd_t *fd, xlator_t *this)
if (!local)
goto out;
+ call_count = afr_fd_ctx_set_need_open(fd, this, local->need_open);
+ if (!call_count)
+ goto out;
+
local->loc.inode = inode_ref(fd->inode);
ret = loc_path(&local->loc, NULL);
if (ret < 0)
goto out;
-
local->fd = fd_ref(fd);
local->fd_ctx = fd_ctx;
- local->call_count = call_count;
-
- gf_msg_debug(this->name, 0, "need open count: %d", call_count);
-
- for (i = 0; i < priv->child_count; i++) {
- if (!need_open[i])
- continue;
-
- if (IA_IFDIR == fd->inode->ia_type) {
- gf_msg_debug(this->name, 0, "opening fd for dir %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE(frame, afr_openfd_fix_open_cbk, (void *)(long)i,
- priv->children[i],
- priv->children[i]->fops->opendir, &local->loc,
- local->fd, NULL);
- } else {
- gf_msg_debug(this->name, 0,
- "opening fd for file %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE(frame, afr_openfd_fix_open_cbk, (void *)(long)i,
- priv->children[i], priv->children[i]->fops->open,
- &local->loc, fd_ctx->flags & (~O_TRUNC),
- local->fd, NULL);
- }
-
- if (!--call_count)
- break;
- }
-
+ afr_is_reopen_allowed(this, frame);
return;
+
out:
+ if (call_count)
+ afr_fd_ctx_reset_need_open(fd, this, local->need_open);
if (frame)
AFR_STACK_DESTROY(frame);
+ return;
}
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 6a9a763..ffc7317 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -895,6 +895,9 @@ typedef struct _afr_local {
afr_ta_fop_state_t fop_state;
int ta_failed_subvol;
gf_boolean_t is_new_entry;
+
+ /* For fix_open */
+ unsigned char *need_open;
} afr_local_t;
typedef struct afr_spbc_timeout {
diff --git a/xlators/protocol/client/src/client-common.c b/xlators/protocol/client/src/client-common.c
index 1417a60..92cda12 100644
--- a/xlators/protocol/client/src/client-common.c
+++ b/xlators/protocol/client/src/client-common.c
@@ -343,7 +343,7 @@ client_pre_readv(xlator_t *this, gfs3_read_req *req, fd_t *fd, size_t size,
int op_errno = ESTALE;
CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno,
- out);
+ GFS3_OP_READ, out);
req->size = size;
req->offset = offset;
@@ -368,7 +368,7 @@ client_pre_writev(xlator_t *this, gfs3_write_req *req, fd_t *fd, size_t size,
int op_errno = ESTALE;
CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno,
- out);
+ GFS3_OP_WRITE, out);
req->size = size;
req->offset = offset;
@@ -429,7 +429,8 @@ client_pre_flush(xlator_t *this, gfs3_flush_req *req, fd_t *fd, dict_t *xdata)
int64_t remote_fd = -1;
int op_errno = ESTALE;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_FLUSH, out);
req->fd = remote_fd;
memcpy(req->gfid, fd->inode->gfid, 16);
@@ -450,7 +451,7 @@ client_pre_fsync(xlator_t *this, gfs3_fsync_req *req, fd_t *fd, int32_t flags,
int op_errno = 0;
CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno,
- out);
+ GFS3_OP_FSYNC, out);
req->fd = remote_fd;
req->data = flags;
@@ -591,7 +592,8 @@ client_pre_fsyncdir(xlator_t *this, gfs3_fsyncdir_req *req, fd_t *fd,
int32_t op_errno = ESTALE;
int64_t remote_fd = -1;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_FSYNCDIR, out);
req->fd = remote_fd;
req->data = flags;
@@ -668,7 +670,8 @@ client_pre_ftruncate(xlator_t *this, gfs3_ftruncate_req *req, fd_t *fd,
int64_t remote_fd = -1;
int op_errno = EINVAL;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_FTRUNCATE, out);
req->offset = offset;
req->fd = remote_fd;
@@ -687,7 +690,8 @@ client_pre_fstat(xlator_t *this, gfs3_fstat_req *req, fd_t *fd, dict_t *xdata)
int64_t remote_fd = -1;
int op_errno = ESTALE;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_FSTAT, out);
req->fd = remote_fd;
memcpy(req->gfid, fd->inode->gfid, 16);
@@ -710,7 +714,8 @@ client_pre_lk(xlator_t *this, gfs3_lk_req *req, int32_t cmd,
int32_t gf_type = 0;
int ret = 0;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_LK, out);
ret = client_cmd_to_gf_cmd(cmd, &gf_cmd);
if (ret) {
@@ -787,7 +792,8 @@ client_pre_readdir(xlator_t *this, gfs3_readdir_req *req, fd_t *fd, size_t size,
int64_t remote_fd = -1;
int op_errno = ESTALE;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_READDIR, out);
req->size = size;
req->offset = offset;
@@ -869,7 +875,7 @@ client_pre_finodelk(xlator_t *this, gfs3_finodelk_req *req, fd_t *fd, int cmd,
int32_t gf_cmd = 0;
CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno,
- out);
+ GFS3_OP_FINODELK, out);
if (cmd == F_GETLK || cmd == F_GETLK64)
gf_cmd = GF_LK_GETLK;
@@ -952,7 +958,8 @@ client_pre_fentrylk(xlator_t *this, gfs3_fentrylk_req *req, fd_t *fd,
int64_t remote_fd = -1;
int op_errno = ESTALE;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_FENTRYLK, out);
req->fd = remote_fd;
req->cmd = cmd_entrylk;
@@ -1013,7 +1020,7 @@ client_pre_fxattrop(xlator_t *this, gfs3_fxattrop_req *req, fd_t *fd,
int64_t remote_fd = -1;
CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno,
- out);
+ GFS3_OP_FXATTROP, out);
req->fd = remote_fd;
req->flags = flags;
@@ -1039,7 +1046,8 @@ client_pre_fgetxattr(xlator_t *this, gfs3_fgetxattr_req *req, fd_t *fd,
int64_t remote_fd = -1;
int op_errno = ESTALE;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_FGETXATTR, out);
req->namelen = 1; /* Use it as a flag */
req->fd = remote_fd;
@@ -1065,7 +1073,8 @@ client_pre_fsetxattr(xlator_t *this, gfs3_fsetxattr_req *req, fd_t *fd,
int64_t remote_fd = -1;
int op_errno = ESTALE;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_FSETXATTR, out);
req->fd = remote_fd;
req->flags = flags;
@@ -1091,7 +1100,8 @@ client_pre_rchecksum(xlator_t *this, gfs3_rchecksum_req *req, fd_t *fd,
int64_t remote_fd = -1;
int op_errno = ESTALE;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_RCHECKSUM, out);
req->len = len;
req->offset = offset;
@@ -1141,7 +1151,8 @@ client_pre_fsetattr(xlator_t *this, gfs3_fsetattr_req *req, fd_t *fd,
int op_errno = ESTALE;
int64_t remote_fd = -1;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_FSETATTR, out);
req->fd = remote_fd;
req->valid = valid;
@@ -1161,7 +1172,8 @@ client_pre_readdirp(xlator_t *this, gfs3_readdirp_req *req, fd_t *fd,
int op_errno = ESTALE;
int64_t remote_fd = -1;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_READDIRP, out);
req->size = size;
req->offset = offset;
@@ -1187,7 +1199,8 @@ client_pre_fremovexattr(xlator_t *this, gfs3_fremovexattr_req *req, fd_t *fd,
if (!(fd && fd->inode))
goto out;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_FREMOVEXATTR, out);
memcpy(req->gfid, fd->inode->gfid, 16);
req->name = (char *)name;
@@ -1208,7 +1221,8 @@ client_pre_fallocate(xlator_t *this, gfs3_fallocate_req *req, fd_t *fd,
int op_errno = ESTALE;
int64_t remote_fd = -1;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_FALLOCATE, out);
req->fd = remote_fd;
req->flags = flags;
@@ -1230,7 +1244,8 @@ client_pre_discard(xlator_t *this, gfs3_discard_req *req, fd_t *fd,
int op_errno = ESTALE;
int64_t remote_fd = -1;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_DISCARD, out);
req->fd = remote_fd;
req->offset = offset;
@@ -1251,7 +1266,8 @@ client_pre_zerofill(xlator_t *this, gfs3_zerofill_req *req, fd_t *fd,
int op_errno = ESTALE;
int64_t remote_fd = -1;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_ZEROFILL, out);
req->fd = remote_fd;
req->offset = offset;
@@ -1286,7 +1302,8 @@ client_pre_seek(xlator_t *this, gfs3_seek_req *req, fd_t *fd, off_t offset,
int64_t remote_fd = -1;
int op_errno = ESTALE;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_SEEK, out);
memcpy(req->gfid, fd->inode->gfid, 16);
req->fd = remote_fd;
@@ -2508,7 +2525,7 @@ client_pre_readv_v2(xlator_t *this, gfx_read_req *req, fd_t *fd, size_t size,
int op_errno = ESTALE;
CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno,
- out);
+ GFS3_OP_READ, out);
req->size = size;
req->offset = offset;
@@ -2532,7 +2549,7 @@ client_pre_writev_v2(xlator_t *this, gfx_write_req *req, fd_t *fd, size_t size,
int op_errno = ESTALE;
CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno,
- out);
+ GFS3_OP_WRITE, out);
req->size = size;
req->offset = offset;
@@ -2567,10 +2584,10 @@ client_pre_copy_file_range_v2(xlator_t *this, gfx_copy_file_range_req *req,
int op_errno = ESTALE;
CLIENT_GET_REMOTE_FD(this, fd_in, FALLBACK_TO_ANON_FD, remote_fd_in,
- op_errno, out);
+ op_errno, GFS3_OP_COPY_FILE_RANGE, out);
CLIENT_GET_REMOTE_FD(this, fd_out, FALLBACK_TO_ANON_FD, remote_fd_out,
- op_errno, out);
+ op_errno, GFS3_OP_COPY_FILE_RANGE, out);
req->size = size;
req->off_in = off_in;
req->off_out = off_out;
@@ -2623,7 +2640,8 @@ client_pre_flush_v2(xlator_t *this, gfx_flush_req *req, fd_t *fd, dict_t *xdata)
int64_t remote_fd = -1;
int op_errno = ESTALE;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_FLUSH, out);
req->fd = remote_fd;
memcpy(req->gfid, fd->inode->gfid, 16);
@@ -2643,7 +2661,7 @@ client_pre_fsync_v2(xlator_t *this, gfx_fsync_req *req, fd_t *fd, int32_t flags,
int op_errno = 0;
CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno,
- out);
+ GFS3_OP_FSYNC, out);
req->fd = remote_fd;
req->data = flags;
@@ -2778,7 +2796,8 @@ client_pre_fsyncdir_v2(xlator_t *this, gfx_fsyncdir_req *req, fd_t *fd,
int32_t op_errno = ESTALE;
int64_t remote_fd = -1;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_FSYNCDIR, out);
req->fd = remote_fd;
req->data = flags;
@@ -2852,7 +2871,8 @@ client_pre_ftruncate_v2(xlator_t *this, gfx_ftruncate_req *req, fd_t *fd,
int64_t remote_fd = -1;
int op_errno = EINVAL;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_FTRUNCATE, out);
req->offset = offset;
req->fd = remote_fd;
@@ -2870,7 +2890,8 @@ client_pre_fstat_v2(xlator_t *this, gfx_fstat_req *req, fd_t *fd, dict_t *xdata)
int64_t remote_fd = -1;
int op_errno = ESTALE;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_FSTAT, out);
req->fd = remote_fd;
memcpy(req->gfid, fd->inode->gfid, 16);
@@ -2892,7 +2913,8 @@ client_pre_lk_v2(xlator_t *this, gfx_lk_req *req, int32_t cmd,
int32_t gf_type = 0;
int ret = 0;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_LK, out);
ret = client_cmd_to_gf_cmd(cmd, &gf_cmd);
if (ret) {
@@ -2967,7 +2989,8 @@ client_pre_readdir_v2(xlator_t *this, gfx_readdir_req *req, fd_t *fd,
int64_t remote_fd = -1;
int op_errno = ESTALE;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_READDIR, out);
req->size = size;
req->offset = offset;
@@ -3048,7 +3071,7 @@ client_pre_finodelk_v2(xlator_t *this, gfx_finodelk_req *req, fd_t *fd, int cmd,
int32_t gf_cmd = 0;
CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno,
- out);
+ GFS3_OP_FINODELK, out);
if (cmd == F_GETLK || cmd == F_GETLK64)
gf_cmd = GF_LK_GETLK;
@@ -3129,7 +3152,8 @@ client_pre_fentrylk_v2(xlator_t *this, gfx_fentrylk_req *req, fd_t *fd,
int64_t remote_fd = -1;
int op_errno = ESTALE;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_FENTRYLK, out);
req->fd = remote_fd;
req->cmd = cmd_entrylk;
@@ -3185,7 +3209,7 @@ client_pre_fxattrop_v2(xlator_t *this, gfx_fxattrop_req *req, fd_t *fd,
int64_t remote_fd = -1;
CLIENT_GET_REMOTE_FD(this, fd, FALLBACK_TO_ANON_FD, remote_fd, op_errno,
- out);
+ GFS3_OP_FXATTROP, out);
req->fd = remote_fd;
req->flags = flags;
@@ -3207,7 +3231,8 @@ client_pre_fgetxattr_v2(xlator_t *this, gfx_fgetxattr_req *req, fd_t *fd,
int64_t remote_fd = -1;
int op_errno = ESTALE;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_FGETXATTR, out);
req->namelen = 1; /* Use it as a flag */
req->fd = remote_fd;
@@ -3232,7 +3257,8 @@ client_pre_fsetxattr_v2(xlator_t *this, gfx_fsetxattr_req *req, fd_t *fd,
int64_t remote_fd = -1;
int op_errno = ESTALE;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_FSETXATTR, out);
req->fd = remote_fd;
req->flags = flags;
@@ -3256,7 +3282,8 @@ client_pre_rchecksum_v2(xlator_t *this, gfx_rchecksum_req *req, fd_t *fd,
int64_t remote_fd = -1;
int op_errno = ESTALE;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_RCHECKSUM, out);
req->len = len;
req->offset = offset;
@@ -3304,7 +3331,8 @@ client_pre_fsetattr_v2(xlator_t *this, gfx_fsetattr_req *req, fd_t *fd,
int op_errno = ESTALE;
int64_t remote_fd = -1;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_FSETATTR, out);
memcpy(req->gfid, fd->inode->gfid, 16);
req->fd = remote_fd;
@@ -3324,7 +3352,8 @@ client_pre_readdirp_v2(xlator_t *this, gfx_readdirp_req *req, fd_t *fd,
int op_errno = ESTALE;
int64_t remote_fd = -1;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_READDIRP, out);
req->size = size;
req->offset = offset;
@@ -3349,7 +3378,8 @@ client_pre_fremovexattr_v2(xlator_t *this, gfx_fremovexattr_req *req, fd_t *fd,
if (!(fd && fd->inode))
goto out;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_FREMOVEXATTR, out);
memcpy(req->gfid, fd->inode->gfid, 16);
req->name = (char *)name;
@@ -3369,7 +3399,8 @@ client_pre_fallocate_v2(xlator_t *this, gfx_fallocate_req *req, fd_t *fd,
int op_errno = ESTALE;
int64_t remote_fd = -1;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_FALLOCATE, out);
req->fd = remote_fd;
req->flags = flags;
@@ -3390,7 +3421,8 @@ client_pre_discard_v2(xlator_t *this, gfx_discard_req *req, fd_t *fd,
int op_errno = ESTALE;
int64_t remote_fd = -1;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_DISCARD, out);
req->fd = remote_fd;
req->offset = offset;
@@ -3410,7 +3442,8 @@ client_pre_zerofill_v2(xlator_t *this, gfx_zerofill_req *req, fd_t *fd,
int op_errno = ESTALE;
int64_t remote_fd = -1;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_ZEROFILL, out);
req->fd = remote_fd;
req->offset = offset;
@@ -3439,7 +3472,8 @@ client_pre_seek_v2(xlator_t *this, gfx_seek_req *req, fd_t *fd, off_t offset,
int64_t remote_fd = -1;
int op_errno = ESTALE;
- CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno, out);
+ CLIENT_GET_REMOTE_FD(this, fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
+ GFS3_OP_SEEK, out);
memcpy(req->gfid, fd->inode->gfid, 16);
req->fd = remote_fd;
@@ -3587,3 +3621,25 @@ client_post_rename_v2(xlator_t *this, gfx_rename_rsp *rsp, struct iatt *stbuf,
return xdr_to_dict(&rsp->xdata, xdata);
}
+
+/* Store @fd_reopen_status in @xdata under the "fd-reopen-status" key.
+ * When conf->strict_locks is unset the value stored is FD_REOPEN_ALLOWED.
+ * Best effort: a missing conf or a failed dict set is only logged. */
+void
+set_fd_reopen_status(xlator_t *this, dict_t *xdata,
+                     enum gf_fd_reopen_status fd_reopen_status)
+{
+    clnt_conf_t *conf = this->private;
+
+    if (!conf) {
+        gf_msg_debug(this->name, ENOMEM, "Failed to get client conf");
+        return;
+    }
+
+    if (!conf->strict_locks)
+        fd_reopen_status = FD_REOPEN_ALLOWED;
+
+    if (dict_set_int32(xdata, "fd-reopen-status", fd_reopen_status))
+        gf_msg(this->name, GF_LOG_WARNING, ENOMEM, PC_MSG_DICT_SET_FAILED,
+               "Failed to set fd-reopen-status in xdata");
+}
diff --git a/xlators/protocol/client/src/client-common.h b/xlators/protocol/client/src/client-common.h
index a2043d8..16fb167 100644
--- a/xlators/protocol/client/src/client-common.h
+++ b/xlators/protocol/client/src/client-common.h
@@ -627,4 +627,8 @@ client_pre_copy_file_range_v2(xlator_t *this, gfx_copy_file_range_req *req,
off64_t off_out, size_t size, int32_t flags,
dict_t **xdata);
+void
+set_fd_reopen_status(xlator_t *this, dict_t *xdata,
+                     enum gf_fd_reopen_status fd_reopen_status);
+
#endif /* __CLIENT_COMMON_H__ */
diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c
index 6543100..48b6448 100644
--- a/xlators/protocol/client/src/client-helpers.c
+++ b/xlators/protocol/client/src/client-helpers.c
@@ -406,11 +406,12 @@ clnt_readdir_rsp_cleanup_v2(gfx_readdir_rsp *rsp)
}
int
-client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd)
+client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd,
+ enum gf_fop_procnum fop)
{
clnt_fd_ctx_t *fdctx = NULL;
clnt_conf_t *conf = NULL;
- gf_boolean_t locks_held = _gf_false;
+ gf_boolean_t locks_involved = _gf_false;
GF_VALIDATE_OR_GOTO(this->name, fd, out);
GF_VALIDATE_OR_GOTO(this->name, remote_fd, out);
@@ -423,23 +424,32 @@ client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd)
if (fd->anonymous) {
*remote_fd = GF_ANON_FD_NO;
} else {
+ if (conf->strict_locks &&
+ (fop == GFS3_OP_WRITE || fop == GFS3_OP_FTRUNCATE ||
+ fop == GFS3_OP_FALLOCATE || fop == GFS3_OP_ZEROFILL ||
+ fop == GFS3_OP_DISCARD)) {
+ locks_involved = _gf_true;
+ }
*remote_fd = -1;
gf_msg_debug(this->name, EBADF, "not a valid fd for gfid: %s",
uuid_utoa(fd->inode->gfid));
}
} else {
- if (__is_fd_reopen_in_progress(fdctx))
+ if (__is_fd_reopen_in_progress(fdctx)) {
*remote_fd = -1;
- else
+ } else {
*remote_fd = fdctx->remote_fd;
+ }
- locks_held = !list_empty(&fdctx->lock_list);
+ locks_involved = !list_empty(&fdctx->lock_list);
}
}
pthread_spin_unlock(&conf->fd_lock);
- if ((flags & FALLBACK_TO_ANON_FD) && (*remote_fd == -1) && (!locks_held))
+ if ((flags & FALLBACK_TO_ANON_FD) && (*remote_fd == -1) &&
+ (!locks_involved)) {
*remote_fd = GF_ANON_FD_NO;
+ }
return 0;
out:
diff --git a/xlators/protocol/client/src/client-rpc-fops.c b/xlators/protocol/client/src/client-rpc-fops.c
index 3110c78..46ac544 100644
--- a/xlators/protocol/client/src/client-rpc-fops.c
+++ b/xlators/protocol/client/src/client-rpc-fops.c
@@ -2439,6 +2439,13 @@ client3_3_lk_cbk(struct rpc_req *req, struct iovec *iov, int count,
}
}
+ if (local->check_reopen) {
+ if (lock.l_type == F_WRLCK)
+ set_fd_reopen_status(this, xdata, FD_REOPEN_NOT_ALLOWED);
+ else
+ set_fd_reopen_status(this, xdata, FD_REOPEN_ALLOWED);
+ }
+
out:
if ((rsp.op_ret == -1) && (EAGAIN != gf_error_to_errno(rsp.op_errno))) {
gf_msg(this->name, GF_LOG_WARNING, gf_error_to_errno(rsp.op_errno),
@@ -5198,6 +5205,7 @@ client3_3_lk(call_frame_t *frame, xlator_t *this, void *data)
0,
},
};
+ dict_t *xdata = NULL;
int32_t gf_cmd = 0;
clnt_local_t *local = NULL;
clnt_conf_t *conf = NULL;
@@ -5224,6 +5232,10 @@ client3_3_lk(call_frame_t *frame, xlator_t *this, void *data)
goto unwind;
}
+ ret = dict_get_int32(args->xdata, "fd-reopen-status", &local->check_reopen);
+ if (ret)
+ local->check_reopen = 0;
+
local->owner = frame->root->lk_owner;
local->cmd = args->cmd;
local->fd = fd_ref(args->fd);
@@ -5237,6 +5249,13 @@ client3_3_lk(call_frame_t *frame, xlator_t *this, void *data)
client_is_setlk(local->cmd)) {
client_add_lock_for_recovery(local->fd, args->flock, &local->owner,
local->cmd);
+ } else if (local->check_reopen) {
+ xdata = dict_new();
+ if (xdata == NULL) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ set_fd_reopen_status(this, xdata, FD_BAD);
}
goto unwind;
@@ -5254,8 +5273,10 @@ client3_3_lk(call_frame_t *frame, xlator_t *this, void *data)
return 0;
unwind:
- CLIENT_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL);
+ CLIENT_STACK_UNWIND(lk, frame, -1, op_errno, NULL, xdata);
GF_FREE(req.xdata.xdata_val);
+ if (xdata)
+ dict_unref(xdata);
return 0;
}
diff --git a/xlators/protocol/client/src/client-rpc-fops_v2.c b/xlators/protocol/client/src/client-rpc-fops_v2.c
index 954fc58..d0055e9 100644
--- a/xlators/protocol/client/src/client-rpc-fops_v2.c
+++ b/xlators/protocol/client/src/client-rpc-fops_v2.c
@@ -2234,6 +2234,13 @@ client4_0_lk_cbk(struct rpc_req *req, struct iovec *iov, int count,
}
}
+ if (local->check_reopen) {
+ if (lock.l_type == F_WRLCK)
+ set_fd_reopen_status(this, xdata, FD_REOPEN_NOT_ALLOWED);
+ else
+ set_fd_reopen_status(this, xdata, FD_REOPEN_ALLOWED);
+ }
+
out:
if ((rsp.op_ret == -1) && (EAGAIN != gf_error_to_errno(rsp.op_errno))) {
gf_msg(this->name, GF_LOG_WARNING, gf_error_to_errno(rsp.op_errno),
@@ -4759,6 +4766,7 @@ client4_0_lk(call_frame_t *frame, xlator_t *this, void *data)
0,
},
};
+ dict_t *xdata = NULL;
int32_t gf_cmd = 0;
clnt_local_t *local = NULL;
clnt_conf_t *conf = NULL;
@@ -4785,6 +4793,10 @@ client4_0_lk(call_frame_t *frame, xlator_t *this, void *data)
goto unwind;
}
+ ret = dict_get_int32(args->xdata, "fd-reopen-status", &local->check_reopen);
+ if (ret)
+ local->check_reopen = 0;
+
local->owner = frame->root->lk_owner;
local->cmd = args->cmd;
local->fd = fd_ref(args->fd);
@@ -4798,6 +4810,13 @@ client4_0_lk(call_frame_t *frame, xlator_t *this, void *data)
client_is_setlk(local->cmd)) {
client_add_lock_for_recovery(local->fd, args->flock, &local->owner,
local->cmd);
+ } else if (local->check_reopen) {
+ xdata = dict_new();
+ if (xdata == NULL) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ set_fd_reopen_status(this, xdata, FD_BAD);
}
goto unwind;
@@ -4815,8 +4834,10 @@ client4_0_lk(call_frame_t *frame, xlator_t *this, void *data)
return 0;
unwind:
- CLIENT_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL);
+ CLIENT_STACK_UNWIND(lk, frame, -1, op_errno, NULL, xdata);
GF_FREE(req.xdata.pairs.pairs_val);
+ if (xdata)
+ dict_unref(xdata);
return 0;
}
@@ -6094,7 +6115,7 @@ client4_0_rchecksum(call_frame_t *frame, xlator_t *this, void *data)
conf = this->private;
CLIENT_GET_REMOTE_FD(this, args->fd, DEFAULT_REMOTE_FD, remote_fd, op_errno,
- unwind);
+ GFS3_OP_RCHECKSUM, unwind);
req.len = args->len;
req.offset = args->offset;
diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
index 63c90ea..35a5340 100644
--- a/xlators/protocol/client/src/client.c
+++ b/xlators/protocol/client/src/client.c
@@ -864,9 +864,11 @@ int32_t
client_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
fd_t *fd, dict_t *xdata)
{
- int ret = -1;
+ int ret = 0;
+ int op_errno = ENOTCONN;
clnt_conf_t *conf = NULL;
rpc_clnt_procedure_t *proc = NULL;
+ clnt_fd_ctx_t *fdctx = NULL;
clnt_args_t args = {
0,
};
@@ -875,6 +877,21 @@ client_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
if (!conf || !conf->fops)
goto out;
+ if (conf->strict_locks) {
+ pthread_spin_lock(&conf->fd_lock);
+ {
+ fdctx = this_fd_get_ctx(fd, this);
+ if (fdctx && !list_empty(&fdctx->lock_list)) {
+ ret = -1;
+ op_errno = EBADFD;
+ }
+ }
+ pthread_spin_unlock(&conf->fd_lock);
+
+ if (ret)
+ goto out;
+ }
+
args.loc = loc;
args.fd = fd;
args.xdata = xdata;
@@ -888,7 +905,7 @@ client_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
out:
if (ret)
- STACK_UNWIND_STRICT(open, frame, -1, ENOTCONN, NULL, NULL);
+ STACK_UNWIND_STRICT(open, frame, -1, op_errno, NULL, NULL);
return 0;
}
diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
index bde3d1a..2a50625 100644
--- a/xlators/protocol/client/src/client.h
+++ b/xlators/protocol/client/src/client.h
@@ -98,10 +98,10 @@ typedef enum {
free(_this_rsp->xdata.xdata_val); \
} while (0)
-#define CLIENT_GET_REMOTE_FD(xl, fd, flags, remote_fd, op_errno, label) \
+#define CLIENT_GET_REMOTE_FD(xl, fd, flags, remote_fd, op_errno, fop, label) \
do { \
int _ret = 0; \
- _ret = client_get_remote_fd(xl, fd, flags, &remote_fd); \
+ _ret = client_get_remote_fd(xl, fd, flags, &remote_fd, fop); \
if (_ret < 0) { \
op_errno = errno; \
goto label; \
@@ -286,6 +286,7 @@ typedef struct client_local {
client_posix_lock_t *client_lock;
gf_lkowner_t owner;
int32_t cmd;
+ int32_t check_reopen;
struct list_head lock_list;
pthread_mutex_t mutex;
char *name;
@@ -435,7 +436,8 @@ client_default_reopen_done(clnt_fd_ctx_t *fdctx, int64_t rfd, xlator_t *this);
void
client_attempt_reopen(fd_t *fd, xlator_t *this);
int
-client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd);
+client_get_remote_fd(xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd,
+ enum gf_fop_procnum fop);
int
client_fd_fop_prepare_local(call_frame_t *frame, fd_t *fd, int64_t remote_fd);
gf_boolean_t
--
1.8.3.1