From 5ad413d493042d938a021ef944e3c88184b96e68 Mon Sep 17 00:00:00 2001
From: N Balachandran <nbalacha@redhat.com>
Date: Fri, 4 Aug 2017 14:46:38 +0530
Subject: [PATCH 594/601] cluster/dht: Check for open fd only on EBADF
DHT fd based fops used to check if the fd was open
on the cached subvol before winding the call. However,
this introduced a performance regression of about
30% for reads.
This check was introduced to handle cases where files
were migrated while IOs were happening. As this is not
the common case, dht will now check if the fd is
open on the cached subvol only if the call fails
with EBADF.
This will prevent a performance hit where a rebalance
is not running.
> BUG: 1476665
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
> Reviewed-on: https://review.gluster.org/17976
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Amar Tumballi <amarts@redhat.com>
> Reviewed-by: Susant Palai <spalai@redhat.com>
> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
> (cherry picked from commit cdca1cb26a0aba390c6d8485c0d6d95e22ffc8bd)
> BUG: 1479303
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
Change-Id: I2035a858d63c3fcd22bb634055bbb0ad01686808
BUG: 1475136
Signed-off-by: N Balachandran <nbalacha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/114677
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
xlators/cluster/dht/src/dht-common.h | 3 +
xlators/cluster/dht/src/dht-helper.c | 1 +
xlators/cluster/dht/src/dht-inode-read.c | 106 +++++++--------
xlators/cluster/dht/src/dht-inode-write.c | 210 ++++++++++++++++--------------
xlators/storage/posix/src/posix.c | 3 +-
5 files changed, 162 insertions(+), 161 deletions(-)
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 6db4c06..5f125ee 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -292,6 +292,9 @@ struct dht_local {
call_stub_t *stub;
int32_t parent_disk_layout[4];
+
+ /* fd open check */
+ gf_boolean_t fd_checked;
};
typedef struct dht_local dht_local_t;
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index 6bad5db..ca9184f 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -484,6 +484,7 @@ dht_check_and_open_fd_on_subvol_task (void *data)
fd = local->fd;
subvol = local->cached_subvol;
+ local->fd_checked = _gf_true;
if (fd_is_anonymous (fd) || dht_fd_open_on_dst (this, fd, subvol)) {
ret = 0;
diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
index 58a0430..a9e4766 100644
--- a/xlators/cluster/dht/src/dht-inode-read.c
+++ b/xlators/cluster/dht/src/dht-inode-read.c
@@ -165,6 +165,15 @@ dht_file_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
prev = cookie;
+ if ((local->fop == GF_FOP_FSTAT) && (op_ret == -1)
+ && (op_errno == EBADF) && !(local->fd_checked)) {
+
+ ret = dht_check_and_open_fd_on_subvol (this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+
if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
local->op_errno = op_errno;
gf_msg_debug (this->name, op_errno,
@@ -377,8 +386,6 @@ dht_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
dht_layout_t *layout = NULL;
int i = 0;
int call_cnt = 0;
- int ret = -1;
-
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -404,19 +411,10 @@ dht_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
local->call_cnt = 1;
subvol = local->cached_subvol;
- if (dht_fd_open_on_dst (this, fd, subvol)) {
-
- STACK_WIND_COOKIE (frame, dht_file_attr_cbk, subvol,
- subvol, subvol->fops->fstat, fd,
- xdata);
-
- } else {
- ret = dht_check_and_open_fd_on_subvol (this, frame);
-
- if (ret)
- goto err;
- }
+ STACK_WIND_COOKIE (frame, dht_file_attr_cbk, subvol,
+ subvol, subvol->fops->fstat, fd,
+ xdata);
return 0;
}
@@ -459,9 +457,17 @@ dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (local->call_cnt != 1)
goto out;
+ if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
+ ret = dht_check_and_open_fd_on_subvol (this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+
if ((op_ret == -1) && !dht_inode_missing(op_errno))
goto out;
+
local->op_errno = op_errno;
if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) {
@@ -545,7 +551,6 @@ dht_readv (call_frame_t *frame, xlator_t *this,
xlator_t *subvol = NULL;
int op_errno = -1;
dht_local_t *local = NULL;
- int ret = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -573,19 +578,10 @@ dht_readv (call_frame_t *frame, xlator_t *this,
local->rebalance.flags = flags;
local->call_cnt = 1;
- if (dht_fd_open_on_dst (this, fd, subvol)) {
-
- STACK_WIND (frame, dht_readv_cbk, subvol, subvol->fops->readv,
- local->fd, local->rebalance.size,
- local->rebalance.offset,
- local->rebalance.flags, local->xattr_req);
-
- } else {
- ret = dht_check_and_open_fd_on_subvol (this, frame);
- if (ret)
- goto err;
- }
-
+ STACK_WIND (frame, dht_readv_cbk, subvol, subvol->fops->readv,
+ local->fd, local->rebalance.size,
+ local->rebalance.offset,
+ local->rebalance.flags, local->xattr_req);
return 0;
@@ -743,6 +739,13 @@ dht_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (local->call_cnt != 1)
goto out;
+ if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
+ ret = dht_check_and_open_fd_on_subvol (this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+
local->rebalance.target_op_fn = dht_flush2;
local->op_ret = op_ret;
@@ -804,7 +807,6 @@ dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
xlator_t *subvol = NULL;
int op_errno = -1;
dht_local_t *local = NULL;
- int ret = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -829,18 +831,8 @@ dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
local->call_cnt = 1;
- if (dht_fd_open_on_dst (this, fd, subvol)) {
-
- STACK_WIND (frame, dht_flush_cbk,
- subvol, subvol->fops->flush, fd, local->xattr_req);
- return 0;
-
- } else {
-
- ret = dht_check_and_open_fd_on_subvol (this, frame);
- if (ret)
- goto err;
- }
+ STACK_WIND (frame, dht_flush_cbk,
+ subvol, subvol->fops->flush, fd, local->xattr_req);
return 0;
err:
@@ -867,6 +859,14 @@ dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
prev = cookie;
local->op_errno = op_errno;
+
+ if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
+ ret = dht_check_and_open_fd_on_subvol (this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+
if (op_ret == -1 && !dht_inode_missing(op_errno)) {
gf_msg_debug (this->name, op_errno,
"subvolume %s returned -1",
@@ -974,7 +974,6 @@ dht_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
xlator_t *subvol = NULL;
int op_errno = -1;
dht_local_t *local = NULL;
- int ret = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -994,20 +993,9 @@ dht_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
subvol = local->cached_subvol;
-
- if (dht_fd_open_on_dst (this, fd, subvol)) {
-
- STACK_WIND_COOKIE (frame, dht_fsync_cbk, subvol, subvol,
- subvol->fops->fsync, local->fd,
- local->rebalance.flags, local->xattr_req);
-
- } else {
- ret = dht_check_and_open_fd_on_subvol (this, frame);
- if (ret)
- goto err;
- }
-
-
+ STACK_WIND_COOKIE (frame, dht_fsync_cbk, subvol, subvol,
+ subvol->fops->fsync, local->fd,
+ local->rebalance.flags, local->xattr_req);
return 0;
err:
@@ -1301,8 +1289,7 @@ dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
local->call_cnt = 1;
- STACK_WIND (frame,
- dht_xattrop_cbk,
+ STACK_WIND (frame, dht_xattrop_cbk,
subvol, subvol->fops->xattrop,
loc, flags, dict, xdata);
@@ -1401,8 +1388,7 @@ dht_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
local->call_cnt = 1;
- STACK_WIND (frame,
- dht_inodelk_cbk,
+ STACK_WIND (frame, dht_inodelk_cbk,
lock_subvol, lock_subvol->fops->inodelk,
volume, loc, cmd, lock, xdata);
diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c
index f925d4b..5392ee4 100644
--- a/xlators/cluster/dht/src/dht-inode-write.c
+++ b/xlators/cluster/dht/src/dht-inode-write.c
@@ -44,6 +44,19 @@ dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
+ /* writev fails with EBADF if dht has not yet opened the fd
+ * on the cached subvol. This could happen if the file was migrated
+ * and a lookup updated the cached subvol in the inode ctx.
+ * We only check once as this could be a valid bad fd error.
+ */
+
+ if (op_ret == -1 && (op_errno == EBADF) && !(local->fd_checked)) {
+ ret = dht_check_and_open_fd_on_subvol (this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+
if (op_ret == -1 && !dht_inode_missing(op_errno)) {
local->op_errno = op_errno;
local->op_ret = -1;
@@ -162,7 +175,6 @@ dht_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
xlator_t *subvol = NULL;
int op_errno = -1;
dht_local_t *local = NULL;
- int ret = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -183,7 +195,6 @@ dht_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
goto err;
}
-
if (xdata)
local->xattr_req = dict_ref (xdata);
@@ -194,22 +205,13 @@ dht_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
local->rebalance.iobref = iobref_ref (iobref);
local->call_cnt = 1;
- if (dht_fd_open_on_dst (this, fd, subvol)) {
-
- STACK_WIND_COOKIE (frame, dht_writev_cbk, subvol, subvol,
- subvol->fops->writev, fd,
- local->rebalance.vector,
- local->rebalance.count,
- local->rebalance.offset,
- local->rebalance.flags,
- local->rebalance.iobref, local->xattr_req);
- return 0;
-
- } else {
- ret = dht_check_and_open_fd_on_subvol (this, frame);
- if (ret)
- goto err;
- }
+ STACK_WIND_COOKIE (frame, dht_writev_cbk, subvol, subvol,
+ subvol->fops->writev, fd,
+ local->rebalance.vector,
+ local->rebalance.count,
+ local->rebalance.offset,
+ local->rebalance.flags,
+ local->rebalance.iobref, local->xattr_req);
return 0;
@@ -243,6 +245,22 @@ dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
prev = cookie;
+ /* Needs to be checked only for ftruncate.
+ * ftruncate fails with EBADF/EINVAL if dht has not yet opened the fd
+ * on the cached subvol. This could happen if the file was migrated
+ * and a lookup updated the cached subvol in the inode ctx.
+ * We only check once as this could actually be a valid error.
+ */
+
+ if ((local->fop == GF_FOP_FTRUNCATE) && (op_ret == -1)
+ && ((op_errno == EBADF) || (op_errno == EINVAL))
+ && !(local->fd_checked)) {
+ ret = dht_check_and_open_fd_on_subvol (this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+
if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
local->op_errno = op_errno;
local->op_ret = -1;
@@ -253,6 +271,7 @@ dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
+
if (local->call_cnt != 1) {
if (local->stbuf.ia_blocks) {
dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
@@ -408,7 +427,6 @@ dht_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
xlator_t *subvol = NULL;
int op_errno = -1;
dht_local_t *local = NULL;
- int ret = -1;
VALIDATE_OR_GOTO (frame, err);
@@ -434,20 +452,9 @@ dht_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
if (xdata)
local->xattr_req = dict_ref (xdata);
- if (dht_fd_open_on_dst (this, fd, subvol)) {
-
- STACK_WIND_COOKIE (frame, dht_truncate_cbk, subvol, subvol,
- subvol->fops->ftruncate, fd,
- local->rebalance.offset, local->xattr_req);
- return 0;
-
- } else {
-
- ret = dht_check_and_open_fd_on_subvol (this, frame);
- if (ret)
- goto err;
- }
-
+ STACK_WIND_COOKIE (frame, dht_truncate_cbk, subvol, subvol,
+ subvol->fops->ftruncate, fd,
+ local->rebalance.offset, local->xattr_req);
return 0;
err:
@@ -477,6 +484,20 @@ dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
prev = cookie;
+ /* fallocate fails with EBADF if dht has not yet opened the fd
+ * on the cached subvol. This could happen if the file was migrated
+ * and a lookup updated the cached subvol in the inode ctx.
+ * We only check once as this could actually be a valid error.
+ */
+
+ if ((op_ret == -1) && (op_errno == EBADF)
+ && !(local->fd_checked)) {
+ ret = dht_check_and_open_fd_on_subvol (this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+
if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
local->op_errno = op_errno;
local->op_ret = -1;
@@ -586,7 +607,6 @@ dht_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
xlator_t *subvol = NULL;
int op_errno = -1;
dht_local_t *local = NULL;
- int ret = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -614,22 +634,12 @@ dht_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
if (xdata)
local->xattr_req = dict_ref (xdata);
- if (dht_fd_open_on_dst (this, fd, subvol)) {
-
- STACK_WIND_COOKIE (frame, dht_fallocate_cbk, subvol, subvol,
- subvol->fops->fallocate, fd,
- local->rebalance.flags,
- local->rebalance.offset,
- local->rebalance.size,
- local->xattr_req);
- return 0;
-
- } else {
-
- ret = dht_check_and_open_fd_on_subvol (this, frame);
- if (ret)
- goto err;
- }
+ STACK_WIND_COOKIE (frame, dht_fallocate_cbk, subvol, subvol,
+ subvol->fops->fallocate, fd,
+ local->rebalance.flags,
+ local->rebalance.offset,
+ local->rebalance.size,
+ local->xattr_req);
return 0;
@@ -660,6 +670,20 @@ dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
prev = cookie;
+
+ /* discard fails with EBADF if dht has not yet opened the fd
+ * on the cached subvol. This could happen if the file was migrated
+ * and a lookup updated the cached subvol in the inode ctx.
+ * We only check once as this could actually be a valid error.
+ */
+ if ((op_ret == -1) && (op_errno == EBADF)
+ && !(local->fd_checked)) {
+ ret = dht_check_and_open_fd_on_subvol (this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+
if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
local->op_errno = op_errno;
local->op_ret = -1;
@@ -769,7 +793,6 @@ dht_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
xlator_t *subvol = NULL;
int op_errno = -1;
dht_local_t *local = NULL;
- int ret = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -796,22 +819,11 @@ dht_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
if (xdata)
local->xattr_req = dict_ref (xdata);
- if (dht_fd_open_on_dst (this, fd, subvol)) {
-
- STACK_WIND_COOKIE (frame, dht_discard_cbk, subvol, subvol,
- subvol->fops->discard, fd,
- local->rebalance.offset,
- local->rebalance.size,
- local->xattr_req);
- return 0;
-
- } else {
-
- ret = dht_check_and_open_fd_on_subvol (this, frame);
- if (ret)
- goto err;
-
- }
+ STACK_WIND_COOKIE (frame, dht_discard_cbk, subvol, subvol,
+ subvol->fops->discard, fd,
+ local->rebalance.offset,
+ local->rebalance.size,
+ local->xattr_req);
return 0;
@@ -840,6 +852,19 @@ dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
prev = cookie;
+ /* zerofill fails with EBADF if dht has not yet opened the fd
+ * on the cached subvol. This could happen if the file was migrated
+ * and a lookup updated the cached subvol in the inode ctx.
+ * We only check once as this could actually be a valid error.
+ */
+ if ((op_ret == -1) && (op_errno == EBADF)
+ && !(local->fd_checked)) {
+ ret = dht_check_and_open_fd_on_subvol (this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+
if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
local->op_errno = op_errno;
local->op_ret = -1;
@@ -952,7 +977,6 @@ dht_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
xlator_t *subvol = NULL;
int op_errno = -1;
dht_local_t *local = NULL;
- int ret = -1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -979,21 +1003,10 @@ dht_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
if (xdata)
local->xattr_req = dict_ref (xdata);
- if (dht_fd_open_on_dst (this, fd, subvol)) {
-
- STACK_WIND_COOKIE (frame, dht_zerofill_cbk, subvol, subvol,
- subvol->fops->zerofill, fd,
- local->rebalance.offset,
- local->rebalance.size, local->xattr_req);
- return 0;
-
- } else {
-
- ret = dht_check_and_open_fd_on_subvol (this, frame);
- if (ret)
- goto err;
- }
-
+ STACK_WIND_COOKIE (frame, dht_zerofill_cbk, subvol, subvol,
+ subvol->fops->zerofill, fd,
+ local->rebalance.offset,
+ local->rebalance.size, local->xattr_req);
return 0;
@@ -1020,6 +1033,15 @@ dht_file_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
prev = cookie;
local->op_errno = op_errno;
+
+ if ((op_ret == -1) && (op_errno == EBADF)
+ && !(local->fd_checked)) {
+ ret = dht_check_and_open_fd_on_subvol (this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+
if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
gf_msg_debug (this->name, op_errno,
"subvolume %s returned -1",
@@ -1241,8 +1263,6 @@ dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
int op_errno = -1;
int i = -1;
int call_cnt = 0;
- int ret = -1;
-
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -1279,21 +1299,11 @@ dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
local->call_cnt = 1;
subvol = local->cached_subvol;
- if (dht_fd_open_on_dst (this, fd, subvol)) {
-
- STACK_WIND_COOKIE (frame, dht_file_setattr_cbk, subvol,
- subvol, subvol->fops->fsetattr, fd,
- &local->rebalance.stbuf,
- local->rebalance.flags,
- local->xattr_req);
- return 0;
-
- } else {
- ret = dht_check_and_open_fd_on_subvol (this, frame);
- if (ret)
- goto err;
-
- }
+ STACK_WIND_COOKIE (frame, dht_file_setattr_cbk, subvol,
+ subvol, subvol->fops->fsetattr, fd,
+ &local->rebalance.stbuf,
+ local->rebalance.flags,
+ local->xattr_req);
return 0;
}
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index 26038fc..1486d40 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -996,8 +996,9 @@ posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
ret = posix_do_zerofill (frame, this, fd, offset, len,
&statpre, &statpost, xdata);
- if (ret < 0)
+ if (ret < 0) {
goto err;
+ }
STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, NULL);
return 0;
--
1.8.3.1