|
|
c460ee |
From 2b6e6c234dffa72c9f2af747908b1e1f29080698 Mon Sep 17 00:00:00 2001
|
|
|
c460ee |
From: Ravishankar N <ravishankar@redhat.com>
|
|
|
c460ee |
Date: Thu, 25 Mar 2021 11:52:13 +0530
|
|
|
c460ee |
Subject: [PATCH 559/584] afr: make fsync post-op aware of inodelk count
|
|
|
c460ee |
(#2273)
|
|
|
c460ee |
|
|
|
c460ee |
Problem:
|
|
|
c460ee |
Since commit bd540db1e, eager-locking was enabled for fsync. But on
|
|
|
c460ee |
certain VM workloads with sharding enabled, shard xlator keeps sending
|
|
|
c460ee |
fsync on the base shard. This can cause blocked inodelks from other
|
|
|
c460ee |
clients (including shd) to time out due to call bail.
|
|
|
c460ee |
|
|
|
c460ee |
Fix:
|
|
|
c460ee |
Make afr fsync aware of inodelk count and not delay post-op + unlock
|
|
|
c460ee |
when inodelk count > 1, just like writev.
|
|
|
c460ee |
|
|
|
c460ee |
Code is restructured so that any fd based AFR_DATA_TRANSACTION can be made
|
|
|
c460ee |
aware by setting GLUSTERFS_INODELK_DOM_COUNT in xdata request.
|
|
|
c460ee |
|
|
|
c460ee |
Note: We do not know yet why VMs go into a paused state because of the
|
|
|
c460ee |
blocked inodelks but this patch should be a first step in reducing the
|
|
|
c460ee |
occurrence.
|
|
|
c460ee |
|
|
|
c460ee |
Upstream patch details:
|
|
|
c460ee |
> https://github.com/gluster/glusterfs/pull/2273/
|
|
|
c460ee |
> Updates: #2198
|
|
|
c460ee |
> Change-Id: Ib91ebdd3101d590c326e69c829cf9335003e260b
|
|
|
c460ee |
> Signed-off-by: Ravishankar N <ravishankar@redhat.com>
|
|
|
c460ee |
|
|
|
c460ee |
BUG: 1943467
|
|
|
c460ee |
Change-Id: Id407ca54007e3bbb206a1d9431ebaf89a2167f74
|
|
|
c460ee |
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
|
|
|
c460ee |
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244516
|
|
|
c460ee |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
c460ee |
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
|
c460ee |
---
|
|
|
c460ee |
xlators/cluster/afr/src/afr-inode-write.c | 40 ++++++++++++++++++-------------
|
|
|
c460ee |
xlators/features/locks/src/posix.c | 1 +
|
|
|
c460ee |
2 files changed, 24 insertions(+), 17 deletions(-)
|
|
|
c460ee |
|
|
|
c460ee |
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
|
|
|
c460ee |
index df82b6e..962a7b1 100644
|
|
|
c460ee |
--- a/xlators/cluster/afr/src/afr-inode-write.c
|
|
|
c460ee |
+++ b/xlators/cluster/afr/src/afr-inode-write.c
|
|
|
c460ee |
@@ -42,6 +42,7 @@ __afr_inode_write_finalize(call_frame_t *frame, xlator_t *this)
|
|
|
c460ee |
struct iatt *stbuf = NULL;
|
|
|
c460ee |
afr_local_t *local = NULL;
|
|
|
c460ee |
afr_private_t *priv = NULL;
|
|
|
c460ee |
+ afr_lock_t *lock = NULL;
|
|
|
c460ee |
afr_read_subvol_args_t args = {
|
|
|
c460ee |
0,
|
|
|
c460ee |
};
|
|
|
c460ee |
@@ -50,6 +51,12 @@ __afr_inode_write_finalize(call_frame_t *frame, xlator_t *this)
|
|
|
c460ee |
priv = this->private;
|
|
|
c460ee |
GF_VALIDATE_OR_GOTO(this->name, local->inode, out);
|
|
|
c460ee |
|
|
|
c460ee |
+ if (local->update_num_inodelks &&
|
|
|
c460ee |
+ local->transaction.type == AFR_DATA_TRANSACTION) {
|
|
|
c460ee |
+ lock = &local->inode_ctx->lock[local->transaction.type];
|
|
|
c460ee |
+ lock->num_inodelks = local->num_inodelks;
|
|
|
c460ee |
+ }
|
|
|
c460ee |
+
|
|
|
c460ee |
/*This code needs to stay till DHT sends fops on linked
|
|
|
c460ee |
* inodes*/
|
|
|
c460ee |
if (!inode_is_linked(local->inode)) {
|
|
|
c460ee |
@@ -134,6 +141,7 @@ __afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
|
|
|
c460ee |
{
|
|
|
c460ee |
afr_local_t *local = NULL;
|
|
|
c460ee |
afr_private_t *priv = NULL;
|
|
|
c460ee |
+ int num_inodelks = 0;
|
|
|
c460ee |
|
|
|
c460ee |
local = frame->local;
|
|
|
c460ee |
priv = this->private;
|
|
|
c460ee |
@@ -146,8 +154,16 @@ __afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
|
|
|
c460ee |
|
|
|
c460ee |
local->replies[child_index].op_ret = op_ret;
|
|
|
c460ee |
local->replies[child_index].op_errno = op_errno;
|
|
|
c460ee |
- if (xdata)
|
|
|
c460ee |
+ if (xdata) {
|
|
|
c460ee |
local->replies[child_index].xdata = dict_ref(xdata);
|
|
|
c460ee |
+ if (dict_get_int32_sizen(xdata, GLUSTERFS_INODELK_COUNT,
|
|
|
c460ee |
+ &num_inodelks) == 0) {
|
|
|
c460ee |
+ if (num_inodelks > local->num_inodelks) {
|
|
|
c460ee |
+ local->num_inodelks = num_inodelks;
|
|
|
c460ee |
+ local->update_num_inodelks = _gf_true;
|
|
|
c460ee |
+ }
|
|
|
c460ee |
+ }
|
|
|
c460ee |
+ }
|
|
|
c460ee |
|
|
|
c460ee |
if (op_ret >= 0) {
|
|
|
c460ee |
if (prebuf)
|
|
|
c460ee |
@@ -284,7 +300,6 @@ afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
|
|
|
c460ee |
afr_local_t *local = frame->local;
|
|
|
c460ee |
uint32_t open_fd_count = 0;
|
|
|
c460ee |
uint32_t write_is_append = 0;
|
|
|
c460ee |
- int32_t num_inodelks = 0;
|
|
|
c460ee |
|
|
|
c460ee |
LOCK(&frame->lock);
|
|
|
c460ee |
{
|
|
|
c460ee |
@@ -306,15 +321,6 @@ afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
|
|
|
c460ee |
local->open_fd_count = open_fd_count;
|
|
|
c460ee |
local->update_open_fd_count = _gf_true;
|
|
|
c460ee |
}
|
|
|
c460ee |
-
|
|
|
c460ee |
- ret = dict_get_int32_sizen(xdata, GLUSTERFS_INODELK_COUNT,
|
|
|
c460ee |
- &num_inodelks);
|
|
|
c460ee |
- if (ret < 0)
|
|
|
c460ee |
- goto unlock;
|
|
|
c460ee |
- if (num_inodelks > local->num_inodelks) {
|
|
|
c460ee |
- local->num_inodelks = num_inodelks;
|
|
|
c460ee |
- local->update_num_inodelks = _gf_true;
|
|
|
c460ee |
- }
|
|
|
c460ee |
}
|
|
|
c460ee |
unlock:
|
|
|
c460ee |
UNLOCK(&frame->lock);
|
|
|
c460ee |
@@ -324,7 +330,6 @@ void
|
|
|
c460ee |
afr_process_post_writev(call_frame_t *frame, xlator_t *this)
|
|
|
c460ee |
{
|
|
|
c460ee |
afr_local_t *local = NULL;
|
|
|
c460ee |
- afr_lock_t *lock = NULL;
|
|
|
c460ee |
|
|
|
c460ee |
local = frame->local;
|
|
|
c460ee |
|
|
|
c460ee |
@@ -343,11 +348,6 @@ afr_process_post_writev(call_frame_t *frame, xlator_t *this)
|
|
|
c460ee |
|
|
|
c460ee |
if (local->update_open_fd_count)
|
|
|
c460ee |
local->inode_ctx->open_fd_count = local->open_fd_count;
|
|
|
c460ee |
- if (local->update_num_inodelks &&
|
|
|
c460ee |
- local->transaction.type == AFR_DATA_TRANSACTION) {
|
|
|
c460ee |
- lock = &local->inode_ctx->lock[local->transaction.type];
|
|
|
c460ee |
- lock->num_inodelks = local->num_inodelks;
|
|
|
c460ee |
- }
|
|
|
c460ee |
}
|
|
|
c460ee |
|
|
|
c460ee |
int
|
|
|
c460ee |
@@ -2516,6 +2516,12 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
|
|
|
c460ee |
if (!local->xdata_req)
|
|
|
c460ee |
goto out;
|
|
|
c460ee |
|
|
|
c460ee |
+ if (dict_set_str_sizen(local->xdata_req, GLUSTERFS_INODELK_DOM_COUNT,
|
|
|
c460ee |
+ this->name)) {
|
|
|
c460ee |
+ op_errno = ENOMEM;
|
|
|
c460ee |
+ goto out;
|
|
|
c460ee |
+ }
|
|
|
c460ee |
+
|
|
|
c460ee |
local->fd = fd_ref(fd);
|
|
|
c460ee |
ret = afr_set_inode_local(this, local, fd->inode);
|
|
|
c460ee |
if (ret)
|
|
|
c460ee |
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
|
|
|
c460ee |
index cdd1ff7..22ef5b8 100644
|
|
|
c460ee |
--- a/xlators/features/locks/src/posix.c
|
|
|
c460ee |
+++ b/xlators/features/locks/src/posix.c
|
|
|
c460ee |
@@ -4943,6 +4943,7 @@ struct xlator_fops fops = {
|
|
|
c460ee |
.rchecksum = pl_rchecksum,
|
|
|
c460ee |
.statfs = pl_statfs,
|
|
|
c460ee |
.fsyncdir = pl_fsyncdir,
|
|
|
c460ee |
+ .fsync = pl_fsync,
|
|
|
c460ee |
.readdir = pl_readdir,
|
|
|
c460ee |
.symlink = pl_symlink,
|
|
|
c460ee |
.link = pl_link,
|
|
|
c460ee |
--
|
|
|
c460ee |
1.8.3.1
|
|
|
c460ee |
|