c460ee
From 2b6e6c234dffa72c9f2af747908b1e1f29080698 Mon Sep 17 00:00:00 2001
c460ee
From: Ravishankar N <ravishankar@redhat.com>
c460ee
Date: Thu, 25 Mar 2021 11:52:13 +0530
c460ee
Subject: [PATCH 559/584] afr: make fsync post-op aware of inodelk count
c460ee
 (#2273)
c460ee
c460ee
Problem:
c460ee
Since commit bd540db1e, eager-locking was enabled for fsync. But on
c460ee
certain VM workloads with sharding enabled, shard xlator keeps sending
c460ee
fsync on the base shard. This can cause blocked inodelks from other
c460ee
clients (including shd) to time out due to call bail.
c460ee
c460ee
Fix:
c460ee
Make afr fsync aware of inodelk count and not delay post-op + unlock
c460ee
when inodelk count > 1, just like writev.
c460ee
c460ee
Code is restructured so that any fd based AFR_DATA_TRANSACTION can be made
c460ee
inodelk-count aware by setting GLUSTERFS_INODELK_DOM_COUNT in the xdata request.
c460ee
c460ee
Note: We do not yet know why VMs go into a paused state because of the
c460ee
blocked inodelks but this patch should be a first step in reducing the
c460ee
occurrence.
c460ee
c460ee
Upstream patch details:
c460ee
> https://github.com/gluster/glusterfs/pull/2273/
c460ee
> Updates: #2198
c460ee
> Change-Id: Ib91ebdd3101d590c326e69c829cf9335003e260b
c460ee
> Signed-off-by: Ravishankar N <ravishankar@redhat.com>
c460ee
c460ee
BUG: 1943467
c460ee
Change-Id: Id407ca54007e3bbb206a1d9431ebaf89a2167f74
c460ee
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
c460ee
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244516
c460ee
Tested-by: RHGS Build Bot <nigelb@redhat.com>
c460ee
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
c460ee
---
c460ee
 xlators/cluster/afr/src/afr-inode-write.c | 40 ++++++++++++++++++-------------
c460ee
 xlators/features/locks/src/posix.c        |  1 +
c460ee
 2 files changed, 24 insertions(+), 17 deletions(-)
c460ee
c460ee
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
c460ee
index df82b6e..962a7b1 100644
c460ee
--- a/xlators/cluster/afr/src/afr-inode-write.c
c460ee
+++ b/xlators/cluster/afr/src/afr-inode-write.c
c460ee
@@ -42,6 +42,7 @@ __afr_inode_write_finalize(call_frame_t *frame, xlator_t *this)
c460ee
     struct iatt *stbuf = NULL;
c460ee
     afr_local_t *local = NULL;
c460ee
     afr_private_t *priv = NULL;
c460ee
+    afr_lock_t *lock = NULL;
c460ee
     afr_read_subvol_args_t args = {
c460ee
         0,
c460ee
     };
c460ee
@@ -50,6 +51,12 @@ __afr_inode_write_finalize(call_frame_t *frame, xlator_t *this)
c460ee
     priv = this->private;
c460ee
     GF_VALIDATE_OR_GOTO(this->name, local->inode, out);
c460ee
 
c460ee
+    if (local->update_num_inodelks &&
c460ee
+        local->transaction.type == AFR_DATA_TRANSACTION) {
c460ee
+        lock = &local->inode_ctx->lock[local->transaction.type];
c460ee
+        lock->num_inodelks = local->num_inodelks;
c460ee
+    }
c460ee
+
c460ee
     /*This code needs to stay till DHT sends fops on linked
c460ee
      * inodes*/
c460ee
     if (!inode_is_linked(local->inode)) {
c460ee
@@ -134,6 +141,7 @@ __afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
c460ee
 {
c460ee
     afr_local_t *local = NULL;
c460ee
     afr_private_t *priv = NULL;
c460ee
+    int num_inodelks = 0;
c460ee
 
c460ee
     local = frame->local;
c460ee
     priv = this->private;
c460ee
@@ -146,8 +154,16 @@ __afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
c460ee
 
c460ee
     local->replies[child_index].op_ret = op_ret;
c460ee
     local->replies[child_index].op_errno = op_errno;
c460ee
-    if (xdata)
c460ee
+    if (xdata) {
c460ee
         local->replies[child_index].xdata = dict_ref(xdata);
c460ee
+        if (dict_get_int32_sizen(xdata, GLUSTERFS_INODELK_COUNT,
c460ee
+                                 &num_inodelks) == 0) {
c460ee
+            if (num_inodelks > local->num_inodelks) {
c460ee
+                local->num_inodelks = num_inodelks;
c460ee
+                local->update_num_inodelks = _gf_true;
c460ee
+            }
c460ee
+        }
c460ee
+    }
c460ee
 
c460ee
     if (op_ret >= 0) {
c460ee
         if (prebuf)
c460ee
@@ -284,7 +300,6 @@ afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
c460ee
     afr_local_t *local = frame->local;
c460ee
     uint32_t open_fd_count = 0;
c460ee
     uint32_t write_is_append = 0;
c460ee
-    int32_t num_inodelks = 0;
c460ee
 
c460ee
     LOCK(&frame->lock);
c460ee
     {
c460ee
@@ -306,15 +321,6 @@ afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
c460ee
             local->open_fd_count = open_fd_count;
c460ee
             local->update_open_fd_count = _gf_true;
c460ee
         }
c460ee
-
c460ee
-        ret = dict_get_int32_sizen(xdata, GLUSTERFS_INODELK_COUNT,
c460ee
-                                   &num_inodelks);
c460ee
-        if (ret < 0)
c460ee
-            goto unlock;
c460ee
-        if (num_inodelks > local->num_inodelks) {
c460ee
-            local->num_inodelks = num_inodelks;
c460ee
-            local->update_num_inodelks = _gf_true;
c460ee
-        }
c460ee
     }
c460ee
 unlock:
c460ee
     UNLOCK(&frame->lock);
c460ee
@@ -324,7 +330,6 @@ void
c460ee
 afr_process_post_writev(call_frame_t *frame, xlator_t *this)
c460ee
 {
c460ee
     afr_local_t *local = NULL;
c460ee
-    afr_lock_t *lock = NULL;
c460ee
 
c460ee
     local = frame->local;
c460ee
 
c460ee
@@ -343,11 +348,6 @@ afr_process_post_writev(call_frame_t *frame, xlator_t *this)
c460ee
 
c460ee
     if (local->update_open_fd_count)
c460ee
         local->inode_ctx->open_fd_count = local->open_fd_count;
c460ee
-    if (local->update_num_inodelks &&
c460ee
-        local->transaction.type == AFR_DATA_TRANSACTION) {
c460ee
-        lock = &local->inode_ctx->lock[local->transaction.type];
c460ee
-        lock->num_inodelks = local->num_inodelks;
c460ee
-    }
c460ee
 }
c460ee
 
c460ee
 int
c460ee
@@ -2516,6 +2516,12 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
c460ee
     if (!local->xdata_req)
c460ee
         goto out;
c460ee
 
c460ee
+    if (dict_set_str_sizen(local->xdata_req, GLUSTERFS_INODELK_DOM_COUNT,
c460ee
+                           this->name)) {
c460ee
+        op_errno = ENOMEM;
c460ee
+        goto out;
c460ee
+    }
c460ee
+
c460ee
     local->fd = fd_ref(fd);
c460ee
     ret = afr_set_inode_local(this, local, fd->inode);
c460ee
     if (ret)
c460ee
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
c460ee
index cdd1ff7..22ef5b8 100644
c460ee
--- a/xlators/features/locks/src/posix.c
c460ee
+++ b/xlators/features/locks/src/posix.c
c460ee
@@ -4943,6 +4943,7 @@ struct xlator_fops fops = {
c460ee
     .rchecksum = pl_rchecksum,
c460ee
     .statfs = pl_statfs,
c460ee
     .fsyncdir = pl_fsyncdir,
c460ee
+    .fsync = pl_fsync,
c460ee
     .readdir = pl_readdir,
c460ee
     .symlink = pl_symlink,
c460ee
     .link = pl_link,
c460ee
-- 
c460ee
1.8.3.1
c460ee