887953
From edc4297530eeb4107477a607042edeb2ce2ccca8 Mon Sep 17 00:00:00 2001
887953
From: Pranith Kumar K <pkarampu@redhat.com>
887953
Date: Tue, 18 Sep 2018 12:15:57 +0530
887953
Subject: [PATCH 381/385] cluster/afr: Make data eager-lock decision based on
887953
 number of locks
887953
887953
For both Virt and block workloads the file is opened multiple times
887953
leading to dynamically setting eager-lock to off for the workload.
887953
Instead of depending on the number-of-open-fds, if we change the
887953
logic to depend on number of inodelks, then it will give better
887953
performance than the earlier logic. When there is an eager-lock
887953
and number of inodelks is more than 1 we know that there is a
887953
conflicting lock, so depend on that information to decide whether
887953
to keep the current transaction go through delayed-post-op or not.
887953
887953
Locks xlator doesn't have implementation to query number of locks in
887953
fxattrop in releases older than 3.10 so to keep things backward
887953
compatible in 3.12, data transactions will use new logic where as
887953
fxattrop transactions will use old logic. I am planning to send one
887953
more patch which makes metadata domain locks also depend on
887953
inodelk-count
887953
887953
Profile info for a dd of 500MB to a file with another fd opened
887953
on the file using exec 250>filename
887953
887953
Without this patch:
887953
 0.14      67.41 us      16.72 us    3870.82 us  892 FINODELK
887953
 0.59     279.87 us      95.71 us    2085.89 us  898 FXATTROP
887953
 3.46     366.43 us      81.75 us    6952.79 us 4000 WRITE
887953
95.79  148733.99 us   50568.12 us  919127.86 us  273 FSYNC
887953
887953
With this patch:
887953
 0.00      51.01 us      38.07 us      80.16 us    4 FINODELK
887953
 0.00     235.43 us     235.43 us     235.43 us    1 TRUNCATE
887953
 0.00     125.07 us      56.80 us     193.33 us    2 GETXATTR
887953
 0.00     135.86 us      62.13 us     209.59 us    2  INODELK
887953
 0.00     197.88 us     155.39 us     253.90 us    4 FXATTROP
887953
 0.00     450.59 us     394.28 us     506.89 us    2  XATTROP
887953
 0.00      56.96 us      19.06 us     406.59 us   23    FLUSH
887953
37.81  273648.93 us      48.43 us 6017657.05 us   44   LOOKUP
887953
62.18    4951.86 us      93.80 us 1143154.75 us 3999    WRITE
887953
887953
postgresql benchmark performance changed from ~1130 TPS to ~2300TPS
887953
randio fio job inside Ovirt based VM went from ~600IOPs to ~2000IOPS
887953
887953
Upstream-Patch: https://review.gluster.org/c/glusterfs/+/21210
887953
BUG: 1630688
887953
Change-Id: If7f7388d2f08cf7f17ca517a4ea222560661dc36
887953
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
887953
Reviewed-on: https://code.engineering.redhat.com/gerrit/150701
887953
Tested-by: RHGS Build Bot <nigelb@redhat.com>
887953
Reviewed-by: Karthik Subrahmanya <ksubrahm@redhat.com>
887953
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
887953
---
887953
 xlators/cluster/afr/src/afr-inode-write.c | 26 ++++++++++++++++++++++++--
887953
 xlators/cluster/afr/src/afr-transaction.c | 27 +++++++++++++++++++--------
887953
 xlators/cluster/afr/src/afr.h             |  8 ++++++++
887953
 3 files changed, 51 insertions(+), 10 deletions(-)
887953
887953
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
887953
index 9e6ba35..8b1dcfd 100644
887953
--- a/xlators/cluster/afr/src/afr-inode-write.c
887953
+++ b/xlators/cluster/afr/src/afr-inode-write.c
887953
@@ -300,6 +300,7 @@ afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index,
887953
         afr_local_t *local = frame->local;
887953
         uint32_t open_fd_count = 0;
887953
         uint32_t write_is_append = 0;
887953
+        int32_t num_inodelks = 0;
887953
 
887953
         LOCK (&frame->lock);
887953
         {
887953
@@ -318,10 +319,19 @@ afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index,
887953
                                        &open_fd_count);
887953
                 if (ret < 0)
887953
                         goto unlock;
887953
-		if (open_fd_count > local->open_fd_count) {
887953
+                if (open_fd_count > local->open_fd_count) {
887953
                         local->open_fd_count = open_fd_count;
887953
                         local->update_open_fd_count = _gf_true;
887953
-		}
887953
+                }
887953
+
887953
+                ret = dict_get_int32(xdata, GLUSTERFS_INODELK_COUNT,
887953
+                                     &num_inodelks);
887953
+                if (ret < 0)
887953
+                        goto unlock;
887953
+                if (num_inodelks > local->num_inodelks) {
887953
+                        local->num_inodelks = num_inodelks;
887953
+                        local->update_num_inodelks = _gf_true;
887953
+                }
887953
         }
887953
 unlock:
887953
         UNLOCK (&frame->lock);
887953
@@ -331,6 +341,7 @@ void
887953
 afr_process_post_writev (call_frame_t *frame, xlator_t *this)
887953
 {
887953
         afr_local_t     *local = NULL;
887953
+        afr_lock_t *lock = NULL;
887953
 
887953
         local = frame->local;
887953
 
887953
@@ -349,6 +360,11 @@ afr_process_post_writev (call_frame_t *frame, xlator_t *this)
887953
 
887953
         if (local->update_open_fd_count)
887953
                 local->inode_ctx->open_fd_count = local->open_fd_count;
887953
+        if (local->update_num_inodelks &&
887953
+                local->transaction.type == AFR_DATA_TRANSACTION) {
887953
+                lock = &local->inode_ctx->lock[local->transaction.type];
887953
+                lock->num_inodelks = local->num_inodelks;
887953
+        }
887953
 
887953
 }
887953
 
887953
@@ -534,6 +550,12 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
887953
                 goto out;
887953
         }
887953
 
887953
+        if (dict_set_str(local->xdata_req, GLUSTERFS_INODELK_DOM_COUNT,
887953
+                         this->name)) {
887953
+                op_errno = ENOMEM;
887953
+                goto out;
887953
+        }
887953
+
887953
 	if (dict_set_uint32 (local->xdata_req, GLUSTERFS_WRITE_IS_APPEND, 4)) {
887953
 		op_errno = ENOMEM;
887953
 		goto out;
887953
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
887953
index 85b00a8..0a67a83 100644
887953
--- a/xlators/cluster/afr/src/afr-transaction.c
887953
+++ b/xlators/cluster/afr/src/afr-transaction.c
887953
@@ -1858,17 +1858,28 @@ afr_internal_lock_finish (call_frame_t *frame, xlator_t *this)
887953
 }
887953
 
887953
 gf_boolean_t
887953
-afr_are_multiple_fds_opened (afr_local_t *local, xlator_t *this)
887953
+afr_are_conflicting_ops_waiting(afr_local_t *local, xlator_t *this)
887953
 {
887953
+        afr_lock_t *lock = NULL;
887953
+        lock = &local->inode_ctx->lock[local->transaction.type];
887953
+
887953
         /* Lets say mount1 has eager-lock(full-lock) and after the eager-lock
887953
-         * is taken mount2 opened the same file, it won't be able to
887953
-         * perform any data operations until mount1 releases eager-lock.
887953
-         * To avoid such scenario do not enable eager-lock for this transaction
887953
-         * if open-fd-count is > 1
887953
+         * is taken mount2 opened the same file, it won't be able to perform
887953
+         * any {meta,}data operations until mount1 releases eager-lock.  To
887953
+         * avoid such scenario do not enable eager-lock for this transaction if
887953
+         * open-fd-count is > 1 for metadata transactions and if
887953
+         * num-inodelks > 1 for data transactions
887953
          */
887953
 
887953
-        if (local->inode_ctx->open_fd_count > 1)
887953
-                return _gf_true;
887953
+        if (local->transaction.type == AFR_METADATA_TRANSACTION) {
887953
+                if (local->inode_ctx->open_fd_count > 1) {
887953
+                    return _gf_true;
887953
+                }
887953
+        } else if (local->transaction.type == AFR_DATA_TRANSACTION) {
887953
+                if (lock->num_inodelks > 1) {
887953
+                    return _gf_true;
887953
+                }
887953
+        }
887953
 
887953
         return _gf_false;
887953
 }
887953
@@ -1890,7 +1901,7 @@ afr_is_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this,
887953
                 goto out;
887953
         }
887953
 
887953
-        if (afr_are_multiple_fds_opened (local, this)) {
887953
+        if (afr_are_conflicting_ops_waiting(local, this)) {
887953
                 lock->release = _gf_true;
887953
                 goto out;
887953
         }
887953
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
887953
index 76ad292..afe4a73 100644
887953
--- a/xlators/cluster/afr/src/afr.h
887953
+++ b/xlators/cluster/afr/src/afr.h
887953
@@ -302,6 +302,12 @@ typedef enum {
887953
 } afr_fop_lock_state_t;
887953
 
887953
 typedef struct _afr_inode_lock_t {
887953
+        /* @num_inodelks:
887953
+           Number of inodelks queried from the server, as queried through
887953
+           xdata in FOPs. Currently, used to decide if eager-locking must be
887953
+           temporarily disabled.
887953
+        */
887953
+        int32_t num_inodelks;
887953
         unsigned int event_generation;
887953
         gf_boolean_t    release;
887953
         gf_boolean_t    acquired;
887953
@@ -354,6 +360,8 @@ typedef struct _afr_local {
887953
 
887953
         uint32_t     open_fd_count;
887953
         gf_boolean_t update_open_fd_count;
887953
+        int32_t num_inodelks;
887953
+        gf_boolean_t update_num_inodelks;
887953
 
887953
 	gf_lkowner_t  saved_lk_owner;
887953
 
887953
-- 
887953
1.8.3.1
887953