Blob Blame History Raw
From edc4297530eeb4107477a607042edeb2ce2ccca8 Mon Sep 17 00:00:00 2001
From: Pranith Kumar K <pkarampu@redhat.com>
Date: Tue, 18 Sep 2018 12:15:57 +0530
Subject: [PATCH 381/385] cluster/afr: Make data eager-lock decision based on
 number of locks

For both Virt and block workloads the file is opened multiple times
leading to dynamically setting eager-lock to off for the workload.
Instead of depending on the number-of-open-fds, if we change the
logic to depend on number of inodelks, then it will give better
performance than the earlier logic. When there is an eager-lock
and number of inodelks is more than 1 we know that there is a
conflicting lock, so depend on that information to decide whether
to keep the current transaction go through delayed-post-op or not.

Locks xlator doesn't have implementation to query number of locks in
fxattrop in releases older than 3.10 so to keep things backward
compatible in 3.12, data transactions will use new logic where as
fxattrop transactions will use old logic. I am planning to send one
more patch which makes metadata domain locks also depend on
inodelk-count

Profile info for a dd of 500MB to a file with another fd opened
on the file using exec 250>filename

Without this patch:
 0.14      67.41 us      16.72 us    3870.82 us  892 FINODELK
 0.59     279.87 us      95.71 us    2085.89 us  898 FXATTROP
 3.46     366.43 us      81.75 us    6952.79 us 4000 WRITE
95.79  148733.99 us   50568.12 us  919127.86 us  273 FSYNC

With this patch:
 0.00      51.01 us      38.07 us      80.16 us    4 FINODELK
 0.00     235.43 us     235.43 us     235.43 us    1 TRUNCATE
 0.00     125.07 us      56.80 us     193.33 us    2 GETXATTR
 0.00     135.86 us      62.13 us     209.59 us    2  INODELK
 0.00     197.88 us     155.39 us     253.90 us    4 FXATTROP
 0.00     450.59 us     394.28 us     506.89 us    2  XATTROP
 0.00      56.96 us      19.06 us     406.59 us   23    FLUSH
37.81  273648.93 us      48.43 us 6017657.05 us   44   LOOKUP
62.18    4951.86 us      93.80 us 1143154.75 us 3999    WRITE

postgresql benchmark performance changed from ~1130 TPS to ~2300TPS
randio fio job inside Ovirt based VM went from ~600IOPs to ~2000IOPS

Upstream-Patch: https://review.gluster.org/c/glusterfs/+/21210
BUG: 1630688
Change-Id: If7f7388d2f08cf7f17ca517a4ea222560661dc36
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/150701
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Karthik Subrahmanya <ksubrahm@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 xlators/cluster/afr/src/afr-inode-write.c | 26 ++++++++++++++++++++++++--
 xlators/cluster/afr/src/afr-transaction.c | 27 +++++++++++++++++++--------
 xlators/cluster/afr/src/afr.h             |  8 ++++++++
 3 files changed, 51 insertions(+), 10 deletions(-)

diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 9e6ba35..8b1dcfd 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -300,6 +300,7 @@ afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index,
         afr_local_t *local = frame->local;
         uint32_t open_fd_count = 0;
         uint32_t write_is_append = 0;
+        int32_t num_inodelks = 0;
 
         LOCK (&frame->lock);
         {
@@ -318,10 +319,19 @@ afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index,
                                        &open_fd_count);
                 if (ret < 0)
                         goto unlock;
-		if (open_fd_count > local->open_fd_count) {
+                if (open_fd_count > local->open_fd_count) {
                         local->open_fd_count = open_fd_count;
                         local->update_open_fd_count = _gf_true;
-		}
+                }
+
+                ret = dict_get_int32(xdata, GLUSTERFS_INODELK_COUNT,
+                                     &num_inodelks);
+                if (ret < 0)
+                        goto unlock;
+                if (num_inodelks > local->num_inodelks) {
+                        local->num_inodelks = num_inodelks;
+                        local->update_num_inodelks = _gf_true;
+                }
         }
 unlock:
         UNLOCK (&frame->lock);
@@ -331,6 +341,7 @@ void
 afr_process_post_writev (call_frame_t *frame, xlator_t *this)
 {
         afr_local_t     *local = NULL;
+        afr_lock_t *lock = NULL;
 
         local = frame->local;
 
@@ -349,6 +360,11 @@ afr_process_post_writev (call_frame_t *frame, xlator_t *this)
 
         if (local->update_open_fd_count)
                 local->inode_ctx->open_fd_count = local->open_fd_count;
+        if (local->update_num_inodelks &&
+                local->transaction.type == AFR_DATA_TRANSACTION) {
+                lock = &local->inode_ctx->lock[local->transaction.type];
+                lock->num_inodelks = local->num_inodelks;
+        }
 
 }
 
@@ -534,6 +550,12 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
                 goto out;
         }
 
+        if (dict_set_str(local->xdata_req, GLUSTERFS_INODELK_DOM_COUNT,
+                         this->name)) {
+                op_errno = ENOMEM;
+                goto out;
+        }
+
 	if (dict_set_uint32 (local->xdata_req, GLUSTERFS_WRITE_IS_APPEND, 4)) {
 		op_errno = ENOMEM;
 		goto out;
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 85b00a8..0a67a83 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -1858,17 +1858,28 @@ afr_internal_lock_finish (call_frame_t *frame, xlator_t *this)
 }
 
 gf_boolean_t
-afr_are_multiple_fds_opened (afr_local_t *local, xlator_t *this)
+afr_are_conflicting_ops_waiting(afr_local_t *local, xlator_t *this)
 {
+        afr_lock_t *lock = NULL;
+        lock = &local->inode_ctx->lock[local->transaction.type];
+
         /* Lets say mount1 has eager-lock(full-lock) and after the eager-lock
-         * is taken mount2 opened the same file, it won't be able to
-         * perform any data operations until mount1 releases eager-lock.
-         * To avoid such scenario do not enable eager-lock for this transaction
-         * if open-fd-count is > 1
+         * is taken mount2 opened the same file, it won't be able to perform
+         * any {meta,}data operations until mount1 releases eager-lock.  To
+         * avoid such scenario do not enable eager-lock for this transaction if
+         * open-fd-count is > 1 for metadata transactions and if
+         * num-inodelks > 1 for data transactions
          */
 
-        if (local->inode_ctx->open_fd_count > 1)
-                return _gf_true;
+        if (local->transaction.type == AFR_METADATA_TRANSACTION) {
+                if (local->inode_ctx->open_fd_count > 1) {
+                    return _gf_true;
+                }
+        } else if (local->transaction.type == AFR_DATA_TRANSACTION) {
+                if (lock->num_inodelks > 1) {
+                    return _gf_true;
+                }
+        }
 
         return _gf_false;
 }
@@ -1890,7 +1901,7 @@ afr_is_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this,
                 goto out;
         }
 
-        if (afr_are_multiple_fds_opened (local, this)) {
+        if (afr_are_conflicting_ops_waiting(local, this)) {
                 lock->release = _gf_true;
                 goto out;
         }
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 76ad292..afe4a73 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -302,6 +302,12 @@ typedef enum {
 } afr_fop_lock_state_t;
 
 typedef struct _afr_inode_lock_t {
+        /* @num_inodelks:
+           Number of inodelks queried from the server, as queried through
+           xdata in FOPs. Currently, used to decide if eager-locking must be
+           temporarily disabled.
+        */
+        int32_t num_inodelks;
         unsigned int event_generation;
         gf_boolean_t    release;
         gf_boolean_t    acquired;
@@ -354,6 +360,8 @@ typedef struct _afr_local {
 
         uint32_t     open_fd_count;
         gf_boolean_t update_open_fd_count;
+        int32_t num_inodelks;
+        gf_boolean_t update_num_inodelks;
 
 	gf_lkowner_t  saved_lk_owner;
 
-- 
1.8.3.1