|
|
887953 |
From edc4297530eeb4107477a607042edeb2ce2ccca8 Mon Sep 17 00:00:00 2001
|
|
|
887953 |
From: Pranith Kumar K <pkarampu@redhat.com>
|
|
|
887953 |
Date: Tue, 18 Sep 2018 12:15:57 +0530
|
|
|
887953 |
Subject: [PATCH 381/385] cluster/afr: Make data eager-lock decision based on
|
|
|
887953 |
number of locks
|
|
|
887953 |
|
|
|
887953 |
For both Virt and block workloads the file is opened multiple times
|
|
|
887953 |
leading to dynamically setting eager-lock to off for the workload.
|
|
|
887953 |
Instead of depending on the number-of-open-fds, if we change the
|
|
|
887953 |
logic to depend on number of inodelks, then it will give better
|
|
|
887953 |
performance than the earlier logic. When there is an eager-lock
|
|
|
887953 |
and number of inodelks is more than 1 we know that there is a
|
|
|
887953 |
conflicting lock, so depend on that information to decide whether
|
|
|
887953 |
to keep the current transaction go through delayed-post-op or not.
|
|
|
887953 |
|
|
|
887953 |
Locks xlator doesn't have implementation to query number of locks in
|
|
|
887953 |
fxattrop in releases older than 3.10 so to keep things backward
|
|
|
887953 |
compatible in 3.12, data transactions will use new logic where as
|
|
|
887953 |
fxattrop transactions will use old logic. I am planning to send one
|
|
|
887953 |
more patch which makes metadata domain locks also depend on
|
|
|
887953 |
inodelk-count
|
|
|
887953 |
|
|
|
887953 |
Profile info for a dd of 500MB to a file with another fd opened
|
|
|
887953 |
on the file using exec 250>filename
|
|
|
887953 |
|
|
|
887953 |
Without this patch:
|
|
|
887953 |
0.14 67.41 us 16.72 us 3870.82 us 892 FINODELK
|
|
|
887953 |
0.59 279.87 us 95.71 us 2085.89 us 898 FXATTROP
|
|
|
887953 |
3.46 366.43 us 81.75 us 6952.79 us 4000 WRITE
|
|
|
887953 |
95.79 148733.99 us 50568.12 us 919127.86 us 273 FSYNC
|
|
|
887953 |
|
|
|
887953 |
With this patch:
|
|
|
887953 |
0.00 51.01 us 38.07 us 80.16 us 4 FINODELK
|
|
|
887953 |
0.00 235.43 us 235.43 us 235.43 us 1 TRUNCATE
|
|
|
887953 |
0.00 125.07 us 56.80 us 193.33 us 2 GETXATTR
|
|
|
887953 |
0.00 135.86 us 62.13 us 209.59 us 2 INODELK
|
|
|
887953 |
0.00 197.88 us 155.39 us 253.90 us 4 FXATTROP
|
|
|
887953 |
0.00 450.59 us 394.28 us 506.89 us 2 XATTROP
|
|
|
887953 |
0.00 56.96 us 19.06 us 406.59 us 23 FLUSH
|
|
|
887953 |
37.81 273648.93 us 48.43 us 6017657.05 us 44 LOOKUP
|
|
|
887953 |
62.18 4951.86 us 93.80 us 1143154.75 us 3999 WRITE
|
|
|
887953 |
|
|
|
887953 |
postgresql benchmark performance changed from ~1130 TPS to ~2300TPS
|
|
|
887953 |
randio fio job inside Ovirt based VM went from ~600IOPs to ~2000IOPS
|
|
|
887953 |
|
|
|
887953 |
Upstream-Patch: https://review.gluster.org/c/glusterfs/+/21210
|
|
|
887953 |
BUG: 1630688
|
|
|
887953 |
Change-Id: If7f7388d2f08cf7f17ca517a4ea222560661dc36
|
|
|
887953 |
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
|
|
|
887953 |
Reviewed-on: https://code.engineering.redhat.com/gerrit/150701
|
|
|
887953 |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
887953 |
Reviewed-by: Karthik Subrahmanya <ksubrahm@redhat.com>
|
|
|
887953 |
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
|
887953 |
---
|
|
|
887953 |
xlators/cluster/afr/src/afr-inode-write.c | 26 ++++++++++++++++++++++++--
|
|
|
887953 |
xlators/cluster/afr/src/afr-transaction.c | 27 +++++++++++++++++++--------
|
|
|
887953 |
xlators/cluster/afr/src/afr.h | 8 ++++++++
|
|
|
887953 |
3 files changed, 51 insertions(+), 10 deletions(-)
|
|
|
887953 |
|
|
|
887953 |
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
|
|
|
887953 |
index 9e6ba35..8b1dcfd 100644
|
|
|
887953 |
--- a/xlators/cluster/afr/src/afr-inode-write.c
|
|
|
887953 |
+++ b/xlators/cluster/afr/src/afr-inode-write.c
|
|
|
887953 |
@@ -300,6 +300,7 @@ afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index,
|
|
|
887953 |
afr_local_t *local = frame->local;
|
|
|
887953 |
uint32_t open_fd_count = 0;
|
|
|
887953 |
uint32_t write_is_append = 0;
|
|
|
887953 |
+ int32_t num_inodelks = 0;
|
|
|
887953 |
|
|
|
887953 |
LOCK (&frame->lock);
|
|
|
887953 |
{
|
|
|
887953 |
@@ -318,10 +319,19 @@ afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index,
|
|
|
887953 |
&open_fd_count);
|
|
|
887953 |
if (ret < 0)
|
|
|
887953 |
goto unlock;
|
|
|
887953 |
- if (open_fd_count > local->open_fd_count) {
|
|
|
887953 |
+ if (open_fd_count > local->open_fd_count) {
|
|
|
887953 |
local->open_fd_count = open_fd_count;
|
|
|
887953 |
local->update_open_fd_count = _gf_true;
|
|
|
887953 |
- }
|
|
|
887953 |
+ }
|
|
|
887953 |
+
|
|
|
887953 |
+ ret = dict_get_int32(xdata, GLUSTERFS_INODELK_COUNT,
|
|
|
887953 |
+ &num_inodelks);
|
|
|
887953 |
+ if (ret < 0)
|
|
|
887953 |
+ goto unlock;
|
|
|
887953 |
+ if (num_inodelks > local->num_inodelks) {
|
|
|
887953 |
+ local->num_inodelks = num_inodelks;
|
|
|
887953 |
+ local->update_num_inodelks = _gf_true;
|
|
|
887953 |
+ }
|
|
|
887953 |
}
|
|
|
887953 |
unlock:
|
|
|
887953 |
UNLOCK (&frame->lock);
|
|
|
887953 |
@@ -331,6 +341,7 @@ void
|
|
|
887953 |
afr_process_post_writev (call_frame_t *frame, xlator_t *this)
|
|
|
887953 |
{
|
|
|
887953 |
afr_local_t *local = NULL;
|
|
|
887953 |
+ afr_lock_t *lock = NULL;
|
|
|
887953 |
|
|
|
887953 |
local = frame->local;
|
|
|
887953 |
|
|
|
887953 |
@@ -349,6 +360,11 @@ afr_process_post_writev (call_frame_t *frame, xlator_t *this)
|
|
|
887953 |
|
|
|
887953 |
if (local->update_open_fd_count)
|
|
|
887953 |
local->inode_ctx->open_fd_count = local->open_fd_count;
|
|
|
887953 |
+ if (local->update_num_inodelks &&
|
|
|
887953 |
+ local->transaction.type == AFR_DATA_TRANSACTION) {
|
|
|
887953 |
+ lock = &local->inode_ctx->lock[local->transaction.type];
|
|
|
887953 |
+ lock->num_inodelks = local->num_inodelks;
|
|
|
887953 |
+ }
|
|
|
887953 |
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
@@ -534,6 +550,12 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
|
|
|
887953 |
goto out;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
+ if (dict_set_str(local->xdata_req, GLUSTERFS_INODELK_DOM_COUNT,
|
|
|
887953 |
+ this->name)) {
|
|
|
887953 |
+ op_errno = ENOMEM;
|
|
|
887953 |
+ goto out;
|
|
|
887953 |
+ }
|
|
|
887953 |
+
|
|
|
887953 |
if (dict_set_uint32 (local->xdata_req, GLUSTERFS_WRITE_IS_APPEND, 4)) {
|
|
|
887953 |
op_errno = ENOMEM;
|
|
|
887953 |
goto out;
|
|
|
887953 |
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
|
|
|
887953 |
index 85b00a8..0a67a83 100644
|
|
|
887953 |
--- a/xlators/cluster/afr/src/afr-transaction.c
|
|
|
887953 |
+++ b/xlators/cluster/afr/src/afr-transaction.c
|
|
|
887953 |
@@ -1858,17 +1858,28 @@ afr_internal_lock_finish (call_frame_t *frame, xlator_t *this)
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
gf_boolean_t
|
|
|
887953 |
-afr_are_multiple_fds_opened (afr_local_t *local, xlator_t *this)
|
|
|
887953 |
+afr_are_conflicting_ops_waiting(afr_local_t *local, xlator_t *this)
|
|
|
887953 |
{
|
|
|
887953 |
+ afr_lock_t *lock = NULL;
|
|
|
887953 |
+ lock = &local->inode_ctx->lock[local->transaction.type];
|
|
|
887953 |
+
|
|
|
887953 |
/* Lets say mount1 has eager-lock(full-lock) and after the eager-lock
|
|
|
887953 |
- * is taken mount2 opened the same file, it won't be able to
|
|
|
887953 |
- * perform any data operations until mount1 releases eager-lock.
|
|
|
887953 |
- * To avoid such scenario do not enable eager-lock for this transaction
|
|
|
887953 |
- * if open-fd-count is > 1
|
|
|
887953 |
+ * is taken mount2 opened the same file, it won't be able to perform
|
|
|
887953 |
+ * any {meta,}data operations until mount1 releases eager-lock. To
|
|
|
887953 |
+ * avoid such scenario do not enable eager-lock for this transaction if
|
|
|
887953 |
+ * open-fd-count is > 1 for metadata transactions and if
|
|
|
887953 |
+ * num-inodelks > 1 for data transactions
|
|
|
887953 |
*/
|
|
|
887953 |
|
|
|
887953 |
- if (local->inode_ctx->open_fd_count > 1)
|
|
|
887953 |
- return _gf_true;
|
|
|
887953 |
+ if (local->transaction.type == AFR_METADATA_TRANSACTION) {
|
|
|
887953 |
+ if (local->inode_ctx->open_fd_count > 1) {
|
|
|
887953 |
+ return _gf_true;
|
|
|
887953 |
+ }
|
|
|
887953 |
+ } else if (local->transaction.type == AFR_DATA_TRANSACTION) {
|
|
|
887953 |
+ if (lock->num_inodelks > 1) {
|
|
|
887953 |
+ return _gf_true;
|
|
|
887953 |
+ }
|
|
|
887953 |
+ }
|
|
|
887953 |
|
|
|
887953 |
return _gf_false;
|
|
|
887953 |
}
|
|
|
887953 |
@@ -1890,7 +1901,7 @@ afr_is_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this,
|
|
|
887953 |
goto out;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
- if (afr_are_multiple_fds_opened (local, this)) {
|
|
|
887953 |
+ if (afr_are_conflicting_ops_waiting(local, this)) {
|
|
|
887953 |
lock->release = _gf_true;
|
|
|
887953 |
goto out;
|
|
|
887953 |
}
|
|
|
887953 |
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
|
|
|
887953 |
index 76ad292..afe4a73 100644
|
|
|
887953 |
--- a/xlators/cluster/afr/src/afr.h
|
|
|
887953 |
+++ b/xlators/cluster/afr/src/afr.h
|
|
|
887953 |
@@ -302,6 +302,12 @@ typedef enum {
|
|
|
887953 |
} afr_fop_lock_state_t;
|
|
|
887953 |
|
|
|
887953 |
typedef struct _afr_inode_lock_t {
|
|
|
887953 |
+ /* @num_inodelks:
|
|
|
887953 |
+ Number of inodelks queried from the server, as queried through
|
|
|
887953 |
+ xdata in FOPs. Currently, used to decide if eager-locking must be
|
|
|
887953 |
+ temporarily disabled.
|
|
|
887953 |
+ */
|
|
|
887953 |
+ int32_t num_inodelks;
|
|
|
887953 |
unsigned int event_generation;
|
|
|
887953 |
gf_boolean_t release;
|
|
|
887953 |
gf_boolean_t acquired;
|
|
|
887953 |
@@ -354,6 +360,8 @@ typedef struct _afr_local {
|
|
|
887953 |
|
|
|
887953 |
uint32_t open_fd_count;
|
|
|
887953 |
gf_boolean_t update_open_fd_count;
|
|
|
887953 |
+ int32_t num_inodelks;
|
|
|
887953 |
+ gf_boolean_t update_num_inodelks;
|
|
|
887953 |
|
|
|
887953 |
gf_lkowner_t saved_lk_owner;
|
|
|
887953 |
|
|
|
887953 |
--
|
|
|
887953 |
1.8.3.1
|
|
|
887953 |
|