From 80eea6bf07445a290021a8936a01cfd7aa696c1b Mon Sep 17 00:00:00 2001
From: Anuradha Talur <atalur@redhat.com>
Date: Thu, 25 Aug 2016 11:46:25 +0530
Subject: [PATCH 79/86] afr: Consume compound fops in afr transaction
Backport of: http://review.gluster.org/15014
Change-Id: I7920fb69ad401f3c159565ad70b44f2b31a763a9
BUG: 1360978
Signed-off-by: Anuradha Talur <atalur@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/84816
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
---
libglusterfs/src/globals.h | 2 +
xlators/cluster/afr/src/afr-common.c | 55 ++++
xlators/cluster/afr/src/afr-inode-write.c | 73 +++--
xlators/cluster/afr/src/afr-lk-common.c | 26 +-
xlators/cluster/afr/src/afr-transaction.c | 391 +++++++++++++++++++++--
xlators/cluster/afr/src/afr-transaction.h | 4 +
xlators/cluster/afr/src/afr.c | 12 +
xlators/cluster/afr/src/afr.h | 40 +++
xlators/mgmt/glusterd/src/glusterd-volume-set.c | 7 +
9 files changed, 546 insertions(+), 64 deletions(-)
diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h
index b4ad9b2..f1f8ee1 100644
--- a/libglusterfs/src/globals.h
+++ b/libglusterfs/src/globals.h
@@ -79,6 +79,8 @@
#define GD_OP_VERSION_3_8_0 30800 /* Op-version for GlusterFS 3.8.0 */
+#define GD_OP_VERSION_3_8_4 30804 /* Op-version for GlusterFS 3.8.4 */
+
#define GD_OP_VERSION_3_9_0 30900 /* Op-version for GlusterFS 3.9.0 */
#include "xlator.h"
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 8f0de59..db6a350 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -43,6 +43,7 @@
#include "afr-self-heal.h"
#include "afr-self-heald.h"
#include "afr-messages.h"
+#include "compound-fop-utils.h"
call_frame_t *
afr_copy_frame (call_frame_t *base)
@@ -4475,6 +4476,7 @@ afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
local->need_full_crawl = _gf_false;
+ local->compound = _gf_false;
INIT_LIST_HEAD (&local->healer);
return 0;
out:
@@ -4626,6 +4628,7 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this)
if (!local->pending)
goto out;
+ local->compound = _gf_false;
INIT_LIST_HEAD (&local->transaction.eager_locked);
ret = 0;
@@ -5419,3 +5422,55 @@ afr_get_msg_id (char *op_type)
return AFR_MSG_ADD_BRICK_STATUS;
return -1;
}
+
+/* Decide whether the pre-op and @fop can be sent as one compound fop:
+ * requires priv->use_compound_fops to be set, priv->arbiter_count to be
+ * zero, and @fop to be GF_FOP_WRITE. */
+gf_boolean_t
+afr_can_compound_pre_op_and_op (afr_private_t *priv, glusterfs_fop_t fop)
+{
+        gf_boolean_t allowed = _gf_false;
+
+        if (priv->use_compound_fops && priv->arbiter_count == 0) {
+                /* Writev is the only fop packed with its pre-op. */
+                if (fop == GF_FOP_WRITE)
+                        allowed = _gf_true;
+        }
+
+        return allowed;
+}
+
+/* Pack the arguments of @fop, taken from frame->local, into slot @index
+ * of @args. Returns afr_pre_op_writev_cbk for GF_FOP_WRITE; for any other
+ * fop nothing is packed and NULL is returned. */
+afr_compound_cbk_t
+afr_pack_fop_args (call_frame_t *frame, compound_args_t *args,
+                   glusterfs_fop_t fop, int index)
+{
+        afr_local_t        *local = frame->local;
+        afr_compound_cbk_t  cbk   = NULL;
+
+        if (fop == GF_FOP_WRITE) {
+                COMPOUND_PACK_ARGS (writev, GF_FOP_WRITE,
+                                    args, index,
+                                    local->fd, local->cont.writev.vector,
+                                    local->cont.writev.count,
+                                    local->cont.writev.offset,
+                                    local->cont.writev.flags,
+                                    local->cont.writev.iobref,
+                                    local->xdata_req);
+                cbk = afr_pre_op_writev_cbk;
+        }
+        return cbk;
+}
+
+/* Pass @args to compound_args_cleanup() and drop one reference on each of
+ * the two dicts. Any argument that is NULL is skipped. */
+void
+afr_compound_cleanup (compound_args_t *args, dict_t *xdata,
+                      dict_t *newloc_xdata)
+{
+        dict_t *dicts[2] = { xdata, newloc_xdata };
+        int     n        = 0;
+
+        if (args != NULL)
+                compound_args_cleanup (args);
+
+        for (n = 0; n < 2; n++) {
+                if (dicts[n] != NULL)
+                        dict_unref (dicts[n]);
+        }
+}
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 24ab52f..200b420 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -292,21 +292,16 @@ afr_writev_handle_short_writes (call_frame_t *frame, xlator_t *this)
}
}
-int
-afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+void
+afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t * local = NULL;
- call_frame_t *fop_frame = NULL;
- int child_index = (long) cookie;
- int call_count = -1;
int ret = 0;
+ afr_local_t *local = frame->local;
uint32_t open_fd_count = 0;
uint32_t write_is_append = 0;
- local = frame->local;
-
LOCK (&frame->lock);
{
__afr_inode_write_fill (frame, this, child_index, op_ret,
@@ -324,32 +319,60 @@ afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
&open_fd_count);
if (ret == -1)
goto unlock;
- if ((open_fd_count > local->open_fd_count)) {
- local->open_fd_count = open_fd_count;
- local->update_open_fd_count = _gf_true;
+ if (open_fd_count > local->open_fd_count) {
+ local->open_fd_count = open_fd_count;
+ local->update_open_fd_count = _gf_true;
}
}
unlock:
UNLOCK (&frame->lock);
+}
- call_count = afr_frame_return (frame);
+void
+afr_process_post_writev (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
- if (call_count == 0) {
- if (!local->stable_write && !local->append_write)
- /* An appended write removes the necessity to
- fsync() the file. This is because self-heal
- has the logic to check for larger file when
- the xattrs are not reliably pointing at
- a stale file.
- */
- afr_fd_report_unstable_write (this, local->fd);
+ local = frame->local;
- __afr_inode_write_finalize (frame, this);
+ if (!local->stable_write && !local->append_write)
+ /* An appended write removes the necessity to
+ fsync() the file. This is because self-heal
+ has the logic to check for larger file when
+ the xattrs are not reliably pointing at
+ a stale file.
+ */
+ afr_fd_report_unstable_write (this, local->fd);
+
+ __afr_inode_write_finalize (frame, this);
- afr_writev_handle_short_writes (frame, this);
+ afr_writev_handle_short_writes (frame, this);
- if (local->update_open_fd_count)
- afr_handle_open_fd_count (frame, this);
+ if (local->update_open_fd_count)
+ afr_handle_open_fd_count (frame, this);
+
+}
+
+int
+afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *fop_frame = NULL;
+ int child_index = (long) cookie;
+ int call_count = -1;
+ int ret = 0;
+
+ local = frame->local;
+
+ afr_inode_write_fill (frame, this, child_index, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+
+ call_count = afr_frame_return (frame);
+
+ if (call_count == 0) {
+ afr_process_post_writev (frame, this);
if (!afr_txn_nothing_failed (frame, this)) {
//Don't unwind until post-op is complete
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index c2a5f52..0bd9ffe 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -463,8 +463,8 @@ transaction_lk_op (afr_local_t *local)
}
-static int
-is_afr_lock_transaction (afr_local_t *local)
+int
+afr_is_inodelk_transaction(afr_local_t *local)
{
int ret = 0;
@@ -636,13 +636,25 @@ afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
return 0;
}
+/* Update the inodelk bookkeeping for @child_index: mask the child's entry
+ * in the locked_nodes of int_lock's current domain with LOCKED_NO and,
+ * when local has an eager_lock array, zero the child's slot in it. */
+void
+afr_update_uninodelk (afr_local_t *local, afr_internal_lock_t *int_lock,
+                      int32_t child_index)
+{
+        afr_inodelk_t *domain_lk = afr_get_inodelk (int_lock,
+                                                    int_lock->domain);
+
+        domain_lk->locked_nodes[child_index] &= LOCKED_NO;
+
+        if (local->transaction.eager_lock != NULL)
+                local->transaction.eager_lock[child_index] = 0;
+}
+
static int32_t
afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
afr_local_t *local = NULL;
afr_internal_lock_t *int_lock = NULL;
- afr_inodelk_t *inodelk = NULL;
int32_t child_index = (long)cookie;
afr_private_t *priv = NULL;
@@ -665,11 +677,7 @@ afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
lkowner_utoa (&frame->root->lk_owner));
}
-
- inodelk = afr_get_inodelk (int_lock, int_lock->domain);
- inodelk->locked_nodes[child_index] &= LOCKED_NO;
- if (local->transaction.eager_lock)
- local->transaction.eager_lock[child_index] = 0;
+ afr_update_uninodelk (local, int_lock, child_index);
afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, xdata);
@@ -1712,7 +1720,7 @@ afr_unlock (call_frame_t *frame, xlator_t *this)
local = frame->local;
if (transaction_lk_op (local)) {
- if (is_afr_lock_transaction (local))
+ if (afr_is_inodelk_transaction(local))
afr_unlock_inodelk (frame, this);
else
afr_unlock_entrylk (frame, this);
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index fae65d9..27be045 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -17,6 +17,7 @@
#include "afr-transaction.h"
#include "afr-self-heal.h"
#include "afr-messages.h"
+#include "compound-fop-utils.h"
#include <signal.h>
@@ -32,6 +33,14 @@ gf_boolean_t
afr_changelog_pre_op_update (call_frame_t *frame, xlator_t *this);
int
+afr_changelog_call_count (afr_transaction_type type,
+ unsigned char *pre_op_subvols,
+ unsigned int child_count);
+int
+afr_post_op_unlock_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
+ afr_changelog_resume_t changelog_resume,
+ afr_xattrop_type_t op);
+int
afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
afr_changelog_resume_t changelog_resume,
afr_xattrop_type_t op);
@@ -820,14 +829,16 @@ afr_handle_quorum (call_frame_t *frame)
int
afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
{
- afr_private_t * priv = this->private;
- int i = 0;
- int ret = 0;
- int idx = 0;
- afr_local_t * local = NULL;
- dict_t *xattr = NULL;
- int nothing_failed = 1;
- gf_boolean_t need_undirty = _gf_false;
+ afr_private_t *priv = this->private;
+ afr_local_t *local = NULL;
+ dict_t *xattr = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int i = 0;
+ int ret = 0;
+ int idx = 0;
+ int nothing_failed = 1;
+ int piggyback = 0;
+ gf_boolean_t need_undirty = _gf_false;
afr_handle_quorum (frame);
local = frame->local;
@@ -892,8 +903,34 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
goto out;
}
- afr_changelog_do (frame, this, xattr, afr_changelog_post_op_done,
- AFR_TRANSACTION_POST_OP);
+ if (local->compound && local->fd) {
+ LOCK (&local->fd->lock);
+ {
+ fd_ctx = __afr_fd_ctx_get (local->fd, this);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] &&
+ local->transaction.eager_lock[i]) {
+ if (fd_ctx->lock_piggyback[i])
+ piggyback = 1;
+ }
+ if (piggyback == 1)
+ break;
+ }
+ }
+ UNLOCK (&local->fd->lock);
+ }
+
+ /* Do not compound if any brick got piggybacked lock as
+ * unlock should not be done for that. */
+ if (local->compound && !piggyback) {
+ afr_post_op_unlock_do (frame, this, xattr,
+ afr_changelog_post_op_done,
+ AFR_TRANSACTION_POST_OP);
+ } else {
+ afr_changelog_do (frame, this, xattr,
+ afr_changelog_post_op_done,
+ AFR_TRANSACTION_POST_OP);
+ }
out:
if (xattr)
dict_unref (xattr);
@@ -1188,6 +1225,299 @@ out:
}
+/*
+ * Reply handler for the compound request wound by afr_pre_op_fop_do(),
+ * i.e. the pre-op fxattrop (slot 0) packed together with the writev
+ * (slot 1).
+ *
+ * @cookie: index of the child that answered.
+ * @data:   compound_args_cbk_t reply; rsp_list[1] is the writev response.
+ *
+ * Records the write result against the answering child. Once
+ * afr_frame_return() reports no replies outstanding, the write is
+ * post-processed and the transaction resumed; if nothing failed, the fop
+ * is unwound to the caller through a detached fop frame.
+ * Always returns 0.
+ */
int
+afr_pre_op_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int op_ret, int op_errno,
+                       void *data, dict_t *xdata)
+{
+        afr_local_t         *local          = frame->local;
+        call_frame_t        *fop_frame      = NULL;
+        default_args_cbk_t  *write_args_cbk = NULL;
+        compound_args_cbk_t *args_cbk       = data;
+        int                  call_count     = -1;
+        int                  child_index    = (long) cookie;
+
+        if (local->pre_op_compat)
+                afr_changelog_pre_op_update (frame, this);
+
+        if (op_ret == -1) {
+                local->op_errno = op_errno;
+                afr_transaction_fop_failed (frame, this, child_index);
+        }
+
+        if ((op_ret == -1) && (args_cbk == NULL)) {
+                /* NOTE(review): a failed compound fop may arrive without a
+                 * reply payload; indexing rsp_list would then dereference
+                 * NULL, so record the failure from op_ret/op_errno instead.
+                 * Assumes afr_inode_write_fill() tolerates NULL iatt/xdata
+                 * when op_ret is -1 - confirm. */
+                afr_inode_write_fill (frame, this, child_index, op_ret,
+                                      op_errno, NULL, NULL, NULL);
+        } else {
+                write_args_cbk = &args_cbk->rsp_list[1];
+                /* Key the reply on the child that answered so each brick's
+                 * result lands in its own slot. */
+                afr_inode_write_fill (frame, this, child_index,
+                                      write_args_cbk->op_ret,
+                                      write_args_cbk->op_errno,
+                                      &write_args_cbk->prestat,
+                                      &write_args_cbk->poststat,
+                                      write_args_cbk->xdata);
+        }
+
+        call_count = afr_frame_return (frame);
+
+        if (call_count == 0) {
+                afr_process_post_writev (frame, this);
+                if (!afr_txn_nothing_failed (frame, this)) {
+                        /* Don't unwind until post-op is complete */
+                        local->transaction.resume (frame, this);
+                } else {
+                        /* frame change, place frame in post-op delay and
+                         * unwind */
+                        fop_frame = afr_transaction_detach_fop_frame (frame);
+                        afr_writev_copy_outvars (frame, fop_frame);
+                        local->transaction.resume (frame, this);
+                        afr_writev_unwind (fop_frame, this);
+                }
+        }
+        return 0;
+}
+
+/*
+ * Common preamble for winding a changelog xattrop.
+ *
+ * Computes, from the transaction type and pre_op[], how many children the
+ * xattrop goes to and stores it in *@call_count. If that is zero,
+ * @changelog_resume is invoked right away and -1 is returned so the caller
+ * winds nothing. Otherwise @xdata/@newloc_xdata are populated for @op, the
+ * count is saved in local->call_count, @changelog_resume is saved in the
+ * transaction, and 0 is returned.
+ */
+int
+afr_changelog_prepare (xlator_t *this, call_frame_t *frame, int *call_count,
+                       afr_changelog_resume_t changelog_resume,
+                       afr_xattrop_type_t op, dict_t **xdata,
+                       dict_t **newloc_xdata)
+{
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+        int            count = 0;
+
+        count = afr_changelog_call_count (local->transaction.type,
+                                          local->transaction.pre_op,
+                                          priv->child_count);
+        *call_count = count;
+
+        if (count == 0) {
+                changelog_resume (frame, this);
+                return -1;
+        }
+
+        afr_changelog_populate_xdata (frame, op, xdata, newloc_xdata);
+
+        local->call_count = count;
+        local->transaction.changelog_resume = changelog_resume;
+
+        return 0;
+}
+
+/*
+ * Wind the pre-op fxattrop and the fop itself (packed by
+ * afr_pack_fop_args()) as one two-slot compound fop to every child with
+ * pre_op[] set.
+ *
+ * @xattr:            changelog xattrs for the fxattrop in slot 0.
+ * @changelog_resume: handed to afr_changelog_prepare(), which invokes it
+ *                    at once (and nothing is wound) when no child needs
+ *                    the xattrop.
+ * @op:               xattrop type, used to populate xdata.
+ *
+ * If the compound request cannot be built, local->op_ret/op_errno are set
+ * and afr_unlock() is called with transaction.done as the lock callback.
+ * Always returns 0.
+ */
+int
+afr_pre_op_fop_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
+                   afr_changelog_resume_t changelog_resume,
+                   afr_xattrop_type_t op)
+{
+        afr_local_t        *local        = NULL;
+        afr_private_t      *priv         = NULL;
+        dict_t             *xdata        = NULL;
+        dict_t             *newloc_xdata = NULL;
+        compound_args_t    *args         = NULL;
+        int                 i            = 0;
+        int                 call_count   = 0;
+        afr_compound_cbk_t  compound_cbk = NULL;
+        int                 ret          = 0;
+        int                 op_errno     = ENOMEM;
+
+        local = frame->local;
+        priv = this->private;
+
+        /* A non-zero return means no child needs the pre-op and
+         * changelog_resume has already been called. On success
+         * local->call_count has been set to call_count. */
+        ret = afr_changelog_prepare (this, frame, &call_count, changelog_resume,
+                                     op, &xdata, &newloc_xdata);
+
+        if (ret)
+                return 0;
+
+        afr_save_lk_owner (frame);
+        frame->root->lk_owner =
+                local->transaction.main_frame->root->lk_owner;
+
+        args = compound_fop_alloc (2, GF_CFOP_XATTROP_WRITEV, NULL);
+
+        if (!args)
+                goto err;
+
+        /* pack pre-op part */
+        i = 0;
+        COMPOUND_PACK_ARGS (fxattrop, GF_FOP_FXATTROP,
+                            args, i,
+                            local->fd, GF_XATTROP_ADD_ARRAY,
+                            xattr, xdata);
+        i++;
+        /* pack whatever fop needs to be packed
+         * @compound_cbk holds the cbk that would need to be called
+         */
+        compound_cbk = afr_pack_fop_args (frame, args, local->op, i);
+        if (!compound_cbk) {
+                /* local->op could not be packed, so there is no callback
+                 * to wind with; fail rather than wind with NULL. */
+                op_errno = EINVAL;
+                goto err;
+        }
+
+        for (i = 0; i < priv->child_count; i++) {
+                /* Means lock did not succeed on this brick */
+                if (!local->transaction.pre_op[i])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, compound_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->compound,
+                                   args,
+                                   NULL);
+                if (!--call_count)
+                        break;
+        }
+
+        afr_compound_cleanup (args, xdata, newloc_xdata);
+        return 0;
+err:
+        local->internal_lock.lock_cbk = local->transaction.done;
+        local->op_ret = -1;
+        local->op_errno = op_errno;
+
+        afr_restore_lk_owner (frame);
+        afr_unlock (frame, this);
+
+        afr_compound_cleanup (args, xdata, newloc_xdata);
+        return 0;
+}
+
+/*
+ * Reply handler for the compound (post-op fxattrop + unlock) request wound
+ * by afr_post_op_unlock_do().
+ *
+ * @cookie is the index of the child that answered; op_ret, op_errno, @data
+ * and @xdata are not examined. The child's inodelk bookkeeping is updated
+ * and int_lock->lk_call_count is decremented under frame->lock. When it
+ * reaches zero, any pending transaction.resume_stub is resumed and
+ * int_lock->lock_cbk is invoked. Always returns 0.
+ */
+int
+afr_post_op_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                        int op_ret, int op_errno,
+                        void *data, dict_t *xdata)
+{
+        afr_local_t         *local       = frame->local;
+        afr_internal_lock_t *int_lock    = &local->internal_lock;
+        int32_t              child_index = (long) cookie;
+        int                  call_count  = -1;
+
+        afr_update_uninodelk (local, int_lock, child_index);
+
+        LOCK (&frame->lock);
+        {
+                call_count = --int_lock->lk_call_count;
+        }
+        UNLOCK (&frame->lock);
+
+        if (call_count == 0) {
+                if (local->transaction.resume_stub) {
+                        call_resume (local->transaction.resume_stub);
+                        local->transaction.resume_stub = NULL;
+                }
+                gf_msg_trace (this->name, 0,
+                              "All internal locks unlocked");
+                int_lock->lock_cbk (frame, this);
+        }
+
+        return 0;
+}
+
+/*
+ * Wind the post-op fxattrop and, for inodelk transactions, the unlock as
+ * one two-slot compound fop to every child with pre_op[] set; replies are
+ * handled by afr_post_op_unlock_cbk().
+ *
+ * @xattr:            changelog xattrs for the fxattrop in slot 0.
+ * @changelog_resume: passed to afr_changelog_prepare(), which invokes it at
+ *                    once (and nothing is wound) when no child needs the
+ *                    xattrop.
+ * @op:               xattrop type, used to populate xdata.
+ *
+ * If the compound request cannot be allocated, local->op_ret/op_errno are
+ * set to -1/ENOMEM and afr_changelog_post_op_done() is called instead.
+ * Always returns 0.
+ */
+int
+afr_post_op_unlock_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
+                       afr_changelog_resume_t changelog_resume,
+                       afr_xattrop_type_t op)
+{
+        afr_local_t         *local        = frame->local;
+        afr_private_t       *priv         = this->private;
+        afr_internal_lock_t *int_lock     = &local->internal_lock;
+        afr_inodelk_t       *domain_lk    = NULL;
+        compound_args_t     *args         = NULL;
+        dict_t              *xdata        = NULL;
+        dict_t              *newloc_xdata = NULL;
+        struct gf_flock     *unlock_flock = NULL;
+        struct gf_flock      range_flock  = {0,};
+        struct gf_flock      whole_flock  = {0,};
+        int                  pending      = 0;
+        int                  child        = 0;
+        int                  slot         = 0;
+
+        if (afr_is_inodelk_transaction(local)) {
+                domain_lk = afr_get_inodelk (int_lock, int_lock->domain);
+
+                /* One unlock request covering the start/len recorded in
+                 * the domain's flock ... */
+                range_flock.l_type  = F_UNLCK;
+                range_flock.l_start = domain_lk->flock.l_start;
+                range_flock.l_len   = domain_lk->flock.l_len;
+                /* ... and one with start/len left at zero. */
+                whole_flock.l_type  = F_UNLCK;
+        }
+
+        if (afr_changelog_prepare (this, frame, &pending, changelog_resume,
+                                   op, &xdata, &newloc_xdata) != 0)
+                return 0;
+
+        int_lock->lk_call_count = pending;
+        int_lock->lock_cbk = local->transaction.done;
+
+        args = compound_fop_alloc (2, GF_CFOP_XATTROP_UNLOCK, NULL);
+        if (args == NULL) {
+                local->op_ret = -1;
+                local->op_errno = ENOMEM;
+                afr_changelog_post_op_done (frame, this);
+                goto out;
+        }
+
+        /* Slot 0: the post-op xattrop. */
+        COMPOUND_PACK_ARGS (fxattrop, GF_FOP_FXATTROP,
+                            args, slot,
+                            local->fd, GF_XATTROP_ADD_ARRAY,
+                            xattr, xdata);
+        slot++;
+
+        /* With eager_lock_on set the zero start/len flock is used. */
+        unlock_flock = local->transaction.eager_lock_on ? &whole_flock
+                                                        : &range_flock;
+
+        /* Slot 1: the unlock, fd-based when local has an fd. */
+        if (afr_is_inodelk_transaction(local)) {
+                if (local->fd) {
+                        COMPOUND_PACK_ARGS (finodelk, GF_FOP_FINODELK,
+                                            args, slot,
+                                            int_lock->domain, local->fd,
+                                            F_SETLK, unlock_flock, NULL);
+                } else {
+                        COMPOUND_PACK_ARGS (inodelk, GF_FOP_INODELK,
+                                            args, slot,
+                                            int_lock->domain, &local->loc,
+                                            F_SETLK, unlock_flock, NULL);
+                }
+        }
+
+        for (child = 0; child < priv->child_count; child++) {
+                /* pre_op[] has to be true for all nodes that were
+                 * successfully locked. */
+                if (!local->transaction.pre_op[child])
+                        continue;
+                STACK_WIND_COOKIE (frame, afr_post_op_unlock_cbk,
+                                   (void *) (long) child,
+                                   priv->children[child],
+                                   priv->children[child]->fops->compound,
+                                   args,
+                                   NULL);
+                if (--pending == 0)
+                        break;
+        }
+out:
+        afr_compound_cleanup (args, xdata, newloc_xdata);
+        return 0;
+}
+
+int
afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
afr_changelog_resume_t changelog_resume,
afr_xattrop_type_t op)
@@ -1198,23 +1528,16 @@ afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
dict_t *newloc_xdata = NULL;
int i = 0;
int call_count = 0;
+ int ret = 0;
local = frame->local;
priv = this->private;
- call_count = afr_changelog_call_count (local->transaction.type,
- local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- changelog_resume (frame, this);
- return 0;
- }
-
- afr_changelog_populate_xdata (frame, op, &xdata, &newloc_xdata);
- local->call_count = call_count;
+ ret = afr_changelog_prepare (this, frame, &call_count, changelog_resume,
+ op, &xdata, &newloc_xdata);
- local->transaction.changelog_resume = changelog_resume;
+ if (ret)
+ return 0;
for (i = 0; i < priv->child_count; i++) {
if (!local->transaction.pre_op[i])
@@ -1379,8 +1702,21 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
goto next;
}
- afr_changelog_do (frame, this, xdata_req, afr_transaction_perform_fop,
- AFR_TRANSACTION_PRE_OP);
+ /* Till here we have already decided if pre-op needs to be done,
+ * based on various criteria. The only thing that needs to be checked
+ * now on is whether compound-fops is enabled or not.
+ * If it is, then perform pre-op and fop together for writev op.
+ */
+ if (afr_can_compound_pre_op_and_op (priv, local->op)) {
+ local->compound = _gf_true;
+ afr_pre_op_fop_do (frame, this, xdata_req,
+ afr_transaction_perform_fop,
+ AFR_TRANSACTION_PRE_OP);
+ } else {
+ afr_changelog_do (frame, this, xdata_req,
+ afr_transaction_perform_fop,
+ AFR_TRANSACTION_PRE_OP);
+ }
if (xdata_req)
dict_unref (xdata_req);
@@ -1737,10 +2073,6 @@ out:
void
-afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd,
- call_stub_t *stub);
-
-void
afr_delayed_changelog_wake_up_cbk (void *data)
{
fd_t *fd = NULL;
@@ -2030,7 +2362,6 @@ afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd)
afr_delayed_changelog_post_op (this, NULL, fd, NULL);
}
-
int
afr_transaction_resume (call_frame_t *frame, xlator_t *this)
{
@@ -2081,7 +2412,7 @@ afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
- static gf_boolean_t
+static gf_boolean_t
afr_locals_overlap (afr_local_t *local1, afr_local_t *local2)
{
uint64_t start1 = local1->transaction.start;
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index ca8fcfe..db82456 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -59,4 +59,8 @@ void
afr_pick_error_xdata (afr_local_t *local, afr_private_t *priv,
inode_t *inode1, unsigned char *readable1,
inode_t *inode2, unsigned char *readable2);
+int
+afr_pre_op_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno,
+ void *data, dict_t *xdata);
#endif /* __TRANSACTION_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 6f4783c..d38d59c 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -209,6 +209,9 @@ reconfigure (xlator_t *this, dict_t *options)
out);
GF_OPTION_RECONF ("locking-scheme", priv->locking_scheme, options, str,
out);
+ GF_OPTION_RECONF ("use-compound-fops", priv->use_compound_fops,
+ options, bool,
+ out);
GF_OPTION_RECONF ("granular-entry-heal", priv->esh_granular, options,
bool, out);
@@ -412,6 +415,8 @@ init (xlator_t *this)
GF_OPTION_INIT ("pre-op-compat", priv->pre_op_compat, bool, out);
GF_OPTION_INIT ("locking-scheme", priv->locking_scheme, str, out);
+ GF_OPTION_INIT ("use-compound-fops", priv->use_compound_fops,
+ bool, out);
GF_OPTION_INIT ("granular-entry-heal", priv->esh_granular, bool, out);
GF_OPTION_INIT ("eager-lock", priv->eager_lock, bool, out);
@@ -932,5 +937,12 @@ struct volume_options options[] = {
" with identical mtime and size in more than half the "
"number of bricks in the replica.",
},
+ { .key = {"use-compound-fops"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .description = "Use compound fops framework to modify afr "
+ "transaction such that network roundtrips are "
+ "reduced, thus improving the performance.",
+ },
{ .key = {NULL} },
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 4bffc30..d04775d 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -45,6 +45,11 @@ typedef int (*afr_inode_refresh_cbk_t) (call_frame_t *frame, xlator_t *this, int
typedef int (*afr_changelog_resume_t) (call_frame_t *frame, xlator_t *this);
+typedef int (*afr_compound_cbk_t) (call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ void *data, dict_t *xdata);
+
+
#define alloca0(size) ({void *__ptr; __ptr = alloca(size); memset(__ptr, 0, size); __ptr;})
#define AFR_COUNT(array,max) ({int __i; int __res = 0; for (__i = 0; __i < max; __i++) if (array[__i]) __res++; __res;})
#define AFR_INTERSECT(dst,src1,src2,max) ({int __i; for (__i = 0; __i < max; __i++) dst[__i] = src1[__i] && src2[__i];})
@@ -153,6 +158,7 @@ typedef struct _afr_private {
gf_boolean_t use_afr_in_pump;
char *locking_scheme;
gf_boolean_t esh_granular;
+ gf_boolean_t use_compound_fops;
} afr_private_t;
@@ -782,6 +788,7 @@ typedef struct _afr_local {
call_frame_t *heal_frame;
gf_boolean_t need_full_crawl;
+ gf_boolean_t compound;
} afr_local_t;
@@ -1161,4 +1168,37 @@ afr_get_msg_id (char *op_type);
int
afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,
inode_t *inode);
+
+void
+afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+void
+afr_process_post_writev (call_frame_t *frame, xlator_t *this);
+
+void
+afr_writev_unwind (call_frame_t *frame, xlator_t *this);
+
+void
+afr_writev_copy_outvars (call_frame_t *src_frame, call_frame_t *dst_frame);
+
+void
+afr_update_uninodelk (afr_local_t *local, afr_internal_lock_t *int_lock,
+ int32_t child_index);
+gf_boolean_t
+afr_can_compound_pre_op_and_op (afr_private_t *priv, glusterfs_fop_t fop);
+
+afr_compound_cbk_t
+afr_pack_fop_args (call_frame_t *frame, compound_args_t *args,
+ glusterfs_fop_t fop, int index);
+int
+afr_is_inodelk_transaction(afr_local_t *local);
+
+afr_fd_ctx_t *
+__afr_fd_ctx_get (fd_t *fd, xlator_t *this);
+
+void
+afr_compound_cleanup (compound_args_t *args, dict_t *xdata,
+ dict_t *newloc_xdata);
+
#endif /* __AFR_H__ */
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index a93c8d4..bd9b21b 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -2970,6 +2970,13 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.op_version = GD_OP_VERSION_3_8_0,
.flags = OPT_FLAG_CLIENT_OPT
},
+ { .key = "cluster.use-compound-fops",
+ .voltype = "cluster/replicate",
+ .value = "off",
+ .type = DOC,
+ .op_version = GD_OP_VERSION_3_8_4,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
{ .key = NULL
}
};
--
1.7.1