Blob Blame History Raw
From 80eea6bf07445a290021a8936a01cfd7aa696c1b Mon Sep 17 00:00:00 2001
From: Anuradha Talur <atalur@redhat.com>
Date: Thu, 25 Aug 2016 11:46:25 +0530
Subject: [PATCH 79/86] afr: Consume compound fops in afr transaction

	Backport of: http://review.gluster.org/15014

Change-Id: I7920fb69ad401f3c159565ad70b44f2b31a763a9
BUG: 1360978
Signed-off-by: Anuradha Talur <atalur@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/84816
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
---
 libglusterfs/src/globals.h                      |    2 +
 xlators/cluster/afr/src/afr-common.c            |   55 ++++
 xlators/cluster/afr/src/afr-inode-write.c       |   73 +++--
 xlators/cluster/afr/src/afr-lk-common.c         |   26 +-
 xlators/cluster/afr/src/afr-transaction.c       |  391 +++++++++++++++++++++--
 xlators/cluster/afr/src/afr-transaction.h       |    4 +
 xlators/cluster/afr/src/afr.c                   |   12 +
 xlators/cluster/afr/src/afr.h                   |   40 +++
 xlators/mgmt/glusterd/src/glusterd-volume-set.c |    7 +
 9 files changed, 546 insertions(+), 64 deletions(-)

diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h
index b4ad9b2..f1f8ee1 100644
--- a/libglusterfs/src/globals.h
+++ b/libglusterfs/src/globals.h
@@ -79,6 +79,8 @@
 
 #define GD_OP_VERSION_3_8_0    30800 /* Op-version for GlusterFS 3.8.0 */
 
+#define GD_OP_VERSION_3_8_4    30804 /* Op-version for GlusterFS 3.8.4 */
+
 #define GD_OP_VERSION_3_9_0    30900 /* Op-version for GlusterFS 3.9.0 */
 
 #include "xlator.h"
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 8f0de59..db6a350 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -43,6 +43,7 @@
 #include "afr-self-heal.h"
 #include "afr-self-heald.h"
 #include "afr-messages.h"
+#include "compound-fop-utils.h"
 
 call_frame_t *
 afr_copy_frame (call_frame_t *base)
@@ -4475,6 +4476,7 @@ afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
 
         local->need_full_crawl = _gf_false;
 
+        local->compound = _gf_false;
         INIT_LIST_HEAD (&local->healer);
 	return 0;
 out:
@@ -4626,6 +4628,7 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this)
         if (!local->pending)
                 goto out;
 
+        local->compound = _gf_false;
 	INIT_LIST_HEAD (&local->transaction.eager_locked);
 
         ret = 0;
@@ -5419,3 +5422,55 @@ afr_get_msg_id (char *op_type)
                 return AFR_MSG_ADD_BRICK_STATUS;
         return -1;
 }
+
+gf_boolean_t
+afr_can_compound_pre_op_and_op (afr_private_t *priv, glusterfs_fop_t fop)
+{
+        /*
+         * Pre-op and fop are compounded only when arbiter_count is zero,
+         * use_compound_fops is set and the fop is GF_FOP_WRITE.
+         */
+        gf_boolean_t    allowed = _gf_false;
+
+        if ((priv->arbiter_count == 0) && priv->use_compound_fops) {
+                if (fop == GF_FOP_WRITE)
+                        allowed = _gf_true;
+        }
+
+        return allowed;
+}
+
+afr_compound_cbk_t
+afr_pack_fop_args (call_frame_t *frame, compound_args_t *args,
+                   glusterfs_fop_t fop, int index)
+{
+        /* Pack the arguments of @fop into slot @index of @args and return
+         * the callback to wind with. Only GF_FOP_WRITE is handled; any
+         * other fop packs nothing and yields NULL. */
+        afr_local_t     *local  = frame->local;
+
+        if (fop != GF_FOP_WRITE)
+                return NULL;
+
+        COMPOUND_PACK_ARGS (writev, GF_FOP_WRITE,
+                            args, index,
+                            local->fd, local->cont.writev.vector,
+                            local->cont.writev.count,
+                            local->cont.writev.offset,
+                            local->cont.writev.flags,
+                            local->cont.writev.iobref,
+                            local->xdata_req);
+        return afr_pre_op_writev_cbk;
+}
+/* Clean up @args and unref both dicts; NULL arguments are skipped. */
+void
+afr_compound_cleanup (compound_args_t *args, dict_t *xdata,
+                      dict_t *newloc_xdata)
+{
+        if (args)
+                compound_args_cleanup (args);
+	if (xdata)
+		dict_unref (xdata);
+        if (newloc_xdata)
+                dict_unref (newloc_xdata);
+}
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 24ab52f..200b420 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -292,21 +292,16 @@ afr_writev_handle_short_writes (call_frame_t *frame, xlator_t *this)
         }
 }
 
-int
-afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+void
+afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index,
                      int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
                      struct iatt *postbuf, dict_t *xdata)
 {
-        afr_local_t *   local = NULL;
-        call_frame_t    *fop_frame = NULL;
-        int child_index = (long) cookie;
-        int call_count  = -1;
         int ret = 0;
+        afr_local_t *local = frame->local;
         uint32_t open_fd_count = 0;
         uint32_t write_is_append = 0;
 
-        local = frame->local;
-
         LOCK (&frame->lock);
         {
                 __afr_inode_write_fill (frame, this, child_index, op_ret,
@@ -324,32 +319,60 @@ afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
 				       &open_fd_count);
 		if (ret == -1)
 			goto unlock;
-		if ((open_fd_count > local->open_fd_count)) {
-			local->open_fd_count = open_fd_count;
-			local->update_open_fd_count = _gf_true;
+		if (open_fd_count > local->open_fd_count) {
+                        local->open_fd_count = open_fd_count;
+                        local->update_open_fd_count = _gf_true;
 		}
         }
 unlock:
         UNLOCK (&frame->lock);
+}
 
-        call_count = afr_frame_return (frame);
+void
+afr_process_post_writev (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *local = NULL;
 
-        if (call_count == 0) {
-		if (!local->stable_write && !local->append_write)
-			/* An appended write removes the necessity to
-			   fsync() the file. This is because self-heal
-			   has the logic to check for larger file when
-			   the xattrs are not reliably pointing at
-			   a stale file.
-			*/
-			afr_fd_report_unstable_write (this, local->fd);
+        local = frame->local;
 
-		__afr_inode_write_finalize (frame, this);
+        if (!local->stable_write && !local->append_write)
+                /* An appended write removes the necessity to
+                   fsync() the file. This is because self-heal
+                   has the logic to check for larger file when
+                   the xattrs are not reliably pointing at
+                   a stale file.
+                */
+                afr_fd_report_unstable_write (this, local->fd);
+
+        __afr_inode_write_finalize (frame, this);
 
-                afr_writev_handle_short_writes (frame, this);
+        afr_writev_handle_short_writes (frame, this);
 
-                if (local->update_open_fd_count)
-                        afr_handle_open_fd_count (frame, this);
+        if (local->update_open_fd_count)
+                afr_handle_open_fd_count (frame, this);
+
+}
+
+int
+afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                     struct iatt *postbuf, dict_t *xdata)
+{
+        afr_local_t     *local = NULL;
+        call_frame_t    *fop_frame = NULL;
+        int child_index = (long) cookie;
+        int call_count  = -1;
+        int ret = 0;
+
+        local = frame->local;
+
+        afr_inode_write_fill (frame, this, child_index, op_ret, op_errno,
+                              prebuf, postbuf, xdata);
+
+        call_count = afr_frame_return (frame);
+
+        if (call_count == 0) {
+                afr_process_post_writev (frame, this);
 
                 if (!afr_txn_nothing_failed (frame, this)) {
                         //Don't unwind until post-op is complete
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index c2a5f52..0bd9ffe 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -463,8 +463,8 @@ transaction_lk_op (afr_local_t *local)
 
 }
 
-static int
-is_afr_lock_transaction (afr_local_t *local)
+int
+afr_is_inodelk_transaction(afr_local_t *local)
 {
         int ret = 0;
 
@@ -636,13 +636,25 @@ afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
         return 0;
 }
 
+void
+afr_update_uninodelk (afr_local_t *local, afr_internal_lock_t *int_lock,
+                    int32_t child_index)
+{
+        /* Mask the child's locked_nodes entry with LOCKED_NO and zero its
+         * eager_lock slot when that array pointer is non-NULL. */
+        afr_inodelk_t *lk = afr_get_inodelk (int_lock, int_lock->domain);
+
+        lk->locked_nodes[child_index] &= LOCKED_NO;
+        if (local->transaction.eager_lock)
+                local->transaction.eager_lock[child_index] = 0;
+}
+
 static int32_t
 afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                         int32_t op_ret, int32_t op_errno, dict_t *xdata)
 {
         afr_local_t         *local = NULL;
         afr_internal_lock_t *int_lock = NULL;
-        afr_inodelk_t       *inodelk = NULL;
         int32_t             child_index = (long)cookie;
         afr_private_t       *priv = NULL;
 
@@ -665,11 +677,7 @@ afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                         lkowner_utoa (&frame->root->lk_owner));
         }
 
-
-        inodelk = afr_get_inodelk (int_lock, int_lock->domain);
-        inodelk->locked_nodes[child_index] &= LOCKED_NO;
-        if (local->transaction.eager_lock)
-                local->transaction.eager_lock[child_index] = 0;
+        afr_update_uninodelk (local, int_lock, child_index);
 
         afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, xdata);
 
@@ -1712,7 +1720,7 @@ afr_unlock (call_frame_t *frame, xlator_t *this)
         local = frame->local;
 
         if (transaction_lk_op (local)) {
-                if (is_afr_lock_transaction (local))
+                if (afr_is_inodelk_transaction(local))
                         afr_unlock_inodelk (frame, this);
                 else
                         afr_unlock_entrylk (frame, this);
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index fae65d9..27be045 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -17,6 +17,7 @@
 #include "afr-transaction.h"
 #include "afr-self-heal.h"
 #include "afr-messages.h"
+#include "compound-fop-utils.h"
 
 #include <signal.h>
 
@@ -32,6 +33,14 @@ gf_boolean_t
 afr_changelog_pre_op_update (call_frame_t *frame, xlator_t *this);
 
 int
+afr_changelog_call_count (afr_transaction_type type,
+                          unsigned char *pre_op_subvols,
+                          unsigned int child_count);
+int
+afr_post_op_unlock_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
+                       afr_changelog_resume_t changelog_resume,
+                       afr_xattrop_type_t op);
+int
 afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
 		  afr_changelog_resume_t changelog_resume,
                   afr_xattrop_type_t op);
@@ -820,14 +829,16 @@ afr_handle_quorum (call_frame_t *frame)
 int
 afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
 {
-        afr_private_t * priv = this->private;
-        int i          = 0;
-	int ret = 0;
-	int idx = 0;
-        afr_local_t *  local = NULL;
-        dict_t        *xattr = NULL;
-        int            nothing_failed = 1;
-	gf_boolean_t   need_undirty = _gf_false;
+        afr_private_t           *priv           = this->private;
+        afr_local_t             *local          = NULL;
+        dict_t                  *xattr          = NULL;
+        afr_fd_ctx_t            *fd_ctx         = NULL;
+        int                     i               = 0;
+        int                     ret             = 0;
+        int                     idx             = 0;
+        int                     nothing_failed  = 1;
+        int                     piggyback       = 0;
+        gf_boolean_t            need_undirty    = _gf_false;
 
         afr_handle_quorum (frame);
         local = frame->local;
@@ -892,8 +903,34 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
 		goto out;
 	}
 
-	afr_changelog_do (frame, this, xattr, afr_changelog_post_op_done,
-                          AFR_TRANSACTION_POST_OP);
+        if (local->compound && local->fd) {
+                LOCK (&local->fd->lock);
+                {
+                        fd_ctx = __afr_fd_ctx_get (local->fd, this);
+                        for (i = 0; i < priv->child_count; i++) {
+                                if (local->transaction.pre_op[i] &&
+                                    local->transaction.eager_lock[i]) {
+                                        if (fd_ctx->lock_piggyback[i])
+                                                piggyback = 1;
+                                }
+                                if (piggyback == 1)
+                                        break;
+                        }
+                }
+                UNLOCK (&local->fd->lock);
+        }
+
+        /* Do not compound if any brick got piggybacked lock as
+         * unlock should not be done for that. */
+        if (local->compound && !piggyback) {
+                afr_post_op_unlock_do (frame, this, xattr,
+                                       afr_changelog_post_op_done,
+                                       AFR_TRANSACTION_POST_OP);
+        } else {
+                afr_changelog_do (frame, this, xattr,
+                                  afr_changelog_post_op_done,
+                                  AFR_TRANSACTION_POST_OP);
+        }
 out:
 	if (xattr)
                 dict_unref (xattr);
@@ -1188,6 +1225,299 @@ out:
 }
 
 int
+afr_pre_op_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int op_ret, int op_errno, void *data, dict_t *xdata)
+{
+        /* Callback of the compounded pre-op (fxattrop) + writev wound by
+         * afr_pre_op_fop_do(); cookie carries the replying child's index. */
+        afr_local_t *local = frame->local;
+        call_frame_t    *fop_frame = NULL;
+        default_args_cbk_t *write_args_cbk = NULL;
+        compound_args_cbk_t *args_cbk = data;
+        int call_count = -1;
+        int child_index = (long) cookie;
+
+        if (local->pre_op_compat)
+                afr_changelog_pre_op_update (frame, this);
+        if (op_ret == -1) {
+                local->op_errno = op_errno;
+                afr_transaction_fop_failed (frame, this, child_index);
+        }
+        if (args_cbk == NULL) {
+                /* no per-fop replies to read; record the overall result */
+                afr_inode_write_fill (frame, this, child_index, op_ret,
+                                      op_errno, NULL, NULL, NULL);
+        } else {
+                /* slot 1 holds the writev reply (slot 0 is the pre-op) */
+                write_args_cbk = &args_cbk->rsp_list[1];
+                afr_inode_write_fill (frame, this, child_index,
+                                      write_args_cbk->op_ret,
+                                      write_args_cbk->op_errno,
+                                      &write_args_cbk->prestat,
+                                      &write_args_cbk->poststat,
+                                      write_args_cbk->xdata);
+        }
+        call_count = afr_frame_return (frame);
+        if (call_count == 0) {
+                afr_process_post_writev (frame, this);
+                if (!afr_txn_nothing_failed (frame, this)) {
+                        /* Don't unwind until post-op is complete */
+                        local->transaction.resume (frame, this);
+                } else {
+                        /* frame change, place frame in post-op delay and unwind */
+                        fop_frame = afr_transaction_detach_fop_frame (frame);
+                        afr_writev_copy_outvars (frame, fop_frame);
+                        local->transaction.resume (frame, this);
+                        afr_writev_unwind (fop_frame, this);
+                }
+        }
+        return 0;
+}
+
+int
+afr_changelog_prepare (xlator_t *this, call_frame_t *frame, int *call_count,
+                       afr_changelog_resume_t changelog_resume,
+                       afr_xattrop_type_t op, dict_t **xdata,
+                       dict_t **newloc_xdata)
+{
+        /* Shared setup before winding a changelog op. Computes *call_count;
+         * when it is zero, calls changelog_resume and returns -1. Otherwise
+         * populates the xdata dicts, stores the count and the resume cbk in
+         * the local, and returns 0. */
+        afr_local_t   *local = frame->local;
+        afr_private_t *priv  = this->private;
+        int            winds = 0;
+
+        winds = afr_changelog_call_count (local->transaction.type,
+                                          local->transaction.pre_op,
+                                          priv->child_count);
+        *call_count = winds;
+        if (winds == 0) {
+                changelog_resume (frame, this);
+                return -1;
+        }
+        afr_changelog_populate_xdata (frame, op, xdata, newloc_xdata);
+        local->call_count = winds;
+        local->transaction.changelog_resume = changelog_resume;
+        return 0;
+}
+/* Wind the pre-op fxattrop and the fop itself as a single compound fop. */
+int
+afr_pre_op_fop_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
+                   afr_changelog_resume_t changelog_resume,
+                   afr_xattrop_type_t op)
+{
+        afr_local_t *local = NULL;
+        afr_private_t *priv = NULL;
+        dict_t *xdata = NULL;
+        dict_t *newloc_xdata = NULL;
+        compound_args_t *args = NULL;
+        int i = 0, call_count = 0;
+        afr_compound_cbk_t compound_cbk;
+        int ret = 0;
+        int op_errno = ENOMEM;
+
+        local = frame->local;
+        priv = this->private;
+
+        /* Non-zero return: nothing to wind, changelog_resume already ran */
+        ret = afr_changelog_prepare (this, frame, &call_count, changelog_resume,
+                                     op, &xdata, &newloc_xdata);
+
+        if (ret)
+                return 0;
+
+        local->call_count = call_count; /* same value prepare stored */
+
+        afr_save_lk_owner (frame);
+        frame->root->lk_owner =
+                local->transaction.main_frame->root->lk_owner;
+        /* two slots: [0] the pre-op fxattrop, [1] the fop being compounded */
+        args = compound_fop_alloc (2, GF_CFOP_XATTROP_WRITEV, NULL);
+
+        if (!args)
+                goto err;
+
+        /* pack pre-op part */
+        i = 0;
+        COMPOUND_PACK_ARGS (fxattrop, GF_FOP_FXATTROP,
+                            args, i,
+                            local->fd, GF_XATTROP_ADD_ARRAY,
+                            xattr, xdata);
+        i++;
+        /* pack whatever fop needs to be packed
+         * @compound_cbk holds the cbk that would need to be called
+         */
+        compound_cbk = afr_pack_fop_args (frame, args, local->op, i);
+
+        for (i = 0; i < priv->child_count; i++) {
+                /* Means lock did not succeed on this brick */
+                if (!local->transaction.pre_op[i])
+                        continue;
+
+                STACK_WIND_COOKIE (frame, compound_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->compound,
+                                   args,
+                                   NULL);
+                if (!--call_count)
+                        break;
+        }
+        /* all winds issued: clean up args and unref the xdata dicts */
+        afr_compound_cleanup (args, xdata, newloc_xdata);
+        return 0;
+err:
+	local->internal_lock.lock_cbk = local->transaction.done;
+	local->op_ret = -1;
+	local->op_errno = op_errno;
+
+        afr_restore_lk_owner (frame);
+	afr_unlock (frame, this);
+
+        afr_compound_cleanup (args, xdata, newloc_xdata);
+	return 0;
+}
+
+int
+afr_post_op_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int op_ret, int op_errno,
+                       void *data, dict_t *xdata)
+{
+        /*
+         * Callback for the compound fop wound by afr_post_op_unlock_do().
+         * The cookie carries the index of the child that replied. That
+         * child's lock bookkeeping is cleared and, once the last reply is
+         * in, any stashed resume stub is resumed and the internal-lock
+         * completion callback is invoked. op_ret, op_errno, data and xdata
+         * are not examined here.
+         */
+        afr_local_t         *local       = frame->local;
+        afr_internal_lock_t *int_lock    = &local->internal_lock;
+        int32_t              child_index = (long) cookie;
+        int                  call_count  = -1;
+
+        afr_update_uninodelk (local, int_lock, child_index);
+
+        /* lk_call_count is decremented by every reply; do it under lock */
+        LOCK (&frame->lock);
+        {
+                call_count = --int_lock->lk_call_count;
+        }
+        UNLOCK (&frame->lock);
+
+        /* only the reply that brings the count to zero proceeds */
+        if (call_count == 0) {
+                if (local->transaction.resume_stub) {
+                        call_resume (local->transaction.resume_stub);
+                        local->transaction.resume_stub = NULL;
+                }
+                gf_msg_trace (this->name, 0,
+                              "All internal locks unlocked");
+                /* lock_cbk was set by afr_post_op_unlock_do() */
+                int_lock->lock_cbk (frame, this);
+        }
+
+        return 0;
+}
+/* Wind the post-op fxattrop and the inodelk unlock as one compound fop. */
+int
+afr_post_op_unlock_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
+		       afr_changelog_resume_t changelog_resume,
+                       afr_xattrop_type_t op)
+{
+	afr_local_t             *local          = NULL;
+	afr_private_t           *priv           = NULL;
+        dict_t                  *xdata          = NULL;
+        dict_t                  *newloc_xdata   = NULL;
+        compound_args_t         *args           = NULL;
+        afr_internal_lock_t     *int_lock       = NULL;
+        afr_inodelk_t           *inodelk        = NULL;
+        struct gf_flock         *flock_use      = NULL;
+	int                     i               = 0;
+	int                     call_count      = 0;
+        struct gf_flock         flock           = {0,};
+        struct gf_flock         full_flock      = {0,};
+        int                     ret             = 0;
+
+	local = frame->local;
+	priv = this->private;
+        int_lock = &local->internal_lock;
+        /* flock mirrors the held range; full_flock keeps l_start/l_len zero */
+        if (afr_is_inodelk_transaction(local)) {
+                inodelk = afr_get_inodelk (int_lock, int_lock->domain);
+
+                flock.l_start = inodelk->flock.l_start;
+                flock.l_len   = inodelk->flock.l_len;
+                flock.l_type  = F_UNLCK;
+                full_flock.l_type = F_UNLCK;
+
+        }
+
+        ret = afr_changelog_prepare (this, frame, &call_count, changelog_resume,
+                                     op, &xdata, &newloc_xdata);
+
+        if (ret)
+                return 0;
+        /* one unlock reply is expected per wind; see afr_post_op_unlock_cbk */
+        int_lock->lk_call_count = call_count;
+
+        int_lock->lock_cbk = local->transaction.done;
+        /* two slots: [0] the post-op fxattrop, [1] the unlock */
+        args = compound_fop_alloc (2, GF_CFOP_XATTROP_UNLOCK, NULL);
+
+        if (!args) {
+		local->op_ret = -1;
+		local->op_errno = ENOMEM;
+		afr_changelog_post_op_done (frame, this);
+		goto out;
+	}
+
+        i = 0;
+        COMPOUND_PACK_ARGS (fxattrop, GF_FOP_FXATTROP,
+                            args, i,
+                            local->fd, GF_XATTROP_ADD_ARRAY,
+                            xattr, xdata);
+        i++;
+        if (!local->transaction.eager_lock_on)
+                flock_use = &flock;       /* unlock the saved range */
+        else
+                flock_use = &full_flock;  /* unlock with zero start/len */
+
+        if (afr_is_inodelk_transaction(local)) {
+                if (local->fd) {
+                        COMPOUND_PACK_ARGS (finodelk, GF_FOP_FINODELK,
+                                            args, i,
+                                            int_lock->domain, local->fd,
+                                            F_SETLK, flock_use, NULL);
+                } else {
+                        COMPOUND_PACK_ARGS (inodelk, GF_FOP_INODELK,
+                                            args, i,
+                                            int_lock->domain, &local->loc,
+                                            F_SETLK, flock_use, NULL);
+                }
+        }
+
+        for (i = 0; i < priv->child_count; i++) {
+                /* pre_op[i] has to be true for all nodes that were
+                 * successfully locked. */
+                if (!local->transaction.pre_op[i])
+                        continue;
+                STACK_WIND_COOKIE (frame, afr_post_op_unlock_cbk,
+                                   (void *) (long) i,
+                                   priv->children[i],
+                                   priv->children[i]->fops->compound,
+                                   args,
+                                   NULL);
+                if (!--call_count)
+                        break;
+        }
+out:
+        afr_compound_cleanup (args, xdata, newloc_xdata);
+        return 0;
+}
+
+int
 afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
 		  afr_changelog_resume_t changelog_resume,
                   afr_xattrop_type_t op)
@@ -1198,23 +1528,16 @@ afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
         dict_t *newloc_xdata = NULL;
 	int i = 0;
 	int call_count = 0;
+        int ret = 0;
 
 	local = frame->local;
 	priv = this->private;
 
-        call_count = afr_changelog_call_count (local->transaction.type,
-					       local->transaction.pre_op,
-					       priv->child_count);
-
-	if (call_count == 0) {
-		changelog_resume (frame, this);
-		return 0;
-	}
-
-        afr_changelog_populate_xdata (frame, op, &xdata, &newloc_xdata);
-	local->call_count = call_count;
+        ret = afr_changelog_prepare (this, frame, &call_count, changelog_resume,
+                                     op, &xdata, &newloc_xdata);
 
-	local->transaction.changelog_resume = changelog_resume;
+        if (ret)
+                return 0;
 
         for (i = 0; i < priv->child_count; i++) {
                 if (!local->transaction.pre_op[i])
@@ -1379,8 +1702,21 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
 		goto next;
 	}
 
-	afr_changelog_do (frame, this, xdata_req, afr_transaction_perform_fop,
-                          AFR_TRANSACTION_PRE_OP);
+	/* Till here we have already decided if pre-op needs to be done,
+         * based on various criteria. The only thing that needs to be checked
+         * now on is whether compound-fops is enabled or not.
+         * If it is, then perform pre-op and fop together for writev op.
+         */
+        if (afr_can_compound_pre_op_and_op (priv, local->op)) {
+                local->compound = _gf_true;
+                afr_pre_op_fop_do (frame, this, xdata_req,
+                                   afr_transaction_perform_fop,
+                                   AFR_TRANSACTION_PRE_OP);
+        } else {
+                afr_changelog_do (frame, this, xdata_req,
+                                  afr_transaction_perform_fop,
+                                  AFR_TRANSACTION_PRE_OP);
+        }
 
 	if (xdata_req)
 		dict_unref (xdata_req);
@@ -1737,10 +2073,6 @@ out:
 
 
 void
-afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd,
-                               call_stub_t *stub);
-
-void
 afr_delayed_changelog_wake_up_cbk (void *data)
 {
         fd_t           *fd = NULL;
@@ -2030,7 +2362,6 @@ afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd)
         afr_delayed_changelog_post_op (this, NULL, fd, NULL);
 }
 
-
 int
 afr_transaction_resume (call_frame_t *frame, xlator_t *this)
 {
@@ -2081,7 +2412,7 @@ afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
 
 
 
-        static gf_boolean_t
+static gf_boolean_t
 afr_locals_overlap (afr_local_t *local1, afr_local_t *local2)
 {
         uint64_t start1 = local1->transaction.start;
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index ca8fcfe..db82456 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -59,4 +59,8 @@ void
 afr_pick_error_xdata (afr_local_t *local, afr_private_t *priv,
                       inode_t *inode1, unsigned char *readable1,
                       inode_t *inode2, unsigned char *readable2);
+int
+afr_pre_op_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                       int op_ret, int op_errno,
+                       void *data, dict_t *xdata);
 #endif /* __TRANSACTION_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 6f4783c..d38d59c 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -209,6 +209,9 @@ reconfigure (xlator_t *this, dict_t *options)
                           out);
         GF_OPTION_RECONF ("locking-scheme", priv->locking_scheme, options, str,
                           out);
+        GF_OPTION_RECONF ("use-compound-fops", priv->use_compound_fops,
+                          options, bool,
+                          out);
         GF_OPTION_RECONF ("granular-entry-heal", priv->esh_granular, options,
                           bool, out);
 
@@ -412,6 +415,8 @@ init (xlator_t *this)
 
         GF_OPTION_INIT ("pre-op-compat", priv->pre_op_compat, bool, out);
         GF_OPTION_INIT ("locking-scheme", priv->locking_scheme, str, out);
+        GF_OPTION_INIT ("use-compound-fops", priv->use_compound_fops,
+                        bool, out);
         GF_OPTION_INIT ("granular-entry-heal", priv->esh_granular, bool, out);
 
         GF_OPTION_INIT ("eager-lock", priv->eager_lock, bool, out);
@@ -932,5 +937,12 @@ struct volume_options options[] = {
                          " with identical mtime and size in more than half the "
                          "number of bricks in the replica.",
         },
+        { .key   = {"use-compound-fops"},
+          .type  = GF_OPTION_TYPE_BOOL,
+          .default_value = "no",
+          .description = "Use compound fops framework to modify afr "
+                         "transaction such that network roundtrips are "
+                         "reduced, thus improving the performance.",
+        },
         { .key  = {NULL} },
 };
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 4bffc30..d04775d 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -45,6 +45,11 @@ typedef int (*afr_inode_refresh_cbk_t) (call_frame_t *frame, xlator_t *this, int
 
 typedef int (*afr_changelog_resume_t) (call_frame_t *frame, xlator_t *this);
 
+typedef int (*afr_compound_cbk_t) (call_frame_t *frame, void *cookie,
+                                   xlator_t *this, int op_ret, int op_errno,
+                                   void *data, dict_t *xdata);
+
+
 #define alloca0(size) ({void *__ptr; __ptr = alloca(size); memset(__ptr, 0, size); __ptr;})
 #define AFR_COUNT(array,max) ({int __i; int __res = 0; for (__i = 0; __i < max; __i++) if (array[__i]) __res++; __res;})
 #define AFR_INTERSECT(dst,src1,src2,max) ({int __i; for (__i = 0; __i < max; __i++) dst[__i] = src1[__i] && src2[__i];})
@@ -153,6 +158,7 @@ typedef struct _afr_private {
 	gf_boolean_t           use_afr_in_pump;
 	char                   *locking_scheme;
         gf_boolean_t            esh_granular;
+        gf_boolean_t            use_compound_fops;
 } afr_private_t;
 
 
@@ -782,6 +788,7 @@ typedef struct _afr_local {
         call_frame_t *heal_frame;
 
         gf_boolean_t need_full_crawl;
+        gf_boolean_t compound;
 } afr_local_t;
 
 
@@ -1161,4 +1168,37 @@ afr_get_msg_id (char *op_type);
 int
 afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,
                              inode_t *inode);
+
+void
+afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index,
+                     int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                     struct iatt *postbuf, dict_t *xdata);
+void
+afr_process_post_writev (call_frame_t *frame, xlator_t *this);
+
+void
+afr_writev_unwind (call_frame_t *frame, xlator_t *this);
+
+void
+afr_writev_copy_outvars (call_frame_t *src_frame, call_frame_t *dst_frame);
+
+void
+afr_update_uninodelk (afr_local_t *local, afr_internal_lock_t *int_lock,
+                    int32_t child_index);
+gf_boolean_t
+afr_can_compound_pre_op_and_op (afr_private_t *priv, glusterfs_fop_t fop);
+
+afr_compound_cbk_t
+afr_pack_fop_args (call_frame_t *frame, compound_args_t *args,
+                   glusterfs_fop_t fop, int index);
+int
+afr_is_inodelk_transaction(afr_local_t *local);
+
+afr_fd_ctx_t *
+__afr_fd_ctx_get (fd_t *fd, xlator_t *this);
+
+void
+afr_compound_cleanup (compound_args_t *args, dict_t *xdata,
+                      dict_t *newloc_xdata);
+
 #endif /* __AFR_H__ */
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index a93c8d4..bd9b21b 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -2970,6 +2970,13 @@ struct volopt_map_entry glusterd_volopt_map[] = {
           .op_version = GD_OP_VERSION_3_8_0,
           .flags      = OPT_FLAG_CLIENT_OPT
         },
+        { .key        = "cluster.use-compound-fops",
+          .voltype    = "cluster/replicate",
+          .value      = "off",
+          .type       = DOC,
+          .op_version = GD_OP_VERSION_3_8_4,
+          .flags      = OPT_FLAG_CLIENT_OPT
+        },
         { .key         = NULL
         }
 };
-- 
1.7.1