d1681e
From 9f670a342ffed3eee7cb91a67dcc2f2a27600983 Mon Sep 17 00:00:00 2001
d1681e
From: karthik-us <ksubrahm@redhat.com>
d1681e
Date: Fri, 23 Feb 2018 15:12:19 +0530
d1681e
Subject: [PATCH 191/201] cluster/afr: Make afr_fsync a transaction
d1681e
d1681e
Upstream patch: https://review.gluster.org/#/c/19621/
d1681e
d1681e
Change-Id: I713401feb96393f668efb074f2d5b870d19e6fda
d1681e
BUG: 1552425
d1681e
Signed-off-by: karthik-us <ksubrahm@redhat.com>
d1681e
Reviewed-on: https://code.engineering.redhat.com/gerrit/131942
d1681e
Tested-by: RHGS Build Bot <nigelb@redhat.com>
d1681e
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
d1681e
---
d1681e
 xlators/cluster/afr/src/afr-common.c      | 163 ------------------------------
d1681e
 xlators/cluster/afr/src/afr-inode-write.c | 108 ++++++++++++++++++++
d1681e
 xlators/cluster/afr/src/afr-inode-write.h |   4 +
d1681e
 xlators/cluster/afr/src/afr.c             |   2 +-
d1681e
 xlators/cluster/afr/src/afr.h             |   4 +
d1681e
 5 files changed, 117 insertions(+), 164 deletions(-)
d1681e
d1681e
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
d1681e
index 855e568..a790402 100644
d1681e
--- a/xlators/cluster/afr/src/afr-common.c
d1681e
+++ b/xlators/cluster/afr/src/afr-common.c
d1681e
@@ -3435,169 +3435,6 @@ out:
d1681e
         return 0;
d1681e
 }
d1681e
 
d1681e
-/* }}} */
d1681e
-
d1681e
-
d1681e
-/* {{{ fsync */
d1681e
-
d1681e
-int
d1681e
-afr_fsync_unwind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
d1681e
-                      int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
d1681e
-                      struct iatt *postbuf, dict_t *xdata)
d1681e
-{
d1681e
-        AFR_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
d1681e
-                          xdata);
d1681e
-        return 0;
d1681e
-}
d1681e
-
d1681e
-int
d1681e
-afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
d1681e
-               int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
d1681e
-               struct iatt *postbuf, dict_t *xdata)
d1681e
-{
d1681e
-        afr_local_t *local = NULL;
d1681e
-        afr_private_t *priv = NULL;
d1681e
-        int i = 0;
d1681e
-        int call_count = -1;
d1681e
-        int child_index = (long) cookie;
d1681e
-	int read_subvol = 0;
d1681e
-	call_stub_t *stub = NULL;
d1681e
-
d1681e
-        local = frame->local;
d1681e
-        priv = this->private;
d1681e
-
d1681e
-        LOCK (&frame->lock);
d1681e
-        {
d1681e
-                local->replies[child_index].valid = 1;
d1681e
-                local->replies[child_index].op_ret = op_ret;
d1681e
-                local->replies[child_index].op_errno = op_errno;
d1681e
-                if (op_ret == 0) {
d1681e
-                        if (prebuf)
d1681e
-                                local->replies[child_index].prestat = *prebuf;
d1681e
-                        if (postbuf)
d1681e
-                                local->replies[child_index].poststat = *postbuf;
d1681e
-                        if (xdata)
d1681e
-                                local->replies[child_index].xdata =
d1681e
-                                        dict_ref (xdata);
d1681e
-                }
d1681e
-        }
d1681e
-        UNLOCK (&frame->lock);
d1681e
-
d1681e
-        call_count = afr_frame_return (frame);
d1681e
-
d1681e
-        if (call_count == 0) {
d1681e
-                local->op_ret = -1;
d1681e
-                local->op_errno = afr_final_errno (local, priv);
d1681e
-	        read_subvol = afr_data_subvol_get (local->inode, this, NULL,
d1681e
-                                                   local->readable, NULL, NULL);
d1681e
-                /* Pick a reply that is valid and readable, with a preference
d1681e
-                 * given to read_subvol. */
d1681e
-                for (i = 0; i < priv->child_count; i++) {
d1681e
-                        if (!local->replies[i].valid)
d1681e
-                                continue;
d1681e
-                        if (local->replies[i].op_ret != 0)
d1681e
-                                continue;
d1681e
-                        if (!local->readable[i])
d1681e
-                                continue;
d1681e
-                        local->op_ret = local->replies[i].op_ret;
d1681e
-                        local->op_errno = local->replies[i].op_errno;
d1681e
-                        local->cont.inode_wfop.prebuf =
d1681e
-                                local->replies[i].prestat;
d1681e
-                        local->cont.inode_wfop.postbuf =
d1681e
-                                local->replies[i].poststat;
d1681e
-                        if (local->replies[i].xdata) {
d1681e
-                                if (local->xdata_rsp)
d1681e
-                                        dict_unref (local->xdata_rsp);
d1681e
-                                local->xdata_rsp =
d1681e
-                                        dict_ref (local->replies[i].xdata);
d1681e
-                        }
d1681e
-                        if (i == read_subvol)
d1681e
-                                break;
d1681e
-                }
d1681e
-
d1681e
-		/* Make a stub out of the frame, and register it
d1681e
-		   with the waking up post-op. When the call-stub resumes,
d1681e
-		   we are guaranteed that there was no post-op pending
d1681e
-		   (i.e changelogs were unset in the server). This is an
d1681e
-		   essential "guarantee", that fsync() returns only after
d1681e
-		   completely finishing EVERYTHING, including the delayed
d1681e
-		   post-op. This guarantee is expected by FUSE graph switching
d1681e
-		   for example.
d1681e
-		*/
d1681e
-		stub = fop_fsync_cbk_stub (frame, afr_fsync_unwind_cbk,
d1681e
-                                           local->op_ret, local->op_errno,
d1681e
-                                           &local->cont.inode_wfop.prebuf,
d1681e
-                                           &local->cont.inode_wfop.postbuf,
d1681e
-                                           local->xdata_rsp);
d1681e
-		if (!stub) {
d1681e
-			AFR_STACK_UNWIND (fsync, frame, -1, ENOMEM, 0, 0, 0);
d1681e
-			return 0;
d1681e
-		}
d1681e
-
d1681e
-		/* If no new unstable writes happened between the
d1681e
-		   time we cleared the unstable write witness flag in afr_fsync
d1681e
-		   and now, calling afr_delayed_changelog_wake_up() should
d1681e
-		   wake up and skip over the fsync phase and go straight to
d1681e
-		   afr_changelog_post_op_now()
d1681e
-		*/
d1681e
-		afr_delayed_changelog_wake_resume (this, local->fd, stub);
d1681e
-        }
d1681e
-
d1681e
-        return 0;
d1681e
-}
d1681e
-
d1681e
-
d1681e
-int
d1681e
-afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
d1681e
-	   dict_t *xdata)
d1681e
-{
d1681e
-	afr_private_t *priv = NULL;
d1681e
-        afr_local_t *local = NULL;
d1681e
-        int i = 0;
d1681e
-        int32_t call_count = 0;
d1681e
-        int32_t op_errno = ENOMEM;
d1681e
-
d1681e
-	priv = this->private;
d1681e
-
d1681e
-	local = AFR_FRAME_INIT (frame, op_errno);
d1681e
-	if (!local)
d1681e
-		goto out;
d1681e
-
d1681e
-        local->op = GF_FOP_FSYNC;
d1681e
-	if (!afr_is_consistent_io_possible (local, priv, &op_errno))
d1681e
-		goto out;
d1681e
-
d1681e
-        local->fd = fd_ref (fd);
d1681e
-
d1681e
-	if (afr_fd_has_witnessed_unstable_write (this, fd)) {
d1681e
-		/* don't care. we only wanted to CLEAR the bit */
d1681e
-	}
d1681e
-
d1681e
-	local->inode = inode_ref (fd->inode);
d1681e
-
d1681e
-        call_count = local->call_count;
d1681e
-        for (i = 0; i < priv->child_count; i++) {
d1681e
-                if (local->child_up[i]) {
d1681e
-                        STACK_WIND_COOKIE (frame, afr_fsync_cbk,
d1681e
-                                           (void *) (long) i,
d1681e
-                                           priv->children[i],
d1681e
-                                           priv->children[i]->fops->fsync,
d1681e
-                                           fd, datasync, xdata);
d1681e
-                        if (!--call_count)
d1681e
-                                break;
d1681e
-                }
d1681e
-        }
d1681e
-
d1681e
-	return 0;
d1681e
-out:
d1681e
-	AFR_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
d1681e
-
d1681e
-        return 0;
d1681e
-}
d1681e
-
d1681e
-/* }}} */
d1681e
-
d1681e
-/* {{{ fsync */
d1681e
 
d1681e
 int
d1681e
 afr_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
d1681e
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
d1681e
index f0231b7..0e50443 100644
d1681e
--- a/xlators/cluster/afr/src/afr-inode-write.c
d1681e
+++ b/xlators/cluster/afr/src/afr-inode-write.c
d1681e
@@ -2539,3 +2539,111 @@ out:
d1681e
 	AFR_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
d1681e
         return 0;
d1681e
 }
d1681e
+
d1681e
+
d1681e
+int
d1681e
+afr_fsync_unwind (call_frame_t *frame, xlator_t *this)
d1681e
+{
d1681e
+        afr_local_t *local = NULL;
d1681e
+        call_frame_t   *main_frame = NULL;
d1681e
+
d1681e
+        local = frame->local;
d1681e
+
d1681e
+        main_frame = afr_transaction_detach_fop_frame (frame);
d1681e
+        if (!main_frame)
d1681e
+                return 0;
d1681e
+
d1681e
+        AFR_STACK_UNWIND (fsync, main_frame, local->op_ret, local->op_errno,
d1681e
+                          &local->cont.inode_wfop.prebuf,
d1681e
+                          &local->cont.inode_wfop.postbuf, local->xdata_rsp);
d1681e
+
d1681e
+        return 0;
d1681e
+}
d1681e
+
d1681e
+
d1681e
+int
d1681e
+afr_fsync_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
d1681e
+                    int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
d1681e
+                    struct iatt *postbuf, dict_t *xdata)
d1681e
+{
d1681e
+        return __afr_inode_write_cbk (frame, cookie, this, op_ret, op_errno,
d1681e
+                                      prebuf, postbuf, NULL, xdata);
d1681e
+}
d1681e
+
d1681e
+
d1681e
+int
d1681e
+afr_fsync_wind (call_frame_t *frame, xlator_t *this, int subvol)
d1681e
+{
d1681e
+        afr_local_t *local = NULL;
d1681e
+        afr_private_t *priv = NULL;
d1681e
+
d1681e
+        local = frame->local;
d1681e
+        priv = this->private;
d1681e
+
d1681e
+        STACK_WIND_COOKIE (frame, afr_fsync_wind_cbk, (void *)(long) subvol,
d1681e
+                           priv->children[subvol],
d1681e
+                           priv->children[subvol]->fops->fsync,
d1681e
+                           local->fd, local->cont.fsync.datasync,
d1681e
+                           local->xdata_req);
d1681e
+        return 0;
d1681e
+}
d1681e
+
d1681e
+int
d1681e
+afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
d1681e
+	   dict_t *xdata)
d1681e
+{
d1681e
+        afr_local_t *local = NULL;
d1681e
+        call_frame_t *transaction_frame = NULL;
d1681e
+        int ret = -1;
d1681e
+        int32_t op_errno = ENOMEM;
d1681e
+
d1681e
+        transaction_frame = copy_frame (frame);
d1681e
+        if (!transaction_frame)
d1681e
+                goto out;
d1681e
+
d1681e
+	local = AFR_FRAME_INIT (transaction_frame, op_errno);
d1681e
+	if (!local)
d1681e
+		goto out;
d1681e
+
d1681e
+        if (xdata)
d1681e
+                local->xdata_req = dict_copy_with_ref (xdata, NULL);
d1681e
+        else
d1681e
+                local->xdata_req = dict_new ();
d1681e
+
d1681e
+        if (!local->xdata_req)
d1681e
+                goto out;
d1681e
+
d1681e
+        local->fd = fd_ref (fd);
d1681e
+        ret = afr_set_inode_local (this, local, fd->inode);
d1681e
+        if (ret)
d1681e
+                goto out;
d1681e
+
d1681e
+        local->op = GF_FOP_FSYNC;
d1681e
+        local->cont.fsync.datasync = datasync;
d1681e
+
d1681e
+	if (afr_fd_has_witnessed_unstable_write (this, fd)) {
d1681e
+		/* Result unused; called only to CLEAR the unstable-write bit */
d1681e
+	}
d1681e
+
d1681e
+        local->transaction.wind   = afr_fsync_wind;
d1681e
+        local->transaction.fop    = __afr_txn_write_fop;
d1681e
+        local->transaction.done   = __afr_txn_write_done;
d1681e
+        local->transaction.unwind = afr_fsync_unwind;
d1681e
+
d1681e
+        local->transaction.main_frame = frame;
d1681e
+
d1681e
+        ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
d1681e
+        if (ret < 0) {
d1681e
+                op_errno = -ret;
d1681e
+                goto out;
d1681e
+        }
d1681e
+
d1681e
+	return 0;
d1681e
+out:
d1681e
+	if (transaction_frame)
d1681e
+		AFR_STACK_DESTROY (transaction_frame);
d1681e
+
d1681e
+	AFR_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
d1681e
+
d1681e
+        return 0;
d1681e
+}
d1681e
diff --git a/xlators/cluster/afr/src/afr-inode-write.h b/xlators/cluster/afr/src/afr-inode-write.h
d1681e
index e174cc2..1e8bb5c 100644
d1681e
--- a/xlators/cluster/afr/src/afr-inode-write.h
d1681e
+++ b/xlators/cluster/afr/src/afr-inode-write.h
d1681e
@@ -87,4 +87,8 @@ afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
d1681e
 int32_t
d1681e
 afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
d1681e
               gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
d1681e
+
d1681e
+int
d1681e
+afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
d1681e
+	   dict_t *xdata);
d1681e
 #endif /* __INODE_WRITE_H__ */
d1681e
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
d1681e
index 9493fbb..aa48e76 100644
d1681e
--- a/xlators/cluster/afr/src/afr.c
d1681e
+++ b/xlators/cluster/afr/src/afr.c
d1681e
@@ -664,7 +664,6 @@ struct xlator_fops fops = {
d1681e
         .lk          = afr_lk,
d1681e
         .flush       = afr_flush,
d1681e
         .statfs      = afr_statfs,
d1681e
-        .fsync       = afr_fsync,
d1681e
         .fsyncdir    = afr_fsyncdir,
d1681e
         .inodelk     = afr_inodelk,
d1681e
         .finodelk    = afr_finodelk,
d1681e
@@ -696,6 +695,7 @@ struct xlator_fops fops = {
d1681e
         .zerofill    = afr_zerofill,
d1681e
         .xattrop     = afr_xattrop,
d1681e
         .fxattrop    = afr_fxattrop,
d1681e
+        .fsync       = afr_fsync,
d1681e
 
d1681e
         /*inode open*/
d1681e
         .opendir     = afr_opendir,
d1681e
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
d1681e
index b6f5388..11278fb 100644
d1681e
--- a/xlators/cluster/afr/src/afr.h
d1681e
+++ b/xlators/cluster/afr/src/afr.h
d1681e
@@ -725,6 +725,10 @@ typedef struct _afr_local {
d1681e
                         gf_seek_what_t what;
d1681e
                 } seek;
d1681e
 
d1681e
+                struct {
d1681e
+                        int32_t datasync;
d1681e
+                } fsync;
d1681e
+
d1681e
         } cont;
d1681e
 
d1681e
         struct {
d1681e
-- 
d1681e
1.8.3.1
d1681e