Blob Blame History Raw
From c1bd80b704e6a48d357405a6163b11c1f2d57d16 Mon Sep 17 00:00:00 2001
From: Pranith Kumar K <pkarampu@redhat.com>
Date: Sat, 19 Mar 2016 11:40:26 +0530
Subject: [PATCH 117/139] cluster/afr: Don't let NFS cache stat after writes

Problem:
Afr does post-ops after write but the stat buffer it unwinds is at the
time of write, so if nfs client caches this, it will see different
ctime when it does stat on it after post-op is done. From NFS client's
perspective it thinks the file is changed. Tar which depends on this
to be correct keeps giving 'file changed as we read it' warning.
If Afr instead has to choose to unwind after post-op, eager-lock,
delayed-post-op will have to be disabled which will lead to bad
performance for all write usecases.

Fix:
Don't let client cache stat after write.

 >Change-Id: Ic6062acc6e5cdd97a9c83c56bd529ec83cee8a23
 >BUG: 1302948
 >Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
 >Signed-off-by: Anuradha Talur <atalur@redhat.com>

BUG: 1298724
Change-Id: I60c0bf15d9cff0a18dada32fa969f0f449cccde0
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/73665
---
 libglusterfs/src/common-utils.c           |   24 +++++++++++++++
 libglusterfs/src/common-utils.h           |    6 ++++
 tests/basic/afr/tarissue.t                |   37 +++++++++++++++++++++++
 xlators/cluster/afr/src/afr-dir-write.c   |   11 +++++-
 xlators/cluster/afr/src/afr-inode-write.c |   16 +++++++++-
 xlators/cluster/afr/src/afr-transaction.c |   46 +++++++++++++++++++++++++++-
 xlators/cluster/afr/src/afr-transaction.h |    2 +
 xlators/nfs/server/src/nfs-common.c       |   19 ------------
 xlators/nfs/server/src/nfs-common.h       |    3 --
 xlators/nfs/server/src/nfs3-helpers.c     |    4 +-
 10 files changed, 138 insertions(+), 30 deletions(-)
 create mode 100644 tests/basic/afr/tarissue.t

diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c
index 81e01bf..88fe5f3 100644
--- a/libglusterfs/src/common-utils.c
+++ b/libglusterfs/src/common-utils.c
@@ -4016,3 +4016,27 @@ _unmask_cancellation (void)
 {
         (void) pthread_setcancelstate (PTHREAD_CANCEL_ENABLE, NULL);
 }
+
+gf_boolean_t
+gf_is_zero_filled_stat (struct iatt *buf)
+{
+        if (!buf)
+                return 1;
+
+        /* Do not use st_dev because it is transformed to store the xlator id
+         * in place of the device number. Do not use st_ino because by this time
+         * we've already mapped the root ino to 1 so it is not guaranteed to be
+         * 0.
+         */
+        if ((buf->ia_nlink == 0) && (buf->ia_ctime == 0))
+                return 1;
+
+        return 0;
+}
+
+void
+gf_zero_fill_stat (struct iatt *buf)
+{
+        buf->ia_nlink = 0;
+        buf->ia_ctime = 0;
+}
diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h
index e374285..3ba0f04 100644
--- a/libglusterfs/src/common-utils.h
+++ b/libglusterfs/src/common-utils.h
@@ -777,4 +777,10 @@ gf_nwrite (int fd, const void *buf, size_t count);
 void _mask_cancellation (void);
 void _unmask_cancellation (void);
 
+gf_boolean_t
+gf_is_zero_filled_stat (struct iatt *buf);
+
+void
+gf_zero_fill_stat (struct iatt *buf);
+
 #endif /* _COMMON_UTILS_H */
diff --git a/tests/basic/afr/tarissue.t b/tests/basic/afr/tarissue.t
new file mode 100644
index 0000000..a35d468
--- /dev/null
+++ b/tests/basic/afr/tarissue.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+TESTS_EXPECTED_IN_LOOP=10
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+
+#Create a distributed-replicate volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1..6};
+TEST $CLI volume set $V0 cluster.consistent-metadata on
+TEST $CLI volume set $V0 cluster.post-op-delay-secs 0
+TEST $CLI volume set $V0 nfs.rdirplus off
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+# Mount NFS
+mount_nfs $H0:/$V0 $N0 vers=3
+
+#Create files
+TEST mkdir -p $N0/nfs/dir1/dir2
+for i in {1..10}; do
+    TEST_IN_LOOP dd if=/dev/urandom of=$N0/nfs/dir1/dir2/file$i bs=1024k count=1
+done
+TEST tar cvf /tmp/dir1.tar.gz $N0/nfs/dir1
+
+TEST rm -f /tmp/dir1.tar.gz
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+cleanup;
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 3d2b114..6d4a352 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -239,7 +239,9 @@ __afr_dir_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
         afr_local_t *local = NULL;
         int child_index = (long) cookie;
         int call_count = -1;
+        afr_private_t *priv = NULL;
 
+        priv  = this->private;
         local = frame->local;
 
 	LOCK (&frame->lock);
@@ -254,8 +256,13 @@ __afr_dir_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
         if (call_count == 0) {
 		__afr_dir_write_finalize (frame, this);
 
-		if (afr_txn_nothing_failed (frame, this))
-			local->transaction.unwind (frame, this);
+		if (afr_txn_nothing_failed (frame, this)) {
+                        /*if it did pre-op, it will do post-op changing ctime*/
+                        if (priv->consistent_metadata &&
+                            afr_needs_changelog_update (local))
+                                afr_zero_fill_stat (local);
+                        local->transaction.unwind (frame, this);
+                }
 
 		afr_mark_entry_pending_changelog (frame, this);
 
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 4206ef2..baa5871 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -187,7 +187,9 @@ __afr_inode_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
         afr_local_t *local = NULL;
         int child_index = (long) cookie;
         int call_count = -1;
+        afr_private_t *priv = NULL;
 
+        priv = this->private;
         local = frame->local;
 
         LOCK (&frame->lock);
@@ -203,8 +205,13 @@ __afr_inode_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
         if (call_count == 0) {
 		__afr_inode_write_finalize (frame, this);
 
-		if (afr_txn_nothing_failed (frame, this))
-			local->transaction.unwind (frame, this);
+		if (afr_txn_nothing_failed (frame, this)) {
+                        /*if it did pre-op, it will do post-op changing ctime*/
+                        if (priv->consistent_metadata &&
+                            afr_needs_changelog_update (local))
+                                afr_zero_fill_stat (local);
+                        local->transaction.unwind (frame, this);
+                }
 
                 local->transaction.resume (frame, this);
         }
@@ -235,8 +242,13 @@ void
 afr_writev_unwind (call_frame_t *frame, xlator_t *this)
 {
         afr_local_t *   local = NULL;
+        afr_private_t *priv = this->private;
+
         local = frame->local;
 
+       if (priv->consistent_metadata)
+               afr_zero_fill_stat (local);
+
         AFR_STACK_UNWIND (writev, frame,
                           local->op_ret, local->op_errno,
                           &local->cont.inode_wfop.prebuf,
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 4c85a4b..73de030 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -30,6 +30,37 @@ int
 afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
 		  afr_changelog_resume_t changelog_resume);
 
+void
+afr_zero_fill_stat (afr_local_t *local)
+{
+        if (!local)
+                return;
+        if (local->transaction.type == AFR_DATA_TRANSACTION ||
+            local->transaction.type == AFR_METADATA_TRANSACTION) {
+                gf_zero_fill_stat (&local->cont.inode_wfop.prebuf);
+                gf_zero_fill_stat (&local->cont.inode_wfop.postbuf);
+        } else if (local->transaction.type == AFR_ENTRY_TRANSACTION ||
+                   local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
+                gf_zero_fill_stat (&local->cont.dir_fop.buf);
+                gf_zero_fill_stat (&local->cont.dir_fop.preparent);
+                gf_zero_fill_stat (&local->cont.dir_fop.postparent);
+                if (local->transaction.type == AFR_ENTRY_TRANSACTION)
+                        return;
+                gf_zero_fill_stat (&local->cont.dir_fop.prenewparent);
+                gf_zero_fill_stat (&local->cont.dir_fop.postnewparent);
+        }
+}
+
+gf_boolean_t
+afr_needs_changelog_update (afr_local_t *local)
+{
+        if (local->transaction.type == AFR_DATA_TRANSACTION)
+                return _gf_true;
+        if (!local->optimistic_change_log)
+                return _gf_true;
+        return _gf_false;
+}
+
 static int32_t
 afr_quorum_errno (afr_private_t *priv)
 {
@@ -79,9 +110,21 @@ int
 __afr_txn_write_done (call_frame_t *frame, xlator_t *this)
 {
         afr_local_t *local = NULL;
+        afr_private_t *priv = NULL;
+        gf_boolean_t unwind = _gf_false;
 
+        priv  = this->private;
         local = frame->local;
 
+        if (priv->consistent_metadata) {
+                LOCK (&frame->lock);
+                {
+                        unwind = (local->transaction.main_frame != NULL);
+                }
+                UNLOCK (&frame->lock);
+                if (unwind)/*It definitely did post-op*/
+                        afr_zero_fill_stat (local);
+        }
         local->transaction.unwind (frame, this);
 
         AFR_STACK_DESTROY (frame);
@@ -1158,8 +1201,7 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
 		goto err;
 	}
 
-	if ((local->transaction.type == AFR_DATA_TRANSACTION ||
-	     !local->optimistic_change_log)) {
+	if (afr_needs_changelog_update (local)) {
 
 		local->dirty[idx] = hton32(1);
 
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index 47d43d8..c58531e 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -52,5 +52,7 @@ int __afr_txn_write_fop (call_frame_t *frame, xlator_t *this);
 int __afr_txn_write_done (call_frame_t *frame, xlator_t *this);
 call_frame_t *afr_transaction_detach_fop_frame (call_frame_t *frame);
 gf_boolean_t afr_has_quorum (unsigned char *subvols, xlator_t *this);
+gf_boolean_t afr_needs_changelog_update (afr_local_t *local);
+void afr_zero_fill_stat (afr_local_t *local);
 
 #endif /* __TRANSACTION_H__ */
diff --git a/xlators/nfs/server/src/nfs-common.c b/xlators/nfs/server/src/nfs-common.c
index cfb0edd..5bca459 100644
--- a/xlators/nfs/server/src/nfs-common.c
+++ b/xlators/nfs/server/src/nfs-common.c
@@ -111,25 +111,6 @@ nfs_mntpath_to_xlator (xlator_list_t *cl, char *path)
 }
 
 
-/* Returns 1 if the stat seems to be filled with zeroes. */
-int
-nfs_zero_filled_stat (struct iatt *buf)
-{
-        if (!buf)
-                return 1;
-
-        /* Do not use st_dev because it is transformed to store the xlator id
-         * in place of the device number. Do not use st_ino because by this time
-         * we've already mapped the root ino to 1 so it is not guaranteed to be
-         * 0.
-         */
-        if ((buf->ia_nlink == 0) && (buf->ia_ctime == 0))
-                return 1;
-
-        return 0;
-}
-
-
 void
 nfs_loc_wipe (loc_t *loc)
 {
diff --git a/xlators/nfs/server/src/nfs-common.h b/xlators/nfs/server/src/nfs-common.h
index 01b49c1..5853662 100644
--- a/xlators/nfs/server/src/nfs-common.h
+++ b/xlators/nfs/server/src/nfs-common.h
@@ -42,9 +42,6 @@ nfs_path_to_xlator (xlator_list_t *cl, char *path);
 extern xlator_t *
 nfs_mntpath_to_xlator (xlator_list_t *cl, char *path);
 
-extern int
-nfs_zero_filled_stat (struct iatt *buf);
-
 extern void
 nfs_loc_wipe (loc_t *loc);
 
diff --git a/xlators/nfs/server/src/nfs3-helpers.c b/xlators/nfs/server/src/nfs3-helpers.c
index bf2594f..229bfee 100644
--- a/xlators/nfs/server/src/nfs3-helpers.c
+++ b/xlators/nfs/server/src/nfs3-helpers.c
@@ -378,7 +378,7 @@ nfs3_stat_to_post_op_attr (struct iatt *buf)
          * returning these zeroed out attrs.
          */
         attr.attributes_follow = FALSE;
-        if (nfs_zero_filled_stat (buf))
+        if (gf_is_zero_filled_stat (buf))
                 goto out;
 
         nfs3_stat_to_fattr3 (buf, &(attr.post_op_attr_u.attributes));
@@ -399,7 +399,7 @@ nfs3_stat_to_pre_op_attr (struct iatt *pre)
          * returning these zeroed out attrs.
          */
         poa.attributes_follow = FALSE;
-        if (nfs_zero_filled_stat (pre))
+        if (gf_is_zero_filled_stat (pre))
                 goto out;
 
         poa.attributes_follow = TRUE;
-- 
1.7.1