Blob Blame History Raw
From b97269044f8a541017d600cc8682dc51e62258c7 Mon Sep 17 00:00:00 2001
From: Krutika Dhananjay <kdhananj@redhat.com>
Date: Mon, 2 May 2016 16:51:10 +0530
Subject: [PATCH 190/192] core, shard: Make shards inherit main file's O_DIRECT flag if present

        Backport of: http://review.gluster.org/14191

If the application opens a file with O_DIRECT, the anonymous fds used
for its shards need to inherit the flag as well. To this end, the
shard xlator passes O_DIRECT in the @flags parameter of the WRITEV fop.
The flag is then used during anon fd resolution and by the posix xlator
when it subsequently opens the file.

Change-Id: Ide18f819d34bd3705eb95220baa2f1029a3e0102
BUG: 1339136
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/76045
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
---
 libglusterfs/src/fd.c                        |   41 +++++++++++++++++++++-----
 libglusterfs/src/fd.h                        |    4 ++-
 xlators/cluster/afr/src/afr-transaction.c    |    3 +-
 xlators/features/shard/src/shard.c           |   11 +++++++
 xlators/nfs/server/src/nfs-fops.c            |   16 +++++++++-
 xlators/nfs/server/src/nfs3.c                |   18 -----------
 xlators/protocol/server/src/server-resolve.c |    2 +-
 xlators/storage/posix/src/posix-helpers.c    |    1 +
 8 files changed, 66 insertions(+), 30 deletions(-)

diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c
index 7d9a0cc..89ba357 100644
--- a/libglusterfs/src/fd.c
+++ b/libglusterfs/src/fd.c
@@ -749,7 +749,7 @@ fd_lookup_uint64 (inode_t *inode, uint64_t pid)
 }
 
 static fd_t *
-__fd_lookup_anonymous (inode_t *inode)
+__fd_lookup_anonymous (inode_t *inode, int32_t flags)
 {
         fd_t *iter_fd = NULL;
         fd_t *fd = NULL;
@@ -758,7 +758,7 @@ __fd_lookup_anonymous (inode_t *inode)
                 return NULL;
 
         list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
-                if (iter_fd->anonymous) {
+                if ((iter_fd->anonymous) && (flags == iter_fd->flags)) {
                         fd = __fd_ref (iter_fd);
                         break;
                 }
@@ -768,11 +768,11 @@ __fd_lookup_anonymous (inode_t *inode)
 }
 
 static fd_t *
-__fd_anonymous (inode_t *inode)
+__fd_anonymous (inode_t *inode, int32_t flags)
 {
         fd_t *fd = NULL;
 
-        fd = __fd_lookup_anonymous (inode);
+        fd = __fd_lookup_anonymous (inode, flags);
 
         /* if (fd); then we already have increased the refcount in
            __fd_lookup_anonymous(), so no need of one more fd_ref().
@@ -785,7 +785,7 @@ __fd_anonymous (inode_t *inode)
                         return NULL;
 
                 fd->anonymous = _gf_true;
-                fd->flags = GF_ANON_FD_FLAGS;
+                fd->flags = GF_ANON_FD_FLAGS|flags;
 
                 __fd_bind (fd);
 
@@ -803,7 +803,32 @@ fd_anonymous (inode_t *inode)
 
         LOCK (&inode->lock);
         {
-                fd = __fd_anonymous (inode);
+                fd = __fd_anonymous (inode, GF_ANON_FD_FLAGS);
+        }
+        UNLOCK (&inode->lock);
+
+        return fd;
+}
+
+fd_t *
+fd_anonymous_with_flags (inode_t *inode, int32_t flags)
+{
+        fd_t *fd = NULL;
+
+        LOCK (&inode->lock);
+        {
+                if (flags == 0)
+                        flags = GF_ANON_FD_FLAGS;
+                /* If this API is ever called with O_SYNC or O_DSYNC in @flags,
+                 * reset the bits associated with these flags before calling
+                 * __fd_anonymous(). That way, posix will do the open() without
+                 * these flags. And subsequently, posix_writev() (mostly) will
+                 * do the write within inode->lock on an fd without O_SYNC or
+                 * O_DSYNC and in its place do an fsync() outside of the locks
+                 * to simulate the effect of using these flags.
+                 */
+                flags &= (~(O_SYNC|O_DSYNC));
+                fd = __fd_anonymous (inode, flags);
         }
         UNLOCK (&inode->lock);
 
@@ -811,7 +836,7 @@ fd_anonymous (inode_t *inode)
 }
 
 fd_t*
-fd_lookup_anonymous (inode_t *inode)
+fd_lookup_anonymous (inode_t *inode, int32_t flags)
 {
         fd_t *fd = NULL;
 
@@ -823,7 +848,7 @@ fd_lookup_anonymous (inode_t *inode)
 
         LOCK (&inode->lock);
         {
-                fd = __fd_lookup_anonymous (inode);
+                fd = __fd_lookup_anonymous (inode, flags);
         }
         UNLOCK (&inode->lock);
         return fd;
diff --git a/libglusterfs/src/fd.h b/libglusterfs/src/fd.h
index 2e93f76..66e983d 100644
--- a/libglusterfs/src/fd.h
+++ b/libglusterfs/src/fd.h
@@ -141,11 +141,13 @@ fd_t *
 fd_lookup_uint64 (struct _inode *inode, uint64_t pid);
 
 fd_t*
-fd_lookup_anonymous (inode_t *inode);
+fd_lookup_anonymous (inode_t *inode, int32_t flags);
 
 fd_t *
 fd_anonymous (inode_t *inode);
 
+fd_t *
+fd_anonymous_with_flags (inode_t *inode, int32_t flags);
 
 gf_boolean_t
 fd_is_anonymous (fd_t *fd);
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 316c7cd..0353406 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -2091,7 +2091,8 @@ afr_transaction_start (call_frame_t *frame, xlator_t *this)
         if (!local->transaction.eager_lock_on && local->loc.inode) {
                 fd = fd_lookup (local->loc.inode, frame->root->pid);
                 if (fd == NULL)
-                        fd = fd_lookup_anonymous (local->loc.inode);
+                        fd = fd_lookup_anonymous (local->loc.inode,
+                                                  GF_ANON_FD_FLAGS);
 
                 if (fd) {
                         afr_delayed_changelog_wake_up (this, fd);
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index dce6d49..f54878c 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -3638,6 +3638,7 @@ shard_common_inode_write_do (call_frame_t *frame, xlator_t *this)
         shard_local_t  *local             = NULL;
         struct iovec   *vec               = NULL;
         gf_boolean_t    wind_failed       = _gf_false;
+        gf_boolean_t    odirect           = _gf_false;
         off_t           orig_offset       = 0;
         off_t           shard_offset      = 0;
         off_t           vec_offset        = 0;
@@ -3668,6 +3669,9 @@ shard_common_inode_write_do (call_frame_t *frame, xlator_t *this)
                 return 0;
         }
 
+        if ((fd->flags & O_DIRECT) && (local->fop == GF_FOP_WRITE))
+                odirect = _gf_true;
+
         while (cur_block <= last_block) {
                 if (wind_failed) {
                         shard_common_inode_write_do_cbk (frame,
@@ -3725,6 +3729,13 @@ shard_common_inode_write_do (call_frame_t *frame, xlator_t *this)
                                                                  NULL, NULL);
                                 goto next;
                         }
+
+                        if (local->fop == GF_FOP_WRITE) {
+                                if (odirect)
+                                        local->flags = O_DIRECT;
+                                else
+                                        local->flags = GF_ANON_FD_FLAGS;
+                        }
                 }
 
                 shard_common_inode_write_wind (frame, this, anon_fd,
diff --git a/xlators/nfs/server/src/nfs-fops.c b/xlators/nfs/server/src/nfs-fops.c
index e5990a1..167f6b7 100644
--- a/xlators/nfs/server/src/nfs-fops.c
+++ b/xlators/nfs/server/src/nfs-fops.c
@@ -1386,6 +1386,8 @@ nfs_fop_write (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
         call_frame_t            *frame = NULL;
         int                     ret = -EFAULT;
         struct nfs_fop_local    *nfl = NULL;
+        int flags = 0;
+        nfs3_call_state_t       *cs = local;
 
         if ((!nfsx) || (!xl) || (!fd) || (!vector) || (!nfu) || (!srciobref))
                 return ret;
@@ -1403,8 +1405,20 @@ nfs_fop_write (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
 
         iobref_add (nfl->iobref, srciob);
 */
+
+        switch (cs->writetype) {
+        case UNSTABLE:
+                break;
+        case DATA_SYNC:
+                flags |= O_DSYNC;
+                break;
+        case FILE_SYNC:
+                flags |= O_SYNC;
+                break;
+        }
+
         STACK_WIND_COOKIE (frame, nfs_fop_writev_cbk, xl, xl,xl->fops->writev,
-                           fd, vector, count, offset, fd->flags, srciobref, NULL);
+                           fd, vector, count, offset, flags, srciobref, NULL);
         ret = 0;
 err:
         if (ret < 0) {
diff --git a/xlators/nfs/server/src/nfs3.c b/xlators/nfs/server/src/nfs3.c
index 4035491..aa7af31 100644
--- a/xlators/nfs/server/src/nfs3.c
+++ b/xlators/nfs/server/src/nfs3.c
@@ -2245,24 +2245,6 @@ nfs3_write_resume (void *carg)
 
         cs->fd = fd;    /* Gets unrefd when the call state is wiped. */
 
-/*
-  enum stable_how {
-  UNSTABLE = 0,
-  DATA_SYNC = 1,
-  FILE_SYNC = 2,
-  };
-*/
-	switch (cs->writetype) {
-	case UNSTABLE:
-		break;
-	case DATA_SYNC:
-		fd->flags |= O_DSYNC;
-		break;
-	case FILE_SYNC:
-		fd->flags |= O_SYNC;
-		break;
-	}
-
         ret = __nfs3_write_resume (cs);
         if (ret < 0)
                 stat = nfs3_errno_to_nfsstat3 (-ret);
diff --git a/xlators/protocol/server/src/server-resolve.c b/xlators/protocol/server/src/server-resolve.c
index 26bf37e..4e6ec6a 100644
--- a/xlators/protocol/server/src/server-resolve.c
+++ b/xlators/protocol/server/src/server-resolve.c
@@ -458,7 +458,7 @@ resolve_anonfd_simple (call_frame_t *frame)
 
         ret = 0;
 
-        state->fd = fd_anonymous (inode);
+        state->fd = fd_anonymous_with_flags (inode, state->flags);
 out:
         if (inode)
                 inode_unref (inode);
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index 7fb1a24..56c48bd 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -1687,6 +1687,7 @@ __posix_fd_ctx_get (fd_t *fd, xlator_t *this, struct posix_fd **pfd_p)
 
         pfd->fd = _fd;
         pfd->dir = dir;
+        pfd->flags = fd->flags;
 
         ret = __fd_ctx_set (fd, this, (uint64_t) (long) pfd);
         if (ret != 0) {
-- 
1.7.1