12a457
From b97269044f8a541017d600cc8682dc51e62258c7 Mon Sep 17 00:00:00 2001
12a457
From: Krutika Dhananjay <kdhananj@redhat.com>
12a457
Date: Mon, 2 May 2016 16:51:10 +0530
12a457
Subject: [PATCH 190/192] core, shard: Make shards inherit main file's O_DIRECT flag if present
12a457
12a457
        Backport of: http://review.gluster.org/14191
12a457
12a457
If the application opens a file with O_DIRECT, the shards'
12a457
anon fds would also need to inherit the flag. Towards this,
12a457
the shard xl passes the O_DIRECT flag in the @flags parameter
12a457
to the WRITEV fop. This will be used in anon fd resolution
12a457
and subsequent opening by posix xl.
12a457
12a457
Change-Id: Ide18f819d34bd3705eb95220baa2f1029a3e0102
12a457
BUG: 1339136
12a457
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
12a457
Reviewed-on: https://code.engineering.redhat.com/gerrit/76045
12a457
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
12a457
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
12a457
---
12a457
 libglusterfs/src/fd.c                        |   41 +++++++++++++++++++++-----
12a457
 libglusterfs/src/fd.h                        |    4 ++-
12a457
 xlators/cluster/afr/src/afr-transaction.c    |    3 +-
12a457
 xlators/features/shard/src/shard.c           |   11 +++++++
12a457
 xlators/nfs/server/src/nfs-fops.c            |   16 +++++++++-
12a457
 xlators/nfs/server/src/nfs3.c                |   18 -----------
12a457
 xlators/protocol/server/src/server-resolve.c |    2 +-
12a457
 xlators/storage/posix/src/posix-helpers.c    |    1 +
12a457
 8 files changed, 66 insertions(+), 30 deletions(-)
12a457
12a457
diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c
12a457
index 7d9a0cc..89ba357 100644
12a457
--- a/libglusterfs/src/fd.c
12a457
+++ b/libglusterfs/src/fd.c
12a457
@@ -749,7 +749,7 @@ fd_lookup_uint64 (inode_t *inode, uint64_t pid)
12a457
 }
12a457
 
12a457
 static fd_t *
12a457
-__fd_lookup_anonymous (inode_t *inode)
12a457
+__fd_lookup_anonymous (inode_t *inode, int32_t flags)
12a457
 {
12a457
         fd_t *iter_fd = NULL;
12a457
         fd_t *fd = NULL;
12a457
@@ -758,7 +758,7 @@ __fd_lookup_anonymous (inode_t *inode)
12a457
                 return NULL;
12a457
 
12a457
         list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
12a457
-                if (iter_fd->anonymous) {
12a457
+                if ((iter_fd->anonymous) && (flags == iter_fd->flags)) {
12a457
                         fd = __fd_ref (iter_fd);
12a457
                         break;
12a457
                 }
12a457
@@ -768,11 +768,11 @@ __fd_lookup_anonymous (inode_t *inode)
12a457
 }
12a457
 
12a457
 static fd_t *
12a457
-__fd_anonymous (inode_t *inode)
12a457
+__fd_anonymous (inode_t *inode, int32_t flags)
12a457
 {
12a457
         fd_t *fd = NULL;
12a457
 
12a457
-        fd = __fd_lookup_anonymous (inode);
12a457
+        fd = __fd_lookup_anonymous (inode, flags);
12a457
 
12a457
         /* if (fd); then we already have increased the refcount in
12a457
            __fd_lookup_anonymous(), so no need of one more fd_ref().
12a457
@@ -785,7 +785,7 @@ __fd_anonymous (inode_t *inode)
12a457
                         return NULL;
12a457
 
12a457
                 fd->anonymous = _gf_true;
12a457
-                fd->flags = GF_ANON_FD_FLAGS;
12a457
+                fd->flags = GF_ANON_FD_FLAGS|flags;
12a457
 
12a457
                 __fd_bind (fd);
12a457
 
12a457
@@ -803,7 +803,32 @@ fd_anonymous (inode_t *inode)
12a457
 
12a457
         LOCK (&inode->lock);
12a457
         {
12a457
-                fd = __fd_anonymous (inode);
12a457
+                fd = __fd_anonymous (inode, GF_ANON_FD_FLAGS);
12a457
+        }
12a457
+        UNLOCK (&inode->lock);
12a457
+
12a457
+        return fd;
12a457
+}
12a457
+
12a457
+fd_t *
12a457
+fd_anonymous_with_flags (inode_t *inode, int32_t flags)
12a457
+{
12a457
+        fd_t *fd = NULL;
12a457
+
12a457
+        LOCK (&inode->lock);
12a457
+        {
12a457
+                if (flags == 0)
12a457
+                        flags = GF_ANON_FD_FLAGS;
12a457
+                /* If this API is ever called with O_SYNC or O_DSYNC in @flags,
12a457
+                 * reset the bits associated with these flags before calling
12a457
+                 * __fd_anonymous(). That way, posix will do the open() without
12a457
+                 * these flags. And subsequently, posix_writev() (mostly) will
12a457
+                 * do the write within inode->lock on an fd without O_SYNC or
12a457
+                 * O_DSYNC and in its place do an fsync() outside of the locks
12a457
+                 * to simulate the effect of using these flags.
12a457
+                 */
12a457
+                flags &= (~(O_SYNC|O_DSYNC));
12a457
+                fd = __fd_anonymous (inode, flags);
12a457
         }
12a457
         UNLOCK (&inode->lock);
12a457
 
12a457
@@ -811,7 +836,7 @@ fd_anonymous (inode_t *inode)
12a457
 }
12a457
 
12a457
 fd_t*
12a457
-fd_lookup_anonymous (inode_t *inode)
12a457
+fd_lookup_anonymous (inode_t *inode, int32_t flags)
12a457
 {
12a457
         fd_t *fd = NULL;
12a457
 
12a457
@@ -823,7 +848,7 @@ fd_lookup_anonymous (inode_t *inode)
12a457
 
12a457
         LOCK (&inode->lock);
12a457
         {
12a457
-                fd = __fd_lookup_anonymous (inode);
12a457
+                fd = __fd_lookup_anonymous (inode, flags);
12a457
         }
12a457
         UNLOCK (&inode->lock);
12a457
         return fd;
12a457
diff --git a/libglusterfs/src/fd.h b/libglusterfs/src/fd.h
12a457
index 2e93f76..66e983d 100644
12a457
--- a/libglusterfs/src/fd.h
12a457
+++ b/libglusterfs/src/fd.h
12a457
@@ -141,11 +141,13 @@ fd_t *
12a457
 fd_lookup_uint64 (struct _inode *inode, uint64_t pid);
12a457
 
12a457
 fd_t*
12a457
-fd_lookup_anonymous (inode_t *inode);
12a457
+fd_lookup_anonymous (inode_t *inode, int32_t flags);
12a457
 
12a457
 fd_t *
12a457
 fd_anonymous (inode_t *inode);
12a457
 
12a457
+fd_t *
12a457
+fd_anonymous_with_flags (inode_t *inode, int32_t flags);
12a457
 
12a457
 gf_boolean_t
12a457
 fd_is_anonymous (fd_t *fd);
12a457
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
12a457
index 316c7cd..0353406 100644
12a457
--- a/xlators/cluster/afr/src/afr-transaction.c
12a457
+++ b/xlators/cluster/afr/src/afr-transaction.c
12a457
@@ -2091,7 +2091,8 @@ afr_transaction_start (call_frame_t *frame, xlator_t *this)
12a457
         if (!local->transaction.eager_lock_on && local->loc.inode) {
12a457
                 fd = fd_lookup (local->loc.inode, frame->root->pid);
12a457
                 if (fd == NULL)
12a457
-                        fd = fd_lookup_anonymous (local->loc.inode);
12a457
+                        fd = fd_lookup_anonymous (local->loc.inode,
12a457
+                                                  GF_ANON_FD_FLAGS);
12a457
 
12a457
                 if (fd) {
12a457
                         afr_delayed_changelog_wake_up (this, fd);
12a457
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
12a457
index dce6d49..f54878c 100644
12a457
--- a/xlators/features/shard/src/shard.c
12a457
+++ b/xlators/features/shard/src/shard.c
12a457
@@ -3638,6 +3638,7 @@ shard_common_inode_write_do (call_frame_t *frame, xlator_t *this)
12a457
         shard_local_t  *local             = NULL;
12a457
         struct iovec   *vec               = NULL;
12a457
         gf_boolean_t    wind_failed       = _gf_false;
12a457
+        gf_boolean_t    odirect           = _gf_false;
12a457
         off_t           orig_offset       = 0;
12a457
         off_t           shard_offset      = 0;
12a457
         off_t           vec_offset        = 0;
12a457
@@ -3668,6 +3669,9 @@ shard_common_inode_write_do (call_frame_t *frame, xlator_t *this)
12a457
                 return 0;
12a457
         }
12a457
 
12a457
+        if ((fd->flags & O_DIRECT) && (local->fop == GF_FOP_WRITE))
12a457
+                odirect = _gf_true;
12a457
+
12a457
         while (cur_block <= last_block) {
12a457
                 if (wind_failed) {
12a457
                         shard_common_inode_write_do_cbk (frame,
12a457
@@ -3725,6 +3729,13 @@ shard_common_inode_write_do (call_frame_t *frame, xlator_t *this)
12a457
                                                                  NULL, NULL);
12a457
                                 goto next;
12a457
                         }
12a457
+
12a457
+                        if (local->fop == GF_FOP_WRITE) {
12a457
+                                if (odirect)
12a457
+                                        local->flags = O_DIRECT;
12a457
+                                else
12a457
+                                        local->flags = GF_ANON_FD_FLAGS;
12a457
+                        }
12a457
                 }
12a457
 
12a457
                 shard_common_inode_write_wind (frame, this, anon_fd,
12a457
diff --git a/xlators/nfs/server/src/nfs-fops.c b/xlators/nfs/server/src/nfs-fops.c
12a457
index e5990a1..167f6b7 100644
12a457
--- a/xlators/nfs/server/src/nfs-fops.c
12a457
+++ b/xlators/nfs/server/src/nfs-fops.c
12a457
@@ -1386,6 +1386,8 @@ nfs_fop_write (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
12a457
         call_frame_t            *frame = NULL;
12a457
         int                     ret = -EFAULT;
12a457
         struct nfs_fop_local    *nfl = NULL;
12a457
+        int flags = 0;
12a457
+        nfs3_call_state_t       *cs = local;
12a457
 
12a457
         if ((!nfsx) || (!xl) || (!fd) || (!vector) || (!nfu) || (!srciobref))
12a457
                 return ret;
12a457
@@ -1403,8 +1405,20 @@ nfs_fop_write (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
12a457
 
12a457
         iobref_add (nfl->iobref, srciob);
12a457
 */
12a457
+
12a457
+        switch (cs->writetype) {
12a457
+        case UNSTABLE:
12a457
+                break;
12a457
+        case DATA_SYNC:
12a457
+                flags |= O_DSYNC;
12a457
+                break;
12a457
+        case FILE_SYNC:
12a457
+                flags |= O_SYNC;
12a457
+                break;
12a457
+        }
12a457
+
12a457
         STACK_WIND_COOKIE (frame, nfs_fop_writev_cbk, xl, xl,xl->fops->writev,
12a457
-                           fd, vector, count, offset, fd->flags, srciobref, NULL);
12a457
+                           fd, vector, count, offset, flags, srciobref, NULL);
12a457
         ret = 0;
12a457
 err:
12a457
         if (ret < 0) {
12a457
diff --git a/xlators/nfs/server/src/nfs3.c b/xlators/nfs/server/src/nfs3.c
12a457
index 4035491..aa7af31 100644
12a457
--- a/xlators/nfs/server/src/nfs3.c
12a457
+++ b/xlators/nfs/server/src/nfs3.c
12a457
@@ -2245,24 +2245,6 @@ nfs3_write_resume (void *carg)
12a457
 
12a457
         cs->fd = fd;    /* Gets unrefd when the call state is wiped. */
12a457
 
12a457
-/*
12a457
-  enum stable_how {
12a457
-  UNSTABLE = 0,
12a457
-  DATA_SYNC = 1,
12a457
-  FILE_SYNC = 2,
12a457
-  };
12a457
-*/
12a457
-	switch (cs->writetype) {
12a457
-	case UNSTABLE:
12a457
-		break;
12a457
-	case DATA_SYNC:
12a457
-		fd->flags |= O_DSYNC;
12a457
-		break;
12a457
-	case FILE_SYNC:
12a457
-		fd->flags |= O_SYNC;
12a457
-		break;
12a457
-	}
12a457
-
12a457
         ret = __nfs3_write_resume (cs);
12a457
         if (ret < 0)
12a457
                 stat = nfs3_errno_to_nfsstat3 (-ret);
12a457
diff --git a/xlators/protocol/server/src/server-resolve.c b/xlators/protocol/server/src/server-resolve.c
12a457
index 26bf37e..4e6ec6a 100644
12a457
--- a/xlators/protocol/server/src/server-resolve.c
12a457
+++ b/xlators/protocol/server/src/server-resolve.c
12a457
@@ -458,7 +458,7 @@ resolve_anonfd_simple (call_frame_t *frame)
12a457
 
12a457
         ret = 0;
12a457
 
12a457
-        state->fd = fd_anonymous (inode);
12a457
+        state->fd = fd_anonymous_with_flags (inode, state->flags);
12a457
 out:
12a457
         if (inode)
12a457
                 inode_unref (inode);
12a457
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
12a457
index 7fb1a24..56c48bd 100644
12a457
--- a/xlators/storage/posix/src/posix-helpers.c
12a457
+++ b/xlators/storage/posix/src/posix-helpers.c
12a457
@@ -1687,6 +1687,7 @@ __posix_fd_ctx_get (fd_t *fd, xlator_t *this, struct posix_fd **pfd_p)
12a457
 
12a457
         pfd->fd = _fd;
12a457
         pfd->dir = dir;
12a457
+        pfd->flags = fd->flags;
12a457
 
12a457
         ret = __fd_ctx_set (fd, this, (uint64_t) (long) pfd);
12a457
         if (ret != 0) {
12a457
-- 
12a457
1.7.1
12a457