From b97269044f8a541017d600cc8682dc51e62258c7 Mon Sep 17 00:00:00 2001
From: Krutika Dhananjay <kdhananj@redhat.com>
Date: Mon, 2 May 2016 16:51:10 +0530
Subject: [PATCH 190/192] core, shard: Make shards inherit main file's O_DIRECT flag if present
Backport of: http://review.gluster.org/14191
If the application opens a file with O_DIRECT, the shards'
anon fds also need to inherit the flag. To this end, the
shard xlator passes O_DIRECT in the @flags parameter of the
WRITEV fop. The flag is then used during anon fd resolution
and the subsequent open() by the posix xlator.
Change-Id: Ide18f819d34bd3705eb95220baa2f1029a3e0102
BUG: 1339136
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/76045
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
---
libglusterfs/src/fd.c | 41 +++++++++++++++++++++-----
libglusterfs/src/fd.h | 4 ++-
xlators/cluster/afr/src/afr-transaction.c | 3 +-
xlators/features/shard/src/shard.c | 11 +++++++
xlators/nfs/server/src/nfs-fops.c | 16 +++++++++-
xlators/nfs/server/src/nfs3.c | 18 -----------
xlators/protocol/server/src/server-resolve.c | 2 +-
xlators/storage/posix/src/posix-helpers.c | 1 +
8 files changed, 66 insertions(+), 30 deletions(-)
diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c
index 7d9a0cc..89ba357 100644
--- a/libglusterfs/src/fd.c
+++ b/libglusterfs/src/fd.c
@@ -749,7 +749,7 @@ fd_lookup_uint64 (inode_t *inode, uint64_t pid)
}
static fd_t *
-__fd_lookup_anonymous (inode_t *inode)
+__fd_lookup_anonymous (inode_t *inode, int32_t flags)
{
fd_t *iter_fd = NULL;
fd_t *fd = NULL;
@@ -758,7 +758,7 @@ __fd_lookup_anonymous (inode_t *inode)
return NULL;
list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
- if (iter_fd->anonymous) {
+ if ((iter_fd->anonymous) && (flags == iter_fd->flags)) {
fd = __fd_ref (iter_fd);
break;
}
@@ -768,11 +768,11 @@ __fd_lookup_anonymous (inode_t *inode)
}
static fd_t *
-__fd_anonymous (inode_t *inode)
+__fd_anonymous (inode_t *inode, int32_t flags)
{
fd_t *fd = NULL;
- fd = __fd_lookup_anonymous (inode);
+ fd = __fd_lookup_anonymous (inode, flags);
/* if (fd); then we already have increased the refcount in
__fd_lookup_anonymous(), so no need of one more fd_ref().
@@ -785,7 +785,7 @@ __fd_anonymous (inode_t *inode)
return NULL;
fd->anonymous = _gf_true;
- fd->flags = GF_ANON_FD_FLAGS;
+ fd->flags = GF_ANON_FD_FLAGS|flags;
__fd_bind (fd);
@@ -803,7 +803,32 @@ fd_anonymous (inode_t *inode)
LOCK (&inode->lock);
{
- fd = __fd_anonymous (inode);
+ fd = __fd_anonymous (inode, GF_ANON_FD_FLAGS);
+ }
+ UNLOCK (&inode->lock);
+
+ return fd;
+}
+
+fd_t *
+fd_anonymous_with_flags (inode_t *inode, int32_t flags)
+{
+ fd_t *fd = NULL;
+
+ LOCK (&inode->lock);
+ {
+ if (flags == 0)
+ flags = GF_ANON_FD_FLAGS;
+ /* If this API is ever called with O_SYNC or O_DSYNC in @flags,
+ * reset the bits associated with these flags before calling
+ * __fd_anonymous(). That way, posix will do the open() without
+ * these flags. And subsequently, posix_writev() (mostly) will
+ * do the write within inode->lock on an fd without O_SYNC or
+ * O_DSYNC and in its place do an fsync() outside of the locks
+ * to simulate the effect of using these flags.
+ */
+ flags &= (~(O_SYNC|O_DSYNC));
+ fd = __fd_anonymous (inode, flags);
}
UNLOCK (&inode->lock);
@@ -811,7 +836,7 @@ fd_anonymous (inode_t *inode)
}
fd_t*
-fd_lookup_anonymous (inode_t *inode)
+fd_lookup_anonymous (inode_t *inode, int32_t flags)
{
fd_t *fd = NULL;
@@ -823,7 +848,7 @@ fd_lookup_anonymous (inode_t *inode)
LOCK (&inode->lock);
{
- fd = __fd_lookup_anonymous (inode);
+ fd = __fd_lookup_anonymous (inode, flags);
}
UNLOCK (&inode->lock);
return fd;
diff --git a/libglusterfs/src/fd.h b/libglusterfs/src/fd.h
index 2e93f76..66e983d 100644
--- a/libglusterfs/src/fd.h
+++ b/libglusterfs/src/fd.h
@@ -141,11 +141,13 @@ fd_t *
fd_lookup_uint64 (struct _inode *inode, uint64_t pid);
fd_t*
-fd_lookup_anonymous (inode_t *inode);
+fd_lookup_anonymous (inode_t *inode, int32_t flags);
fd_t *
fd_anonymous (inode_t *inode);
+fd_t *
+fd_anonymous_with_flags (inode_t *inode, int32_t flags);
gf_boolean_t
fd_is_anonymous (fd_t *fd);
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 316c7cd..0353406 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -2091,7 +2091,8 @@ afr_transaction_start (call_frame_t *frame, xlator_t *this)
if (!local->transaction.eager_lock_on && local->loc.inode) {
fd = fd_lookup (local->loc.inode, frame->root->pid);
if (fd == NULL)
- fd = fd_lookup_anonymous (local->loc.inode);
+ fd = fd_lookup_anonymous (local->loc.inode,
+ GF_ANON_FD_FLAGS);
if (fd) {
afr_delayed_changelog_wake_up (this, fd);
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index dce6d49..f54878c 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -3638,6 +3638,7 @@ shard_common_inode_write_do (call_frame_t *frame, xlator_t *this)
shard_local_t *local = NULL;
struct iovec *vec = NULL;
gf_boolean_t wind_failed = _gf_false;
+ gf_boolean_t odirect = _gf_false;
off_t orig_offset = 0;
off_t shard_offset = 0;
off_t vec_offset = 0;
@@ -3668,6 +3669,9 @@ shard_common_inode_write_do (call_frame_t *frame, xlator_t *this)
return 0;
}
+ if ((fd->flags & O_DIRECT) && (local->fop == GF_FOP_WRITE))
+ odirect = _gf_true;
+
while (cur_block <= last_block) {
if (wind_failed) {
shard_common_inode_write_do_cbk (frame,
@@ -3725,6 +3729,13 @@ shard_common_inode_write_do (call_frame_t *frame, xlator_t *this)
NULL, NULL);
goto next;
}
+
+ if (local->fop == GF_FOP_WRITE) {
+ if (odirect)
+ local->flags = O_DIRECT;
+ else
+ local->flags = GF_ANON_FD_FLAGS;
+ }
}
shard_common_inode_write_wind (frame, this, anon_fd,
diff --git a/xlators/nfs/server/src/nfs-fops.c b/xlators/nfs/server/src/nfs-fops.c
index e5990a1..167f6b7 100644
--- a/xlators/nfs/server/src/nfs-fops.c
+++ b/xlators/nfs/server/src/nfs-fops.c
@@ -1386,6 +1386,8 @@ nfs_fop_write (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
call_frame_t *frame = NULL;
int ret = -EFAULT;
struct nfs_fop_local *nfl = NULL;
+ int flags = 0;
+ nfs3_call_state_t *cs = local;
if ((!nfsx) || (!xl) || (!fd) || (!vector) || (!nfu) || (!srciobref))
return ret;
@@ -1403,8 +1405,20 @@ nfs_fop_write (xlator_t *nfsx, xlator_t *xl, nfs_user_t *nfu, fd_t *fd,
iobref_add (nfl->iobref, srciob);
*/
+
+ switch (cs->writetype) {
+ case UNSTABLE:
+ break;
+ case DATA_SYNC:
+ flags |= O_DSYNC;
+ break;
+ case FILE_SYNC:
+ flags |= O_SYNC;
+ break;
+ }
+
STACK_WIND_COOKIE (frame, nfs_fop_writev_cbk, xl, xl,xl->fops->writev,
- fd, vector, count, offset, fd->flags, srciobref, NULL);
+ fd, vector, count, offset, flags, srciobref, NULL);
ret = 0;
err:
if (ret < 0) {
diff --git a/xlators/nfs/server/src/nfs3.c b/xlators/nfs/server/src/nfs3.c
index 4035491..aa7af31 100644
--- a/xlators/nfs/server/src/nfs3.c
+++ b/xlators/nfs/server/src/nfs3.c
@@ -2245,24 +2245,6 @@ nfs3_write_resume (void *carg)
cs->fd = fd; /* Gets unrefd when the call state is wiped. */
-/*
- enum stable_how {
- UNSTABLE = 0,
- DATA_SYNC = 1,
- FILE_SYNC = 2,
- };
-*/
- switch (cs->writetype) {
- case UNSTABLE:
- break;
- case DATA_SYNC:
- fd->flags |= O_DSYNC;
- break;
- case FILE_SYNC:
- fd->flags |= O_SYNC;
- break;
- }
-
ret = __nfs3_write_resume (cs);
if (ret < 0)
stat = nfs3_errno_to_nfsstat3 (-ret);
diff --git a/xlators/protocol/server/src/server-resolve.c b/xlators/protocol/server/src/server-resolve.c
index 26bf37e..4e6ec6a 100644
--- a/xlators/protocol/server/src/server-resolve.c
+++ b/xlators/protocol/server/src/server-resolve.c
@@ -458,7 +458,7 @@ resolve_anonfd_simple (call_frame_t *frame)
ret = 0;
- state->fd = fd_anonymous (inode);
+ state->fd = fd_anonymous_with_flags (inode, state->flags);
out:
if (inode)
inode_unref (inode);
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index 7fb1a24..56c48bd 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -1687,6 +1687,7 @@ __posix_fd_ctx_get (fd_t *fd, xlator_t *this, struct posix_fd **pfd_p)
pfd->fd = _fd;
pfd->dir = dir;
+ pfd->flags = fd->flags;
ret = __fd_ctx_set (fd, this, (uint64_t) (long) pfd);
if (ret != 0) {
--
1.7.1