From 56cb9390434a26a0b2d9c364899eb436799fdf30 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Mon, 19 Mar 2018 15:12:14 +0530 Subject: [PATCH 668/675] storage/posix: Add active-fd-count option in gluster Problem: when dd happens on sharded replicate volume all the writes on shards happen through anon-fd. When the writes don't come quick enough, old anon-fd closes and new fd gets created to serve the new writes. open-fd-count is decremented only after the fd is closed as part of fd_destroy(). So even when one fd is on the way to be closed a new fd will be created and during this short period it appears as though there are multiple fds opened on the file. AFR thinks another application opened the same file and switches off eager-lock leading to extra latency. Fix: Have a different option called active-fd whose life cycle starts at fd_bind() and ends just before fd_destroy() >BUG: 1557932 Change-Id: If6cadbe97d183e124f9dc4672a5b621cbe3324cb Upstream-patch: https://review.gluster.org/19740 BUG: 1583733 Signed-off-by: Pranith Kumar K Reviewed-on: https://code.engineering.redhat.com/gerrit/140573 Tested-by: RHGS Build Bot Reviewed-by: Sunil Kumar Heggodu Gopala Acharya --- libglusterfs/src/fd.c | 2 ++ libglusterfs/src/glusterfs.h | 1 + libglusterfs/src/inode.c | 2 ++ libglusterfs/src/inode.h | 1 + tests/volume.rc | 12 +++++++ xlators/storage/posix/src/posix-helpers.c | 52 ++++++++++++------------------- xlators/storage/posix/src/posix.c | 12 +++++++ 7 files changed, 50 insertions(+), 32 deletions(-) diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c index 118f876..d91f4de 100644 --- a/libglusterfs/src/fd.c +++ b/libglusterfs/src/fd.c @@ -557,6 +557,7 @@ fd_unref (fd_t *fd) if (refcount == 0) { if (!list_empty (&fd->inode_list)) { list_del_init (&fd->inode_list); + fd->inode->active_fd_count--; bound = _gf_true; } } @@ -578,6 +579,7 @@ __fd_bind (fd_t *fd) list_del_init (&fd->inode_list); list_add (&fd->inode_list, &fd->inode->fd_list); fd->inode->fd_count++; + fd->inode->active_fd_count++; return fd; } diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index d13e5bd..277511a 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -159,6 +159,7 @@ #define GLUSTERFS_WRITE_IS_APPEND "glusterfs.write-is-append" #define GLUSTERFS_WRITE_UPDATE_ATOMIC "glusterfs.write-update-atomic" #define GLUSTERFS_OPEN_FD_COUNT "glusterfs.open-fd-count" +#define GLUSTERFS_ACTIVE_FD_COUNT "glusterfs.open-active-fd-count" #define GLUSTERFS_INODELK_COUNT "glusterfs.inodelk-count" #define GLUSTERFS_ENTRYLK_COUNT "glusterfs.entrylk-count" #define GLUSTERFS_POSIXLK_COUNT "glusterfs.posixlk-count" diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c index 0353825..2f67cf4 100644 --- a/libglusterfs/src/inode.c +++ b/libglusterfs/src/inode.c @@ -2332,6 +2332,8 @@ inode_dump (inode_t *inode, char *prefix) gf_proc_dump_write("gfid", "%s", uuid_utoa (inode->gfid)); gf_proc_dump_write("nlookup", "%ld", inode->nlookup); gf_proc_dump_write("fd-count", "%u", inode->fd_count); + gf_proc_dump_write("active-fd-count", "%u", + inode->active_fd_count); gf_proc_dump_write("ref", "%u", inode->ref); gf_proc_dump_write("ia_type", "%d", inode->ia_type); if (inode->_ctx) { diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h index e4ad046..506f495 100644 --- a/libglusterfs/src/inode.h +++ b/libglusterfs/src/inode.h @@ -89,6 +89,7 @@ struct _inode { gf_lock_t lock; uint64_t nlookup; uint32_t fd_count; /* Open fd count */ + uint32_t active_fd_count; /* Active open fd count */ uint32_t ref; /* reference count on this inode */ ia_type_t ia_type; /* what kind of file */ struct list_head fd_list; /* list of open files on this inode */ diff --git a/tests/volume.rc b/tests/volume.rc index 9ab31af..0d20b0e 100644 --- a/tests/volume.rc +++ b/tests/volume.rc @@ -778,3 +778,15 @@ function get_mount_lru_size_value { rm -f $statedump echo $val } + +function get_active_fd_count { + local vol=$1 + local host=$2 + local brick=$3 + local fname=$4 + local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname)) + local statedump=$(generate_brick_statedump $vol $host $brick) + local count=$(grep "gfid=$gfid_str" $statedump -A2 -B1 | grep $brick -A3 | grep -w active-fd-count | cut -f2 -d'=' | tail -1) + rm -f $statedump + echo $count +} diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c index 073465b..5f8304e 100644 --- a/xlators/storage/posix/src/posix-helpers.c +++ b/xlators/storage/posix/src/posix-helpers.c @@ -371,27 +371,6 @@ _get_filler_inode (posix_xattr_filler_t *filler) } static int -_posix_filler_get_openfd_count (posix_xattr_filler_t *filler, char *key) -{ - inode_t *inode = NULL; - int ret = -1; - - inode = _get_filler_inode (filler); - if (!inode || gf_uuid_is_null (inode->gfid)) - goto out; - - ret = dict_set_uint32 (filler->xattr, key, inode->fd_count); - if (ret < 0) { - gf_msg (filler->this->name, GF_LOG_WARNING, 0, - P_MSG_DICT_SET_FAILED, - "Failed to set dictionary value for %s", key); - goto out; - } -out: - return ret; -} - -static int _posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data, void *xattrargs) { @@ -399,8 +378,8 @@ _posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data, int ret = -1; char *databuf = NULL; int _fd = -1; - loc_t *loc = NULL; ssize_t req_size = 0; + inode_t *inode = NULL; if (posix_xattr_ignorable (key)) @@ -477,16 +456,25 @@ _posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data, GF_FREE (databuf); } } else if (!strcmp (key, GLUSTERFS_OPEN_FD_COUNT)) { - ret = _posix_filler_get_openfd_count (filler, key); - loc = filler->loc; - if (loc) { - ret = dict_set_uint32 (filler->xattr, key, - loc->inode->fd_count); - if (ret < 0) - gf_msg (filler->this->name, GF_LOG_WARNING, 0, - P_MSG_XDATA_GETXATTR, - "Failed to set dictionary value for %s", - key); + inode = _get_filler_inode (filler); + if (!inode || gf_uuid_is_null (inode->gfid)) + goto out; + ret = dict_set_uint32 (filler->xattr, key, inode->fd_count); + if (ret < 0) { + gf_msg (filler->this->name, GF_LOG_WARNING, 0, + P_MSG_DICT_SET_FAILED, + "Failed to set dictionary value for %s", key); + } + } else if (!strcmp (key, GLUSTERFS_ACTIVE_FD_COUNT)) { + inode = _get_filler_inode (filler); + if (!inode || gf_uuid_is_null (inode->gfid)) + goto out; + ret = dict_set_uint32 (filler->xattr, key, + inode->active_fd_count); + if (ret < 0) { + gf_msg (filler->this->name, GF_LOG_WARNING, 0, + P_MSG_DICT_SET_FAILED, + "Failed to set dictionary value for %s", key); } } else if (!strcmp (key, GET_ANCESTRY_PATH_KEY)) { /* As of now, the only consumers of POSIX_ANCESTRY_PATH attempt diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index d499e09..b0d7037 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -3353,6 +3353,18 @@ _fill_writev_xdata (fd_t *fd, dict_t *xdata, xlator_t *this, int is_append) } } + if (dict_get (xdata, GLUSTERFS_ACTIVE_FD_COUNT)) { + ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_ACTIVE_FD_COUNT, + fd->inode->active_fd_count); + if (ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, 0, + P_MSG_DICT_SET_FAILED, "%s: Failed to set " + "dictionary value for %s", + uuid_utoa (fd->inode->gfid), + GLUSTERFS_ACTIVE_FD_COUNT); + } + } + if (dict_get (xdata, GLUSTERFS_WRITE_IS_APPEND)) { ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_WRITE_IS_APPEND, is_append); -- 1.8.3.1