e7a346
From 0ce89d9d2bb0b162ecd4dc47c663569815acdb7b Mon Sep 17 00:00:00 2001
e7a346
From: Pranith Kumar K <pkarampu@redhat.com>
e7a346
Date: Mon, 19 Mar 2018 15:12:14 +0530
e7a346
Subject: [PATCH 195/201] storage/posix: Add active-fd-count option in gluster
e7a346
e7a346
Problem:
e7a346
When dd happens on a sharded replicate volume, all the writes on shards happen
e7a346
through anon-fd. When the writes don't come quickly enough, the old anon-fd closes
e7a346
and a new fd gets created to serve the new writes. open-fd-count is decremented
e7a346
only after the fd is closed as part of fd_destroy(). So even when one fd is on
e7a346
the way to be closed, a new fd will be created, and during this short period it
e7a346
appears as though there are multiple fds opened on the file. AFR thinks another
e7a346
application opened the same file and switches off eager-lock, leading to
e7a346
extra latency.
e7a346
e7a346
Fix:
e7a346
Have a different option called active-fd-count whose life cycle starts at
e7a346
fd_bind() and ends just before fd_destroy().
e7a346
e7a346
 >BUG: 1557932
e7a346
e7a346
Upstream-patch: https://review.gluster.org/19740
e7a346
BUG: 1491785
e7a346
Change-Id: I2e221f6030feeedf29fbb3bd6554673b8a5b9c94
e7a346
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
e7a346
Reviewed-on: https://code.engineering.redhat.com/gerrit/133659
e7a346
Tested-by: RHGS Build Bot <nigelb@redhat.com>
e7a346
---
e7a346
 libglusterfs/src/fd.c                     |  2 ++
e7a346
 libglusterfs/src/glusterfs.h              |  1 +
e7a346
 libglusterfs/src/inode.c                  |  2 ++
e7a346
 libglusterfs/src/inode.h                  |  1 +
e7a346
 tests/volume.rc                           | 14 ++++++++-
e7a346
 xlators/storage/posix/src/posix-helpers.c | 52 ++++++++++++-------------------
e7a346
 xlators/storage/posix/src/posix.c         | 12 +++++++
e7a346
 7 files changed, 51 insertions(+), 33 deletions(-)
e7a346
e7a346
diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c
e7a346
index a824db7..45b0d32 100644
e7a346
--- a/libglusterfs/src/fd.c
e7a346
+++ b/libglusterfs/src/fd.c
e7a346
@@ -557,6 +557,7 @@ fd_unref (fd_t *fd)
e7a346
                 if (refcount == 0) {
e7a346
                         if (!list_empty (&fd->inode_list)) {
e7a346
                                 list_del_init (&fd->inode_list);
e7a346
+                                fd->inode->active_fd_count--;
e7a346
                                 bound = _gf_true;
e7a346
                         }
e7a346
                 }
e7a346
@@ -578,6 +579,7 @@ __fd_bind (fd_t *fd)
e7a346
         list_del_init (&fd->inode_list);
e7a346
         list_add (&fd->inode_list, &fd->inode->fd_list);
e7a346
         fd->inode->fd_count++;
e7a346
+        fd->inode->active_fd_count++;
e7a346
 
e7a346
         return fd;
e7a346
 }
e7a346
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
e7a346
index c8835d9..5abfafa 100644
e7a346
--- a/libglusterfs/src/glusterfs.h
e7a346
+++ b/libglusterfs/src/glusterfs.h
e7a346
@@ -164,6 +164,7 @@
e7a346
 #define GLUSTERFS_WRITE_IS_APPEND "glusterfs.write-is-append"
e7a346
 #define GLUSTERFS_WRITE_UPDATE_ATOMIC "glusterfs.write-update-atomic"
e7a346
 #define GLUSTERFS_OPEN_FD_COUNT "glusterfs.open-fd-count"
e7a346
+#define GLUSTERFS_ACTIVE_FD_COUNT "glusterfs.open-active-fd-count"
e7a346
 #define GLUSTERFS_INODELK_COUNT "glusterfs.inodelk-count"
e7a346
 #define GLUSTERFS_ENTRYLK_COUNT "glusterfs.entrylk-count"
e7a346
 #define GLUSTERFS_POSIXLK_COUNT "glusterfs.posixlk-count"
e7a346
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
e7a346
index b7b5ac6..ffba1bf 100644
e7a346
--- a/libglusterfs/src/inode.c
e7a346
+++ b/libglusterfs/src/inode.c
e7a346
@@ -2344,6 +2344,8 @@ inode_dump (inode_t *inode, char *prefix)
e7a346
                 gf_proc_dump_write("gfid", "%s", uuid_utoa (inode->gfid));
e7a346
                 gf_proc_dump_write("nlookup", "%ld", inode->nlookup);
e7a346
                 gf_proc_dump_write("fd-count", "%u", inode->fd_count);
e7a346
+                gf_proc_dump_write("active-fd-count", "%u",
e7a346
+                                   inode->active_fd_count);
e7a346
                 gf_proc_dump_write("ref", "%u", inode->ref);
e7a346
                 gf_proc_dump_write("ia_type", "%d", inode->ia_type);
e7a346
                 if (inode->_ctx) {
e7a346
diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h
e7a346
index b82b6ba..7a87748 100644
e7a346
--- a/libglusterfs/src/inode.h
e7a346
+++ b/libglusterfs/src/inode.h
e7a346
@@ -93,6 +93,7 @@ struct _inode {
e7a346
         gf_lock_t            lock;
e7a346
         uint64_t             nlookup;
e7a346
         uint32_t             fd_count;      /* Open fd count */
e7a346
+        uint32_t             active_fd_count;      /* Active open fd count */
e7a346
         uint32_t             ref;           /* reference count on this inode */
e7a346
         ia_type_t            ia_type;       /* what kind of file */
e7a346
         struct list_head     fd_list;       /* list of open files on this inode */
e7a346
diff --git a/tests/volume.rc b/tests/volume.rc
e7a346
index a15c8e5..d57aa93 100644
e7a346
--- a/tests/volume.rc
e7a346
+++ b/tests/volume.rc
e7a346
@@ -804,7 +804,19 @@ function get_fd_count {
e7a346
         local fname=$4
e7a346
         local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname))
e7a346
         local statedump=$(generate_brick_statedump $vol $host $brick)
e7a346
-        local count=$(grep "gfid=$gfid_str" $statedump -A2 | grep fd-count | cut -f2 -d'=' | tail -1)
e7a346
+        local count=$(grep "gfid=$gfid_str" $statedump -A2 -B1 | grep $brick -A3 | grep -w fd-count | cut -f2 -d'=' | tail -1)
e7a346
+        rm -f $statedump
e7a346
+        echo $count
e7a346
+}
e7a346
+
e7a346
+function get_active_fd_count {
e7a346
+        local vol=$1
e7a346
+        local host=$2
e7a346
+        local brick=$3
e7a346
+        local fname=$4
e7a346
+        local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname))
e7a346
+        local statedump=$(generate_brick_statedump $vol $host $brick)
e7a346
+        local count=$(grep "gfid=$gfid_str" $statedump -A2 -B1 | grep $brick -A3 | grep -w active-fd-count | cut -f2 -d'=' | tail -1)
e7a346
         rm -f $statedump
e7a346
         echo $count
e7a346
 }
e7a346
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
e7a346
index bc97206..ba1d8c3 100644
e7a346
--- a/xlators/storage/posix/src/posix-helpers.c
e7a346
+++ b/xlators/storage/posix/src/posix-helpers.c
e7a346
@@ -388,27 +388,6 @@ _get_filler_inode (posix_xattr_filler_t *filler)
e7a346
 }
e7a346
 
e7a346
 static int
e7a346
-_posix_filler_get_openfd_count (posix_xattr_filler_t *filler, char *key)
e7a346
-{
e7a346
-        inode_t  *inode            = NULL;
e7a346
-        int      ret               = -1;
e7a346
-
e7a346
-        inode = _get_filler_inode (filler);
e7a346
-        if (!inode || gf_uuid_is_null (inode->gfid))
e7a346
-                        goto out;
e7a346
-
e7a346
-        ret = dict_set_uint32 (filler->xattr, key, inode->fd_count);
e7a346
-        if (ret < 0) {
e7a346
-                gf_msg (filler->this->name, GF_LOG_WARNING, 0,
e7a346
-                        P_MSG_DICT_SET_FAILED,
e7a346
-                        "Failed to set dictionary value for %s", key);
e7a346
-                goto out;
e7a346
-        }
e7a346
-out:
e7a346
-        return ret;
e7a346
-}
e7a346
-
e7a346
-static int
e7a346
 _posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data,
e7a346
                       void *xattrargs)
e7a346
 {
e7a346
@@ -416,11 +395,11 @@ _posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data,
e7a346
         int       ret      = -1;
e7a346
         char     *databuf  = NULL;
e7a346
         int       _fd      = -1;
e7a346
-        loc_t    *loc      = NULL;
e7a346
         ssize_t  req_size  = 0;
e7a346
         int32_t  list_offset = 0;
e7a346
         ssize_t  remaining_size = 0;
e7a346
         char     *xattr    = NULL;
e7a346
+        inode_t  *inode    = NULL;
e7a346
 
e7a346
         if (posix_xattr_ignorable (key))
e7a346
                 goto out;
e7a346
@@ -496,16 +475,25 @@ _posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data,
e7a346
                         GF_FREE (databuf);
e7a346
                 }
e7a346
         } else if (!strcmp (key, GLUSTERFS_OPEN_FD_COUNT)) {
e7a346
-                ret = _posix_filler_get_openfd_count (filler, key);
e7a346
-                loc = filler->loc;
e7a346
-                if (loc) {
e7a346
-                        ret = dict_set_uint32 (filler->xattr, key,
e7a346
-                                               loc->inode->fd_count);
e7a346
-                        if (ret < 0)
e7a346
-                                gf_msg (filler->this->name, GF_LOG_WARNING, 0,
e7a346
-                                        P_MSG_XDATA_GETXATTR,
e7a346
-                                        "Failed to set dictionary value for %s",
e7a346
-                                        key);
e7a346
+                inode = _get_filler_inode (filler);
e7a346
+                if (!inode || gf_uuid_is_null (inode->gfid))
e7a346
+                                goto out;
e7a346
+                ret = dict_set_uint32 (filler->xattr, key, inode->fd_count);
e7a346
+                if (ret < 0) {
e7a346
+                        gf_msg (filler->this->name, GF_LOG_WARNING, 0,
e7a346
+                                P_MSG_DICT_SET_FAILED,
e7a346
+                                "Failed to set dictionary value for %s", key);
e7a346
+                }
e7a346
+        } else if (!strcmp (key, GLUSTERFS_ACTIVE_FD_COUNT)) {
e7a346
+                inode = _get_filler_inode (filler);
e7a346
+                if (!inode || gf_uuid_is_null (inode->gfid))
e7a346
+                                goto out;
e7a346
+                ret = dict_set_uint32 (filler->xattr, key,
e7a346
+                                       inode->active_fd_count);
e7a346
+                if (ret < 0) {
e7a346
+                        gf_msg (filler->this->name, GF_LOG_WARNING, 0,
e7a346
+                                P_MSG_DICT_SET_FAILED,
e7a346
+                                "Failed to set dictionary value for %s", key);
e7a346
                 }
e7a346
         } else if (!strcmp (key, GET_ANCESTRY_PATH_KEY)) {
e7a346
                 /* As of now, the only consumers of POSIX_ANCESTRY_PATH attempt
e7a346
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
e7a346
index a412e6d..6856e5e 100644
e7a346
--- a/xlators/storage/posix/src/posix.c
e7a346
+++ b/xlators/storage/posix/src/posix.c
e7a346
@@ -3554,6 +3554,18 @@ _fill_writev_xdata (fd_t *fd, dict_t *xdata, xlator_t *this, int is_append)
e7a346
                 }
e7a346
         }
e7a346
 
e7a346
+        if (dict_get (xdata, GLUSTERFS_ACTIVE_FD_COUNT)) {
e7a346
+                ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_ACTIVE_FD_COUNT,
e7a346
+                                       fd->inode->active_fd_count);
e7a346
+                if (ret < 0) {
e7a346
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
e7a346
+                                P_MSG_DICT_SET_FAILED, "%s: Failed to set "
e7a346
+                                "dictionary value for %s",
e7a346
+                                uuid_utoa (fd->inode->gfid),
e7a346
+                                GLUSTERFS_ACTIVE_FD_COUNT);
e7a346
+                }
e7a346
+        }
e7a346
+
e7a346
         if (dict_get (xdata, GLUSTERFS_WRITE_IS_APPEND)) {
e7a346
                 ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_WRITE_IS_APPEND,
e7a346
                                        is_append);
e7a346
-- 
e7a346
1.8.3.1
e7a346