d1681e
From 0ce89d9d2bb0b162ecd4dc47c663569815acdb7b Mon Sep 17 00:00:00 2001
d1681e
From: Pranith Kumar K <pkarampu@redhat.com>
d1681e
Date: Mon, 19 Mar 2018 15:12:14 +0530
d1681e
Subject: [PATCH 195/201] storage/posix: Add active-fd-count option in gluster
d1681e
d1681e
Problem:
d1681e
when dd happens on sharded replicate volume all the writes on shards happen
d1681e
through anon-fd. When the writes don't come quick enough, old anon-fd closes
d1681e
and new fd gets created to serve the new writes. open-fd-count is decremented
d1681e
only after the fd is closed as part of fd_destroy(). So even when one fd is on
d1681e
the way to be closed a new fd will be created and during this short period it
d1681e
appears as though there are multiple fds opened on the file. AFR thinks another
d1681e
application opened the same file and switches off eager-lock leading to
d1681e
extra latency.
d1681e
d1681e
Fix:
d1681e
Have a different option called active-fd whose life cycle starts at
d1681e
fd_bind() and ends just before fd_destroy()
d1681e
d1681e
 >BUG: 1557932
d1681e
d1681e
Upstream-patch: https://review.gluster.org/19740
d1681e
BUG: 1491785
d1681e
Change-Id: I2e221f6030feeedf29fbb3bd6554673b8a5b9c94
d1681e
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
d1681e
Reviewed-on: https://code.engineering.redhat.com/gerrit/133659
d1681e
Tested-by: RHGS Build Bot <nigelb@redhat.com>
d1681e
---
d1681e
 libglusterfs/src/fd.c                     |  2 ++
d1681e
 libglusterfs/src/glusterfs.h              |  1 +
d1681e
 libglusterfs/src/inode.c                  |  2 ++
d1681e
 libglusterfs/src/inode.h                  |  1 +
d1681e
 tests/volume.rc                           | 14 ++++++++-
d1681e
 xlators/storage/posix/src/posix-helpers.c | 52 ++++++++++++-------------------
d1681e
 xlators/storage/posix/src/posix.c         | 12 +++++++
d1681e
 7 files changed, 51 insertions(+), 33 deletions(-)
d1681e
d1681e
diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c
d1681e
index a824db7..45b0d32 100644
d1681e
--- a/libglusterfs/src/fd.c
d1681e
+++ b/libglusterfs/src/fd.c
d1681e
@@ -557,6 +557,7 @@ fd_unref (fd_t *fd)
d1681e
                 if (refcount == 0) {
d1681e
                         if (!list_empty (&fd->inode_list)) {
d1681e
                                 list_del_init (&fd->inode_list);
d1681e
+                                fd->inode->active_fd_count--;
d1681e
                                 bound = _gf_true;
d1681e
                         }
d1681e
                 }
d1681e
@@ -578,6 +579,7 @@ __fd_bind (fd_t *fd)
d1681e
         list_del_init (&fd->inode_list);
d1681e
         list_add (&fd->inode_list, &fd->inode->fd_list);
d1681e
         fd->inode->fd_count++;
d1681e
+        fd->inode->active_fd_count++;
d1681e
 
d1681e
         return fd;
d1681e
 }
d1681e
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
d1681e
index c8835d9..5abfafa 100644
d1681e
--- a/libglusterfs/src/glusterfs.h
d1681e
+++ b/libglusterfs/src/glusterfs.h
d1681e
@@ -164,6 +164,7 @@
d1681e
 #define GLUSTERFS_WRITE_IS_APPEND "glusterfs.write-is-append"
d1681e
 #define GLUSTERFS_WRITE_UPDATE_ATOMIC "glusterfs.write-update-atomic"
d1681e
 #define GLUSTERFS_OPEN_FD_COUNT "glusterfs.open-fd-count"
d1681e
+#define GLUSTERFS_ACTIVE_FD_COUNT "glusterfs.open-active-fd-count"
d1681e
 #define GLUSTERFS_INODELK_COUNT "glusterfs.inodelk-count"
d1681e
 #define GLUSTERFS_ENTRYLK_COUNT "glusterfs.entrylk-count"
d1681e
 #define GLUSTERFS_POSIXLK_COUNT "glusterfs.posixlk-count"
d1681e
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
d1681e
index b7b5ac6..ffba1bf 100644
d1681e
--- a/libglusterfs/src/inode.c
d1681e
+++ b/libglusterfs/src/inode.c
d1681e
@@ -2344,6 +2344,8 @@ inode_dump (inode_t *inode, char *prefix)
d1681e
                 gf_proc_dump_write("gfid", "%s", uuid_utoa (inode->gfid));
d1681e
                 gf_proc_dump_write("nlookup", "%ld", inode->nlookup);
d1681e
                 gf_proc_dump_write("fd-count", "%u", inode->fd_count);
d1681e
+                gf_proc_dump_write("active-fd-count", "%u",
d1681e
+                                   inode->active_fd_count);
d1681e
                 gf_proc_dump_write("ref", "%u", inode->ref);
d1681e
                 gf_proc_dump_write("ia_type", "%d", inode->ia_type);
d1681e
                 if (inode->_ctx) {
d1681e
diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h
d1681e
index b82b6ba..7a87748 100644
d1681e
--- a/libglusterfs/src/inode.h
d1681e
+++ b/libglusterfs/src/inode.h
d1681e
@@ -93,6 +93,7 @@ struct _inode {
d1681e
         gf_lock_t            lock;
d1681e
         uint64_t             nlookup;
d1681e
         uint32_t             fd_count;      /* Open fd count */
d1681e
+        uint32_t             active_fd_count;      /* Active open fd count */
d1681e
         uint32_t             ref;           /* reference count on this inode */
d1681e
         ia_type_t            ia_type;       /* what kind of file */
d1681e
         struct list_head     fd_list;       /* list of open files on this inode */
d1681e
diff --git a/tests/volume.rc b/tests/volume.rc
d1681e
index a15c8e5..d57aa93 100644
d1681e
--- a/tests/volume.rc
d1681e
+++ b/tests/volume.rc
d1681e
@@ -804,7 +804,19 @@ function get_fd_count {
d1681e
         local fname=$4
d1681e
         local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname))
d1681e
         local statedump=$(generate_brick_statedump $vol $host $brick)
d1681e
-        local count=$(grep "gfid=$gfid_str" $statedump -A2 | grep fd-count | cut -f2 -d'=' | tail -1)
d1681e
+        local count=$(grep "gfid=$gfid_str" $statedump -A2 -B1 | grep $brick -A3 | grep -w fd-count | cut -f2 -d'=' | tail -1)
d1681e
+        rm -f $statedump
d1681e
+        echo $count
d1681e
+}
d1681e
+
d1681e
+function get_active_fd_count {
d1681e
+        local vol=$1
d1681e
+        local host=$2
d1681e
+        local brick=$3
d1681e
+        local fname=$4
d1681e
+        local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname))
d1681e
+        local statedump=$(generate_brick_statedump $vol $host $brick)
d1681e
+        local count=$(grep "gfid=$gfid_str" $statedump -A2 -B1 | grep $brick -A3 | grep -w active-fd-count | cut -f2 -d'=' | tail -1)
d1681e
         rm -f $statedump
d1681e
         echo $count
d1681e
 }
d1681e
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
d1681e
index bc97206..ba1d8c3 100644
d1681e
--- a/xlators/storage/posix/src/posix-helpers.c
d1681e
+++ b/xlators/storage/posix/src/posix-helpers.c
d1681e
@@ -388,27 +388,6 @@ _get_filler_inode (posix_xattr_filler_t *filler)
d1681e
 }
d1681e
 
d1681e
 static int
d1681e
-_posix_filler_get_openfd_count (posix_xattr_filler_t *filler, char *key)
d1681e
-{
d1681e
-        inode_t  *inode            = NULL;
d1681e
-        int      ret               = -1;
d1681e
-
d1681e
-        inode = _get_filler_inode (filler);
d1681e
-        if (!inode || gf_uuid_is_null (inode->gfid))
d1681e
-                        goto out;
d1681e
-
d1681e
-        ret = dict_set_uint32 (filler->xattr, key, inode->fd_count);
d1681e
-        if (ret < 0) {
d1681e
-                gf_msg (filler->this->name, GF_LOG_WARNING, 0,
d1681e
-                        P_MSG_DICT_SET_FAILED,
d1681e
-                        "Failed to set dictionary value for %s", key);
d1681e
-                goto out;
d1681e
-        }
d1681e
-out:
d1681e
-        return ret;
d1681e
-}
d1681e
-
d1681e
-static int
d1681e
 _posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data,
d1681e
                       void *xattrargs)
d1681e
 {
d1681e
@@ -416,11 +395,11 @@ _posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data,
d1681e
         int       ret      = -1;
d1681e
         char     *databuf  = NULL;
d1681e
         int       _fd      = -1;
d1681e
-        loc_t    *loc      = NULL;
d1681e
         ssize_t  req_size  = 0;
d1681e
         int32_t  list_offset = 0;
d1681e
         ssize_t  remaining_size = 0;
d1681e
         char     *xattr    = NULL;
d1681e
+        inode_t  *inode    = NULL;
d1681e
 
d1681e
         if (posix_xattr_ignorable (key))
d1681e
                 goto out;
d1681e
@@ -496,16 +475,25 @@ _posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data,
d1681e
                         GF_FREE (databuf);
d1681e
                 }
d1681e
         } else if (!strcmp (key, GLUSTERFS_OPEN_FD_COUNT)) {
d1681e
-                ret = _posix_filler_get_openfd_count (filler, key);
d1681e
-                loc = filler->loc;
d1681e
-                if (loc) {
d1681e
-                        ret = dict_set_uint32 (filler->xattr, key,
d1681e
-                                               loc->inode->fd_count);
d1681e
-                        if (ret < 0)
d1681e
-                                gf_msg (filler->this->name, GF_LOG_WARNING, 0,
d1681e
-                                        P_MSG_XDATA_GETXATTR,
d1681e
-                                        "Failed to set dictionary value for %s",
d1681e
-                                        key);
d1681e
+                inode = _get_filler_inode (filler);
d1681e
+                if (!inode || gf_uuid_is_null (inode->gfid))
d1681e
+                                goto out;
d1681e
+                ret = dict_set_uint32 (filler->xattr, key, inode->fd_count);
d1681e
+                if (ret < 0) {
d1681e
+                        gf_msg (filler->this->name, GF_LOG_WARNING, 0,
d1681e
+                                P_MSG_DICT_SET_FAILED,
d1681e
+                                "Failed to set dictionary value for %s", key);
d1681e
+                }
d1681e
+        } else if (!strcmp (key, GLUSTERFS_ACTIVE_FD_COUNT)) {
d1681e
+                inode = _get_filler_inode (filler);
d1681e
+                if (!inode || gf_uuid_is_null (inode->gfid))
d1681e
+                                goto out;
d1681e
+                ret = dict_set_uint32 (filler->xattr, key,
d1681e
+                                       inode->active_fd_count);
d1681e
+                if (ret < 0) {
d1681e
+                        gf_msg (filler->this->name, GF_LOG_WARNING, 0,
d1681e
+                                P_MSG_DICT_SET_FAILED,
d1681e
+                                "Failed to set dictionary value for %s", key);
d1681e
                 }
d1681e
         } else if (!strcmp (key, GET_ANCESTRY_PATH_KEY)) {
d1681e
                 /* As of now, the only consumers of POSIX_ANCESTRY_PATH attempt
d1681e
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
d1681e
index a412e6d..6856e5e 100644
d1681e
--- a/xlators/storage/posix/src/posix.c
d1681e
+++ b/xlators/storage/posix/src/posix.c
d1681e
@@ -3554,6 +3554,18 @@ _fill_writev_xdata (fd_t *fd, dict_t *xdata, xlator_t *this, int is_append)
d1681e
                 }
d1681e
         }
d1681e
 
d1681e
+        if (dict_get (xdata, GLUSTERFS_ACTIVE_FD_COUNT)) {
d1681e
+                ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_ACTIVE_FD_COUNT,
d1681e
+                                       fd->inode->active_fd_count);
d1681e
+                if (ret < 0) {
d1681e
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
d1681e
+                                P_MSG_DICT_SET_FAILED, "%s: Failed to set "
d1681e
+                                "dictionary value for %s",
d1681e
+                                uuid_utoa (fd->inode->gfid),
d1681e
+                                GLUSTERFS_ACTIVE_FD_COUNT);
d1681e
+                }
d1681e
+        }
d1681e
+
d1681e
         if (dict_get (xdata, GLUSTERFS_WRITE_IS_APPEND)) {
d1681e
                 ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_WRITE_IS_APPEND,
d1681e
                                        is_append);
d1681e
-- 
d1681e
1.8.3.1
d1681e