a3470f
From 0ce89d9d2bb0b162ecd4dc47c663569815acdb7b Mon Sep 17 00:00:00 2001
a3470f
From: Pranith Kumar K <pkarampu@redhat.com>
a3470f
Date: Mon, 19 Mar 2018 15:12:14 +0530
a3470f
Subject: [PATCH 195/201] storage/posix: Add active-fd-count option in gluster
a3470f
a3470f
Problem:
a3470f
when dd happens on sharded replicate volume all the writes on shards happen
a3470f
through anon-fd. When the writes don't come quick enough, old anon-fd closes
a3470f
and new fd gets created to serve the new writes. open-fd-count is decremented
a3470f
only after the fd is closed as part of fd_destroy(). So even when one fd is on
a3470f
the way to be closed a new fd will be created and during this short period it
a3470f
appears as though there are multiple fds opened on the file. AFR thinks another
a3470f
application opened the same file and switches off eager-lock leading to
a3470f
extra latency.
a3470f
a3470f
Fix:
a3470f
Have a different option called active-fd whose life cycle starts at
a3470f
fd_bind() and ends just before fd_destroy()
a3470f
a3470f
 >BUG: 1557932
a3470f
a3470f
Upstream-patch: https://review.gluster.org/19740
a3470f
BUG: 1491785
a3470f
Change-Id: I2e221f6030feeedf29fbb3bd6554673b8a5b9c94
a3470f
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
a3470f
Reviewed-on: https://code.engineering.redhat.com/gerrit/133659
a3470f
Tested-by: RHGS Build Bot <nigelb@redhat.com>
a3470f
---
a3470f
 libglusterfs/src/fd.c                     |  2 ++
a3470f
 libglusterfs/src/glusterfs.h              |  1 +
a3470f
 libglusterfs/src/inode.c                  |  2 ++
a3470f
 libglusterfs/src/inode.h                  |  1 +
a3470f
 tests/volume.rc                           | 14 ++++++++-
a3470f
 xlators/storage/posix/src/posix-helpers.c | 52 ++++++++++++-------------------
a3470f
 xlators/storage/posix/src/posix.c         | 12 +++++++
a3470f
 7 files changed, 51 insertions(+), 33 deletions(-)
a3470f
a3470f
diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c
a3470f
index a824db7..45b0d32 100644
a3470f
--- a/libglusterfs/src/fd.c
a3470f
+++ b/libglusterfs/src/fd.c
a3470f
@@ -557,6 +557,7 @@ fd_unref (fd_t *fd)
a3470f
                 if (refcount == 0) {
a3470f
                         if (!list_empty (&fd->inode_list)) {
a3470f
                                 list_del_init (&fd->inode_list);
a3470f
+                                fd->inode->active_fd_count--;
a3470f
                                 bound = _gf_true;
a3470f
                         }
a3470f
                 }
a3470f
@@ -578,6 +579,7 @@ __fd_bind (fd_t *fd)
a3470f
         list_del_init (&fd->inode_list);
a3470f
         list_add (&fd->inode_list, &fd->inode->fd_list);
a3470f
         fd->inode->fd_count++;
a3470f
+        fd->inode->active_fd_count++;
a3470f
 
a3470f
         return fd;
a3470f
 }
a3470f
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
a3470f
index c8835d9..5abfafa 100644
a3470f
--- a/libglusterfs/src/glusterfs.h
a3470f
+++ b/libglusterfs/src/glusterfs.h
a3470f
@@ -164,6 +164,7 @@
a3470f
 #define GLUSTERFS_WRITE_IS_APPEND "glusterfs.write-is-append"
a3470f
 #define GLUSTERFS_WRITE_UPDATE_ATOMIC "glusterfs.write-update-atomic"
a3470f
 #define GLUSTERFS_OPEN_FD_COUNT "glusterfs.open-fd-count"
a3470f
+#define GLUSTERFS_ACTIVE_FD_COUNT "glusterfs.open-active-fd-count"
a3470f
 #define GLUSTERFS_INODELK_COUNT "glusterfs.inodelk-count"
a3470f
 #define GLUSTERFS_ENTRYLK_COUNT "glusterfs.entrylk-count"
a3470f
 #define GLUSTERFS_POSIXLK_COUNT "glusterfs.posixlk-count"
a3470f
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
a3470f
index b7b5ac6..ffba1bf 100644
a3470f
--- a/libglusterfs/src/inode.c
a3470f
+++ b/libglusterfs/src/inode.c
a3470f
@@ -2344,6 +2344,8 @@ inode_dump (inode_t *inode, char *prefix)
a3470f
                 gf_proc_dump_write("gfid", "%s", uuid_utoa (inode->gfid));
a3470f
                 gf_proc_dump_write("nlookup", "%ld", inode->nlookup);
a3470f
                 gf_proc_dump_write("fd-count", "%u", inode->fd_count);
a3470f
+                gf_proc_dump_write("active-fd-count", "%u",
a3470f
+                                   inode->active_fd_count);
a3470f
                 gf_proc_dump_write("ref", "%u", inode->ref);
a3470f
                 gf_proc_dump_write("ia_type", "%d", inode->ia_type);
a3470f
                 if (inode->_ctx) {
a3470f
diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h
a3470f
index b82b6ba..7a87748 100644
a3470f
--- a/libglusterfs/src/inode.h
a3470f
+++ b/libglusterfs/src/inode.h
a3470f
@@ -93,6 +93,7 @@ struct _inode {
a3470f
         gf_lock_t            lock;
a3470f
         uint64_t             nlookup;
a3470f
         uint32_t             fd_count;      /* Open fd count */
a3470f
+        uint32_t             active_fd_count;      /* Active open fd count */
a3470f
         uint32_t             ref;           /* reference count on this inode */
a3470f
         ia_type_t            ia_type;       /* what kind of file */
a3470f
         struct list_head     fd_list;       /* list of open files on this inode */
a3470f
diff --git a/tests/volume.rc b/tests/volume.rc
a3470f
index a15c8e5..d57aa93 100644
a3470f
--- a/tests/volume.rc
a3470f
+++ b/tests/volume.rc
a3470f
@@ -804,7 +804,19 @@ function get_fd_count {
a3470f
         local fname=$4
a3470f
         local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname))
a3470f
         local statedump=$(generate_brick_statedump $vol $host $brick)
a3470f
-        local count=$(grep "gfid=$gfid_str" $statedump -A2 | grep fd-count | cut -f2 -d'=' | tail -1)
a3470f
+        local count=$(grep "gfid=$gfid_str" $statedump -A2 -B1 | grep $brick -A3 | grep -w fd-count | cut -f2 -d'=' | tail -1)
a3470f
+        rm -f $statedump
a3470f
+        echo $count
a3470f
+}
a3470f
+
a3470f
+function get_active_fd_count {
a3470f
+        local vol=$1
a3470f
+        local host=$2
a3470f
+        local brick=$3
a3470f
+        local fname=$4
a3470f
+        local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname))
a3470f
+        local statedump=$(generate_brick_statedump $vol $host $brick)
a3470f
+        local count=$(grep "gfid=$gfid_str" $statedump -A2 -B1 | grep $brick -A3 | grep -w active-fd-count | cut -f2 -d'=' | tail -1)
a3470f
         rm -f $statedump
a3470f
         echo $count
a3470f
 }
a3470f
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
a3470f
index bc97206..ba1d8c3 100644
a3470f
--- a/xlators/storage/posix/src/posix-helpers.c
a3470f
+++ b/xlators/storage/posix/src/posix-helpers.c
a3470f
@@ -388,27 +388,6 @@ _get_filler_inode (posix_xattr_filler_t *filler)
a3470f
 }
a3470f
 
a3470f
 static int
a3470f
-_posix_filler_get_openfd_count (posix_xattr_filler_t *filler, char *key)
a3470f
-{
a3470f
-        inode_t  *inode            = NULL;
a3470f
-        int      ret               = -1;
a3470f
-
a3470f
-        inode = _get_filler_inode (filler);
a3470f
-        if (!inode || gf_uuid_is_null (inode->gfid))
a3470f
-                        goto out;
a3470f
-
a3470f
-        ret = dict_set_uint32 (filler->xattr, key, inode->fd_count);
a3470f
-        if (ret < 0) {
a3470f
-                gf_msg (filler->this->name, GF_LOG_WARNING, 0,
a3470f
-                        P_MSG_DICT_SET_FAILED,
a3470f
-                        "Failed to set dictionary value for %s", key);
a3470f
-                goto out;
a3470f
-        }
a3470f
-out:
a3470f
-        return ret;
a3470f
-}
a3470f
-
a3470f
-static int
a3470f
 _posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data,
a3470f
                       void *xattrargs)
a3470f
 {
a3470f
@@ -416,11 +395,11 @@ _posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data,
a3470f
         int       ret      = -1;
a3470f
         char     *databuf  = NULL;
a3470f
         int       _fd      = -1;
a3470f
-        loc_t    *loc      = NULL;
a3470f
         ssize_t  req_size  = 0;
a3470f
         int32_t  list_offset = 0;
a3470f
         ssize_t  remaining_size = 0;
a3470f
         char     *xattr    = NULL;
a3470f
+        inode_t  *inode    = NULL;
a3470f
 
a3470f
         if (posix_xattr_ignorable (key))
a3470f
                 goto out;
a3470f
@@ -496,16 +475,25 @@ _posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data,
a3470f
                         GF_FREE (databuf);
a3470f
                 }
a3470f
         } else if (!strcmp (key, GLUSTERFS_OPEN_FD_COUNT)) {
a3470f
-                ret = _posix_filler_get_openfd_count (filler, key);
a3470f
-                loc = filler->loc;
a3470f
-                if (loc) {
a3470f
-                        ret = dict_set_uint32 (filler->xattr, key,
a3470f
-                                               loc->inode->fd_count);
a3470f
-                        if (ret < 0)
a3470f
-                                gf_msg (filler->this->name, GF_LOG_WARNING, 0,
a3470f
-                                        P_MSG_XDATA_GETXATTR,
a3470f
-                                        "Failed to set dictionary value for %s",
a3470f
-                                        key);
a3470f
+                inode = _get_filler_inode (filler);
a3470f
+                if (!inode || gf_uuid_is_null (inode->gfid))
a3470f
+                                goto out;
a3470f
+                ret = dict_set_uint32 (filler->xattr, key, inode->fd_count);
a3470f
+                if (ret < 0) {
a3470f
+                        gf_msg (filler->this->name, GF_LOG_WARNING, 0,
a3470f
+                                P_MSG_DICT_SET_FAILED,
a3470f
+                                "Failed to set dictionary value for %s", key);
a3470f
+                }
a3470f
+        } else if (!strcmp (key, GLUSTERFS_ACTIVE_FD_COUNT)) {
a3470f
+                inode = _get_filler_inode (filler);
a3470f
+                if (!inode || gf_uuid_is_null (inode->gfid))
a3470f
+                                goto out;
a3470f
+                ret = dict_set_uint32 (filler->xattr, key,
a3470f
+                                       inode->active_fd_count);
a3470f
+                if (ret < 0) {
a3470f
+                        gf_msg (filler->this->name, GF_LOG_WARNING, 0,
a3470f
+                                P_MSG_DICT_SET_FAILED,
a3470f
+                                "Failed to set dictionary value for %s", key);
a3470f
                 }
a3470f
         } else if (!strcmp (key, GET_ANCESTRY_PATH_KEY)) {
a3470f
                 /* As of now, the only consumers of POSIX_ANCESTRY_PATH attempt
a3470f
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
a3470f
index a412e6d..6856e5e 100644
a3470f
--- a/xlators/storage/posix/src/posix.c
a3470f
+++ b/xlators/storage/posix/src/posix.c
a3470f
@@ -3554,6 +3554,18 @@ _fill_writev_xdata (fd_t *fd, dict_t *xdata, xlator_t *this, int is_append)
a3470f
                 }
a3470f
         }
a3470f
 
a3470f
+        if (dict_get (xdata, GLUSTERFS_ACTIVE_FD_COUNT)) {
a3470f
+                ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_ACTIVE_FD_COUNT,
a3470f
+                                       fd->inode->active_fd_count);
a3470f
+                if (ret < 0) {
a3470f
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
a3470f
+                                P_MSG_DICT_SET_FAILED, "%s: Failed to set "
a3470f
+                                "dictionary value for %s",
a3470f
+                                uuid_utoa (fd->inode->gfid),
a3470f
+                                GLUSTERFS_ACTIVE_FD_COUNT);
a3470f
+                }
a3470f
+        }
a3470f
+
a3470f
         if (dict_get (xdata, GLUSTERFS_WRITE_IS_APPEND)) {
a3470f
                 ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_WRITE_IS_APPEND,
a3470f
                                        is_append);
a3470f
-- 
a3470f
1.8.3.1
a3470f