7c2869
From 56cb9390434a26a0b2d9c364899eb436799fdf30 Mon Sep 17 00:00:00 2001
7c2869
From: Pranith Kumar K <pkarampu@redhat.com>
7c2869
Date: Mon, 19 Mar 2018 15:12:14 +0530
7c2869
Subject: [PATCH 668/675] storage/posix: Add active-fd-count option in gluster
7c2869
7c2869
Problem:
7c2869
when dd happens on sharded replicate volume all the writes on shards happen
7c2869
through anon-fd. When the writes don't come quick enough, old anon-fd closes
7c2869
and new fd gets created to serve the new writes. open-fd-count is decremented
7c2869
only after the fd is closed as part of fd_destroy(). So even when one fd is on
7c2869
the way to be closed a new fd will be created and during this short period it
7c2869
appears as though there are multiple fds opened on the file. AFR thinks another
7c2869
application opened the same file and switches off eager-lock leading to
7c2869
extra latency.
7c2869
7c2869
Fix:
7c2869
Have a different option called active-fd whose life cycle starts at
7c2869
fd_bind() and ends just before fd_destroy()
7c2869
7c2869
 >BUG: 1557932
7c2869
7c2869
Change-Id: If6cadbe97d183e124f9dc4672a5b621cbe3324cb
7c2869
Upstream-patch: https://review.gluster.org/19740
7c2869
BUG: 1583733
7c2869
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
7c2869
Reviewed-on: https://code.engineering.redhat.com/gerrit/140573
7c2869
Tested-by: RHGS Build Bot <nigelb@redhat.com>
7c2869
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
7c2869
---
7c2869
 libglusterfs/src/fd.c                     |  2 ++
7c2869
 libglusterfs/src/glusterfs.h              |  1 +
7c2869
 libglusterfs/src/inode.c                  |  2 ++
7c2869
 libglusterfs/src/inode.h                  |  1 +
7c2869
 tests/volume.rc                           | 12 +++++++
7c2869
 xlators/storage/posix/src/posix-helpers.c | 52 ++++++++++++-------------------
7c2869
 xlators/storage/posix/src/posix.c         | 12 +++++++
7c2869
 7 files changed, 50 insertions(+), 32 deletions(-)
7c2869
7c2869
diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c
7c2869
index 118f876..d91f4de 100644
7c2869
--- a/libglusterfs/src/fd.c
7c2869
+++ b/libglusterfs/src/fd.c
7c2869
@@ -557,6 +557,7 @@ fd_unref (fd_t *fd)
7c2869
                 if (refcount == 0) {
7c2869
                         if (!list_empty (&fd->inode_list)) {
7c2869
                                 list_del_init (&fd->inode_list);
7c2869
+                                fd->inode->active_fd_count--;
7c2869
                                 bound = _gf_true;
7c2869
                         }
7c2869
                 }
7c2869
@@ -578,6 +579,7 @@ __fd_bind (fd_t *fd)
7c2869
         list_del_init (&fd->inode_list);
7c2869
         list_add (&fd->inode_list, &fd->inode->fd_list);
7c2869
         fd->inode->fd_count++;
7c2869
+        fd->inode->active_fd_count++;
7c2869
 
7c2869
         return fd;
7c2869
 }
7c2869
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
7c2869
index d13e5bd..277511a 100644
7c2869
--- a/libglusterfs/src/glusterfs.h
7c2869
+++ b/libglusterfs/src/glusterfs.h
7c2869
@@ -159,6 +159,7 @@
7c2869
 #define GLUSTERFS_WRITE_IS_APPEND "glusterfs.write-is-append"
7c2869
 #define GLUSTERFS_WRITE_UPDATE_ATOMIC "glusterfs.write-update-atomic"
7c2869
 #define GLUSTERFS_OPEN_FD_COUNT "glusterfs.open-fd-count"
7c2869
+#define GLUSTERFS_ACTIVE_FD_COUNT "glusterfs.open-active-fd-count"
7c2869
 #define GLUSTERFS_INODELK_COUNT "glusterfs.inodelk-count"
7c2869
 #define GLUSTERFS_ENTRYLK_COUNT "glusterfs.entrylk-count"
7c2869
 #define GLUSTERFS_POSIXLK_COUNT "glusterfs.posixlk-count"
7c2869
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
7c2869
index 0353825..2f67cf4 100644
7c2869
--- a/libglusterfs/src/inode.c
7c2869
+++ b/libglusterfs/src/inode.c
7c2869
@@ -2332,6 +2332,8 @@ inode_dump (inode_t *inode, char *prefix)
7c2869
                 gf_proc_dump_write("gfid", "%s", uuid_utoa (inode->gfid));
7c2869
                 gf_proc_dump_write("nlookup", "%ld", inode->nlookup);
7c2869
                 gf_proc_dump_write("fd-count", "%u", inode->fd_count);
7c2869
+                gf_proc_dump_write("active-fd-count", "%u",
7c2869
+                                   inode->active_fd_count);
7c2869
                 gf_proc_dump_write("ref", "%u", inode->ref);
7c2869
                 gf_proc_dump_write("ia_type", "%d", inode->ia_type);
7c2869
                 if (inode->_ctx) {
7c2869
diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h
7c2869
index e4ad046..506f495 100644
7c2869
--- a/libglusterfs/src/inode.h
7c2869
+++ b/libglusterfs/src/inode.h
7c2869
@@ -89,6 +89,7 @@ struct _inode {
7c2869
         gf_lock_t            lock;
7c2869
         uint64_t             nlookup;
7c2869
         uint32_t             fd_count;      /* Open fd count */
7c2869
+        uint32_t             active_fd_count;      /* Active open fd count */
7c2869
         uint32_t             ref;           /* reference count on this inode */
7c2869
         ia_type_t            ia_type;       /* what kind of file */
7c2869
         struct list_head     fd_list;       /* list of open files on this inode */
7c2869
diff --git a/tests/volume.rc b/tests/volume.rc
7c2869
index 9ab31af..0d20b0e 100644
7c2869
--- a/tests/volume.rc
7c2869
+++ b/tests/volume.rc
7c2869
@@ -778,3 +778,15 @@ function get_mount_lru_size_value {
7c2869
         rm -f $statedump
7c2869
         echo $val
7c2869
 }
7c2869
+
7c2869
+function get_active_fd_count {
7c2869
+        local vol=$1
7c2869
+        local host=$2
7c2869
+        local brick=$3
7c2869
+        local fname=$4
7c2869
+        local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname))
7c2869
+        local statedump=$(generate_brick_statedump $vol $host $brick)
7c2869
+        local count=$(grep "gfid=$gfid_str" $statedump -A2 -B1 | grep $brick -A3 | grep -w active-fd-count | cut -f2 -d'=' | tail -1)
7c2869
+         rm -f $statedump
7c2869
+         echo $count
7c2869
+}
7c2869
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
7c2869
index 073465b..5f8304e 100644
7c2869
--- a/xlators/storage/posix/src/posix-helpers.c
7c2869
+++ b/xlators/storage/posix/src/posix-helpers.c
7c2869
@@ -371,27 +371,6 @@ _get_filler_inode (posix_xattr_filler_t *filler)
7c2869
 }
7c2869
 
7c2869
 static int
7c2869
-_posix_filler_get_openfd_count (posix_xattr_filler_t *filler, char *key)
7c2869
-{
7c2869
-        inode_t  *inode            = NULL;
7c2869
-        int      ret               = -1;
7c2869
-
7c2869
-        inode = _get_filler_inode (filler);
7c2869
-        if (!inode || gf_uuid_is_null (inode->gfid))
7c2869
-                        goto out;
7c2869
-
7c2869
-        ret = dict_set_uint32 (filler->xattr, key, inode->fd_count);
7c2869
-        if (ret < 0) {
7c2869
-                gf_msg (filler->this->name, GF_LOG_WARNING, 0,
7c2869
-                        P_MSG_DICT_SET_FAILED,
7c2869
-                        "Failed to set dictionary value for %s", key);
7c2869
-                goto out;
7c2869
-        }
7c2869
-out:
7c2869
-        return ret;
7c2869
-}
7c2869
-
7c2869
-static int
7c2869
 _posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data,
7c2869
                       void *xattrargs)
7c2869
 {
7c2869
@@ -399,8 +378,8 @@ _posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data,
7c2869
         int       ret      = -1;
7c2869
         char     *databuf  = NULL;
7c2869
         int       _fd      = -1;
7c2869
-        loc_t    *loc      = NULL;
7c2869
         ssize_t  req_size  = 0;
7c2869
+        inode_t  *inode    = NULL;
7c2869
 
7c2869
 
7c2869
         if (posix_xattr_ignorable (key))
7c2869
@@ -477,16 +456,25 @@ _posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data,
7c2869
                         GF_FREE (databuf);
7c2869
                 }
7c2869
         } else if (!strcmp (key, GLUSTERFS_OPEN_FD_COUNT)) {
7c2869
-                ret = _posix_filler_get_openfd_count (filler, key);
7c2869
-                loc = filler->loc;
7c2869
-                if (loc) {
7c2869
-                        ret = dict_set_uint32 (filler->xattr, key,
7c2869
-                                               loc->inode->fd_count);
7c2869
-                        if (ret < 0)
7c2869
-                                gf_msg (filler->this->name, GF_LOG_WARNING, 0,
7c2869
-                                        P_MSG_XDATA_GETXATTR,
7c2869
-                                        "Failed to set dictionary value for %s",
7c2869
-                                        key);
7c2869
+                inode = _get_filler_inode (filler);
7c2869
+                if (!inode || gf_uuid_is_null (inode->gfid))
7c2869
+                                goto out;
7c2869
+                ret = dict_set_uint32 (filler->xattr, key, inode->fd_count);
7c2869
+                if (ret < 0) {
7c2869
+                        gf_msg (filler->this->name, GF_LOG_WARNING, 0,
7c2869
+                                P_MSG_DICT_SET_FAILED,
7c2869
+                                "Failed to set dictionary value for %s", key);
7c2869
+                }
7c2869
+        } else if (!strcmp (key, GLUSTERFS_ACTIVE_FD_COUNT)) {
7c2869
+                inode = _get_filler_inode (filler);
7c2869
+                if (!inode || gf_uuid_is_null (inode->gfid))
7c2869
+                                goto out;
7c2869
+                ret = dict_set_uint32 (filler->xattr, key,
7c2869
+                                       inode->active_fd_count);
7c2869
+                if (ret < 0) {
7c2869
+                        gf_msg (filler->this->name, GF_LOG_WARNING, 0,
7c2869
+                                P_MSG_DICT_SET_FAILED,
7c2869
+                                "Failed to set dictionary value for %s", key);
7c2869
                 }
7c2869
         } else if (!strcmp (key, GET_ANCESTRY_PATH_KEY)) {
7c2869
                 /* As of now, the only consumers of POSIX_ANCESTRY_PATH attempt
7c2869
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
7c2869
index d499e09..b0d7037 100644
7c2869
--- a/xlators/storage/posix/src/posix.c
7c2869
+++ b/xlators/storage/posix/src/posix.c
7c2869
@@ -3353,6 +3353,18 @@ _fill_writev_xdata (fd_t *fd, dict_t *xdata, xlator_t *this, int is_append)
7c2869
                 }
7c2869
         }
7c2869
 
7c2869
+        if (dict_get (xdata, GLUSTERFS_ACTIVE_FD_COUNT)) {
7c2869
+                ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_ACTIVE_FD_COUNT,
7c2869
+                                       fd->inode->active_fd_count);
7c2869
+                if (ret < 0) {
7c2869
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
7c2869
+                                P_MSG_DICT_SET_FAILED, "%s: Failed to set "
7c2869
+                                "dictionary value for %s",
7c2869
+                                uuid_utoa (fd->inode->gfid),
7c2869
+                                GLUSTERFS_ACTIVE_FD_COUNT);
7c2869
+                }
7c2869
+        }
7c2869
+
7c2869
         if (dict_get (xdata, GLUSTERFS_WRITE_IS_APPEND)) {
7c2869
                 ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_WRITE_IS_APPEND,
7c2869
                                        is_append);
7c2869
-- 
7c2869
1.8.3.1
7c2869