|
|
d1681e |
From 0ce89d9d2bb0b162ecd4dc47c663569815acdb7b Mon Sep 17 00:00:00 2001
|
|
|
d1681e |
From: Pranith Kumar K <pkarampu@redhat.com>
|
|
|
d1681e |
Date: Mon, 19 Mar 2018 15:12:14 +0530
|
|
|
d1681e |
Subject: [PATCH 195/201] storage/posix: Add active-fd-count option in gluster
|
|
|
d1681e |
|
|
|
d1681e |
Problem:
|
|
|
d1681e |
when dd happens on sharded replicate volume all the writes on shards happen
|
|
|
d1681e |
through anon-fd. When the writes don't come quick enough, old anon-fd closes
|
|
|
d1681e |
and new fd gets created to serve the new writes. open-fd-count is decremented
|
|
|
d1681e |
only after the fd is closed as part of fd_destroy(). So even when one fd is on
|
|
|
d1681e |
the way to be closed a new fd will be created and during this short period it
|
|
|
d1681e |
appears as though there are multiple fds opened on the file. AFR thinks another
|
|
|
d1681e |
application opened the same file and switches off eager-lock leading to
|
|
|
d1681e |
extra latency.
|
|
|
d1681e |
|
|
|
d1681e |
Fix:
|
|
|
d1681e |
Have a different option called active-fd whose life cycle starts at
|
|
|
d1681e |
fd_bind() and ends just before fd_destroy()
|
|
|
d1681e |
|
|
|
d1681e |
>BUG: 1557932
|
|
|
d1681e |
|
|
|
d1681e |
Upstream-patch: https://review.gluster.org/19740
|
|
|
d1681e |
BUG: 1491785
|
|
|
d1681e |
Change-Id: I2e221f6030feeedf29fbb3bd6554673b8a5b9c94
|
|
|
d1681e |
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
|
|
|
d1681e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/133659
|
|
|
d1681e |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
d1681e |
---
|
|
|
d1681e |
libglusterfs/src/fd.c | 2 ++
|
|
|
d1681e |
libglusterfs/src/glusterfs.h | 1 +
|
|
|
d1681e |
libglusterfs/src/inode.c | 2 ++
|
|
|
d1681e |
libglusterfs/src/inode.h | 1 +
|
|
|
d1681e |
tests/volume.rc | 14 ++++++++-
|
|
|
d1681e |
xlators/storage/posix/src/posix-helpers.c | 52 ++++++++++++-------------------
|
|
|
d1681e |
xlators/storage/posix/src/posix.c | 12 +++++++
|
|
|
d1681e |
7 files changed, 51 insertions(+), 33 deletions(-)
|
|
|
d1681e |
|
|
|
d1681e |
diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c
|
|
|
d1681e |
index a824db7..45b0d32 100644
|
|
|
d1681e |
--- a/libglusterfs/src/fd.c
|
|
|
d1681e |
+++ b/libglusterfs/src/fd.c
|
|
|
d1681e |
@@ -557,6 +557,7 @@ fd_unref (fd_t *fd)
|
|
|
d1681e |
if (refcount == 0) {
|
|
|
d1681e |
if (!list_empty (&fd->inode_list)) {
|
|
|
d1681e |
list_del_init (&fd->inode_list);
|
|
|
d1681e |
+ fd->inode->active_fd_count--;
|
|
|
d1681e |
bound = _gf_true;
|
|
|
d1681e |
}
|
|
|
d1681e |
}
|
|
|
d1681e |
@@ -578,6 +579,7 @@ __fd_bind (fd_t *fd)
|
|
|
d1681e |
list_del_init (&fd->inode_list);
|
|
|
d1681e |
list_add (&fd->inode_list, &fd->inode->fd_list);
|
|
|
d1681e |
fd->inode->fd_count++;
|
|
|
d1681e |
+ fd->inode->active_fd_count++;
|
|
|
d1681e |
|
|
|
d1681e |
return fd;
|
|
|
d1681e |
}
|
|
|
d1681e |
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
|
|
|
d1681e |
index c8835d9..5abfafa 100644
|
|
|
d1681e |
--- a/libglusterfs/src/glusterfs.h
|
|
|
d1681e |
+++ b/libglusterfs/src/glusterfs.h
|
|
|
d1681e |
@@ -164,6 +164,7 @@
|
|
|
d1681e |
#define GLUSTERFS_WRITE_IS_APPEND "glusterfs.write-is-append"
|
|
|
d1681e |
#define GLUSTERFS_WRITE_UPDATE_ATOMIC "glusterfs.write-update-atomic"
|
|
|
d1681e |
#define GLUSTERFS_OPEN_FD_COUNT "glusterfs.open-fd-count"
|
|
|
d1681e |
+#define GLUSTERFS_ACTIVE_FD_COUNT "glusterfs.open-active-fd-count"
|
|
|
d1681e |
#define GLUSTERFS_INODELK_COUNT "glusterfs.inodelk-count"
|
|
|
d1681e |
#define GLUSTERFS_ENTRYLK_COUNT "glusterfs.entrylk-count"
|
|
|
d1681e |
#define GLUSTERFS_POSIXLK_COUNT "glusterfs.posixlk-count"
|
|
|
d1681e |
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
|
|
|
d1681e |
index b7b5ac6..ffba1bf 100644
|
|
|
d1681e |
--- a/libglusterfs/src/inode.c
|
|
|
d1681e |
+++ b/libglusterfs/src/inode.c
|
|
|
d1681e |
@@ -2344,6 +2344,8 @@ inode_dump (inode_t *inode, char *prefix)
|
|
|
d1681e |
gf_proc_dump_write("gfid", "%s", uuid_utoa (inode->gfid));
|
|
|
d1681e |
gf_proc_dump_write("nlookup", "%ld", inode->nlookup);
|
|
|
d1681e |
gf_proc_dump_write("fd-count", "%u", inode->fd_count);
|
|
|
d1681e |
+ gf_proc_dump_write("active-fd-count", "%u",
|
|
|
d1681e |
+ inode->active_fd_count);
|
|
|
d1681e |
gf_proc_dump_write("ref", "%u", inode->ref);
|
|
|
d1681e |
gf_proc_dump_write("ia_type", "%d", inode->ia_type);
|
|
|
d1681e |
if (inode->_ctx) {
|
|
|
d1681e |
diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h
|
|
|
d1681e |
index b82b6ba..7a87748 100644
|
|
|
d1681e |
--- a/libglusterfs/src/inode.h
|
|
|
d1681e |
+++ b/libglusterfs/src/inode.h
|
|
|
d1681e |
@@ -93,6 +93,7 @@ struct _inode {
|
|
|
d1681e |
gf_lock_t lock;
|
|
|
d1681e |
uint64_t nlookup;
|
|
|
d1681e |
uint32_t fd_count; /* Open fd count */
|
|
|
d1681e |
+ uint32_t active_fd_count; /* Active open fd count */
|
|
|
d1681e |
uint32_t ref; /* reference count on this inode */
|
|
|
d1681e |
ia_type_t ia_type; /* what kind of file */
|
|
|
d1681e |
struct list_head fd_list; /* list of open files on this inode */
|
|
|
d1681e |
diff --git a/tests/volume.rc b/tests/volume.rc
|
|
|
d1681e |
index a15c8e5..d57aa93 100644
|
|
|
d1681e |
--- a/tests/volume.rc
|
|
|
d1681e |
+++ b/tests/volume.rc
|
|
|
d1681e |
@@ -804,7 +804,19 @@ function get_fd_count {
|
|
|
d1681e |
local fname=$4
|
|
|
d1681e |
local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname))
|
|
|
d1681e |
local statedump=$(generate_brick_statedump $vol $host $brick)
|
|
|
d1681e |
- local count=$(grep "gfid=$gfid_str" $statedump -A2 | grep fd-count | cut -f2 -d'=' | tail -1)
|
|
|
d1681e |
+ local count=$(grep "gfid=$gfid_str" $statedump -A2 -B1 | grep $brick -A3 | grep -w fd-count | cut -f2 -d'=' | tail -1)
|
|
|
d1681e |
+ rm -f $statedump
|
|
|
d1681e |
+ echo $count
|
|
|
d1681e |
+}
|
|
|
d1681e |
+
|
|
|
d1681e |
+function get_active_fd_count {
|
|
|
d1681e |
+ local vol=$1
|
|
|
d1681e |
+ local host=$2
|
|
|
d1681e |
+ local brick=$3
|
|
|
d1681e |
+ local fname=$4
|
|
|
d1681e |
+ local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname))
|
|
|
d1681e |
+ local statedump=$(generate_brick_statedump $vol $host $brick)
|
|
|
d1681e |
+ local count=$(grep "gfid=$gfid_str" $statedump -A2 -B1 | grep $brick -A3 | grep -w active-fd-count | cut -f2 -d'=' | tail -1)
|
|
|
d1681e |
rm -f $statedump
|
|
|
d1681e |
echo $count
|
|
|
d1681e |
}
|
|
|
d1681e |
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
|
|
|
d1681e |
index bc97206..ba1d8c3 100644
|
|
|
d1681e |
--- a/xlators/storage/posix/src/posix-helpers.c
|
|
|
d1681e |
+++ b/xlators/storage/posix/src/posix-helpers.c
|
|
|
d1681e |
@@ -388,27 +388,6 @@ _get_filler_inode (posix_xattr_filler_t *filler)
|
|
|
d1681e |
}
|
|
|
d1681e |
|
|
|
d1681e |
static int
|
|
|
d1681e |
-_posix_filler_get_openfd_count (posix_xattr_filler_t *filler, char *key)
|
|
|
d1681e |
-{
|
|
|
d1681e |
- inode_t *inode = NULL;
|
|
|
d1681e |
- int ret = -1;
|
|
|
d1681e |
-
|
|
|
d1681e |
- inode = _get_filler_inode (filler);
|
|
|
d1681e |
- if (!inode || gf_uuid_is_null (inode->gfid))
|
|
|
d1681e |
- goto out;
|
|
|
d1681e |
-
|
|
|
d1681e |
- ret = dict_set_uint32 (filler->xattr, key, inode->fd_count);
|
|
|
d1681e |
- if (ret < 0) {
|
|
|
d1681e |
- gf_msg (filler->this->name, GF_LOG_WARNING, 0,
|
|
|
d1681e |
- P_MSG_DICT_SET_FAILED,
|
|
|
d1681e |
- "Failed to set dictionary value for %s", key);
|
|
|
d1681e |
- goto out;
|
|
|
d1681e |
- }
|
|
|
d1681e |
-out:
|
|
|
d1681e |
- return ret;
|
|
|
d1681e |
-}
|
|
|
d1681e |
-
|
|
|
d1681e |
-static int
|
|
|
d1681e |
_posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data,
|
|
|
d1681e |
void *xattrargs)
|
|
|
d1681e |
{
|
|
|
d1681e |
@@ -416,11 +395,11 @@ _posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data,
|
|
|
d1681e |
int ret = -1;
|
|
|
d1681e |
char *databuf = NULL;
|
|
|
d1681e |
int _fd = -1;
|
|
|
d1681e |
- loc_t *loc = NULL;
|
|
|
d1681e |
ssize_t req_size = 0;
|
|
|
d1681e |
int32_t list_offset = 0;
|
|
|
d1681e |
ssize_t remaining_size = 0;
|
|
|
d1681e |
char *xattr = NULL;
|
|
|
d1681e |
+ inode_t *inode = NULL;
|
|
|
d1681e |
|
|
|
d1681e |
if (posix_xattr_ignorable (key))
|
|
|
d1681e |
goto out;
|
|
|
d1681e |
@@ -496,16 +475,25 @@ _posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data,
|
|
|
d1681e |
GF_FREE (databuf);
|
|
|
d1681e |
}
|
|
|
d1681e |
} else if (!strcmp (key, GLUSTERFS_OPEN_FD_COUNT)) {
|
|
|
d1681e |
- ret = _posix_filler_get_openfd_count (filler, key);
|
|
|
d1681e |
- loc = filler->loc;
|
|
|
d1681e |
- if (loc) {
|
|
|
d1681e |
- ret = dict_set_uint32 (filler->xattr, key,
|
|
|
d1681e |
- loc->inode->fd_count);
|
|
|
d1681e |
- if (ret < 0)
|
|
|
d1681e |
- gf_msg (filler->this->name, GF_LOG_WARNING, 0,
|
|
|
d1681e |
- P_MSG_XDATA_GETXATTR,
|
|
|
d1681e |
- "Failed to set dictionary value for %s",
|
|
|
d1681e |
- key);
|
|
|
d1681e |
+ inode = _get_filler_inode (filler);
|
|
|
d1681e |
+ if (!inode || gf_uuid_is_null (inode->gfid))
|
|
|
d1681e |
+ goto out;
|
|
|
d1681e |
+ ret = dict_set_uint32 (filler->xattr, key, inode->fd_count);
|
|
|
d1681e |
+ if (ret < 0) {
|
|
|
d1681e |
+ gf_msg (filler->this->name, GF_LOG_WARNING, 0,
|
|
|
d1681e |
+ P_MSG_DICT_SET_FAILED,
|
|
|
d1681e |
+ "Failed to set dictionary value for %s", key);
|
|
|
d1681e |
+ }
|
|
|
d1681e |
+ } else if (!strcmp (key, GLUSTERFS_ACTIVE_FD_COUNT)) {
|
|
|
d1681e |
+ inode = _get_filler_inode (filler);
|
|
|
d1681e |
+ if (!inode || gf_uuid_is_null (inode->gfid))
|
|
|
d1681e |
+ goto out;
|
|
|
d1681e |
+ ret = dict_set_uint32 (filler->xattr, key,
|
|
|
d1681e |
+ inode->active_fd_count);
|
|
|
d1681e |
+ if (ret < 0) {
|
|
|
d1681e |
+ gf_msg (filler->this->name, GF_LOG_WARNING, 0,
|
|
|
d1681e |
+ P_MSG_DICT_SET_FAILED,
|
|
|
d1681e |
+ "Failed to set dictionary value for %s", key);
|
|
|
d1681e |
}
|
|
|
d1681e |
} else if (!strcmp (key, GET_ANCESTRY_PATH_KEY)) {
|
|
|
d1681e |
/* As of now, the only consumers of POSIX_ANCESTRY_PATH attempt
|
|
|
d1681e |
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
|
|
|
d1681e |
index a412e6d..6856e5e 100644
|
|
|
d1681e |
--- a/xlators/storage/posix/src/posix.c
|
|
|
d1681e |
+++ b/xlators/storage/posix/src/posix.c
|
|
|
d1681e |
@@ -3554,6 +3554,18 @@ _fill_writev_xdata (fd_t *fd, dict_t *xdata, xlator_t *this, int is_append)
|
|
|
d1681e |
}
|
|
|
d1681e |
}
|
|
|
d1681e |
|
|
|
d1681e |
+ if (dict_get (xdata, GLUSTERFS_ACTIVE_FD_COUNT)) {
|
|
|
d1681e |
+ ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_ACTIVE_FD_COUNT,
|
|
|
d1681e |
+ fd->inode->active_fd_count);
|
|
|
d1681e |
+ if (ret < 0) {
|
|
|
d1681e |
+ gf_msg (this->name, GF_LOG_WARNING, 0,
|
|
|
d1681e |
+ P_MSG_DICT_SET_FAILED, "%s: Failed to set "
|
|
|
d1681e |
+ "dictionary value for %s",
|
|
|
d1681e |
+ uuid_utoa (fd->inode->gfid),
|
|
|
d1681e |
+ GLUSTERFS_ACTIVE_FD_COUNT);
|
|
|
d1681e |
+ }
|
|
|
d1681e |
+ }
|
|
|
d1681e |
+
|
|
|
d1681e |
if (dict_get (xdata, GLUSTERFS_WRITE_IS_APPEND)) {
|
|
|
d1681e |
ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_WRITE_IS_APPEND,
|
|
|
d1681e |
is_append);
|
|
|
d1681e |
--
|
|
|
d1681e |
1.8.3.1
|
|
|
d1681e |
|