|
|
7c2869 |
From 56cb9390434a26a0b2d9c364899eb436799fdf30 Mon Sep 17 00:00:00 2001
|
|
|
7c2869 |
From: Pranith Kumar K <pkarampu@redhat.com>
|
|
|
7c2869 |
Date: Mon, 19 Mar 2018 15:12:14 +0530
|
|
|
7c2869 |
Subject: [PATCH 668/675] storage/posix: Add active-fd-count option in gluster
|
|
|
7c2869 |
|
|
|
7c2869 |
Problem:
|
|
|
7c2869 |
when dd happens on sharded replicate volume all the writes on shards happen
|
|
|
7c2869 |
through anon-fd. When the writes don't come quick enough, old anon-fd closes
|
|
|
7c2869 |
and new fd gets created to serve the new writes. open-fd-count is decremented
|
|
|
7c2869 |
only after the fd is closed as part of fd_destroy(). So even when one fd is on
|
|
|
7c2869 |
the way to be closed a new fd will be created and during this short period it
|
|
|
7c2869 |
appears as though there are multiple fds opened on the file. AFR thinks another
|
|
|
7c2869 |
application opened the same file and switches off eager-lock leading to
|
|
|
7c2869 |
extra latency.
|
|
|
7c2869 |
|
|
|
7c2869 |
Fix:
|
|
|
7c2869 |
Have a different option called active-fd whose life cycle starts at
|
|
|
7c2869 |
fd_bind() and ends just before fd_destroy()
|
|
|
7c2869 |
|
|
|
7c2869 |
>BUG: 1557932
|
|
|
7c2869 |
|
|
|
7c2869 |
Change-Id: If6cadbe97d183e124f9dc4672a5b621cbe3324cb
|
|
|
7c2869 |
Upstream-patch: https://review.gluster.org/19740
|
|
|
7c2869 |
BUG: 1583733
|
|
|
7c2869 |
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
|
|
|
7c2869 |
Reviewed-on: https://code.engineering.redhat.com/gerrit/140573
|
|
|
7c2869 |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
7c2869 |
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
|
7c2869 |
---
|
|
|
7c2869 |
libglusterfs/src/fd.c | 2 ++
|
|
|
7c2869 |
libglusterfs/src/glusterfs.h | 1 +
|
|
|
7c2869 |
libglusterfs/src/inode.c | 2 ++
|
|
|
7c2869 |
libglusterfs/src/inode.h | 1 +
|
|
|
7c2869 |
tests/volume.rc | 12 +++++++
|
|
|
7c2869 |
xlators/storage/posix/src/posix-helpers.c | 52 ++++++++++++-------------------
|
|
|
7c2869 |
xlators/storage/posix/src/posix.c | 12 +++++++
|
|
|
7c2869 |
7 files changed, 50 insertions(+), 32 deletions(-)
|
|
|
7c2869 |
|
|
|
7c2869 |
diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c
|
|
|
7c2869 |
index 118f876..d91f4de 100644
|
|
|
7c2869 |
--- a/libglusterfs/src/fd.c
|
|
|
7c2869 |
+++ b/libglusterfs/src/fd.c
|
|
|
7c2869 |
@@ -557,6 +557,7 @@ fd_unref (fd_t *fd)
|
|
|
7c2869 |
if (refcount == 0) {
|
|
|
7c2869 |
if (!list_empty (&fd->inode_list)) {
|
|
|
7c2869 |
list_del_init (&fd->inode_list);
|
|
|
7c2869 |
+ fd->inode->active_fd_count--;
|
|
|
7c2869 |
bound = _gf_true;
|
|
|
7c2869 |
}
|
|
|
7c2869 |
}
|
|
|
7c2869 |
@@ -578,6 +579,7 @@ __fd_bind (fd_t *fd)
|
|
|
7c2869 |
list_del_init (&fd->inode_list);
|
|
|
7c2869 |
list_add (&fd->inode_list, &fd->inode->fd_list);
|
|
|
7c2869 |
fd->inode->fd_count++;
|
|
|
7c2869 |
+ fd->inode->active_fd_count++;
|
|
|
7c2869 |
|
|
|
7c2869 |
return fd;
|
|
|
7c2869 |
}
|
|
|
7c2869 |
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
|
|
|
7c2869 |
index d13e5bd..277511a 100644
|
|
|
7c2869 |
--- a/libglusterfs/src/glusterfs.h
|
|
|
7c2869 |
+++ b/libglusterfs/src/glusterfs.h
|
|
|
7c2869 |
@@ -159,6 +159,7 @@
|
|
|
7c2869 |
#define GLUSTERFS_WRITE_IS_APPEND "glusterfs.write-is-append"
|
|
|
7c2869 |
#define GLUSTERFS_WRITE_UPDATE_ATOMIC "glusterfs.write-update-atomic"
|
|
|
7c2869 |
#define GLUSTERFS_OPEN_FD_COUNT "glusterfs.open-fd-count"
|
|
|
7c2869 |
+#define GLUSTERFS_ACTIVE_FD_COUNT "glusterfs.open-active-fd-count"
|
|
|
7c2869 |
#define GLUSTERFS_INODELK_COUNT "glusterfs.inodelk-count"
|
|
|
7c2869 |
#define GLUSTERFS_ENTRYLK_COUNT "glusterfs.entrylk-count"
|
|
|
7c2869 |
#define GLUSTERFS_POSIXLK_COUNT "glusterfs.posixlk-count"
|
|
|
7c2869 |
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
|
|
|
7c2869 |
index 0353825..2f67cf4 100644
|
|
|
7c2869 |
--- a/libglusterfs/src/inode.c
|
|
|
7c2869 |
+++ b/libglusterfs/src/inode.c
|
|
|
7c2869 |
@@ -2332,6 +2332,8 @@ inode_dump (inode_t *inode, char *prefix)
|
|
|
7c2869 |
gf_proc_dump_write("gfid", "%s", uuid_utoa (inode->gfid));
|
|
|
7c2869 |
gf_proc_dump_write("nlookup", "%ld", inode->nlookup);
|
|
|
7c2869 |
gf_proc_dump_write("fd-count", "%u", inode->fd_count);
|
|
|
7c2869 |
+ gf_proc_dump_write("active-fd-count", "%u",
|
|
|
7c2869 |
+ inode->active_fd_count);
|
|
|
7c2869 |
gf_proc_dump_write("ref", "%u", inode->ref);
|
|
|
7c2869 |
gf_proc_dump_write("ia_type", "%d", inode->ia_type);
|
|
|
7c2869 |
if (inode->_ctx) {
|
|
|
7c2869 |
diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h
|
|
|
7c2869 |
index e4ad046..506f495 100644
|
|
|
7c2869 |
--- a/libglusterfs/src/inode.h
|
|
|
7c2869 |
+++ b/libglusterfs/src/inode.h
|
|
|
7c2869 |
@@ -89,6 +89,7 @@ struct _inode {
|
|
|
7c2869 |
gf_lock_t lock;
|
|
|
7c2869 |
uint64_t nlookup;
|
|
|
7c2869 |
uint32_t fd_count; /* Open fd count */
|
|
|
7c2869 |
+ uint32_t active_fd_count; /* Active open fd count */
|
|
|
7c2869 |
uint32_t ref; /* reference count on this inode */
|
|
|
7c2869 |
ia_type_t ia_type; /* what kind of file */
|
|
|
7c2869 |
struct list_head fd_list; /* list of open files on this inode */
|
|
|
7c2869 |
diff --git a/tests/volume.rc b/tests/volume.rc
|
|
|
7c2869 |
index 9ab31af..0d20b0e 100644
|
|
|
7c2869 |
--- a/tests/volume.rc
|
|
|
7c2869 |
+++ b/tests/volume.rc
|
|
|
7c2869 |
@@ -778,3 +778,15 @@ function get_mount_lru_size_value {
|
|
|
7c2869 |
rm -f $statedump
|
|
|
7c2869 |
echo $val
|
|
|
7c2869 |
}
|
|
|
7c2869 |
+
|
|
|
7c2869 |
+function get_active_fd_count {
|
|
|
7c2869 |
+ local vol=$1
|
|
|
7c2869 |
+ local host=$2
|
|
|
7c2869 |
+ local brick=$3
|
|
|
7c2869 |
+ local fname=$4
|
|
|
7c2869 |
+ local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname))
|
|
|
7c2869 |
+ local statedump=$(generate_brick_statedump $vol $host $brick)
|
|
|
7c2869 |
+ local count=$(grep "gfid=$gfid_str" $statedump -A2 -B1 | grep $brick -A3 | grep -w active-fd-count | cut -f2 -d'=' | tail -1)
|
|
|
7c2869 |
+ rm -f $statedump
|
|
|
7c2869 |
+ echo $count
|
|
|
7c2869 |
+}
|
|
|
7c2869 |
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
|
|
|
7c2869 |
index 073465b..5f8304e 100644
|
|
|
7c2869 |
--- a/xlators/storage/posix/src/posix-helpers.c
|
|
|
7c2869 |
+++ b/xlators/storage/posix/src/posix-helpers.c
|
|
|
7c2869 |
@@ -371,27 +371,6 @@ _get_filler_inode (posix_xattr_filler_t *filler)
|
|
|
7c2869 |
}
|
|
|
7c2869 |
|
|
|
7c2869 |
static int
|
|
|
7c2869 |
-_posix_filler_get_openfd_count (posix_xattr_filler_t *filler, char *key)
|
|
|
7c2869 |
-{
|
|
|
7c2869 |
- inode_t *inode = NULL;
|
|
|
7c2869 |
- int ret = -1;
|
|
|
7c2869 |
-
|
|
|
7c2869 |
- inode = _get_filler_inode (filler);
|
|
|
7c2869 |
- if (!inode || gf_uuid_is_null (inode->gfid))
|
|
|
7c2869 |
- goto out;
|
|
|
7c2869 |
-
|
|
|
7c2869 |
- ret = dict_set_uint32 (filler->xattr, key, inode->fd_count);
|
|
|
7c2869 |
- if (ret < 0) {
|
|
|
7c2869 |
- gf_msg (filler->this->name, GF_LOG_WARNING, 0,
|
|
|
7c2869 |
- P_MSG_DICT_SET_FAILED,
|
|
|
7c2869 |
- "Failed to set dictionary value for %s", key);
|
|
|
7c2869 |
- goto out;
|
|
|
7c2869 |
- }
|
|
|
7c2869 |
-out:
|
|
|
7c2869 |
- return ret;
|
|
|
7c2869 |
-}
|
|
|
7c2869 |
-
|
|
|
7c2869 |
-static int
|
|
|
7c2869 |
_posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data,
|
|
|
7c2869 |
void *xattrargs)
|
|
|
7c2869 |
{
|
|
|
7c2869 |
@@ -399,8 +378,8 @@ _posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data,
|
|
|
7c2869 |
int ret = -1;
|
|
|
7c2869 |
char *databuf = NULL;
|
|
|
7c2869 |
int _fd = -1;
|
|
|
7c2869 |
- loc_t *loc = NULL;
|
|
|
7c2869 |
ssize_t req_size = 0;
|
|
|
7c2869 |
+ inode_t *inode = NULL;
|
|
|
7c2869 |
|
|
|
7c2869 |
|
|
|
7c2869 |
if (posix_xattr_ignorable (key))
|
|
|
7c2869 |
@@ -477,16 +456,25 @@ _posix_xattr_get_set (dict_t *xattr_req, char *key, data_t *data,
|
|
|
7c2869 |
GF_FREE (databuf);
|
|
|
7c2869 |
}
|
|
|
7c2869 |
} else if (!strcmp (key, GLUSTERFS_OPEN_FD_COUNT)) {
|
|
|
7c2869 |
- ret = _posix_filler_get_openfd_count (filler, key);
|
|
|
7c2869 |
- loc = filler->loc;
|
|
|
7c2869 |
- if (loc) {
|
|
|
7c2869 |
- ret = dict_set_uint32 (filler->xattr, key,
|
|
|
7c2869 |
- loc->inode->fd_count);
|
|
|
7c2869 |
- if (ret < 0)
|
|
|
7c2869 |
- gf_msg (filler->this->name, GF_LOG_WARNING, 0,
|
|
|
7c2869 |
- P_MSG_XDATA_GETXATTR,
|
|
|
7c2869 |
- "Failed to set dictionary value for %s",
|
|
|
7c2869 |
- key);
|
|
|
7c2869 |
+ inode = _get_filler_inode (filler);
|
|
|
7c2869 |
+ if (!inode || gf_uuid_is_null (inode->gfid))
|
|
|
7c2869 |
+ goto out;
|
|
|
7c2869 |
+ ret = dict_set_uint32 (filler->xattr, key, inode->fd_count);
|
|
|
7c2869 |
+ if (ret < 0) {
|
|
|
7c2869 |
+ gf_msg (filler->this->name, GF_LOG_WARNING, 0,
|
|
|
7c2869 |
+ P_MSG_DICT_SET_FAILED,
|
|
|
7c2869 |
+ "Failed to set dictionary value for %s", key);
|
|
|
7c2869 |
+ }
|
|
|
7c2869 |
+ } else if (!strcmp (key, GLUSTERFS_ACTIVE_FD_COUNT)) {
|
|
|
7c2869 |
+ inode = _get_filler_inode (filler);
|
|
|
7c2869 |
+ if (!inode || gf_uuid_is_null (inode->gfid))
|
|
|
7c2869 |
+ goto out;
|
|
|
7c2869 |
+ ret = dict_set_uint32 (filler->xattr, key,
|
|
|
7c2869 |
+ inode->active_fd_count);
|
|
|
7c2869 |
+ if (ret < 0) {
|
|
|
7c2869 |
+ gf_msg (filler->this->name, GF_LOG_WARNING, 0,
|
|
|
7c2869 |
+ P_MSG_DICT_SET_FAILED,
|
|
|
7c2869 |
+ "Failed to set dictionary value for %s", key);
|
|
|
7c2869 |
}
|
|
|
7c2869 |
} else if (!strcmp (key, GET_ANCESTRY_PATH_KEY)) {
|
|
|
7c2869 |
/* As of now, the only consumers of POSIX_ANCESTRY_PATH attempt
|
|
|
7c2869 |
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
|
|
|
7c2869 |
index d499e09..b0d7037 100644
|
|
|
7c2869 |
--- a/xlators/storage/posix/src/posix.c
|
|
|
7c2869 |
+++ b/xlators/storage/posix/src/posix.c
|
|
|
7c2869 |
@@ -3353,6 +3353,18 @@ _fill_writev_xdata (fd_t *fd, dict_t *xdata, xlator_t *this, int is_append)
|
|
|
7c2869 |
}
|
|
|
7c2869 |
}
|
|
|
7c2869 |
|
|
|
7c2869 |
+ if (dict_get (xdata, GLUSTERFS_ACTIVE_FD_COUNT)) {
|
|
|
7c2869 |
+ ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_ACTIVE_FD_COUNT,
|
|
|
7c2869 |
+ fd->inode->active_fd_count);
|
|
|
7c2869 |
+ if (ret < 0) {
|
|
|
7c2869 |
+ gf_msg (this->name, GF_LOG_WARNING, 0,
|
|
|
7c2869 |
+ P_MSG_DICT_SET_FAILED, "%s: Failed to set "
|
|
|
7c2869 |
+ "dictionary value for %s",
|
|
|
7c2869 |
+ uuid_utoa (fd->inode->gfid),
|
|
|
7c2869 |
+ GLUSTERFS_ACTIVE_FD_COUNT);
|
|
|
7c2869 |
+ }
|
|
|
7c2869 |
+ }
|
|
|
7c2869 |
+
|
|
|
7c2869 |
if (dict_get (xdata, GLUSTERFS_WRITE_IS_APPEND)) {
|
|
|
7c2869 |
ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_WRITE_IS_APPEND,
|
|
|
7c2869 |
is_append);
|
|
|
7c2869 |
--
|
|
|
7c2869 |
1.8.3.1
|
|
|
7c2869 |
|