3604df
From ef167770c456b228aa4227d9376bcf2c283df95e Mon Sep 17 00:00:00 2001
3604df
From: Raghavendra G <rgowdapp@redhat.com>
3604df
Date: Thu, 24 Nov 2016 14:58:20 +0530
3604df
Subject: [PATCH 255/257] performance/readdir-ahead: limit cache size
3604df
3604df
This patch introduces a new option called "rda-cache-limit", which is
3604df
the maximum size the entire readdir-ahead cache can grow to. Since
3604df
readdir-ahead holds a reference to inodes through dentries, this patch
3604df
also accounts for memory stored by various xlators in inode contexts.
3604df
3604df
>Reviewed-on: http://review.gluster.org/16137
3604df
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
3604df
>Reviewed-by: Poornima G <pgurusid@redhat.com>
3604df
>Smoke: Gluster Build System <jenkins@build.gluster.org>
3604df
>CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
3604df
3604df
Change-Id: I84cc0ca812f35e0f9041f8cc71effae53a9e7f99
3604df
BUG: 1393316
3604df
Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
3604df
Reviewed-on: https://code.engineering.redhat.com/gerrit/93587
3604df
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
3604df
Tested-by: Atin Mukherjee <amukherj@redhat.com>
3604df
---
3604df
 libglusterfs/src/inode.c                           |  32 ++++++
3604df
 libglusterfs/src/inode.h                           |   3 +
3604df
 libglusterfs/src/xlator.h                          |   6 ++
3604df
 xlators/mgmt/glusterd/src/glusterd-volume-set.c    |  28 ++++-
3604df
 .../performance/readdir-ahead/src/readdir-ahead.c  | 116 +++++++++++++++------
3604df
 .../performance/readdir-ahead/src/readdir-ahead.h  |   4 +-
3604df
 6 files changed, 153 insertions(+), 36 deletions(-)
3604df
3604df
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
3604df
index 2aca41b..650a301 100644
3604df
--- a/libglusterfs/src/inode.c
3604df
+++ b/libglusterfs/src/inode.c
3604df
@@ -2507,3 +2507,35 @@ out:
3604df
 
3604df
         return;
3604df
 }
3604df
+
3604df
+size_t
3604df
+inode_ctx_size (inode_t *inode)
3604df
+{
3604df
+        int       i    = 0;
3604df
+        size_t    size = 0;
3604df
+        xlator_t *xl   = NULL, *old_THIS = NULL;
3604df
+
3604df
+        if (!inode)
3604df
+                goto out;
3604df
+
3604df
+        LOCK (&inode->lock);
3604df
+        {
3604df
+                for (i = 0; i < inode->table->ctxcount; i++) {
3604df
+                        if (!inode->_ctx[i].xl_key)
3604df
+                                continue;
3604df
+
3604df
+                        xl = (xlator_t *)(long)inode->_ctx[i].xl_key;
3604df
+                        old_THIS = THIS;
3604df
+                        THIS = xl;
3604df
+
3604df
+                        if (xl->cbks->ictxsize)
3604df
+                                size += xl->cbks->ictxsize (xl, inode);
3604df
+
3604df
+                        THIS = old_THIS;
3604df
+                }
3604df
+        }
3604df
+        UNLOCK (&inode->lock);
3604df
+
3604df
+out:
3604df
+        return size;
3604df
+}
3604df
diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h
3604df
index 114aeae..5289b15 100644
3604df
--- a/libglusterfs/src/inode.h
3604df
+++ b/libglusterfs/src/inode.h
3604df
@@ -279,4 +279,7 @@ inode_needs_lookup (inode_t *inode, xlator_t *this);
3604df
 int
3604df
 inode_has_dentry (inode_t *inode);
3604df
 
3604df
+size_t
3604df
+inode_ctx_size (inode_t *inode);
3604df
+
3604df
 #endif /* _INODE_H */
3604df
diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h
3604df
index 70e6f0a..b11d1a9 100644
3604df
--- a/libglusterfs/src/xlator.h
3604df
+++ b/libglusterfs/src/xlator.h
3604df
@@ -847,6 +847,10 @@ typedef int32_t (*cbk_client_t)(xlator_t *this, client_t *client);
3604df
 typedef void (*cbk_ictxmerge_t) (xlator_t *this, fd_t *fd,
3604df
                                  inode_t *inode, inode_t *linked_inode);
3604df
 
3604df
+typedef size_t (*cbk_inodectx_size_t)(xlator_t *this, inode_t *inode);
3604df
+
3604df
+typedef size_t (*cbk_fdctx_size_t)(xlator_t *this, fd_t *fd);
3604df
+
3604df
 struct xlator_cbks {
3604df
         cbk_forget_t             forget;
3604df
         cbk_release_t            release;
3604df
@@ -855,6 +859,8 @@ struct xlator_cbks {
3604df
         cbk_client_t             client_destroy;
3604df
         cbk_client_t             client_disconnect;
3604df
         cbk_ictxmerge_t          ictxmerge;
3604df
+        cbk_inodectx_size_t      ictxsize;
3604df
+        cbk_fdctx_size_t         fdctxsize;
3604df
 };
3604df
 
3604df
 typedef int32_t (*dumpop_priv_t) (xlator_t *this);
3604df
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
3604df
index 93ed1c8..cc95bf7 100644
3604df
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
3604df
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
3604df
@@ -2622,7 +2622,6 @@ struct volopt_map_entry glusterd_volopt_map[] = {
3604df
                          "option. Unmount and delete the shared storage volume "
3604df
                          " on disabling this option."
3604df
         },
3604df
-
3604df
 #if USE_GFDB /* no GFDB means tiering is disabled */
3604df
         /* tier translator - global tunables */
3604df
         { .key         = "cluster.write-freq-threshold",
3604df
@@ -3013,6 +3012,33 @@ struct volopt_map_entry glusterd_volopt_map[] = {
3604df
           .op_version = GD_OP_VERSION_3_9_0,
3604df
           .flags      = OPT_FLAG_CLIENT_OPT
3604df
         },
3604df
+        },
3604df
+	{ .key         = "performance.rda-request-size",
3604df
+	  .voltype     = "performance/readdir-ahead",
3604df
+          .option      = "rda-request-size",
3604df
+          .flags       = OPT_FLAG_CLIENT_OPT,
3604df
+          .type        = DOC,
3604df
+          .op_version  = GD_OP_VERSION_3_9_1,
3604df
+	},
3604df
+	{ .key         = "performance.rda-low-wmark",
3604df
+          .voltype     = "performance/readdir-ahead",
3604df
+          .option      = "rda-low-wmark",
3604df
+          .type        = DOC,
3604df
+          .flags       = OPT_FLAG_CLIENT_OPT,
3604df
+          .op_version  = GD_OP_VERSION_3_9_1,
3604df
+	},
3604df
+	{ .key         = "performance.rda-high-wmark",
3604df
+          .voltype     = "performance/readdir-ahead",
3604df
+          .type        = DOC,
3604df
+          .flags       = OPT_FLAG_CLIENT_OPT,
3604df
+          .op_version  = GD_OP_VERSION_3_9_1,
3604df
+	},
3604df
+        { .key         = "performance.rda-cache-limit",
3604df
+          .voltype     = "performance/readdir-ahead",
3604df
+          .type        = DOC,
3604df
+          .flags       = OPT_FLAG_CLIENT_OPT,
3604df
+          .op_version  = GD_OP_VERSION_3_9_1,
3604df
+        },
3604df
         { .key         = NULL
3604df
         }
3604df
 };
3604df
diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.c b/xlators/performance/readdir-ahead/src/readdir-ahead.c
3604df
index c3daf91..4b57a8b 100644
3604df
--- a/xlators/performance/readdir-ahead/src/readdir-ahead.c
3604df
+++ b/xlators/performance/readdir-ahead/src/readdir-ahead.c
3604df
@@ -97,7 +97,8 @@ rda_can_serve_readdirp(struct rda_fd_ctx *ctx, size_t request_size)
3604df
 {
3604df
 	if ((ctx->state & RDA_FD_EOD) ||
3604df
 	    (ctx->state & RDA_FD_ERROR) ||
3604df
-	    (!(ctx->state & RDA_FD_PLUGGED) && (ctx->cur_size > 0)))
3604df
+	    (!(ctx->state & RDA_FD_PLUGGED) && (ctx->cur_size > 0)) ||
3604df
+            (request_size && ctx->cur_size >= request_size))
3604df
 		return _gf_true;
3604df
 
3604df
 	return _gf_false;
3604df
@@ -111,20 +112,28 @@ static int32_t
3604df
 __rda_serve_readdirp(xlator_t *this, gf_dirent_t *entries, size_t request_size,
3604df
 		   struct rda_fd_ctx *ctx)
3604df
 {
3604df
-	gf_dirent_t *dirent, *tmp;
3604df
-	size_t dirent_size, size = 0;
3604df
-	int32_t count = 0;
3604df
-	struct rda_priv *priv = this->private;
3604df
+	gf_dirent_t     *dirent, *tmp;
3604df
+	size_t           dirent_size, size = 0, inodectx_size = 0;
3604df
+	int32_t          count             = 0;
3604df
+	struct rda_priv *priv              = NULL;
3604df
+
3604df
+        priv = this->private;
3604df
 
3604df
 	list_for_each_entry_safe(dirent, tmp, &ctx->entries.list, list) {
3604df
 		dirent_size = gf_dirent_size(dirent->d_name);
3604df
 		if (size + dirent_size > request_size)
3604df
 			break;
3604df
 
3604df
+                inodectx_size = 0;
3604df
+
3604df
+                inode_ctx_del (dirent->inode, this, (void *)&inodectx_size);
3604df
+
3604df
 		size += dirent_size;
3604df
 		list_del_init(&dirent->list);
3604df
 		ctx->cur_size -= dirent_size;
3604df
 
3604df
+                priv->rda_cache_size -= (dirent_size + inodectx_size);
3604df
+
3604df
 		list_add_tail(&dirent->list, &entries->list);
3604df
 		ctx->cur_offset = dirent->d_off;
3604df
 		count++;
3604df
@@ -234,11 +243,17 @@ rda_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
3604df
 	 * the request out of the preload or the request that enables us to do
3604df
 	 * so is in flight...
3604df
 	 */
3604df
-	if (rda_can_serve_readdirp(ctx, size))
3604df
+	if (rda_can_serve_readdirp(ctx, size)) {
3604df
 		call_resume(stub);
3604df
-	else
3604df
+        } else {
3604df
 		ctx->stub = stub;
3604df
 
3604df
+                if (!(ctx->state & RDA_FD_RUNNING)) {
3604df
+                        fill = 1;
3604df
+                        ctx->state |= RDA_FD_RUNNING;
3604df
+                }
3604df
+        }
3604df
+
3604df
 	UNLOCK(&ctx->lock);
3604df
 
3604df
 	if (fill)
3604df
@@ -266,6 +281,7 @@ rda_fill_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
3604df
 	struct rda_fd_ctx *ctx = local->ctx;
3604df
 	struct rda_priv *priv = this->private;
3604df
 	int fill = 1;
3604df
+        size_t inodectx_size = 0, dirent_size = 0;
3604df
 
3604df
 	LOCK(&ctx->lock);
3604df
 
3604df
@@ -286,7 +302,19 @@ rda_fill_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
3604df
 			/* must preserve entry order */
3604df
 			list_add_tail(&dirent->list, &ctx->entries.list);
3604df
 
3604df
-			ctx->cur_size += gf_dirent_size(dirent->d_name);
3604df
+                        dirent_size = gf_dirent_size (dirent->d_name);
3604df
+                        inodectx_size = 0;
3604df
+
3604df
+                        if (dirent->inode) {
3604df
+                                inodectx_size = inode_ctx_size (dirent->inode);
3604df
+                                inode_ctx_set (dirent->inode, this,
3604df
+                                               (void *)inodectx_size);
3604df
+                        }
3604df
+
3604df
+			ctx->cur_size += dirent_size;
3604df
+
3604df
+                        priv->rda_cache_size += (dirent_size + inodectx_size);
3604df
+
3604df
 			ctx->next_offset = dirent->d_off;
3604df
 		}
3604df
 	}
3604df
@@ -321,19 +349,21 @@ out:
3604df
 	 * If we have been marked for bypass and have no pending stub, clear the
3604df
 	 * run state so we stop preloading the context with entries.
3604df
 	 */
3604df
-	if ((ctx->state & RDA_FD_BYPASS) && !ctx->stub)
3604df
+	if (!ctx->stub && ((ctx->state & RDA_FD_BYPASS)
3604df
+                           || (priv->rda_cache_size > priv->rda_cache_limit)))
3604df
 		ctx->state &= ~RDA_FD_RUNNING;
3604df
 
3604df
 	if (!(ctx->state & RDA_FD_RUNNING)) {
3604df
 		fill = 0;
3604df
-        if (ctx->xattrs) {
3604df
-                /*
3604df
-                 * fill = 0 and hence rda_fill_fd() won't be invoked.
3604df
-                 * unref for ref taken in rda_fill_fd()
3604df
-                 */
3604df
-                dict_unref (ctx->xattrs);
3604df
-                ctx->xattrs = NULL;
3604df
-        }
3604df
+                if (ctx->xattrs) {
3604df
+                        /*
3604df
+                         * fill = 0 and hence rda_fill_fd() won't be invoked.
3604df
+                         * unref for ref taken in rda_fill_fd()
3604df
+                         */
3604df
+                        dict_unref (ctx->xattrs);
3604df
+                        ctx->xattrs = NULL;
3604df
+                }
3604df
+
3604df
 		STACK_DESTROY(ctx->fill_frame->root);
3604df
 		ctx->fill_frame = NULL;
3604df
 	}
3604df
@@ -393,10 +423,10 @@ rda_fill_fd(call_frame_t *frame, xlator_t *this, fd_t *fd)
3604df
 
3604df
 		ctx->fill_frame = nframe;
3604df
 
3604df
-        if (!ctx->xattrs && orig_local && orig_local->xattrs) {
3604df
-                /* when this function is invoked by rda_opendir_cbk */
3604df
-                ctx->xattrs = dict_ref(orig_local->xattrs);
3604df
-        }
3604df
+                if (!ctx->xattrs && orig_local && orig_local->xattrs) {
3604df
+                        /* when this function is invoked by rda_opendir_cbk */
3604df
+                        ctx->xattrs = dict_ref(orig_local->xattrs);
3604df
+                }
3604df
 	} else {
3604df
 		nframe = ctx->fill_frame;
3604df
 		local = nframe->local;
3604df
@@ -578,11 +608,13 @@ reconfigure(xlator_t *this, dict_t *options)
3604df
 	struct rda_priv *priv = this->private;
3604df
 
3604df
 	GF_OPTION_RECONF("rda-request-size", priv->rda_req_size, options,
3604df
-			 uint32, err);
3604df
-	GF_OPTION_RECONF("rda-low-wmark", priv->rda_low_wmark, options, size_uint64,
3604df
-			 err);
3604df
-	GF_OPTION_RECONF("rda-high-wmark", priv->rda_high_wmark, options, size_uint64,
3604df
-			 err);
3604df
+			 size_uint64, err);
3604df
+	GF_OPTION_RECONF("rda-low-wmark", priv->rda_low_wmark, options,
3604df
+                         size_uint64, err);
3604df
+	GF_OPTION_RECONF("rda-high-wmark", priv->rda_high_wmark, options,
3604df
+                         size_uint64, err);
3604df
+        GF_OPTION_RECONF("rda-cache-limit", priv->rda_cache_limit, options,
3604df
+                         size_uint64, err);
3604df
 
3604df
 	return 0;
3604df
 err:
3604df
@@ -619,9 +651,13 @@ init(xlator_t *this)
3604df
 	if (!this->local_pool)
3604df
 		goto err;
3604df
 
3604df
-	GF_OPTION_INIT("rda-request-size", priv->rda_req_size, uint32, err);
3604df
+	GF_OPTION_INIT("rda-request-size", priv->rda_req_size, size_uint64,
3604df
+                       err);
3604df
 	GF_OPTION_INIT("rda-low-wmark", priv->rda_low_wmark, size_uint64, err);
3604df
-	GF_OPTION_INIT("rda-high-wmark", priv->rda_high_wmark, size_uint64, err);
3604df
+	GF_OPTION_INIT("rda-high-wmark", priv->rda_high_wmark, size_uint64,
3604df
+                       err);
3604df
+        GF_OPTION_INIT("rda-cache-limit", priv->rda_cache_limit, size_uint64,
3604df
+                       err);
3604df
 
3604df
 	return 0;
3604df
 
3604df
@@ -657,26 +693,38 @@ struct xlator_cbks cbks = {
3604df
 
3604df
 struct volume_options options[] = {
3604df
 	{ .key = {"rda-request-size"},
3604df
-	  .type = GF_OPTION_TYPE_INT,
3604df
+	  .type = GF_OPTION_TYPE_SIZET,
3604df
 	  .min = 4096,
3604df
 	  .max = 131072,
3604df
-	  .default_value = "131072",
3604df
-	  .description = "readdir-ahead request size",
3604df
+	  .default_value = "128KB",
3604df
+	  .description = "size of buffer in readdirp calls initiated by "
3604df
+                         "readdir-ahead ",
3604df
 	},
3604df
 	{ .key = {"rda-low-wmark"},
3604df
 	  .type = GF_OPTION_TYPE_SIZET,
3604df
 	  .min = 0,
3604df
 	  .max = 10 * GF_UNIT_MB,
3604df
 	  .default_value = "4096",
3604df
-	  .description = "the value under which we plug",
3604df
+	  .description = "the value under which readdir-ahead plugs",
3604df
 	},
3604df
 	{ .key = {"rda-high-wmark"},
3604df
 	  .type = GF_OPTION_TYPE_SIZET,
3604df
 	  .min = 0,
3604df
 	  .max = 100 * GF_UNIT_MB,
3604df
-	  .default_value = "131072",
3604df
-	  .description = "the value over which we unplug",
3604df
+	  .default_value = "128KB",
3604df
+	  .description = "the value over which readdir-ahead unplugs",
3604df
 	},
3604df
+        { .key = {"rda-cache-limit"},
3604df
+          .type = GF_OPTION_TYPE_SIZET,
3604df
+          .min = 0,
3604df
+          .max = 1 * GF_UNIT_GB,
3604df
+          .default_value = "10MB",
3604df
+          .description = "maximum size of cache consumed by readdir-ahead "
3604df
+                         "xlator. This value is global and total memory "
3604df
+                         "consumption by readdir-ahead is capped by this "
3604df
+                         "value, irrespective of the number/size of "
3604df
+                         "directories cached",
3604df
+        },
3604df
         { .key = {NULL} },
3604df
 };
3604df
 
3604df
diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.h b/xlators/performance/readdir-ahead/src/readdir-ahead.h
3604df
index f030f10..6b65a62 100644
3604df
--- a/xlators/performance/readdir-ahead/src/readdir-ahead.h
3604df
+++ b/xlators/performance/readdir-ahead/src/readdir-ahead.h
3604df
@@ -40,9 +40,11 @@ struct rda_local {
3604df
 };
3604df
 
3604df
 struct rda_priv {
3604df
-	uint32_t rda_req_size;
3604df
+	uint64_t rda_req_size;
3604df
 	uint64_t rda_low_wmark;
3604df
 	uint64_t rda_high_wmark;
3604df
+        uint64_t rda_cache_limit;
3604df
+        uint64_t rda_cache_size;
3604df
 };
3604df
 
3604df
 #endif /* __READDIR_AHEAD_H */
3604df
-- 
3604df
2.9.3
3604df