From ef167770c456b228aa4227d9376bcf2c283df95e Mon Sep 17 00:00:00 2001 From: Raghavendra G Date: Thu, 24 Nov 2016 14:58:20 +0530 Subject: [PATCH 255/257] performance/readdir-ahead: limit cache size This patch introduces a new option called "rda-cache-limit", which is the maximum size to which the entire readdir-ahead cache can grow. Since readdir-ahead holds a reference to inode through dentries, this patch also accounts for the memory stored by various xlators in inode contexts. >Reviewed-on: http://review.gluster.org/16137 >NetBSD-regression: NetBSD Build System >Reviewed-by: Poornima G >Smoke: Gluster Build System >CentOS-regression: Gluster Build System Change-Id: I84cc0ca812f35e0f9041f8cc71effae53a9e7f99 BUG: 1393316 Signed-off-by: Raghavendra G Reviewed-on: https://code.engineering.redhat.com/gerrit/93587 Reviewed-by: Atin Mukherjee Tested-by: Atin Mukherjee --- libglusterfs/src/inode.c | 32 ++++++ libglusterfs/src/inode.h | 3 + libglusterfs/src/xlator.h | 6 ++ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 28 ++++- .../performance/readdir-ahead/src/readdir-ahead.c | 116 +++++++++++++++------ .../performance/readdir-ahead/src/readdir-ahead.h | 4 +- 6 files changed, 153 insertions(+), 36 deletions(-) diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c index 2aca41b..650a301 100644 --- a/libglusterfs/src/inode.c +++ b/libglusterfs/src/inode.c @@ -2507,3 +2507,35 @@ out: return; } + +size_t +inode_ctx_size (inode_t *inode) +{ + int i = 0; + size_t size = 0; + xlator_t *xl = NULL, *old_THIS = NULL; + + if (!inode) + goto out; + + LOCK (&inode->lock); + { + for (i = 0; i < inode->table->ctxcount; i++) { + if (!inode->_ctx[i].xl_key) + continue; + + xl = (xlator_t *)(long)inode->_ctx[i].xl_key; + old_THIS = THIS; + THIS = xl; + + if (xl->cbks->ictxsize) + size += xl->cbks->ictxsize (xl, inode); + + THIS = old_THIS; + } + } + UNLOCK (&inode->lock); + +out: + return size; +} diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h index 114aeae..5289b15 
100644 --- a/libglusterfs/src/inode.h +++ b/libglusterfs/src/inode.h @@ -279,4 +279,7 @@ inode_needs_lookup (inode_t *inode, xlator_t *this); int inode_has_dentry (inode_t *inode); +size_t +inode_ctx_size (inode_t *inode); + #endif /* _INODE_H */ diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h index 70e6f0a..b11d1a9 100644 --- a/libglusterfs/src/xlator.h +++ b/libglusterfs/src/xlator.h @@ -847,6 +847,10 @@ typedef int32_t (*cbk_client_t)(xlator_t *this, client_t *client); typedef void (*cbk_ictxmerge_t) (xlator_t *this, fd_t *fd, inode_t *inode, inode_t *linked_inode); +typedef size_t (*cbk_inodectx_size_t)(xlator_t *this, inode_t *inode); + +typedef size_t (*cbk_fdctx_size_t)(xlator_t *this, fd_t *fd); + struct xlator_cbks { cbk_forget_t forget; cbk_release_t release; @@ -855,6 +859,8 @@ struct xlator_cbks { cbk_client_t client_destroy; cbk_client_t client_disconnect; cbk_ictxmerge_t ictxmerge; + cbk_inodectx_size_t ictxsize; + cbk_fdctx_size_t fdctxsize; }; typedef int32_t (*dumpop_priv_t) (xlator_t *this); diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 93ed1c8..cc95bf7 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -2622,7 +2622,6 @@ struct volopt_map_entry glusterd_volopt_map[] = { "option. Unmount and delete the shared storage volume " " on disabling this option." 
}, - #if USE_GFDB /* no GFDB means tiering is disabled */ /* tier translator - global tunables */ { .key = "cluster.write-freq-threshold", @@ -3013,6 +3012,33 @@ struct volopt_map_entry glusterd_volopt_map[] = { .op_version = GD_OP_VERSION_3_9_0, .flags = OPT_FLAG_CLIENT_OPT }, + }, + { .key = "performance.rda-request-size", + .voltype = "performance/readdir-ahead", + .option = "rda-request-size", + .flags = OPT_FLAG_CLIENT_OPT, + .type = DOC, + .op_version = GD_OP_VERSION_3_9_1, + }, + { .key = "performance.rda-low-wmark", + .voltype = "performance/readdir-ahead", + .option = "rda-low-wmark", + .type = DOC, + .flags = OPT_FLAG_CLIENT_OPT, + .op_version = GD_OP_VERSION_3_9_1, + }, + { .key = "performance.rda-high-wmark", + .voltype = "performance/readdir-ahead", + .type = DOC, + .flags = OPT_FLAG_CLIENT_OPT, + .op_version = GD_OP_VERSION_3_9_1, + }, + { .key = "performance.rda-cache-limit", + .voltype = "performance/readdir-ahead", + .type = DOC, + .flags = OPT_FLAG_CLIENT_OPT, + .op_version = GD_OP_VERSION_3_9_1, + }, { .key = NULL } }; diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.c b/xlators/performance/readdir-ahead/src/readdir-ahead.c index c3daf91..4b57a8b 100644 --- a/xlators/performance/readdir-ahead/src/readdir-ahead.c +++ b/xlators/performance/readdir-ahead/src/readdir-ahead.c @@ -97,7 +97,8 @@ rda_can_serve_readdirp(struct rda_fd_ctx *ctx, size_t request_size) { if ((ctx->state & RDA_FD_EOD) || (ctx->state & RDA_FD_ERROR) || - (!(ctx->state & RDA_FD_PLUGGED) && (ctx->cur_size > 0))) + (!(ctx->state & RDA_FD_PLUGGED) && (ctx->cur_size > 0)) || + (request_size && ctx->cur_size >= request_size)) return _gf_true; return _gf_false; @@ -111,20 +112,28 @@ static int32_t __rda_serve_readdirp(xlator_t *this, gf_dirent_t *entries, size_t request_size, struct rda_fd_ctx *ctx) { - gf_dirent_t *dirent, *tmp; - size_t dirent_size, size = 0; - int32_t count = 0; - struct rda_priv *priv = this->private; + gf_dirent_t *dirent, *tmp; + size_t 
dirent_size, size = 0, inodectx_size = 0; + int32_t count = 0; + struct rda_priv *priv = NULL; + + priv = this->private; list_for_each_entry_safe(dirent, tmp, &ctx->entries.list, list) { dirent_size = gf_dirent_size(dirent->d_name); if (size + dirent_size > request_size) break; + inodectx_size = 0; + + inode_ctx_del (dirent->inode, this, (void *)&inodectx_size); + size += dirent_size; list_del_init(&dirent->list); ctx->cur_size -= dirent_size; + priv->rda_cache_size -= (dirent_size + inodectx_size); + list_add_tail(&dirent->list, &entries->list); ctx->cur_offset = dirent->d_off; count++; @@ -234,11 +243,17 @@ rda_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, * the request out of the preload or the request that enables us to do * so is in flight... */ - if (rda_can_serve_readdirp(ctx, size)) + if (rda_can_serve_readdirp(ctx, size)) { call_resume(stub); - else + } else { ctx->stub = stub; + if (!(ctx->state & RDA_FD_RUNNING)) { + fill = 1; + ctx->state |= RDA_FD_RUNNING; + } + } + UNLOCK(&ctx->lock); if (fill) @@ -266,6 +281,7 @@ rda_fill_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this, struct rda_fd_ctx *ctx = local->ctx; struct rda_priv *priv = this->private; int fill = 1; + size_t inodectx_size = 0, dirent_size = 0; LOCK(&ctx->lock); @@ -286,7 +302,19 @@ rda_fill_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this, /* must preserve entry order */ list_add_tail(&dirent->list, &ctx->entries.list); - ctx->cur_size += gf_dirent_size(dirent->d_name); + dirent_size = gf_dirent_size (dirent->d_name); + inodectx_size = 0; + + if (dirent->inode) { + inodectx_size = inode_ctx_size (dirent->inode); + inode_ctx_set (dirent->inode, this, + (void *)inodectx_size); + } + + ctx->cur_size += dirent_size; + + priv->rda_cache_size += (dirent_size + inodectx_size); + ctx->next_offset = dirent->d_off; } } @@ -321,19 +349,21 @@ out: * If we have been marked for bypass and have no pending stub, clear the * run state so we stop preloading the context 
with entries. */ - if ((ctx->state & RDA_FD_BYPASS) && !ctx->stub) + if (!ctx->stub && ((ctx->state & RDA_FD_BYPASS) + || (priv->rda_cache_size > priv->rda_cache_limit))) ctx->state &= ~RDA_FD_RUNNING; if (!(ctx->state & RDA_FD_RUNNING)) { fill = 0; - if (ctx->xattrs) { - /* - * fill = 0 and hence rda_fill_fd() won't be invoked. - * unref for ref taken in rda_fill_fd() - */ - dict_unref (ctx->xattrs); - ctx->xattrs = NULL; - } + if (ctx->xattrs) { + /* + * fill = 0 and hence rda_fill_fd() won't be invoked. + * unref for ref taken in rda_fill_fd() + */ + dict_unref (ctx->xattrs); + ctx->xattrs = NULL; + } + STACK_DESTROY(ctx->fill_frame->root); ctx->fill_frame = NULL; } @@ -393,10 +423,10 @@ rda_fill_fd(call_frame_t *frame, xlator_t *this, fd_t *fd) ctx->fill_frame = nframe; - if (!ctx->xattrs && orig_local && orig_local->xattrs) { - /* when this function is invoked by rda_opendir_cbk */ - ctx->xattrs = dict_ref(orig_local->xattrs); - } + if (!ctx->xattrs && orig_local && orig_local->xattrs) { + /* when this function is invoked by rda_opendir_cbk */ + ctx->xattrs = dict_ref(orig_local->xattrs); + } } else { nframe = ctx->fill_frame; local = nframe->local; @@ -578,11 +608,13 @@ reconfigure(xlator_t *this, dict_t *options) struct rda_priv *priv = this->private; GF_OPTION_RECONF("rda-request-size", priv->rda_req_size, options, - uint32, err); - GF_OPTION_RECONF("rda-low-wmark", priv->rda_low_wmark, options, size_uint64, - err); - GF_OPTION_RECONF("rda-high-wmark", priv->rda_high_wmark, options, size_uint64, - err); + size_uint64, err); + GF_OPTION_RECONF("rda-low-wmark", priv->rda_low_wmark, options, + size_uint64, err); + GF_OPTION_RECONF("rda-high-wmark", priv->rda_high_wmark, options, + size_uint64, err); + GF_OPTION_RECONF("rda-cache-limit", priv->rda_cache_limit, options, + size_uint64, err); return 0; err: @@ -619,9 +651,13 @@ init(xlator_t *this) if (!this->local_pool) goto err; - GF_OPTION_INIT("rda-request-size", priv->rda_req_size, uint32, err); + 
GF_OPTION_INIT("rda-request-size", priv->rda_req_size, size_uint64, + err); GF_OPTION_INIT("rda-low-wmark", priv->rda_low_wmark, size_uint64, err); - GF_OPTION_INIT("rda-high-wmark", priv->rda_high_wmark, size_uint64, err); + GF_OPTION_INIT("rda-high-wmark", priv->rda_high_wmark, size_uint64, + err); + GF_OPTION_INIT("rda-cache-limit", priv->rda_cache_limit, size_uint64, + err); return 0; @@ -657,26 +693,38 @@ struct xlator_cbks cbks = { struct volume_options options[] = { { .key = {"rda-request-size"}, - .type = GF_OPTION_TYPE_INT, + .type = GF_OPTION_TYPE_SIZET, .min = 4096, .max = 131072, - .default_value = "131072", - .description = "readdir-ahead request size", + .default_value = "128KB", + .description = "size of buffer in readdirp calls initiated by " + "readdir-ahead ", }, { .key = {"rda-low-wmark"}, .type = GF_OPTION_TYPE_SIZET, .min = 0, .max = 10 * GF_UNIT_MB, .default_value = "4096", - .description = "the value under which we plug", + .description = "the value under which readdir-ahead plugs", }, { .key = {"rda-high-wmark"}, .type = GF_OPTION_TYPE_SIZET, .min = 0, .max = 100 * GF_UNIT_MB, - .default_value = "131072", - .description = "the value over which we unplug", + .default_value = "128KB", + .description = "the value over which readdir-ahead unplugs", }, + { .key = {"rda-cache-limit"}, + .type = GF_OPTION_TYPE_SIZET, + .min = 0, + .max = 1 * GF_UNIT_GB, + .default_value = "10MB", + .description = "maximum size of cache consumed by readdir-ahead " + "xlator. 
This value is global and total memory " + "consumption by readdir-ahead is capped by this " + "value, irrespective of the number/size of " + "directories cached", + }, { .key = {NULL} }, }; diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.h b/xlators/performance/readdir-ahead/src/readdir-ahead.h index f030f10..6b65a62 100644 --- a/xlators/performance/readdir-ahead/src/readdir-ahead.h +++ b/xlators/performance/readdir-ahead/src/readdir-ahead.h @@ -40,9 +40,11 @@ struct rda_local { }; struct rda_priv { - uint32_t rda_req_size; + uint64_t rda_req_size; uint64_t rda_low_wmark; uint64_t rda_high_wmark; + uint64_t rda_cache_limit; + uint64_t rda_cache_size; }; #endif /* __READDIR_AHEAD_H */ -- 2.9.3