From 17a9ce965ef2fec9ee5c8e4b76981bb7cbcf1352 Mon Sep 17 00:00:00 2001 From: mohit84 Date: Mon, 9 Nov 2020 17:15:42 +0530 Subject: [PATCH 506/511] posix: Attach a posix_spawn_disk_thread with glusterfs_ctx (#1595) Currently posix xlator spawns posix_disk_space_threads per brick and in case of brick_mux environment while glusterd attached bricks at maximum level(250) with a single brick process in that case 250 threads are spawned for all bricks and brick process memory size also increased. Solution: Attach a posix_disk_space thread with glusterfs_ctx to spawn a thread per process basis instead of spawning a per brick > Fixes: #1482 > Change-Id: I8dd88f252a950495b71742e2a7588bd5bb019ec7 > Cherry-picked from commit 3f93be77e1acf5baacafa97a320e91e6879d1c0e > Reviewed on upstream link https://github.com/gluster/glusterfs/issues/1482 > Signed-off-by: Mohit Agrawal Change-Id: I8dd88f252a950495b71742e2a7588bd5bb019ec7 Bug: 1898776 Signed-off-by: Mohit Agrawal Reviewed-on: https://code.engineering.redhat.com/gerrit/220366 Tested-by: RHGS Build Bot Reviewed-by: Sunil Kumar Heggodu Gopala Acharya --- glusterfsd/src/glusterfsd.c | 4 + libglusterfs/src/glusterfs/glusterfs.h | 6 ++ xlators/storage/posix/src/posix-common.c | 68 +++++++++++-- xlators/storage/posix/src/posix-handle.h | 3 +- xlators/storage/posix/src/posix-helpers.c | 131 ++++++++++++++----------- xlators/storage/posix/src/posix-inode-fd-ops.c | 3 +- xlators/storage/posix/src/posix-mem-types.h | 1 + xlators/storage/posix/src/posix.h | 12 ++- 8 files changed, 160 insertions(+), 68 deletions(-) diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c index 955bf1d..ac25255 100644 --- a/glusterfsd/src/glusterfsd.c +++ b/glusterfsd/src/glusterfsd.c @@ -1840,9 +1840,13 @@ glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx) INIT_LIST_HEAD(&cmd_args->xlator_options); INIT_LIST_HEAD(&cmd_args->volfile_servers); ctx->pxl_count = 0; + ctx->diskxl_count = 0; pthread_mutex_init(&ctx->fd_lock, NULL); pthread_cond_init(&ctx->fd_cond, NULL); INIT_LIST_HEAD(&ctx->janitor_fds); + pthread_mutex_init(&ctx->xl_lock, NULL); + pthread_cond_init(&ctx->xl_cond, NULL); + INIT_LIST_HEAD(&ctx->diskth_xl); lim.rlim_cur = RLIM_INFINITY; lim.rlim_max = RLIM_INFINITY; diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h index bf6a987..d3400bf 100644 --- a/libglusterfs/src/glusterfs/glusterfs.h +++ b/libglusterfs/src/glusterfs/glusterfs.h @@ -740,7 +740,13 @@ struct _glusterfs_ctx { pthread_t janitor; /* The variable is use to save total posix xlator count */ uint32_t pxl_count; + uint32_t diskxl_count; + /* List of posix xlator use by disk thread*/ + struct list_head diskth_xl; + pthread_mutex_t xl_lock; + pthread_cond_t xl_cond; + pthread_t disk_space_check; char volume_id[GF_UUID_BUF_SIZE]; /* Used only in protocol/client */ }; typedef struct _glusterfs_ctx glusterfs_ctx_t; diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c index e5c6e62..2c9030b 100644 --- a/xlators/storage/posix/src/posix-common.c +++ b/xlators/storage/posix/src/posix-common.c @@ -138,6 +138,36 @@ posix_inode(xlator_t *this) return 0; } +static void +delete_posix_diskxl(xlator_t *this) +{ + struct posix_private *priv = this->private; + struct posix_diskxl *pxl = priv->pxl; + glusterfs_ctx_t *ctx = this->ctx; + uint32_t count = 1; + + if (pxl) { + pthread_mutex_lock(&ctx->xl_lock); + { + pxl->detach_notify = _gf_true; + while (pxl->is_use) + pthread_cond_wait(&pxl->cond, &ctx->xl_lock); + list_del_init(&pxl->list); + priv->pxl = NULL; + count = --ctx->diskxl_count; + if (count == 0) + pthread_cond_signal(&ctx->xl_cond); + } + pthread_mutex_unlock(&ctx->xl_lock); + pthread_cond_destroy(&pxl->cond); + GF_FREE(pxl); + if (count == 0) { + pthread_join(ctx->disk_space_check, NULL); + ctx->disk_space_check = 0; + } + } +} + /** * notify - when parent sends PARENT_UP, send CHILD_UP event from here */ @@ -194,6 +224,8 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...) } pthread_mutex_unlock(&ctx->fd_lock); + delete_posix_diskxl(this); + gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s", victim->name); default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data); @@ -318,6 +350,7 @@ posix_reconfigure(xlator_t *this, dict_t *options) int32_t force_directory_mode = -1; int32_t create_mask = -1; int32_t create_directory_mask = -1; + double old_disk_reserve = 0.0; priv = this->private; @@ -383,6 +416,7 @@ posix_reconfigure(xlator_t *this, dict_t *options) " fallback to :"); } + old_disk_reserve = priv->disk_reserve; GF_OPTION_RECONF("reserve", priv->disk_reserve, options, percent_or_size, out); /* option can be any one of percent or bytes */ @@ -390,11 +424,19 @@ posix_reconfigure(xlator_t *this, dict_t *options) if (priv->disk_reserve < 100.0) priv->disk_unit = 'p'; - if (priv->disk_reserve) { + /* Delete a pxl object from a list of disk_reserve while something + is changed for reserve option during graph reconfigure + */ + if (old_disk_reserve != priv->disk_reserve) { + delete_posix_diskxl(this); + old_disk_reserve = 0; + } + + if (!old_disk_reserve && priv->disk_reserve) { ret = posix_spawn_disk_space_check_thread(this); if (ret) { gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED, - "Getting disk space check from thread failed"); + "Getting disk space check from thread failed "); goto out; } } @@ -1008,13 +1050,13 @@ posix_init(xlator_t *this) " fallback to :"); } - _private->disk_space_check_active = _gf_false; _private->disk_space_full = 0; GF_OPTION_INIT("reserve", _private->disk_reserve, percent_or_size, out); /* option can be any one of percent or bytes */ _private->disk_unit = 0; + pthread_cond_init(&_private->fd_cond, NULL); if (_private->disk_reserve < 100.0) _private->disk_unit = 'p'; @@ -1162,12 +1204,6 @@ posix_fini(xlator_t *this) priv->health_check = 0; } - if (priv->disk_space_check) { - priv->disk_space_check_active = _gf_false; - (void)gf_thread_cleanup_xint(priv->disk_space_check); - priv->disk_space_check = 0; - } - if (priv->janitor) { /*TODO: Make sure the synctask is also complete */ ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor); @@ -1192,10 +1228,24 @@ posix_fini(xlator_t *this) pthread_join(ctx->janitor, NULL); } + pthread_mutex_lock(&ctx->xl_lock); + { + count = --ctx->diskxl_count; + if (count == 0) + pthread_cond_signal(&ctx->xl_cond); + } + pthread_mutex_unlock(&ctx->xl_lock); + + if (count == 0) { + pthread_join(ctx->disk_space_check, NULL); + ctx->disk_space_check = 0; + } + if (priv->fsyncer) { (void)gf_thread_cleanup_xint(priv->fsyncer); priv->fsyncer = 0; } + /*unlock brick dir*/ if (priv->mount_lock) (void)sys_closedir(priv->mount_lock); diff --git a/xlators/storage/posix/src/posix-handle.h b/xlators/storage/posix/src/posix-handle.h index c4d7cb1..8e4c719 100644 --- a/xlators/storage/posix/src/posix-handle.h +++ b/xlators/storage/posix/src/posix-handle.h @@ -206,5 +206,6 @@ int posix_check_internal_writes(xlator_t *this, fd_t *fd, int sysfd, dict_t *xdata); void -posix_disk_space_check(xlator_t *this); +posix_disk_space_check(struct posix_private* priv); + #endif /* !_POSIX_HANDLE_H */ diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c index ceac52a..110d383 100644 --- a/xlators/storage/posix/src/posix-helpers.c +++ b/xlators/storage/posix/src/posix-helpers.c @@ -2284,9 +2284,8 @@ unlock: } void -posix_disk_space_check(xlator_t *this) +posix_disk_space_check(struct posix_private *priv) { - struct posix_private *priv = NULL; char *subvol_path = NULL; int op_ret = 0; double size = 0; @@ -2295,16 +2294,14 @@ posix_disk_space_check(xlator_t *this) double totsz = 0; double freesz = 0; - GF_VALIDATE_OR_GOTO(this->name, this, out); - priv = this->private; - GF_VALIDATE_OR_GOTO(this->name, priv, out); + GF_VALIDATE_OR_GOTO("posix-helpers", priv, out); subvol_path = priv->base_path; op_ret = sys_statvfs(subvol_path, &buf); if (op_ret == -1) { - gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED, + gf_msg("posix-disk", GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED, "statvfs failed on %s", subvol_path); goto out; } @@ -2328,78 +2325,102 @@ out: } static void * -posix_disk_space_check_thread_proc(void *data) +posix_ctx_disk_thread_proc(void *data) { - xlator_t *this = NULL; struct posix_private *priv = NULL; + glusterfs_ctx_t *ctx = NULL; uint32_t interval = 0; - int ret = -1; - - this = data; - priv = this->private; + struct posix_diskxl *pthis = NULL; + xlator_t *this = NULL; + struct timespec sleep_till = { + 0, + }; + ctx = data; interval = 5; - gf_msg_debug(this->name, 0, - "disk-space thread started, " + + gf_msg_debug("glusterfs_ctx", 0, + "Ctx disk-space thread started, " "interval = %d seconds", interval); - while (1) { - /* aborting sleep() is a request to exit this thread, sleep() - * will normally not return when cancelled */ - ret = sleep(interval); - if (ret > 0) - break; - /* prevent thread errors while doing the health-check(s) */ - pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); - - /* Do the disk-check.*/ - posix_disk_space_check(this); - if (!priv->disk_space_check_active) - goto out; - pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); - } -out: - gf_msg_debug(this->name, 0, "disk space check thread exiting"); - LOCK(&priv->lock); + pthread_mutex_lock(&ctx->xl_lock); { - priv->disk_space_check_active = _gf_false; + while (ctx->diskxl_count > 0) { + list_for_each_entry(pthis, &ctx->diskth_xl, list) + { + pthis->is_use = _gf_true; + pthread_mutex_unlock(&ctx->xl_lock); + + THIS = this = pthis->xl; + priv = this->private; + + posix_disk_space_check(priv); + + pthread_mutex_lock(&ctx->xl_lock); + pthis->is_use = _gf_false; + /* Send a signal to posix_notify function */ + if (pthis->detach_notify) + pthread_cond_signal(&pthis->cond); + } + + timespec_now_realtime(&sleep_till); + sleep_till.tv_sec += 5; + (void)pthread_cond_timedwait(&ctx->xl_cond, &ctx->xl_lock, + &sleep_till); + } } - UNLOCK(&priv->lock); + pthread_mutex_unlock(&ctx->xl_lock); return NULL; } int -posix_spawn_disk_space_check_thread(xlator_t *xl) +posix_spawn_disk_space_check_thread(xlator_t *this) { - struct posix_private *priv = NULL; - int ret = -1; + int ret = 0; + glusterfs_ctx_t *ctx = this->ctx; + struct posix_diskxl *pxl = NULL; + struct posix_private *priv = this->private; - priv = xl->private; + pxl = GF_CALLOC(1, sizeof(struct posix_diskxl), gf_posix_mt_diskxl_t); + if (!pxl) { + ret = -ENOMEM; + gf_log(this->name, GF_LOG_ERROR, + "Calloc is failed to allocate " + "memory for diskxl object"); + goto out; + } + pthread_cond_init(&pxl->cond, NULL); - LOCK(&priv->lock); + pthread_mutex_lock(&ctx->xl_lock); { - /* cancel the running thread */ - if (priv->disk_space_check_active == _gf_true) { - pthread_cancel(priv->disk_space_check); - priv->disk_space_check_active = _gf_false; - } + if (ctx->diskxl_count++ == 0) { + ret = gf_thread_create(&ctx->disk_space_check, NULL, + posix_ctx_disk_thread_proc, ctx, + "posixctxres"); - ret = gf_thread_create(&priv->disk_space_check, NULL, - posix_disk_space_check_thread_proc, xl, - "posix_reserve"); - if (ret) { - priv->disk_space_check_active = _gf_false; - gf_msg(xl->name, GF_LOG_ERROR, errno, P_MSG_DISK_SPACE_CHECK_FAILED, - "unable to setup disk space check thread"); - goto unlock; + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED, + "spawning disk space check thread failed"); + ctx->diskxl_count--; + pthread_mutex_unlock(&ctx->xl_lock); + goto out; + } } + pxl->xl = this; + priv->pxl = (void *)pxl; + list_add_tail(&pxl->list, &ctx->diskth_xl); + } + pthread_mutex_unlock(&ctx->xl_lock); - priv->disk_space_check_active = _gf_true; +out: + if (ret) { + if (pxl) { + pthread_cond_destroy(&pxl->cond); + GF_FREE(pxl); + } } -unlock: - UNLOCK(&priv->lock); return ret; } diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c index 1d37aed..761e018 100644 --- a/xlators/storage/posix/src/posix-inode-fd-ops.c +++ b/xlators/storage/posix/src/posix-inode-fd-ops.c @@ -37,6 +37,7 @@ #include #endif /* HAVE_LINKAT */ +#include "posix-handle.h" #include #include #include @@ -713,7 +714,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, option behaviour */ if (priv->disk_reserve) - posix_disk_space_check(this); + posix_disk_space_check(priv); DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, unlock); diff --git a/xlators/storage/posix/src/posix-mem-types.h b/xlators/storage/posix/src/posix-mem-types.h index 2253f38..bb4c56d 100644 --- a/xlators/storage/posix/src/posix-mem-types.h +++ b/xlators/storage/posix/src/posix-mem-types.h @@ -20,6 +20,7 @@ enum gf_posix_mem_types_ { gf_posix_mt_paiocb, gf_posix_mt_inode_ctx_t, gf_posix_mt_mdata_attr, + gf_posix_mt_diskxl_t, gf_posix_mt_end }; #endif diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h index 07f367b..4be979c 100644 --- a/xlators/storage/posix/src/posix.h +++ b/xlators/storage/posix/src/posix.h @@ -36,7 +36,6 @@ #include #include #include "posix-mem-types.h" -#include "posix-handle.h" #include #ifdef HAVE_LIBAIO @@ -138,6 +137,14 @@ struct posix_fd { char _pad[4]; /* manual padding */ }; +struct posix_diskxl { + pthread_cond_t cond; + struct list_head list; + xlator_t *xl; + gf_boolean_t detach_notify; + gf_boolean_t is_use; +}; + struct posix_private { char *base_path; int32_t base_path_length; @@ -207,6 +214,7 @@ struct posix_private { pthread_mutex_t janitor_mutex; pthread_cond_t janitor_cond; pthread_cond_t fd_cond; + pthread_cond_t disk_cond; int fsync_queue_count; enum { @@ -233,7 +241,6 @@ struct posix_private { char disk_unit; uint32_t disk_space_full; pthread_t disk_space_check; - gf_boolean_t disk_space_check_active; #ifdef GF_DARWIN_HOST_OS enum { @@ -263,6 +270,7 @@ struct posix_private { gf_boolean_t ctime; gf_boolean_t janitor_task_stop; uint32_t rel_fdcount; + void *pxl; }; typedef struct { -- 1.8.3.1