74096c
From 17a9ce965ef2fec9ee5c8e4b76981bb7cbcf1352 Mon Sep 17 00:00:00 2001
74096c
From: mohit84 <moagrawa@redhat.com>
74096c
Date: Mon, 9 Nov 2020 17:15:42 +0530
74096c
Subject: [PATCH 506/511] posix: Attach a posix_spawn_disk_thread with
74096c
 glusterfs_ctx (#1595)
74096c
74096c
Currently the posix xlator spawns one posix_disk_space thread per brick.
74096c
In a brick_mux environment where glusterd attaches the maximum number of
74096c
bricks (250) to a single brick process, 250 such threads are spawned,
74096c
which also increases the memory footprint of the brick process.
74096c
74096c
Solution: Attach a posix_disk_space thread to glusterfs_ctx so that one
74096c
          thread is spawned per process instead of one per brick.
74096c
74096c
> Fixes: #1482
74096c
> Change-Id: I8dd88f252a950495b71742e2a7588bd5bb019ec7
74096c
> Cherry-picked from commit 3f93be77e1acf5baacafa97a320e91e6879d1c0e
74096c
> Reviewed on upstream link https://github.com/gluster/glusterfs/issues/1482
74096c
> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
74096c
74096c
Change-Id: I8dd88f252a950495b71742e2a7588bd5bb019ec7
74096c
Bug: 1898776
74096c
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
74096c
Reviewed-on: https://code.engineering.redhat.com/gerrit/220366
74096c
Tested-by: RHGS Build Bot <nigelb@redhat.com>
74096c
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
74096c
---
74096c
 glusterfsd/src/glusterfsd.c                    |   4 +
74096c
 libglusterfs/src/glusterfs/glusterfs.h         |   6 ++
74096c
 xlators/storage/posix/src/posix-common.c       |  68 +++++++++++--
74096c
 xlators/storage/posix/src/posix-handle.h       |   3 +-
74096c
 xlators/storage/posix/src/posix-helpers.c      | 131 ++++++++++++++-----------
74096c
 xlators/storage/posix/src/posix-inode-fd-ops.c |   3 +-
74096c
 xlators/storage/posix/src/posix-mem-types.h    |   1 +
74096c
 xlators/storage/posix/src/posix.h              |  12 ++-
74096c
 8 files changed, 160 insertions(+), 68 deletions(-)
74096c
74096c
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
74096c
index 955bf1d..ac25255 100644
74096c
--- a/glusterfsd/src/glusterfsd.c
74096c
+++ b/glusterfsd/src/glusterfsd.c
74096c
@@ -1840,9 +1840,13 @@ glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx)
74096c
     INIT_LIST_HEAD(&cmd_args->xlator_options);
74096c
     INIT_LIST_HEAD(&cmd_args->volfile_servers);
74096c
     ctx->pxl_count = 0;
74096c
+    ctx->diskxl_count = 0;
74096c
     pthread_mutex_init(&ctx->fd_lock, NULL);
74096c
     pthread_cond_init(&ctx->fd_cond, NULL);
74096c
     INIT_LIST_HEAD(&ctx->janitor_fds);
74096c
+    pthread_mutex_init(&ctx->xl_lock, NULL);
74096c
+    pthread_cond_init(&ctx->xl_cond, NULL);
74096c
+    INIT_LIST_HEAD(&ctx->diskth_xl);
74096c
 
74096c
     lim.rlim_cur = RLIM_INFINITY;
74096c
     lim.rlim_max = RLIM_INFINITY;
74096c
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
74096c
index bf6a987..d3400bf 100644
74096c
--- a/libglusterfs/src/glusterfs/glusterfs.h
74096c
+++ b/libglusterfs/src/glusterfs/glusterfs.h
74096c
@@ -740,7 +740,13 @@ struct _glusterfs_ctx {
74096c
     pthread_t janitor;
74096c
     /* The variable is use to save total posix xlator count */
74096c
     uint32_t pxl_count;
74096c
+    uint32_t diskxl_count;
74096c
 
74096c
+    /* List of posix xlators used by the disk thread */
74096c
+    struct list_head diskth_xl;
74096c
+    pthread_mutex_t xl_lock;
74096c
+    pthread_cond_t xl_cond;
74096c
+    pthread_t disk_space_check;
74096c
     char volume_id[GF_UUID_BUF_SIZE]; /* Used only in protocol/client */
74096c
 };
74096c
 typedef struct _glusterfs_ctx glusterfs_ctx_t;
74096c
diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
74096c
index e5c6e62..2c9030b 100644
74096c
--- a/xlators/storage/posix/src/posix-common.c
74096c
+++ b/xlators/storage/posix/src/posix-common.c
74096c
@@ -138,6 +138,36 @@ posix_inode(xlator_t *this)
74096c
     return 0;
74096c
 }
74096c
 
74096c
+static void
74096c
+delete_posix_diskxl(xlator_t *this)
74096c
+{
74096c
+    struct posix_private *priv = this->private;
74096c
+    struct posix_diskxl *pxl = priv->pxl;
74096c
+    glusterfs_ctx_t *ctx = this->ctx;
74096c
+    uint32_t count = 1;
74096c
+
74096c
+    if (pxl) {
74096c
+        pthread_mutex_lock(&ctx->xl_lock);
74096c
+        {
74096c
+            pxl->detach_notify = _gf_true;
74096c
+            while (pxl->is_use)
74096c
+                pthread_cond_wait(&pxl->cond, &ctx->xl_lock);
74096c
+            list_del_init(&pxl->list);
74096c
+            priv->pxl = NULL;
74096c
+            count = --ctx->diskxl_count;
74096c
+            if (count == 0)
74096c
+                pthread_cond_signal(&ctx->xl_cond);
74096c
+        }
74096c
+        pthread_mutex_unlock(&ctx->xl_lock);
74096c
+        pthread_cond_destroy(&pxl->cond);
74096c
+        GF_FREE(pxl);
74096c
+        if (count == 0) {
74096c
+            pthread_join(ctx->disk_space_check, NULL);
74096c
+            ctx->disk_space_check = 0;
74096c
+        }
74096c
+    }
74096c
+}
74096c
+
74096c
 /**
74096c
  * notify - when parent sends PARENT_UP, send CHILD_UP event from here
74096c
  */
74096c
@@ -194,6 +224,8 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
74096c
             }
74096c
             pthread_mutex_unlock(&ctx->fd_lock);
74096c
 
74096c
+            delete_posix_diskxl(this);
74096c
+
74096c
             gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
74096c
                    victim->name);
74096c
             default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data);
74096c
@@ -318,6 +350,7 @@ posix_reconfigure(xlator_t *this, dict_t *options)
74096c
     int32_t force_directory_mode = -1;
74096c
     int32_t create_mask = -1;
74096c
     int32_t create_directory_mask = -1;
74096c
+    double old_disk_reserve = 0.0;
74096c
 
74096c
     priv = this->private;
74096c
 
74096c
@@ -383,6 +416,7 @@ posix_reconfigure(xlator_t *this, dict_t *options)
74096c
                " fallback to <hostname>:<export>");
74096c
     }
74096c
 
74096c
+    old_disk_reserve = priv->disk_reserve;
74096c
     GF_OPTION_RECONF("reserve", priv->disk_reserve, options, percent_or_size,
74096c
                      out);
74096c
     /* option can be any one of percent or bytes */
74096c
@@ -390,11 +424,19 @@ posix_reconfigure(xlator_t *this, dict_t *options)
74096c
     if (priv->disk_reserve < 100.0)
74096c
         priv->disk_unit = 'p';
74096c
 
74096c
-    if (priv->disk_reserve) {
74096c
+    /* Delete a pxl object from a list of disk_reserve while something
74096c
+       is changed for reserve option during graph reconfigure
74096c
+    */
74096c
+    if (old_disk_reserve != priv->disk_reserve) {
74096c
+        delete_posix_diskxl(this);
74096c
+        old_disk_reserve = 0;
74096c
+    }
74096c
+
74096c
+    if (!old_disk_reserve && priv->disk_reserve) {
74096c
         ret = posix_spawn_disk_space_check_thread(this);
74096c
         if (ret) {
74096c
             gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED,
74096c
-                   "Getting disk space check from thread failed");
74096c
+                   "Getting disk space check from thread failed ");
74096c
             goto out;
74096c
         }
74096c
     }
74096c
@@ -1008,13 +1050,13 @@ posix_init(xlator_t *this)
74096c
                " fallback to <hostname>:<export>");
74096c
     }
74096c
 
74096c
-    _private->disk_space_check_active = _gf_false;
74096c
     _private->disk_space_full = 0;
74096c
 
74096c
     GF_OPTION_INIT("reserve", _private->disk_reserve, percent_or_size, out);
74096c
 
74096c
     /* option can be any one of percent or bytes */
74096c
     _private->disk_unit = 0;
74096c
+    pthread_cond_init(&_private->fd_cond, NULL);
74096c
     if (_private->disk_reserve < 100.0)
74096c
         _private->disk_unit = 'p';
74096c
 
74096c
@@ -1162,12 +1204,6 @@ posix_fini(xlator_t *this)
74096c
         priv->health_check = 0;
74096c
     }
74096c
 
74096c
-    if (priv->disk_space_check) {
74096c
-        priv->disk_space_check_active = _gf_false;
74096c
-        (void)gf_thread_cleanup_xint(priv->disk_space_check);
74096c
-        priv->disk_space_check = 0;
74096c
-    }
74096c
-
74096c
     if (priv->janitor) {
74096c
         /*TODO: Make sure the synctask is also complete */
74096c
         ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor);
74096c
@@ -1192,10 +1228,24 @@ posix_fini(xlator_t *this)
74096c
         pthread_join(ctx->janitor, NULL);
74096c
     }
74096c
 
74096c
+    pthread_mutex_lock(&ctx->xl_lock);
74096c
+    {
74096c
+        count = --ctx->diskxl_count;
74096c
+        if (count == 0)
74096c
+            pthread_cond_signal(&ctx->xl_cond);
74096c
+    }
74096c
+    pthread_mutex_unlock(&ctx->xl_lock);
74096c
+
74096c
+    if (count == 0) {
74096c
+        pthread_join(ctx->disk_space_check, NULL);
74096c
+        ctx->disk_space_check = 0;
74096c
+    }
74096c
+
74096c
     if (priv->fsyncer) {
74096c
         (void)gf_thread_cleanup_xint(priv->fsyncer);
74096c
         priv->fsyncer = 0;
74096c
     }
74096c
+
74096c
     /*unlock brick dir*/
74096c
     if (priv->mount_lock)
74096c
         (void)sys_closedir(priv->mount_lock);
74096c
diff --git a/xlators/storage/posix/src/posix-handle.h b/xlators/storage/posix/src/posix-handle.h
74096c
index c4d7cb1..8e4c719 100644
74096c
--- a/xlators/storage/posix/src/posix-handle.h
74096c
+++ b/xlators/storage/posix/src/posix-handle.h
74096c
@@ -206,5 +206,6 @@ int
74096c
 posix_check_internal_writes(xlator_t *this, fd_t *fd, int sysfd, dict_t *xdata);
74096c
 
74096c
 void
74096c
-posix_disk_space_check(xlator_t *this);
74096c
+posix_disk_space_check(struct posix_private* priv);
74096c
+
74096c
 #endif /* !_POSIX_HANDLE_H */
74096c
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
74096c
index ceac52a..110d383 100644
74096c
--- a/xlators/storage/posix/src/posix-helpers.c
74096c
+++ b/xlators/storage/posix/src/posix-helpers.c
74096c
@@ -2284,9 +2284,8 @@ unlock:
74096c
 }
74096c
 
74096c
 void
74096c
-posix_disk_space_check(xlator_t *this)
74096c
+posix_disk_space_check(struct posix_private *priv)
74096c
 {
74096c
-    struct posix_private *priv = NULL;
74096c
     char *subvol_path = NULL;
74096c
     int op_ret = 0;
74096c
     double size = 0;
74096c
@@ -2295,16 +2294,14 @@ posix_disk_space_check(xlator_t *this)
74096c
     double totsz = 0;
74096c
     double freesz = 0;
74096c
 
74096c
-    GF_VALIDATE_OR_GOTO(this->name, this, out);
74096c
-    priv = this->private;
74096c
-    GF_VALIDATE_OR_GOTO(this->name, priv, out);
74096c
+    GF_VALIDATE_OR_GOTO("posix-helpers", priv, out);
74096c
 
74096c
     subvol_path = priv->base_path;
74096c
 
74096c
     op_ret = sys_statvfs(subvol_path, &buf);
74096c
 
74096c
     if (op_ret == -1) {
74096c
-        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED,
74096c
+        gf_msg("posix-disk", GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED,
74096c
                "statvfs failed on %s", subvol_path);
74096c
         goto out;
74096c
     }
74096c
@@ -2328,78 +2325,102 @@ out:
74096c
 }
74096c
 
74096c
 static void *
74096c
-posix_disk_space_check_thread_proc(void *data)
74096c
+posix_ctx_disk_thread_proc(void *data)
74096c
 {
74096c
-    xlator_t *this = NULL;
74096c
     struct posix_private *priv = NULL;
74096c
+    glusterfs_ctx_t *ctx = NULL;
74096c
     uint32_t interval = 0;
74096c
-    int ret = -1;
74096c
-
74096c
-    this = data;
74096c
-    priv = this->private;
74096c
+    struct posix_diskxl *pthis = NULL;
74096c
+    xlator_t *this = NULL;
74096c
+    struct timespec sleep_till = {
74096c
+        0,
74096c
+    };
74096c
 
74096c
+    ctx = data;
74096c
     interval = 5;
74096c
-    gf_msg_debug(this->name, 0,
74096c
-                 "disk-space thread started, "
74096c
+
74096c
+    gf_msg_debug("glusterfs_ctx", 0,
74096c
+                 "Ctx disk-space thread started, "
74096c
                  "interval = %d seconds",
74096c
                  interval);
74096c
-    while (1) {
74096c
-        /* aborting sleep() is a request to exit this thread, sleep()
74096c
-         * will normally not return when cancelled */
74096c
-        ret = sleep(interval);
74096c
-        if (ret > 0)
74096c
-            break;
74096c
-        /* prevent thread errors while doing the health-check(s) */
74096c
-        pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
74096c
-
74096c
-        /* Do the disk-check.*/
74096c
-        posix_disk_space_check(this);
74096c
-        if (!priv->disk_space_check_active)
74096c
-            goto out;
74096c
-        pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
74096c
-    }
74096c
 
74096c
-out:
74096c
-    gf_msg_debug(this->name, 0, "disk space check thread exiting");
74096c
-    LOCK(&priv->lock);
74096c
+    pthread_mutex_lock(&ctx->xl_lock);
74096c
     {
74096c
-        priv->disk_space_check_active = _gf_false;
74096c
+        while (ctx->diskxl_count > 0) {
74096c
+            list_for_each_entry(pthis, &ctx->diskth_xl, list)
74096c
+            {
74096c
+                pthis->is_use = _gf_true;
74096c
+                pthread_mutex_unlock(&ctx->xl_lock);
74096c
+
74096c
+                THIS = this = pthis->xl;
74096c
+                priv = this->private;
74096c
+
74096c
+                posix_disk_space_check(priv);
74096c
+
74096c
+                pthread_mutex_lock(&ctx->xl_lock);
74096c
+                pthis->is_use = _gf_false;
74096c
+                /* Send a signal to posix_notify function */
74096c
+                if (pthis->detach_notify)
74096c
+                    pthread_cond_signal(&pthis->cond);
74096c
+            }
74096c
+
74096c
+            timespec_now_realtime(&sleep_till);
74096c
+            sleep_till.tv_sec += 5;
74096c
+            (void)pthread_cond_timedwait(&ctx->xl_cond, &ctx->xl_lock,
74096c
+                                         &sleep_till);
74096c
+        }
74096c
     }
74096c
-    UNLOCK(&priv->lock);
74096c
+    pthread_mutex_unlock(&ctx->xl_lock);
74096c
 
74096c
     return NULL;
74096c
 }
74096c
 
74096c
 int
74096c
-posix_spawn_disk_space_check_thread(xlator_t *xl)
74096c
+posix_spawn_disk_space_check_thread(xlator_t *this)
74096c
 {
74096c
-    struct posix_private *priv = NULL;
74096c
-    int ret = -1;
74096c
+    int ret = 0;
74096c
+    glusterfs_ctx_t *ctx = this->ctx;
74096c
+    struct posix_diskxl *pxl = NULL;
74096c
+    struct posix_private *priv = this->private;
74096c
 
74096c
-    priv = xl->private;
74096c
+    pxl = GF_CALLOC(1, sizeof(struct posix_diskxl), gf_posix_mt_diskxl_t);
74096c
+    if (!pxl) {
74096c
+        ret = -ENOMEM;
74096c
+        gf_log(this->name, GF_LOG_ERROR,
74096c
+               "Calloc is failed to allocate "
74096c
+               "memory for diskxl object");
74096c
+        goto out;
74096c
+    }
74096c
+    pthread_cond_init(&pxl->cond, NULL);
74096c
 
74096c
-    LOCK(&priv->lock);
74096c
+    pthread_mutex_lock(&ctx->xl_lock);
74096c
     {
74096c
-        /* cancel the running thread  */
74096c
-        if (priv->disk_space_check_active == _gf_true) {
74096c
-            pthread_cancel(priv->disk_space_check);
74096c
-            priv->disk_space_check_active = _gf_false;
74096c
-        }
74096c
+        if (ctx->diskxl_count++ == 0) {
74096c
+            ret = gf_thread_create(&ctx->disk_space_check, NULL,
74096c
+                                   posix_ctx_disk_thread_proc, ctx,
74096c
+                                   "posixctxres");
74096c
 
74096c
-        ret = gf_thread_create(&priv->disk_space_check, NULL,
74096c
-                               posix_disk_space_check_thread_proc, xl,
74096c
-                               "posix_reserve");
74096c
-        if (ret) {
74096c
-            priv->disk_space_check_active = _gf_false;
74096c
-            gf_msg(xl->name, GF_LOG_ERROR, errno, P_MSG_DISK_SPACE_CHECK_FAILED,
74096c
-                   "unable to setup disk space check thread");
74096c
-            goto unlock;
74096c
+            if (ret) {
74096c
+                gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED,
74096c
+                       "spawning disk space check thread failed");
74096c
+                ctx->diskxl_count--;
74096c
+                pthread_mutex_unlock(&ctx->xl_lock);
74096c
+                goto out;
74096c
+            }
74096c
         }
74096c
+        pxl->xl = this;
74096c
+        priv->pxl = (void *)pxl;
74096c
+        list_add_tail(&pxl->list, &ctx->diskth_xl);
74096c
+    }
74096c
+    pthread_mutex_unlock(&ctx->xl_lock);
74096c
 
74096c
-        priv->disk_space_check_active = _gf_true;
74096c
+out:
74096c
+    if (ret) {
74096c
+        if (pxl) {
74096c
+            pthread_cond_destroy(&pxl->cond);
74096c
+            GF_FREE(pxl);
74096c
+        }
74096c
     }
74096c
-unlock:
74096c
-    UNLOCK(&priv->lock);
74096c
     return ret;
74096c
 }
74096c
 
74096c
diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
74096c
index 1d37aed..761e018 100644
74096c
--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
74096c
+++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
74096c
@@ -37,6 +37,7 @@
74096c
 #include <fcntl.h>
74096c
 #endif /* HAVE_LINKAT */
74096c
 
74096c
+#include "posix-handle.h"
74096c
 #include <glusterfs/glusterfs.h>
74096c
 #include <glusterfs/checksum.h>
74096c
 #include <glusterfs/dict.h>
74096c
@@ -713,7 +714,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
74096c
        option behaviour
74096c
     */
74096c
     if (priv->disk_reserve)
74096c
-        posix_disk_space_check(this);
74096c
+        posix_disk_space_check(priv);
74096c
 
74096c
     DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, unlock);
74096c
 
74096c
diff --git a/xlators/storage/posix/src/posix-mem-types.h b/xlators/storage/posix/src/posix-mem-types.h
74096c
index 2253f38..bb4c56d 100644
74096c
--- a/xlators/storage/posix/src/posix-mem-types.h
74096c
+++ b/xlators/storage/posix/src/posix-mem-types.h
74096c
@@ -20,6 +20,7 @@ enum gf_posix_mem_types_ {
74096c
     gf_posix_mt_paiocb,
74096c
     gf_posix_mt_inode_ctx_t,
74096c
     gf_posix_mt_mdata_attr,
74096c
+    gf_posix_mt_diskxl_t,
74096c
     gf_posix_mt_end
74096c
 };
74096c
 #endif
74096c
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
74096c
index 07f367b..4be979c 100644
74096c
--- a/xlators/storage/posix/src/posix.h
74096c
+++ b/xlators/storage/posix/src/posix.h
74096c
@@ -36,7 +36,6 @@
74096c
 #include <glusterfs/compat.h>
74096c
 #include <glusterfs/timer.h>
74096c
 #include "posix-mem-types.h"
74096c
-#include "posix-handle.h"
74096c
 #include <glusterfs/call-stub.h>
74096c
 
74096c
 #ifdef HAVE_LIBAIO
74096c
@@ -138,6 +137,14 @@ struct posix_fd {
74096c
     char _pad[4]; /* manual padding */
74096c
 };
74096c
 
74096c
+struct posix_diskxl {
74096c
+    pthread_cond_t cond;
74096c
+    struct list_head list;
74096c
+    xlator_t *xl;
74096c
+    gf_boolean_t detach_notify;
74096c
+    gf_boolean_t is_use;
74096c
+};
74096c
+
74096c
 struct posix_private {
74096c
     char *base_path;
74096c
     int32_t base_path_length;
74096c
@@ -207,6 +214,7 @@ struct posix_private {
74096c
     pthread_mutex_t janitor_mutex;
74096c
     pthread_cond_t janitor_cond;
74096c
     pthread_cond_t fd_cond;
74096c
+    pthread_cond_t disk_cond;
74096c
     int fsync_queue_count;
74096c
 
74096c
     enum {
74096c
@@ -233,7 +241,6 @@ struct posix_private {
74096c
     char disk_unit;
74096c
     uint32_t disk_space_full;
74096c
     pthread_t disk_space_check;
74096c
-    gf_boolean_t disk_space_check_active;
74096c
 
74096c
 #ifdef GF_DARWIN_HOST_OS
74096c
     enum {
74096c
@@ -263,6 +270,7 @@ struct posix_private {
74096c
     gf_boolean_t ctime;
74096c
     gf_boolean_t janitor_task_stop;
74096c
     uint32_t rel_fdcount;
74096c
+    void *pxl;
74096c
 };
74096c
 
74096c
 typedef struct {
74096c
-- 
74096c
1.8.3.1
74096c