Blob Blame History Raw
From 3b1c45188c7260ae3dda4bcedd7cb81566f1f2ea Mon Sep 17 00:00:00 2001
From: Mohit Agrawal <moagrawa@redhat.com>
Date: Sat, 15 Jul 2017 17:55:14 +0530
Subject: [PATCH 092/128] posix: Needs to reserve disk space to prevent the
 brick from getting full

Problem: Currently there is no option available in the posix xlator to
         prevent the disk from getting full.

Solution: Introduce a new option, storage.reserve, in the posix xlator to
          configure the disk threshold. The posix xlator spawns a thread to
          update the disk space status in the posix private structure, and
          that flag is checked by every posix fop before it starts its
          operation. If the flag value is 1, the fop sets op_errno to ENOSPC
          and jumps to out.

> BUG: 1471366
> Change-Id: I98287cd409860f4c754fc69a332e0521bfb1b67e
> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
> Reviewed-on: https://review.gluster.org/17780
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Amar Tumballi <amarts@redhat.com>
> Reviewed-by: Jeff Darcy <jeff@pl.atyp.us>

BUG: 1464350
Change-Id: I98287cd409860f4c754fc69a332e0521bfb1b67e
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/124629
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
 xlators/mgmt/glusterd/src/glusterd-volume-set.c |   4 +
 xlators/storage/posix/src/posix-aio.c           |   1 +
 xlators/storage/posix/src/posix-helpers.c       | 115 ++++++++++++++++++++++++
 xlators/storage/posix/src/posix-messages.h      |  11 ++-
 xlators/storage/posix/src/posix.c               |  80 +++++++++++++++--
 xlators/storage/posix/src/posix.h               |  19 ++++
 6 files changed, 222 insertions(+), 8 deletions(-)

diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index b15a5af..a57eb9e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -2805,6 +2805,10 @@ struct volopt_map_entry glusterd_volopt_map[] = {
           .voltype     = "storage/posix",
           .op_version  = GD_OP_VERSION_3_12_0,
         },
+        { .key         = "storage.reserve",
+          .voltype     = "storage/posix",
+          .op_version  = GD_OP_VERSION_3_13_0,
+        },
         { .key         = "storage.bd-aio",
           .voltype     = "storage/bd",
           .op_version  = GD_OP_VERSION_RHS_3_0
diff --git a/xlators/storage/posix/src/posix-aio.c b/xlators/storage/posix/src/posix-aio.c
index b5ac1b9..2adafeb 100644
--- a/xlators/storage/posix/src/posix-aio.c
+++ b/xlators/storage/posix/src/posix-aio.c
@@ -330,6 +330,7 @@ posix_aio_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
         VALIDATE_OR_GOTO (fd, err);
 
         priv = this->private;
+        DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_errno, op_errno, err);
 
         ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
         if (ret < 0) {
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index f97c90b..826441f 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -1977,6 +1977,121 @@ unlock:
         UNLOCK (&priv->lock);
 }
 
+/* Refresh priv->disk_space_full: set it to 1 when the free space reported by
+ * statvfs() for the brick's base path is at or below the configured reserve
+ * percentage (priv->disk_threshhold) of the total size, and to 0 otherwise.
+ * On a statvfs failure the error is logged and the flag is left untouched. */
+void
+posix_disk_space_check (xlator_t *this)
+{
+        struct  posix_private *priv     = NULL;
+        char    *subvol_path            = NULL;
+        int     op_ret                  = 0;
+        int     percent                 = 0;
+        struct statvfs buf              = {0};
+        uint64_t totsz                  = 0;
+        uint64_t freesz                 = 0;
+
+        /* 'this' may be NULL: do not dereference it for the log domain. */
+        GF_VALIDATE_OR_GOTO ("posix-helpers", this, out);
+        priv = this->private;
+        GF_VALIDATE_OR_GOTO ("posix-helpers", priv, out);
+
+        subvol_path = priv->base_path;
+        percent = priv->disk_threshhold;
+
+        op_ret = sys_statvfs (subvol_path, &buf);
+        if (op_ret == -1) {
+                gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED,
+                        "statvfs failed on %s", subvol_path);
+                goto out;
+        }
+        totsz = (buf.f_blocks * buf.f_bsize);
+        freesz = (buf.f_bfree * buf.f_bsize);
+
+        priv->disk_space_full = (freesz <= ((totsz * percent) / 100)) ? 1 : 0;
+out:
+        return;
+}
+
+
+/* Body of the disk-space monitor thread. 'data' is the posix xlator_t.
+ * Every 'interval' seconds it calls posix_disk_space_check(); it stops when
+ * sleep() is interrupted or priv->disk_space_check_active has been cleared,
+ * and clears that flag itself under priv->lock before returning NULL. */
+static void *
+posix_disk_space_check_thread_proc (void *data)
+{
+        xlator_t             *this               = NULL;
+        struct posix_private *priv               = NULL;
+        uint32_t              interval           = 0;
+        int                   ret                = -1;
+
+        this = data;
+        priv = this->private;
+
+        interval = 5;
+        gf_msg_debug (this->name, 0, "disk-space thread started, "
+                      "interval = %u seconds", interval);
+        while (1) {
+                /* sleep() is a cancellation point; a non-zero return means
+                 * it was interrupted, which is treated as a stop request */
+                ret = sleep (interval);
+                if (ret > 0)
+                        break;
+                /* do not allow cancellation in the middle of the check */
+                pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL);
+                posix_disk_space_check (this);
+                if (!priv->disk_space_check_active)
+                        break;
+                pthread_setcancelstate (PTHREAD_CANCEL_ENABLE, NULL);
+        }
+
+        gf_msg_debug (this->name, 0, "disk space check thread exiting");
+        LOCK (&priv->lock);
+        {
+                priv->disk_space_check_active = _gf_false;
+        }
+        UNLOCK (&priv->lock);
+
+        return NULL;
+}
+
+/* (Re)start the disk-space monitor: under priv->lock cancel a thread marked
+ * active, create a detached replacement and flag it active on success. */
+void
+posix_spawn_disk_space_check_thread (xlator_t *xl)
+{
+        struct posix_private *priv               = xl->private;
+        int                   ret                = -1;
+
+        LOCK (&priv->lock);
+        {
+                /* cancel the running thread  */
+                if (priv->disk_space_check_active == _gf_true) {
+                        pthread_cancel (priv->disk_space_check);
+                        priv->disk_space_check_active = _gf_false;
+                }
+
+                ret = gf_thread_create (&priv->disk_space_check, NULL,
+                                        posix_disk_space_check_thread_proc,
+                                        xl, "posix_reserve");
+                /* NOTE(review): assumes gf_thread_create() forwards the
+                 * pthread_create() result (positive errno value on failure) */
+                if (ret != 0) {
+                        gf_msg (xl->name, GF_LOG_ERROR, ret > 0 ? ret : errno,
+                                P_MSG_DISK_SPACE_CHECK_FAILED,
+                                "unable to setup disk space check thread");
+                        goto unlock;
+                }
+                /* run the thread detached, resources will be freed on exit */
+                pthread_detach (priv->disk_space_check);
+                priv->disk_space_check_active = _gf_true;
+        }
+unlock:
+        UNLOCK (&priv->lock);
+}
+
 int
 posix_fsyncer_pick (xlator_t *this, struct list_head *head)
 {
diff --git a/xlators/storage/posix/src/posix-messages.h b/xlators/storage/posix/src/posix-messages.h
index ee06d6f..20cf1f0 100644
--- a/xlators/storage/posix/src/posix-messages.h
+++ b/xlators/storage/posix/src/posix-messages.h
@@ -45,7 +45,7 @@
  */
 
 #define POSIX_COMP_BASE         GLFS_MSGID_COMP_POSIX
-#define GLFS_NUM_MESSAGES       110
+#define GLFS_NUM_MESSAGES       111
 #define GLFS_MSGID_END          (POSIX_COMP_BASE + GLFS_NUM_MESSAGES + 1)
 /* Messaged with message IDs */
 #define glfs_msg_start_x POSIX_COMP_BASE, "Invalid: Start of messages"
@@ -955,6 +955,15 @@
  */
 
 
+#define P_MSG_DISK_SPACE_CHECK_FAILED             (POSIX_COMP_BASE + 112)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
 /*------------*/
 #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
 
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index d858878..1cb0fef 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -766,6 +766,7 @@ posix_do_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd,
         struct posix_fd    *pfd    = NULL;
         gf_boolean_t        locked = _gf_false;
         posix_inode_ctx_t  *ctx    = NULL;
+        struct  posix_private *priv = NULL;
 
         DECLARE_OLD_FS_ID_VAR;
 
@@ -775,6 +776,9 @@ posix_do_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd,
         VALIDATE_OR_GOTO (this, out);
         VALIDATE_OR_GOTO (fd, out);
 
+        priv = this->private;
+        DISK_SPACE_CHECK_AND_GOTO (frame, priv, ret, ret, out);
+
         ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
         if (ret < 0) {
                 gf_msg_debug (this->name, 0, "pfd is NULL from fd=%p", fd);
@@ -1073,20 +1077,27 @@ posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
         int32_t ret                      =  0;
         struct  iatt statpre             = {0,};
         struct  iatt statpost            = {0,};
+        struct  posix_private *priv      = NULL;
+        int     op_ret                   = -1;
+        int     op_errno                 = -1;
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+
+        priv = this->private;
+        DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
 
         ret = posix_do_zerofill (frame, this, fd, offset, len,
                                  &statpre, &statpost, xdata);
-        if (ret < 0) {
-                goto err;
-        }
+        if (ret < 0)
+                goto out;
 
         STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, NULL);
         return 0;
 
-err:
-        STACK_UNWIND_STRICT(zerofill, frame, -1, -ret, NULL, NULL, NULL);
+out:
+        STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, NULL, NULL, NULL);
         return 0;
-
 }
 
 static int32_t
@@ -1354,6 +1365,7 @@ posix_mknod (call_frame_t *frame, xlator_t *this,
         VALIDATE_OR_GOTO (priv, out);
         GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno,
                                   out);
+        DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
 
         MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL);
 
@@ -1574,6 +1586,7 @@ posix_mkdir (call_frame_t *frame, xlator_t *this,
         VALIDATE_OR_GOTO (priv, out);
         GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno,
                                   out);
+        DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
 
         MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL);
         if (!real_path || !par_path) {
@@ -2400,6 +2413,7 @@ posix_symlink (call_frame_t *frame, xlator_t *this,
         VALIDATE_OR_GOTO (priv, out);
         GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno,
                                   out);
+        DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
 
         MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf);
 
@@ -2558,6 +2572,7 @@ posix_rename (call_frame_t *frame, xlator_t *this,
 
         priv = this->private;
         VALIDATE_OR_GOTO (priv, out);
+        DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
 
         SET_FS_ID (frame->root->uid, frame->root->gid);
         MAKE_ENTRY_HANDLE (real_oldpath, par_oldpath, this, oldloc, NULL);
@@ -2840,6 +2855,7 @@ posix_link (call_frame_t *frame, xlator_t *this,
 
         priv = this->private;
         VALIDATE_OR_GOTO (priv, out);
+        DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
 
         SET_FS_ID (frame->root->uid, frame->root->gid);
         MAKE_INODE_HANDLE (real_oldpath, this, oldloc, &stbuf);
@@ -3049,6 +3065,7 @@ posix_create (call_frame_t *frame, xlator_t *this,
         VALIDATE_OR_GOTO (priv, out);
         GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno,
                                   out);
+        DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
 
         MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf);
 
@@ -3236,6 +3253,9 @@ posix_open (call_frame_t *frame, xlator_t *this,
         priv = this->private;
         VALIDATE_OR_GOTO (priv, out);
 
+        if (flags & O_CREAT)
+                DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
+
         MAKE_INODE_HANDLE (real_path, this, loc, &stbuf);
         if (!real_path) {
                 op_ret = -1;
@@ -3559,6 +3579,7 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
         priv = this->private;
 
         VALIDATE_OR_GOTO (priv, out);
+        DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
 
         ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
         if (ret < 0) {
@@ -3698,6 +3719,7 @@ posix_statfs (call_frame_t *frame, xlator_t *this,
         struct statvfs         buf       = {0, };
         struct posix_private * priv      = NULL;
         int                    shared_by = 1;
+        int                    percent   = 0;
 
         VALIDATE_OR_GOTO (frame, out);
         VALIDATE_OR_GOTO (this, out);
@@ -3722,6 +3744,9 @@ posix_statfs (call_frame_t *frame, xlator_t *this,
                 goto out;
         }
 
+        percent = priv->disk_threshhold;
+        buf.f_bfree = (buf.f_bfree - ((buf.f_blocks * percent) / 100));
+
         shared_by = priv->shared_brick_count;
         if (shared_by > 1) {
                 buf.f_blocks /= shared_by;
@@ -3879,6 +3904,7 @@ posix_fsync (call_frame_t *frame, xlator_t *this,
 #endif
 
 	priv = this->private;
+
 	if (priv->batch_fsync_mode && xdata && dict_get (xdata, "batch-fsync")) {
 		posix_batch_fsync (frame, this, fd, datasync, xdata);
 		return 0;
@@ -3983,6 +4009,7 @@ posix_setxattr (call_frame_t *frame, xlator_t *this,
         ssize_t       acl_size                = 0;
         dict_t       *xattr                   = NULL;
         posix_xattr_filler_t filler = {0,};
+        struct  posix_private *priv           = NULL;
 
         DECLARE_OLD_FS_ID_VAR;
         SET_FS_ID (frame->root->uid, frame->root->gid);
@@ -3992,6 +4019,9 @@ posix_setxattr (call_frame_t *frame, xlator_t *this,
         VALIDATE_OR_GOTO (loc, out);
         VALIDATE_OR_GOTO (dict, out);
 
+        priv = this->private;
+        DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
+
         MAKE_INODE_HANDLE (real_path, this, loc, NULL);
         if (!real_path) {
                 op_ret = -1;
@@ -5346,6 +5376,7 @@ posix_fsetxattr (call_frame_t *frame, xlator_t *this,
         struct  iatt       stbuf          = {0,};
         dict_t            *xattr          = NULL;
         posix_xattr_filler_t filler       = {0,};
+        struct  posix_private *priv       = NULL;
 
         DECLARE_OLD_FS_ID_VAR;
         SET_FS_ID (frame->root->uid, frame->root->gid);
@@ -5355,6 +5386,9 @@ posix_fsetxattr (call_frame_t *frame, xlator_t *this,
         VALIDATE_OR_GOTO (fd, out);
         VALIDATE_OR_GOTO (dict, out);
 
+        priv = this->private;
+        DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
+
         ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
         if (ret < 0) {
                 gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
@@ -6018,11 +6052,17 @@ do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
         dict_t               *xattr_rsp = NULL;
         dict_t               *xdata_rsp = NULL;
         struct iatt           stbuf = {0};
+        struct  posix_private *priv     = NULL;
+
 
         VALIDATE_OR_GOTO (frame, out);
         VALIDATE_OR_GOTO (xattr, out);
         VALIDATE_OR_GOTO (this, out);
 
+        priv = this->private;
+        DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
+
+
         if (fd) {
                 op_ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
                 if (op_ret < 0) {
@@ -6120,7 +6160,6 @@ posix_fxattrop (call_frame_t *frame, xlator_t *this,
         return 0;
 }
 
-
 int
 posix_access (call_frame_t *frame, xlator_t *this,
               loc_t *loc, int32_t mask, dict_t *xdata)
@@ -6944,6 +6983,11 @@ notify (xlator_t *this,
                         pthread_cancel (priv->health_check);
                         priv->health_check = 0;
                 }
+                if (priv->disk_space_check) {
+                        priv->disk_space_check_active = _gf_false;
+                        pthread_cancel (priv->disk_space_check);
+                        priv->disk_space_check = 0;
+                }
                 if (priv->janitor) {
                         (void) gf_thread_cleanup_xint (priv->janitor);
                         priv->janitor = 0;
@@ -7140,6 +7184,11 @@ reconfigure (xlator_t *this, dict_t *options)
                         " fallback to <hostname>:<export>");
         }
 
+        GF_OPTION_RECONF ("reserve", priv->disk_threshhold,
+                          options, uint32, out);
+        if (priv->disk_threshhold)
+                posix_spawn_disk_space_check_thread (this);
+
         GF_OPTION_RECONF ("health-check-interval", priv->health_check_interval,
                           options, uint32, out);
         posix_spawn_health_check_thread (this);
@@ -7738,6 +7787,13 @@ init (xlator_t *this)
                                 " fallback to <hostname>:<export>");
         }
 
+        _private->disk_space_check_active = _gf_false;
+        _private->disk_space_full          = 0;
+        GF_OPTION_INIT ("reserve",
+                        _private->disk_threshhold, uint32, out);
+        if (_private->disk_threshhold)
+                posix_spawn_disk_space_check_thread (this);
+
         _private->health_check_active = _gf_false;
         GF_OPTION_INIT ("health-check-interval",
                         _private->health_check_interval, uint32, out);
@@ -7940,6 +7996,16 @@ struct volume_options options[] = {
           .description = "Interval in seconds for a filesystem health check, "
                          "set to 0 to disable"
         },
+        {
+          .key = {"reserve"},
+          .type = GF_OPTION_TYPE_INT,
+          .min = 0,
+          .default_value = "1",
+          .validate = GF_OPT_VALIDATE_MIN,
+          .description = "Value in percentage in integer form required "
+           "to set reserve disk, "
+           "set to 0 to disable"
+        },
 	{ .key = {"batch-fsync-mode"},
 	  .type = GF_OPTION_TYPE_STR,
 	  .default_value = "reverse-fsync",
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index c2dcfda..21c7d36 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -63,6 +63,18 @@
 #define GF_UNLINK_TRUE 0x0000000000000001
 #define GF_UNLINK_FALSE 0x0000000000000000
 
+/* Make the calling fop bail out (op_ret = -1, op_errno = ENOSPC, goto out)
+ * when priv->disk_space_full is set and frame->root->pid is not negative. */
+#define DISK_SPACE_CHECK_AND_GOTO(frame, priv, op_ret, op_errno, out)  do {   \
+               if ((frame)->root->pid >= 0 && (priv)->disk_space_full) {      \
+                        op_ret = -1;                                          \
+                        op_errno = ENOSPC;                                    \
+                        gf_msg_debug ("posix", ENOSPC,                        \
+                                      "disk space utilization reached limits" \
+                                      " for path %s ",  (priv)->base_path);   \
+                        goto out;                                             \
+               }                                                              \
+        } while (0)
 /**
  * posix_fd - internal structure common to file and directory fd's
  */
@@ -197,6 +209,11 @@ struct posix_private {
         pthread_t       health_check;
         gf_boolean_t    health_check_active;
 
+        uint32_t        disk_threshhold;
+        uint32_t        disk_space_full;
+        pthread_t       disk_space_check;
+        gf_boolean_t    disk_space_check_active;
+
 #ifdef GF_DARWIN_HOST_OS
         enum {
                 XATTR_NONE = 0,
@@ -304,6 +321,8 @@ __posix_fd_set_odirect (fd_t *fd, struct posix_fd *pfd, int opflags,
 			off_t offset, size_t size);
 void posix_spawn_health_check_thread (xlator_t *this);
 
+void posix_spawn_disk_space_check_thread (xlator_t *this);
+
 void *posix_fsyncer (void *);
 int
 posix_get_ancestry (xlator_t *this, inode_t *leaf_inode,
-- 
1.8.3.1