From 3b1c45188c7260ae3dda4bcedd7cb81566f1f2ea Mon Sep 17 00:00:00 2001 From: Mohit Agrawal Date: Sat, 15 Jul 2017 17:55:14 +0530 Subject: [PATCH 092/128] posix: Needs to reserve disk space to prevent the brick from getting full Problem: Currently there is no option available in the posix xlator to prevent the disk from getting full. Solution: Introduce a new option, storage.reserve, in the posix xlator to configure the disk threshold. The posix xlator spawns a thread that updates the disk space status in the posix private structure, and that flag is checked by the posix fops that write to the brick before they start an operation. If the flag value is 1, the fop sets op_errno to ENOSPC and jumps to its out label. > BUG: 1471366 > Change-Id: I98287cd409860f4c754fc69a332e0521bfb1b67e > Signed-off-by: Mohit Agrawal > Reviewed-on: https://review.gluster.org/17780 > Smoke: Gluster Build System > CentOS-regression: Gluster Build System > Reviewed-by: Amar Tumballi > Reviewed-by: Jeff Darcy BUG: 1464350 Change-Id: I98287cd409860f4c754fc69a332e0521bfb1b67e Signed-off-by: Mohit Agrawal Reviewed-on: https://code.engineering.redhat.com/gerrit/124629 Tested-by: RHGS Build Bot Reviewed-by: Atin Mukherjee --- xlators/mgmt/glusterd/src/glusterd-volume-set.c | 4 + xlators/storage/posix/src/posix-aio.c | 1 + xlators/storage/posix/src/posix-helpers.c | 115 ++++++++++++++++++++++++ xlators/storage/posix/src/posix-messages.h | 11 ++- xlators/storage/posix/src/posix.c | 80 +++++++++++++++-- xlators/storage/posix/src/posix.h | 19 ++++ 6 files changed, 222 insertions(+), 8 deletions(-) diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index b15a5af..a57eb9e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -2805,6 +2805,10 @@ struct volopt_map_entry glusterd_volopt_map[] = { .voltype = "storage/posix", .op_version = GD_OP_VERSION_3_12_0, }, + { .key = "storage.reserve", + .voltype = "storage/posix", + .op_version = 
GD_OP_VERSION_3_13_0, + }, { .key = "storage.bd-aio", .voltype = "storage/bd", .op_version = GD_OP_VERSION_RHS_3_0 diff --git a/xlators/storage/posix/src/posix-aio.c b/xlators/storage/posix/src/posix-aio.c index b5ac1b9..2adafeb 100644 --- a/xlators/storage/posix/src/posix-aio.c +++ b/xlators/storage/posix/src/posix-aio.c @@ -330,6 +330,7 @@ posix_aio_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, VALIDATE_OR_GOTO (fd, err); priv = this->private; + DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_errno, op_errno, err); ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); if (ret < 0) { diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c index f97c90b..826441f 100644 --- a/xlators/storage/posix/src/posix-helpers.c +++ b/xlators/storage/posix/src/posix-helpers.c @@ -1977,6 +1977,129 @@ unlock: UNLOCK (&priv->lock); } +/* Refresh priv->disk_space_full from statvfs() on priv->base_path: 1 when the + * free space is at or below priv->disk_threshhold percent of the total, else + * 0. The flag is left unchanged when statvfs() fails. */ +void +posix_disk_space_check (xlator_t *this) +{ + struct posix_private *priv = NULL; + char *subvol_path = NULL; + int op_ret = 0; + int percent = 0; + struct statvfs buf = {0}; + uint64_t totsz = 0; + uint64_t freesz = 0; + + GF_VALIDATE_OR_GOTO ("posix-helpers", this, out); + priv = this->private; + GF_VALIDATE_OR_GOTO ("posix-helpers", priv, out); + + subvol_path = priv->base_path; + percent = priv->disk_threshhold; + + op_ret = sys_statvfs (subvol_path, &buf); + + if (op_ret == -1) { + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED, + "statvfs failed on %s", subvol_path); + goto out; + } + totsz = (buf.f_blocks * buf.f_bsize); + freesz = (buf.f_bfree * buf.f_bsize); + + if (freesz <= ((totsz * percent) / 100)) { + priv->disk_space_full = 1; + } else { + priv->disk_space_full = 0; + } +out: + return; +} + + +/* Thread body: run posix_disk_space_check() every 5 seconds until sleep() + * returns early or priv->disk_space_check_active is found cleared. */ +static void * +posix_disk_space_check_thread_proc (void *data) +{ + xlator_t *this = NULL; + struct posix_private *priv = NULL; + uint32_t interval = 0; + int ret = -1; + + this = data; + priv = this->private; + + interval = 5; + gf_msg_debug (this->name, 0, "disk-space thread started, " 
+ "interval = %u seconds", interval); + while (1) { + /* aborting sleep() is a request to exit this thread, sleep() + * will normally not return when cancelled */ + ret = sleep (interval); + if (ret > 0) + break; + /* prevent thread errors while doing the health-check(s) */ + pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL); + + /* Do the disk-check.*/ + posix_disk_space_check (this); + if (!priv->disk_space_check_active) + goto out; + pthread_setcancelstate (PTHREAD_CANCEL_ENABLE, NULL); + } + +out: + gf_msg_debug (this->name, 0, "disk space check thread exiting"); + LOCK (&priv->lock); + { + priv->disk_space_check_active = _gf_false; + } + UNLOCK (&priv->lock); + + + return NULL; +} + +/* Start the disk space check thread while holding priv->lock, first + * cancelling a thread that is still marked active; the new thread is detached. */ +void +posix_spawn_disk_space_check_thread (xlator_t *xl) +{ + struct posix_private *priv = NULL; + int ret = -1; + + priv = xl->private; + + LOCK (&priv->lock); + { + /* cancel the running thread */ + if (priv->disk_space_check_active == _gf_true) { + pthread_cancel (priv->disk_space_check); + priv->disk_space_check_active = _gf_false; + } + + ret = gf_thread_create (&priv->disk_space_check, NULL, + posix_disk_space_check_thread_proc, + xl, "posix_reserve"); + /* any non-zero return is a failure, not only negative ones */ + if (ret != 0) { + priv->disk_space_check_active = _gf_false; + gf_msg (xl->name, GF_LOG_ERROR, errno, + P_MSG_DISK_SPACE_CHECK_FAILED, + "unable to setup disk space check thread"); + goto unlock; + } + + /* run the thread detached, resources will be freed on exit */ + pthread_detach (priv->disk_space_check); + priv->disk_space_check_active = _gf_true; + } +unlock: + UNLOCK (&priv->lock); +} + int posix_fsyncer_pick (xlator_t *this, struct list_head *head) { diff --git a/xlators/storage/posix/src/posix-messages.h b/xlators/storage/posix/src/posix-messages.h index ee06d6f..20cf1f0 100644 --- a/xlators/storage/posix/src/posix-messages.h +++ b/xlators/storage/posix/src/posix-messages.h @@ -45,7 +45,7 @@ */ #define POSIX_COMP_BASE GLFS_MSGID_COMP_POSIX -#define GLFS_NUM_MESSAGES 110 +#define GLFS_NUM_MESSAGES 111 #define 
GLFS_MSGID_END (POSIX_COMP_BASE + GLFS_NUM_MESSAGES + 1) /* Messaged with message IDs */ #define glfs_msg_start_x POSIX_COMP_BASE, "Invalid: Start of messages" @@ -955,6 +955,15 @@ */ +#define P_MSG_DISK_SPACE_CHECK_FAILED (POSIX_COMP_BASE + 112) + +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ + /*------------*/ #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index d858878..1cb0fef 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -766,6 +766,7 @@ posix_do_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, struct posix_fd *pfd = NULL; gf_boolean_t locked = _gf_false; posix_inode_ctx_t *ctx = NULL; + struct posix_private *priv = NULL; DECLARE_OLD_FS_ID_VAR; @@ -775,6 +776,11 @@ posix_do_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (fd, out); + priv = this->private; + /* NOTE(review): on a full disk the macro leaves ret == ENOSPC (positive); + * confirm callers treat a positive return as failure */ + DISK_SPACE_CHECK_AND_GOTO (frame, priv, ret, ret, out); + ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); if (ret < 0) { gf_msg_debug (this->name, 0, "pfd is NULL from fd=%p", fd); @@ -1073,20 +1077,31 @@ posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, int32_t ret = 0; struct iatt statpre = {0,}; struct iatt statpost = {0,}; + struct posix_private *priv = NULL; + int op_ret = -1; + int op_errno = -1; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + + priv = this->private; + DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out); ret = posix_do_zerofill (frame, this, fd, offset, len, &statpre, &statpost, xdata); - if (ret < 0) { - goto err; - } + if (ret < 0) { + /* the pre-patch error path unwound with -ret as the errno; + * keep reporting it instead of the -1 placeholder */ + op_errno = -ret; + goto out; + } STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, NULL); return 0; -err: - STACK_UNWIND_STRICT(zerofill, frame, -1, -ret, NULL, NULL, NULL); +out: + STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, NULL, NULL, NULL); return 0; - } 
static int32_t @@ -1354,6 +1365,7 @@ posix_mknod (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (priv, out); GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno, out); + DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out); MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL); @@ -1574,6 +1586,7 @@ posix_mkdir (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (priv, out); GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno, out); + DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out); MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL); if (!real_path || !par_path) { @@ -2400,6 +2413,7 @@ posix_symlink (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (priv, out); GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno, out); + DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out); MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf); @@ -2558,6 +2572,7 @@ posix_rename (call_frame_t *frame, xlator_t *this, priv = this->private; VALIDATE_OR_GOTO (priv, out); + DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out); SET_FS_ID (frame->root->uid, frame->root->gid); MAKE_ENTRY_HANDLE (real_oldpath, par_oldpath, this, oldloc, NULL); @@ -2840,6 +2855,7 @@ posix_link (call_frame_t *frame, xlator_t *this, priv = this->private; VALIDATE_OR_GOTO (priv, out); + DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out); SET_FS_ID (frame->root->uid, frame->root->gid); MAKE_INODE_HANDLE (real_oldpath, this, oldloc, &stbuf); @@ -3049,6 +3065,7 @@ posix_create (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (priv, out); GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno, out); + DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out); MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf); @@ -3236,6 +3253,9 @@ posix_open (call_frame_t *frame, xlator_t *this, priv = this->private; VALIDATE_OR_GOTO (priv, out); + if (flags & 
O_CREAT) + DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out); + MAKE_INODE_HANDLE (real_path, this, loc, &stbuf); if (!real_path) { op_ret = -1; @@ -3559,6 +3579,7 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, priv = this->private; VALIDATE_OR_GOTO (priv, out); + DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out); ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); if (ret < 0) { @@ -3698,6 +3719,8 @@ posix_statfs (call_frame_t *frame, xlator_t *this, struct statvfs buf = {0, }; struct posix_private * priv = NULL; int shared_by = 1; + int percent = 0; + uint64_t reserved_blocks = 0; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); @@ -3722,6 +3744,16 @@ posix_statfs (call_frame_t *frame, xlator_t *this, goto out; } + percent = priv->disk_threshhold; + reserved_blocks = (buf.f_blocks * percent) / 100; + /* f_bfree is unsigned: clamp at zero rather than wrapping around + * when fewer blocks are free than are reserved */ + if (buf.f_bfree > reserved_blocks) { + buf.f_bfree -= reserved_blocks; + } else { + buf.f_bfree = 0; + } + shared_by = priv->shared_brick_count; if (shared_by > 1) { buf.f_blocks /= shared_by; @@ -3879,6 +3904,7 @@ posix_fsync (call_frame_t *frame, xlator_t *this, #endif priv = this->private; + if (priv->batch_fsync_mode && xdata && dict_get (xdata, "batch-fsync")) { posix_batch_fsync (frame, this, fd, datasync, xdata); return 0; @@ -3983,6 +4009,7 @@ posix_setxattr (call_frame_t *frame, xlator_t *this, ssize_t acl_size = 0; dict_t *xattr = NULL; posix_xattr_filler_t filler = {0,}; + struct posix_private *priv = NULL; DECLARE_OLD_FS_ID_VAR; SET_FS_ID (frame->root->uid, frame->root->gid); @@ -3992,6 +4019,9 @@ posix_setxattr (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (loc, out); VALIDATE_OR_GOTO (dict, out); + priv = this->private; + DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out); + MAKE_INODE_HANDLE (real_path, this, loc, NULL); if (!real_path) { op_ret = -1; @@ -5346,6 +5376,7 @@ posix_fsetxattr (call_frame_t *frame, xlator_t *this, struct iatt stbuf = {0,}; dict_t *xattr = NULL; posix_xattr_filler_t filler = {0,}; + struct posix_private *priv = NULL; DECLARE_OLD_FS_ID_VAR; SET_FS_ID (frame->root->uid, 
frame->root->gid); @@ -5355,6 +5386,9 @@ posix_fsetxattr (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (fd, out); VALIDATE_OR_GOTO (dict, out); + priv = this->private; + DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out); + ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); if (ret < 0) { gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, @@ -6018,11 +6052,17 @@ do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xattr_rsp = NULL; dict_t *xdata_rsp = NULL; struct iatt stbuf = {0}; + struct posix_private *priv = NULL; + VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (xattr, out); VALIDATE_OR_GOTO (this, out); + priv = this->private; + DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out); + + if (fd) { op_ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); if (op_ret < 0) { @@ -6120,7 +6160,6 @@ posix_fxattrop (call_frame_t *frame, xlator_t *this, return 0; } - int posix_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, dict_t *xdata) @@ -6944,6 +6983,11 @@ notify (xlator_t *this, pthread_cancel (priv->health_check); priv->health_check = 0; } + if (priv->disk_space_check) { + priv->disk_space_check_active = _gf_false; + pthread_cancel (priv->disk_space_check); + priv->disk_space_check = 0; + } if (priv->janitor) { (void) gf_thread_cleanup_xint (priv->janitor); priv->janitor = 0; @@ -7140,6 +7184,11 @@ reconfigure (xlator_t *this, dict_t *options) " fallback to :"); } + GF_OPTION_RECONF ("reserve", priv->disk_threshhold, + options, uint32, out); + if (priv->disk_threshhold) + posix_spawn_disk_space_check_thread (this); + GF_OPTION_RECONF ("health-check-interval", priv->health_check_interval, options, uint32, out); posix_spawn_health_check_thread (this); @@ -7738,6 +7787,13 @@ init (xlator_t *this) " fallback to :"); } + _private->disk_space_check_active = _gf_false; + _private->disk_space_full = 0; + GF_OPTION_INIT ("reserve", + _private->disk_threshhold, uint32, out); + if 
(_private->disk_threshhold) + posix_spawn_disk_space_check_thread (this); + _private->health_check_active = _gf_false; GF_OPTION_INIT ("health-check-interval", _private->health_check_interval, uint32, out); @@ -7940,6 +7996,16 @@ struct volume_options options[] = { .description = "Interval in seconds for a filesystem health check, " "set to 0 to disable" }, + { + .key = {"reserve"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .default_value = "1", + .validate = GF_OPT_VALIDATE_MIN, + .description = "Value in percentage in integer form required " + "to set reserve disk, " + "set to 0 to disable" + }, { .key = {"batch-fsync-mode"}, .type = GF_OPTION_TYPE_STR, .default_value = "reverse-fsync", diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h index c2dcfda..21c7d36 100644 --- a/xlators/storage/posix/src/posix.h +++ b/xlators/storage/posix/src/posix.h @@ -63,6 +63,21 @@ #define GF_UNLINK_TRUE 0x0000000000000001 #define GF_UNLINK_FALSE 0x0000000000000000 +/* Fail a fop early when priv->disk_space_full is set: store -1 in op_ret and + * ENOSPC in op_errno, log at debug level and jump to the label 'out'. Frames + * whose root pid is negative skip the check. */ +#define DISK_SPACE_CHECK_AND_GOTO(frame, priv, op_ret, op_errno, out) do { \ + if ((frame)->root->pid >= 0 && (priv)->disk_space_full) { \ + (op_ret) = -1; \ + (op_errno) = ENOSPC; \ + gf_msg_debug ("posix", ENOSPC, \ + "disk space utilization reached limits" \ + " for path %s ", (priv)->base_path); \ + goto out; \ + } \ + } while (0) + + /** * posix_fd - internal structure common to file and directory fd's */ @@ -197,6 +209,11 @@ struct posix_private { pthread_t health_check; gf_boolean_t health_check_active; + uint32_t disk_threshhold; + uint32_t disk_space_full; + pthread_t disk_space_check; + gf_boolean_t disk_space_check_active; + #ifdef GF_DARWIN_HOST_OS enum { XATTR_NONE = 0, @@ -304,6 +321,8 @@ __posix_fd_set_odirect (fd_t *fd, struct posix_fd *pfd, int opflags, off_t offset, size_t size); void posix_spawn_health_check_thread (xlator_t *this); +void posix_spawn_disk_space_check_thread (xlator_t *this); + void *posix_fsyncer (void *); int posix_get_ancestry (xlator_t *this, inode_t 
*leaf_inode, -- 1.8.3.1