From 3b1c45188c7260ae3dda4bcedd7cb81566f1f2ea Mon Sep 17 00:00:00 2001
From: Mohit Agrawal <moagrawa@redhat.com>
Date: Sat, 15 Jul 2017 17:55:14 +0530
Subject: [PATCH 092/128] posix: Needs to reserve disk space to prevent the
brick from getting full
Problem: Currently there is no option available at posix xlator to save the
disk from getting full
Solution: Introduce a new option storage.reserve at posix xlator to
configure disk threshold.posix xlator spawn a thread to update the
disk space status in posix private structure and same flag is checked
by every posix fop before start operation.If flag value is 1 then
it sets op_errno to ENOSPC and goto out from the fop.
> BUG: 1471366
> Change-Id: I98287cd409860f4c754fc69a332e0521bfb1b67e
> Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
> Reviewed-on: https://review.gluster.org/17780
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Amar Tumballi <amarts@redhat.com>
> Reviewed-by: Jeff Darcy <jeff@pl.atyp.us>
BUG: 1464350
Change-Id: I98287cd409860f4c754fc69a332e0521bfb1b67e
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/124629
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
xlators/mgmt/glusterd/src/glusterd-volume-set.c | 4 +
xlators/storage/posix/src/posix-aio.c | 1 +
xlators/storage/posix/src/posix-helpers.c | 115 ++++++++++++++++++++++++
xlators/storage/posix/src/posix-messages.h | 11 ++-
xlators/storage/posix/src/posix.c | 80 +++++++++++++++--
xlators/storage/posix/src/posix.h | 19 ++++
6 files changed, 222 insertions(+), 8 deletions(-)
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index b15a5af..a57eb9e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -2805,6 +2805,10 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.voltype = "storage/posix",
.op_version = GD_OP_VERSION_3_12_0,
},
+ { .key = "storage.reserve",
+ .voltype = "storage/posix",
+ .op_version = GD_OP_VERSION_3_13_0,
+ },
{ .key = "storage.bd-aio",
.voltype = "storage/bd",
.op_version = GD_OP_VERSION_RHS_3_0
diff --git a/xlators/storage/posix/src/posix-aio.c b/xlators/storage/posix/src/posix-aio.c
index b5ac1b9..2adafeb 100644
--- a/xlators/storage/posix/src/posix-aio.c
+++ b/xlators/storage/posix/src/posix-aio.c
@@ -330,6 +330,7 @@ posix_aio_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
VALIDATE_OR_GOTO (fd, err);
priv = this->private;
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_errno, op_errno, err);
ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (ret < 0) {
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index f97c90b..826441f 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -1977,6 +1977,121 @@ unlock:
UNLOCK (&priv->lock);
}
+void
+posix_disk_space_check (xlator_t *this)
+{
+ struct posix_private *priv = NULL;
+ char *subvol_path = NULL;
+ int op_ret = 0;
+ int percent = 0;
+ struct statvfs buf = {0};
+ uint64_t totsz = 0;
+ uint64_t freesz = 0;
+
+ GF_VALIDATE_OR_GOTO (this->name, this, out);
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO ("posix-helpers", priv, out);
+
+ subvol_path = priv->base_path;
+ percent = priv->disk_threshhold;
+
+ op_ret = sys_statvfs (subvol_path, &buf);
+
+ if (op_ret == -1) {
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED,
+ "statvfs failed on %s", subvol_path);
+ goto out;
+ }
+ totsz = (buf.f_blocks * buf.f_bsize);
+ freesz = (buf.f_bfree * buf.f_bsize);
+
+ if (freesz <= ((totsz * percent) / 100)) {
+ priv->disk_space_full = 1;
+ } else {
+ priv->disk_space_full = 0;
+ }
+out:
+ return;
+}
+
+
+static void *
+posix_disk_space_check_thread_proc (void *data)
+{
+ xlator_t *this = NULL;
+ struct posix_private *priv = NULL;
+ uint32_t interval = 0;
+ int ret = -1;
+
+ this = data;
+ priv = this->private;
+
+ interval = 5;
+ gf_msg_debug (this->name, 0, "disk-space thread started, "
+ "interval = %d seconds", interval);
+ while (1) {
+ /* aborting sleep() is a request to exit this thread, sleep()
+ * will normally not return when cancelled */
+ ret = sleep (interval);
+ if (ret > 0)
+ break;
+ /* prevent thread errors while doing the health-check(s) */
+ pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL);
+
+ /* Do the disk-check.*/
+ posix_disk_space_check (this);
+ if (!priv->disk_space_check_active)
+ goto out;
+ pthread_setcancelstate (PTHREAD_CANCEL_ENABLE, NULL);
+ }
+
+out:
+ gf_msg_debug (this->name, 0, "disk space check thread exiting");
+ LOCK (&priv->lock);
+ {
+ priv->disk_space_check_active = _gf_false;
+ }
+ UNLOCK (&priv->lock);
+
+
+ return NULL;
+}
+
+void
+posix_spawn_disk_space_check_thread (xlator_t *xl)
+{
+ struct posix_private *priv = NULL;
+ int ret = -1;
+
+ priv = xl->private;
+
+ LOCK (&priv->lock);
+ {
+ /* cancel the running thread */
+ if (priv->disk_space_check_active == _gf_true) {
+ pthread_cancel (priv->disk_space_check);
+ priv->disk_space_check_active = _gf_false;
+ }
+
+ ret = gf_thread_create (&priv->disk_space_check, NULL,
+ posix_disk_space_check_thread_proc,
+ xl, "posix_reserve");
+ if (ret < 0) {
+ priv->disk_space_check_active = _gf_false;
+ gf_msg (xl->name, GF_LOG_ERROR, errno,
+ P_MSG_DISK_SPACE_CHECK_FAILED,
+ "unable to setup disk space check thread");
+ goto unlock;
+ }
+
+ /* run the thread detached, resources will be freed on exit */
+ pthread_detach (priv->disk_space_check);
+ priv->disk_space_check_active = _gf_true;
+ }
+unlock:
+ UNLOCK (&priv->lock);
+}
+
int
posix_fsyncer_pick (xlator_t *this, struct list_head *head)
{
diff --git a/xlators/storage/posix/src/posix-messages.h b/xlators/storage/posix/src/posix-messages.h
index ee06d6f..20cf1f0 100644
--- a/xlators/storage/posix/src/posix-messages.h
+++ b/xlators/storage/posix/src/posix-messages.h
@@ -45,7 +45,7 @@
*/
#define POSIX_COMP_BASE GLFS_MSGID_COMP_POSIX
-#define GLFS_NUM_MESSAGES 110
+#define GLFS_NUM_MESSAGES 111
#define GLFS_MSGID_END (POSIX_COMP_BASE + GLFS_NUM_MESSAGES + 1)
/* Messaged with message IDs */
#define glfs_msg_start_x POSIX_COMP_BASE, "Invalid: Start of messages"
@@ -955,6 +955,15 @@
*/
+#define P_MSG_DISK_SPACE_CHECK_FAILED (POSIX_COMP_BASE + 112)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
/*------------*/
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index d858878..1cb0fef 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -766,6 +766,7 @@ posix_do_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct posix_fd *pfd = NULL;
gf_boolean_t locked = _gf_false;
posix_inode_ctx_t *ctx = NULL;
+ struct posix_private *priv = NULL;
DECLARE_OLD_FS_ID_VAR;
@@ -775,6 +776,9 @@ posix_do_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd,
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (fd, out);
+ priv = this->private;
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, ret, ret, out);
+
ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (ret < 0) {
gf_msg_debug (this->name, 0, "pfd is NULL from fd=%p", fd);
@@ -1073,20 +1077,27 @@ posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
int32_t ret = 0;
struct iatt statpre = {0,};
struct iatt statpost = {0,};
+ struct posix_private *priv = NULL;
+ int op_ret = -1;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+
+ priv = this->private;
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
ret = posix_do_zerofill (frame, this, fd, offset, len,
&statpre, &statpost, xdata);
- if (ret < 0) {
- goto err;
- }
+ if (ret < 0)
+ goto out;
STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, NULL);
return 0;
-err:
- STACK_UNWIND_STRICT(zerofill, frame, -1, -ret, NULL, NULL, NULL);
+out:
+ STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, NULL, NULL, NULL);
return 0;
-
}
static int32_t
@@ -1354,6 +1365,7 @@ posix_mknod (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (priv, out);
GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno,
out);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL);
@@ -1574,6 +1586,7 @@ posix_mkdir (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (priv, out);
GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno,
out);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL);
if (!real_path || !par_path) {
@@ -2400,6 +2413,7 @@ posix_symlink (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (priv, out);
GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno,
out);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf);
@@ -2558,6 +2572,7 @@ posix_rename (call_frame_t *frame, xlator_t *this,
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
SET_FS_ID (frame->root->uid, frame->root->gid);
MAKE_ENTRY_HANDLE (real_oldpath, par_oldpath, this, oldloc, NULL);
@@ -2840,6 +2855,7 @@ posix_link (call_frame_t *frame, xlator_t *this,
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
SET_FS_ID (frame->root->uid, frame->root->gid);
MAKE_INODE_HANDLE (real_oldpath, this, oldloc, &stbuf);
@@ -3049,6 +3065,7 @@ posix_create (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (priv, out);
GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno,
out);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf);
@@ -3236,6 +3253,9 @@ posix_open (call_frame_t *frame, xlator_t *this,
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
+ if (flags & O_CREAT)
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
+
MAKE_INODE_HANDLE (real_path, this, loc, &stbuf);
if (!real_path) {
op_ret = -1;
@@ -3559,6 +3579,7 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (ret < 0) {
@@ -3698,6 +3719,7 @@ posix_statfs (call_frame_t *frame, xlator_t *this,
struct statvfs buf = {0, };
struct posix_private * priv = NULL;
int shared_by = 1;
+ int percent = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -3722,6 +3744,9 @@ posix_statfs (call_frame_t *frame, xlator_t *this,
goto out;
}
+ percent = priv->disk_threshhold;
+ buf.f_bfree = (buf.f_bfree - ((buf.f_blocks * percent) / 100));
+
shared_by = priv->shared_brick_count;
if (shared_by > 1) {
buf.f_blocks /= shared_by;
@@ -3879,6 +3904,7 @@ posix_fsync (call_frame_t *frame, xlator_t *this,
#endif
priv = this->private;
+
if (priv->batch_fsync_mode && xdata && dict_get (xdata, "batch-fsync")) {
posix_batch_fsync (frame, this, fd, datasync, xdata);
return 0;
@@ -3983,6 +4009,7 @@ posix_setxattr (call_frame_t *frame, xlator_t *this,
ssize_t acl_size = 0;
dict_t *xattr = NULL;
posix_xattr_filler_t filler = {0,};
+ struct posix_private *priv = NULL;
DECLARE_OLD_FS_ID_VAR;
SET_FS_ID (frame->root->uid, frame->root->gid);
@@ -3992,6 +4019,9 @@ posix_setxattr (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (loc, out);
VALIDATE_OR_GOTO (dict, out);
+ priv = this->private;
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
+
MAKE_INODE_HANDLE (real_path, this, loc, NULL);
if (!real_path) {
op_ret = -1;
@@ -5346,6 +5376,7 @@ posix_fsetxattr (call_frame_t *frame, xlator_t *this,
struct iatt stbuf = {0,};
dict_t *xattr = NULL;
posix_xattr_filler_t filler = {0,};
+ struct posix_private *priv = NULL;
DECLARE_OLD_FS_ID_VAR;
SET_FS_ID (frame->root->uid, frame->root->gid);
@@ -5355,6 +5386,9 @@ posix_fsetxattr (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (fd, out);
VALIDATE_OR_GOTO (dict, out);
+ priv = this->private;
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
+
ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (ret < 0) {
gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
@@ -6018,11 +6052,17 @@ do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
dict_t *xattr_rsp = NULL;
dict_t *xdata_rsp = NULL;
struct iatt stbuf = {0};
+ struct posix_private *priv = NULL;
+
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (xattr, out);
VALIDATE_OR_GOTO (this, out);
+ priv = this->private;
+ DISK_SPACE_CHECK_AND_GOTO (frame, priv, op_ret, op_errno, out);
+
+
if (fd) {
op_ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
if (op_ret < 0) {
@@ -6120,7 +6160,6 @@ posix_fxattrop (call_frame_t *frame, xlator_t *this,
return 0;
}
-
int
posix_access (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t mask, dict_t *xdata)
@@ -6944,6 +6983,11 @@ notify (xlator_t *this,
pthread_cancel (priv->health_check);
priv->health_check = 0;
}
+ if (priv->disk_space_check) {
+ priv->disk_space_check_active = _gf_false;
+ pthread_cancel (priv->disk_space_check);
+ priv->disk_space_check = 0;
+ }
if (priv->janitor) {
(void) gf_thread_cleanup_xint (priv->janitor);
priv->janitor = 0;
@@ -7140,6 +7184,11 @@ reconfigure (xlator_t *this, dict_t *options)
" fallback to <hostname>:<export>");
}
+ GF_OPTION_RECONF ("reserve", priv->disk_threshhold,
+ options, uint32, out);
+ if (priv->disk_threshhold)
+ posix_spawn_disk_space_check_thread (this);
+
GF_OPTION_RECONF ("health-check-interval", priv->health_check_interval,
options, uint32, out);
posix_spawn_health_check_thread (this);
@@ -7738,6 +7787,13 @@ init (xlator_t *this)
" fallback to <hostname>:<export>");
}
+ _private->disk_space_check_active = _gf_false;
+ _private->disk_space_full = 0;
+ GF_OPTION_INIT ("reserve",
+ _private->disk_threshhold, uint32, out);
+ if (_private->disk_threshhold)
+ posix_spawn_disk_space_check_thread (this);
+
_private->health_check_active = _gf_false;
GF_OPTION_INIT ("health-check-interval",
_private->health_check_interval, uint32, out);
@@ -7940,6 +7996,16 @@ struct volume_options options[] = {
.description = "Interval in seconds for a filesystem health check, "
"set to 0 to disable"
},
+ {
+ .key = {"reserve"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .default_value = "1",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "Value in percentage in integer form required "
+ "to set reserve disk, "
+ "set to 0 to disable"
+ },
{ .key = {"batch-fsync-mode"},
.type = GF_OPTION_TYPE_STR,
.default_value = "reverse-fsync",
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index c2dcfda..21c7d36 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -63,6 +63,18 @@
#define GF_UNLINK_TRUE 0x0000000000000001
#define GF_UNLINK_FALSE 0x0000000000000000
+#define DISK_SPACE_CHECK_AND_GOTO(frame, priv, op_ret, op_errno, out) do { \
+ if (frame->root->pid >= 0 && priv->disk_space_full) { \
+ op_ret = -1; \
+ op_errno = ENOSPC; \
+ gf_msg_debug ("posix", ENOSPC, \
+ "disk space utilization reached limits" \
+ " for path %s ", priv->base_path); \
+ goto out; \
+ } \
+ } while (0)
+
+
/**
* posix_fd - internal structure common to file and directory fd's
*/
@@ -197,6 +209,11 @@ struct posix_private {
pthread_t health_check;
gf_boolean_t health_check_active;
+ uint32_t disk_threshhold;
+ uint32_t disk_space_full;
+ pthread_t disk_space_check;
+ gf_boolean_t disk_space_check_active;
+
#ifdef GF_DARWIN_HOST_OS
enum {
XATTR_NONE = 0,
@@ -304,6 +321,8 @@ __posix_fd_set_odirect (fd_t *fd, struct posix_fd *pfd, int opflags,
off_t offset, size_t size);
void posix_spawn_health_check_thread (xlator_t *this);
+void posix_spawn_disk_space_check_thread (xlator_t *this);
+
void *posix_fsyncer (void *);
int
posix_get_ancestry (xlator_t *this, inode_t *leaf_inode,
--
1.8.3.1