|
|
21ab4e |
From 9eee0cea926bf4a953972fc6ed37a2c925c9c748 Mon Sep 17 00:00:00 2001
|
|
|
21ab4e |
From: Susant Palai <spalai@redhat.com>
|
|
|
21ab4e |
Date: Mon, 17 Apr 2017 13:00:54 +0530
|
|
|
21ab4e |
Subject: [PATCH 395/406] cluster/dht: Skip file migration if the subvol that
|
|
|
21ab4e |
meets min-free-disk criteria happens to be the same subvol containing
|
|
|
21ab4e |
data-file
|
|
|
21ab4e |
|
|
|
21ab4e |
Rebalance needs to figure out a new subvol in case the hashed subvol
|
|
|
21ab4e |
does not have enough space. In the process of figuring out the new subvol,
|
|
|
21ab4e |
we need to ignore the source subvol, otherwise it will lead to data loss.
|
|
|
21ab4e |
|
|
|
21ab4e |
Test: Manual
|
|
|
21ab4e |
Ran the following
|
|
|
21ab4e |
sizeof /tmp/1: 1.5GB
|
|
|
21ab4e |
sizeof /brick/1: 16GB
|
|
|
21ab4e |
sizeof /tmp/2: 1.5GB
|
|
|
21ab4e |
<start>
|
|
|
21ab4e |
|
|
|
21ab4e |
glusterd; gluster v create test1 vm1:/brick/1 vm1:/tmp/1;
|
|
|
21ab4e |
gluster v start test1;
|
|
|
21ab4e |
mount -t glusterfs vm1:test1 /mnt;
|
|
|
21ab4e |
for i in {1..2000}
|
|
|
21ab4e |
do
|
|
|
21ab4e |
dd if=/dev/zero of=/mnt/file$i bs=1KB count=1 &> /dev/null;
|
|
|
21ab4e |
done
|
|
|
21ab4e |
gluster v add-brick test1 vm1:/tmp/2
|
|
|
21ab4e |
gluster v set test1 min-free-disk 12GB
|
|
|
21ab4e |
gluster v remove-brick test1 vm1:/tmp/1 start
|
|
|
21ab4e |
<end>
|
|
|
21ab4e |
|
|
|
21ab4e |
file count and data were intact.
|
|
|
21ab4e |
|
|
|
21ab4e |
> Signed-off-by: Susant Palai <spalai@redhat.com>
|
|
|
21ab4e |
> Reviewed-on: https://review.gluster.org/17064
|
|
|
21ab4e |
> Smoke: Gluster Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
|
|
|
21ab4e |
> Signed-off-by: Susant Palai <spalai@redhat.com>
|
|
|
21ab4e |
|
|
|
21ab4e |
Change-Id: Ib8fc8467a3d48a7c12958824c4f0b88e160b86c1
|
|
|
21ab4e |
BUG: 1360317
|
|
|
21ab4e |
Signed-off-by: Susant Palai <spalai@redhat.com>
|
|
|
21ab4e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/103915
|
|
|
21ab4e |
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
|
|
|
21ab4e |
---
|
|
|
21ab4e |
xlators/cluster/dht/src/dht-common.h | 2 +-
|
|
|
21ab4e |
xlators/cluster/dht/src/dht-diskusage.c | 19 ++++---
|
|
|
21ab4e |
xlators/cluster/dht/src/dht-rebalance.c | 96 ++++++++++++++++++++++++++-------
|
|
|
21ab4e |
3 files changed, 92 insertions(+), 25 deletions(-)
|
|
|
21ab4e |
|
|
|
21ab4e |
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
|
|
|
21ab4e |
index 37a6e61..eb6d1e8 100644
|
|
|
21ab4e |
--- a/xlators/cluster/dht/src/dht-common.h
|
|
|
21ab4e |
+++ b/xlators/cluster/dht/src/dht-common.h
|
|
|
21ab4e |
@@ -1114,7 +1114,7 @@ dht_dir_has_layout (dict_t *xattr, char *name);
|
|
|
21ab4e |
gf_boolean_t
|
|
|
21ab4e |
dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator);
|
|
|
21ab4e |
xlator_t *
|
|
|
21ab4e |
-dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol,
|
|
|
21ab4e |
+dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol, xlator_t *ignore,
|
|
|
21ab4e |
dht_layout_t *layout, uint64_t filesize);
|
|
|
21ab4e |
xlator_t *
|
|
|
21ab4e |
dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol,
|
|
|
21ab4e |
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
|
|
|
21ab4e |
index 13698a9..0559215 100644
|
|
|
21ab4e |
--- a/xlators/cluster/dht/src/dht-diskusage.c
|
|
|
21ab4e |
+++ b/xlators/cluster/dht/src/dht-diskusage.c
|
|
|
21ab4e |
@@ -315,7 +315,7 @@ dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol,
|
|
|
21ab4e |
|
|
|
21ab4e |
LOCK (&conf->subvolume_lock);
|
|
|
21ab4e |
{
|
|
|
21ab4e |
- avail_subvol = dht_subvol_with_free_space_inodes(this, subvol,
|
|
|
21ab4e |
+ avail_subvol = dht_subvol_with_free_space_inodes(this, subvol, NULL,
|
|
|
21ab4e |
layout, 0);
|
|
|
21ab4e |
if(!avail_subvol)
|
|
|
21ab4e |
{
|
|
|
21ab4e |
@@ -340,8 +340,8 @@ out:
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
static inline
|
|
|
21ab4e |
-int32_t dht_subvol_has_err (dht_conf_t *conf, xlator_t *this,
|
|
|
21ab4e |
- dht_layout_t *layout)
|
|
|
21ab4e |
+int32_t dht_subvol_has_err (dht_conf_t *conf, xlator_t *this, xlator_t *ignore,
|
|
|
21ab4e |
+ dht_layout_t *layout)
|
|
|
21ab4e |
{
|
|
|
21ab4e |
int ret = -1;
|
|
|
21ab4e |
int i = 0;
|
|
|
21ab4e |
@@ -349,6 +349,13 @@ int32_t dht_subvol_has_err (dht_conf_t *conf, xlator_t *this,
|
|
|
21ab4e |
if (!this || !layout)
|
|
|
21ab4e |
goto out;
|
|
|
21ab4e |
|
|
|
21ab4e |
+ /* this check is meant for rebalance process. The source of the file
|
|
|
21ab4e |
+ * should be ignored for space check */
|
|
|
21ab4e |
+ if (this == ignore) {
|
|
|
21ab4e |
+ goto out;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+
|
|
|
21ab4e |
/* check if subvol has layout errors, before selecting it */
|
|
|
21ab4e |
for (i = 0; i < layout->cnt; i++) {
|
|
|
21ab4e |
if (!strcmp (layout->list[i].xlator->name, this->name) &&
|
|
|
21ab4e |
@@ -376,7 +383,7 @@ out:
|
|
|
21ab4e |
|
|
|
21ab4e |
/*Get subvolume which has both space and inodes more than the min criteria*/
|
|
|
21ab4e |
xlator_t *
|
|
|
21ab4e |
-dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol,
|
|
|
21ab4e |
+dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol, xlator_t *ignore,
|
|
|
21ab4e |
dht_layout_t *layout, uint64_t filesize)
|
|
|
21ab4e |
{
|
|
|
21ab4e |
int i = 0;
|
|
|
21ab4e |
@@ -398,7 +405,7 @@ dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol,
|
|
|
21ab4e |
/* check if subvol has layout errors and also it is not a
|
|
|
21ab4e |
* decommissioned brick, before selecting it */
|
|
|
21ab4e |
ignore_subvol = dht_subvol_has_err (conf, conf->subvolumes[i],
|
|
|
21ab4e |
- layout);
|
|
|
21ab4e |
+ ignore, layout);
|
|
|
21ab4e |
if (ignore_subvol)
|
|
|
21ab4e |
continue;
|
|
|
21ab4e |
|
|
|
21ab4e |
@@ -463,7 +470,7 @@ dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol,
|
|
|
21ab4e |
/* check if subvol has layout errors and also it is not a
|
|
|
21ab4e |
* decommissioned brick, before selecting it*/
|
|
|
21ab4e |
|
|
|
21ab4e |
- ignore_subvol = dht_subvol_has_err (conf, conf->subvolumes[i],
|
|
|
21ab4e |
+ ignore_subvol = dht_subvol_has_err (conf, conf->subvolumes[i], NULL,
|
|
|
21ab4e |
layout);
|
|
|
21ab4e |
if (ignore_subvol)
|
|
|
21ab4e |
continue;
|
|
|
21ab4e |
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
|
|
|
21ab4e |
index 9465cde..49b2230 100644
|
|
|
21ab4e |
--- a/xlators/cluster/dht/src/dht-rebalance.c
|
|
|
21ab4e |
+++ b/xlators/cluster/dht/src/dht-rebalance.c
|
|
|
21ab4e |
@@ -719,27 +719,30 @@ __dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struc
|
|
|
21ab4e |
loc->path, to->name, strerror (-ret));
|
|
|
21ab4e |
*/
|
|
|
21ab4e |
|
|
|
21ab4e |
+ ret = syncop_fsetattr (to, fd, stbuf,
|
|
|
21ab4e |
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
|
|
|
21ab4e |
+ NULL, NULL, NULL, NULL);
|
|
|
21ab4e |
+ if (ret < 0)
|
|
|
21ab4e |
+ gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
|
21ab4e |
+ DHT_MSG_MIGRATE_FILE_FAILED,
|
|
|
21ab4e |
+ "chown failed for %s on %s (%s)",
|
|
|
21ab4e |
+ loc->path, to->name, strerror (-ret));
|
|
|
21ab4e |
+
|
|
|
21ab4e |
/* Fallocate does not work for size 0, hence the check. Anyway we don't
|
|
|
21ab4e |
* need to care about min-free-disk for 0 byte size file */
|
|
|
21ab4e |
if (stbuf->ia_size > 0) {
|
|
|
21ab4e |
ret = syncop_fallocate (to, fd, 0, 0, stbuf->ia_size, NULL,
|
|
|
21ab4e |
NULL);
|
|
|
21ab4e |
- if (ret < 0)
|
|
|
21ab4e |
+ if (ret < 0) {
|
|
|
21ab4e |
gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
|
21ab4e |
DHT_MSG_MIGRATE_FILE_FAILED,
|
|
|
21ab4e |
"fallocate failed for %s on %s (%s)",
|
|
|
21ab4e |
loc->path, to->name, strerror (-ret));
|
|
|
21ab4e |
+ ret = -1;
|
|
|
21ab4e |
+ goto out;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
- ret = syncop_fsetattr (to, fd, stbuf,
|
|
|
21ab4e |
- (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
|
|
|
21ab4e |
- NULL, NULL, NULL, NULL);
|
|
|
21ab4e |
- if (ret < 0)
|
|
|
21ab4e |
- gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
|
21ab4e |
- DHT_MSG_MIGRATE_FILE_FAILED,
|
|
|
21ab4e |
- "chown failed for %s on %s (%s)",
|
|
|
21ab4e |
- loc->path, to->name, strerror (-ret));
|
|
|
21ab4e |
-
|
|
|
21ab4e |
/* success */
|
|
|
21ab4e |
ret = 0;
|
|
|
21ab4e |
|
|
|
21ab4e |
@@ -761,7 +764,8 @@ out:
|
|
|
21ab4e |
static int
|
|
|
21ab4e |
__dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc,
|
|
|
21ab4e |
struct iatt *stbuf, int flag, dht_conf_t *conf,
|
|
|
21ab4e |
- gf_boolean_t *target_changed, xlator_t **new_subvol)
|
|
|
21ab4e |
+ gf_boolean_t *target_changed, xlator_t **new_subvol,
|
|
|
21ab4e |
+ gf_boolean_t *ignore_failure)
|
|
|
21ab4e |
{
|
|
|
21ab4e |
struct statvfs src_statfs = {0,};
|
|
|
21ab4e |
struct statvfs dst_statfs = {0,};
|
|
|
21ab4e |
@@ -773,6 +777,7 @@ __dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc,
|
|
|
21ab4e |
uint64_t dst_statfs_blocks = 1;
|
|
|
21ab4e |
double post_availspace = 0;
|
|
|
21ab4e |
double post_percent = 0;
|
|
|
21ab4e |
+ int i = 0;
|
|
|
21ab4e |
|
|
|
21ab4e |
this = THIS;
|
|
|
21ab4e |
|
|
|
21ab4e |
@@ -897,13 +902,27 @@ find_new_subvol:
|
|
|
21ab4e |
goto out;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
- *new_subvol = dht_subvol_with_free_space_inodes (this, to,
|
|
|
21ab4e |
- layout, stbuf->ia_size);
|
|
|
21ab4e |
- if (!(*new_subvol)) {
|
|
|
21ab4e |
+ *new_subvol = dht_subvol_with_free_space_inodes (this, to, from, layout,
|
|
|
21ab4e |
+ stbuf->ia_size);
|
|
|
21ab4e |
+ if ((!(*new_subvol)) || (*new_subvol == from)) {
|
|
|
21ab4e |
gf_msg (this->name, GF_LOG_WARNING, 0,
|
|
|
21ab4e |
DHT_MSG_SUBVOL_INSUFF_SPACE, "Could not find any subvol"
|
|
|
21ab4e |
- " with space accomodating the file. Consider adding "
|
|
|
21ab4e |
- "bricks");
|
|
|
21ab4e |
+ " with space accomodating the file - %s. Consider adding "
|
|
|
21ab4e |
+ "bricks", loc->path);
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ /* For remove-brick case if the source is not one of the
|
|
|
21ab4e |
+ * removed-brick, do not mark the error as failure */
|
|
|
21ab4e |
+ if (conf->decommission_subvols_cnt) {
|
|
|
21ab4e |
+ *ignore_failure = _gf_true;
|
|
|
21ab4e |
+ for (i = 0; i < conf->decommission_subvols_cnt; i++) {
|
|
|
21ab4e |
+ if (conf->decommissioned_bricks[i] == from) {
|
|
|
21ab4e |
+ *ignore_failure = _gf_false;
|
|
|
21ab4e |
+ break;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+ } else {
|
|
|
21ab4e |
+ *ignore_failure = _gf_false;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
|
|
|
21ab4e |
*target_changed = _gf_false;
|
|
|
21ab4e |
ret = -1;
|
|
|
21ab4e |
@@ -1382,6 +1401,8 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
|
|
|
21ab4e |
gf_boolean_t target_changed = _gf_false;
|
|
|
21ab4e |
xlator_t *new_target = NULL;
|
|
|
21ab4e |
xlator_t *old_target = NULL;
|
|
|
21ab4e |
+ fd_t *linkto_fd = NULL;
|
|
|
21ab4e |
+ gf_boolean_t ignore_failure = _gf_false;
|
|
|
21ab4e |
|
|
|
21ab4e |
defrag = conf->defrag;
|
|
|
21ab4e |
if (!defrag)
|
|
|
21ab4e |
@@ -1499,7 +1520,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
|
|
|
21ab4e |
clean_dst = _gf_true;
|
|
|
21ab4e |
|
|
|
21ab4e |
ret = __dht_check_free_space (to, from, loc, &stbuf, flag, conf,
|
|
|
21ab4e |
- &target_changed, &new_target);
|
|
|
21ab4e |
+ &target_changed, &new_target, &ignore_failure);
|
|
|
21ab4e |
if (target_changed) {
|
|
|
21ab4e |
/* Can't handle for hardlinks. Marking this as failure */
|
|
|
21ab4e |
if (flag == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS || stbuf.ia_nlink > 1) {
|
|
|
21ab4e |
@@ -1543,6 +1564,9 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
if (ret) {
|
|
|
21ab4e |
+ if (ignore_failure)
|
|
|
21ab4e |
+ ret = 0;
|
|
|
21ab4e |
+
|
|
|
21ab4e |
goto out;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
@@ -1792,13 +1816,47 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
ret = syncop_setxattr (old_target, loc, dict, 0, NULL, NULL);
|
|
|
21ab4e |
- if (ret) {
|
|
|
21ab4e |
+ if (ret && -ret != ESTALE && -ret != ENOENT) {
|
|
|
21ab4e |
gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
|
21ab4e |
DHT_MSG_MIGRATE_FILE_FAILED,
|
|
|
21ab4e |
"failed to set xattr on %s in %s (%s)",
|
|
|
21ab4e |
loc->path, old_target->name, strerror (-ret));
|
|
|
21ab4e |
ret = -1;
|
|
|
21ab4e |
goto out;
|
|
|
21ab4e |
+ } else if (-ret == ESTALE || -ret == ENOENT) {
|
|
|
21ab4e |
+ /* The failure ESTALE indicates that the linkto
|
|
|
21ab4e |
+ * file on the hashed subvol might have been deleted.
|
|
|
21ab4e |
+ * In this case will create a linkto file with new target
|
|
|
21ab4e |
+ * as linkto xattr value*/
|
|
|
21ab4e |
+ linkto_fd = fd_create (loc->inode, DHT_REBALANCE_PID);
|
|
|
21ab4e |
+ if (!linkto_fd) {
|
|
|
21ab4e |
+ gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
|
21ab4e |
+ DHT_MSG_MIGRATE_FILE_FAILED,
|
|
|
21ab4e |
+ "%s: fd create failed (%s)",
|
|
|
21ab4e |
+ loc->path, strerror (errno));
|
|
|
21ab4e |
+ ret = -1;
|
|
|
21ab4e |
+ goto out;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+ ret = syncop_create (old_target, loc, O_RDWR,
|
|
|
21ab4e |
+ DHT_LINKFILE_MODE, linkto_fd,
|
|
|
21ab4e |
+ NULL, dict, NULL);
|
|
|
21ab4e |
+ if (ret != 0 && -ret != EEXIST && -ret != ESTALE) {
|
|
|
21ab4e |
+ ret = -1;
|
|
|
21ab4e |
+ gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
|
21ab4e |
+ DHT_MSG_MIGRATE_FILE_FAILED,
|
|
|
21ab4e |
+ "failed to create linkto file on %s in %s (%s)",
|
|
|
21ab4e |
+ loc->path, old_target->name, strerror (-ret));
|
|
|
21ab4e |
+ goto out;
|
|
|
21ab4e |
+ } else if (ret == 0) {
|
|
|
21ab4e |
+ ret = syncop_fsetattr (old_target, linkto_fd, &stbuf,
|
|
|
21ab4e |
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
|
|
|
21ab4e |
+ NULL, NULL, NULL, NULL);
|
|
|
21ab4e |
+ if (ret < 0)
|
|
|
21ab4e |
+ gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
|
21ab4e |
+ DHT_MSG_MIGRATE_FILE_FAILED,
|
|
|
21ab4e |
+ "chown failed for %s on %s (%s)",
|
|
|
21ab4e |
+ loc->path, old_target->name, strerror (-ret));
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
}
|
|
|
21ab4e |
}
|
|
|
21ab4e |
}
|
|
|
21ab4e |
@@ -2044,6 +2102,8 @@ out:
|
|
|
21ab4e |
syncop_close (dst_fd);
|
|
|
21ab4e |
if (src_fd)
|
|
|
21ab4e |
syncop_close (src_fd);
|
|
|
21ab4e |
+ if (linkto_fd)
|
|
|
21ab4e |
+ syncop_close (linkto_fd);
|
|
|
21ab4e |
|
|
|
21ab4e |
loc_wipe (&tmp_loc);
|
|
|
21ab4e |
|
|
|
21ab4e |
--
|
|
|
21ab4e |
1.8.3.1
|
|
|
21ab4e |
|