|
|
21ab4e |
From 8b09624120982a15057b371730550d085ae8b3fd Mon Sep 17 00:00:00 2001
|
|
|
21ab4e |
From: Susant Palai <spalai@redhat.com>
|
|
|
21ab4e |
Date: Tue, 11 Apr 2017 17:27:17 +0530
|
|
|
21ab4e |
Subject: [PATCH 394/406] cluster/dht: Make rebalance honor min-free-disk
|
|
|
21ab4e |
MIME-Version: 1.0
|
|
|
21ab4e |
Content-Type: text/plain; charset=UTF-8
|
|
|
21ab4e |
Content-Transfer-Encoding: 8bit
|
|
|
21ab4e |
|
|
|
21ab4e |
test: Manual
|
|
|
21ab4e |
|
|
|
21ab4e |
created files of size 1K on a 2-brick setup (each brick of size 1GB).
|
|
|
21ab4e |
added a brick of size 16GB.
|
|
|
21ab4e |
set min-free-disk to 12GB(so that first two bricks won't receive any files).
|
|
|
21ab4e |
removed one of the first two bricks (of size 1GB).
|
|
|
21ab4e |
|
|
|
21ab4e |
Logs from test:
|
|
|
21ab4e |
[2017-04-12 08:52:08.196484] W [MSGID: 0] [dht-rebalance.c:895:__dht_check_free_space]
|
|
|
21ab4e |
0-test1-dht: Write will cross min-free-disk for file - /tile32 on subvol - test1-client-1.
|
|
|
21ab4e |
Looking for new subvol.
|
|
|
21ab4e |
|
|
|
21ab4e |
[2017-04-12 08:52:08.196904] I [MSGID: 0] [dht-rebalance.c:925:__dht_check_free_space]
|
|
|
21ab4e |
0-test1-dht: new target found - test1-client-2 for file - /tile32
|
|
|
21ab4e |
|
|
|
21ab4e |
- Post migration we have two files. The new destination (/brick/1) has the data file
|
|
|
21ab4e |
[root@vm1 ~]# ll /brick/1/tile32
|
|
|
21ab4e |
-rw-r--r--. 2 root root 0 Apr 12 14:22 /brick/1/tile32
|
|
|
21ab4e |
|
|
|
21ab4e |
- On the old target the linkto file is there with linkto xattr pointing to /brick/1
|
|
|
21ab4e |
[root@vm1 ~]# ll /tmp/2/tile32
|
|
|
21ab4e |
---------T. 2 root root 1000 Apr 12 14:22 /tmp/2/tile32
|
|
|
21ab4e |
[root@vm1 ~]# getfattr -m . -de text /tmp/2/tile32
|
|
|
21ab4e |
getfattr: Removing leading '/' from absolute path names
|
|
|
21ab4e |
security.selinux="unconfined_u:object_r:user_tmp_t:s0"
|
|
|
21ab4e |
trusted.gfid="����:Aс�#�/'b2"
|
|
|
21ab4e |
trusted.glusterfs.dht.linkto="test1-client-2"
|
|
|
21ab4e |
|
|
|
21ab4e |
Marking ./tests/features/worm_sh.t as bad test.
|
|
|
21ab4e |
Reason being, this patch failed on master branch as well and it has nothing
|
|
|
21ab4e |
to do with rebalance/remove-brick.
|
|
|
21ab4e |
|
|
|
21ab4e |
> Signed-off-by: Susant Palai <spalai@redhat.com>
|
|
|
21ab4e |
> Reviewed-on: https://review.gluster.org/17034
|
|
|
21ab4e |
> Smoke: Gluster Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
> Reviewed-by: Amar Tumballi <amarts@redhat.com>
|
|
|
21ab4e |
> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
|
|
|
21ab4e |
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
> Signed-off-by: Susant Palai <spalai@redhat.com>
|
|
|
21ab4e |
|
|
|
21ab4e |
BUG: 1360317
|
|
|
21ab4e |
Change-Id: I90bae251cda3d957a49cdceda90cd08311a392fb
|
|
|
21ab4e |
Signed-off-by: Susant Palai <spalai@redhat.com>
|
|
|
21ab4e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/103914
|
|
|
21ab4e |
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
|
|
|
21ab4e |
---
|
|
|
21ab4e |
tests/bugs/distribute/bug-1161311.t | 25 +++-
|
|
|
21ab4e |
xlators/cluster/dht/src/dht-common.h | 5 +-
|
|
|
21ab4e |
xlators/cluster/dht/src/dht-diskusage.c | 31 ++++-
|
|
|
21ab4e |
xlators/cluster/dht/src/dht-rebalance.c | 214 ++++++++++++++++++++++++++++++--
|
|
|
21ab4e |
xlators/storage/posix/src/posix.c | 2 +-
|
|
|
21ab4e |
5 files changed, 257 insertions(+), 20 deletions(-)
|
|
|
21ab4e |
|
|
|
21ab4e |
diff --git a/tests/bugs/distribute/bug-1161311.t b/tests/bugs/distribute/bug-1161311.t
|
|
|
21ab4e |
index 93e9d03..a491223 100755
|
|
|
21ab4e |
--- a/tests/bugs/distribute/bug-1161311.t
|
|
|
21ab4e |
+++ b/tests/bugs/distribute/bug-1161311.t
|
|
|
21ab4e |
@@ -15,6 +15,27 @@
|
|
|
21ab4e |
. $(dirname $0)/../../include.rc
|
|
|
21ab4e |
. $(dirname $0)/../../volume.rc
|
|
|
21ab4e |
|
|
|
21ab4e |
+cleanup
|
|
|
21ab4e |
+TEST truncate -s 10GB $B0/brick1
|
|
|
21ab4e |
+TEST truncate -s 10GB $B0/brick2
|
|
|
21ab4e |
+TEST truncate -s 10GB $B0/brick3
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+TEST LO1=`SETUP_LOOP $B0/brick1`
|
|
|
21ab4e |
+TEST MKFS_LOOP $LO1
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+TEST LO2=`SETUP_LOOP $B0/brick2`
|
|
|
21ab4e |
+TEST MKFS_LOOP $LO2
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+TEST LO3=`SETUP_LOOP $B0/brick3`
|
|
|
21ab4e |
+TEST MKFS_LOOP $LO3
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+TEST mkdir -p $B0/${V0}1 $B0/${V0}2 $B0/${V0}3
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+TEST MOUNT_LOOP $LO1 $B0/${V0}1
|
|
|
21ab4e |
+TEST MOUNT_LOOP $LO2 $B0/${V0}2
|
|
|
21ab4e |
+TEST MOUNT_LOOP $LO3 $B0/${V0}3
|
|
|
21ab4e |
+
|
|
|
21ab4e |
checksticky () {
|
|
|
21ab4e |
i=0;
|
|
|
21ab4e |
while [ ! -k $1 ]; do
|
|
|
21ab4e |
@@ -31,7 +52,6 @@ checksticky () {
|
|
|
21ab4e |
return 0
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
-cleanup;
|
|
|
21ab4e |
|
|
|
21ab4e |
TEST glusterd
|
|
|
21ab4e |
TEST pidof glusterd
|
|
|
21ab4e |
@@ -126,5 +146,6 @@ TEST ln ./dir1/FILE7 ./FILE7
|
|
|
21ab4e |
cd /
|
|
|
21ab4e |
linkcountsrc=$(stat -c %h $M0/dir1/FILE1)
|
|
|
21ab4e |
TEST [[ $linkcountsrc == 14 ]]
|
|
|
21ab4e |
-
|
|
|
21ab4e |
+UMOUNT_LOOP ${B0}/${V0}{1..3}
|
|
|
21ab4e |
+rm -f ${B0}/brick{1..3}
|
|
|
21ab4e |
cleanup;
|
|
|
21ab4e |
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
|
|
|
21ab4e |
index 21e000a..37a6e61 100644
|
|
|
21ab4e |
--- a/xlators/cluster/dht/src/dht-common.h
|
|
|
21ab4e |
+++ b/xlators/cluster/dht/src/dht-common.h
|
|
|
21ab4e |
@@ -301,6 +301,9 @@ struct dht_du {
|
|
|
21ab4e |
uint64_t avail_space;
|
|
|
21ab4e |
uint32_t log;
|
|
|
21ab4e |
uint32_t chunks;
|
|
|
21ab4e |
+ uint32_t total_blocks;
|
|
|
21ab4e |
+ uint32_t avail_blocks;
|
|
|
21ab4e |
+ uint32_t frsize; /*fragment size*/
|
|
|
21ab4e |
};
|
|
|
21ab4e |
typedef struct dht_du dht_du_t;
|
|
|
21ab4e |
|
|
|
21ab4e |
@@ -1112,7 +1115,7 @@ gf_boolean_t
|
|
|
21ab4e |
dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator);
|
|
|
21ab4e |
xlator_t *
|
|
|
21ab4e |
dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol,
|
|
|
21ab4e |
- dht_layout_t *layout);
|
|
|
21ab4e |
+ dht_layout_t *layout, uint64_t filesize);
|
|
|
21ab4e |
xlator_t *
|
|
|
21ab4e |
dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol,
|
|
|
21ab4e |
dht_layout_t *layout);
|
|
|
21ab4e |
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
|
|
|
21ab4e |
index 0695743..13698a9 100644
|
|
|
21ab4e |
--- a/xlators/cluster/dht/src/dht-diskusage.c
|
|
|
21ab4e |
+++ b/xlators/cluster/dht/src/dht-diskusage.c
|
|
|
21ab4e |
@@ -81,7 +81,11 @@ dht_du_info_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
|
21ab4e |
conf->du_stats[i].avail_space = bytes;
|
|
|
21ab4e |
conf->du_stats[i].avail_inodes = percent_inodes;
|
|
|
21ab4e |
conf->du_stats[i].chunks = chunks;
|
|
|
21ab4e |
- gf_msg_debug (this->name, 0,
|
|
|
21ab4e |
+ conf->du_stats[i].total_blocks = statvfs->f_blocks;
|
|
|
21ab4e |
+ conf->du_stats[i].avail_blocks = statvfs->f_bavail;
|
|
|
21ab4e |
+ conf->du_stats[i].frsize = statvfs->f_frsize;
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ gf_msg_debug (this->name, 0,
|
|
|
21ab4e |
"subvolume '%s': avail_percent "
|
|
|
21ab4e |
"is: %.2f and avail_space "
|
|
|
21ab4e |
"is: %" PRIu64" and avail_inodes"
|
|
|
21ab4e |
@@ -312,7 +316,7 @@ dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol,
|
|
|
21ab4e |
LOCK (&conf->subvolume_lock);
|
|
|
21ab4e |
{
|
|
|
21ab4e |
avail_subvol = dht_subvol_with_free_space_inodes(this, subvol,
|
|
|
21ab4e |
- layout);
|
|
|
21ab4e |
+ layout, 0);
|
|
|
21ab4e |
if(!avail_subvol)
|
|
|
21ab4e |
{
|
|
|
21ab4e |
avail_subvol = dht_subvol_maxspace_nonzeroinode(this,
|
|
|
21ab4e |
@@ -373,12 +377,17 @@ out:
|
|
|
21ab4e |
/*Get subvolume which has both space and inodes more than the min criteria*/
|
|
|
21ab4e |
xlator_t *
|
|
|
21ab4e |
dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol,
|
|
|
21ab4e |
- dht_layout_t *layout)
|
|
|
21ab4e |
+ dht_layout_t *layout, uint64_t filesize)
|
|
|
21ab4e |
{
|
|
|
21ab4e |
int i = 0;
|
|
|
21ab4e |
double max = 0;
|
|
|
21ab4e |
double max_inodes = 0;
|
|
|
21ab4e |
int ignore_subvol = 0;
|
|
|
21ab4e |
+ uint64_t total_blocks = 0;
|
|
|
21ab4e |
+ uint64_t avail_blocks = 0;
|
|
|
21ab4e |
+ uint64_t frsize = 0;
|
|
|
21ab4e |
+ double post_availspace = 0;
|
|
|
21ab4e |
+ double post_percent = 0;
|
|
|
21ab4e |
|
|
|
21ab4e |
xlator_t *avail_subvol = NULL;
|
|
|
21ab4e |
dht_conf_t *conf = NULL;
|
|
|
21ab4e |
@@ -401,6 +410,9 @@ dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol,
|
|
|
21ab4e |
max = conf->du_stats[i].avail_percent;
|
|
|
21ab4e |
max_inodes = conf->du_stats[i].avail_inodes;
|
|
|
21ab4e |
avail_subvol = conf->subvolumes[i];
|
|
|
21ab4e |
+ total_blocks = conf->du_stats[i].total_blocks;
|
|
|
21ab4e |
+ avail_blocks = conf->du_stats[i].avail_blocks;
|
|
|
21ab4e |
+ frsize = conf->du_stats[i].frsize;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
@@ -416,6 +428,19 @@ dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol,
|
|
|
21ab4e |
}
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
+ if (avail_subvol) {
|
|
|
21ab4e |
+ if (conf->disk_unit == 'p') {
|
|
|
21ab4e |
+ post_availspace = (avail_blocks * frsize) - filesize;
|
|
|
21ab4e |
+ post_percent = (post_availspace * 100) / (total_blocks * frsize);
|
|
|
21ab4e |
+ if (post_percent < conf->min_free_disk)
|
|
|
21ab4e |
+ avail_subvol = NULL;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+ if (conf->disk_unit != 'p') {
|
|
|
21ab4e |
+ if ((max - filesize) < conf->min_free_disk)
|
|
|
21ab4e |
+ avail_subvol = NULL;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+
|
|
|
21ab4e |
return avail_subvol;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
|
|
|
21ab4e |
index ec9102c..9465cde 100644
|
|
|
21ab4e |
--- a/xlators/cluster/dht/src/dht-rebalance.c
|
|
|
21ab4e |
+++ b/xlators/cluster/dht/src/dht-rebalance.c
|
|
|
21ab4e |
@@ -707,12 +707,29 @@ __dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struc
|
|
|
21ab4e |
"%s: failed to set xattr on %s (%s)",
|
|
|
21ab4e |
loc->path, to->name, strerror (-ret));
|
|
|
21ab4e |
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ /* TODO: Need to add a detailed comment about why we moved away from
|
|
|
21ab4e |
+ ftruncate.
|
|
|
21ab4e |
+
|
|
|
21ab4e |
ret = syncop_ftruncate (to, fd, stbuf->ia_size, NULL, NULL);
|
|
|
21ab4e |
if (ret < 0)
|
|
|
21ab4e |
gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
|
21ab4e |
DHT_MSG_MIGRATE_FILE_FAILED,
|
|
|
21ab4e |
"ftruncate failed for %s on %s (%s)",
|
|
|
21ab4e |
loc->path, to->name, strerror (-ret));
|
|
|
21ab4e |
+ */
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ /* Fallocate does not work for size 0, hence the check. Anyway we don't
|
|
|
21ab4e |
+ * need to care about min-free-disk for 0 byte size file */
|
|
|
21ab4e |
+ if (stbuf->ia_size > 0) {
|
|
|
21ab4e |
+ ret = syncop_fallocate (to, fd, 0, 0, stbuf->ia_size, NULL,
|
|
|
21ab4e |
+ NULL);
|
|
|
21ab4e |
+ if (ret < 0)
|
|
|
21ab4e |
+ gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
|
21ab4e |
+ DHT_MSG_MIGRATE_FILE_FAILED,
|
|
|
21ab4e |
+ "fallocate failed for %s on %s (%s)",
|
|
|
21ab4e |
+ loc->path, to->name, strerror (-ret));
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
|
|
|
21ab4e |
ret = syncop_fsetattr (to, fd, stbuf,
|
|
|
21ab4e |
(GF_SET_ATTR_UID | GF_SET_ATTR_GID),
|
|
|
21ab4e |
@@ -743,16 +760,19 @@ out:
|
|
|
21ab4e |
|
|
|
21ab4e |
static int
|
|
|
21ab4e |
__dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc,
|
|
|
21ab4e |
- struct iatt *stbuf, int flag)
|
|
|
21ab4e |
+ struct iatt *stbuf, int flag, dht_conf_t *conf,
|
|
|
21ab4e |
+ gf_boolean_t *target_changed, xlator_t **new_subvol)
|
|
|
21ab4e |
{
|
|
|
21ab4e |
struct statvfs src_statfs = {0,};
|
|
|
21ab4e |
struct statvfs dst_statfs = {0,};
|
|
|
21ab4e |
int ret = -1;
|
|
|
21ab4e |
xlator_t *this = NULL;
|
|
|
21ab4e |
dict_t *xdata = NULL;
|
|
|
21ab4e |
-
|
|
|
21ab4e |
+ dht_layout_t *layout = NULL;
|
|
|
21ab4e |
uint64_t src_statfs_blocks = 1;
|
|
|
21ab4e |
uint64_t dst_statfs_blocks = 1;
|
|
|
21ab4e |
+ double post_availspace = 0;
|
|
|
21ab4e |
+ double post_percent = 0;
|
|
|
21ab4e |
|
|
|
21ab4e |
this = THIS;
|
|
|
21ab4e |
|
|
|
21ab4e |
@@ -794,6 +814,10 @@ __dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc,
|
|
|
21ab4e |
goto out;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
+ gf_msg_debug (this->name, 0, "min_free_disk - %f , block available - %lu ,"
|
|
|
21ab4e |
+ " block size - %lu ", conf->min_free_disk, dst_statfs.f_bavail,
|
|
|
21ab4e |
+ dst_statfs.f_bsize);
|
|
|
21ab4e |
+
|
|
|
21ab4e |
/* if force option is given, do not check for space @ dst.
|
|
|
21ab4e |
* Check only if space is avail for the file */
|
|
|
21ab4e |
if (flag != GF_DHT_MIGRATE_DATA)
|
|
|
21ab4e |
@@ -832,16 +856,64 @@ __dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc,
|
|
|
21ab4e |
goto out;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
}
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+
|
|
|
21ab4e |
check_avail_space:
|
|
|
21ab4e |
- if (((dst_statfs.f_bavail * dst_statfs.f_bsize) /
|
|
|
21ab4e |
- GF_DISK_SECTOR_SIZE) < stbuf->ia_blocks) {
|
|
|
21ab4e |
- gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
|
21ab4e |
- DHT_MSG_MIGRATE_FILE_FAILED,
|
|
|
21ab4e |
- "data movement attempted from node (%s) to node (%s) "
|
|
|
21ab4e |
- "which does not have required free space for (%s)",
|
|
|
21ab4e |
- from->name, to->name, loc->path);
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ if (conf->disk_unit == 'p' && dst_statfs.f_blocks) {
|
|
|
21ab4e |
+ post_availspace = (dst_statfs.f_bavail * dst_statfs.f_frsize) - stbuf->ia_size;
|
|
|
21ab4e |
+ post_percent = (post_availspace * 100) / (dst_statfs.f_blocks * dst_statfs.f_frsize);
|
|
|
21ab4e |
+ if (post_percent < conf->min_free_disk) {
|
|
|
21ab4e |
+ gf_msg (this->name, GF_LOG_WARNING, 0, 0,
|
|
|
21ab4e |
+ "Write will cross min-free-disk for "
|
|
|
21ab4e |
+ "file - %s on subvol - %s. Looking "
|
|
|
21ab4e |
+ "for new subvol", loc->path, to->name);
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ goto find_new_subvol;
|
|
|
21ab4e |
+ } else {
|
|
|
21ab4e |
+ ret = 0;
|
|
|
21ab4e |
+ goto out;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ if (conf->disk_unit != 'p' &&
|
|
|
21ab4e |
+ ((dst_statfs.f_bavail * dst_statfs.f_frsize) - stbuf->ia_size) < conf->min_free_disk) {
|
|
|
21ab4e |
+ gf_msg (this->name, GF_LOG_WARNING, 0, 0, "Write will cross "
|
|
|
21ab4e |
+ "min-free-disk for file - %s on subvol - %s. Looking "
|
|
|
21ab4e |
+ "for new subvol", loc->path, to->name);
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ goto find_new_subvol;
|
|
|
21ab4e |
+ } else {
|
|
|
21ab4e |
+ ret = 0;
|
|
|
21ab4e |
+ goto out;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+find_new_subvol:
|
|
|
21ab4e |
+ layout = dht_layout_get (this, loc->parent);
|
|
|
21ab4e |
+ if (!layout) {
|
|
|
21ab4e |
+ gf_log (this->name, GF_LOG_ERROR, "Layout is NULL");
|
|
|
21ab4e |
+ ret = -1;
|
|
|
21ab4e |
+ goto out;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ *new_subvol = dht_subvol_with_free_space_inodes (this, to,
|
|
|
21ab4e |
+ layout, stbuf->ia_size);
|
|
|
21ab4e |
+ if (!(*new_subvol)) {
|
|
|
21ab4e |
+ gf_msg (this->name, GF_LOG_WARNING, 0,
|
|
|
21ab4e |
+ DHT_MSG_SUBVOL_INSUFF_SPACE, "Could not find any subvol"
|
|
|
21ab4e |
+ " with space accomodating the file. Consider adding "
|
|
|
21ab4e |
+ "bricks");
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ *target_changed = _gf_false;
|
|
|
21ab4e |
ret = -1;
|
|
|
21ab4e |
goto out;
|
|
|
21ab4e |
+ } else {
|
|
|
21ab4e |
+ gf_msg (this->name, GF_LOG_INFO, 0, 0, "new target found - %s"
|
|
|
21ab4e |
+ " for file - %s", (*new_subvol)->name, loc->path);
|
|
|
21ab4e |
+ *target_changed = _gf_true;
|
|
|
21ab4e |
+ ret = 0;
|
|
|
21ab4e |
+ goto out;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
ret = 0;
|
|
|
21ab4e |
@@ -1307,6 +1379,9 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
|
|
|
21ab4e |
lock_migration_info_t locklist;
|
|
|
21ab4e |
dict_t *meta_dict = NULL;
|
|
|
21ab4e |
gf_boolean_t meta_locked = _gf_false;
|
|
|
21ab4e |
+ gf_boolean_t target_changed = _gf_false;
|
|
|
21ab4e |
+ xlator_t *new_target = NULL;
|
|
|
21ab4e |
+ xlator_t *old_target = NULL;
|
|
|
21ab4e |
|
|
|
21ab4e |
defrag = conf->defrag;
|
|
|
21ab4e |
if (!defrag)
|
|
|
21ab4e |
@@ -1415,12 +1490,57 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
|
|
|
21ab4e |
/* create the destination, with required modes/xattr */
|
|
|
21ab4e |
ret = __dht_rebalance_create_dst_file (to, from, loc, &stbuf,
|
|
|
21ab4e |
&dst_fd, xattr);
|
|
|
21ab4e |
- if (ret)
|
|
|
21ab4e |
+ if (ret) {
|
|
|
21ab4e |
+ gf_msg (this->name, GF_LOG_ERROR, 0, 0, "Create dst failed"
|
|
|
21ab4e |
+ " on - %s for file - %s", to->name, loc->path);
|
|
|
21ab4e |
goto out;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
|
|
|
21ab4e |
clean_dst = _gf_true;
|
|
|
21ab4e |
|
|
|
21ab4e |
- ret = __dht_check_free_space (to, from, loc, &stbuf, flag);
|
|
|
21ab4e |
+ ret = __dht_check_free_space (to, from, loc, &stbuf, flag, conf,
|
|
|
21ab4e |
+ &target_changed, &new_target);
|
|
|
21ab4e |
+ if (target_changed) {
|
|
|
21ab4e |
+ /* Can't handle for hardlinks. Marking this as failure */
|
|
|
21ab4e |
+ if (flag == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS || stbuf.ia_nlink > 1) {
|
|
|
21ab4e |
+ gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
|
21ab4e |
+ DHT_MSG_SUBVOL_INSUFF_SPACE, "Exiting migration for"
|
|
|
21ab4e |
+ " file - %s. flag - %d, stbuf.ia_nlink - %d",
|
|
|
21ab4e |
+ loc->path, flag, stbuf.ia_nlink);
|
|
|
21ab4e |
+ ret = -1;
|
|
|
21ab4e |
+ goto out;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ ret = syncop_ftruncate (to, dst_fd, 0, NULL, NULL);
|
|
|
21ab4e |
+ if (ret) {
|
|
|
21ab4e |
+ gf_log (this->name, GF_LOG_WARNING,
|
|
|
21ab4e |
+ "%s: failed to perform truncate on %s (%s)",
|
|
|
21ab4e |
+ loc->path, to->name, strerror (-ret));
|
|
|
21ab4e |
+ ret = -1;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ syncop_close (dst_fd);
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ old_target = to;
|
|
|
21ab4e |
+ to = new_target;
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ /* if the file migration is successful to this new target, then
|
|
|
21ab4e |
+ * update the xattr on the old destination to point the new
|
|
|
21ab4e |
+ * destination. We need to update this only post migration
|
|
|
21ab4e |
+ * as in case of failure the linkto needs to point to the source
|
|
|
21ab4e |
+ * subvol */
|
|
|
21ab4e |
+ ret = __dht_rebalance_create_dst_file (to, from, loc, &stbuf,
|
|
|
21ab4e |
+ &dst_fd, xattr);
|
|
|
21ab4e |
+ if (ret) {
|
|
|
21ab4e |
+ gf_log (this->name, GF_LOG_ERROR, "Create dst failed"
|
|
|
21ab4e |
+ " on - %s for file - %s", to->name, loc->path);
|
|
|
21ab4e |
+ goto out;
|
|
|
21ab4e |
+ } else {
|
|
|
21ab4e |
+ gf_msg (this->name, GF_LOG_INFO, 0, 0, "destination for file "
|
|
|
21ab4e |
+ "- %s is changed to - %s", loc->path, to->name);
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
|
|
|
21ab4e |
if (ret) {
|
|
|
21ab4e |
goto out;
|
|
|
21ab4e |
@@ -1652,6 +1772,36 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
|
|
|
21ab4e |
ret = -1;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
+ if (target_changed) {
|
|
|
21ab4e |
+ if (!dict) {
|
|
|
21ab4e |
+ dict = dict_new ();
|
|
|
21ab4e |
+ if (!dict) {
|
|
|
21ab4e |
+ ret = -1;
|
|
|
21ab4e |
+ goto out;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+ } else {
|
|
|
21ab4e |
+ dict_del (dict, conf->link_xattr_name);
|
|
|
21ab4e |
+ dict_del (dict, GLUSTERFS_POSIXLK_COUNT);
|
|
|
21ab4e |
+ ret = dict_set_str (dict, conf->link_xattr_name, to->name);
|
|
|
21ab4e |
+ if (ret) {
|
|
|
21ab4e |
+ gf_log (this->name, GF_LOG_ERROR,
|
|
|
21ab4e |
+ "failed to set xattr in dict for %s (linkto:%s)",
|
|
|
21ab4e |
+ loc->path, to->name);
|
|
|
21ab4e |
+ ret = -1;
|
|
|
21ab4e |
+ goto out;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ ret = syncop_setxattr (old_target, loc, dict, 0, NULL, NULL);
|
|
|
21ab4e |
+ if (ret) {
|
|
|
21ab4e |
+ gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
|
21ab4e |
+ DHT_MSG_MIGRATE_FILE_FAILED,
|
|
|
21ab4e |
+ "failed to set xattr on %s in %s (%s)",
|
|
|
21ab4e |
+ loc->path, old_target->name, strerror (-ret));
|
|
|
21ab4e |
+ ret = -1;
|
|
|
21ab4e |
+ goto out;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
|
|
|
21ab4e |
clean_dst = _gf_false;
|
|
|
21ab4e |
|
|
|
21ab4e |
@@ -2114,6 +2264,8 @@ gf_defrag_migrate_single_file (void *opaque)
|
|
|
21ab4e |
double elapsed = {0,};
|
|
|
21ab4e |
struct dht_container *rebal_entry = NULL;
|
|
|
21ab4e |
inode_t *inode = NULL;
|
|
|
21ab4e |
+ call_frame_t *statfs_frame = NULL;
|
|
|
21ab4e |
+ xlator_t *old_THIS = NULL;
|
|
|
21ab4e |
|
|
|
21ab4e |
rebal_entry = (struct dht_container *)opaque;
|
|
|
21ab4e |
if (!rebal_entry) {
|
|
|
21ab4e |
@@ -2186,6 +2338,20 @@ gf_defrag_migrate_single_file (void *opaque)
|
|
|
21ab4e |
/* use the inode returned by inode_link */
|
|
|
21ab4e |
entry_loc.inode = inode;
|
|
|
21ab4e |
|
|
|
21ab4e |
+ old_THIS = THIS;
|
|
|
21ab4e |
+ THIS = this;
|
|
|
21ab4e |
+ statfs_frame = create_frame (this, this->ctx->pool);
|
|
|
21ab4e |
+ if (!statfs_frame) {
|
|
|
21ab4e |
+ gf_msg (this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM,
|
|
|
21ab4e |
+ "Insufficient memory. Frame creation failed");
|
|
|
21ab4e |
+ ret = -1;
|
|
|
21ab4e |
+ goto out;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ /* async statfs information for honoring min-free-disk */
|
|
|
21ab4e |
+ dht_get_du_info (statfs_frame, this, loc);
|
|
|
21ab4e |
+ THIS = old_THIS;
|
|
|
21ab4e |
+
|
|
|
21ab4e |
ret = syncop_setxattr (this, &entry_loc, migrate_data, 0, NULL, NULL);
|
|
|
21ab4e |
if (ret < 0) {
|
|
|
21ab4e |
op_errno = -ret;
|
|
|
21ab4e |
@@ -2256,6 +2422,10 @@ gf_defrag_migrate_single_file (void *opaque)
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
out:
|
|
|
21ab4e |
+ if (statfs_frame) {
|
|
|
21ab4e |
+ STACK_DESTROY (statfs_frame->root);
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+
|
|
|
21ab4e |
loc_wipe (&entry_loc);
|
|
|
21ab4e |
|
|
|
21ab4e |
return ret;
|
|
|
21ab4e |
@@ -2580,7 +2750,6 @@ gf_defrag_get_entry (xlator_t *this, int i, struct dht_container **container,
|
|
|
21ab4e |
continue;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
-
|
|
|
21ab4e |
ret = syncop_lookup (this, &entry_loc, NULL, NULL,
|
|
|
21ab4e |
NULL, NULL);
|
|
|
21ab4e |
if (ret) {
|
|
|
21ab4e |
@@ -3755,7 +3924,8 @@ gf_defrag_start_crawl (void *data)
|
|
|
21ab4e |
int thread_spawn_count = 0;
|
|
|
21ab4e |
pthread_t *tid = NULL;
|
|
|
21ab4e |
gf_boolean_t is_tier_detach = _gf_false;
|
|
|
21ab4e |
-
|
|
|
21ab4e |
+ call_frame_t *statfs_frame = NULL;
|
|
|
21ab4e |
+ xlator_t *old_THIS = NULL;
|
|
|
21ab4e |
|
|
|
21ab4e |
this = data;
|
|
|
21ab4e |
if (!this)
|
|
|
21ab4e |
@@ -3792,6 +3962,21 @@ gf_defrag_start_crawl (void *data)
|
|
|
21ab4e |
goto out;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
+ old_THIS = THIS;
|
|
|
21ab4e |
+ THIS = this;
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ statfs_frame = create_frame (this, this->ctx->pool);
|
|
|
21ab4e |
+ if (!statfs_frame) {
|
|
|
21ab4e |
+ gf_msg (this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM,
|
|
|
21ab4e |
+ "Insufficient memory. Frame creation failed");
|
|
|
21ab4e |
+ ret = -1;
|
|
|
21ab4e |
+ goto out;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ /* async statfs update for honoring min-free-disk */
|
|
|
21ab4e |
+ dht_get_du_info (statfs_frame, this, &loc);
|
|
|
21ab4e |
+ THIS = old_THIS;
|
|
|
21ab4e |
+
|
|
|
21ab4e |
fix_layout = dict_new ();
|
|
|
21ab4e |
if (!fix_layout) {
|
|
|
21ab4e |
ret = -1;
|
|
|
21ab4e |
@@ -4059,6 +4244,9 @@ out:
|
|
|
21ab4e |
if (migrate_data)
|
|
|
21ab4e |
dict_unref (migrate_data);
|
|
|
21ab4e |
|
|
|
21ab4e |
+ if (statfs_frame) {
|
|
|
21ab4e |
+ STACK_DESTROY (statfs_frame->root);
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
exit:
|
|
|
21ab4e |
return ret;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
|
|
|
21ab4e |
index 266cd5d..207c7fd 100644
|
|
|
21ab4e |
--- a/xlators/storage/posix/src/posix.c
|
|
|
21ab4e |
+++ b/xlators/storage/posix/src/posix.c
|
|
|
21ab4e |
@@ -683,7 +683,7 @@ posix_do_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd,
|
|
|
21ab4e |
goto out;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
- if (dict_get (xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) {
|
|
|
21ab4e |
+ if (xdata && dict_get (xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) {
|
|
|
21ab4e |
locked = _gf_true;
|
|
|
21ab4e |
LOCK(&fd->inode->lock);
|
|
|
21ab4e |
}
|
|
|
21ab4e |
--
|
|
|
21ab4e |
1.8.3.1
|
|
|
21ab4e |
|