|
|
21ab4e |
From a587b4c7e84c18257bc3ba724bb9aae81bb1f3b9 Mon Sep 17 00:00:00 2001
|
|
|
21ab4e |
From: Susant Palai <spalai@redhat.com>
|
|
|
21ab4e |
Date: Tue, 25 Apr 2017 18:32:45 +0530
|
|
|
21ab4e |
Subject: [PATCH 477/486] cluster/dht: fix on demand migration files from
|
|
|
21ab4e |
client
|
|
|
21ab4e |
|
|
|
21ab4e |
On demand migration of files i.e. migration done by clients
|
|
|
21ab4e |
triggered by a setfattr was broken.
|
|
|
21ab4e |
|
|
|
21ab4e |
Dependency on defrag led to crash when migration was triggered from
|
|
|
21ab4e |
client.
|
|
|
21ab4e |
|
|
|
21ab4e |
Note: This functionality is not available for tiered volumes. Migration
|
|
|
21ab4e |
from tier served client will fail with ENOTSUP.
|
|
|
21ab4e |
|
|
|
21ab4e |
usage (But refer to the steps mentioned below to avoid any issues) :
|
|
|
21ab4e |
setfattr -n "trusted.distribute.migrate-data" -v "1" <filename>
|
|
|
21ab4e |
|
|
|
21ab4e |
The purpose of fixing the on-demand client migration was to give a
|
|
|
21ab4e |
workaround where the user has lots of empty directories compared to
|
|
|
21ab4e |
files and wants to do a remove-brick process.
|
|
|
21ab4e |
|
|
|
21ab4e |
Here are the steps to trigger file migration for remove-brick process from
|
|
|
21ab4e |
client. (It is highly recommended to follow the steps below as is.)
|
|
|
21ab4e |
|
|
|
21ab4e |
Let's say it is a replica volume and the user wants to remove a replica pair
|
|
|
21ab4e |
named brick1 and brick2. (Make sure healing is completed before you run
|
|
|
21ab4e |
these steps)
|
|
|
21ab4e |
|
|
|
21ab4e |
Step-1: Start remove-brick process
|
|
|
21ab4e |
- gluster v remove-brick <volname> brick1 brick2 start
|
|
|
21ab4e |
Step-2: Kill the rebalance daemon
|
|
|
21ab4e |
- ps aux | grep glusterfs | grep rebalance\/ | awk '{print $2}' | xargs kill
|
|
|
21ab4e |
Step-3: Do a fresh mount as mentioned here
|
|
|
21ab4e |
- glusterfs -s ${localhostname} --volfile-id rebalance/$volume-name /tmp/mount/point
|
|
|
21ab4e |
Step-4: Go to one of the bricks (among brick1 and brick2)
|
|
|
21ab4e |
- cd <brick1 path>
|
|
|
21ab4e |
Step-5: Run the following command.
|
|
|
21ab4e |
- find . -not \( -path ./.glusterfs -prune \) -type f -not -perm 01000 -exec bash -c 'setfattr -n "distribute.fix.layout" -v "1" ${mountpoint}/$(dirname '{}')' \; -exec setfattr -n "trusted.distribute.migrate-data" -v "1" ${mountpoint}/'{}' \;
|
|
|
21ab4e |
|
|
|
21ab4e |
This command will ignore the linkto files and empty directories, do a fix-layout of
|
|
|
21ab4e |
the parent directory, and trigger a migration operation on the files.
|
|
|
21ab4e |
|
|
|
21ab4e |
Step-6: Once this process is completed do "remove-brick force"
|
|
|
21ab4e |
- gluster v remove-brick <volname> brick1 brick2 force
|
|
|
21ab4e |
|
|
|
21ab4e |
Note: Use the above script only when there are a large number of empty directories.
|
|
|
21ab4e |
Since the script does a crawl on the brick side directly and avoids directories that
|
|
|
21ab4e |
are empty, the time spent on fixing layout on those directories is eliminated (even if the script
|
|
|
21ab4e |
does not do fix-layout on empty directories, post remove-brick a fresh layout will be built
|
|
|
21ab4e |
for the directory, hence not affecting application continuity).
|
|
|
21ab4e |
|
|
|
21ab4e |
Detailing the expectation for hardlink migration with this patch:
|
|
|
21ab4e |
Hardlink is migrated only for remove-brick process. It is highly essential
|
|
|
21ab4e |
to have a new mount(step-3) for the hardlink migration to happen. Why?:
|
|
|
21ab4e |
setfattr operation is an inode based operation. Since, we are doing setfattr from
|
|
|
21ab4e |
fuse mount here, inode_path will try to build path from the linked dentries to the inode.
|
|
|
21ab4e |
For a file without hardlinks the path construction will be correct. But for hardlinks,
|
|
|
21ab4e |
the inode will have multiple dentries linked.
|
|
|
21ab4e |
|
|
|
21ab4e |
Without fresh mount, inode_path will always get the most recently linked dentry.
|
|
|
21ab4e |
e.g. if there are three hardlinks named dir1/link1, dir2/link2, dir3/link3, on a client
|
|
|
21ab4e |
where these hardlinks are looked up, inode_path will always return the path dir3/link3
|
|
|
21ab4e |
if dir3/link3 was looked up most recently. Hence, we won't be able to create linkto
|
|
|
21ab4e |
files for all other hardlinks on destination (read gf_defrag_handle_hardlink for more details
|
|
|
21ab4e |
on hardlink migration).
|
|
|
21ab4e |
|
|
|
21ab4e |
With a fresh mount, the lookup and setfattr become serialized. e.g. link2 won't be
|
|
|
21ab4e |
looked up until link1 is looked up and migrated. Hence, inode_path will always have the correct
|
|
|
21ab4e |
path, in this case link1 dentry is picked up (as this is the most recently looked up inode) and
|
|
|
21ab4e |
the path is built right.
|
|
|
21ab4e |
|
|
|
21ab4e |
Note: If you run the above script on an existing mount(all entries looked up), hard links may
|
|
|
21ab4e |
not be migrated, but there should not be any other issue. Please raise a bug, if you find any
|
|
|
21ab4e |
issue.
|
|
|
21ab4e |
|
|
|
21ab4e |
Tests: Manual
|
|
|
21ab4e |
|
|
|
21ab4e |
> Change-Id: I9854cdd4955d9e24494f348fb29ba856ea7ac50a
|
|
|
21ab4e |
> BUG: 1450975
|
|
|
21ab4e |
> Signed-off-by: Susant Palai <spalai@redhat.com>
|
|
|
21ab4e |
> Reviewed-on: https://review.gluster.org/17115
|
|
|
21ab4e |
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
> Smoke: Gluster Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
|
|
|
21ab4e |
> Signed-off-by: Susant Palai <spalai@redhat.com>
|
|
|
21ab4e |
|
|
|
21ab4e |
Change-Id: I9854cdd4955d9e24494f348fb29ba856ea7ac50a
|
|
|
21ab4e |
BUG: 1428936
|
|
|
21ab4e |
Signed-off-by: Susant Palai <spalai@redhat.com>
|
|
|
21ab4e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/107633
|
|
|
21ab4e |
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
|
|
|
21ab4e |
Reviewed-by: Nithya Balachandran <nbalacha@redhat.com>
|
|
|
21ab4e |
---
|
|
|
21ab4e |
xlators/cluster/dht/src/dht-common.c | 3 +++
|
|
|
21ab4e |
xlators/cluster/dht/src/dht-common.h | 7 +++---
|
|
|
21ab4e |
xlators/cluster/dht/src/dht-rebalance.c | 41 ++++++++++++++++++++-------------
|
|
|
21ab4e |
xlators/cluster/dht/src/dht-shared.c | 4 +++-
|
|
|
21ab4e |
4 files changed, 35 insertions(+), 20 deletions(-)
|
|
|
21ab4e |
|
|
|
21ab4e |
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
|
|
|
21ab4e |
index 9286125..052ac7f 100644
|
|
|
21ab4e |
--- a/xlators/cluster/dht/src/dht-common.c
|
|
|
21ab4e |
+++ b/xlators/cluster/dht/src/dht-common.c
|
|
|
21ab4e |
@@ -4159,6 +4159,9 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
|
|
|
21ab4e |
goto err;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
+ if (gf_uuid_is_null (local->loc.pargfid))
|
|
|
21ab4e |
+ gf_uuid_copy (local->loc.pargfid, local->loc.parent->gfid);
|
|
|
21ab4e |
+
|
|
|
21ab4e |
methods->migration_get_dst_subvol(this, local);
|
|
|
21ab4e |
|
|
|
21ab4e |
if (!local->rebalance.target_node) {
|
|
|
21ab4e |
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
|
|
|
21ab4e |
index 184bd22..eb1f2bd 100644
|
|
|
21ab4e |
--- a/xlators/cluster/dht/src/dht-common.h
|
|
|
21ab4e |
+++ b/xlators/cluster/dht/src/dht-common.h
|
|
|
21ab4e |
@@ -463,9 +463,6 @@ struct gf_defrag_info_ {
|
|
|
21ab4e |
int32_t current_thread_count;
|
|
|
21ab4e |
pthread_cond_t df_wakeup_thread;
|
|
|
21ab4e |
|
|
|
21ab4e |
- /* Hard link handle requirement */
|
|
|
21ab4e |
- synclock_t link_lock;
|
|
|
21ab4e |
-
|
|
|
21ab4e |
/* lock migration flag */
|
|
|
21ab4e |
gf_boolean_t lock_migration_enabled;
|
|
|
21ab4e |
};
|
|
|
21ab4e |
@@ -565,6 +562,10 @@ struct dht_conf {
|
|
|
21ab4e |
|
|
|
21ab4e |
gf_boolean_t lock_migration_enabled;
|
|
|
21ab4e |
gf_lock_t lock;
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ /* Hard link handle requirement for migration triggered from client*/
|
|
|
21ab4e |
+ synclock_t link_lock;
|
|
|
21ab4e |
+
|
|
|
21ab4e |
};
|
|
|
21ab4e |
typedef struct dht_conf dht_conf_t;
|
|
|
21ab4e |
|
|
|
21ab4e |
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
|
|
|
21ab4e |
index 0bbe952..1ee76fc 100644
|
|
|
21ab4e |
--- a/xlators/cluster/dht/src/dht-rebalance.c
|
|
|
21ab4e |
+++ b/xlators/cluster/dht/src/dht-rebalance.c
|
|
|
21ab4e |
@@ -494,7 +494,7 @@ out:
|
|
|
21ab4e |
static int
|
|
|
21ab4e |
__check_file_has_hardlink (xlator_t *this, loc_t *loc,
|
|
|
21ab4e |
struct iatt *stbuf, dict_t *xattrs, int flags,
|
|
|
21ab4e |
- gf_defrag_info_t *defrag, int *fop_errno)
|
|
|
21ab4e |
+ gf_defrag_info_t *defrag, dht_conf_t *conf, int *fop_errno)
|
|
|
21ab4e |
{
|
|
|
21ab4e |
int ret = 0;
|
|
|
21ab4e |
|
|
|
21ab4e |
@@ -505,10 +505,10 @@ __check_file_has_hardlink (xlator_t *this, loc_t *loc,
|
|
|
21ab4e |
if (stbuf->ia_nlink > 1) {
|
|
|
21ab4e |
/* support for decomission */
|
|
|
21ab4e |
if (flags == GF_DHT_MIGRATE_HARDLINK) {
|
|
|
21ab4e |
- synclock_lock (&defrag->link_lock);
|
|
|
21ab4e |
+ synclock_lock (&conf->link_lock);
|
|
|
21ab4e |
ret = gf_defrag_handle_hardlink
|
|
|
21ab4e |
(this, loc, xattrs, stbuf, fop_errno);
|
|
|
21ab4e |
- synclock_unlock (&defrag->link_lock);
|
|
|
21ab4e |
+ synclock_unlock (&conf->link_lock);
|
|
|
21ab4e |
/*
|
|
|
21ab4e |
Returning zero will force the file to be remigrated.
|
|
|
21ab4e |
Checkout gf_defrag_handle_hardlink for more
|
|
|
21ab4e |
@@ -546,7 +546,8 @@ __check_file_has_hardlink (xlator_t *this, loc_t *loc,
|
|
|
21ab4e |
static int
|
|
|
21ab4e |
__is_file_migratable (xlator_t *this, loc_t *loc,
|
|
|
21ab4e |
struct iatt *stbuf, dict_t *xattrs, int flags,
|
|
|
21ab4e |
- gf_defrag_info_t *defrag, int *fop_errno)
|
|
|
21ab4e |
+ gf_defrag_info_t *defrag, dht_conf_t *conf,
|
|
|
21ab4e |
+ int *fop_errno)
|
|
|
21ab4e |
{
|
|
|
21ab4e |
int ret = -1;
|
|
|
21ab4e |
int lock_count = 0;
|
|
|
21ab4e |
@@ -561,7 +562,7 @@ __is_file_migratable (xlator_t *this, loc_t *loc,
|
|
|
21ab4e |
goto out;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
- if (!defrag->lock_migration_enabled) {
|
|
|
21ab4e |
+ if (!conf->lock_migration_enabled) {
|
|
|
21ab4e |
ret = dict_get_int32 (xattrs, GLUSTERFS_POSIXLK_COUNT,
|
|
|
21ab4e |
&lock_count);
|
|
|
21ab4e |
if (ret) {
|
|
|
21ab4e |
@@ -588,7 +589,7 @@ __is_file_migratable (xlator_t *this, loc_t *loc,
|
|
|
21ab4e |
|
|
|
21ab4e |
/* Check if file has hardlink*/
|
|
|
21ab4e |
ret = __check_file_has_hardlink (this, loc, stbuf, xattrs,
|
|
|
21ab4e |
- flags, defrag, fop_errno);
|
|
|
21ab4e |
+ flags, defrag, conf, fop_errno);
|
|
|
21ab4e |
out:
|
|
|
21ab4e |
return ret;
|
|
|
21ab4e |
}
|
|
|
21ab4e |
@@ -1493,11 +1494,19 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
|
|
|
21ab4e |
fd_t *linkto_fd = NULL;
|
|
|
21ab4e |
gf_boolean_t ignore_failure = _gf_false;
|
|
|
21ab4e |
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ /* If defrag is NULL, it should be assumed that migration is triggered
|
|
|
21ab4e |
+ * from client */
|
|
|
21ab4e |
defrag = conf->defrag;
|
|
|
21ab4e |
- if (!defrag)
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ /* migration of files from clients is restricted to non-tiered clients
|
|
|
21ab4e |
+ * for now */
|
|
|
21ab4e |
+ if (!defrag && dht_is_tier_xlator (this)) {
|
|
|
21ab4e |
+ ret = ENOTSUP;
|
|
|
21ab4e |
goto out;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
|
|
|
21ab4e |
- if (defrag->tier_conf.is_tier)
|
|
|
21ab4e |
+ if (defrag && defrag->tier_conf.is_tier)
|
|
|
21ab4e |
log_level = GF_LOG_TRACE;
|
|
|
21ab4e |
|
|
|
21ab4e |
gf_log (this->name,
|
|
|
21ab4e |
@@ -1526,7 +1535,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
|
|
|
21ab4e |
|
|
|
21ab4e |
/* Do not migrate file in case lock migration is not enabled on the
|
|
|
21ab4e |
* volume*/
|
|
|
21ab4e |
- if (!defrag->lock_migration_enabled) {
|
|
|
21ab4e |
+ if (!conf->lock_migration_enabled) {
|
|
|
21ab4e |
ret = dict_set_int32 (dict,
|
|
|
21ab4e |
GLUSTERFS_POSIXLK_COUNT, sizeof(int32_t));
|
|
|
21ab4e |
if (ret) {
|
|
|
21ab4e |
@@ -1582,7 +1591,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
|
|
|
21ab4e |
src_ia_prot = stbuf.ia_prot;
|
|
|
21ab4e |
|
|
|
21ab4e |
/* Check if file can be migrated */
|
|
|
21ab4e |
- ret = __is_file_migratable (this, loc, &stbuf, xattr_rsp, flag, defrag,
|
|
|
21ab4e |
+ ret = __is_file_migratable (this, loc, &stbuf, xattr_rsp, flag, defrag, conf,
|
|
|
21ab4e |
fop_errno);
|
|
|
21ab4e |
if (ret) {
|
|
|
21ab4e |
if (ret == -2)
|
|
|
21ab4e |
@@ -1702,7 +1711,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
|
|
|
21ab4e |
|
|
|
21ab4e |
/* Check again if file has hardlink */
|
|
|
21ab4e |
ret = __check_file_has_hardlink (this, loc, &stbuf, xattr_rsp,
|
|
|
21ab4e |
- flag, defrag, fop_errno);
|
|
|
21ab4e |
+ flag, defrag, conf, fop_errno);
|
|
|
21ab4e |
if (ret) {
|
|
|
21ab4e |
if (ret == -2)
|
|
|
21ab4e |
ret = 0;
|
|
|
21ab4e |
@@ -1715,7 +1724,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
|
|
|
21ab4e |
|
|
|
21ab4e |
|
|
|
21ab4e |
/* All I/O happens in this function */
|
|
|
21ab4e |
- if (defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
|
|
|
21ab4e |
+ if (defrag && defrag->cmd == GF_DEFRAG_CMD_START_TIER) {
|
|
|
21ab4e |
ret = __tier_migrate_data (defrag, from, to, src_fd, dst_fd,
|
|
|
21ab4e |
stbuf.ia_size,
|
|
|
21ab4e |
file_has_holes, fop_errno);
|
|
|
21ab4e |
@@ -1774,7 +1783,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
|
|
|
21ab4e |
|
|
|
21ab4e |
/* Take meta lock */
|
|
|
21ab4e |
|
|
|
21ab4e |
- if (defrag->lock_migration_enabled) {
|
|
|
21ab4e |
+ if (conf->lock_migration_enabled) {
|
|
|
21ab4e |
meta_dict = dict_new ();
|
|
|
21ab4e |
if (!meta_dict) {
|
|
|
21ab4e |
gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
|
21ab4e |
@@ -1822,7 +1831,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
|
|
|
21ab4e |
}
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
- if (!defrag->lock_migration_enabled) {
|
|
|
21ab4e |
+ if (!conf->lock_migration_enabled) {
|
|
|
21ab4e |
plock.l_type = F_WRLCK;
|
|
|
21ab4e |
plock.l_start = 0;
|
|
|
21ab4e |
plock.l_len = 0;
|
|
|
21ab4e |
@@ -2020,7 +2029,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
/* store size of previous migrated file */
|
|
|
21ab4e |
- if (defrag->tier_conf.is_tier) {
|
|
|
21ab4e |
+ if (defrag && defrag->tier_conf.is_tier) {
|
|
|
21ab4e |
if (from != TIER_HASHED_SUBVOL) {
|
|
|
21ab4e |
defrag->tier_conf.st_last_promoted_size = stbuf.ia_size;
|
|
|
21ab4e |
} else {
|
|
|
21ab4e |
@@ -2130,7 +2139,7 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
|
|
|
21ab4e |
|
|
|
21ab4e |
metaunlock:
|
|
|
21ab4e |
|
|
|
21ab4e |
- if (defrag->lock_migration_enabled && meta_locked) {
|
|
|
21ab4e |
+ if (conf->lock_migration_enabled && meta_locked) {
|
|
|
21ab4e |
|
|
|
21ab4e |
dict_del (meta_dict, GF_META_LOCK_KEY);
|
|
|
21ab4e |
|
|
|
21ab4e |
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
|
|
|
21ab4e |
index 86b19e3..1128cfe 100644
|
|
|
21ab4e |
--- a/xlators/cluster/dht/src/dht-shared.c
|
|
|
21ab4e |
+++ b/xlators/cluster/dht/src/dht-shared.c
|
|
|
21ab4e |
@@ -241,6 +241,8 @@ dht_fini (xlator_t *this)
|
|
|
21ab4e |
|
|
|
21ab4e |
GF_FREE (conf->subvolume_status);
|
|
|
21ab4e |
|
|
|
21ab4e |
+ synclock_destroy (&conf->link_lock);
|
|
|
21ab4e |
+
|
|
|
21ab4e |
if (conf->lock_pool)
|
|
|
21ab4e |
mem_pool_destroy (conf->lock_pool);
|
|
|
21ab4e |
|
|
|
21ab4e |
@@ -678,6 +680,7 @@ dht_init (xlator_t *this)
|
|
|
21ab4e |
LOCK_INIT (&conf->subvolume_lock);
|
|
|
21ab4e |
LOCK_INIT (&conf->layout_lock);
|
|
|
21ab4e |
LOCK_INIT (&conf->lock);
|
|
|
21ab4e |
+ synclock_init (&conf->link_lock, SYNC_LOCK_DEFAULT);
|
|
|
21ab4e |
|
|
|
21ab4e |
/* We get the commit-hash to set only for rebalance process */
|
|
|
21ab4e |
if (dict_get_uint32 (this->options,
|
|
|
21ab4e |
@@ -733,7 +736,6 @@ dht_init (xlator_t *this)
|
|
|
21ab4e |
|
|
|
21ab4e |
defrag->wakeup_crawler = 0;
|
|
|
21ab4e |
|
|
|
21ab4e |
- synclock_init (&defrag->link_lock, SYNC_LOCK_DEFAULT);
|
|
|
21ab4e |
pthread_mutex_init (&defrag->dfq_mutex, 0);
|
|
|
21ab4e |
pthread_cond_init (&defrag->parallel_migration_cond, 0);
|
|
|
21ab4e |
pthread_cond_init (&defrag->rebalance_crawler_alarm, 0);
|
|
|
21ab4e |
--
|
|
|
21ab4e |
1.8.3.1
|
|
|
21ab4e |
|