From 6565749c95e90f360a994bde1416cffd22cd8ce9 Mon Sep 17 00:00:00 2001 From: N Balachandran Date: Mon, 25 Mar 2019 15:56:56 +0530 Subject: [PATCH 126/141] cluster/dht: refactor dht lookup functions Part 1: refactor the dht_lookup_dir_cbk and dht_selfheal_directory functions. Added a simple dht selfheal directory test upstream: https://review.gluster.org/#/c/glusterfs/+/22407/ > Change-Id: I1410c26359e3c14b396adbe751937a52bd2fcff9 > updates: bz#1590385 Change-Id: Idd0a7df7122d634c371ecf30c0dbb94dc6063416 BUG: 1703897 Signed-off-by: N Balachandran Reviewed-on: https://code.engineering.redhat.com/gerrit/169037 Tested-by: RHGS Build Bot Reviewed-by: Susant Palai Reviewed-by: Atin Mukherjee --- tests/basic/distribute/dir-heal.t | 145 +++++++++++++++++++++++++++ xlators/cluster/dht/src/dht-common.c | 178 +++++++++++++++------------------ xlators/cluster/dht/src/dht-selfheal.c | 65 +++++++----- 3 files changed, 264 insertions(+), 124 deletions(-) create mode 100644 tests/basic/distribute/dir-heal.t diff --git a/tests/basic/distribute/dir-heal.t b/tests/basic/distribute/dir-heal.t new file mode 100644 index 0000000..851f765 --- /dev/null +++ b/tests/basic/distribute/dir-heal.t @@ -0,0 +1,145 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../nfs.rc +. $(dirname $0)/../../common-utils.rc + +# Test 1 overview: +# ---------------- +# +# 1. Kill one brick of the volume. +# 2. Create directories and change directory properties. +# 3. Bring up the brick and access the directory +# 4. Check the permissions and xattrs on the backend + +cleanup + +TEST glusterd +TEST pidof glusterd + +TEST $CLI volume create $V0 $H0:$B0/$V0-{1..3} +TEST $CLI volume start $V0 + +# We want the lookup to reach DHT +TEST $CLI volume set $V0 performance.stat-prefetch off + +# Mount using FUSE , kill a brick and create directories +TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 + +ls $M0/ +cd $M0 + +TEST kill_brick $V0 $H0 $B0/$V0-1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" brick_up_status $V0 $H0 $B0/$V0-1 + +TEST mkdir dir{1..4} + +# No change for dir1 +# Change permissions for dir2 +# Set xattr on dir3 +# Change permissions and set xattr on dir4 + +TEST chmod 777 $M0/dir2 + +TEST setfattr -n "user.test" -v "test" $M0/dir3 + +TEST chmod 777 $M0/dir4 +TEST setfattr -n "user.test" -v "test" $M0/dir4 + + +# Start all bricks + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/$V0-1 + +#$CLI volume status + +# It takes a while for the client to reconnect to the brick +sleep 5 + +stat $M0/dir* > /dev/null + +# Check that directories have been created on the brick that was killed + +TEST ls $B0/$V0-1/dir1 + +TEST ls $B0/$V0-1/dir2 +EXPECT "777" stat -c "%a" $B0/$V0-1/dir2 + +TEST ls $B0/$V0-1/dir3 +EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir3 + + +TEST ls $B0/$V0-1/dir4 +EXPECT "777" stat -c "%a" $B0/$V0-1/dir4 +EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir4 + + +TEST rm -rf $M0/* + +cd + +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + + +# Test 2 overview: +# ---------------- +# 1. Create directories with all bricks up. +# 2. Kill a brick and change directory properties and set user xattr. +# 2. Bring up the brick and access the directory +# 3. Check the permissions and xattrs on the backend + + +TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0 + +ls $M0/ +cd $M0 +TEST mkdir dir{1..4} + +TEST kill_brick $V0 $H0 $B0/$V0-1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" brick_up_status $V0 $H0 $B0/$V0-1 + +# No change for dir1 +# Change permissions for dir2 +# Set xattr on dir3 +# Change permissions and set xattr on dir4 + +TEST chmod 777 $M0/dir2 + +TEST setfattr -n "user.test" -v "test" $M0/dir3 + +TEST chmod 777 $M0/dir4 +TEST setfattr -n "user.test" -v "test" $M0/dir4 + + +# Start all bricks + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/$V0-1 + +#$CLI volume status + +# It takes a while for the client to reconnect to the brick +sleep 5 + +stat $M0/dir* > /dev/null + +# Check directories on the brick that was killed + +TEST ls $B0/$V0-1/dir2 +EXPECT "777" stat -c "%a" $B0/$V0-1/dir2 + +TEST ls $B0/$V0-1/dir3 +EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir3 + + +TEST ls $B0/$V0-1/dir4 +EXPECT "777" stat -c "%a" $B0/$V0-1/dir4 +EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir4 +cd + + +# Cleanup +cleanup + diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 2a68193..d3e900c 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -801,9 +801,8 @@ dht_common_mark_mdsxattr(call_frame_t *frame, int *errst, call_frame_t *xattr_frame = NULL; gf_boolean_t vol_down = _gf_false; - this = frame->this; - GF_VALIDATE_OR_GOTO("dht", frame, out); + this = frame->this; GF_VALIDATE_OR_GOTO("dht", this, out); GF_VALIDATE_OR_GOTO(this->name, frame->local, out); GF_VALIDATE_OR_GOTO(this->name, this->private, out); @@ -812,6 +811,7 @@ dht_common_mark_mdsxattr(call_frame_t *frame, int *errst, conf = this->private; layout = local->selfheal.layout; local->mds_heal_fresh_lookup = mark_during_fresh_lookup; + gf_uuid_unparse(local->gfid, gfid_local); /* Code to update hashed subvol consider as a mds subvol @@ -1240,6 +1240,31 @@ out: } int +dht_needs_selfheal(call_frame_t *frame, xlator_t *this) +{ + dht_local_t *local = NULL; + dht_layout_t *layout = NULL; + int needs_selfheal = 0; + int ret = 0; + + local = frame->local; + layout = local->layout; + + if (local->need_attrheal || local->need_xattr_heal || + local->need_selfheal) { + needs_selfheal = 1; + } + + ret = dht_layout_normalize(this, &local->loc, layout); + + if (ret != 0) { + gf_msg_debug(this->name, 0, "fixing assignment on %s", local->loc.path); + needs_selfheal = 1; + } + return needs_selfheal; +} + +int dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf, dict_t *xattr, struct iatt *postparent) @@ -1256,8 +1281,6 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, char gfid_local[GF_UUID_BUF_SIZE] = {0}; char gfid_node[GF_UUID_BUF_SIZE] = {0}; int32_t mds_xattr_val[1] = {0}; - call_frame_t *copy = NULL; - dht_local_t *copy_local = NULL; GF_VALIDATE_OR_GOTO("dht", frame, out); GF_VALIDATE_OR_GOTO("dht", this, out); @@ -1270,7 +1293,11 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, conf = this->private; layout = local->layout; + gf_msg_debug(this->name, op_errno, + "%s: lookup on %s returned with op_ret = %d, op_errno = %d", + local->loc.path, prev->name, op_ret, op_errno); + /* The first successful lookup*/ if (!op_ret && gf_uuid_is_null(local->gfid)) { memcpy(local->gfid, stbuf->ia_gfid, 16); } @@ -1298,13 +1325,10 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, if (op_ret == -1) { local->op_errno = op_errno; - gf_msg_debug(this->name, op_errno, - "%s: lookup on %s returned error", local->loc.path, - prev->name); /* The GFID is missing on this subvol. Force a heal. */ if (op_errno == ENODATA) { - local->need_selfheal = 1; + local->need_lookup_everywhere = 1; } goto unlock; } @@ -1312,12 +1336,11 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, is_dir = check_is_dir(inode, stbuf, xattr); if (!is_dir) { gf_msg_debug(this->name, 0, - "lookup of %s on %s returned non" - "dir 0%o" + "%s: lookup on %s returned non dir 0%o" "calling lookup_everywhere", local->loc.path, prev->name, stbuf->ia_type); - local->need_selfheal = 1; + local->need_lookup_everywhere = 1; goto unlock; } @@ -1328,14 +1351,8 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, dht_aggregate_xattr(local->xattr, xattr); } - if (dict_get(xattr, conf->mds_xattr_key)) { - local->mds_subvol = prev; - local->mds_stbuf.ia_gid = stbuf->ia_gid; - local->mds_stbuf.ia_uid = stbuf->ia_uid; - local->mds_stbuf.ia_prot = stbuf->ia_prot; - } - if (local->stbuf.ia_type != IA_INVAL) { + /* This is not the first subvol to respond */ if (!__is_root_gfid(stbuf->ia_gfid) && ((local->stbuf.ia_gid != stbuf->ia_gid) || (local->stbuf.ia_uid != stbuf->ia_uid) || @@ -1348,65 +1365,64 @@ dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, if (local->inode == NULL) local->inode = inode_ref(inode); + /* This could be a problem */ dht_iatt_merge(this, &local->stbuf, stbuf); dht_iatt_merge(this, &local->postparent, postparent); if (!dict_get(xattr, conf->mds_xattr_key)) { gf_msg_debug(this->name, 0, - "Internal xattr %s is not present " - " on path %s gfid is %s ", - conf->mds_xattr_key, local->loc.path, gfid_local); + "%s: mds xattr %s is not present " + "on %s(gfid = %s)", + local->loc.path, conf->mds_xattr_key, prev->name, + gfid_local); goto unlock; - } else { - /* Save mds subvol on inode ctx */ - ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, - DHT_MSG_SET_INODE_CTX_FAILED, - "Failed to set hashed subvol for %s vol is %s", - local->loc.path, prev->name); - } + } + + local->mds_subvol = prev; + local->mds_stbuf = *stbuf; + + /* Save mds subvol on inode ctx */ + + ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED, + "%s: Failed to set mds (%s)", local->loc.path, prev->name); } check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key, mds_xattr_val, 1, &errst); if ((check_mds < 0) && !errst) { local->mds_xattr = dict_ref(xattr); gf_msg_debug(this->name, 0, - "Value of %s is not zero on hashed subvol " - "so xattr needs to be heal on non hashed" - " path is %s and vol name is %s " - " gfid is %s", - conf->mds_xattr_key, local->loc.path, prev->name, + "%s: %s is not zero on %s. Xattrs need to be healed." + "(gfid = %s)", + local->loc.path, conf->mds_xattr_key, prev->name, gfid_local); local->need_xattr_heal = 1; - local->mds_subvol = prev; } } + unlock: UNLOCK(&frame->lock); this_call_cnt = dht_frame_return(frame); if (is_last_call(this_call_cnt)) { + /* If the mds subvol is not set correctly*/ + if (!__is_root_gfid(local->gfid) && + (!dict_get(local->xattr, conf->mds_xattr_key))) { + local->need_selfheal = 1; + } + /* No need to call xattr heal code if volume count is 1 */ - if (conf->subvolume_cnt == 1) + if (conf->subvolume_cnt == 1) { local->need_xattr_heal = 0; - - /* Code to update all extended attributed from hashed subvol - to local->xattr - */ - if (local->need_xattr_heal && (local->mds_xattr)) { - dht_dir_set_heal_xattr(this, local, local->xattr, local->mds_xattr, - NULL, NULL); - dict_unref(local->mds_xattr); - local->mds_xattr = NULL; } - if (local->need_selfheal) { - local->need_selfheal = 0; + if (local->need_selfheal || local->need_lookup_everywhere) { /* Set the gfid-req so posix will set the GFID*/ if (!gf_uuid_is_null(local->gfid)) { + /* Ok, this should _never_ happen */ ret = dict_set_static_bin(local->xattr_req, "gfid-req", local->gfid, 16); } else { @@ -1414,73 +1430,36 @@ unlock: ret = dict_set_static_bin(local->xattr_req, "gfid-req", local->gfid_req, 16); } + } + + if (local->need_lookup_everywhere) { + local->need_lookup_everywhere = 0; dht_lookup_everywhere(frame, this, &local->loc); return 0; } if (local->op_ret == 0) { - ret = dht_layout_normalize(this, &local->loc, layout); - - if (ret != 0) { - gf_msg_debug(this->name, 0, "fixing assignment on %s", - local->loc.path); + if (dht_needs_selfheal(frame, this)) { goto selfheal; } dht_layout_set(this, local->inode, layout); - if (!dict_get(local->xattr, conf->mds_xattr_key) || - local->need_xattr_heal) - goto selfheal; - } - - if (local->inode) { - dht_inode_ctx_time_update(local->inode, this, &local->stbuf, 1); - } - - if (local->loc.parent) { - dht_inode_ctx_time_update(local->loc.parent, this, - &local->postparent, 1); - } - - if (local->need_attrheal) { - local->need_attrheal = 0; - if (!__is_root_gfid(inode->gfid)) { - local->stbuf.ia_gid = local->mds_stbuf.ia_gid; - local->stbuf.ia_uid = local->mds_stbuf.ia_uid; - local->stbuf.ia_prot = local->mds_stbuf.ia_prot; + if (local->inode) { + dht_inode_ctx_time_update(local->inode, this, &local->stbuf, 1); } - copy = create_frame(this, this->ctx->pool); - if (copy) { - copy_local = dht_local_init(copy, &local->loc, NULL, 0); - if (!copy_local) { - DHT_STACK_DESTROY(copy); - goto skip_attr_heal; - } - copy_local->stbuf = local->stbuf; - gf_uuid_copy(copy_local->loc.gfid, local->stbuf.ia_gfid); - copy_local->mds_stbuf = local->mds_stbuf; - copy_local->mds_subvol = local->mds_subvol; - copy->local = copy_local; - FRAME_SU_DO(copy, dht_local_t); - ret = synctask_new(this->ctx->env, dht_dir_attr_heal, - dht_dir_attr_heal_done, copy, copy); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, ENOMEM, - DHT_MSG_DIR_ATTR_HEAL_FAILED, - "Synctask creation failed to heal attr " - "for path %s gfid %s ", - local->loc.path, local->gfid); - DHT_STACK_DESTROY(copy); - } + + if (local->loc.parent) { + dht_inode_ctx_time_update(local->loc.parent, this, + &local->postparent, 1); } } - skip_attr_heal: DHT_STRIP_PHASE1_FLAGS(&local->stbuf); dht_set_fixed_dir_stat(&local->postparent); /* Delete mds xattr at the time of STACK UNWIND */ if (local->xattr) GF_REMOVE_INTERNAL_XATTR(conf->mds_xattr_key, local->xattr); + DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, local->inode, &local->stbuf, local->xattr, &local->postparent); @@ -5444,9 +5423,8 @@ dht_dir_common_set_remove_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc, } else { gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED, - "Failed to get mds subvol for path %s" - "gfid is %s ", - loc->path, gfid_local); + "%s: Failed to get mds subvol. (gfid is %s)", loc->path, + gfid_local); } (*op_errno) = ENOENT; goto err; diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c index bd1b7ea..5420fca 100644 --- a/xlators/cluster/dht/src/dht-selfheal.c +++ b/xlators/cluster/dht/src/dht-selfheal.c @@ -1033,18 +1033,27 @@ dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf, int missing_attr = 0; int i = 0, ret = -1; dht_local_t *local = NULL; + dht_conf_t *conf = NULL; xlator_t *this = NULL; int cnt = 0; local = frame->local; this = frame->this; + conf = this->private; + + /* We need to heal the attrs if: + * 1. Any directories were missing - the newly created dirs will need + * to have the correct attrs set + * 2. An existing dir does not have the correct permissions -they may + * have been changed when a brick was down. + */ for (i = 0; i < layout->cnt; i++) { if (layout->list[i].err == -1) missing_attr++; } - if (missing_attr == 0) { + if ((missing_attr == 0) && (local->need_attrheal == 0)) { if (!local->heal_layout) { gf_msg_trace(this->name, 0, "Skip heal layout for %s gfid = %s ", loc->path, uuid_utoa(loc->gfid)); @@ -1062,19 +1071,12 @@ dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf, return 0; } - local->call_cnt = missing_attr; - cnt = layout->cnt; + cnt = local->call_cnt = conf->subvolume_cnt; for (i = 0; i < cnt; i++) { - if (layout->list[i].err == -1) { - gf_msg_trace(this->name, 0, "%s: setattr on subvol %s, gfid = %s", - loc->path, layout->list[i].xlator->name, - uuid_utoa(loc->gfid)); - - STACK_WIND( - frame, dht_selfheal_dir_setattr_cbk, layout->list[i].xlator, - layout->list[i].xlator->fops->setattr, loc, stbuf, valid, NULL); - } + STACK_WIND(frame, dht_selfheal_dir_setattr_cbk, layout->list[i].xlator, + layout->list[i].xlator->fops->setattr, loc, stbuf, valid, + NULL); } return 0; @@ -1492,6 +1494,9 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout, } if (missing_dirs == 0) { + /* We don't need to create any directories. Proceed to heal the + * attrs and xattrs + */ if (!__is_root_gfid(local->stbuf.ia_gfid)) { if (local->need_xattr_heal) { local->need_xattr_heal = 0; @@ -1499,8 +1504,8 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout, if (ret) gf_msg(this->name, GF_LOG_ERROR, ret, DHT_MSG_DIR_XATTR_HEAL_FAILED, - "xattr heal failed for " - "directory %s gfid %s ", + "%s:xattr heal failed for " + "directory (gfid = %s)", local->loc.path, local->gfid); } else { if (!gf_uuid_is_null(local->gfid)) @@ -1512,8 +1517,8 @@ dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout, gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_DIR_XATTR_HEAL_FAILED, - "Failed to set mds xattr " - "for directory %s gfid %s ", + "%s: Failed to set mds xattr " + "for directory (gfid = %s)", local->loc.path, local->gfid); } } @@ -2085,10 +2090,10 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, loc_t *loc, dht_layout_t *layout) { dht_local_t *local = NULL; + xlator_t *this = NULL; uint32_t down = 0; uint32_t misc = 0; int ret = 0; - xlator_t *this = NULL; char pgfid[GF_UUID_BUF_SIZE] = {0}; char gfid[GF_UUID_BUF_SIZE] = {0}; inode_t *linked_inode = NULL, *inode = NULL; @@ -2099,6 +2104,11 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, local->selfheal.dir_cbk = dir_cbk; local->selfheal.layout = dht_layout_ref(this, layout); + if (local->need_attrheal && !IA_ISINVAL(local->mds_stbuf.ia_type)) { + /*Use the one in the mds_stbuf*/ + local->stbuf = local->mds_stbuf; + } + if (!__is_root_gfid(local->stbuf.ia_gfid)) { gf_uuid_unparse(local->stbuf.ia_gfid, gfid); gf_uuid_unparse(loc->parent->gfid, pgfid); @@ -2118,6 +2128,13 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, inode_unref(inode); } + if (local->need_xattr_heal && (local->mds_xattr)) { + dht_dir_set_heal_xattr(this, local, local->xattr, local->mds_xattr, + NULL, NULL); + dict_unref(local->mds_xattr); + local->mds_xattr = NULL; + } + dht_layout_anomalies(this, loc, layout, &local->selfheal.hole_cnt, &local->selfheal.overlaps_cnt, &local->selfheal.missing_cnt, &local->selfheal.down, @@ -2128,18 +2145,18 @@ dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, if (down) { gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED, - "Directory selfheal failed: %d subvolumes down." - "Not fixing. path = %s, gfid = %s", - down, loc->path, gfid); + "%s: Directory selfheal failed: %d subvolumes down." + "Not fixing. gfid = %s", + loc->path, down, gfid); ret = 0; goto sorry_no_fix; } if (misc) { gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DIR_SELFHEAL_FAILED, - "Directory selfheal failed : %d subvolumes " - "have unrecoverable errors. path = %s, gfid = %s", - misc, loc->path, gfid); + "%s: Directory selfheal failed : %d subvolumes " + "have unrecoverable errors. gfid = %s", + loc->path, misc, gfid); ret = 0; goto sorry_no_fix; @@ -2369,13 +2386,13 @@ dht_dir_attr_heal(void *data) frame = data; local = frame->local; - mds_subvol = local->mds_subvol; this = frame->this; GF_VALIDATE_OR_GOTO("dht", this, out); GF_VALIDATE_OR_GOTO("dht", local, out); conf = this->private; GF_VALIDATE_OR_GOTO("dht", conf, out); + mds_subvol = local->mds_subvol; call_cnt = conf->subvolume_cnt; if (!__is_root_gfid(local->stbuf.ia_gfid) && (!mds_subvol)) { -- 1.8.3.1