From 8d24d891aade910b0bb86b27c25a8d2382e19ba0 Mon Sep 17 00:00:00 2001 From: karthik-us Date: Tue, 15 Dec 2020 15:04:19 +0530 Subject: [PATCH 516/517] afr: return -EIO for gfid split-brains. Problem: entry-self-heal-anon-dir-off.t was failing occasionally because afr_gfid_split_brain_source() returned -1 instead of -EIO for split-brains, causing the code to proceed to afr_lookup_done(), which in turn succeeded the lookup if there was a parallel client side heal going on. Fix: Return -EIO instead of -1 so that lookp fails. Also, afr_selfheal_name() was using the same dict to get and set values. This could be problematic if the caller passed local->xdata_req, since setting a response in a request dict can lead to bugs.So changed it to use separate request and response dicts. Upstream patch details: > Fixes: #1739 > Credits Pranith Karampuri > Signed-off-by: Ravishankar N >Change-Id: I5cb4c547fb25e6bfc8bec1740f7eb64e1a5ad443 Upstream patch: https://github.com/gluster/glusterfs/pull/1819/ BUG: 1640148 Signed-off-by: karthik-us Change-Id: I5cb4c547fb25e6bfc8bec1740f7eb64e1a5ad443 Reviewed-on: https://code.engineering.redhat.com/gerrit/221209 Tested-by: RHGS Build Bot Reviewed-by: Ravishankar Narayanankutty --- xlators/cluster/afr/src/afr-common.c | 12 ++++++++---- xlators/cluster/afr/src/afr-self-heal-common.c | 27 +++++++++++++------------- xlators/cluster/afr/src/afr-self-heal-entry.c | 8 ++++---- xlators/cluster/afr/src/afr-self-heal-name.c | 23 +++++++++++----------- xlators/cluster/afr/src/afr-self-heal.h | 5 +++-- xlators/cluster/afr/src/afr-self-heald.c | 2 +- 6 files changed, 42 insertions(+), 35 deletions(-) diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 6f2da11..416012c 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -2366,7 +2366,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) /* If we were called from glfsheal and there is still a gfid * mismatch, succeed the lookup and let glfsheal print the * response via gfid-heal-msg.*/ - if (!dict_get_str_sizen(local->xattr_req, "gfid-heal-msg", + if (!dict_get_str_sizen(local->xattr_rsp, "gfid-heal-msg", &gfid_heal_msg)) goto cant_interpret; @@ -2421,7 +2421,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this) goto error; } - ret = dict_get_str_sizen(local->xattr_req, "gfid-heal-msg", &gfid_heal_msg); + ret = dict_get_str_sizen(local->xattr_rsp, "gfid-heal-msg", &gfid_heal_msg); if (!ret) { ret = dict_set_str_sizen(local->replies[read_subvol].xdata, "gfid-heal-msg", gfid_heal_msg); @@ -2768,9 +2768,12 @@ afr_lookup_selfheal_wrap(void *opaque) local = frame->local; this = frame->this; loc_pargfid(&local->loc, pargfid); + if (!local->xattr_rsp) + local->xattr_rsp = dict_new(); ret = afr_selfheal_name(frame->this, pargfid, local->loc.name, - &local->cont.lookup.gfid_req, local->xattr_req); + &local->cont.lookup.gfid_req, local->xattr_req, + local->xattr_rsp); if (ret == -EIO) goto unwind; @@ -2786,7 +2789,8 @@ afr_lookup_selfheal_wrap(void *opaque) return 0; unwind: - AFR_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, NULL, NULL); + AFR_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, local->xattr_rsp, + NULL); return 0; } diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 0a8a7fd..0954d2c 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -245,7 +245,8 @@ int afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, inode_t *inode, uuid_t pargfid, const char *bname, int src_idx, int child_idx, - unsigned char *locked_on, int *src, dict_t *xdata) + unsigned char *locked_on, int *src, dict_t *req, + dict_t *rsp) { afr_private_t *priv = NULL; char g1[64] = { @@ -266,8 +267,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, "All the bricks should be up to resolve the gfid split " "barin"); - if (xdata) { - ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg", + if (rsp) { + ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg", SALL_BRICKS_UP_TO_RESOLVE); if (ret) gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED, @@ -277,8 +278,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, goto out; } - if (xdata) { - ret = dict_get_int32_sizen(xdata, "heal-op", &heal_op); + if (req) { + ret = dict_get_int32_sizen(req, "heal-op", &heal_op); if (ret) goto fav_child; } else { @@ -292,8 +293,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, if (*src == -1) { gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, SNO_BIGGER_FILE); - if (xdata) { - ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg", + if (rsp) { + ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg", SNO_BIGGER_FILE); if (ret) gf_msg(this->name, GF_LOG_ERROR, 0, @@ -310,8 +311,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, if (*src == -1) { gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, SNO_DIFF_IN_MTIME); - if (xdata) { - ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg", + if (rsp) { + ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg", SNO_DIFF_IN_MTIME); if (ret) gf_msg(this->name, GF_LOG_ERROR, 0, @@ -323,7 +324,7 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, break; case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK: - ret = dict_get_str_sizen(xdata, "child-name", &src_brick); + ret = dict_get_str_sizen(req, "child-name", &src_brick); if (ret) { gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, "Error getting the source " @@ -335,8 +336,8 @@ afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, if (*src == -1) { gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, SERROR_GETTING_SRC_BRICK); - if (xdata) { - ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg", + if (rsp) { + ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg", SERROR_GETTING_SRC_BRICK); if (ret) gf_msg(this->name, GF_LOG_ERROR, 0, @@ -400,7 +401,7 @@ out: uuid_utoa_r(replies[child_idx].poststat.ia_gfid, g1), src_idx, priv->children[src_idx]->name, src_idx, uuid_utoa_r(replies[src_idx].poststat.ia_gfid, g2)); - return -1; + return -EIO; } return 0; } diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 20b07dd..a17dd93 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -399,7 +399,7 @@ afr_selfheal_detect_gfid_and_type_mismatch(xlator_t *this, (ia_type == replies[i].poststat.ia_type)) { ret = afr_gfid_split_brain_source(this, replies, inode, pargfid, bname, src_idx, i, locked_on, src, - NULL); + NULL, NULL); if (ret) gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, "Skipping conservative merge on the " @@ -474,7 +474,7 @@ __afr_selfheal_merge_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, return ret; /* In case of type mismatch / unable to resolve gfid mismatch on the - * entry, return -1.*/ + * entry, return -EIO.*/ ret = afr_selfheal_detect_gfid_and_type_mismatch( this, replies, inode, fd->inode->gfid, name, source, locked_on, &src); @@ -905,7 +905,7 @@ afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd, break; } - if (ret == -1) { + if (ret == -EIO) { /* gfid or type mismatch. */ mismatch = _gf_true; ret = 0; @@ -1072,7 +1072,7 @@ afr_selfheal_entry_do(call_frame_t *frame, xlator_t *this, fd_t *fd, int source, else ret = afr_selfheal_entry_do_subvol(frame, this, fd, i); - if (ret == -1) { + if (ret == -EIO) { /* gfid or type mismatch. */ mismatch = _gf_true; ret = 0; diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c index 51e3d8c..9ec2066 100644 --- a/xlators/cluster/afr/src/afr-self-heal-name.c +++ b/xlators/cluster/afr/src/afr-self-heal-name.c @@ -217,7 +217,8 @@ afr_selfheal_name_gfid_mismatch_check(xlator_t *this, struct afr_reply *replies, int source, unsigned char *sources, int *gfid_idx, uuid_t pargfid, const char *bname, inode_t *inode, - unsigned char *locked_on, dict_t *xdata) + unsigned char *locked_on, dict_t *req, + dict_t *rsp) { int i = 0; int gfid_idx_iter = -1; @@ -245,11 +246,11 @@ afr_selfheal_name_gfid_mismatch_check(xlator_t *this, struct afr_reply *replies, if (sources[i] || source == -1) { if ((sources[gfid_idx_iter] || source == -1) && gf_uuid_compare(gfid, gfid1)) { - ret = afr_gfid_split_brain_source(this, replies, inode, pargfid, - bname, gfid_idx_iter, i, - locked_on, gfid_idx, xdata); + ret = afr_gfid_split_brain_source( + this, replies, inode, pargfid, bname, gfid_idx_iter, i, + locked_on, gfid_idx, req, rsp); if (!ret && *gfid_idx >= 0) { - ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg", + ret = dict_set_sizen_str_sizen(rsp, "gfid-heal-msg", "GFID split-brain resolved"); if (ret) gf_msg(this->name, GF_LOG_ERROR, 0, @@ -303,7 +304,7 @@ __afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, int source, unsigned char *locked_on, struct afr_reply *replies, - void *gfid_req, dict_t *xdata) + void *gfid_req, dict_t *req, dict_t *rsp) { int gfid_idx = -1; int ret = -1; @@ -333,7 +334,7 @@ __afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, ret = afr_selfheal_name_gfid_mismatch_check(this, replies, source, sources, &gfid_idx, pargfid, bname, - inode, locked_on, xdata); + inode, locked_on, req, rsp); if (ret) return ret; @@ -450,7 +451,7 @@ out: int afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, uuid_t pargfid, const char *bname, void *gfid_req, - dict_t *xdata) + dict_t *req, dict_t *rsp) { afr_private_t *priv = NULL; unsigned char *sources = NULL; @@ -505,7 +506,7 @@ afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, ret = __afr_selfheal_name_do(frame, this, parent, pargfid, bname, inode, sources, sinks, healed_sinks, source, - locked_on, replies, gfid_req, xdata); + locked_on, replies, gfid_req, req, rsp); } unlock: afr_selfheal_unentrylk(frame, this, parent, this->name, bname, locked_on, @@ -578,7 +579,7 @@ afr_selfheal_name_unlocked_inspect(call_frame_t *frame, xlator_t *this, int afr_selfheal_name(xlator_t *this, uuid_t pargfid, const char *bname, - void *gfid_req, dict_t *xdata) + void *gfid_req, dict_t *req, dict_t *rsp) { inode_t *parent = NULL; call_frame_t *frame = NULL; @@ -600,7 +601,7 @@ afr_selfheal_name(xlator_t *this, uuid_t pargfid, const char *bname, if (need_heal) { ret = afr_selfheal_name_do(frame, this, parent, pargfid, bname, - gfid_req, xdata); + gfid_req, req, rsp); if (ret) goto out; } diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index c8dc384..6b0bf69 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -127,7 +127,7 @@ afr_throttled_selfheal(call_frame_t *frame, xlator_t *this); int afr_selfheal_name(xlator_t *this, uuid_t gfid, const char *name, void *gfid_req, - dict_t *xdata); + dict_t *req, dict_t *rsp); int afr_selfheal_data(call_frame_t *frame, xlator_t *this, fd_t *fd); @@ -357,7 +357,8 @@ int afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies, inode_t *inode, uuid_t pargfid, const char *bname, int src_idx, int child_idx, - unsigned char *locked_on, int *src, dict_t *xdata); + unsigned char *locked_on, int *src, dict_t *req, + dict_t *rsp); int afr_mark_source_sinks_if_file_empty(xlator_t *this, unsigned char *sources, unsigned char *sinks, diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index 939a135..18aed93 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -295,7 +295,7 @@ afr_shd_selfheal_name(struct subvol_healer *healer, int child, uuid_t parent, { int ret = -1; - ret = afr_selfheal_name(THIS, parent, bname, NULL, NULL); + ret = afr_selfheal_name(THIS, parent, bname, NULL, NULL, NULL); return ret; } -- 1.8.3.1