From 231cea9678734044d2294e74d3742e61058bc760 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Fri, 27 May 2016 15:47:07 +0530 Subject: [PATCH 175/178] cluster/afr: Unwind xdata_rsp even in case of failures DHT expects GF_PREOP_CHECK_FAILED to be present in xdata_rsp in case of mkdir failures because of stale layout. But AFR was unwinding null xdata_rsp in case of failures. This was leading to mkdir failures just after remove-brick. Unwind the xdata_rsp in case of failures to make sure the response from brick reaches dht. >BUG: 1340623 >Change-Id: Idd3f7b95730e8ea987b608e892011ff190e181d1 >Signed-off-by: Pranith Kumar K >Reviewed-on: http://review.gluster.org/14553 >NetBSD-regression: NetBSD Build System >Reviewed-by: Ravishankar N >Smoke: Gluster Build System >CentOS-regression: Gluster Build System >Reviewed-by: Anuradha Talur >Reviewed-by: Krutika Dhananjay BUG: 1340085 Change-Id: I7fb653cea3e888de9f0db6f96a32156ace3c3fea Signed-off-by: Pranith Kumar K Reviewed-on: https://code.engineering.redhat.com/gerrit/75455 --- xlators/cluster/afr/src/afr-common.c | 29 +++++++++--- xlators/cluster/afr/src/afr-dir-read.c | 3 +- xlators/cluster/afr/src/afr-dir-write.c | 20 ++++++--- xlators/cluster/afr/src/afr-inode-write.c | 6 +- xlators/cluster/afr/src/afr-transaction.c | 71 +++++++++++++++++++++++++++++ xlators/cluster/afr/src/afr-transaction.h | 4 ++ xlators/cluster/afr/src/afr.h | 8 ++- 7 files changed, 120 insertions(+), 21 deletions(-) diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 3bed0c5..605c641 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -1274,6 +1274,7 @@ afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this, int afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p, + unsigned char *readables, int *event_p, afr_transaction_type type, afr_read_subvol_args_t *args) { @@ -1310,6 +1311,9 @@ afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p, *subvol_p = subvol; if (event_p) *event_p = event; + if (readables) + memcpy (readables, readable, + sizeof (*readables) * priv->child_count); return subvol; } @@ -1439,6 +1443,7 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this) GF_FREE (local->read_attempted); GF_FREE (local->readable); + GF_FREE (local->readable2); if (local->inode) inode_unref (local->inode); @@ -1598,8 +1603,8 @@ afr_get_parent_read_subvol (xlator_t *this, inode_t *parent, priv = this->private; if (parent) - par_read_subvol = afr_data_subvol_get (parent, this, 0, 0, - NULL); + par_read_subvol = afr_data_subvol_get (parent, this, NULL, NULL, + NULL, NULL); for (i = 0; i < priv->child_count; i++) { if (!replies[i].valid) @@ -1638,8 +1643,7 @@ afr_read_subvol_decide (inode_t *inode, xlator_t *this, int data_subvol = -1; int mdata_subvol = -1; - data_subvol = afr_data_subvol_get (inode, this, - 0, 0, args); + data_subvol = afr_data_subvol_get (inode, this, NULL, NULL, NULL, args); mdata_subvol = afr_metadata_subvol_get (inode, this, 0, 0, args); if (data_subvol == -1 || mdata_subvol == -1) @@ -1787,7 +1791,7 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) goto cant_interpret; } else { read_subvol = afr_data_subvol_get (local->inode, this, - 0, 0, &args); + NULL, NULL, NULL, &args); } } else { cant_interpret: @@ -2414,7 +2418,7 @@ afr_discover (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req return 0; } - afr_read_subvol_get (loc->inode, this, NULL, &event, + afr_read_subvol_get (loc->inode, this, NULL, NULL, &event, AFR_DATA_TRANSACTION, NULL); if (event != local->event_generation) @@ -2565,7 +2569,7 @@ afr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) } } - afr_read_subvol_get (loc->parent, this, NULL, &event, + afr_read_subvol_get (loc->parent, this, NULL, NULL, &event, AFR_DATA_TRANSACTION, NULL); if (event != local->event_generation) @@ -2888,7 +2892,8 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local = frame->local; - read_subvol = afr_data_subvol_get (local->inode, this, 0, 0, NULL); + read_subvol = afr_data_subvol_get (local->inode, this, NULL, NULL, + NULL, NULL); LOCK (&frame->lock); { @@ -4288,6 +4293,14 @@ afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno) goto out; } + local->readable2 = GF_CALLOC (priv->child_count, sizeof (char), + gf_afr_mt_char); + if (!local->readable2) { + if (op_errno) + *op_errno = ENOMEM; + goto out; + } + local->replies = GF_CALLOC(priv->child_count, sizeof(*local->replies), gf_afr_mt_reply_t); if (!local->replies) { diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index 559211e..f7da55f 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -158,7 +158,8 @@ afr_validate_read_subvol (inode_t *inode, xlator_t *this, int par_read_subvol) * -1 above due to gen being 0, which is why it is OK to pass NULL for * read_subvol_args here. */ - entry_read_subvol = afr_data_subvol_get (inode, this, 0, 0, NULL); + entry_read_subvol = afr_data_subvol_get (inode, this, NULL, NULL, + NULL, NULL); if (entry_read_subvol != par_read_subvol) return -1; diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c index b1ff557..bd24276 100644 --- a/xlators/cluster/afr/src/afr-dir-write.c +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -106,18 +106,21 @@ __afr_dir_write_finalize (call_frame_t *frame, xlator_t *this) if (local->inode) { afr_replies_interpret (frame, this, local->inode, NULL); inode_read_subvol = afr_data_subvol_get (local->inode, this, - NULL, NULL, &args); + NULL, NULL, NULL, &args); } if (local->parent) parent_read_subvol = afr_data_subvol_get (local->parent, this, - NULL, NULL, NULL); + NULL, local->readable, NULL, NULL); + if (local->parent2) parent2_read_subvol = afr_data_subvol_get (local->parent2, this, - NULL, NULL, NULL); + NULL, local->readable2, NULL, NULL); local->op_ret = -1; local->op_errno = afr_final_errno (local, priv); + afr_pick_error_xdata (local, priv, local->parent, local->readable, + local->parent2, local->readable2); for (i = 0; i < priv->child_count; i++) { if (!local->replies[i].valid) @@ -149,6 +152,11 @@ __afr_dir_write_finalize (call_frame_t *frame, xlator_t *this) local->replies[i].preparent2; local->cont.dir_fop.postnewparent = local->replies[i].postparent2; + if (local->xdata_rsp) { + dict_unref (local->xdata_rsp); + local->xdata_rsp = NULL; + } + if (local->replies[i].xdata) local->xdata_rsp = dict_ref (local->replies[i].xdata); @@ -201,6 +209,9 @@ __afr_dir_write_fill (call_frame_t *frame, xlator_t *this, int child_index, local->replies[child_index].valid = 1; local->replies[child_index].op_ret = op_ret; local->replies[child_index].op_errno = op_errno; + if (xdata) + local->replies[child_index].xdata = dict_ref (xdata); + if (op_ret >= 0) { if (poststat) @@ -213,9 +224,6 @@ __afr_dir_write_fill (call_frame_t *frame, xlator_t *this, int child_index, local->replies[child_index].preparent2 = *preparent2; if (postparent2) local->replies[child_index].postparent2 = *postparent2; - if (xdata) - local->replies[child_index].xdata = dict_ref (xdata); - if (fd_ctx) fd_ctx->opened_on[child_index] = AFR_FD_OPENED; } else { diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index baa5871..9206681 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -89,7 +89,7 @@ __afr_inode_write_finalize (call_frame_t *frame, xlator_t *this) &args); else read_subvol = afr_data_subvol_get (local->inode, this, - NULL, NULL, &args); + NULL, NULL, NULL, &args); } local->op_ret = -1; @@ -169,8 +169,8 @@ __afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index, local->replies[child_index].poststat = *postbuf; if (xattr) local->replies[child_index].xattr = dict_ref (xattr); - if (xdata) - local->replies[child_index].xdata = dict_ref (xdata); + if (xdata) + local->replies[child_index].xdata = dict_ref (xdata); } else { afr_transaction_fop_failed (frame, this, child_index); } diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 8b667c9..316c7cd 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -51,6 +51,66 @@ afr_zero_fill_stat (afr_local_t *local) } } +/* In case of errors afr needs to choose which xdata from lower xlators it needs + * to unwind with. The way it is done is by checking if there are + * any good subvols which failed. Give preference to errnos other than + * ENOTCONN even if the child is source */ +void +afr_pick_error_xdata (afr_local_t *local, afr_private_t *priv, + inode_t *inode1, unsigned char *readable1, + inode_t *inode2, unsigned char *readable2) +{ + int s = -1;/*selection*/ + int i = 0; + unsigned char *readable = NULL; + + if (local->xdata_rsp) { + dict_unref (local->xdata_rsp); + local->xdata_rsp = NULL; + } + + readable = alloca0 (priv->child_count * sizeof (*readable)); + if (inode2 && readable2) {/*rename fop*/ + AFR_INTERSECT (readable, readable1, readable2, + priv->child_count); + } else { + memcpy (readable, readable1, + sizeof (*readable) * priv->child_count); + } + + for (i = 0; i < priv->child_count; i++) { + if (!local->replies[i].valid) + continue; + + if (local->replies[i].op_ret >= 0) + continue; + + if (local->replies[i].op_errno == ENOTCONN) + continue; + + /*Order is important in the following condition*/ + if ((s < 0) || (!readable[s] && readable[i])) + s = i; + } + + if (s != -1 && local->replies[s].xdata) { + local->xdata_rsp = dict_ref (local->replies[s].xdata); + } else if (s == -1) { + for (i = 0; i < priv->child_count; i++) { + if (!local->replies[i].valid) + continue; + + if (local->replies[i].op_ret >= 0) + continue; + + if (!local->replies[i].xdata) + continue; + local->xdata_rsp = dict_ref (local->replies[i].xdata); + break; + } + } +} + gf_boolean_t afr_needs_changelog_update (afr_local_t *local) { @@ -741,6 +801,17 @@ afr_handle_quorum (call_frame_t *frame) local->op_errno = afr_final_errno (local, priv); if (local->op_errno == 0) local->op_errno = afr_quorum_errno (priv); + switch (local->transaction.type) { + case AFR_ENTRY_TRANSACTION: + case AFR_ENTRY_RENAME_TRANSACTION: + afr_pick_error_xdata (local, priv, local->parent, + local->readable, local->parent2, + local->readable2); + break; + default: + /*TBD*/ + break; + } } int diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h index c58531e..ca8fcfe 100644 --- a/xlators/cluster/afr/src/afr-transaction.h +++ b/xlators/cluster/afr/src/afr-transaction.h @@ -55,4 +55,8 @@ gf_boolean_t afr_has_quorum (unsigned char *subvols, xlator_t *this); gf_boolean_t afr_needs_changelog_update (afr_local_t *local); void afr_zero_fill_stat (afr_local_t *local); +void +afr_pick_error_xdata (afr_local_t *local, afr_private_t *priv, + inode_t *inode1, unsigned char *readable1, + inode_t *inode2, unsigned char *readable2); #endif /* __TRANSACTION_H__ */ diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 6370577..63be1fb 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -417,6 +417,7 @@ typedef struct _afr_local { performed. This is the output of afr_inode_refresh() */ unsigned char *readable; + unsigned char *readable2; /*For rename transaction*/ afr_inode_refresh_cbk_t refreshfn; @@ -836,14 +837,15 @@ afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this, int type); int afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p, + unsigned char *readables, int *event_p, afr_transaction_type type, afr_read_subvol_args_t *args); -#define afr_data_subvol_get(i, t, s, e, a) \ - afr_read_subvol_get(i, t, s, e, AFR_DATA_TRANSACTION, a) +#define afr_data_subvol_get(i, t, s, r, e, a) \ + afr_read_subvol_get(i, t, s, r, e, AFR_DATA_TRANSACTION, a) #define afr_metadata_subvol_get(i, t, s, e, a) \ - afr_read_subvol_get(i, t, s, e, AFR_METADATA_TRANSACTION, a) + afr_read_subvol_get(i, t, s, NULL, e, AFR_METADATA_TRANSACTION, a) int afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode, -- 1.7.1