From d77a9d1b2ba6aa582685d14e4301e4ea888f34bd Mon Sep 17 00:00:00 2001 From: Anuradha Talur Date: Fri, 26 Jun 2015 11:40:00 +0530 Subject: [PATCH 146/190] cluster/afr : set pending xattrs for replaced brick Backport of: http://review.gluster.org/10448/ Patch for bug 1140649 was already merged downstream, commit-id : a4f226f50a2ef943b5db095877bab5a3eebf7283 . There were a few amendments to be made. This patch contains delta changes b/w : 1) http://review.gluster.org/10448/ & 2) https://code.engineering.redhat.com/gerrit/#/c/51021/ Change-Id: Iad0c89578706b0b7e424b271977908e830c5236b BUG: 1140649 Signed-off-by: Anuradha Talur Reviewed-on: https://code.engineering.redhat.com/gerrit/51660 Reviewed-by: Ravishankar Narayanankutty Tested-by: Ravishankar Narayanankutty --- tests/basic/afr/replace-brick-self-heal.t | 33 +++++---- xlators/cluster/afr/src/afr-inode-write.c | 102 ++++++++----------------- xlators/cluster/afr/src/afr-messages.h | 12 +++- xlators/cluster/afr/src/afr-self-heal-entry.c | 7 ++ 4 files changed, 69 insertions(+), 85 deletions(-) diff --git a/tests/basic/afr/replace-brick-self-heal.t b/tests/basic/afr/replace-brick-self-heal.t index 1901466..8ced7df 100644 --- a/tests/basic/afr/replace-brick-self-heal.t +++ b/tests/basic/afr/replace-brick-self-heal.t @@ -3,16 +3,6 @@ . $(dirname $0)/../../volume.rc cleanup; -function match_dirs { - diff <(ls $1 | sort) <(ls $2 | sort) - if [ $? 
-eq 0 ]; - then - echo "Y" - else - echo "N" - fi -} - TEST glusterd TEST pidof glusterd TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} @@ -24,12 +14,15 @@ TEST $CLI volume set $V0 cluster.entry-self-heal off TEST $CLI volume set $V0 self-heal-daemon off TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; -#Create files +# Create files for i in {1..5} do echo $i > $M0/file$i.txt done +# Metadata changes +TEST setfattr -n user.test -v qwerty $M0/file5.txt + # Replace brick1 TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1_new commit force @@ -45,15 +38,27 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 TEST $CLI volume set $V0 self-heal-daemon on EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 TEST $CLI volume heal $V0 -# Check if heal has happened -EXPECT_WITHIN $HEAL_TIMEOUT "Y" match_dirs $B0/${V0}0 $B0/${V0}1_new +# Wait for heal to complete +EXPECT_WITHIN $HEAL_TIMEOUT "0" afr_get_pending_heal_count $V0 -# To make sure that data was not lost from brick0 +# Check if entry-heal has happened +TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1_new | sort) + +# To make sure that files were not lost from brick0 TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1 | sort) EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0 # Test if data was healed +TEST diff $B0/${V0}0/file1.txt $B0/${V0}1_new/file1.txt +# To make sure that data was not lost from brick0 TEST diff $B0/${V0}0/file1.txt $B0/${V0}1/file1.txt + +# Test if metadata was healed and exists on both the bricks +EXPECT "qwerty" get_text_xattr user.test $B0/${V0}1_new/file5.txt +EXPECT "qwerty" get_text_xattr user.test $B0/${V0}0/file5.txt + cleanup; diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index 5d32927..8800b81 100644 --- 
a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -38,6 +38,7 @@ #include "byte-order.h" #include "afr-transaction.h" #include "afr-self-heal.h" +#include "afr-messages.h" static void __afr_inode_write_finalize (call_frame_t *frame, xlator_t *this) @@ -979,42 +980,26 @@ afr_rb_set_pending_changelog_cbk (call_frame_t *frame, void *cookie, { afr_local_t *local = NULL; + afr_private_t *priv = NULL; int i = 0; local = frame->local; + priv = this->private; i = (long) cookie; local->replies[i].valid = 1; local->replies[i].op_ret = op_ret; local->replies[i].op_errno = op_errno; + gf_msg (this->name, op_ret ? GF_LOG_ERROR : GF_LOG_INFO, + op_ret ? op_errno : 0, + AFR_MSG_REPLACE_BRICK_STATUS, "Set of pending xattr %s on" + " %s.", op_ret ? "failed" : "succeeded", + priv->children[i]->name); syncbarrier_wake (&local->barrier); return 0; } -char * -afr_opret_matrix_generate (afr_private_t *priv, afr_local_t *local) -{ - char *matrix = NULL; - char *ptr = NULL; - int i = 0; - - /* Allocate max amount of chars required, including -ve values - * and spaces */ - matrix = GF_CALLOC (priv->child_count, 3 * sizeof (char), - gf_afr_mt_char); - if (!matrix) - return NULL; - ptr = matrix; - for (i = 0; i < priv->child_count; i++) { - if (local->replies[i].valid) - ptr += sprintf (ptr, "%d ", local->replies[i].op_ret); - else - ptr += sprintf (ptr, "-1 "); - } - return matrix; -} - int afr_rb_set_pending_changelog (call_frame_t *frame, xlator_t *this, unsigned char *locked_nodes) @@ -1022,7 +1007,6 @@ afr_rb_set_pending_changelog (call_frame_t *frame, xlator_t *this, afr_local_t *local = NULL; afr_private_t *priv = NULL; int ret = 0, i = 0; - char *matrix = NULL; local = frame->local; priv = this->private; @@ -1033,22 +1017,18 @@ afr_rb_set_pending_changelog (call_frame_t *frame, xlator_t *this, /* It is sufficient if xattrop was successful on one child */ for (i = 0; i < priv->child_count; i++) { - if (local->replies[i].valid && - 
local->replies[i].op_ret == 0) { - matrix = afr_opret_matrix_generate (priv, local); - gf_log (this->name, GF_LOG_DEBUG, "Successfully set " - "pending changelog. op_ret matrix : [ %s].", - matrix); + if (!local->replies[i].valid) + continue; + + if (local->replies[i].op_ret == 0) { ret = 0; goto out; + } else { + ret = afr_higher_errno (ret, + local->replies[i].op_errno); } - ret = afr_higher_errno (ret, local->replies[i].op_errno); } - gf_log (this->name, GF_LOG_ERROR, "Couldn't set pending xattr " - "on any child. (%s)", strerror (ret)); out: - if (matrix) - GF_FREE (matrix); return -ret; } @@ -1060,9 +1040,6 @@ _afr_handle_replace_brick_type (xlator_t *this, call_frame_t *frame, afr_local_t *local = NULL; afr_private_t *priv = NULL; unsigned char *locked_nodes = NULL; - struct gf_flock flock = {0, }; - struct gf_flock unflock = {0, }; - int i = 0; int count = 0; int ret = -ENOMEM; int idx = -1; @@ -1079,10 +1056,7 @@ _afr_handle_replace_brick_type (xlator_t *this, call_frame_t *frame, if (!local->pending) goto out; - for (i = 0; i < priv->child_count; i++) { - if (i == rb_index) - local->pending[i][idx] = hton32 (1); - } + local->pending[rb_index][idx] = hton32 (1); local->xdata_req = dict_new (); if (!local->xdata_req) @@ -1093,16 +1067,13 @@ _afr_handle_replace_brick_type (xlator_t *this, call_frame_t *frame, goto out; if (AFR_ENTRY_TRANSACTION == type) { - AFR_SEQ (frame, afr_selfheal_lock_cbk, entrylk, this->name, - loc, NULL, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL); + count = afr_selfheal_entrylk (frame, this, loc->inode, + this->name, NULL, locked_nodes); } else { - flock.l_type = F_WRLCK; - flock.l_start = LLONG_MAX - 1; - flock.l_len = 0; - AFR_SEQ (frame, afr_selfheal_lock_cbk, inodelk, this->name, - loc, F_SETLKW, &flock, NULL); + count = afr_selfheal_inodelk (frame, this, loc->inode, + this->name, LLONG_MAX - 1, 0, + locked_nodes); } - count = afr_locked_fill (frame, this, locked_nodes); if (!count) { gf_log (this->name, GF_LOG_ERROR, "Couldn't acquire 
lock on" @@ -1117,15 +1088,11 @@ _afr_handle_replace_brick_type (xlator_t *this, call_frame_t *frame, ret = 0; unlock: if (AFR_ENTRY_TRANSACTION == type) { - AFR_ONLIST (locked_nodes, frame, afr_selfheal_lock_cbk, - entrylk, this->name, loc, NULL, ENTRYLK_UNLOCK, - ENTRYLK_WRLCK, NULL); + afr_selfheal_unentrylk (frame, this, loc->inode, this->name, + NULL, locked_nodes); } else { - unflock.l_type = F_UNLCK; - unflock.l_start = LLONG_MAX - 1; - unflock.l_len = 0; - AFR_ONLIST (locked_nodes, frame, afr_selfheal_lock_cbk, - inodelk, this->name, loc, F_SETLK, &unflock, NULL); + afr_selfheal_uninodelk (frame, this, loc->inode, this->name, + LLONG_MAX - 1, 0, locked_nodes); } out: return ret; @@ -1162,6 +1129,8 @@ _afr_handle_replace_brick (xlator_t *this, call_frame_t *frame, loc_t *loc, dict_unref (local->xdata_req); afr_matrix_cleanup (local->pending, priv->child_count); + local->pending = NULL; + local->xdata_req = NULL; ret = _afr_handle_replace_brick_type (this, frame, loc, rb_index, AFR_ENTRY_TRANSACTION); @@ -1379,21 +1348,14 @@ int afr_handle_replace_brick (xlator_t *this, call_frame_t *frame, loc_t *loc, dict_t *dict) { - int len = 0; int ret = -1; int rb_index = -1; - int op_errno = EPERM; - void *value = NULL; char *replace_brick = NULL; - ret = dict_get_ptr_and_len (dict, GF_AFR_REPLACE_BRICK, &value, - &len); - - if (value) { - replace_brick = alloca0 (len + 1); - memcpy (replace_brick, value, len); + ret = dict_get_str (dict, GF_AFR_REPLACE_BRICK, &replace_brick); - if (!(frame->root->pid == GF_CLIENT_PID_AFR_SELF_HEALD)) { + if (!ret) { + if (frame->root->pid != GF_CLIENT_PID_AFR_SELF_HEALD) { ret = 1; goto out; } @@ -1412,8 +1374,8 @@ out: if (ret == 1) { gf_log (this->name, GF_LOG_ERROR, "'%s' is an internal" " extended attribute : %s.", - GF_AFR_REPLACE_BRICK, strerror (op_errno)); - AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); + GF_AFR_REPLACE_BRICK, strerror (EPERM)); + AFR_STACK_UNWIND (setxattr, frame, -1, EPERM, NULL); ret = 0; } return 
ret; diff --git a/xlators/cluster/afr/src/afr-messages.h b/xlators/cluster/afr/src/afr-messages.h index 52bdead..66b8ed1 100644 --- a/xlators/cluster/afr/src/afr-messages.h +++ b/xlators/cluster/afr/src/afr-messages.h @@ -45,7 +45,7 @@ */ #define GLFS_COMP_BASE_AFR GLFS_MSGID_COMP_AFR -#define GLFS_NUM_MESSAGES 10 +#define GLFS_NUM_MESSAGES 11 #define GLFS_MSGID_END (GLFS_COMP_BASE_AFR + GLFS_NUM_MESSAGES + 1) #define glfs_msg_start_x GLFS_COMP_BASE_AFR, "Invalid: Start of messages" @@ -142,6 +142,16 @@ #define AFR_MSG_INODE_UNLOCK_FAIL (GLFS_COMP_BASE_AFR + 10) +/*! + * @messageid 108011 + * @diagnosis Setting of pending xattrs succeeded/failed during replace-brick + * operation. + * @recommendedaction In case of failure, error number in the log should give + * the reason why it failed. Also observe brick logs for more information. +*/ +#define AFR_MSG_REPLACE_BRICK_STATUS (GLFS_COMP_BASE_AFR + 11) + + #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" #endif /* !_AFR_MESSAGES_H_ */ diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index e64b6e4..12da920 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -295,6 +295,13 @@ __afr_selfheal_merge_dirent (call_frame_t *frame, xlator_t *this, fd_t *fd, return 0; } + /* Set all the sources as 1, otherwise newentry_mark won't be set */ + for (i = 0; i < priv->child_count; i++) { + if (replies[i].valid && replies[i].op_ret == 0) { + sources[i] = 1; + } + } + /* In case of a gfid or type mismatch on the entry, return -1.*/ ret = afr_selfheal_detect_gfid_and_type_mismatch (this, replies, fd->inode->gfid, -- 1.7.1