|
|
b7d4d7 |
From 4c47d6dd7c5ddcaa2a1e159427c0f6713fd33907 Mon Sep 17 00:00:00 2001
|
|
|
b7d4d7 |
From: karthik-us <ksubrahm@redhat.com>
|
|
|
b7d4d7 |
Date: Wed, 23 Dec 2020 14:57:51 +0530
|
|
|
b7d4d7 |
Subject: [PATCH 514/517] afr: event gen changes
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
The general idea of the changes is to prevent resetting event generation
|
|
|
b7d4d7 |
to zero in the inode ctx, since event gen is something that should
|
|
|
b7d4d7 |
follow 'causal order'.
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
Change #1:
|
|
|
b7d4d7 |
For a read txn, in inode refresh cbk, if event_generation is
|
|
|
b7d4d7 |
found zero, we are failing the read fop. This is not needed
|
|
|
b7d4d7 |
because change in event gen is only a marker for the next inode refresh to
|
|
|
b7d4d7 |
happen and should not be taken into account by the current read txn.
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
Change #2:
|
|
|
b7d4d7 |
The event gen being zero above can happen if there is a racing lookup,
|
|
|
b7d4d7 |
which resets even get (in afr_lookup_done) if there are non zero afr
|
|
|
b7d4d7 |
xattrs. The resetting is done only to trigger an inode refresh and a
|
|
|
b7d4d7 |
possible client side heal on the next lookup. That can be acheived by
|
|
|
b7d4d7 |
setting the need_refresh flag in the inode ctx. So replaced all
|
|
|
b7d4d7 |
occurences of resetting even gen to zero with a call to
|
|
|
b7d4d7 |
afr_inode_need_refresh_set().
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
Change #3:
|
|
|
b7d4d7 |
In both lookup and discover path, we are doing an inode refresh which is
|
|
|
b7d4d7 |
not required since all 3 essentially do the same thing- update the inode
|
|
|
b7d4d7 |
ctx with the good/bad copies from the brick replies. Inode refresh also
|
|
|
b7d4d7 |
triggers background heals, but I think it is okay to do it when we call
|
|
|
b7d4d7 |
refresh during the read and write txns and not in the lookup path.
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
The .ts which relied on inode refresh in lookup path to trigger heals are
|
|
|
b7d4d7 |
now changed to do read txn so that inode refresh and the heal happens.
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
Upstream patch details:
|
|
|
b7d4d7 |
> Change-Id: Iebf39a9be6ffd7ffd6e4046c96b0fa78ade6c5ec
|
|
|
b7d4d7 |
> Fixes: #1179
|
|
|
b7d4d7 |
> Signed-off-by: Ravishankar N <ravishankar@redhat.com>
|
|
|
b7d4d7 |
> Reported-by: Erik Jacobson <erik.jacobson at hpe.com>
|
|
|
b7d4d7 |
Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24316/
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
BUG: 1640148
|
|
|
b7d4d7 |
Change-Id: Iebf39a9be6ffd7ffd6e4046c96b0fa78ade6c5ec
|
|
|
b7d4d7 |
Signed-off-by: karthik-us <ksubrahm@redhat.com>
|
|
|
b7d4d7 |
Reviewed-on: https://code.engineering.redhat.com/gerrit/222074
|
|
|
b7d4d7 |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
b7d4d7 |
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
|
|
|
b7d4d7 |
---
|
|
|
b7d4d7 |
...fid-mismatch-resolution-with-fav-child-policy.t | 8 +-
|
|
|
b7d4d7 |
xlators/cluster/afr/src/afr-common.c | 92 +++++-----------------
|
|
|
b7d4d7 |
xlators/cluster/afr/src/afr-dir-write.c | 6 +-
|
|
|
b7d4d7 |
xlators/cluster/afr/src/afr.h | 5 +-
|
|
|
b7d4d7 |
4 files changed, 29 insertions(+), 82 deletions(-)
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
diff --git a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
|
|
|
b7d4d7 |
index f4aa351..12af0c8 100644
|
|
|
b7d4d7 |
--- a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
|
|
|
b7d4d7 |
+++ b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
|
|
|
b7d4d7 |
@@ -168,8 +168,8 @@ TEST [ "$gfid_1" != "$gfid_2" ]
|
|
|
b7d4d7 |
#We know that second brick has the bigger size file
|
|
|
b7d4d7 |
BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/f3 | cut -d\ -f1)
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
-TEST ls $M0/f3
|
|
|
b7d4d7 |
-TEST cat $M0/f3
|
|
|
b7d4d7 |
+TEST ls $M0 #Trigger entry heal via readdir inode refresh
|
|
|
b7d4d7 |
+TEST cat $M0/f3 #Trigger data heal via readv inode refresh
|
|
|
b7d4d7 |
EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
#gfid split-brain should be resolved
|
|
|
b7d4d7 |
@@ -215,8 +215,8 @@ TEST $CLI volume start $V0 force
|
|
|
b7d4d7 |
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
|
|
|
b7d4d7 |
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
-TEST ls $M0/f4
|
|
|
b7d4d7 |
-TEST cat $M0/f4
|
|
|
b7d4d7 |
+TEST ls $M0 #Trigger entry heal via readdir inode refresh
|
|
|
b7d4d7 |
+TEST cat $M0/f4 #Trigger data heal via readv inode refresh
|
|
|
b7d4d7 |
EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
#gfid split-brain should be resolved
|
|
|
b7d4d7 |
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
|
|
|
b7d4d7 |
index fca2cd5..90b4f14 100644
|
|
|
b7d4d7 |
--- a/xlators/cluster/afr/src/afr-common.c
|
|
|
b7d4d7 |
+++ b/xlators/cluster/afr/src/afr-common.c
|
|
|
b7d4d7 |
@@ -284,7 +284,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
|
|
|
b7d4d7 |
metadatamap |= (1 << index);
|
|
|
b7d4d7 |
}
|
|
|
b7d4d7 |
if (metadatamap_old != metadatamap) {
|
|
|
b7d4d7 |
- event = 0;
|
|
|
b7d4d7 |
+ __afr_inode_need_refresh_set(inode, this);
|
|
|
b7d4d7 |
}
|
|
|
b7d4d7 |
break;
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
@@ -297,7 +297,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
|
|
|
b7d4d7 |
datamap |= (1 << index);
|
|
|
b7d4d7 |
}
|
|
|
b7d4d7 |
if (datamap_old != datamap)
|
|
|
b7d4d7 |
- event = 0;
|
|
|
b7d4d7 |
+ __afr_inode_need_refresh_set(inode, this);
|
|
|
b7d4d7 |
break;
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
default:
|
|
|
b7d4d7 |
@@ -461,34 +461,6 @@ out:
|
|
|
b7d4d7 |
}
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
int
|
|
|
b7d4d7 |
-__afr_inode_event_gen_reset_small(inode_t *inode, xlator_t *this)
|
|
|
b7d4d7 |
-{
|
|
|
b7d4d7 |
- int ret = -1;
|
|
|
b7d4d7 |
- uint16_t datamap = 0;
|
|
|
b7d4d7 |
- uint16_t metadatamap = 0;
|
|
|
b7d4d7 |
- uint32_t event = 0;
|
|
|
b7d4d7 |
- uint64_t val = 0;
|
|
|
b7d4d7 |
- afr_inode_ctx_t *ctx = NULL;
|
|
|
b7d4d7 |
-
|
|
|
b7d4d7 |
- ret = __afr_inode_ctx_get(this, inode, &ctx;;
|
|
|
b7d4d7 |
- if (ret)
|
|
|
b7d4d7 |
- return ret;
|
|
|
b7d4d7 |
-
|
|
|
b7d4d7 |
- val = ctx->read_subvol;
|
|
|
b7d4d7 |
-
|
|
|
b7d4d7 |
- metadatamap = (val & 0x000000000000ffff) >> 0;
|
|
|
b7d4d7 |
- datamap = (val & 0x00000000ffff0000) >> 16;
|
|
|
b7d4d7 |
- event = 0;
|
|
|
b7d4d7 |
-
|
|
|
b7d4d7 |
- val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
|
|
|
b7d4d7 |
- (((uint64_t)event) << 32);
|
|
|
b7d4d7 |
-
|
|
|
b7d4d7 |
- ctx->read_subvol = val;
|
|
|
b7d4d7 |
-
|
|
|
b7d4d7 |
- return ret;
|
|
|
b7d4d7 |
-}
|
|
|
b7d4d7 |
-
|
|
|
b7d4d7 |
-int
|
|
|
b7d4d7 |
__afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
|
|
|
b7d4d7 |
unsigned char *metadata, int *event_p)
|
|
|
b7d4d7 |
{
|
|
|
b7d4d7 |
@@ -559,22 +531,6 @@ out:
|
|
|
b7d4d7 |
}
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
int
|
|
|
b7d4d7 |
-__afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
|
|
|
b7d4d7 |
-{
|
|
|
b7d4d7 |
- afr_private_t *priv = NULL;
|
|
|
b7d4d7 |
- int ret = -1;
|
|
|
b7d4d7 |
-
|
|
|
b7d4d7 |
- priv = this->private;
|
|
|
b7d4d7 |
-
|
|
|
b7d4d7 |
- if (priv->child_count <= 16)
|
|
|
b7d4d7 |
- ret = __afr_inode_event_gen_reset_small(inode, this);
|
|
|
b7d4d7 |
- else
|
|
|
b7d4d7 |
- ret = -1;
|
|
|
b7d4d7 |
-
|
|
|
b7d4d7 |
- return ret;
|
|
|
b7d4d7 |
-}
|
|
|
b7d4d7 |
-
|
|
|
b7d4d7 |
-int
|
|
|
b7d4d7 |
afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
|
|
|
b7d4d7 |
unsigned char *metadata, int *event_p)
|
|
|
b7d4d7 |
{
|
|
|
b7d4d7 |
@@ -723,30 +679,22 @@ out:
|
|
|
b7d4d7 |
return need_refresh;
|
|
|
b7d4d7 |
}
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
-static int
|
|
|
b7d4d7 |
-afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
|
|
|
b7d4d7 |
+int
|
|
|
b7d4d7 |
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
|
|
|
b7d4d7 |
{
|
|
|
b7d4d7 |
int ret = -1;
|
|
|
b7d4d7 |
afr_inode_ctx_t *ctx = NULL;
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
- GF_VALIDATE_OR_GOTO(this->name, inode, out);
|
|
|
b7d4d7 |
-
|
|
|
b7d4d7 |
- LOCK(&inode->lock);
|
|
|
b7d4d7 |
- {
|
|
|
b7d4d7 |
- ret = __afr_inode_ctx_get(this, inode, &ctx;;
|
|
|
b7d4d7 |
- if (ret)
|
|
|
b7d4d7 |
- goto unlock;
|
|
|
b7d4d7 |
-
|
|
|
b7d4d7 |
+ ret = __afr_inode_ctx_get(this, inode, &ctx;;
|
|
|
b7d4d7 |
+ if (ret == 0) {
|
|
|
b7d4d7 |
ctx->need_refresh = _gf_true;
|
|
|
b7d4d7 |
}
|
|
|
b7d4d7 |
-unlock:
|
|
|
b7d4d7 |
- UNLOCK(&inode->lock);
|
|
|
b7d4d7 |
-out:
|
|
|
b7d4d7 |
+
|
|
|
b7d4d7 |
return ret;
|
|
|
b7d4d7 |
}
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
int
|
|
|
b7d4d7 |
-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
|
|
|
b7d4d7 |
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
|
|
|
b7d4d7 |
{
|
|
|
b7d4d7 |
int ret = -1;
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
@@ -754,7 +702,7 @@ afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
LOCK(&inode->lock);
|
|
|
b7d4d7 |
{
|
|
|
b7d4d7 |
- ret = __afr_inode_event_gen_reset(inode, this);
|
|
|
b7d4d7 |
+ ret = __afr_inode_need_refresh_set(inode, this);
|
|
|
b7d4d7 |
}
|
|
|
b7d4d7 |
UNLOCK(&inode->lock);
|
|
|
b7d4d7 |
out:
|
|
|
b7d4d7 |
@@ -1191,7 +1139,7 @@ afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
|
|
|
b7d4d7 |
ret = afr_inode_get_readable(frame, inode, this, local->readable,
|
|
|
b7d4d7 |
&event_generation, local->transaction.type);
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
- if (ret == -EIO || (local->is_read_txn && !event_generation)) {
|
|
|
b7d4d7 |
+ if (ret == -EIO) {
|
|
|
b7d4d7 |
/* No readable subvolume even after refresh ==> splitbrain.*/
|
|
|
b7d4d7 |
if (!priv->fav_child_policy) {
|
|
|
b7d4d7 |
err = EIO;
|
|
|
b7d4d7 |
@@ -2413,7 +2361,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
|
|
|
b7d4d7 |
if (read_subvol == -1)
|
|
|
b7d4d7 |
goto cant_interpret;
|
|
|
b7d4d7 |
if (ret) {
|
|
|
b7d4d7 |
- afr_inode_event_gen_reset(local->inode, this);
|
|
|
b7d4d7 |
+ afr_inode_need_refresh_set(local->inode, this);
|
|
|
b7d4d7 |
dict_del_sizen(local->replies[read_subvol].xdata, GF_CONTENT_KEY);
|
|
|
b7d4d7 |
}
|
|
|
b7d4d7 |
} else {
|
|
|
b7d4d7 |
@@ -2971,6 +2919,7 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
|
|
|
b7d4d7 |
afr_private_t *priv = NULL;
|
|
|
b7d4d7 |
afr_local_t *local = NULL;
|
|
|
b7d4d7 |
int read_subvol = -1;
|
|
|
b7d4d7 |
+ int ret = 0;
|
|
|
b7d4d7 |
unsigned char *data_readable = NULL;
|
|
|
b7d4d7 |
unsigned char *success_replies = NULL;
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
@@ -2992,7 +2941,10 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
|
|
|
b7d4d7 |
if (!afr_has_quorum(success_replies, this, frame))
|
|
|
b7d4d7 |
goto unwind;
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
- afr_replies_interpret(frame, this, local->inode, NULL);
|
|
|
b7d4d7 |
+ ret = afr_replies_interpret(frame, this, local->inode, NULL);
|
|
|
b7d4d7 |
+ if (ret) {
|
|
|
b7d4d7 |
+ afr_inode_need_refresh_set(local->inode, this);
|
|
|
b7d4d7 |
+ }
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
read_subvol = afr_read_subvol_decide(local->inode, this, NULL,
|
|
|
b7d4d7 |
data_readable);
|
|
|
b7d4d7 |
@@ -3248,11 +3200,7 @@ afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
|
|
|
b7d4d7 |
afr_read_subvol_get(loc->inode, this, NULL, NULL, &event,
|
|
|
b7d4d7 |
AFR_DATA_TRANSACTION, NULL);
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
- if (afr_is_inode_refresh_reqd(loc->inode, this, event,
|
|
|
b7d4d7 |
- local->event_generation))
|
|
|
b7d4d7 |
- afr_inode_refresh(frame, this, loc->inode, NULL, afr_discover_do);
|
|
|
b7d4d7 |
- else
|
|
|
b7d4d7 |
- afr_discover_do(frame, this, 0);
|
|
|
b7d4d7 |
+ afr_discover_do(frame, this, 0);
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
return 0;
|
|
|
b7d4d7 |
out:
|
|
|
b7d4d7 |
@@ -3393,11 +3341,7 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
|
|
|
b7d4d7 |
afr_read_subvol_get(loc->parent, this, NULL, NULL, &event,
|
|
|
b7d4d7 |
AFR_DATA_TRANSACTION, NULL);
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
- if (afr_is_inode_refresh_reqd(loc->inode, this, event,
|
|
|
b7d4d7 |
- local->event_generation))
|
|
|
b7d4d7 |
- afr_inode_refresh(frame, this, loc->parent, NULL, afr_lookup_do);
|
|
|
b7d4d7 |
- else
|
|
|
b7d4d7 |
- afr_lookup_do(frame, this, 0);
|
|
|
b7d4d7 |
+ afr_lookup_do(frame, this, 0);
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
return 0;
|
|
|
b7d4d7 |
out:
|
|
|
b7d4d7 |
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
|
|
|
b7d4d7 |
index 416c19d..d419bfc 100644
|
|
|
b7d4d7 |
--- a/xlators/cluster/afr/src/afr-dir-write.c
|
|
|
b7d4d7 |
+++ b/xlators/cluster/afr/src/afr-dir-write.c
|
|
|
b7d4d7 |
@@ -123,11 +123,11 @@ __afr_dir_write_finalize(call_frame_t *frame, xlator_t *this)
|
|
|
b7d4d7 |
continue;
|
|
|
b7d4d7 |
if (local->replies[i].op_ret < 0) {
|
|
|
b7d4d7 |
if (local->inode)
|
|
|
b7d4d7 |
- afr_inode_event_gen_reset(local->inode, this);
|
|
|
b7d4d7 |
+ afr_inode_need_refresh_set(local->inode, this);
|
|
|
b7d4d7 |
if (local->parent)
|
|
|
b7d4d7 |
- afr_inode_event_gen_reset(local->parent, this);
|
|
|
b7d4d7 |
+ afr_inode_need_refresh_set(local->parent, this);
|
|
|
b7d4d7 |
if (local->parent2)
|
|
|
b7d4d7 |
- afr_inode_event_gen_reset(local->parent2, this);
|
|
|
b7d4d7 |
+ afr_inode_need_refresh_set(local->parent2, this);
|
|
|
b7d4d7 |
continue;
|
|
|
b7d4d7 |
}
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
|
|
|
b7d4d7 |
index ed5096e..3a2b26d 100644
|
|
|
b7d4d7 |
--- a/xlators/cluster/afr/src/afr.h
|
|
|
b7d4d7 |
+++ b/xlators/cluster/afr/src/afr.h
|
|
|
b7d4d7 |
@@ -948,7 +948,10 @@ afr_inode_read_subvol_set(inode_t *inode, xlator_t *this,
|
|
|
b7d4d7 |
int event_generation);
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
int
|
|
|
b7d4d7 |
-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this);
|
|
|
b7d4d7 |
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
|
|
|
b7d4d7 |
+
|
|
|
b7d4d7 |
+int
|
|
|
b7d4d7 |
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
|
|
|
b7d4d7 |
|
|
|
b7d4d7 |
int
|
|
|
b7d4d7 |
afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this,
|
|
|
b7d4d7 |
--
|
|
|
b7d4d7 |
1.8.3.1
|
|
|
b7d4d7 |
|