14f8ab
From 4c47d6dd7c5ddcaa2a1e159427c0f6713fd33907 Mon Sep 17 00:00:00 2001
14f8ab
From: karthik-us <ksubrahm@redhat.com>
14f8ab
Date: Wed, 23 Dec 2020 14:57:51 +0530
14f8ab
Subject: [PATCH 514/517] afr: event gen changes
14f8ab
14f8ab
The general idea of the changes is to prevent resetting event generation
14f8ab
to zero in the inode ctx, since event gen is something that should
14f8ab
follow 'causal order'.
14f8ab
14f8ab
Change #1:
14f8ab
For a read txn, in inode refresh cbk, if event_generation is
14f8ab
found zero, we are failing the read fop. This is not needed
14f8ab
because change in event gen is only a marker for the next inode refresh to
14f8ab
happen and should not be taken into account by the current read txn.
14f8ab
14f8ab
Change #2:
14f8ab
The event gen being zero above can happen if there is a racing lookup,
14f8ab
which resets even get (in afr_lookup_done) if there are non zero afr
14f8ab
xattrs. The resetting is done only to trigger an inode refresh and a
14f8ab
possible client side heal on the next lookup. That can be acheived by
14f8ab
setting the need_refresh flag in the inode ctx. So replaced all
14f8ab
occurences of resetting even gen to zero with a call to
14f8ab
afr_inode_need_refresh_set().
14f8ab
14f8ab
Change #3:
14f8ab
In both lookup and discover path, we are doing an inode refresh which is
14f8ab
not required since all 3 essentially do the same thing- update the inode
14f8ab
ctx with the good/bad copies from the brick replies. Inode refresh also
14f8ab
triggers background heals, but I think it is okay to do it when we call
14f8ab
refresh during the read and write txns and not in the lookup path.
14f8ab
14f8ab
The .ts which relied on inode refresh in lookup path to trigger heals are
14f8ab
now changed to do read txn so that inode refresh and the heal happens.
14f8ab
14f8ab
Upstream patch details:
14f8ab
> Change-Id: Iebf39a9be6ffd7ffd6e4046c96b0fa78ade6c5ec
14f8ab
> Fixes: #1179
14f8ab
> Signed-off-by: Ravishankar N <ravishankar@redhat.com>
14f8ab
> Reported-by: Erik Jacobson <erik.jacobson at hpe.com>
14f8ab
Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24316/
14f8ab
14f8ab
BUG: 1640148
14f8ab
Change-Id: Iebf39a9be6ffd7ffd6e4046c96b0fa78ade6c5ec
14f8ab
Signed-off-by: karthik-us <ksubrahm@redhat.com>
14f8ab
Reviewed-on: https://code.engineering.redhat.com/gerrit/222074
14f8ab
Tested-by: RHGS Build Bot <nigelb@redhat.com>
14f8ab
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
14f8ab
---
14f8ab
 ...fid-mismatch-resolution-with-fav-child-policy.t |  8 +-
14f8ab
 xlators/cluster/afr/src/afr-common.c               | 92 +++++-----------------
14f8ab
 xlators/cluster/afr/src/afr-dir-write.c            |  6 +-
14f8ab
 xlators/cluster/afr/src/afr.h                      |  5 +-
14f8ab
 4 files changed, 29 insertions(+), 82 deletions(-)
14f8ab
14f8ab
diff --git a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
14f8ab
index f4aa351..12af0c8 100644
14f8ab
--- a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
14f8ab
+++ b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
14f8ab
@@ -168,8 +168,8 @@ TEST [ "$gfid_1" != "$gfid_2" ]
14f8ab
 #We know that second brick has the bigger size file
14f8ab
 BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/f3 | cut -d\  -f1)
14f8ab
 
14f8ab
-TEST ls $M0/f3
14f8ab
-TEST cat $M0/f3
14f8ab
+TEST ls $M0 #Trigger entry heal via readdir inode refresh
14f8ab
+TEST cat $M0/f3 #Trigger data heal via readv inode refresh
14f8ab
 EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
14f8ab
 
14f8ab
 #gfid split-brain should be resolved
14f8ab
@@ -215,8 +215,8 @@ TEST $CLI volume start $V0 force
14f8ab
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
14f8ab
 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
14f8ab
 
14f8ab
-TEST ls $M0/f4
14f8ab
-TEST cat $M0/f4
14f8ab
+TEST ls $M0 #Trigger entry heal via readdir inode refresh
14f8ab
+TEST cat $M0/f4  #Trigger data heal via readv inode refresh
14f8ab
 EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
14f8ab
 
14f8ab
 #gfid split-brain should be resolved
14f8ab
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
14f8ab
index fca2cd5..90b4f14 100644
14f8ab
--- a/xlators/cluster/afr/src/afr-common.c
14f8ab
+++ b/xlators/cluster/afr/src/afr-common.c
14f8ab
@@ -284,7 +284,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
14f8ab
                 metadatamap |= (1 << index);
14f8ab
             }
14f8ab
             if (metadatamap_old != metadatamap) {
14f8ab
-                event = 0;
14f8ab
+                __afr_inode_need_refresh_set(inode, this);
14f8ab
             }
14f8ab
             break;
14f8ab
 
14f8ab
@@ -297,7 +297,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
14f8ab
                 datamap |= (1 << index);
14f8ab
             }
14f8ab
             if (datamap_old != datamap)
14f8ab
-                event = 0;
14f8ab
+                __afr_inode_need_refresh_set(inode, this);
14f8ab
             break;
14f8ab
 
14f8ab
         default:
14f8ab
@@ -461,34 +461,6 @@ out:
14f8ab
 }
14f8ab
 
14f8ab
 int
14f8ab
-__afr_inode_event_gen_reset_small(inode_t *inode, xlator_t *this)
14f8ab
-{
14f8ab
-    int ret = -1;
14f8ab
-    uint16_t datamap = 0;
14f8ab
-    uint16_t metadatamap = 0;
14f8ab
-    uint32_t event = 0;
14f8ab
-    uint64_t val = 0;
14f8ab
-    afr_inode_ctx_t *ctx = NULL;
14f8ab
-
14f8ab
-    ret = __afr_inode_ctx_get(this, inode, &ctx;;
14f8ab
-    if (ret)
14f8ab
-        return ret;
14f8ab
-
14f8ab
-    val = ctx->read_subvol;
14f8ab
-
14f8ab
-    metadatamap = (val & 0x000000000000ffff) >> 0;
14f8ab
-    datamap = (val & 0x00000000ffff0000) >> 16;
14f8ab
-    event = 0;
14f8ab
-
14f8ab
-    val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
14f8ab
-          (((uint64_t)event) << 32);
14f8ab
-
14f8ab
-    ctx->read_subvol = val;
14f8ab
-
14f8ab
-    return ret;
14f8ab
-}
14f8ab
-
14f8ab
-int
14f8ab
 __afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
14f8ab
                             unsigned char *metadata, int *event_p)
14f8ab
 {
14f8ab
@@ -559,22 +531,6 @@ out:
14f8ab
 }
14f8ab
 
14f8ab
 int
14f8ab
-__afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
14f8ab
-{
14f8ab
-    afr_private_t *priv = NULL;
14f8ab
-    int ret = -1;
14f8ab
-
14f8ab
-    priv = this->private;
14f8ab
-
14f8ab
-    if (priv->child_count <= 16)
14f8ab
-        ret = __afr_inode_event_gen_reset_small(inode, this);
14f8ab
-    else
14f8ab
-        ret = -1;
14f8ab
-
14f8ab
-    return ret;
14f8ab
-}
14f8ab
-
14f8ab
-int
14f8ab
 afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
14f8ab
                           unsigned char *metadata, int *event_p)
14f8ab
 {
14f8ab
@@ -723,30 +679,22 @@ out:
14f8ab
     return need_refresh;
14f8ab
 }
14f8ab
 
14f8ab
-static int
14f8ab
-afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
14f8ab
+int
14f8ab
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
14f8ab
 {
14f8ab
     int ret = -1;
14f8ab
     afr_inode_ctx_t *ctx = NULL;
14f8ab
 
14f8ab
-    GF_VALIDATE_OR_GOTO(this->name, inode, out);
14f8ab
-
14f8ab
-    LOCK(&inode->lock);
14f8ab
-    {
14f8ab
-        ret = __afr_inode_ctx_get(this, inode, &ctx;;
14f8ab
-        if (ret)
14f8ab
-            goto unlock;
14f8ab
-
14f8ab
+    ret = __afr_inode_ctx_get(this, inode, &ctx;;
14f8ab
+    if (ret == 0) {
14f8ab
         ctx->need_refresh = _gf_true;
14f8ab
     }
14f8ab
-unlock:
14f8ab
-    UNLOCK(&inode->lock);
14f8ab
-out:
14f8ab
+
14f8ab
     return ret;
14f8ab
 }
14f8ab
 
14f8ab
 int
14f8ab
-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
14f8ab
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
14f8ab
 {
14f8ab
     int ret = -1;
14f8ab
 
14f8ab
@@ -754,7 +702,7 @@ afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
14f8ab
 
14f8ab
     LOCK(&inode->lock);
14f8ab
     {
14f8ab
-        ret = __afr_inode_event_gen_reset(inode, this);
14f8ab
+        ret = __afr_inode_need_refresh_set(inode, this);
14f8ab
     }
14f8ab
     UNLOCK(&inode->lock);
14f8ab
 out:
14f8ab
@@ -1191,7 +1139,7 @@ afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
14f8ab
     ret = afr_inode_get_readable(frame, inode, this, local->readable,
14f8ab
                                  &event_generation, local->transaction.type);
14f8ab
 
14f8ab
-    if (ret == -EIO || (local->is_read_txn && !event_generation)) {
14f8ab
+    if (ret == -EIO) {
14f8ab
         /* No readable subvolume even after refresh ==> splitbrain.*/
14f8ab
         if (!priv->fav_child_policy) {
14f8ab
             err = EIO;
14f8ab
@@ -2413,7 +2361,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
14f8ab
         if (read_subvol == -1)
14f8ab
             goto cant_interpret;
14f8ab
         if (ret) {
14f8ab
-            afr_inode_event_gen_reset(local->inode, this);
14f8ab
+            afr_inode_need_refresh_set(local->inode, this);
14f8ab
             dict_del_sizen(local->replies[read_subvol].xdata, GF_CONTENT_KEY);
14f8ab
         }
14f8ab
     } else {
14f8ab
@@ -2971,6 +2919,7 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
14f8ab
     afr_private_t *priv = NULL;
14f8ab
     afr_local_t *local = NULL;
14f8ab
     int read_subvol = -1;
14f8ab
+    int ret = 0;
14f8ab
     unsigned char *data_readable = NULL;
14f8ab
     unsigned char *success_replies = NULL;
14f8ab
 
14f8ab
@@ -2992,7 +2941,10 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
14f8ab
     if (!afr_has_quorum(success_replies, this, frame))
14f8ab
         goto unwind;
14f8ab
 
14f8ab
-    afr_replies_interpret(frame, this, local->inode, NULL);
14f8ab
+    ret = afr_replies_interpret(frame, this, local->inode, NULL);
14f8ab
+    if (ret) {
14f8ab
+        afr_inode_need_refresh_set(local->inode, this);
14f8ab
+    }
14f8ab
 
14f8ab
     read_subvol = afr_read_subvol_decide(local->inode, this, NULL,
14f8ab
                                          data_readable);
14f8ab
@@ -3248,11 +3200,7 @@ afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
14f8ab
     afr_read_subvol_get(loc->inode, this, NULL, NULL, &event,
14f8ab
                         AFR_DATA_TRANSACTION, NULL);
14f8ab
 
14f8ab
-    if (afr_is_inode_refresh_reqd(loc->inode, this, event,
14f8ab
-                                  local->event_generation))
14f8ab
-        afr_inode_refresh(frame, this, loc->inode, NULL, afr_discover_do);
14f8ab
-    else
14f8ab
-        afr_discover_do(frame, this, 0);
14f8ab
+    afr_discover_do(frame, this, 0);
14f8ab
 
14f8ab
     return 0;
14f8ab
 out:
14f8ab
@@ -3393,11 +3341,7 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
14f8ab
     afr_read_subvol_get(loc->parent, this, NULL, NULL, &event,
14f8ab
                         AFR_DATA_TRANSACTION, NULL);
14f8ab
 
14f8ab
-    if (afr_is_inode_refresh_reqd(loc->inode, this, event,
14f8ab
-                                  local->event_generation))
14f8ab
-        afr_inode_refresh(frame, this, loc->parent, NULL, afr_lookup_do);
14f8ab
-    else
14f8ab
-        afr_lookup_do(frame, this, 0);
14f8ab
+    afr_lookup_do(frame, this, 0);
14f8ab
 
14f8ab
     return 0;
14f8ab
 out:
14f8ab
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
14f8ab
index 416c19d..d419bfc 100644
14f8ab
--- a/xlators/cluster/afr/src/afr-dir-write.c
14f8ab
+++ b/xlators/cluster/afr/src/afr-dir-write.c
14f8ab
@@ -123,11 +123,11 @@ __afr_dir_write_finalize(call_frame_t *frame, xlator_t *this)
14f8ab
             continue;
14f8ab
         if (local->replies[i].op_ret < 0) {
14f8ab
             if (local->inode)
14f8ab
-                afr_inode_event_gen_reset(local->inode, this);
14f8ab
+                afr_inode_need_refresh_set(local->inode, this);
14f8ab
             if (local->parent)
14f8ab
-                afr_inode_event_gen_reset(local->parent, this);
14f8ab
+                afr_inode_need_refresh_set(local->parent, this);
14f8ab
             if (local->parent2)
14f8ab
-                afr_inode_event_gen_reset(local->parent2, this);
14f8ab
+                afr_inode_need_refresh_set(local->parent2, this);
14f8ab
             continue;
14f8ab
         }
14f8ab
 
14f8ab
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
14f8ab
index ed5096e..3a2b26d 100644
14f8ab
--- a/xlators/cluster/afr/src/afr.h
14f8ab
+++ b/xlators/cluster/afr/src/afr.h
14f8ab
@@ -948,7 +948,10 @@ afr_inode_read_subvol_set(inode_t *inode, xlator_t *this,
14f8ab
                           int event_generation);
14f8ab
 
14f8ab
 int
14f8ab
-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this);
14f8ab
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
14f8ab
+
14f8ab
+int
14f8ab
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
14f8ab
 
14f8ab
 int
14f8ab
 afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this,
14f8ab
-- 
14f8ab
1.8.3.1
14f8ab