b7d4d7
From 4c47d6dd7c5ddcaa2a1e159427c0f6713fd33907 Mon Sep 17 00:00:00 2001
b7d4d7
From: karthik-us <ksubrahm@redhat.com>
b7d4d7
Date: Wed, 23 Dec 2020 14:57:51 +0530
b7d4d7
Subject: [PATCH 514/517] afr: event gen changes
b7d4d7
b7d4d7
The general idea of the changes is to prevent resetting event generation
b7d4d7
to zero in the inode ctx, since event gen is something that should
b7d4d7
follow 'causal order'.
b7d4d7
b7d4d7
Change #1:
b7d4d7
For a read txn, in inode refresh cbk, if event_generation is
b7d4d7
found zero, we are failing the read fop. This is not needed
b7d4d7
because change in event gen is only a marker for the next inode refresh to
b7d4d7
happen and should not be taken into account by the current read txn.
b7d4d7
b7d4d7
Change #2:
b7d4d7
The event gen being zero above can happen if there is a racing lookup,
b7d4d7
which resets even get (in afr_lookup_done) if there are non zero afr
b7d4d7
xattrs. The resetting is done only to trigger an inode refresh and a
b7d4d7
possible client side heal on the next lookup. That can be acheived by
b7d4d7
setting the need_refresh flag in the inode ctx. So replaced all
b7d4d7
occurences of resetting even gen to zero with a call to
b7d4d7
afr_inode_need_refresh_set().
b7d4d7
b7d4d7
Change #3:
b7d4d7
In both lookup and discover path, we are doing an inode refresh which is
b7d4d7
not required since all 3 essentially do the same thing- update the inode
b7d4d7
ctx with the good/bad copies from the brick replies. Inode refresh also
b7d4d7
triggers background heals, but I think it is okay to do it when we call
b7d4d7
refresh during the read and write txns and not in the lookup path.
b7d4d7
b7d4d7
The .ts which relied on inode refresh in lookup path to trigger heals are
b7d4d7
now changed to do read txn so that inode refresh and the heal happens.
b7d4d7
b7d4d7
Upstream patch details:
b7d4d7
> Change-Id: Iebf39a9be6ffd7ffd6e4046c96b0fa78ade6c5ec
b7d4d7
> Fixes: #1179
b7d4d7
> Signed-off-by: Ravishankar N <ravishankar@redhat.com>
b7d4d7
> Reported-by: Erik Jacobson <erik.jacobson at hpe.com>
b7d4d7
Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24316/
b7d4d7
b7d4d7
BUG: 1640148
b7d4d7
Change-Id: Iebf39a9be6ffd7ffd6e4046c96b0fa78ade6c5ec
b7d4d7
Signed-off-by: karthik-us <ksubrahm@redhat.com>
b7d4d7
Reviewed-on: https://code.engineering.redhat.com/gerrit/222074
b7d4d7
Tested-by: RHGS Build Bot <nigelb@redhat.com>
b7d4d7
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
b7d4d7
---
b7d4d7
 ...fid-mismatch-resolution-with-fav-child-policy.t |  8 +-
b7d4d7
 xlators/cluster/afr/src/afr-common.c               | 92 +++++-----------------
b7d4d7
 xlators/cluster/afr/src/afr-dir-write.c            |  6 +-
b7d4d7
 xlators/cluster/afr/src/afr.h                      |  5 +-
b7d4d7
 4 files changed, 29 insertions(+), 82 deletions(-)
b7d4d7
b7d4d7
diff --git a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
b7d4d7
index f4aa351..12af0c8 100644
b7d4d7
--- a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
b7d4d7
+++ b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
b7d4d7
@@ -168,8 +168,8 @@ TEST [ "$gfid_1" != "$gfid_2" ]
b7d4d7
 #We know that second brick has the bigger size file
b7d4d7
 BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/f3 | cut -d\  -f1)
b7d4d7
 
b7d4d7
-TEST ls $M0/f3
b7d4d7
-TEST cat $M0/f3
b7d4d7
+TEST ls $M0 #Trigger entry heal via readdir inode refresh
b7d4d7
+TEST cat $M0/f3 #Trigger data heal via readv inode refresh
b7d4d7
 EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
b7d4d7
 
b7d4d7
 #gfid split-brain should be resolved
b7d4d7
@@ -215,8 +215,8 @@ TEST $CLI volume start $V0 force
b7d4d7
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
b7d4d7
 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
b7d4d7
 
b7d4d7
-TEST ls $M0/f4
b7d4d7
-TEST cat $M0/f4
b7d4d7
+TEST ls $M0 #Trigger entry heal via readdir inode refresh
b7d4d7
+TEST cat $M0/f4  #Trigger data heal via readv inode refresh
b7d4d7
 EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
b7d4d7
 
b7d4d7
 #gfid split-brain should be resolved
b7d4d7
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
b7d4d7
index fca2cd5..90b4f14 100644
b7d4d7
--- a/xlators/cluster/afr/src/afr-common.c
b7d4d7
+++ b/xlators/cluster/afr/src/afr-common.c
b7d4d7
@@ -284,7 +284,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
b7d4d7
                 metadatamap |= (1 << index);
b7d4d7
             }
b7d4d7
             if (metadatamap_old != metadatamap) {
b7d4d7
-                event = 0;
b7d4d7
+                __afr_inode_need_refresh_set(inode, this);
b7d4d7
             }
b7d4d7
             break;
b7d4d7
 
b7d4d7
@@ -297,7 +297,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
b7d4d7
                 datamap |= (1 << index);
b7d4d7
             }
b7d4d7
             if (datamap_old != datamap)
b7d4d7
-                event = 0;
b7d4d7
+                __afr_inode_need_refresh_set(inode, this);
b7d4d7
             break;
b7d4d7
 
b7d4d7
         default:
b7d4d7
@@ -461,34 +461,6 @@ out:
b7d4d7
 }
b7d4d7
 
b7d4d7
 int
b7d4d7
-__afr_inode_event_gen_reset_small(inode_t *inode, xlator_t *this)
b7d4d7
-{
b7d4d7
-    int ret = -1;
b7d4d7
-    uint16_t datamap = 0;
b7d4d7
-    uint16_t metadatamap = 0;
b7d4d7
-    uint32_t event = 0;
b7d4d7
-    uint64_t val = 0;
b7d4d7
-    afr_inode_ctx_t *ctx = NULL;
b7d4d7
-
b7d4d7
-    ret = __afr_inode_ctx_get(this, inode, &ctx;;
b7d4d7
-    if (ret)
b7d4d7
-        return ret;
b7d4d7
-
b7d4d7
-    val = ctx->read_subvol;
b7d4d7
-
b7d4d7
-    metadatamap = (val & 0x000000000000ffff) >> 0;
b7d4d7
-    datamap = (val & 0x00000000ffff0000) >> 16;
b7d4d7
-    event = 0;
b7d4d7
-
b7d4d7
-    val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
b7d4d7
-          (((uint64_t)event) << 32);
b7d4d7
-
b7d4d7
-    ctx->read_subvol = val;
b7d4d7
-
b7d4d7
-    return ret;
b7d4d7
-}
b7d4d7
-
b7d4d7
-int
b7d4d7
 __afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
b7d4d7
                             unsigned char *metadata, int *event_p)
b7d4d7
 {
b7d4d7
@@ -559,22 +531,6 @@ out:
b7d4d7
 }
b7d4d7
 
b7d4d7
 int
b7d4d7
-__afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
b7d4d7
-{
b7d4d7
-    afr_private_t *priv = NULL;
b7d4d7
-    int ret = -1;
b7d4d7
-
b7d4d7
-    priv = this->private;
b7d4d7
-
b7d4d7
-    if (priv->child_count <= 16)
b7d4d7
-        ret = __afr_inode_event_gen_reset_small(inode, this);
b7d4d7
-    else
b7d4d7
-        ret = -1;
b7d4d7
-
b7d4d7
-    return ret;
b7d4d7
-}
b7d4d7
-
b7d4d7
-int
b7d4d7
 afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
b7d4d7
                           unsigned char *metadata, int *event_p)
b7d4d7
 {
b7d4d7
@@ -723,30 +679,22 @@ out:
b7d4d7
     return need_refresh;
b7d4d7
 }
b7d4d7
 
b7d4d7
-static int
b7d4d7
-afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
b7d4d7
+int
b7d4d7
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
b7d4d7
 {
b7d4d7
     int ret = -1;
b7d4d7
     afr_inode_ctx_t *ctx = NULL;
b7d4d7
 
b7d4d7
-    GF_VALIDATE_OR_GOTO(this->name, inode, out);
b7d4d7
-
b7d4d7
-    LOCK(&inode->lock);
b7d4d7
-    {
b7d4d7
-        ret = __afr_inode_ctx_get(this, inode, &ctx;;
b7d4d7
-        if (ret)
b7d4d7
-            goto unlock;
b7d4d7
-
b7d4d7
+    ret = __afr_inode_ctx_get(this, inode, &ctx;;
b7d4d7
+    if (ret == 0) {
b7d4d7
         ctx->need_refresh = _gf_true;
b7d4d7
     }
b7d4d7
-unlock:
b7d4d7
-    UNLOCK(&inode->lock);
b7d4d7
-out:
b7d4d7
+
b7d4d7
     return ret;
b7d4d7
 }
b7d4d7
 
b7d4d7
 int
b7d4d7
-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
b7d4d7
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
b7d4d7
 {
b7d4d7
     int ret = -1;
b7d4d7
 
b7d4d7
@@ -754,7 +702,7 @@ afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
b7d4d7
 
b7d4d7
     LOCK(&inode->lock);
b7d4d7
     {
b7d4d7
-        ret = __afr_inode_event_gen_reset(inode, this);
b7d4d7
+        ret = __afr_inode_need_refresh_set(inode, this);
b7d4d7
     }
b7d4d7
     UNLOCK(&inode->lock);
b7d4d7
 out:
b7d4d7
@@ -1191,7 +1139,7 @@ afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
b7d4d7
     ret = afr_inode_get_readable(frame, inode, this, local->readable,
b7d4d7
                                  &event_generation, local->transaction.type);
b7d4d7
 
b7d4d7
-    if (ret == -EIO || (local->is_read_txn && !event_generation)) {
b7d4d7
+    if (ret == -EIO) {
b7d4d7
         /* No readable subvolume even after refresh ==> splitbrain.*/
b7d4d7
         if (!priv->fav_child_policy) {
b7d4d7
             err = EIO;
b7d4d7
@@ -2413,7 +2361,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
b7d4d7
         if (read_subvol == -1)
b7d4d7
             goto cant_interpret;
b7d4d7
         if (ret) {
b7d4d7
-            afr_inode_event_gen_reset(local->inode, this);
b7d4d7
+            afr_inode_need_refresh_set(local->inode, this);
b7d4d7
             dict_del_sizen(local->replies[read_subvol].xdata, GF_CONTENT_KEY);
b7d4d7
         }
b7d4d7
     } else {
b7d4d7
@@ -2971,6 +2919,7 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
b7d4d7
     afr_private_t *priv = NULL;
b7d4d7
     afr_local_t *local = NULL;
b7d4d7
     int read_subvol = -1;
b7d4d7
+    int ret = 0;
b7d4d7
     unsigned char *data_readable = NULL;
b7d4d7
     unsigned char *success_replies = NULL;
b7d4d7
 
b7d4d7
@@ -2992,7 +2941,10 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
b7d4d7
     if (!afr_has_quorum(success_replies, this, frame))
b7d4d7
         goto unwind;
b7d4d7
 
b7d4d7
-    afr_replies_interpret(frame, this, local->inode, NULL);
b7d4d7
+    ret = afr_replies_interpret(frame, this, local->inode, NULL);
b7d4d7
+    if (ret) {
b7d4d7
+        afr_inode_need_refresh_set(local->inode, this);
b7d4d7
+    }
b7d4d7
 
b7d4d7
     read_subvol = afr_read_subvol_decide(local->inode, this, NULL,
b7d4d7
                                          data_readable);
b7d4d7
@@ -3248,11 +3200,7 @@ afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
b7d4d7
     afr_read_subvol_get(loc->inode, this, NULL, NULL, &event,
b7d4d7
                         AFR_DATA_TRANSACTION, NULL);
b7d4d7
 
b7d4d7
-    if (afr_is_inode_refresh_reqd(loc->inode, this, event,
b7d4d7
-                                  local->event_generation))
b7d4d7
-        afr_inode_refresh(frame, this, loc->inode, NULL, afr_discover_do);
b7d4d7
-    else
b7d4d7
-        afr_discover_do(frame, this, 0);
b7d4d7
+    afr_discover_do(frame, this, 0);
b7d4d7
 
b7d4d7
     return 0;
b7d4d7
 out:
b7d4d7
@@ -3393,11 +3341,7 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
b7d4d7
     afr_read_subvol_get(loc->parent, this, NULL, NULL, &event,
b7d4d7
                         AFR_DATA_TRANSACTION, NULL);
b7d4d7
 
b7d4d7
-    if (afr_is_inode_refresh_reqd(loc->inode, this, event,
b7d4d7
-                                  local->event_generation))
b7d4d7
-        afr_inode_refresh(frame, this, loc->parent, NULL, afr_lookup_do);
b7d4d7
-    else
b7d4d7
-        afr_lookup_do(frame, this, 0);
b7d4d7
+    afr_lookup_do(frame, this, 0);
b7d4d7
 
b7d4d7
     return 0;
b7d4d7
 out:
b7d4d7
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
b7d4d7
index 416c19d..d419bfc 100644
b7d4d7
--- a/xlators/cluster/afr/src/afr-dir-write.c
b7d4d7
+++ b/xlators/cluster/afr/src/afr-dir-write.c
b7d4d7
@@ -123,11 +123,11 @@ __afr_dir_write_finalize(call_frame_t *frame, xlator_t *this)
b7d4d7
             continue;
b7d4d7
         if (local->replies[i].op_ret < 0) {
b7d4d7
             if (local->inode)
b7d4d7
-                afr_inode_event_gen_reset(local->inode, this);
b7d4d7
+                afr_inode_need_refresh_set(local->inode, this);
b7d4d7
             if (local->parent)
b7d4d7
-                afr_inode_event_gen_reset(local->parent, this);
b7d4d7
+                afr_inode_need_refresh_set(local->parent, this);
b7d4d7
             if (local->parent2)
b7d4d7
-                afr_inode_event_gen_reset(local->parent2, this);
b7d4d7
+                afr_inode_need_refresh_set(local->parent2, this);
b7d4d7
             continue;
b7d4d7
         }
b7d4d7
 
b7d4d7
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
b7d4d7
index ed5096e..3a2b26d 100644
b7d4d7
--- a/xlators/cluster/afr/src/afr.h
b7d4d7
+++ b/xlators/cluster/afr/src/afr.h
b7d4d7
@@ -948,7 +948,10 @@ afr_inode_read_subvol_set(inode_t *inode, xlator_t *this,
b7d4d7
                           int event_generation);
b7d4d7
 
b7d4d7
 int
b7d4d7
-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this);
b7d4d7
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
b7d4d7
+
b7d4d7
+int
b7d4d7
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
b7d4d7
 
b7d4d7
 int
b7d4d7
 afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this,
b7d4d7
-- 
b7d4d7
1.8.3.1
b7d4d7