17b94a
From 4c47d6dd7c5ddcaa2a1e159427c0f6713fd33907 Mon Sep 17 00:00:00 2001
17b94a
From: karthik-us <ksubrahm@redhat.com>
17b94a
Date: Wed, 23 Dec 2020 14:57:51 +0530
17b94a
Subject: [PATCH 514/517] afr: event gen changes
17b94a
17b94a
The general idea of the changes is to prevent resetting event generation
17b94a
to zero in the inode ctx, since event gen is something that should
17b94a
follow 'causal order'.
17b94a
17b94a
Change #1:
17b94a
For a read txn, in inode refresh cbk, if event_generation is
17b94a
found zero, we are failing the read fop. This is not needed
17b94a
because change in event gen is only a marker for the next inode refresh to
17b94a
happen and should not be taken into account by the current read txn.
17b94a
17b94a
Change #2:
17b94a
The event gen being zero above can happen if there is a racing lookup,
17b94a
which resets even get (in afr_lookup_done) if there are non zero afr
17b94a
xattrs. The resetting is done only to trigger an inode refresh and a
17b94a
possible client side heal on the next lookup. That can be acheived by
17b94a
setting the need_refresh flag in the inode ctx. So replaced all
17b94a
occurences of resetting even gen to zero with a call to
17b94a
afr_inode_need_refresh_set().
17b94a
17b94a
Change #3:
17b94a
In both lookup and discover path, we are doing an inode refresh which is
17b94a
not required since all 3 essentially do the same thing- update the inode
17b94a
ctx with the good/bad copies from the brick replies. Inode refresh also
17b94a
triggers background heals, but I think it is okay to do it when we call
17b94a
refresh during the read and write txns and not in the lookup path.
17b94a
17b94a
The .ts which relied on inode refresh in lookup path to trigger heals are
17b94a
now changed to do read txn so that inode refresh and the heal happens.
17b94a
17b94a
Upstream patch details:
17b94a
> Change-Id: Iebf39a9be6ffd7ffd6e4046c96b0fa78ade6c5ec
17b94a
> Fixes: #1179
17b94a
> Signed-off-by: Ravishankar N <ravishankar@redhat.com>
17b94a
> Reported-by: Erik Jacobson <erik.jacobson at hpe.com>
17b94a
Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24316/
17b94a
17b94a
BUG: 1640148
17b94a
Change-Id: Iebf39a9be6ffd7ffd6e4046c96b0fa78ade6c5ec
17b94a
Signed-off-by: karthik-us <ksubrahm@redhat.com>
17b94a
Reviewed-on: https://code.engineering.redhat.com/gerrit/222074
17b94a
Tested-by: RHGS Build Bot <nigelb@redhat.com>
17b94a
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
17b94a
---
17b94a
 ...fid-mismatch-resolution-with-fav-child-policy.t |  8 +-
17b94a
 xlators/cluster/afr/src/afr-common.c               | 92 +++++-----------------
17b94a
 xlators/cluster/afr/src/afr-dir-write.c            |  6 +-
17b94a
 xlators/cluster/afr/src/afr.h                      |  5 +-
17b94a
 4 files changed, 29 insertions(+), 82 deletions(-)
17b94a
17b94a
diff --git a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
17b94a
index f4aa351..12af0c8 100644
17b94a
--- a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
17b94a
+++ b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
17b94a
@@ -168,8 +168,8 @@ TEST [ "$gfid_1" != "$gfid_2" ]
17b94a
 #We know that second brick has the bigger size file
17b94a
 BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/f3 | cut -d\  -f1)
17b94a
 
17b94a
-TEST ls $M0/f3
17b94a
-TEST cat $M0/f3
17b94a
+TEST ls $M0 #Trigger entry heal via readdir inode refresh
17b94a
+TEST cat $M0/f3 #Trigger data heal via readv inode refresh
17b94a
 EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
17b94a
 
17b94a
 #gfid split-brain should be resolved
17b94a
@@ -215,8 +215,8 @@ TEST $CLI volume start $V0 force
17b94a
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
17b94a
 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
17b94a
 
17b94a
-TEST ls $M0/f4
17b94a
-TEST cat $M0/f4
17b94a
+TEST ls $M0 #Trigger entry heal via readdir inode refresh
17b94a
+TEST cat $M0/f4  #Trigger data heal via readv inode refresh
17b94a
 EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
17b94a
 
17b94a
 #gfid split-brain should be resolved
17b94a
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
17b94a
index fca2cd5..90b4f14 100644
17b94a
--- a/xlators/cluster/afr/src/afr-common.c
17b94a
+++ b/xlators/cluster/afr/src/afr-common.c
17b94a
@@ -284,7 +284,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
17b94a
                 metadatamap |= (1 << index);
17b94a
             }
17b94a
             if (metadatamap_old != metadatamap) {
17b94a
-                event = 0;
17b94a
+                __afr_inode_need_refresh_set(inode, this);
17b94a
             }
17b94a
             break;
17b94a
 
17b94a
@@ -297,7 +297,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
17b94a
                 datamap |= (1 << index);
17b94a
             }
17b94a
             if (datamap_old != datamap)
17b94a
-                event = 0;
17b94a
+                __afr_inode_need_refresh_set(inode, this);
17b94a
             break;
17b94a
 
17b94a
         default:
17b94a
@@ -461,34 +461,6 @@ out:
17b94a
 }
17b94a
 
17b94a
 int
17b94a
-__afr_inode_event_gen_reset_small(inode_t *inode, xlator_t *this)
17b94a
-{
17b94a
-    int ret = -1;
17b94a
-    uint16_t datamap = 0;
17b94a
-    uint16_t metadatamap = 0;
17b94a
-    uint32_t event = 0;
17b94a
-    uint64_t val = 0;
17b94a
-    afr_inode_ctx_t *ctx = NULL;
17b94a
-
17b94a
-    ret = __afr_inode_ctx_get(this, inode, &ctx;;
17b94a
-    if (ret)
17b94a
-        return ret;
17b94a
-
17b94a
-    val = ctx->read_subvol;
17b94a
-
17b94a
-    metadatamap = (val & 0x000000000000ffff) >> 0;
17b94a
-    datamap = (val & 0x00000000ffff0000) >> 16;
17b94a
-    event = 0;
17b94a
-
17b94a
-    val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
17b94a
-          (((uint64_t)event) << 32);
17b94a
-
17b94a
-    ctx->read_subvol = val;
17b94a
-
17b94a
-    return ret;
17b94a
-}
17b94a
-
17b94a
-int
17b94a
 __afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
17b94a
                             unsigned char *metadata, int *event_p)
17b94a
 {
17b94a
@@ -559,22 +531,6 @@ out:
17b94a
 }
17b94a
 
17b94a
 int
17b94a
-__afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
17b94a
-{
17b94a
-    afr_private_t *priv = NULL;
17b94a
-    int ret = -1;
17b94a
-
17b94a
-    priv = this->private;
17b94a
-
17b94a
-    if (priv->child_count <= 16)
17b94a
-        ret = __afr_inode_event_gen_reset_small(inode, this);
17b94a
-    else
17b94a
-        ret = -1;
17b94a
-
17b94a
-    return ret;
17b94a
-}
17b94a
-
17b94a
-int
17b94a
 afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
17b94a
                           unsigned char *metadata, int *event_p)
17b94a
 {
17b94a
@@ -723,30 +679,22 @@ out:
17b94a
     return need_refresh;
17b94a
 }
17b94a
 
17b94a
-static int
17b94a
-afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
17b94a
+int
17b94a
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
17b94a
 {
17b94a
     int ret = -1;
17b94a
     afr_inode_ctx_t *ctx = NULL;
17b94a
 
17b94a
-    GF_VALIDATE_OR_GOTO(this->name, inode, out);
17b94a
-
17b94a
-    LOCK(&inode->lock);
17b94a
-    {
17b94a
-        ret = __afr_inode_ctx_get(this, inode, &ctx;;
17b94a
-        if (ret)
17b94a
-            goto unlock;
17b94a
-
17b94a
+    ret = __afr_inode_ctx_get(this, inode, &ctx;;
17b94a
+    if (ret == 0) {
17b94a
         ctx->need_refresh = _gf_true;
17b94a
     }
17b94a
-unlock:
17b94a
-    UNLOCK(&inode->lock);
17b94a
-out:
17b94a
+
17b94a
     return ret;
17b94a
 }
17b94a
 
17b94a
 int
17b94a
-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
17b94a
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
17b94a
 {
17b94a
     int ret = -1;
17b94a
 
17b94a
@@ -754,7 +702,7 @@ afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
17b94a
 
17b94a
     LOCK(&inode->lock);
17b94a
     {
17b94a
-        ret = __afr_inode_event_gen_reset(inode, this);
17b94a
+        ret = __afr_inode_need_refresh_set(inode, this);
17b94a
     }
17b94a
     UNLOCK(&inode->lock);
17b94a
 out:
17b94a
@@ -1191,7 +1139,7 @@ afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
17b94a
     ret = afr_inode_get_readable(frame, inode, this, local->readable,
17b94a
                                  &event_generation, local->transaction.type);
17b94a
 
17b94a
-    if (ret == -EIO || (local->is_read_txn && !event_generation)) {
17b94a
+    if (ret == -EIO) {
17b94a
         /* No readable subvolume even after refresh ==> splitbrain.*/
17b94a
         if (!priv->fav_child_policy) {
17b94a
             err = EIO;
17b94a
@@ -2413,7 +2361,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
17b94a
         if (read_subvol == -1)
17b94a
             goto cant_interpret;
17b94a
         if (ret) {
17b94a
-            afr_inode_event_gen_reset(local->inode, this);
17b94a
+            afr_inode_need_refresh_set(local->inode, this);
17b94a
             dict_del_sizen(local->replies[read_subvol].xdata, GF_CONTENT_KEY);
17b94a
         }
17b94a
     } else {
17b94a
@@ -2971,6 +2919,7 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
17b94a
     afr_private_t *priv = NULL;
17b94a
     afr_local_t *local = NULL;
17b94a
     int read_subvol = -1;
17b94a
+    int ret = 0;
17b94a
     unsigned char *data_readable = NULL;
17b94a
     unsigned char *success_replies = NULL;
17b94a
 
17b94a
@@ -2992,7 +2941,10 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
17b94a
     if (!afr_has_quorum(success_replies, this, frame))
17b94a
         goto unwind;
17b94a
 
17b94a
-    afr_replies_interpret(frame, this, local->inode, NULL);
17b94a
+    ret = afr_replies_interpret(frame, this, local->inode, NULL);
17b94a
+    if (ret) {
17b94a
+        afr_inode_need_refresh_set(local->inode, this);
17b94a
+    }
17b94a
 
17b94a
     read_subvol = afr_read_subvol_decide(local->inode, this, NULL,
17b94a
                                          data_readable);
17b94a
@@ -3248,11 +3200,7 @@ afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
17b94a
     afr_read_subvol_get(loc->inode, this, NULL, NULL, &event,
17b94a
                         AFR_DATA_TRANSACTION, NULL);
17b94a
 
17b94a
-    if (afr_is_inode_refresh_reqd(loc->inode, this, event,
17b94a
-                                  local->event_generation))
17b94a
-        afr_inode_refresh(frame, this, loc->inode, NULL, afr_discover_do);
17b94a
-    else
17b94a
-        afr_discover_do(frame, this, 0);
17b94a
+    afr_discover_do(frame, this, 0);
17b94a
 
17b94a
     return 0;
17b94a
 out:
17b94a
@@ -3393,11 +3341,7 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
17b94a
     afr_read_subvol_get(loc->parent, this, NULL, NULL, &event,
17b94a
                         AFR_DATA_TRANSACTION, NULL);
17b94a
 
17b94a
-    if (afr_is_inode_refresh_reqd(loc->inode, this, event,
17b94a
-                                  local->event_generation))
17b94a
-        afr_inode_refresh(frame, this, loc->parent, NULL, afr_lookup_do);
17b94a
-    else
17b94a
-        afr_lookup_do(frame, this, 0);
17b94a
+    afr_lookup_do(frame, this, 0);
17b94a
 
17b94a
     return 0;
17b94a
 out:
17b94a
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
17b94a
index 416c19d..d419bfc 100644
17b94a
--- a/xlators/cluster/afr/src/afr-dir-write.c
17b94a
+++ b/xlators/cluster/afr/src/afr-dir-write.c
17b94a
@@ -123,11 +123,11 @@ __afr_dir_write_finalize(call_frame_t *frame, xlator_t *this)
17b94a
             continue;
17b94a
         if (local->replies[i].op_ret < 0) {
17b94a
             if (local->inode)
17b94a
-                afr_inode_event_gen_reset(local->inode, this);
17b94a
+                afr_inode_need_refresh_set(local->inode, this);
17b94a
             if (local->parent)
17b94a
-                afr_inode_event_gen_reset(local->parent, this);
17b94a
+                afr_inode_need_refresh_set(local->parent, this);
17b94a
             if (local->parent2)
17b94a
-                afr_inode_event_gen_reset(local->parent2, this);
17b94a
+                afr_inode_need_refresh_set(local->parent2, this);
17b94a
             continue;
17b94a
         }
17b94a
 
17b94a
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
17b94a
index ed5096e..3a2b26d 100644
17b94a
--- a/xlators/cluster/afr/src/afr.h
17b94a
+++ b/xlators/cluster/afr/src/afr.h
17b94a
@@ -948,7 +948,10 @@ afr_inode_read_subvol_set(inode_t *inode, xlator_t *this,
17b94a
                           int event_generation);
17b94a
 
17b94a
 int
17b94a
-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this);
17b94a
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
17b94a
+
17b94a
+int
17b94a
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
17b94a
 
17b94a
 int
17b94a
 afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this,
17b94a
-- 
17b94a
1.8.3.1
17b94a