b51a1f
From 4c47d6dd7c5ddcaa2a1e159427c0f6713fd33907 Mon Sep 17 00:00:00 2001
b51a1f
From: karthik-us <ksubrahm@redhat.com>
b51a1f
Date: Wed, 23 Dec 2020 14:57:51 +0530
b51a1f
Subject: [PATCH 514/517] afr: event gen changes
b51a1f
b51a1f
The general idea of the changes is to prevent resetting event generation
b51a1f
to zero in the inode ctx, since event gen is something that should
b51a1f
follow 'causal order'.
b51a1f
b51a1f
Change #1:
b51a1f
For a read txn, in inode refresh cbk, if event_generation is
b51a1f
found zero, we are failing the read fop. This is not needed
b51a1f
because change in event gen is only a marker for the next inode refresh to
b51a1f
happen and should not be taken into account by the current read txn.
b51a1f
b51a1f
Change #2:
b51a1f
The event gen being zero above can happen if there is a racing lookup,
b51a1f
which resets even get (in afr_lookup_done) if there are non zero afr
b51a1f
xattrs. The resetting is done only to trigger an inode refresh and a
b51a1f
possible client side heal on the next lookup. That can be acheived by
b51a1f
setting the need_refresh flag in the inode ctx. So replaced all
b51a1f
occurences of resetting even gen to zero with a call to
b51a1f
afr_inode_need_refresh_set().
b51a1f
b51a1f
Change #3:
b51a1f
In both lookup and discover path, we are doing an inode refresh which is
b51a1f
not required since all 3 essentially do the same thing- update the inode
b51a1f
ctx with the good/bad copies from the brick replies. Inode refresh also
b51a1f
triggers background heals, but I think it is okay to do it when we call
b51a1f
refresh during the read and write txns and not in the lookup path.
b51a1f
b51a1f
The .ts which relied on inode refresh in lookup path to trigger heals are
b51a1f
now changed to do read txn so that inode refresh and the heal happens.
b51a1f
b51a1f
Upstream patch details:
b51a1f
> Change-Id: Iebf39a9be6ffd7ffd6e4046c96b0fa78ade6c5ec
b51a1f
> Fixes: #1179
b51a1f
> Signed-off-by: Ravishankar N <ravishankar@redhat.com>
b51a1f
> Reported-by: Erik Jacobson <erik.jacobson at hpe.com>
b51a1f
Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24316/
b51a1f
b51a1f
BUG: 1640148
b51a1f
Change-Id: Iebf39a9be6ffd7ffd6e4046c96b0fa78ade6c5ec
b51a1f
Signed-off-by: karthik-us <ksubrahm@redhat.com>
b51a1f
Reviewed-on: https://code.engineering.redhat.com/gerrit/222074
b51a1f
Tested-by: RHGS Build Bot <nigelb@redhat.com>
b51a1f
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
b51a1f
---
b51a1f
 ...fid-mismatch-resolution-with-fav-child-policy.t |  8 +-
b51a1f
 xlators/cluster/afr/src/afr-common.c               | 92 +++++-----------------
b51a1f
 xlators/cluster/afr/src/afr-dir-write.c            |  6 +-
b51a1f
 xlators/cluster/afr/src/afr.h                      |  5 +-
b51a1f
 4 files changed, 29 insertions(+), 82 deletions(-)
b51a1f
b51a1f
diff --git a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
b51a1f
index f4aa351..12af0c8 100644
b51a1f
--- a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
b51a1f
+++ b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
b51a1f
@@ -168,8 +168,8 @@ TEST [ "$gfid_1" != "$gfid_2" ]
b51a1f
 #We know that second brick has the bigger size file
b51a1f
 BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/f3 | cut -d\  -f1)
b51a1f
 
b51a1f
-TEST ls $M0/f3
b51a1f
-TEST cat $M0/f3
b51a1f
+TEST ls $M0 #Trigger entry heal via readdir inode refresh
b51a1f
+TEST cat $M0/f3 #Trigger data heal via readv inode refresh
b51a1f
 EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
b51a1f
 
b51a1f
 #gfid split-brain should be resolved
b51a1f
@@ -215,8 +215,8 @@ TEST $CLI volume start $V0 force
b51a1f
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
b51a1f
 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
b51a1f
 
b51a1f
-TEST ls $M0/f4
b51a1f
-TEST cat $M0/f4
b51a1f
+TEST ls $M0 #Trigger entry heal via readdir inode refresh
b51a1f
+TEST cat $M0/f4  #Trigger data heal via readv inode refresh
b51a1f
 EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
b51a1f
 
b51a1f
 #gfid split-brain should be resolved
b51a1f
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
b51a1f
index fca2cd5..90b4f14 100644
b51a1f
--- a/xlators/cluster/afr/src/afr-common.c
b51a1f
+++ b/xlators/cluster/afr/src/afr-common.c
b51a1f
@@ -284,7 +284,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
b51a1f
                 metadatamap |= (1 << index);
b51a1f
             }
b51a1f
             if (metadatamap_old != metadatamap) {
b51a1f
-                event = 0;
b51a1f
+                __afr_inode_need_refresh_set(inode, this);
b51a1f
             }
b51a1f
             break;
b51a1f
 
b51a1f
@@ -297,7 +297,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
b51a1f
                 datamap |= (1 << index);
b51a1f
             }
b51a1f
             if (datamap_old != datamap)
b51a1f
-                event = 0;
b51a1f
+                __afr_inode_need_refresh_set(inode, this);
b51a1f
             break;
b51a1f
 
b51a1f
         default:
b51a1f
@@ -461,34 +461,6 @@ out:
b51a1f
 }
b51a1f
 
b51a1f
 int
b51a1f
-__afr_inode_event_gen_reset_small(inode_t *inode, xlator_t *this)
b51a1f
-{
b51a1f
-    int ret = -1;
b51a1f
-    uint16_t datamap = 0;
b51a1f
-    uint16_t metadatamap = 0;
b51a1f
-    uint32_t event = 0;
b51a1f
-    uint64_t val = 0;
b51a1f
-    afr_inode_ctx_t *ctx = NULL;
b51a1f
-
b51a1f
-    ret = __afr_inode_ctx_get(this, inode, &ctx;;
b51a1f
-    if (ret)
b51a1f
-        return ret;
b51a1f
-
b51a1f
-    val = ctx->read_subvol;
b51a1f
-
b51a1f
-    metadatamap = (val & 0x000000000000ffff) >> 0;
b51a1f
-    datamap = (val & 0x00000000ffff0000) >> 16;
b51a1f
-    event = 0;
b51a1f
-
b51a1f
-    val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
b51a1f
-          (((uint64_t)event) << 32);
b51a1f
-
b51a1f
-    ctx->read_subvol = val;
b51a1f
-
b51a1f
-    return ret;
b51a1f
-}
b51a1f
-
b51a1f
-int
b51a1f
 __afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
b51a1f
                             unsigned char *metadata, int *event_p)
b51a1f
 {
b51a1f
@@ -559,22 +531,6 @@ out:
b51a1f
 }
b51a1f
 
b51a1f
 int
b51a1f
-__afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
b51a1f
-{
b51a1f
-    afr_private_t *priv = NULL;
b51a1f
-    int ret = -1;
b51a1f
-
b51a1f
-    priv = this->private;
b51a1f
-
b51a1f
-    if (priv->child_count <= 16)
b51a1f
-        ret = __afr_inode_event_gen_reset_small(inode, this);
b51a1f
-    else
b51a1f
-        ret = -1;
b51a1f
-
b51a1f
-    return ret;
b51a1f
-}
b51a1f
-
b51a1f
-int
b51a1f
 afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
b51a1f
                           unsigned char *metadata, int *event_p)
b51a1f
 {
b51a1f
@@ -723,30 +679,22 @@ out:
b51a1f
     return need_refresh;
b51a1f
 }
b51a1f
 
b51a1f
-static int
b51a1f
-afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
b51a1f
+int
b51a1f
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
b51a1f
 {
b51a1f
     int ret = -1;
b51a1f
     afr_inode_ctx_t *ctx = NULL;
b51a1f
 
b51a1f
-    GF_VALIDATE_OR_GOTO(this->name, inode, out);
b51a1f
-
b51a1f
-    LOCK(&inode->lock);
b51a1f
-    {
b51a1f
-        ret = __afr_inode_ctx_get(this, inode, &ctx;;
b51a1f
-        if (ret)
b51a1f
-            goto unlock;
b51a1f
-
b51a1f
+    ret = __afr_inode_ctx_get(this, inode, &ctx;;
b51a1f
+    if (ret == 0) {
b51a1f
         ctx->need_refresh = _gf_true;
b51a1f
     }
b51a1f
-unlock:
b51a1f
-    UNLOCK(&inode->lock);
b51a1f
-out:
b51a1f
+
b51a1f
     return ret;
b51a1f
 }
b51a1f
 
b51a1f
 int
b51a1f
-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
b51a1f
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
b51a1f
 {
b51a1f
     int ret = -1;
b51a1f
 
b51a1f
@@ -754,7 +702,7 @@ afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
b51a1f
 
b51a1f
     LOCK(&inode->lock);
b51a1f
     {
b51a1f
-        ret = __afr_inode_event_gen_reset(inode, this);
b51a1f
+        ret = __afr_inode_need_refresh_set(inode, this);
b51a1f
     }
b51a1f
     UNLOCK(&inode->lock);
b51a1f
 out:
b51a1f
@@ -1191,7 +1139,7 @@ afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
b51a1f
     ret = afr_inode_get_readable(frame, inode, this, local->readable,
b51a1f
                                  &event_generation, local->transaction.type);
b51a1f
 
b51a1f
-    if (ret == -EIO || (local->is_read_txn && !event_generation)) {
b51a1f
+    if (ret == -EIO) {
b51a1f
         /* No readable subvolume even after refresh ==> splitbrain.*/
b51a1f
         if (!priv->fav_child_policy) {
b51a1f
             err = EIO;
b51a1f
@@ -2413,7 +2361,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
b51a1f
         if (read_subvol == -1)
b51a1f
             goto cant_interpret;
b51a1f
         if (ret) {
b51a1f
-            afr_inode_event_gen_reset(local->inode, this);
b51a1f
+            afr_inode_need_refresh_set(local->inode, this);
b51a1f
             dict_del_sizen(local->replies[read_subvol].xdata, GF_CONTENT_KEY);
b51a1f
         }
b51a1f
     } else {
b51a1f
@@ -2971,6 +2919,7 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
b51a1f
     afr_private_t *priv = NULL;
b51a1f
     afr_local_t *local = NULL;
b51a1f
     int read_subvol = -1;
b51a1f
+    int ret = 0;
b51a1f
     unsigned char *data_readable = NULL;
b51a1f
     unsigned char *success_replies = NULL;
b51a1f
 
b51a1f
@@ -2992,7 +2941,10 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
b51a1f
     if (!afr_has_quorum(success_replies, this, frame))
b51a1f
         goto unwind;
b51a1f
 
b51a1f
-    afr_replies_interpret(frame, this, local->inode, NULL);
b51a1f
+    ret = afr_replies_interpret(frame, this, local->inode, NULL);
b51a1f
+    if (ret) {
b51a1f
+        afr_inode_need_refresh_set(local->inode, this);
b51a1f
+    }
b51a1f
 
b51a1f
     read_subvol = afr_read_subvol_decide(local->inode, this, NULL,
b51a1f
                                          data_readable);
b51a1f
@@ -3248,11 +3200,7 @@ afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
b51a1f
     afr_read_subvol_get(loc->inode, this, NULL, NULL, &event,
b51a1f
                         AFR_DATA_TRANSACTION, NULL);
b51a1f
 
b51a1f
-    if (afr_is_inode_refresh_reqd(loc->inode, this, event,
b51a1f
-                                  local->event_generation))
b51a1f
-        afr_inode_refresh(frame, this, loc->inode, NULL, afr_discover_do);
b51a1f
-    else
b51a1f
-        afr_discover_do(frame, this, 0);
b51a1f
+    afr_discover_do(frame, this, 0);
b51a1f
 
b51a1f
     return 0;
b51a1f
 out:
b51a1f
@@ -3393,11 +3341,7 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
b51a1f
     afr_read_subvol_get(loc->parent, this, NULL, NULL, &event,
b51a1f
                         AFR_DATA_TRANSACTION, NULL);
b51a1f
 
b51a1f
-    if (afr_is_inode_refresh_reqd(loc->inode, this, event,
b51a1f
-                                  local->event_generation))
b51a1f
-        afr_inode_refresh(frame, this, loc->parent, NULL, afr_lookup_do);
b51a1f
-    else
b51a1f
-        afr_lookup_do(frame, this, 0);
b51a1f
+    afr_lookup_do(frame, this, 0);
b51a1f
 
b51a1f
     return 0;
b51a1f
 out:
b51a1f
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
b51a1f
index 416c19d..d419bfc 100644
b51a1f
--- a/xlators/cluster/afr/src/afr-dir-write.c
b51a1f
+++ b/xlators/cluster/afr/src/afr-dir-write.c
b51a1f
@@ -123,11 +123,11 @@ __afr_dir_write_finalize(call_frame_t *frame, xlator_t *this)
b51a1f
             continue;
b51a1f
         if (local->replies[i].op_ret < 0) {
b51a1f
             if (local->inode)
b51a1f
-                afr_inode_event_gen_reset(local->inode, this);
b51a1f
+                afr_inode_need_refresh_set(local->inode, this);
b51a1f
             if (local->parent)
b51a1f
-                afr_inode_event_gen_reset(local->parent, this);
b51a1f
+                afr_inode_need_refresh_set(local->parent, this);
b51a1f
             if (local->parent2)
b51a1f
-                afr_inode_event_gen_reset(local->parent2, this);
b51a1f
+                afr_inode_need_refresh_set(local->parent2, this);
b51a1f
             continue;
b51a1f
         }
b51a1f
 
b51a1f
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
b51a1f
index ed5096e..3a2b26d 100644
b51a1f
--- a/xlators/cluster/afr/src/afr.h
b51a1f
+++ b/xlators/cluster/afr/src/afr.h
b51a1f
@@ -948,7 +948,10 @@ afr_inode_read_subvol_set(inode_t *inode, xlator_t *this,
b51a1f
                           int event_generation);
b51a1f
 
b51a1f
 int
b51a1f
-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this);
b51a1f
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
b51a1f
+
b51a1f
+int
b51a1f
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
b51a1f
 
b51a1f
 int
b51a1f
 afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this,
b51a1f
-- 
b51a1f
1.8.3.1
b51a1f