74096c
From 4c47d6dd7c5ddcaa2a1e159427c0f6713fd33907 Mon Sep 17 00:00:00 2001
74096c
From: karthik-us <ksubrahm@redhat.com>
74096c
Date: Wed, 23 Dec 2020 14:57:51 +0530
74096c
Subject: [PATCH 514/517] afr: event gen changes
74096c
74096c
The general idea of the changes is to prevent resetting event generation
74096c
to zero in the inode ctx, since event gen is something that should
74096c
follow 'causal order'.
74096c
74096c
Change #1:
74096c
For a read txn, in inode refresh cbk, if event_generation is
74096c
found zero, we are failing the read fop. This is not needed
74096c
because change in event gen is only a marker for the next inode refresh to
74096c
happen and should not be taken into account by the current read txn.
74096c
74096c
Change #2:
74096c
The event gen being zero above can happen if there is a racing lookup,
74096c
which resets even get (in afr_lookup_done) if there are non zero afr
74096c
xattrs. The resetting is done only to trigger an inode refresh and a
74096c
possible client side heal on the next lookup. That can be acheived by
74096c
setting the need_refresh flag in the inode ctx. So replaced all
74096c
occurences of resetting even gen to zero with a call to
74096c
afr_inode_need_refresh_set().
74096c
74096c
Change #3:
74096c
In both lookup and discover path, we are doing an inode refresh which is
74096c
not required since all 3 essentially do the same thing- update the inode
74096c
ctx with the good/bad copies from the brick replies. Inode refresh also
74096c
triggers background heals, but I think it is okay to do it when we call
74096c
refresh during the read and write txns and not in the lookup path.
74096c
74096c
The .ts which relied on inode refresh in lookup path to trigger heals are
74096c
now changed to do read txn so that inode refresh and the heal happens.
74096c
74096c
Upstream patch details:
74096c
> Change-Id: Iebf39a9be6ffd7ffd6e4046c96b0fa78ade6c5ec
74096c
> Fixes: #1179
74096c
> Signed-off-by: Ravishankar N <ravishankar@redhat.com>
74096c
> Reported-by: Erik Jacobson <erik.jacobson at hpe.com>
74096c
Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24316/
74096c
74096c
BUG: 1640148
74096c
Change-Id: Iebf39a9be6ffd7ffd6e4046c96b0fa78ade6c5ec
74096c
Signed-off-by: karthik-us <ksubrahm@redhat.com>
74096c
Reviewed-on: https://code.engineering.redhat.com/gerrit/222074
74096c
Tested-by: RHGS Build Bot <nigelb@redhat.com>
74096c
Reviewed-by: Ravishankar Narayanankutty <ravishankar@redhat.com>
74096c
---
74096c
 ...fid-mismatch-resolution-with-fav-child-policy.t |  8 +-
74096c
 xlators/cluster/afr/src/afr-common.c               | 92 +++++-----------------
74096c
 xlators/cluster/afr/src/afr-dir-write.c            |  6 +-
74096c
 xlators/cluster/afr/src/afr.h                      |  5 +-
74096c
 4 files changed, 29 insertions(+), 82 deletions(-)
74096c
74096c
diff --git a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
74096c
index f4aa351..12af0c8 100644
74096c
--- a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
74096c
+++ b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
74096c
@@ -168,8 +168,8 @@ TEST [ "$gfid_1" != "$gfid_2" ]
74096c
 #We know that second brick has the bigger size file
74096c
 BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/f3 | cut -d\  -f1)
74096c
 
74096c
-TEST ls $M0/f3
74096c
-TEST cat $M0/f3
74096c
+TEST ls $M0 #Trigger entry heal via readdir inode refresh
74096c
+TEST cat $M0/f3 #Trigger data heal via readv inode refresh
74096c
 EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
74096c
 
74096c
 #gfid split-brain should be resolved
74096c
@@ -215,8 +215,8 @@ TEST $CLI volume start $V0 force
74096c
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
74096c
 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
74096c
 
74096c
-TEST ls $M0/f4
74096c
-TEST cat $M0/f4
74096c
+TEST ls $M0 #Trigger entry heal via readdir inode refresh
74096c
+TEST cat $M0/f4  #Trigger data heal via readv inode refresh
74096c
 EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
74096c
 
74096c
 #gfid split-brain should be resolved
74096c
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
74096c
index fca2cd5..90b4f14 100644
74096c
--- a/xlators/cluster/afr/src/afr-common.c
74096c
+++ b/xlators/cluster/afr/src/afr-common.c
74096c
@@ -284,7 +284,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
74096c
                 metadatamap |= (1 << index);
74096c
             }
74096c
             if (metadatamap_old != metadatamap) {
74096c
-                event = 0;
74096c
+                __afr_inode_need_refresh_set(inode, this);
74096c
             }
74096c
             break;
74096c
 
74096c
@@ -297,7 +297,7 @@ __afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
74096c
                 datamap |= (1 << index);
74096c
             }
74096c
             if (datamap_old != datamap)
74096c
-                event = 0;
74096c
+                __afr_inode_need_refresh_set(inode, this);
74096c
             break;
74096c
 
74096c
         default:
74096c
@@ -461,34 +461,6 @@ out:
74096c
 }
74096c
 
74096c
 int
74096c
-__afr_inode_event_gen_reset_small(inode_t *inode, xlator_t *this)
74096c
-{
74096c
-    int ret = -1;
74096c
-    uint16_t datamap = 0;
74096c
-    uint16_t metadatamap = 0;
74096c
-    uint32_t event = 0;
74096c
-    uint64_t val = 0;
74096c
-    afr_inode_ctx_t *ctx = NULL;
74096c
-
74096c
-    ret = __afr_inode_ctx_get(this, inode, &ctx;;
74096c
-    if (ret)
74096c
-        return ret;
74096c
-
74096c
-    val = ctx->read_subvol;
74096c
-
74096c
-    metadatamap = (val & 0x000000000000ffff) >> 0;
74096c
-    datamap = (val & 0x00000000ffff0000) >> 16;
74096c
-    event = 0;
74096c
-
74096c
-    val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
74096c
-          (((uint64_t)event) << 32);
74096c
-
74096c
-    ctx->read_subvol = val;
74096c
-
74096c
-    return ret;
74096c
-}
74096c
-
74096c
-int
74096c
 __afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
74096c
                             unsigned char *metadata, int *event_p)
74096c
 {
74096c
@@ -559,22 +531,6 @@ out:
74096c
 }
74096c
 
74096c
 int
74096c
-__afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
74096c
-{
74096c
-    afr_private_t *priv = NULL;
74096c
-    int ret = -1;
74096c
-
74096c
-    priv = this->private;
74096c
-
74096c
-    if (priv->child_count <= 16)
74096c
-        ret = __afr_inode_event_gen_reset_small(inode, this);
74096c
-    else
74096c
-        ret = -1;
74096c
-
74096c
-    return ret;
74096c
-}
74096c
-
74096c
-int
74096c
 afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
74096c
                           unsigned char *metadata, int *event_p)
74096c
 {
74096c
@@ -723,30 +679,22 @@ out:
74096c
     return need_refresh;
74096c
 }
74096c
 
74096c
-static int
74096c
-afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
74096c
+int
74096c
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
74096c
 {
74096c
     int ret = -1;
74096c
     afr_inode_ctx_t *ctx = NULL;
74096c
 
74096c
-    GF_VALIDATE_OR_GOTO(this->name, inode, out);
74096c
-
74096c
-    LOCK(&inode->lock);
74096c
-    {
74096c
-        ret = __afr_inode_ctx_get(this, inode, &ctx;;
74096c
-        if (ret)
74096c
-            goto unlock;
74096c
-
74096c
+    ret = __afr_inode_ctx_get(this, inode, &ctx;;
74096c
+    if (ret == 0) {
74096c
         ctx->need_refresh = _gf_true;
74096c
     }
74096c
-unlock:
74096c
-    UNLOCK(&inode->lock);
74096c
-out:
74096c
+
74096c
     return ret;
74096c
 }
74096c
 
74096c
 int
74096c
-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
74096c
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
74096c
 {
74096c
     int ret = -1;
74096c
 
74096c
@@ -754,7 +702,7 @@ afr_inode_event_gen_reset(inode_t *inode, xlator_t *this)
74096c
 
74096c
     LOCK(&inode->lock);
74096c
     {
74096c
-        ret = __afr_inode_event_gen_reset(inode, this);
74096c
+        ret = __afr_inode_need_refresh_set(inode, this);
74096c
     }
74096c
     UNLOCK(&inode->lock);
74096c
 out:
74096c
@@ -1191,7 +1139,7 @@ afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
74096c
     ret = afr_inode_get_readable(frame, inode, this, local->readable,
74096c
                                  &event_generation, local->transaction.type);
74096c
 
74096c
-    if (ret == -EIO || (local->is_read_txn && !event_generation)) {
74096c
+    if (ret == -EIO) {
74096c
         /* No readable subvolume even after refresh ==> splitbrain.*/
74096c
         if (!priv->fav_child_policy) {
74096c
             err = EIO;
74096c
@@ -2413,7 +2361,7 @@ afr_lookup_done(call_frame_t *frame, xlator_t *this)
74096c
         if (read_subvol == -1)
74096c
             goto cant_interpret;
74096c
         if (ret) {
74096c
-            afr_inode_event_gen_reset(local->inode, this);
74096c
+            afr_inode_need_refresh_set(local->inode, this);
74096c
             dict_del_sizen(local->replies[read_subvol].xdata, GF_CONTENT_KEY);
74096c
         }
74096c
     } else {
74096c
@@ -2971,6 +2919,7 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
74096c
     afr_private_t *priv = NULL;
74096c
     afr_local_t *local = NULL;
74096c
     int read_subvol = -1;
74096c
+    int ret = 0;
74096c
     unsigned char *data_readable = NULL;
74096c
     unsigned char *success_replies = NULL;
74096c
 
74096c
@@ -2992,7 +2941,10 @@ afr_discover_unwind(call_frame_t *frame, xlator_t *this)
74096c
     if (!afr_has_quorum(success_replies, this, frame))
74096c
         goto unwind;
74096c
 
74096c
-    afr_replies_interpret(frame, this, local->inode, NULL);
74096c
+    ret = afr_replies_interpret(frame, this, local->inode, NULL);
74096c
+    if (ret) {
74096c
+        afr_inode_need_refresh_set(local->inode, this);
74096c
+    }
74096c
 
74096c
     read_subvol = afr_read_subvol_decide(local->inode, this, NULL,
74096c
                                          data_readable);
74096c
@@ -3248,11 +3200,7 @@ afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
74096c
     afr_read_subvol_get(loc->inode, this, NULL, NULL, &event,
74096c
                         AFR_DATA_TRANSACTION, NULL);
74096c
 
74096c
-    if (afr_is_inode_refresh_reqd(loc->inode, this, event,
74096c
-                                  local->event_generation))
74096c
-        afr_inode_refresh(frame, this, loc->inode, NULL, afr_discover_do);
74096c
-    else
74096c
-        afr_discover_do(frame, this, 0);
74096c
+    afr_discover_do(frame, this, 0);
74096c
 
74096c
     return 0;
74096c
 out:
74096c
@@ -3393,11 +3341,7 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
74096c
     afr_read_subvol_get(loc->parent, this, NULL, NULL, &event,
74096c
                         AFR_DATA_TRANSACTION, NULL);
74096c
 
74096c
-    if (afr_is_inode_refresh_reqd(loc->inode, this, event,
74096c
-                                  local->event_generation))
74096c
-        afr_inode_refresh(frame, this, loc->parent, NULL, afr_lookup_do);
74096c
-    else
74096c
-        afr_lookup_do(frame, this, 0);
74096c
+    afr_lookup_do(frame, this, 0);
74096c
 
74096c
     return 0;
74096c
 out:
74096c
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
74096c
index 416c19d..d419bfc 100644
74096c
--- a/xlators/cluster/afr/src/afr-dir-write.c
74096c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
74096c
@@ -123,11 +123,11 @@ __afr_dir_write_finalize(call_frame_t *frame, xlator_t *this)
74096c
             continue;
74096c
         if (local->replies[i].op_ret < 0) {
74096c
             if (local->inode)
74096c
-                afr_inode_event_gen_reset(local->inode, this);
74096c
+                afr_inode_need_refresh_set(local->inode, this);
74096c
             if (local->parent)
74096c
-                afr_inode_event_gen_reset(local->parent, this);
74096c
+                afr_inode_need_refresh_set(local->parent, this);
74096c
             if (local->parent2)
74096c
-                afr_inode_event_gen_reset(local->parent2, this);
74096c
+                afr_inode_need_refresh_set(local->parent2, this);
74096c
             continue;
74096c
         }
74096c
 
74096c
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
74096c
index ed5096e..3a2b26d 100644
74096c
--- a/xlators/cluster/afr/src/afr.h
74096c
+++ b/xlators/cluster/afr/src/afr.h
74096c
@@ -948,7 +948,10 @@ afr_inode_read_subvol_set(inode_t *inode, xlator_t *this,
74096c
                           int event_generation);
74096c
 
74096c
 int
74096c
-afr_inode_event_gen_reset(inode_t *inode, xlator_t *this);
74096c
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
74096c
+
74096c
+int
74096c
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
74096c
 
74096c
 int
74096c
 afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this,
74096c
-- 
74096c
1.8.3.1
74096c