256ebe
From c2b1c50f06cc59b47c9c834617dff2aed7177a78 Mon Sep 17 00:00:00 2001
256ebe
From: Ashish Pandey <aspandey@redhat.com>
256ebe
Date: Mon, 18 Mar 2019 12:54:54 +0530
256ebe
Subject: [PATCH 164/169] cluster/ec: Fix handling of heal info cases without
256ebe
 locks
256ebe
256ebe
When we use the heal info command, it takes a lot of time, as in
256ebe
some cases it takes lock on entries to find out if the
256ebe
entry actually needs heal or not.
256ebe
256ebe
There are some cases where we can avoid these locks and
256ebe
can conclude if the entry needs heal or not.
256ebe
256ebe
1 - We do a lookup (without lock) on an entry, which we found in
256ebe
.glusterfs/indices/xattrop, and find that lock count is
256ebe
zero. Now if the file contains dirty bit set on all or any
256ebe
brick, we can say that this entry needs heal.
256ebe
256ebe
2 - If the lock count is one and dirty is greater than 1,
256ebe
then it also means that some fop had left the dirty bit set
256ebe
which made the dirty count of current fop (which has taken lock)
256ebe
more than one. At this point also we can definitely say that
256ebe
this entry needs heal.
256ebe
256ebe
This patch modifies the code to take into consideration the above two
256ebe
points.
256ebe
It also changes the code to not call ec_heal_inspect if ec_heal_do
256ebe
was called from client side heal. Client side heal triggers heal
256ebe
only when it is sure that it requires heal.
256ebe
256ebe
[We have changed the code to not call heal for lookup]
256ebe
256ebe
Upstream patch -
256ebe
https://review.gluster.org/#/c/glusterfs/+/22372/
256ebe
256ebe
Fixes: bz#1716385
256ebe
Change-Id: I7f09f0ecd12f65a353297aefd57026fd2bebdf9c
256ebe
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
256ebe
Reviewed-on: https://code.engineering.redhat.com/gerrit/172579
256ebe
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
256ebe
Tested-by: RHGS Build Bot <nigelb@redhat.com>
256ebe
---
256ebe
 xlators/cluster/ec/src/ec-heal.c | 42 ++++++++++++++++------------------------
256ebe
 1 file changed, 17 insertions(+), 25 deletions(-)
256ebe
256ebe
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
256ebe
index 3aa04fb..2fa1f11 100644
256ebe
--- a/xlators/cluster/ec/src/ec-heal.c
256ebe
+++ b/xlators/cluster/ec/src/ec-heal.c
256ebe
@@ -2541,13 +2541,15 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
256ebe
 
256ebe
     /* Mount triggers heal only when it detects that it must need heal, shd
256ebe
      * triggers heals periodically which need not be thorough*/
256ebe
-    ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false,
256ebe
-                    !ec->shd.iamshd, &need_heal);
256ebe
+    if (ec->shd.iamshd) {
256ebe
+        ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false,
256ebe
+                        &need_heal);
256ebe
 
256ebe
-    if (need_heal == EC_HEAL_NONEED) {
256ebe
-        gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
256ebe
-               "Heal is not required for : %s ", uuid_utoa(loc->gfid));
256ebe
-        goto out;
256ebe
+        if (need_heal == EC_HEAL_NONEED) {
256ebe
+            gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
256ebe
+                   "Heal is not required for : %s ", uuid_utoa(loc->gfid));
256ebe
+            goto out;
256ebe
+        }
256ebe
     }
256ebe
     sources = alloca0(ec->nodes);
256ebe
     healed_sinks = alloca0(ec->nodes);
256ebe
@@ -2902,7 +2904,7 @@ out:
256ebe
 static int32_t
256ebe
 _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
256ebe
                      gf_boolean_t self_locked, int32_t lock_count,
256ebe
-                     ec_heal_need_t *need_heal)
256ebe
+                     ec_heal_need_t *need_heal, uint64_t *versions)
256ebe
 {
256ebe
     int i = 0;
256ebe
     int source_count = 0;
256ebe
@@ -2912,7 +2914,7 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
256ebe
         *need_heal = EC_HEAL_NONEED;
256ebe
         if (self_locked || lock_count == 0) {
256ebe
             for (i = 0; i < ec->nodes; i++) {
256ebe
-                if (dirty[i]) {
256ebe
+                if (dirty[i] || (versions[i] != versions[0])) {
256ebe
                     *need_heal = EC_HEAL_MUST;
256ebe
                     goto out;
256ebe
                 }
256ebe
@@ -2928,6 +2930,9 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
256ebe
                     *need_heal = EC_HEAL_MUST;
256ebe
                     goto out;
256ebe
                 }
256ebe
+                if (dirty[i] != dirty[0] || (versions[i] != versions[0])) {
256ebe
+                    *need_heal = EC_HEAL_MAYBE;
256ebe
+                }
256ebe
             }
256ebe
         }
256ebe
     } else {
256ebe
@@ -2948,7 +2953,6 @@ ec_need_metadata_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
256ebe
     unsigned char *healed_sinks = NULL;
256ebe
     uint64_t *meta_versions = NULL;
256ebe
     int ret = 0;
256ebe
-    int i = 0;
256ebe
 
256ebe
     sources = alloca0(ec->nodes);
256ebe
     healed_sinks = alloca0(ec->nodes);
256ebe
@@ -2961,15 +2965,7 @@ ec_need_metadata_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
256ebe
     }
256ebe
 
256ebe
     ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
256ebe
-                               need_heal);
256ebe
-    if (ret == ec->nodes && *need_heal == EC_HEAL_NONEED) {
256ebe
-        for (i = 1; i < ec->nodes; i++) {
256ebe
-            if (meta_versions[i] != meta_versions[0]) {
256ebe
-                *need_heal = EC_HEAL_MUST;
256ebe
-                goto out;
256ebe
-            }
256ebe
-        }
256ebe
-    }
256ebe
+                               need_heal, meta_versions);
256ebe
 out:
256ebe
     return ret;
256ebe
 }
256ebe
@@ -3005,7 +3001,7 @@ ec_need_data_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
256ebe
     }
256ebe
 
256ebe
     ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
256ebe
-                               need_heal);
256ebe
+                               need_heal, data_versions);
256ebe
 out:
256ebe
     return ret;
256ebe
 }
256ebe
@@ -3033,7 +3029,7 @@ ec_need_entry_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
256ebe
     }
256ebe
 
256ebe
     ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
256ebe
-                               need_heal);
256ebe
+                               need_heal, data_versions);
256ebe
 out:
256ebe
     return ret;
256ebe
 }
256ebe
@@ -3131,10 +3127,6 @@ ec_heal_inspect(call_frame_t *frame, ec_t *ec, inode_t *inode,
256ebe
 need_heal:
256ebe
     ret = ec_need_heal(ec, inode, replies, lock_count, self_locked, thorough,
256ebe
                        need_heal);
256ebe
-
256ebe
-    if (!self_locked && *need_heal == EC_HEAL_MUST) {
256ebe
-        *need_heal = EC_HEAL_MAYBE;
256ebe
-    }
256ebe
 out:
256ebe
     cluster_replies_wipe(replies, ec->nodes);
256ebe
     loc_wipe(&loc);
256ebe
@@ -3220,7 +3212,7 @@ ec_get_heal_info(xlator_t *this, loc_t *entry_loc, dict_t **dict_rsp)
256ebe
 
256ebe
     ret = ec_heal_inspect(frame, ec, loc.inode, up_subvols, _gf_false,
256ebe
                           _gf_false, &need_heal);
256ebe
-    if (ret == ec->nodes && need_heal == EC_HEAL_NONEED) {
256ebe
+    if (ret == ec->nodes && need_heal != EC_HEAL_MAYBE) {
256ebe
         goto set_heal;
256ebe
     }
256ebe
     need_heal = EC_HEAL_NONEED;
256ebe
-- 
256ebe
1.8.3.1
256ebe