e3c68b
From c2b1c50f06cc59b47c9c834617dff2aed7177a78 Mon Sep 17 00:00:00 2001
e3c68b
From: Ashish Pandey <aspandey@redhat.com>
e3c68b
Date: Mon, 18 Mar 2019 12:54:54 +0530
e3c68b
Subject: [PATCH 164/169] cluster/ec: Fix handling of heal info cases without
e3c68b
 locks
e3c68b
e3c68b
When we use the heal info command, it takes a lot of time, as in
e3c68b
some cases it takes lock on entries to find out if the
e3c68b
entry actually needs heal or not.
e3c68b
e3c68b
There are some cases where we can avoid these locks and
e3c68b
can conclude if the entry needs heal or not.
e3c68b
e3c68b
1 - We do a lookup (without lock) on an entry, which we found in
e3c68b
.glusterfs/indices/xattrop, and find that lock count is
e3c68b
zero. Now if the file has the dirty bit set on all or any
e3c68b
brick, we can say that this entry needs heal.
e3c68b
e3c68b
2 - If the lock count is one and dirty is greater than 1,
e3c68b
then it also means that some fop had left the dirty bit set
e3c68b
which made the dirty count of current fop (which has taken lock)
e3c68b
more than one. At this point also we can definitely say that
e3c68b
this entry needs heal.
e3c68b
e3c68b
This patch modifies the code to take into consideration the above two
e3c68b
points.
e3c68b
It also changes the code to not call ec_heal_inspect if ec_heal_do
e3c68b
was called from client-side heal. Client-side heal triggers heal
e3c68b
only when it is sure that it requires heal.
e3c68b
e3c68b
[We have changed the code to not call heal for lookup]
e3c68b
e3c68b
Upstream patch -
e3c68b
https://review.gluster.org/#/c/glusterfs/+/22372/
e3c68b
e3c68b
Fixes: bz#1716385
e3c68b
Change-Id: I7f09f0ecd12f65a353297aefd57026fd2bebdf9c
e3c68b
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
e3c68b
Reviewed-on: https://code.engineering.redhat.com/gerrit/172579
e3c68b
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
e3c68b
Tested-by: RHGS Build Bot <nigelb@redhat.com>
e3c68b
---
e3c68b
 xlators/cluster/ec/src/ec-heal.c | 42 ++++++++++++++++------------------------
e3c68b
 1 file changed, 17 insertions(+), 25 deletions(-)
e3c68b
e3c68b
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
e3c68b
index 3aa04fb..2fa1f11 100644
e3c68b
--- a/xlators/cluster/ec/src/ec-heal.c
e3c68b
+++ b/xlators/cluster/ec/src/ec-heal.c
e3c68b
@@ -2541,13 +2541,15 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
e3c68b
 
e3c68b
     /* Mount triggers heal only when it detects that it must need heal, shd
e3c68b
      * triggers heals periodically which need not be thorough*/
e3c68b
-    ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false,
e3c68b
-                    !ec->shd.iamshd, &need_heal);
e3c68b
+    if (ec->shd.iamshd) {
e3c68b
+        ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false,
e3c68b
+                        &need_heal);
e3c68b
 
e3c68b
-    if (need_heal == EC_HEAL_NONEED) {
e3c68b
-        gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
e3c68b
-               "Heal is not required for : %s ", uuid_utoa(loc->gfid));
e3c68b
-        goto out;
e3c68b
+        if (need_heal == EC_HEAL_NONEED) {
e3c68b
+            gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
e3c68b
+                   "Heal is not required for : %s ", uuid_utoa(loc->gfid));
e3c68b
+            goto out;
e3c68b
+        }
e3c68b
     }
e3c68b
     sources = alloca0(ec->nodes);
e3c68b
     healed_sinks = alloca0(ec->nodes);
e3c68b
@@ -2902,7 +2904,7 @@ out:
e3c68b
 static int32_t
e3c68b
 _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
e3c68b
                      gf_boolean_t self_locked, int32_t lock_count,
e3c68b
-                     ec_heal_need_t *need_heal)
e3c68b
+                     ec_heal_need_t *need_heal, uint64_t *versions)
e3c68b
 {
e3c68b
     int i = 0;
e3c68b
     int source_count = 0;
e3c68b
@@ -2912,7 +2914,7 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
e3c68b
         *need_heal = EC_HEAL_NONEED;
e3c68b
         if (self_locked || lock_count == 0) {
e3c68b
             for (i = 0; i < ec->nodes; i++) {
e3c68b
-                if (dirty[i]) {
e3c68b
+                if (dirty[i] || (versions[i] != versions[0])) {
e3c68b
                     *need_heal = EC_HEAL_MUST;
e3c68b
                     goto out;
e3c68b
                 }
e3c68b
@@ -2928,6 +2930,9 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
e3c68b
                     *need_heal = EC_HEAL_MUST;
e3c68b
                     goto out;
e3c68b
                 }
e3c68b
+                if (dirty[i] != dirty[0] || (versions[i] != versions[0])) {
e3c68b
+                    *need_heal = EC_HEAL_MAYBE;
e3c68b
+                }
e3c68b
             }
e3c68b
         }
e3c68b
     } else {
e3c68b
@@ -2948,7 +2953,6 @@ ec_need_metadata_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
e3c68b
     unsigned char *healed_sinks = NULL;
e3c68b
     uint64_t *meta_versions = NULL;
e3c68b
     int ret = 0;
e3c68b
-    int i = 0;
e3c68b
 
e3c68b
     sources = alloca0(ec->nodes);
e3c68b
     healed_sinks = alloca0(ec->nodes);
e3c68b
@@ -2961,15 +2965,7 @@ ec_need_metadata_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
e3c68b
     }
e3c68b
 
e3c68b
     ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
e3c68b
-                               need_heal);
e3c68b
-    if (ret == ec->nodes && *need_heal == EC_HEAL_NONEED) {
e3c68b
-        for (i = 1; i < ec->nodes; i++) {
e3c68b
-            if (meta_versions[i] != meta_versions[0]) {
e3c68b
-                *need_heal = EC_HEAL_MUST;
e3c68b
-                goto out;
e3c68b
-            }
e3c68b
-        }
e3c68b
-    }
e3c68b
+                               need_heal, meta_versions);
e3c68b
 out:
e3c68b
     return ret;
e3c68b
 }
e3c68b
@@ -3005,7 +3001,7 @@ ec_need_data_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
e3c68b
     }
e3c68b
 
e3c68b
     ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
e3c68b
-                               need_heal);
e3c68b
+                               need_heal, data_versions);
e3c68b
 out:
e3c68b
     return ret;
e3c68b
 }
e3c68b
@@ -3033,7 +3029,7 @@ ec_need_entry_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
e3c68b
     }
e3c68b
 
e3c68b
     ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
e3c68b
-                               need_heal);
e3c68b
+                               need_heal, data_versions);
e3c68b
 out:
e3c68b
     return ret;
e3c68b
 }
e3c68b
@@ -3131,10 +3127,6 @@ ec_heal_inspect(call_frame_t *frame, ec_t *ec, inode_t *inode,
e3c68b
 need_heal:
e3c68b
     ret = ec_need_heal(ec, inode, replies, lock_count, self_locked, thorough,
e3c68b
                        need_heal);
e3c68b
-
e3c68b
-    if (!self_locked && *need_heal == EC_HEAL_MUST) {
e3c68b
-        *need_heal = EC_HEAL_MAYBE;
e3c68b
-    }
e3c68b
 out:
e3c68b
     cluster_replies_wipe(replies, ec->nodes);
e3c68b
     loc_wipe(&loc);
e3c68b
@@ -3220,7 +3212,7 @@ ec_get_heal_info(xlator_t *this, loc_t *entry_loc, dict_t **dict_rsp)
e3c68b
 
e3c68b
     ret = ec_heal_inspect(frame, ec, loc.inode, up_subvols, _gf_false,
e3c68b
                           _gf_false, &need_heal);
e3c68b
-    if (ret == ec->nodes && need_heal == EC_HEAL_NONEED) {
e3c68b
+    if (ret == ec->nodes && need_heal != EC_HEAL_MAYBE) {
e3c68b
         goto set_heal;
e3c68b
     }
e3c68b
     need_heal = EC_HEAL_NONEED;
e3c68b
-- 
e3c68b
1.8.3.1
e3c68b