190130
From b166826b283d9071532174ebbec857dea600064b Mon Sep 17 00:00:00 2001
190130
From: Ashish Pandey <aspandey@redhat.com>
190130
Date: Thu, 23 Jul 2020 11:07:32 +0530
190130
Subject: [PATCH 460/465] cluster/ec: Remove stale entries from indices/xattrop
190130
 folder
190130
190130
Problem:
190130
If a gfid is present in indices/xattrop folder while
190130
the file/dir is actually healthy and all the xattrs are healthy,
190130
it causes lot of lookups by shd on an entry which does not need
190130
to be healed.
190130
This whole process eats up lot of CPU usage without doing meaningful
190130
work.
190130
190130
Solution:
190130
Set trusted.ec.dirty xattr of the entry so that actual heal process
190130
happens and at the end of it, during unset of dirty, gfid entry from
190130
indices/xattrop will be removed.
190130
190130
>Upstream patch : https://review.gluster.org/#/c/glusterfs/+/24765/
190130
>Fixes: #1385
190130
190130
Change-Id: Ib1b9377d8dda384bba49523e9ff6ba9f0699cc1b
190130
BUG: 1785714
190130
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
190130
Reviewed-on: https://code.engineering.redhat.com/gerrit/208591
190130
Tested-by: RHGS Build Bot <nigelb@redhat.com>
190130
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
190130
---
190130
 xlators/cluster/ec/src/ec-heal.c  | 73 ++++++++++++++++++++++++++++++++++++++-
190130
 xlators/cluster/ec/src/ec-types.h |  7 +++-
190130
 2 files changed, 78 insertions(+), 2 deletions(-)
190130
190130
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
190130
index e2de879..7d25853 100644
190130
--- a/xlators/cluster/ec/src/ec-heal.c
190130
+++ b/xlators/cluster/ec/src/ec-heal.c
190130
@@ -2488,6 +2488,59 @@ out:
190130
     return ret;
190130
 }
190130
 
190130
+int
190130
+ec_heal_set_dirty_without_lock(call_frame_t *frame, ec_t *ec, inode_t *inode)
190130
+{
190130
+    int i = 0;
190130
+    int ret = 0;
190130
+    dict_t **xattr = NULL;
190130
+    loc_t loc = {0};
190130
+    uint64_t dirty_xattr[EC_VERSION_SIZE] = {0};
190130
+    unsigned char *on = NULL;
190130
+    default_args_cbk_t *replies = NULL;
190130
+    dict_t *dict = NULL;
190130
+
190130
+    /* Allocate the required memory */
190130
+    loc.inode = inode_ref(inode);
190130
+    gf_uuid_copy(loc.gfid, inode->gfid);
190130
+    on = alloca0(ec->nodes);
190130
+    EC_REPLIES_ALLOC(replies, ec->nodes);
190130
+    xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer);
190130
+    if (!xattr) {
190130
+        ret = -ENOMEM;
190130
+        goto out;
190130
+    }
190130
+    dict = dict_new();
190130
+    if (!dict) {
190130
+        ret = -ENOMEM;
190130
+        goto out;
190130
+    }
190130
+    for (i = 0; i < ec->nodes; i++) {
190130
+        xattr[i] = dict;
190130
+        on[i] = 1;
190130
+    }
190130
+    dirty_xattr[EC_METADATA_TXN] = hton64(1);
190130
+    ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr,
190130
+                              (sizeof(*dirty_xattr) * EC_VERSION_SIZE));
190130
+    if (ret < 0) {
190130
+        ret = -ENOMEM;
190130
+        goto out;
190130
+    }
190130
+    PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame,
190130
+                        ec_wind_xattrop_parallel, &loc, GF_XATTROP_ADD_ARRAY64,
190130
+                        xattr, NULL);
190130
+out:
190130
+    if (dict) {
190130
+        dict_unref(dict);
190130
+    }
190130
+    if (xattr) {
190130
+        GF_FREE(xattr);
190130
+    }
190130
+    cluster_replies_wipe(replies, ec->nodes);
190130
+    loc_wipe(&loc);
190130
+    return ret;
190130
+}
190130
+
190130
 void
190130
 ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
190130
 {
190130
@@ -2563,7 +2616,18 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
190130
         ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false,
190130
                         &need_heal);
190130
 
190130
-        if (need_heal == EC_HEAL_NONEED) {
190130
+        if (need_heal == EC_HEAL_PURGE_INDEX) {
190130
+            gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL,
190130
+                   "Index entry needs to be purged for: %s ",
190130
+                   uuid_utoa(loc->gfid));
190130
+            /* We need to send xattrop to set dirty flag so that it can be
190130
+             * healed and index entry could be removed. We need not to take lock
190130
+             * on this entry to do so as we are just setting dirty flag which
190130
+             * actually increases the trusted.ec.dirty count and does not set
190130
+             * the new value.
190130
+             * This will make sure that it is not interfering in other fops.*/
190130
+            ec_heal_set_dirty_without_lock(frame, ec, loc->inode);
190130
+        } else if (need_heal == EC_HEAL_NONEED) {
190130
             gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
190130
                    "Heal is not required for : %s ", uuid_utoa(loc->gfid));
190130
             goto out;
190130
@@ -2958,6 +3022,13 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
190130
                     goto out;
190130
                 }
190130
             }
190130
+            /* If lock count is 0, all dirty flags are 0 and all the
190130
+             * versions are matching then why are we here. It looks
190130
+             * like something went wrong while removing the index entries
190130
+             * after completing a successful heal or fop. In this case
190130
+             * we need to remove this index entry to avoid triggering heal
190130
+             * in a loop and causing lookups again and again*/
190130
+            *need_heal = EC_HEAL_PURGE_INDEX;
190130
         } else {
190130
             for (i = 0; i < ec->nodes; i++) {
190130
                 /* Since each lock can only increment the dirty
190130
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
190130
index f15429d..700dc39 100644
190130
--- a/xlators/cluster/ec/src/ec-types.h
190130
+++ b/xlators/cluster/ec/src/ec-types.h
190130
@@ -130,7 +130,12 @@ typedef void (*ec_resume_f)(ec_fop_data_t *, int32_t);
190130
 
190130
 enum _ec_read_policy { EC_ROUND_ROBIN, EC_GFID_HASH, EC_READ_POLICY_MAX };
190130
 
190130
-enum _ec_heal_need { EC_HEAL_NONEED, EC_HEAL_MAYBE, EC_HEAL_MUST };
190130
+enum _ec_heal_need {
190130
+    EC_HEAL_NONEED,
190130
+    EC_HEAL_MAYBE,
190130
+    EC_HEAL_MUST,
190130
+    EC_HEAL_PURGE_INDEX
190130
+};
190130
 
190130
 enum _ec_stripe_part { EC_STRIPE_HEAD, EC_STRIPE_TAIL };
190130
 
190130
-- 
190130
1.8.3.1
190130