74096c
From b166826b283d9071532174ebbec857dea600064b Mon Sep 17 00:00:00 2001
74096c
From: Ashish Pandey <aspandey@redhat.com>
74096c
Date: Thu, 23 Jul 2020 11:07:32 +0530
74096c
Subject: [PATCH 460/465] cluster/ec: Remove stale entries from indices/xattrop
74096c
 folder
74096c
74096c
Problem:
74096c
If a gfid is present in indices/xattrop folder while
74096c
the file/dir is actually healthy and all the xattrs are healthy,
74096c
it causes a lot of lookups by shd on an entry which does not need
74096c
to be healed.
74096c
This whole process eats up a lot of CPU without doing meaningful
74096c
work.
74096c
74096c
Solution:
74096c
Set trusted.ec.dirty xattr of the entry so that actual heal process
74096c
happens and at the end of it, during unset of dirty, gfid entry from
74096c
indices/xattrop will be removed.
74096c
74096c
>Upstream patch : https://review.gluster.org/#/c/glusterfs/+/24765/
74096c
>Fixes: #1385
74096c
74096c
Change-Id: Ib1b9377d8dda384bba49523e9ff6ba9f0699cc1b
74096c
BUG: 1785714
74096c
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
74096c
Reviewed-on: https://code.engineering.redhat.com/gerrit/208591
74096c
Tested-by: RHGS Build Bot <nigelb@redhat.com>
74096c
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
74096c
---
74096c
 xlators/cluster/ec/src/ec-heal.c  | 73 ++++++++++++++++++++++++++++++++++++++-
74096c
 xlators/cluster/ec/src/ec-types.h |  7 +++-
74096c
 2 files changed, 78 insertions(+), 2 deletions(-)
74096c
74096c
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
74096c
index e2de879..7d25853 100644
74096c
--- a/xlators/cluster/ec/src/ec-heal.c
74096c
+++ b/xlators/cluster/ec/src/ec-heal.c
74096c
@@ -2488,6 +2488,59 @@ out:
74096c
     return ret;
74096c
 }
74096c
 
74096c
+int
74096c
+ec_heal_set_dirty_without_lock(call_frame_t *frame, ec_t *ec, inode_t *inode)
74096c
+{
74096c
+    int i = 0;
74096c
+    int ret = 0;
74096c
+    dict_t **xattr = NULL;
74096c
+    loc_t loc = {0};
74096c
+    uint64_t dirty_xattr[EC_VERSION_SIZE] = {0};
74096c
+    unsigned char *on = NULL;
74096c
+    default_args_cbk_t *replies = NULL;
74096c
+    dict_t *dict = NULL;
74096c
+
74096c
+    /* Allocate the required memory */
74096c
+    loc.inode = inode_ref(inode);
74096c
+    gf_uuid_copy(loc.gfid, inode->gfid);
74096c
+    on = alloca0(ec->nodes);
74096c
+    EC_REPLIES_ALLOC(replies, ec->nodes);
74096c
+    xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer);
74096c
+    if (!xattr) {
74096c
+        ret = -ENOMEM;
74096c
+        goto out;
74096c
+    }
74096c
+    dict = dict_new();
74096c
+    if (!dict) {
74096c
+        ret = -ENOMEM;
74096c
+        goto out;
74096c
+    }
74096c
+    for (i = 0; i < ec->nodes; i++) {
74096c
+        xattr[i] = dict;
74096c
+        on[i] = 1;
74096c
+    }
74096c
+    dirty_xattr[EC_METADATA_TXN] = hton64(1);
74096c
+    ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr,
74096c
+                              (sizeof(*dirty_xattr) * EC_VERSION_SIZE));
74096c
+    if (ret < 0) {
74096c
+        ret = -ENOMEM;
74096c
+        goto out;
74096c
+    }
74096c
+    PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame,
74096c
+                        ec_wind_xattrop_parallel, &loc, GF_XATTROP_ADD_ARRAY64,
74096c
+                        xattr, NULL);
74096c
+out:
74096c
+    if (dict) {
74096c
+        dict_unref(dict);
74096c
+    }
74096c
+    if (xattr) {
74096c
+        GF_FREE(xattr);
74096c
+    }
74096c
+    cluster_replies_wipe(replies, ec->nodes);
74096c
+    loc_wipe(&loc);
74096c
+    return ret;
74096c
+}
74096c
+
74096c
 void
74096c
 ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
74096c
 {
74096c
@@ -2563,7 +2616,18 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
74096c
         ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false,
74096c
                         &need_heal);
74096c
 
74096c
-        if (need_heal == EC_HEAL_NONEED) {
74096c
+        if (need_heal == EC_HEAL_PURGE_INDEX) {
74096c
+            gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL,
74096c
+                   "Index entry needs to be purged for: %s ",
74096c
+                   uuid_utoa(loc->gfid));
74096c
+            /* We need to send xattrop to set dirty flag so that it can be
74096c
+             * healed and index entry could be removed. We need not to take lock
74096c
+             * on this entry to do so as we are just setting dirty flag which
74096c
+             * actually increases the trusted.ec.dirty count and does not set
74096c
+             * the new value.
74096c
+             * This will make sure that it is not interfering in other fops.*/
74096c
+            ec_heal_set_dirty_without_lock(frame, ec, loc->inode);
74096c
+        } else if (need_heal == EC_HEAL_NONEED) {
74096c
             gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
74096c
                    "Heal is not required for : %s ", uuid_utoa(loc->gfid));
74096c
             goto out;
74096c
@@ -2958,6 +3022,13 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
74096c
                     goto out;
74096c
                 }
74096c
             }
74096c
+            /* If lock count is 0, all dirty flags are 0 and all the
74096c
+             * versions are matching then why are we here. It looks
74096c
+             * like something went wrong while removing the index entries
74096c
+             * after completing a successful heal or fop. In this case
74096c
+             * we need to remove this index entry to avoid triggering heal
74096c
+             * in a loop and causing lookups again and again*/
74096c
+            *need_heal = EC_HEAL_PURGE_INDEX;
74096c
         } else {
74096c
             for (i = 0; i < ec->nodes; i++) {
74096c
                 /* Since each lock can only increment the dirty
74096c
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
74096c
index f15429d..700dc39 100644
74096c
--- a/xlators/cluster/ec/src/ec-types.h
74096c
+++ b/xlators/cluster/ec/src/ec-types.h
74096c
@@ -130,7 +130,12 @@ typedef void (*ec_resume_f)(ec_fop_data_t *, int32_t);
74096c
 
74096c
 enum _ec_read_policy { EC_ROUND_ROBIN, EC_GFID_HASH, EC_READ_POLICY_MAX };
74096c
 
74096c
-enum _ec_heal_need { EC_HEAL_NONEED, EC_HEAL_MAYBE, EC_HEAL_MUST };
74096c
+enum _ec_heal_need {
74096c
+    EC_HEAL_NONEED,
74096c
+    EC_HEAL_MAYBE,
74096c
+    EC_HEAL_MUST,
74096c
+    EC_HEAL_PURGE_INDEX
74096c
+};
74096c
 
74096c
 enum _ec_stripe_part { EC_STRIPE_HEAD, EC_STRIPE_TAIL };
74096c
 
74096c
-- 
74096c
1.8.3.1
74096c