d84cf8
From b166826b283d9071532174ebbec857dea600064b Mon Sep 17 00:00:00 2001
d84cf8
From: Ashish Pandey <aspandey@redhat.com>
d84cf8
Date: Thu, 23 Jul 2020 11:07:32 +0530
d84cf8
Subject: [PATCH 460/465] cluster/ec: Remove stale entries from indices/xattrop
d84cf8
 folder
d84cf8
d84cf8
Problem:
d84cf8
If a gfid is present in indices/xattrop folder while
d84cf8
the file/dir is actually healthy and all the xattrs are healthy,
d84cf8
it causes a lot of lookups by shd on an entry which does not need
d84cf8
to be healed.
d84cf8
This whole process eats up a lot of CPU without doing meaningful
d84cf8
work.
d84cf8
d84cf8
Solution:
d84cf8
Set trusted.ec.dirty xattr of the entry so that actual heal process
d84cf8
happens and at the end of it, during unset of dirty, gfid entry from
d84cf8
indices/xattrop will be removed.
d84cf8
d84cf8
>Upstream patch : https://review.gluster.org/#/c/glusterfs/+/24765/
d84cf8
>Fixes: #1385
d84cf8
d84cf8
Change-Id: Ib1b9377d8dda384bba49523e9ff6ba9f0699cc1b
d84cf8
BUG: 1785714
d84cf8
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
d84cf8
Reviewed-on: https://code.engineering.redhat.com/gerrit/208591
d84cf8
Tested-by: RHGS Build Bot <nigelb@redhat.com>
d84cf8
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
d84cf8
---
d84cf8
 xlators/cluster/ec/src/ec-heal.c  | 73 ++++++++++++++++++++++++++++++++++++++-
d84cf8
 xlators/cluster/ec/src/ec-types.h |  7 +++-
d84cf8
 2 files changed, 78 insertions(+), 2 deletions(-)
d84cf8
d84cf8
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
d84cf8
index e2de879..7d25853 100644
d84cf8
--- a/xlators/cluster/ec/src/ec-heal.c
d84cf8
+++ b/xlators/cluster/ec/src/ec-heal.c
d84cf8
@@ -2488,6 +2488,59 @@ out:
d84cf8
     return ret;
d84cf8
 }
d84cf8
 
d84cf8
+int
d84cf8
+ec_heal_set_dirty_without_lock(call_frame_t *frame, ec_t *ec, inode_t *inode)
d84cf8
+{
d84cf8
+    int i = 0;
d84cf8
+    int ret = 0;
d84cf8
+    dict_t **xattr = NULL;
d84cf8
+    loc_t loc = {0};
d84cf8
+    uint64_t dirty_xattr[EC_VERSION_SIZE] = {0};
d84cf8
+    unsigned char *on = NULL;
d84cf8
+    default_args_cbk_t *replies = NULL;
d84cf8
+    dict_t *dict = NULL;
d84cf8
+
d84cf8
+    /* Allocate the required memory */
d84cf8
+    loc.inode = inode_ref(inode);
d84cf8
+    gf_uuid_copy(loc.gfid, inode->gfid);
d84cf8
+    on = alloca0(ec->nodes);
d84cf8
+    EC_REPLIES_ALLOC(replies, ec->nodes);
d84cf8
+    xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer);
d84cf8
+    if (!xattr) {
d84cf8
+        ret = -ENOMEM;
d84cf8
+        goto out;
d84cf8
+    }
d84cf8
+    dict = dict_new();
d84cf8
+    if (!dict) {
d84cf8
+        ret = -ENOMEM;
d84cf8
+        goto out;
d84cf8
+    }
d84cf8
+    for (i = 0; i < ec->nodes; i++) {
d84cf8
+        xattr[i] = dict;
d84cf8
+        on[i] = 1;
d84cf8
+    }
d84cf8
+    dirty_xattr[EC_METADATA_TXN] = hton64(1);
d84cf8
+    ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr,
d84cf8
+                              (sizeof(*dirty_xattr) * EC_VERSION_SIZE));
d84cf8
+    if (ret < 0) {
d84cf8
+        ret = -ENOMEM;
d84cf8
+        goto out;
d84cf8
+    }
d84cf8
+    PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame,
d84cf8
+                        ec_wind_xattrop_parallel, &loc, GF_XATTROP_ADD_ARRAY64,
d84cf8
+                        xattr, NULL);
d84cf8
+out:
d84cf8
+    if (dict) {
d84cf8
+        dict_unref(dict);
d84cf8
+    }
d84cf8
+    if (xattr) {
d84cf8
+        GF_FREE(xattr);
d84cf8
+    }
d84cf8
+    cluster_replies_wipe(replies, ec->nodes);
d84cf8
+    loc_wipe(&loc);
d84cf8
+    return ret;
d84cf8
+}
d84cf8
+
d84cf8
 void
d84cf8
 ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
d84cf8
 {
d84cf8
@@ -2563,7 +2616,18 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
d84cf8
         ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false,
d84cf8
                         &need_heal);
d84cf8
 
d84cf8
-        if (need_heal == EC_HEAL_NONEED) {
d84cf8
+        if (need_heal == EC_HEAL_PURGE_INDEX) {
d84cf8
+            gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL,
d84cf8
+                   "Index entry needs to be purged for: %s ",
d84cf8
+                   uuid_utoa(loc->gfid));
d84cf8
+            /* We need to send xattrop to set dirty flag so that it can be
d84cf8
+             * healed and index entry could be removed. We need not to take lock
d84cf8
+             * on this entry to do so as we are just setting dirty flag which
d84cf8
+             * actually increases the trusted.ec.dirty count and does not set
d84cf8
+             * the new value.
d84cf8
+             * This will make sure that it is not interfering in other fops.*/
d84cf8
+            ec_heal_set_dirty_without_lock(frame, ec, loc->inode);
d84cf8
+        } else if (need_heal == EC_HEAL_NONEED) {
d84cf8
             gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
d84cf8
                    "Heal is not required for : %s ", uuid_utoa(loc->gfid));
d84cf8
             goto out;
d84cf8
@@ -2958,6 +3022,13 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
d84cf8
                     goto out;
d84cf8
                 }
d84cf8
             }
d84cf8
+            /* If lock count is 0, all dirty flags are 0 and all the
d84cf8
+             * versions are matching then why are we here. It looks
d84cf8
+             * like something went wrong while removing the index entries
d84cf8
+             * after completing a successful heal or fop. In this case
d84cf8
+             * we need to remove this index entry to avoid triggering heal
d84cf8
+             * in a loop and causing lookups again and again*/
d84cf8
+            *need_heal = EC_HEAL_PURGE_INDEX;
d84cf8
         } else {
d84cf8
             for (i = 0; i < ec->nodes; i++) {
d84cf8
                 /* Since each lock can only increment the dirty
d84cf8
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
d84cf8
index f15429d..700dc39 100644
d84cf8
--- a/xlators/cluster/ec/src/ec-types.h
d84cf8
+++ b/xlators/cluster/ec/src/ec-types.h
d84cf8
@@ -130,7 +130,12 @@ typedef void (*ec_resume_f)(ec_fop_data_t *, int32_t);
d84cf8
 
d84cf8
 enum _ec_read_policy { EC_ROUND_ROBIN, EC_GFID_HASH, EC_READ_POLICY_MAX };
d84cf8
 
d84cf8
-enum _ec_heal_need { EC_HEAL_NONEED, EC_HEAL_MAYBE, EC_HEAL_MUST };
d84cf8
+enum _ec_heal_need {
d84cf8
+    EC_HEAL_NONEED,
d84cf8
+    EC_HEAL_MAYBE,
d84cf8
+    EC_HEAL_MUST,
d84cf8
+    EC_HEAL_PURGE_INDEX
d84cf8
+};
d84cf8
 
d84cf8
 enum _ec_stripe_part { EC_STRIPE_HEAD, EC_STRIPE_TAIL };
d84cf8
 
d84cf8
-- 
d84cf8
1.8.3.1
d84cf8