17b94a
From b166826b283d9071532174ebbec857dea600064b Mon Sep 17 00:00:00 2001
17b94a
From: Ashish Pandey <aspandey@redhat.com>
17b94a
Date: Thu, 23 Jul 2020 11:07:32 +0530
17b94a
Subject: [PATCH 460/465] cluster/ec: Remove stale entries from indices/xattrop
17b94a
 folder
17b94a
17b94a
Problem:
17b94a
If a gfid is present in indices/xattrop folder while
17b94a
the file/dir is actually healthy and all the xattrs are healthy,
17b94a
it causes a lot of lookups by shd on an entry which does not need
17b94a
to be healed.
17b94a
This whole process eats up a lot of CPU usage without doing meaningful
17b94a
work.
17b94a
17b94a
Solution:
17b94a
Set trusted.ec.dirty xattr of the entry so that actual heal process
17b94a
happens and at the end of it, during unset of dirty, gfid entry from
17b94a
indices/xattrop will be removed.
17b94a
17b94a
>Upstream patch : https://review.gluster.org/#/c/glusterfs/+/24765/
17b94a
>Fixes: #1385
17b94a
17b94a
Change-Id: Ib1b9377d8dda384bba49523e9ff6ba9f0699cc1b
17b94a
BUG: 1785714
17b94a
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
17b94a
Reviewed-on: https://code.engineering.redhat.com/gerrit/208591
17b94a
Tested-by: RHGS Build Bot <nigelb@redhat.com>
17b94a
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
17b94a
---
17b94a
 xlators/cluster/ec/src/ec-heal.c  | 73 ++++++++++++++++++++++++++++++++++++++-
17b94a
 xlators/cluster/ec/src/ec-types.h |  7 +++-
17b94a
 2 files changed, 78 insertions(+), 2 deletions(-)
17b94a
17b94a
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
17b94a
index e2de879..7d25853 100644
17b94a
--- a/xlators/cluster/ec/src/ec-heal.c
17b94a
+++ b/xlators/cluster/ec/src/ec-heal.c
17b94a
@@ -2488,6 +2488,59 @@ out:
17b94a
     return ret;
17b94a
 }
17b94a
 
17b94a
+int
17b94a
+ec_heal_set_dirty_without_lock(call_frame_t *frame, ec_t *ec, inode_t *inode)
17b94a
+{
17b94a
+    int i = 0;
17b94a
+    int ret = 0;
17b94a
+    dict_t **xattr = NULL;
17b94a
+    loc_t loc = {0};
17b94a
+    uint64_t dirty_xattr[EC_VERSION_SIZE] = {0};
17b94a
+    unsigned char *on = NULL;
17b94a
+    default_args_cbk_t *replies = NULL;
17b94a
+    dict_t *dict = NULL;
17b94a
+
17b94a
+    /* Allocate the required memory */
17b94a
+    loc.inode = inode_ref(inode);
17b94a
+    gf_uuid_copy(loc.gfid, inode->gfid);
17b94a
+    on = alloca0(ec->nodes);
17b94a
+    EC_REPLIES_ALLOC(replies, ec->nodes);
17b94a
+    xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer);
17b94a
+    if (!xattr) {
17b94a
+        ret = -ENOMEM;
17b94a
+        goto out;
17b94a
+    }
17b94a
+    dict = dict_new();
17b94a
+    if (!dict) {
17b94a
+        ret = -ENOMEM;
17b94a
+        goto out;
17b94a
+    }
17b94a
+    for (i = 0; i < ec->nodes; i++) {
17b94a
+        xattr[i] = dict;
17b94a
+        on[i] = 1;
17b94a
+    }
17b94a
+    dirty_xattr[EC_METADATA_TXN] = hton64(1);
17b94a
+    ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr,
17b94a
+                              (sizeof(*dirty_xattr) * EC_VERSION_SIZE));
17b94a
+    if (ret < 0) {
17b94a
+        ret = -ENOMEM;
17b94a
+        goto out;
17b94a
+    }
17b94a
+    PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame,
17b94a
+                        ec_wind_xattrop_parallel, &loc, GF_XATTROP_ADD_ARRAY64,
17b94a
+                        xattr, NULL);
17b94a
+out:
17b94a
+    if (dict) {
17b94a
+        dict_unref(dict);
17b94a
+    }
17b94a
+    if (xattr) {
17b94a
+        GF_FREE(xattr);
17b94a
+    }
17b94a
+    cluster_replies_wipe(replies, ec->nodes);
17b94a
+    loc_wipe(&loc);
17b94a
+    return ret;
17b94a
+}
17b94a
+
17b94a
 void
17b94a
 ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
17b94a
 {
17b94a
@@ -2563,7 +2616,18 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
17b94a
         ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false,
17b94a
                         &need_heal);
17b94a
 
17b94a
-        if (need_heal == EC_HEAL_NONEED) {
17b94a
+        if (need_heal == EC_HEAL_PURGE_INDEX) {
17b94a
+            gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL,
17b94a
+                   "Index entry needs to be purged for: %s ",
17b94a
+                   uuid_utoa(loc->gfid));
17b94a
+            /* We need to send xattrop to set dirty flag so that it can be
17b94a
+             * healed and index entry could be removed. We need not to take lock
17b94a
+             * on this entry to do so as we are just setting dirty flag which
17b94a
+             * actually increases the trusted.ec.dirty count and does not set
17b94a
+             * the new value.
17b94a
+             * This will make sure that it is not interfering in other fops.*/
17b94a
+            ec_heal_set_dirty_without_lock(frame, ec, loc->inode);
17b94a
+        } else if (need_heal == EC_HEAL_NONEED) {
17b94a
             gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
17b94a
                    "Heal is not required for : %s ", uuid_utoa(loc->gfid));
17b94a
             goto out;
17b94a
@@ -2958,6 +3022,13 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
17b94a
                     goto out;
17b94a
                 }
17b94a
             }
17b94a
+            /* If lock count is 0, all dirty flags are 0 and all the
17b94a
+             * versions are matching then why are we here. It looks
17b94a
+             * like something went wrong while removing the index entries
17b94a
+             * after completing a successful heal or fop. In this case
17b94a
+             * we need to remove this index entry to avoid triggering heal
17b94a
+             * in a loop and causing lookups again and again*/
17b94a
+            *need_heal = EC_HEAL_PURGE_INDEX;
17b94a
         } else {
17b94a
             for (i = 0; i < ec->nodes; i++) {
17b94a
                 /* Since each lock can only increment the dirty
17b94a
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
17b94a
index f15429d..700dc39 100644
17b94a
--- a/xlators/cluster/ec/src/ec-types.h
17b94a
+++ b/xlators/cluster/ec/src/ec-types.h
17b94a
@@ -130,7 +130,12 @@ typedef void (*ec_resume_f)(ec_fop_data_t *, int32_t);
17b94a
 
17b94a
 enum _ec_read_policy { EC_ROUND_ROBIN, EC_GFID_HASH, EC_READ_POLICY_MAX };
17b94a
 
17b94a
-enum _ec_heal_need { EC_HEAL_NONEED, EC_HEAL_MAYBE, EC_HEAL_MUST };
17b94a
+enum _ec_heal_need {
17b94a
+    EC_HEAL_NONEED,
17b94a
+    EC_HEAL_MAYBE,
17b94a
+    EC_HEAL_MUST,
17b94a
+    EC_HEAL_PURGE_INDEX
17b94a
+};
17b94a
 
17b94a
 enum _ec_stripe_part { EC_STRIPE_HEAD, EC_STRIPE_TAIL };
17b94a
 
17b94a
-- 
17b94a
1.8.3.1
17b94a