14f8ab
From b166826b283d9071532174ebbec857dea600064b Mon Sep 17 00:00:00 2001
14f8ab
From: Ashish Pandey <aspandey@redhat.com>
14f8ab
Date: Thu, 23 Jul 2020 11:07:32 +0530
14f8ab
Subject: [PATCH 460/465] cluster/ec: Remove stale entries from indices/xattrop
14f8ab
 folder
14f8ab
14f8ab
Problem:
14f8ab
If a gfid is present in indices/xattrop folder while
14f8ab
the file/dir is actually healthy and all the xattrs are healthy,
14f8ab
it causes a lot of lookups by shd on an entry which does not need
14f8ab
to be healed.
14f8ab
This whole process eats up a lot of CPU usage without doing meaningful
14f8ab
work.
14f8ab
14f8ab
Solution:
14f8ab
Set trusted.ec.dirty xattr of the entry so that actual heal process
14f8ab
happens and at the end of it, during unset of dirty, gfid entry from
14f8ab
indices/xattrop will be removed.
14f8ab
14f8ab
>Upstream patch : https://review.gluster.org/#/c/glusterfs/+/24765/
14f8ab
>Fixes: #1385
14f8ab
14f8ab
Change-Id: Ib1b9377d8dda384bba49523e9ff6ba9f0699cc1b
14f8ab
BUG: 1785714
14f8ab
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
14f8ab
Reviewed-on: https://code.engineering.redhat.com/gerrit/208591
14f8ab
Tested-by: RHGS Build Bot <nigelb@redhat.com>
14f8ab
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
14f8ab
---
14f8ab
 xlators/cluster/ec/src/ec-heal.c  | 73 ++++++++++++++++++++++++++++++++++++++-
14f8ab
 xlators/cluster/ec/src/ec-types.h |  7 +++-
14f8ab
 2 files changed, 78 insertions(+), 2 deletions(-)
14f8ab
14f8ab
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
14f8ab
index e2de879..7d25853 100644
14f8ab
--- a/xlators/cluster/ec/src/ec-heal.c
14f8ab
+++ b/xlators/cluster/ec/src/ec-heal.c
14f8ab
@@ -2488,6 +2488,59 @@ out:
14f8ab
     return ret;
14f8ab
 }
14f8ab
 
14f8ab
+int
14f8ab
+ec_heal_set_dirty_without_lock(call_frame_t *frame, ec_t *ec, inode_t *inode)
14f8ab
+{
14f8ab
+    int i = 0;
14f8ab
+    int ret = 0;
14f8ab
+    dict_t **xattr = NULL;
14f8ab
+    loc_t loc = {0};
14f8ab
+    uint64_t dirty_xattr[EC_VERSION_SIZE] = {0};
14f8ab
+    unsigned char *on = NULL;
14f8ab
+    default_args_cbk_t *replies = NULL;
14f8ab
+    dict_t *dict = NULL;
14f8ab
+
14f8ab
+    /* Allocate the required memory */
14f8ab
+    loc.inode = inode_ref(inode);
14f8ab
+    gf_uuid_copy(loc.gfid, inode->gfid);
14f8ab
+    on = alloca0(ec->nodes);
14f8ab
+    EC_REPLIES_ALLOC(replies, ec->nodes);
14f8ab
+    xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer);
14f8ab
+    if (!xattr) {
14f8ab
+        ret = -ENOMEM;
14f8ab
+        goto out;
14f8ab
+    }
14f8ab
+    dict = dict_new();
14f8ab
+    if (!dict) {
14f8ab
+        ret = -ENOMEM;
14f8ab
+        goto out;
14f8ab
+    }
14f8ab
+    for (i = 0; i < ec->nodes; i++) {
14f8ab
+        xattr[i] = dict;
14f8ab
+        on[i] = 1;
14f8ab
+    }
14f8ab
+    dirty_xattr[EC_METADATA_TXN] = hton64(1);
14f8ab
+    ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr,
14f8ab
+                              (sizeof(*dirty_xattr) * EC_VERSION_SIZE));
14f8ab
+    if (ret < 0) {
14f8ab
+        ret = -ENOMEM;
14f8ab
+        goto out;
14f8ab
+    }
14f8ab
+    PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame,
14f8ab
+                        ec_wind_xattrop_parallel, &loc, GF_XATTROP_ADD_ARRAY64,
14f8ab
+                        xattr, NULL);
14f8ab
+out:
14f8ab
+    if (dict) {
14f8ab
+        dict_unref(dict);
14f8ab
+    }
14f8ab
+    if (xattr) {
14f8ab
+        GF_FREE(xattr);
14f8ab
+    }
14f8ab
+    cluster_replies_wipe(replies, ec->nodes);
14f8ab
+    loc_wipe(&loc);
14f8ab
+    return ret;
14f8ab
+}
14f8ab
+
14f8ab
 void
14f8ab
 ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
14f8ab
 {
14f8ab
@@ -2563,7 +2616,18 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
14f8ab
         ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false,
14f8ab
                         &need_heal);
14f8ab
 
14f8ab
-        if (need_heal == EC_HEAL_NONEED) {
14f8ab
+        if (need_heal == EC_HEAL_PURGE_INDEX) {
14f8ab
+            gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL,
14f8ab
+                   "Index entry needs to be purged for: %s ",
14f8ab
+                   uuid_utoa(loc->gfid));
14f8ab
+            /* We need to send xattrop to set dirty flag so that it can be
14f8ab
+             * healed and index entry could be removed. We need not to take lock
14f8ab
+             * on this entry to do so as we are just setting dirty flag which
14f8ab
+             * actually increases the trusted.ec.dirty count and does not set
14f8ab
+             * the new value.
14f8ab
+             * This will make sure that it is not interfering in other fops.*/
14f8ab
+            ec_heal_set_dirty_without_lock(frame, ec, loc->inode);
14f8ab
+        } else if (need_heal == EC_HEAL_NONEED) {
14f8ab
             gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
14f8ab
                    "Heal is not required for : %s ", uuid_utoa(loc->gfid));
14f8ab
             goto out;
14f8ab
@@ -2958,6 +3022,13 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
14f8ab
                     goto out;
14f8ab
                 }
14f8ab
             }
14f8ab
+            /* If lock count is 0, all dirty flags are 0 and all the
14f8ab
+             * versions are matching then why are we here. It looks
14f8ab
+             * like something went wrong while removing the index entries
14f8ab
+             * after completing a successful heal or fop. In this case
14f8ab
+             * we need to remove this index entry to avoid triggering heal
14f8ab
+             * in a loop and causing lookups again and again*/
14f8ab
+            *need_heal = EC_HEAL_PURGE_INDEX;
14f8ab
         } else {
14f8ab
             for (i = 0; i < ec->nodes; i++) {
14f8ab
                 /* Since each lock can only increment the dirty
14f8ab
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
14f8ab
index f15429d..700dc39 100644
14f8ab
--- a/xlators/cluster/ec/src/ec-types.h
14f8ab
+++ b/xlators/cluster/ec/src/ec-types.h
14f8ab
@@ -130,7 +130,12 @@ typedef void (*ec_resume_f)(ec_fop_data_t *, int32_t);
14f8ab
 
14f8ab
 enum _ec_read_policy { EC_ROUND_ROBIN, EC_GFID_HASH, EC_READ_POLICY_MAX };
14f8ab
 
14f8ab
-enum _ec_heal_need { EC_HEAL_NONEED, EC_HEAL_MAYBE, EC_HEAL_MUST };
14f8ab
+enum _ec_heal_need {
14f8ab
+    EC_HEAL_NONEED,
14f8ab
+    EC_HEAL_MAYBE,
14f8ab
+    EC_HEAL_MUST,
14f8ab
+    EC_HEAL_PURGE_INDEX
14f8ab
+};
14f8ab
 
14f8ab
 enum _ec_stripe_part { EC_STRIPE_HEAD, EC_STRIPE_TAIL };
14f8ab
 
14f8ab
-- 
14f8ab
1.8.3.1
14f8ab