9ae3f9
From b166826b283d9071532174ebbec857dea600064b Mon Sep 17 00:00:00 2001
9ae3f9
From: Ashish Pandey <aspandey@redhat.com>
9ae3f9
Date: Thu, 23 Jul 2020 11:07:32 +0530
9ae3f9
Subject: [PATCH 460/465] cluster/ec: Remove stale entries from indices/xattrop
9ae3f9
 folder
9ae3f9
9ae3f9
Problem:
9ae3f9
If a gfid is present in indices/xattrop folder while
9ae3f9
the file/dir is actually healthy and all the xattrs are healthy,
9ae3f9
it causes a lot of lookups by shd on an entry which does not need
9ae3f9
to be healed.
9ae3f9
This whole process eats up a lot of CPU without doing meaningful
9ae3f9
work.
9ae3f9
9ae3f9
Solution:
9ae3f9
Set trusted.ec.dirty xattr of the entry so that actual heal process
9ae3f9
happens and at the end of it, during unset of dirty, the gfid entry from
9ae3f9
indices/xattrop will be removed.
9ae3f9
9ae3f9
>Upstream patch : https://review.gluster.org/#/c/glusterfs/+/24765/
9ae3f9
>Fixes: #1385
9ae3f9
9ae3f9
Change-Id: Ib1b9377d8dda384bba49523e9ff6ba9f0699cc1b
9ae3f9
BUG: 1785714
9ae3f9
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
9ae3f9
Reviewed-on: https://code.engineering.redhat.com/gerrit/208591
9ae3f9
Tested-by: RHGS Build Bot <nigelb@redhat.com>
9ae3f9
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
9ae3f9
---
9ae3f9
 xlators/cluster/ec/src/ec-heal.c  | 73 ++++++++++++++++++++++++++++++++++++++-
9ae3f9
 xlators/cluster/ec/src/ec-types.h |  7 +++-
9ae3f9
 2 files changed, 78 insertions(+), 2 deletions(-)
9ae3f9
9ae3f9
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
9ae3f9
index e2de879..7d25853 100644
9ae3f9
--- a/xlators/cluster/ec/src/ec-heal.c
9ae3f9
+++ b/xlators/cluster/ec/src/ec-heal.c
9ae3f9
@@ -2488,6 +2488,59 @@ out:
9ae3f9
     return ret;
9ae3f9
 }
9ae3f9
 
9ae3f9
+int
9ae3f9
+ec_heal_set_dirty_without_lock(call_frame_t *frame, ec_t *ec, inode_t *inode)
9ae3f9
+{
9ae3f9
+    int i = 0;
9ae3f9
+    int ret = 0;
9ae3f9
+    dict_t **xattr = NULL;
9ae3f9
+    loc_t loc = {0};
9ae3f9
+    uint64_t dirty_xattr[EC_VERSION_SIZE] = {0};
9ae3f9
+    unsigned char *on = NULL;
9ae3f9
+    default_args_cbk_t *replies = NULL;
9ae3f9
+    dict_t *dict = NULL;
9ae3f9
+
9ae3f9
+    /* Allocate the required memory */
9ae3f9
+    loc.inode = inode_ref(inode);
9ae3f9
+    gf_uuid_copy(loc.gfid, inode->gfid);
9ae3f9
+    on = alloca0(ec->nodes);
9ae3f9
+    EC_REPLIES_ALLOC(replies, ec->nodes);
9ae3f9
+    xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer);
9ae3f9
+    if (!xattr) {
9ae3f9
+        ret = -ENOMEM;
9ae3f9
+        goto out;
9ae3f9
+    }
9ae3f9
+    dict = dict_new();
9ae3f9
+    if (!dict) {
9ae3f9
+        ret = -ENOMEM;
9ae3f9
+        goto out;
9ae3f9
+    }
9ae3f9
+    for (i = 0; i < ec->nodes; i++) {
9ae3f9
+        xattr[i] = dict;
9ae3f9
+        on[i] = 1;
9ae3f9
+    }
9ae3f9
+    dirty_xattr[EC_METADATA_TXN] = hton64(1);
9ae3f9
+    ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr,
9ae3f9
+                              (sizeof(*dirty_xattr) * EC_VERSION_SIZE));
9ae3f9
+    if (ret < 0) {
9ae3f9
+        ret = -ENOMEM;
9ae3f9
+        goto out;
9ae3f9
+    }
9ae3f9
+    PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame,
9ae3f9
+                        ec_wind_xattrop_parallel, &loc, GF_XATTROP_ADD_ARRAY64,
9ae3f9
+                        xattr, NULL);
9ae3f9
+out:
9ae3f9
+    if (dict) {
9ae3f9
+        dict_unref(dict);
9ae3f9
+    }
9ae3f9
+    if (xattr) {
9ae3f9
+        GF_FREE(xattr);
9ae3f9
+    }
9ae3f9
+    cluster_replies_wipe(replies, ec->nodes);
9ae3f9
+    loc_wipe(&loc);
9ae3f9
+    return ret;
9ae3f9
+}
9ae3f9
+
9ae3f9
 void
9ae3f9
 ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
9ae3f9
 {
9ae3f9
@@ -2563,7 +2616,18 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
9ae3f9
         ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false,
9ae3f9
                         &need_heal);
9ae3f9
 
9ae3f9
-        if (need_heal == EC_HEAL_NONEED) {
9ae3f9
+        if (need_heal == EC_HEAL_PURGE_INDEX) {
9ae3f9
+            gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL,
9ae3f9
+                   "Index entry needs to be purged for: %s ",
9ae3f9
+                   uuid_utoa(loc->gfid));
9ae3f9
+            /* We need to send xattrop to set dirty flag so that it can be
9ae3f9
+             * healed and index entry could be removed. We need not take a lock
9ae3f9
+             * on this entry to do so as we are just setting dirty flag which
9ae3f9
+             * actually increases the trusted.ec.dirty count and does not set
9ae3f9
+             * the new value.
9ae3f9
+             * This will make sure that it is not interfering in other fops.*/
9ae3f9
+            ec_heal_set_dirty_without_lock(frame, ec, loc->inode);
9ae3f9
+        } else if (need_heal == EC_HEAL_NONEED) {
9ae3f9
             gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
9ae3f9
                    "Heal is not required for : %s ", uuid_utoa(loc->gfid));
9ae3f9
             goto out;
9ae3f9
@@ -2958,6 +3022,13 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
9ae3f9
                     goto out;
9ae3f9
                 }
9ae3f9
             }
9ae3f9
+            /* If lock count is 0, all dirty flags are 0 and all the
9ae3f9
+             * versions are matching then why are we here. It looks
9ae3f9
+             * like something went wrong while removing the index entries
9ae3f9
+             * after completing a successful heal or fop. In this case
9ae3f9
+             * we need to remove this index entry to avoid triggering heal
9ae3f9
+             * in a loop and causing lookups again and again*/
9ae3f9
+            *need_heal = EC_HEAL_PURGE_INDEX;
9ae3f9
         } else {
9ae3f9
             for (i = 0; i < ec->nodes; i++) {
9ae3f9
                 /* Since each lock can only increment the dirty
9ae3f9
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
9ae3f9
index f15429d..700dc39 100644
9ae3f9
--- a/xlators/cluster/ec/src/ec-types.h
9ae3f9
+++ b/xlators/cluster/ec/src/ec-types.h
9ae3f9
@@ -130,7 +130,12 @@ typedef void (*ec_resume_f)(ec_fop_data_t *, int32_t);
9ae3f9
 
9ae3f9
 enum _ec_read_policy { EC_ROUND_ROBIN, EC_GFID_HASH, EC_READ_POLICY_MAX };
9ae3f9
 
9ae3f9
-enum _ec_heal_need { EC_HEAL_NONEED, EC_HEAL_MAYBE, EC_HEAL_MUST };
9ae3f9
+enum _ec_heal_need {
9ae3f9
+    EC_HEAL_NONEED,
9ae3f9
+    EC_HEAL_MAYBE,
9ae3f9
+    EC_HEAL_MUST,
9ae3f9
+    EC_HEAL_PURGE_INDEX
9ae3f9
+};
9ae3f9
 
9ae3f9
 enum _ec_stripe_part { EC_STRIPE_HEAD, EC_STRIPE_TAIL };
9ae3f9
 
9ae3f9
-- 
9ae3f9
1.8.3.1
9ae3f9