|
|
9ae3f9 |
From b166826b283d9071532174ebbec857dea600064b Mon Sep 17 00:00:00 2001
|
|
|
9ae3f9 |
From: Ashish Pandey <aspandey@redhat.com>
|
|
|
9ae3f9 |
Date: Thu, 23 Jul 2020 11:07:32 +0530
|
|
|
9ae3f9 |
Subject: [PATCH 460/465] cluster/ec: Remove stale entries from indices/xattrop
|
|
|
9ae3f9 |
folder
|
|
|
9ae3f9 |
|
|
|
9ae3f9 |
Problem:
|
|
|
9ae3f9 |
If a gfid is present in indices/xattrop folder while
|
|
|
9ae3f9 |
the file/dir is actually healthy and all the xattrs are healthy,
|
|
|
9ae3f9 |
it causes a lot of lookups by shd on an entry which does not need
|
|
|
9ae3f9 |
to be healed.
|
|
|
9ae3f9 |
This whole process eats up a lot of CPU without doing meaningful
|
|
|
9ae3f9 |
work.
|
|
|
9ae3f9 |
|
|
|
9ae3f9 |
Solution:
|
|
|
9ae3f9 |
Set trusted.ec.dirty xattr of the entry so that actual heal process
|
|
|
9ae3f9 |
happens and at the end of it, during unset of dirty, gfid entry from
|
|
|
9ae3f9 |
indices/xattrop will be removed.
|
|
|
9ae3f9 |
|
|
|
9ae3f9 |
>Upstream patch : https://review.gluster.org/#/c/glusterfs/+/24765/
|
|
|
9ae3f9 |
>Fixes: #1385
|
|
|
9ae3f9 |
|
|
|
9ae3f9 |
Change-Id: Ib1b9377d8dda384bba49523e9ff6ba9f0699cc1b
|
|
|
9ae3f9 |
BUG: 1785714
|
|
|
9ae3f9 |
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
|
|
|
9ae3f9 |
Reviewed-on: https://code.engineering.redhat.com/gerrit/208591
|
|
|
9ae3f9 |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
9ae3f9 |
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
|
9ae3f9 |
---
|
|
|
9ae3f9 |
xlators/cluster/ec/src/ec-heal.c | 73 ++++++++++++++++++++++++++++++++++++++-
|
|
|
9ae3f9 |
xlators/cluster/ec/src/ec-types.h | 7 +++-
|
|
|
9ae3f9 |
2 files changed, 78 insertions(+), 2 deletions(-)
|
|
|
9ae3f9 |
|
|
|
9ae3f9 |
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
|
|
|
9ae3f9 |
index e2de879..7d25853 100644
|
|
|
9ae3f9 |
--- a/xlators/cluster/ec/src/ec-heal.c
|
|
|
9ae3f9 |
+++ b/xlators/cluster/ec/src/ec-heal.c
|
|
|
9ae3f9 |
@@ -2488,6 +2488,59 @@ out:
|
|
|
9ae3f9 |
return ret;
|
|
|
9ae3f9 |
}
|
|
|
9ae3f9 |
|
|
|
9ae3f9 |
+int
|
|
|
9ae3f9 |
+ec_heal_set_dirty_without_lock(call_frame_t *frame, ec_t *ec, inode_t *inode)
|
|
|
9ae3f9 |
+{
|
|
|
9ae3f9 |
+ int i = 0;
|
|
|
9ae3f9 |
+ int ret = 0;
|
|
|
9ae3f9 |
+ dict_t **xattr = NULL;
|
|
|
9ae3f9 |
+ loc_t loc = {0};
|
|
|
9ae3f9 |
+ uint64_t dirty_xattr[EC_VERSION_SIZE] = {0};
|
|
|
9ae3f9 |
+ unsigned char *on = NULL;
|
|
|
9ae3f9 |
+ default_args_cbk_t *replies = NULL;
|
|
|
9ae3f9 |
+ dict_t *dict = NULL;
|
|
|
9ae3f9 |
+
|
|
|
9ae3f9 |
+ /* Allocate the required memory */
|
|
|
9ae3f9 |
+ loc.inode = inode_ref(inode);
|
|
|
9ae3f9 |
+ gf_uuid_copy(loc.gfid, inode->gfid);
|
|
|
9ae3f9 |
+ on = alloca0(ec->nodes);
|
|
|
9ae3f9 |
+ EC_REPLIES_ALLOC(replies, ec->nodes);
|
|
|
9ae3f9 |
+ xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer);
|
|
|
9ae3f9 |
+ if (!xattr) {
|
|
|
9ae3f9 |
+ ret = -ENOMEM;
|
|
|
9ae3f9 |
+ goto out;
|
|
|
9ae3f9 |
+ }
|
|
|
9ae3f9 |
+ dict = dict_new();
|
|
|
9ae3f9 |
+ if (!dict) {
|
|
|
9ae3f9 |
+ ret = -ENOMEM;
|
|
|
9ae3f9 |
+ goto out;
|
|
|
9ae3f9 |
+ }
|
|
|
9ae3f9 |
+ for (i = 0; i < ec->nodes; i++) {
|
|
|
9ae3f9 |
+ xattr[i] = dict;
|
|
|
9ae3f9 |
+ on[i] = 1;
|
|
|
9ae3f9 |
+ }
|
|
|
9ae3f9 |
+ dirty_xattr[EC_METADATA_TXN] = hton64(1);
|
|
|
9ae3f9 |
+ ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr,
|
|
|
9ae3f9 |
+ (sizeof(*dirty_xattr) * EC_VERSION_SIZE));
|
|
|
9ae3f9 |
+ if (ret < 0) {
|
|
|
9ae3f9 |
+ ret = -ENOMEM;
|
|
|
9ae3f9 |
+ goto out;
|
|
|
9ae3f9 |
+ }
|
|
|
9ae3f9 |
+ PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame,
|
|
|
9ae3f9 |
+ ec_wind_xattrop_parallel, &loc, GF_XATTROP_ADD_ARRAY64,
|
|
|
9ae3f9 |
+ xattr, NULL);
|
|
|
9ae3f9 |
+out:
|
|
|
9ae3f9 |
+ if (dict) {
|
|
|
9ae3f9 |
+ dict_unref(dict);
|
|
|
9ae3f9 |
+ }
|
|
|
9ae3f9 |
+ if (xattr) {
|
|
|
9ae3f9 |
+ GF_FREE(xattr);
|
|
|
9ae3f9 |
+ }
|
|
|
9ae3f9 |
+ cluster_replies_wipe(replies, ec->nodes);
|
|
|
9ae3f9 |
+ loc_wipe(&loc);
|
|
|
9ae3f9 |
+ return ret;
|
|
|
9ae3f9 |
+}
|
|
|
9ae3f9 |
+
|
|
|
9ae3f9 |
void
|
|
|
9ae3f9 |
ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
|
|
|
9ae3f9 |
{
|
|
|
9ae3f9 |
@@ -2563,7 +2616,18 @@ ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
|
|
|
9ae3f9 |
ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false,
|
|
|
9ae3f9 |
&need_heal);
|
|
|
9ae3f9 |
|
|
|
9ae3f9 |
- if (need_heal == EC_HEAL_NONEED) {
|
|
|
9ae3f9 |
+ if (need_heal == EC_HEAL_PURGE_INDEX) {
|
|
|
9ae3f9 |
+ gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL,
|
|
|
9ae3f9 |
+ "Index entry needs to be purged for: %s ",
|
|
|
9ae3f9 |
+ uuid_utoa(loc->gfid));
|
|
|
9ae3f9 |
+ /* We need to send xattrop to set dirty flag so that it can be
|
|
|
9ae3f9 |
+ * healed and index entry could be removed. We need not take a lock
|
|
|
9ae3f9 |
+ * on this entry to do so as we are just setting dirty flag which
|
|
|
9ae3f9 |
+ * actually increases the trusted.ec.dirty count and does not set
|
|
|
9ae3f9 |
+ * the new value.
|
|
|
9ae3f9 |
+ * This will make sure that it is not interfering in other fops.*/
|
|
|
9ae3f9 |
+ ec_heal_set_dirty_without_lock(frame, ec, loc->inode);
|
|
|
9ae3f9 |
+ } else if (need_heal == EC_HEAL_NONEED) {
|
|
|
9ae3f9 |
gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
|
|
|
9ae3f9 |
"Heal is not required for : %s ", uuid_utoa(loc->gfid));
|
|
|
9ae3f9 |
goto out;
|
|
|
9ae3f9 |
@@ -2958,6 +3022,13 @@ _need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
|
|
|
9ae3f9 |
goto out;
|
|
|
9ae3f9 |
}
|
|
|
9ae3f9 |
}
|
|
|
9ae3f9 |
+ /* If lock count is 0, all dirty flags are 0 and all the
|
|
|
9ae3f9 |
+ * versions are matching then why are we here. It looks
|
|
|
9ae3f9 |
+ * like something went wrong while removing the index entries
|
|
|
9ae3f9 |
+ * after completing a successful heal or fop. In this case
|
|
|
9ae3f9 |
+ * we need to remove this index entry to avoid triggering heal
|
|
|
9ae3f9 |
+ * in a loop and causing lookups again and again*/
|
|
|
9ae3f9 |
+ *need_heal = EC_HEAL_PURGE_INDEX;
|
|
|
9ae3f9 |
} else {
|
|
|
9ae3f9 |
for (i = 0; i < ec->nodes; i++) {
|
|
|
9ae3f9 |
/* Since each lock can only increment the dirty
|
|
|
9ae3f9 |
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
|
|
|
9ae3f9 |
index f15429d..700dc39 100644
|
|
|
9ae3f9 |
--- a/xlators/cluster/ec/src/ec-types.h
|
|
|
9ae3f9 |
+++ b/xlators/cluster/ec/src/ec-types.h
|
|
|
9ae3f9 |
@@ -130,7 +130,12 @@ typedef void (*ec_resume_f)(ec_fop_data_t *, int32_t);
|
|
|
9ae3f9 |
|
|
|
9ae3f9 |
enum _ec_read_policy { EC_ROUND_ROBIN, EC_GFID_HASH, EC_READ_POLICY_MAX };
|
|
|
9ae3f9 |
|
|
|
9ae3f9 |
-enum _ec_heal_need { EC_HEAL_NONEED, EC_HEAL_MAYBE, EC_HEAL_MUST };
|
|
|
9ae3f9 |
+enum _ec_heal_need {
|
|
|
9ae3f9 |
+ EC_HEAL_NONEED,
|
|
|
9ae3f9 |
+ EC_HEAL_MAYBE,
|
|
|
9ae3f9 |
+ EC_HEAL_MUST,
|
|
|
9ae3f9 |
+ EC_HEAL_PURGE_INDEX
|
|
|
9ae3f9 |
+};
|
|
|
9ae3f9 |
|
|
|
9ae3f9 |
enum _ec_stripe_part { EC_STRIPE_HEAD, EC_STRIPE_TAIL };
|
|
|
9ae3f9 |
|
|
|
9ae3f9 |
--
|
|
|
9ae3f9 |
1.8.3.1
|
|
|
9ae3f9 |
|