From d806760f1d4c78a2519b01f1c2d07aba0c533755 Mon Sep 17 00:00:00 2001
From: Pranith Kumar K <pkarampu@redhat.com>
Date: Fri, 28 Aug 2020 16:03:54 +0530
Subject: [PATCH 608/610] cluster/ec: Track heal statistics in shd
With this change we should be able to inspect number of heals
attempted and completed by each shd.
> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24926/
> fixes: #1453
> Change-Id: I10f5d86efcc0a8e4d648da808751d37725682c39
> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
BUG: 1853631
Change-Id: I10f5d86efcc0a8e4d648da808751d37725682c39
Signed-off-by: Sheetal Pamecha <spamecha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280208
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
xlators/cluster/ec/src/ec-heald.c | 49 ++++++++++++++++++++++++++++++++++++++-
xlators/cluster/ec/src/ec-types.h | 5 ++++
xlators/cluster/ec/src/ec.c | 6 +++++
3 files changed, 59 insertions(+), 1 deletion(-)
diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
index 4f4b6aa..cd4d3ad 100644
--- a/xlators/cluster/ec/src/ec-heald.c
+++ b/xlators/cluster/ec/src/ec-heald.c
@@ -152,15 +152,58 @@ ec_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name)
return ret;
}
+static gf_boolean_t
+ec_is_heal_completed(char *status)
+{
+ char *bad_pos = NULL;
+ char *zero_pos = NULL;
+
+ if (!status) {
+ return _gf_false;
+ }
+
+ /*Logic:
+ * Status will be of the form Good: <binary>, Bad: <binary>
+ * If heal completes, if we do strchr for '0' it should be present after
+ * 'Bad:' i.e. strRchr for ':'
+ * */
+
+ zero_pos = strchr(status, '0');
+ bad_pos = strrchr(status, ':');
+ if (!zero_pos || !bad_pos) {
+ /*malformed status*/
+ return _gf_false;
+ }
+
+ if (zero_pos > bad_pos) {
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
int
ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc,
gf_boolean_t full)
{
dict_t *xdata = NULL;
+ dict_t *dict = NULL;
uint32_t count;
int32_t ret;
+ char *heal_status = NULL;
+ ec_t *ec = healer->this->private;
+
+ GF_ATOMIC_INC(ec->stats.shd.attempted);
+ ret = syncop_getxattr(healer->this, loc, &dict, EC_XATTR_HEAL, NULL,
+ &xdata);
+ if (ret == 0) {
+ if (dict && (dict_get_str(dict, EC_XATTR_HEAL, &heal_status) == 0)) {
+ if (ec_is_heal_completed(heal_status)) {
+ GF_ATOMIC_INC(ec->stats.shd.completed);
+ }
+ }
+ }
- ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, &xdata);
if (!full && (loc->inode->ia_type == IA_IFDIR)) {
/* If we have just healed a directory, it's possible that
* other index entries have appeared to be healed. */
@@ -179,6 +222,10 @@ ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc,
dict_unref(xdata);
}
+ if (dict) {
+ dict_unref(dict);
+ }
+
return ret;
}
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
index 700dc39..ef7a7fe 100644
--- a/xlators/cluster/ec/src/ec-types.h
+++ b/xlators/cluster/ec/src/ec-types.h
@@ -626,6 +626,11 @@ struct _ec_statistics {
requests. (Basically memory allocation
errors). */
} stripe_cache;
+ struct {
+ gf_atomic_t attempted; /*Number of heals attempted on
+ files/directories*/
+ gf_atomic_t completed; /*Number of heals complted on files/directories*/
+ } shd;
};
struct _ec {
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 047cdd8..24de9e8 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -649,6 +649,8 @@ ec_statistics_init(ec_t *ec)
GF_ATOMIC_INIT(ec->stats.stripe_cache.evicts, 0);
GF_ATOMIC_INIT(ec->stats.stripe_cache.allocs, 0);
GF_ATOMIC_INIT(ec->stats.stripe_cache.errors, 0);
+ GF_ATOMIC_INIT(ec->stats.shd.attempted, 0);
+ GF_ATOMIC_INIT(ec->stats.shd.completed, 0);
}
int32_t
@@ -1445,6 +1447,10 @@ ec_dump_private(xlator_t *this)
GF_ATOMIC_GET(ec->stats.stripe_cache.allocs));
gf_proc_dump_write("errors", "%" GF_PRI_ATOMIC,
GF_ATOMIC_GET(ec->stats.stripe_cache.errors));
+ gf_proc_dump_write("heals-attempted", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.shd.attempted));
+ gf_proc_dump_write("heals-completed", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.shd.completed));
return 0;
}
--
1.8.3.1