Blob Blame History Raw
From d806760f1d4c78a2519b01f1c2d07aba0c533755 Mon Sep 17 00:00:00 2001
From: Pranith Kumar K <pkarampu@redhat.com>
Date: Fri, 28 Aug 2020 16:03:54 +0530
Subject: [PATCH 608/610] cluster/ec: Track heal statistics in shd

With this change we should be able to inspect the number of heals
attempted and completed by each shd.

> Upstream patch: https://review.gluster.org/#/c/glusterfs/+/24926/
> fixes: #1453
> Change-Id: I10f5d86efcc0a8e4d648da808751d37725682c39
> Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>

BUG: 1853631
Change-Id: I10f5d86efcc0a8e4d648da808751d37725682c39
Signed-off-by: Sheetal Pamecha <spamecha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/280208
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 xlators/cluster/ec/src/ec-heald.c | 49 ++++++++++++++++++++++++++++++++++++++-
 xlators/cluster/ec/src/ec-types.h |  5 ++++
 xlators/cluster/ec/src/ec.c       |  6 +++++
 3 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
index 4f4b6aa..cd4d3ad 100644
--- a/xlators/cluster/ec/src/ec-heald.c
+++ b/xlators/cluster/ec/src/ec-heald.c
@@ -152,15 +152,58 @@ ec_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name)
     return ret;
 }
 
+static gf_boolean_t
+ec_is_heal_completed(char *status)
+{
+    char *bad_pos = NULL;
+    char *zero_pos = NULL;
+
+    if (!status) {
+        return _gf_false;
+    }
+
+    /* Logic:
+     * Status has the form "Good: <binary>, Bad: <binary>". The heal is
+     * treated as completed when the first '0' in the string (strchr)
+     * lies after the last ':' (strrchr), i.e. inside the "Bad:" field,
+     * which means no '0' appears anywhere before the "Bad:" field. */
+
+    zero_pos = strchr(status, '0');
+    bad_pos = strrchr(status, ':');
+    if (!zero_pos || !bad_pos) {
+        /* Malformed status: it contains no '0' or no ':' at all. */
+        return _gf_false;
+    }
+
+    if (zero_pos > bad_pos) {
+        return _gf_true;
+    }
+
+    return _gf_false;
+}
+
 int
 ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc,
                 gf_boolean_t full)
 {
     dict_t *xdata = NULL;
+    dict_t *dict = NULL;
     uint32_t count;
     int32_t ret;
+    char *heal_status = NULL;
+    ec_t *ec = healer->this->private;
+
+    GF_ATOMIC_INC(ec->stats.shd.attempted);
+    ret = syncop_getxattr(healer->this, loc, &dict, EC_XATTR_HEAL, NULL,
+                          &xdata);
+    if (ret == 0) {
+        if (dict && (dict_get_str(dict, EC_XATTR_HEAL, &heal_status) == 0)) {
+            if (ec_is_heal_completed(heal_status)) {
+                GF_ATOMIC_INC(ec->stats.shd.completed);
+            }
+        }
+    }
 
-    ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, &xdata);
     if (!full && (loc->inode->ia_type == IA_IFDIR)) {
         /* If we have just healed a directory, it's possible that
          * other index entries have appeared to be healed. */
@@ -179,6 +222,10 @@ ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc,
         dict_unref(xdata);
     }
 
+    if (dict) {
+        dict_unref(dict);
+    }
+
     return ret;
 }
 
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
index 700dc39..ef7a7fe 100644
--- a/xlators/cluster/ec/src/ec-types.h
+++ b/xlators/cluster/ec/src/ec-types.h
@@ -626,6 +626,11 @@ struct _ec_statistics {
                                 requests. (Basically memory allocation
                                 errors). */
     } stripe_cache;
+    struct {
+        gf_atomic_t attempted; /*Number of heals attempted on
+                                files/directories*/
+        gf_atomic_t completed; /*Number of heals completed on files/directories*/
+    } shd;
 };
 
 struct _ec {
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 047cdd8..24de9e8 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -649,6 +649,8 @@ ec_statistics_init(ec_t *ec)
     GF_ATOMIC_INIT(ec->stats.stripe_cache.evicts, 0);
     GF_ATOMIC_INIT(ec->stats.stripe_cache.allocs, 0);
     GF_ATOMIC_INIT(ec->stats.stripe_cache.errors, 0);
+    GF_ATOMIC_INIT(ec->stats.shd.attempted, 0);
+    GF_ATOMIC_INIT(ec->stats.shd.completed, 0);
 }
 
 int32_t
@@ -1445,6 +1447,10 @@ ec_dump_private(xlator_t *this)
                        GF_ATOMIC_GET(ec->stats.stripe_cache.allocs));
     gf_proc_dump_write("errors", "%" GF_PRI_ATOMIC,
                        GF_ATOMIC_GET(ec->stats.stripe_cache.errors));
+    gf_proc_dump_write("heals-attempted", "%" GF_PRI_ATOMIC,
+                       GF_ATOMIC_GET(ec->stats.shd.attempted));
+    gf_proc_dump_write("heals-completed", "%" GF_PRI_ATOMIC,
+                       GF_ATOMIC_GET(ec->stats.shd.completed));
 
     return 0;
 }
-- 
1.8.3.1