Blob Blame History Raw
From 5c118fff47dc494a4fe0cbeaaeb9c291536b19ac Mon Sep 17 00:00:00 2001
From: Pranith Kumar K <pkarampu@redhat.com>
Date: Tue, 30 Jun 2015 18:45:36 +0530
Subject: [PATCH 194/200] cluster/ec: Add throttling in background healing

        Backport of http://review.gluster.org/11471

- 8 parallel heals can happen.
- 128 heals will wait for their turn
- Heals will be rejected if 128 heals are already waiting.

Change-Id: I2e99bf064db7bce71838ed9901a59ffd565ac390
BUG: 1227197
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/52178
---
 xlators/cluster/ec/src/ec-common.h |    1 +
 xlators/cluster/ec/src/ec-data.c   |    2 +
 xlators/cluster/ec/src/ec-data.h   |    1 +
 xlators/cluster/ec/src/ec-heal.c   |  109 ++++++++++++++++++++++++++++++++++--
 xlators/cluster/ec/src/ec.c        |    2 +
 xlators/cluster/ec/src/ec.h        |    4 +
 6 files changed, 114 insertions(+), 5 deletions(-)

diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
index 3334a7b..4c7fe0c 100644
--- a/xlators/cluster/ec/src/ec-common.h
+++ b/xlators/cluster/ec/src/ec-common.h
@@ -112,5 +112,6 @@ void ec_resume_parent(ec_fop_data_t * fop, int32_t error);
 
 void ec_manager(ec_fop_data_t * fop, int32_t error);
 gf_boolean_t ec_is_recoverable_error (int32_t op_errno);
+void ec_handle_healers_done (ec_fop_data_t *fop);
 
 #endif /* __EC_COMMON_H__ */
diff --git a/xlators/cluster/ec/src/ec-data.c b/xlators/cluster/ec/src/ec-data.c
index 0632371..2a34f78 100644
--- a/xlators/cluster/ec/src/ec-data.c
+++ b/xlators/cluster/ec/src/ec-data.c
@@ -135,6 +135,7 @@ ec_fop_data_t * ec_fop_data_allocate(call_frame_t * frame, xlator_t * this,
     }
 
     INIT_LIST_HEAD(&fop->cbk_list);
+    INIT_LIST_HEAD(&fop->healer);
     INIT_LIST_HEAD(&fop->answer_list);
     INIT_LIST_HEAD(&fop->pending_list);
     INIT_LIST_HEAD(&fop->locks[0].wait_list);
@@ -300,6 +301,7 @@ void ec_fop_data_release(ec_fop_data_t * fop)
 
         ec = fop->xl->private;
         ec_handle_last_pending_fop_completion (fop, &notify);
+        ec_handle_healers_done (fop);
         mem_put(fop);
         if (notify) {
             ec_pending_fops_completed(ec);
diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h
index 670b3b8..135ccdf 100644
--- a/xlators/cluster/ec/src/ec-data.h
+++ b/xlators/cluster/ec/src/ec-data.h
@@ -213,6 +213,7 @@ struct _ec_fop_data
     ec_cbk_t           cbks;
     void              *data;
     ec_heal_t         *heal;
+    struct list_head   healer;
 
     uint64_t           user_size;
     uint32_t           head;
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index 7067ed9..bf4d0cd 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -26,6 +26,9 @@
 #include "syncop-utils.h"
 #include "cluster-syncop.h"
 
+#define EC_MAX_BACKGROUND_HEALS 8
+#define EC_MAX_HEAL_WAITERS 128
+
 #define alloca0(size) ({void *__ptr; __ptr = alloca(size); memset(__ptr, 0, size); __ptr; })
 #define EC_COUNT(array, max) ({int __i; int __res = 0; for (__i = 0; __i < max; __i++) if (array[__i]) __res++; __res; })
 #define EC_INTERSECT(dst, src1, src2, max) ({int __i; for (__i = 0; __i < max; __i++) dst[__i] = src1[__i] && src2[__i]; })
@@ -3692,6 +3695,106 @@ ec_heal_done (int ret, call_frame_t *heal, void *opaque)
         return 0;
 }
 
+ec_fop_data_t*
+__ec_dequeue_heals (ec_t *ec)
+{
+        ec_fop_data_t *fop = NULL;
+
+        if (list_empty (&ec->heal_waiting))
+                goto none;
+
+        if (ec->healers == EC_MAX_BACKGROUND_HEALS)
+                goto none;
+
+        GF_ASSERT (ec->healers < EC_MAX_BACKGROUND_HEALS);
+        fop = list_entry(ec->heal_waiting.next, ec_fop_data_t, healer);
+        ec->heal_waiters--;
+        list_del_init(&fop->healer);
+        list_add(&fop->healer, &ec->healing);
+        ec->healers++;
+        return fop;
+none:
+        gf_msg_debug (ec->xl->name, 0, "Num healers: %d, Num Waiters: %d",
+                      ec->healers, ec->heal_waiters);
+        return NULL;
+}
+
+void
+ec_heal_fail (ec_t *ec, ec_fop_data_t *fop)
+{
+        if (fop->cbks.heal) {
+            fop->cbks.heal (fop->req_frame, NULL, ec->xl, -1, EIO, 0, 0,
+                            0, NULL);
+        }
+        if (fop)
+            ec_fop_data_release (fop);
+}
+
+void
+ec_launch_heal (ec_t *ec, ec_fop_data_t *fop)
+{
+        int     ret = 0;
+
+        ret = synctask_new (ec->xl->ctx->env, ec_synctask_heal_wrap,
+                            ec_heal_done, NULL, fop);
+        if (ret < 0) {
+                ec_heal_fail (ec, fop);
+        }
+}
+
+void
+ec_handle_healers_done (ec_fop_data_t *fop)
+{
+        ec_t *ec = fop->xl->private;
+        ec_fop_data_t *heal_fop = NULL;
+
+        if (list_empty (&fop->healer))
+                return;
+
+        LOCK (&ec->lock);
+        {
+                list_del_init (&fop->healer);
+                ec->healers--;
+                heal_fop = __ec_dequeue_heals (ec);
+        }
+        UNLOCK (&ec->lock);
+
+        if (heal_fop)
+                ec_launch_heal (ec, heal_fop);
+
+}
+
+void
+ec_heal_throttle (xlator_t *this, ec_fop_data_t *fop)
+{
+        gf_boolean_t can_heal = _gf_true;
+        ec_t         *ec      = this->private;
+
+        if (fop->req_frame == NULL) {
+
+                LOCK (&ec->lock);
+                {
+                        if (ec->heal_waiters >= EC_MAX_HEAL_WAITERS) {
+                                can_heal = _gf_false;
+                        } else {
+                                list_add_tail(&fop->healer, &ec->heal_waiting);
+                                ec->heal_waiters++;
+                                fop = __ec_dequeue_heals (ec);
+                        }
+                }
+                UNLOCK (&ec->lock);
+        }
+
+        if (can_heal) {
+                if (fop)
+                        ec_launch_heal (ec, fop);
+        } else {
+               gf_msg_debug (this->name, 0, "Max number of heals are pending, "
+                             "background self-heal rejected");
+                ec_heal_fail (ec, fop);
+        }
+}
+
 void
 ec_heal (call_frame_t *frame, xlator_t *this, uintptr_t target,
          int32_t minimum, fop_heal_cbk_t func, void *data, loc_t *loc,
@@ -3699,7 +3802,6 @@ ec_heal (call_frame_t *frame, xlator_t *this, uintptr_t target,
 {
     ec_cbk_t callback = { .heal = func };
     ec_fop_data_t *fop = NULL;
-    int ret = 0;
 
     gf_msg_trace ("ec", 0, "EC(HEAL) %p", frame);
 
@@ -3727,10 +3829,7 @@ ec_heal (call_frame_t *frame, xlator_t *this, uintptr_t target,
     if (xdata)
         fop->xdata = dict_ref(xdata);
 
-    ret = synctask_new (this->ctx->env, ec_synctask_heal_wrap,
-                        ec_heal_done, NULL, fop);
-    if (ret < 0)
-            goto fail;
+    ec_heal_throttle (this, fop);
     return;
 fail:
     if (fop)
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 64ab91b..dd51630 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -543,6 +543,8 @@ init (xlator_t *this)
     LOCK_INIT(&ec->lock);
 
     INIT_LIST_HEAD(&ec->pending_fops);
+    INIT_LIST_HEAD(&ec->heal_waiting);
+    INIT_LIST_HEAD(&ec->healing);
 
     ec->fop_pool = mem_pool_new(ec_fop_data_t, 1024);
     ec->cbk_pool = mem_pool_new(ec_cbk_data_t, 4096);
diff --git a/xlators/cluster/ec/src/ec.h b/xlators/cluster/ec/src/ec.h
index fdedb89..7f14020 100644
--- a/xlators/cluster/ec/src/ec.h
+++ b/xlators/cluster/ec/src/ec.h
@@ -28,6 +28,8 @@
 struct _ec
 {
     xlator_t *        xl;
+    int32_t           healers;
+    int32_t           heal_waiters;
     int32_t           nodes;
     int32_t           bits_for_nodes;
     int32_t           fragments;
@@ -46,6 +48,8 @@ struct _ec
     gf_timer_t *      timer;
     gf_boolean_t      shutdown;
     struct list_head  pending_fops;
+    struct list_head  heal_waiting;
+    struct list_head  healing;
     struct mem_pool * fop_pool;
     struct mem_pool * cbk_pool;
     struct mem_pool * lock_pool;
-- 
1.7.1