7f4c2a
From 5c118fff47dc494a4fe0cbeaaeb9c291536b19ac Mon Sep 17 00:00:00 2001
7f4c2a
From: Pranith Kumar K <pkarampu@redhat.com>
7f4c2a
Date: Tue, 30 Jun 2015 18:45:36 +0530
7f4c2a
Subject: [PATCH 194/200] cluster/ec: Add throttling in background healing
7f4c2a
7f4c2a
        Backport of http://review.gluster.org/11471
7f4c2a
7f4c2a
- 8 parallel heals can happen.
7f4c2a
- 128 heals will wait for their turn
7f4c2a
- Heals will be rejected if 128 heals are already waiting.
7f4c2a
7f4c2a
Change-Id: I2e99bf064db7bce71838ed9901a59ffd565ac390
7f4c2a
BUG: 1227197
7f4c2a
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
7f4c2a
Reviewed-on: https://code.engineering.redhat.com/gerrit/52178
7f4c2a
---
7f4c2a
 xlators/cluster/ec/src/ec-common.h |    1 +
7f4c2a
 xlators/cluster/ec/src/ec-data.c   |    2 +
7f4c2a
 xlators/cluster/ec/src/ec-data.h   |    1 +
7f4c2a
 xlators/cluster/ec/src/ec-heal.c   |  109 ++++++++++++++++++++++++++++++++++--
7f4c2a
 xlators/cluster/ec/src/ec.c        |    2 +
7f4c2a
 xlators/cluster/ec/src/ec.h        |    4 +
7f4c2a
 6 files changed, 114 insertions(+), 5 deletions(-)
7f4c2a
7f4c2a
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
7f4c2a
index 3334a7b..4c7fe0c 100644
7f4c2a
--- a/xlators/cluster/ec/src/ec-common.h
7f4c2a
+++ b/xlators/cluster/ec/src/ec-common.h
7f4c2a
@@ -112,5 +112,6 @@ void ec_resume_parent(ec_fop_data_t * fop, int32_t error);
7f4c2a
 
7f4c2a
 void ec_manager(ec_fop_data_t * fop, int32_t error);
7f4c2a
 gf_boolean_t ec_is_recoverable_error (int32_t op_errno);
7f4c2a
+void ec_handle_healers_done (ec_fop_data_t *fop);
7f4c2a
 
7f4c2a
 #endif /* __EC_COMMON_H__ */
7f4c2a
diff --git a/xlators/cluster/ec/src/ec-data.c b/xlators/cluster/ec/src/ec-data.c
7f4c2a
index 0632371..2a34f78 100644
7f4c2a
--- a/xlators/cluster/ec/src/ec-data.c
7f4c2a
+++ b/xlators/cluster/ec/src/ec-data.c
7f4c2a
@@ -135,6 +135,7 @@ ec_fop_data_t * ec_fop_data_allocate(call_frame_t * frame, xlator_t * this,
7f4c2a
     }
7f4c2a
 
7f4c2a
     INIT_LIST_HEAD(&fop->cbk_list);
7f4c2a
+    INIT_LIST_HEAD(&fop->healer);
7f4c2a
     INIT_LIST_HEAD(&fop->answer_list);
7f4c2a
     INIT_LIST_HEAD(&fop->pending_list);
7f4c2a
     INIT_LIST_HEAD(&fop->locks[0].wait_list);
7f4c2a
@@ -300,6 +301,7 @@ void ec_fop_data_release(ec_fop_data_t * fop)
7f4c2a
 
7f4c2a
         ec = fop->xl->private;
7f4c2a
         ec_handle_last_pending_fop_completion (fop, &notify);
7f4c2a
+        ec_handle_healers_done (fop);
7f4c2a
         mem_put(fop);
7f4c2a
         if (notify) {
7f4c2a
             ec_pending_fops_completed(ec);
7f4c2a
diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h
7f4c2a
index 670b3b8..135ccdf 100644
7f4c2a
--- a/xlators/cluster/ec/src/ec-data.h
7f4c2a
+++ b/xlators/cluster/ec/src/ec-data.h
7f4c2a
@@ -213,6 +213,7 @@ struct _ec_fop_data
7f4c2a
     ec_cbk_t           cbks;
7f4c2a
     void              *data;
7f4c2a
     ec_heal_t         *heal;
7f4c2a
+    struct list_head   healer;
7f4c2a
 
7f4c2a
     uint64_t           user_size;
7f4c2a
     uint32_t           head;
7f4c2a
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
7f4c2a
index 7067ed9..bf4d0cd 100644
7f4c2a
--- a/xlators/cluster/ec/src/ec-heal.c
7f4c2a
+++ b/xlators/cluster/ec/src/ec-heal.c
7f4c2a
@@ -26,6 +26,9 @@
7f4c2a
 #include "syncop-utils.h"
7f4c2a
 #include "cluster-syncop.h"
7f4c2a
 
7f4c2a
+#define EC_MAX_BACKGROUND_HEALS 8
7f4c2a
+#define EC_MAX_HEAL_WAITERS 128
7f4c2a
+
7f4c2a
 #define alloca0(size) ({void *__ptr; __ptr = alloca(size); memset(__ptr, 0, size); __ptr; })
7f4c2a
 #define EC_COUNT(array, max) ({int __i; int __res = 0; for (__i = 0; __i < max; __i++) if (array[__i]) __res++; __res; })
7f4c2a
 #define EC_INTERSECT(dst, src1, src2, max) ({int __i; for (__i = 0; __i < max; __i++) dst[__i] = src1[__i] && src2[__i]; })
7f4c2a
@@ -3692,6 +3695,106 @@ ec_heal_done (int ret, call_frame_t *heal, void *opaque)
7f4c2a
         return 0;
7f4c2a
 }
7f4c2a
 
7f4c2a
+ec_fop_data_t*
7f4c2a
+__ec_dequeue_heals (ec_t *ec)
7f4c2a
+{
7f4c2a
+        ec_fop_data_t *fop = NULL;
7f4c2a
+
7f4c2a
+        if (list_empty (&ec->heal_waiting))
7f4c2a
+                goto none;
7f4c2a
+
7f4c2a
+        if (ec->healers == EC_MAX_BACKGROUND_HEALS)
7f4c2a
+                goto none;
7f4c2a
+
7f4c2a
+        GF_ASSERT (ec->healers < EC_MAX_BACKGROUND_HEALS);
7f4c2a
+        fop = list_entry(ec->heal_waiting.next, ec_fop_data_t, healer);
7f4c2a
+        ec->heal_waiters--;
7f4c2a
+        list_del_init(&fop->healer);
7f4c2a
+        list_add(&fop->healer, &ec->healing);
7f4c2a
+        ec->healers++;
7f4c2a
+        return fop;
7f4c2a
+none:
7f4c2a
+        gf_msg_debug (ec->xl->name, 0, "Num healers: %d, Num Waiters: %d",
7f4c2a
+                      ec->healers, ec->heal_waiters);
7f4c2a
+        return NULL;
7f4c2a
+}
7f4c2a
+
7f4c2a
+void
7f4c2a
+ec_heal_fail (ec_t *ec, ec_fop_data_t *fop)
7f4c2a
+{
7f4c2a
+        if (fop->cbks.heal) {
7f4c2a
+            fop->cbks.heal (fop->req_frame, NULL, ec->xl, -1, EIO, 0, 0,
7f4c2a
+                            0, NULL);
7f4c2a
+        }
7f4c2a
+        if (fop)
7f4c2a
+            ec_fop_data_release (fop);
7f4c2a
+}
7f4c2a
+
7f4c2a
+void
7f4c2a
+ec_launch_heal (ec_t *ec, ec_fop_data_t *fop)
7f4c2a
+{
7f4c2a
+        int     ret = 0;
7f4c2a
+
7f4c2a
+        ret = synctask_new (ec->xl->ctx->env, ec_synctask_heal_wrap,
7f4c2a
+                            ec_heal_done, NULL, fop);
7f4c2a
+        if (ret < 0) {
7f4c2a
+                ec_heal_fail (ec, fop);
7f4c2a
+        }
7f4c2a
+}
7f4c2a
+
7f4c2a
+void
7f4c2a
+ec_handle_healers_done (ec_fop_data_t *fop)
7f4c2a
+{
7f4c2a
+        ec_t *ec = fop->xl->private;
7f4c2a
+        ec_fop_data_t *heal_fop = NULL;
7f4c2a
+
7f4c2a
+        if (list_empty (&fop->healer))
7f4c2a
+                return;
7f4c2a
+
7f4c2a
+        LOCK (&ec->lock);
7f4c2a
+        {
7f4c2a
+                list_del_init (&fop->healer);
7f4c2a
+                ec->healers--;
7f4c2a
+                heal_fop = __ec_dequeue_heals (ec);
7f4c2a
+        }
7f4c2a
+        UNLOCK (&ec->lock);
7f4c2a
+
7f4c2a
+        if (heal_fop)
7f4c2a
+                ec_launch_heal (ec, heal_fop);
7f4c2a
+
7f4c2a
+}
7f4c2a
+
7f4c2a
+void
7f4c2a
+ec_heal_throttle (xlator_t *this, ec_fop_data_t *fop)
7f4c2a
+{
7f4c2a
+        gf_boolean_t can_heal = _gf_true;
7f4c2a
+        ec_t         *ec      = this->private;
7f4c2a
+
7f4c2a
+        if (fop->req_frame == NULL) {
7f4c2a
+
7f4c2a
+                LOCK (&ec->lock);
7f4c2a
+                {
7f4c2a
+                        if (ec->heal_waiters >= EC_MAX_HEAL_WAITERS) {
7f4c2a
+                                can_heal = _gf_false;
7f4c2a
+                        } else {
7f4c2a
+                                list_add_tail(&fop->healer, &ec->heal_waiting);
7f4c2a
+                                ec->heal_waiters++;
7f4c2a
+                                fop = __ec_dequeue_heals (ec);
7f4c2a
+                        }
7f4c2a
+                }
7f4c2a
+                UNLOCK (&ec->lock);
7f4c2a
+        }
7f4c2a
+
7f4c2a
+        if (can_heal) {
7f4c2a
+                if (fop)
7f4c2a
+                        ec_launch_heal (ec, fop);
7f4c2a
+        } else {
7f4c2a
+               gf_msg_debug (this->name, 0, "Max number of heals are pending, "
7f4c2a
+                             "background self-heal rejected");
7f4c2a
+                ec_heal_fail (ec, fop);
7f4c2a
+        }
7f4c2a
+}
7f4c2a
+
7f4c2a
 void
7f4c2a
 ec_heal (call_frame_t *frame, xlator_t *this, uintptr_t target,
7f4c2a
          int32_t minimum, fop_heal_cbk_t func, void *data, loc_t *loc,
7f4c2a
@@ -3699,7 +3802,6 @@ ec_heal (call_frame_t *frame, xlator_t *this, uintptr_t target,
7f4c2a
 {
7f4c2a
     ec_cbk_t callback = { .heal = func };
7f4c2a
     ec_fop_data_t *fop = NULL;
7f4c2a
-    int ret = 0;
7f4c2a
 
7f4c2a
     gf_msg_trace ("ec", 0, "EC(HEAL) %p", frame);
7f4c2a
 
7f4c2a
@@ -3727,10 +3829,7 @@ ec_heal (call_frame_t *frame, xlator_t *this, uintptr_t target,
7f4c2a
     if (xdata)
7f4c2a
         fop->xdata = dict_ref(xdata);
7f4c2a
 
7f4c2a
-    ret = synctask_new (this->ctx->env, ec_synctask_heal_wrap,
7f4c2a
-                        ec_heal_done, NULL, fop);
7f4c2a
-    if (ret < 0)
7f4c2a
-            goto fail;
7f4c2a
+    ec_heal_throttle (this, fop);
7f4c2a
     return;
7f4c2a
 fail:
7f4c2a
     if (fop)
7f4c2a
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
7f4c2a
index 64ab91b..dd51630 100644
7f4c2a
--- a/xlators/cluster/ec/src/ec.c
7f4c2a
+++ b/xlators/cluster/ec/src/ec.c
7f4c2a
@@ -543,6 +543,8 @@ init (xlator_t *this)
7f4c2a
     LOCK_INIT(&ec->lock);
7f4c2a
 
7f4c2a
     INIT_LIST_HEAD(&ec->pending_fops);
7f4c2a
+    INIT_LIST_HEAD(&ec->heal_waiting);
7f4c2a
+    INIT_LIST_HEAD(&ec->healing);
7f4c2a
 
7f4c2a
     ec->fop_pool = mem_pool_new(ec_fop_data_t, 1024);
7f4c2a
     ec->cbk_pool = mem_pool_new(ec_cbk_data_t, 4096);
7f4c2a
diff --git a/xlators/cluster/ec/src/ec.h b/xlators/cluster/ec/src/ec.h
7f4c2a
index fdedb89..7f14020 100644
7f4c2a
--- a/xlators/cluster/ec/src/ec.h
7f4c2a
+++ b/xlators/cluster/ec/src/ec.h
7f4c2a
@@ -28,6 +28,8 @@
7f4c2a
 struct _ec
7f4c2a
 {
7f4c2a
     xlator_t *        xl;
7f4c2a
+    int32_t           healers;
7f4c2a
+    int32_t           heal_waiters;
7f4c2a
     int32_t           nodes;
7f4c2a
     int32_t           bits_for_nodes;
7f4c2a
     int32_t           fragments;
7f4c2a
@@ -46,6 +48,8 @@ struct _ec
7f4c2a
     gf_timer_t *      timer;
7f4c2a
     gf_boolean_t      shutdown;
7f4c2a
     struct list_head  pending_fops;
7f4c2a
+    struct list_head  heal_waiting;
7f4c2a
+    struct list_head  healing;
7f4c2a
     struct mem_pool * fop_pool;
7f4c2a
     struct mem_pool * cbk_pool;
7f4c2a
     struct mem_pool * lock_pool;
7f4c2a
-- 
7f4c2a
1.7.1
7f4c2a