From 9fd966aa6879ac9867381629f82eca24b950d731 Mon Sep 17 00:00:00 2001
From: Mohammed Rafi KC <rkavunga@redhat.com>
Date: Sun, 2 Jun 2019 01:36:33 +0530
Subject: [PATCH 175/178] ec/fini: Fix race between xlator cleanup and on going
 async fop

Problem:
While we process a cleanup, there is a chance for a race between
async operations, for example ec_launch_replace_heal. So this can
lead to invalid mem access.

Solution:
Just like we track on going heal fops, we can also track fops like
ec_launch_replace_heal, so that we can decide when to send a
PARENT_DOWN request.

> upstream patch : https://review.gluster.org/#/c/glusterfs/+/22798/

>Change-Id: I055391c5c6c34d58aef7336847f3b570cb831298
>fixes: bz#1703948
>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>

Change-Id: I055391c5c6c34d58aef7336847f3b570cb831298
BUG: 1714588
Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/172801
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
 xlators/cluster/ec/src/ec-common.c | 10 ++++++++++
 xlators/cluster/ec/src/ec-common.h |  2 ++
 xlators/cluster/ec/src/ec-data.c   |  4 +++-
 xlators/cluster/ec/src/ec-heal.c   | 17 +++++++++++++++--
 xlators/cluster/ec/src/ec-types.h  |  1 +
 xlators/cluster/ec/src/ec.c        | 37 +++++++++++++++++++++++++------------
 6 files changed, 56 insertions(+), 15 deletions(-)

diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index e85aa8b..9cc6395 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -2955,3 +2955,13 @@ ec_manager(ec_fop_data_t *fop, int32_t error)
 
     __ec_manager(fop, error);
 }
+
+gf_boolean_t
+__ec_is_last_fop(ec_t *ec)
+{
+    if ((list_empty(&ec->pending_fops)) &&
+        (GF_ATOMIC_GET(ec->async_fop_count) == 0)) {
+        return _gf_true;
+    }
+    return _gf_false;
+}
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
index e948342..bf6c97d 100644
--- a/xlators/cluster/ec/src/ec-common.h
+++ b/xlators/cluster/ec/src/ec-common.h
@@ -204,4 +204,6 @@ void
 ec_reset_entry_healing(ec_fop_data_t *fop);
 char *
 ec_msg_str(ec_fop_data_t *fop);
+gf_boolean_t
+__ec_is_last_fop(ec_t *ec);
 #endif /* __EC_COMMON_H__ */
diff --git a/xlators/cluster/ec/src/ec-data.c b/xlators/cluster/ec/src/ec-data.c
index 6ef9340..8d2d9a1 100644
--- a/xlators/cluster/ec/src/ec-data.c
+++ b/xlators/cluster/ec/src/ec-data.c
@@ -202,11 +202,13 @@ ec_handle_last_pending_fop_completion(ec_fop_data_t *fop, gf_boolean_t *notify)
 {
     ec_t *ec = fop->xl->private;
 
+    *notify = _gf_false;
+
     if (!list_empty(&fop->pending_list)) {
         LOCK(&ec->lock);
         {
             list_del_init(&fop->pending_list);
-            *notify = list_empty(&ec->pending_fops);
+            *notify = __ec_is_last_fop(ec);
         }
         UNLOCK(&ec->lock);
     }
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index 8844c29..237fea2 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -2814,8 +2814,20 @@ int
 ec_replace_heal_done(int ret, call_frame_t *heal, void *opaque)
 {
     ec_t *ec = opaque;
+    gf_boolean_t last_fop = _gf_false;
 
+    if (GF_ATOMIC_DEC(ec->async_fop_count) == 0) {
+        LOCK(&ec->lock);
+        {
+            last_fop = __ec_is_last_fop(ec);
+        }
+        UNLOCK(&ec->lock);
+    }
     gf_msg_debug(ec->xl->name, 0, "getxattr on bricks is done ret %d", ret);
+
+    if (last_fop)
+        ec_pending_fops_completed(ec);
+
     return 0;
 }
 
@@ -2869,14 +2881,15 @@ ec_launch_replace_heal(ec_t *ec)
 {
     int ret = -1;
 
-    if (!ec)
-        return ret;
     ret = synctask_new(ec->xl->ctx->env, ec_replace_brick_heal_wrap,
                        ec_replace_heal_done, NULL, ec);
+
     if (ret < 0) {
         gf_msg_debug(ec->xl->name, 0, "Heal failed for replace brick ret = %d",
                      ret);
+        ec_replace_heal_done(-1, NULL, ec);
     }
+
     return ret;
 }
 
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
index 1c295c0..4dbf4a3 100644
--- a/xlators/cluster/ec/src/ec-types.h
+++ b/xlators/cluster/ec/src/ec-types.h
@@ -643,6 +643,7 @@ struct _ec {
     uintptr_t xl_notify;      /* Bit flag representing
                                  notification for bricks. */
     uintptr_t node_mask;
+    gf_atomic_t async_fop_count; /* Number of on going asynchronous fops. */
     xlator_t **xl_list;
     gf_lock_t lock;
     gf_timer_t *timer;
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index df5912c..f0d58c0 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -355,6 +355,7 @@ ec_notify_cbk(void *data)
     ec_t *ec = data;
     glusterfs_event_t event = GF_EVENT_MAXVAL;
     gf_boolean_t propagate = _gf_false;
+    gf_boolean_t launch_heal = _gf_false;
 
     LOCK(&ec->lock);
     {
@@ -384,6 +385,11 @@ ec_notify_cbk(void *data)
              * still bricks DOWN, they will be healed when they
              * come up. */
             ec_up(ec->xl, ec);
+
+            if (ec->shd.iamshd && !ec->shutdown) {
+                launch_heal = _gf_true;
+                GF_ATOMIC_INC(ec->async_fop_count);
+            }
         }
 
         propagate = _gf_true;
@@ -391,13 +397,12 @@ ec_notify_cbk(void *data)
 unlock:
     UNLOCK(&ec->lock);
 
+    if (launch_heal) {
+        /* We have just brought the volume UP, so we trigger
+         * a self-heal check on the root directory. */
+        ec_launch_replace_heal(ec);
+    }
     if (propagate) {
-        if ((event == GF_EVENT_CHILD_UP) && ec->shd.iamshd) {
-            /* We have just brought the volume UP, so we trigger
-             * a self-heal check on the root directory. */
-            ec_launch_replace_heal(ec);
-        }
-
         default_notify(ec->xl, event, NULL);
     }
 }
@@ -425,7 +430,7 @@ ec_disable_delays(ec_t *ec)
 {
     ec->shutdown = _gf_true;
 
-    return list_empty(&ec->pending_fops);
+    return __ec_is_last_fop(ec);
 }
 
 void
@@ -603,7 +608,10 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
         if (event == GF_EVENT_CHILD_UP) {
            /* We need to trigger a selfheal if a brick changes
             * to UP state. */
-            needs_shd_check = ec_set_up_state(ec, mask, mask);
+            if (ec_set_up_state(ec, mask, mask) && ec->shd.iamshd &&
+                !ec->shutdown) {
+                needs_shd_check = _gf_true;
+            }
         } else if (event == GF_EVENT_CHILD_DOWN) {
             ec_set_up_state(ec, mask, 0);
         }
@@ -633,17 +641,21 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
             }
         } else {
             propagate = _gf_false;
+            needs_shd_check = _gf_false;
+        }
+
+        if (needs_shd_check) {
+            GF_ATOMIC_INC(ec->async_fop_count);
         }
     }
 unlock:
     UNLOCK(&ec->lock);
 
 done:
+    if (needs_shd_check) {
+        ec_launch_replace_heal(ec);
+    }
     if (propagate) {
-        if (needs_shd_check && ec->shd.iamshd) {
-            ec_launch_replace_heal(ec);
-        }
-
         error = default_notify(this, event, data);
     }
 
@@ -705,6 +717,7 @@ init(xlator_t *this)
     ec->xl = this;
     LOCK_INIT(&ec->lock);
 
+    GF_ATOMIC_INIT(ec->async_fop_count, 0);
    INIT_LIST_HEAD(&ec->pending_fops);
    INIT_LIST_HEAD(&ec->heal_waiting);
    INIT_LIST_HEAD(&ec->healing);
-- 
1.8.3.1