233933
From edc238e40060773f5f5fd59fcdad8ae27d65749f Mon Sep 17 00:00:00 2001
233933
From: Mohammed Rafi KC <rkavunga@redhat.com>
233933
Date: Mon, 29 Apr 2019 13:22:32 +0530
233933
Subject: [PATCH 139/141] ec/shd: Cleanup self heal daemon resources during ec
233933
 fini
233933
233933
We were not properly cleaning self-heal daemon resources
233933
during ec fini. With shd multiplexing, it is absolutely
233933
necessary to clean up all the resources during ec fini.
233933
233933
Back port of
233933
 upstream patch: https://review.gluster.org/#/c/glusterfs/+/22644/
233933
 >Change-Id: Iae4f1bce7d8c2e1da51ac568700a51088f3cc7f2
233933
 >fixes: bz#1703948
233933
 >Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
233933
233933
BUG: 1703434
233933
Change-Id: I98ae03178d3176772c62e34baa08a5c35b8f7217
233933
Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
233933
Reviewed-on: https://code.engineering.redhat.com/gerrit/169994
233933
Tested-by: RHGS Build Bot <nigelb@redhat.com>
233933
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
233933
---
233933
 libglusterfs/src/syncop-utils.c          |  2 +
233933
 xlators/cluster/afr/src/afr-self-heald.c |  5 +++
233933
 xlators/cluster/ec/src/ec-heald.c        | 77 +++++++++++++++++++++++++++-----
233933
 xlators/cluster/ec/src/ec-heald.h        |  3 ++
233933
 xlators/cluster/ec/src/ec-messages.h     |  3 +-
233933
 xlators/cluster/ec/src/ec.c              | 47 +++++++++++++++++++
233933
 6 files changed, 124 insertions(+), 13 deletions(-)
233933
233933
diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c
233933
index b842142..4167db4 100644
233933
--- a/libglusterfs/src/syncop-utils.c
233933
+++ b/libglusterfs/src/syncop-utils.c
233933
@@ -354,6 +354,8 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
233933
 
233933
     if (frame) {
233933
         this = frame->this;
233933
+    } else {
233933
+        this = THIS;
233933
     }
233933
 
233933
     /*For this functionality to be implemented in general, we need
233933
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
233933
index 8bc4720..522fe5d 100644
233933
--- a/xlators/cluster/afr/src/afr-self-heald.c
233933
+++ b/xlators/cluster/afr/src/afr-self-heald.c
233933
@@ -524,6 +524,11 @@ afr_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
233933
     afr_private_t *priv = NULL;
233933
 
233933
     priv = this->private;
233933
+
233933
+    if (this->cleanup_starting) {
233933
+        return -ENOTCONN;
233933
+    }
233933
+
233933
     if (!priv->shd.enabled)
233933
         return -EBUSY;
233933
 
233933
diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
233933
index cba111a..edf5e11 100644
233933
--- a/xlators/cluster/ec/src/ec-heald.c
233933
+++ b/xlators/cluster/ec/src/ec-heald.c
233933
@@ -71,6 +71,11 @@ disabled_loop:
233933
             break;
233933
     }
233933
 
233933
+    if (ec->shutdown) {
233933
+        healer->running = _gf_false;
233933
+        return -1;
233933
+    }
233933
+
233933
     ret = healer->rerun;
233933
     healer->rerun = 0;
233933
 
233933
@@ -241,9 +246,11 @@ ec_shd_index_sweep(struct subvol_healer *healer)
233933
         goto out;
233933
     }
233933
 
233933
+    _mask_cancellation();
233933
     ret = syncop_mt_dir_scan(NULL, subvol, &loc, GF_CLIENT_PID_SELF_HEALD,
233933
                              healer, ec_shd_index_heal, xdata,
233933
                              ec->shd.max_threads, ec->shd.wait_qlength);
233933
+    _unmask_cancellation();
233933
 out:
233933
     if (xdata)
233933
         dict_unref(xdata);
233933
@@ -263,6 +270,11 @@ ec_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
233933
     int ret = 0;
233933
 
233933
     ec = this->private;
233933
+
233933
+    if (this->cleanup_starting) {
233933
+        return -ENOTCONN;
233933
+    }
233933
+
233933
     if (ec->xl_up_count <= ec->fragments) {
233933
         return -ENOTCONN;
233933
     }
233933
@@ -305,11 +317,15 @@ ec_shd_full_sweep(struct subvol_healer *healer, inode_t *inode)
233933
 {
233933
     ec_t *ec = NULL;
233933
     loc_t loc = {0};
233933
+    int ret = -1;
233933
 
233933
     ec = healer->this->private;
233933
     loc.inode = inode;
233933
-    return syncop_ftw(ec->xl_list[healer->subvol], &loc,
233933
-                      GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
233933
+    _mask_cancellation();
233933
+    ret = syncop_ftw(ec->xl_list[healer->subvol], &loc,
233933
+                     GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
233933
+    _unmask_cancellation();
233933
+    return ret;
233933
 }
233933
 
233933
 void *
233933
@@ -317,13 +333,16 @@ ec_shd_index_healer(void *data)
233933
 {
233933
     struct subvol_healer *healer = NULL;
233933
     xlator_t *this = NULL;
233933
+    int run = 0;
233933
 
233933
     healer = data;
233933
     THIS = this = healer->this;
233933
     ec_t *ec = this->private;
233933
 
233933
     for (;;) {
233933
-        ec_shd_healer_wait(healer);
233933
+        run = ec_shd_healer_wait(healer);
233933
+        if (run == -1)
233933
+            break;
233933
 
233933
         if (ec->xl_up_count > ec->fragments) {
233933
             gf_msg_debug(this->name, 0, "starting index sweep on subvol %s",
233933
@@ -352,16 +371,12 @@ ec_shd_full_healer(void *data)
233933
 
233933
     rootloc.inode = this->itable->root;
233933
     for (;;) {
233933
-        pthread_mutex_lock(&healer->mutex);
233933
-        {
233933
-            run = __ec_shd_healer_wait(healer);
233933
-            if (!run)
233933
-                healer->running = _gf_false;
233933
-        }
233933
-        pthread_mutex_unlock(&healer->mutex);
233933
-
233933
-        if (!run)
233933
+        run = ec_shd_healer_wait(healer);
233933
+        if (run < 0) {
233933
             break;
233933
+        } else if (run == 0) {
233933
+            continue;
233933
+        }
233933
 
233933
         if (ec->xl_up_count > ec->fragments) {
233933
             gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_FULL_SWEEP_START,
233933
@@ -562,3 +577,41 @@ out:
233933
     dict_del(output, this->name);
233933
     return ret;
233933
 }
233933
+
233933
+void
233933
+ec_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
233933
+{
233933
+    if (!healer)
233933
+        return;
233933
+
233933
+    pthread_cond_destroy(&healer->cond);
233933
+    pthread_mutex_destroy(&healer->mutex);
233933
+}
233933
+
233933
+void
233933
+ec_selfheal_daemon_fini(xlator_t *this)
233933
+{
233933
+    struct subvol_healer *healer = NULL;
233933
+    ec_self_heald_t *shd = NULL;
233933
+    ec_t *priv = NULL;
233933
+    int i = 0;
233933
+
233933
+    priv = this->private;
233933
+    if (!priv)
233933
+        return;
233933
+
233933
+    shd = &priv->shd;
233933
+    if (!shd->iamshd)
233933
+        return;
233933
+
233933
+    for (i = 0; i < priv->nodes; i++) {
233933
+        healer = &shd->index_healers[i];
233933
+        ec_destroy_healer_object(this, healer);
233933
+
233933
+        healer = &shd->full_healers[i];
233933
+        ec_destroy_healer_object(this, healer);
233933
+    }
233933
+
233933
+    GF_FREE(shd->index_healers);
233933
+    GF_FREE(shd->full_healers);
233933
+}
233933
diff --git a/xlators/cluster/ec/src/ec-heald.h b/xlators/cluster/ec/src/ec-heald.h
233933
index 2eda2a7..8184cf4 100644
233933
--- a/xlators/cluster/ec/src/ec-heald.h
233933
+++ b/xlators/cluster/ec/src/ec-heald.h
233933
@@ -24,4 +24,7 @@ ec_selfheal_daemon_init(xlator_t *this);
233933
 void
233933
 ec_shd_index_healer_wake(ec_t *ec);
233933
 
233933
+void
233933
+ec_selfheal_daemon_fini(xlator_t *this);
233933
+
233933
 #endif /* __EC_HEALD_H__ */
233933
diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h
233933
index 7c28808..ce299bb 100644
233933
--- a/xlators/cluster/ec/src/ec-messages.h
233933
+++ b/xlators/cluster/ec/src/ec-messages.h
233933
@@ -55,6 +55,7 @@ GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL,
233933
            EC_MSG_CONFIG_XATTR_INVALID, EC_MSG_EXTENSION, EC_MSG_EXTENSION_NONE,
233933
            EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED,
233933
            EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED,
233933
-           EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED);
233933
+           EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED,
233933
+           EC_MSG_THREAD_CLEANUP_FAILED);
233933
 
233933
 #endif /* !_EC_MESSAGES_H_ */
233933
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
233933
index 3c8013e..264582a 100644
233933
--- a/xlators/cluster/ec/src/ec.c
233933
+++ b/xlators/cluster/ec/src/ec.c
233933
@@ -429,6 +429,51 @@ ec_disable_delays(ec_t *ec)
233933
 }
233933
 
233933
 void
233933
+ec_cleanup_healer_object(ec_t *ec)
233933
+{
233933
+    struct subvol_healer *healer = NULL;
233933
+    ec_self_heald_t *shd = NULL;
233933
+    void *res = NULL;
233933
+    int i = 0;
233933
+    gf_boolean_t is_join = _gf_false;
233933
+
233933
+    shd = &ec->shd;
233933
+    if (!shd->iamshd)
233933
+        return;
233933
+
233933
+    for (i = 0; i < ec->nodes; i++) {
233933
+        healer = &shd->index_healers[i];
233933
+        pthread_mutex_lock(&healer->mutex);
233933
+        {
233933
+            healer->rerun = 1;
233933
+            if (healer->running) {
233933
+                pthread_cond_signal(&healer->cond);
233933
+                is_join = _gf_true;
233933
+            }
233933
+        }
233933
+        pthread_mutex_unlock(&healer->mutex);
233933
+        if (is_join) {
233933
+            pthread_join(healer->thread, &res);
233933
+            is_join = _gf_false;
233933
+        }
233933
+
233933
+        healer = &shd->full_healers[i];
233933
+        pthread_mutex_lock(&healer->mutex);
233933
+        {
233933
+            healer->rerun = 1;
233933
+            if (healer->running) {
233933
+                pthread_cond_signal(&healer->cond);
233933
+                is_join = _gf_true;
233933
+            }
233933
+        }
233933
+        pthread_mutex_unlock(&healer->mutex);
233933
+        if (is_join) {
233933
+            pthread_join(healer->thread, &res);
233933
+            is_join = _gf_false;
233933
+        }
233933
+    }
233933
+}
233933
+void
233933
 ec_pending_fops_completed(ec_t *ec)
233933
 {
233933
     if (ec->shutdown) {
233933
@@ -544,6 +589,7 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
233933
         /* If there aren't pending fops running after we have waken up
233933
          * them, we immediately propagate the notification. */
233933
         propagate = ec_disable_delays(ec);
233933
+        ec_cleanup_healer_object(ec);
233933
         goto unlock;
233933
     }
233933
 
233933
@@ -759,6 +805,7 @@ failed:
233933
 void
233933
 fini(xlator_t *this)
233933
 {
233933
+    ec_selfheal_daemon_fini(this);
233933
     __ec_destroy_private(this);
233933
 }
233933
 
233933
-- 
233933
1.8.3.1
233933