14f8ab
From edc238e40060773f5f5fd59fcdad8ae27d65749f Mon Sep 17 00:00:00 2001
14f8ab
From: Mohammed Rafi KC <rkavunga@redhat.com>
14f8ab
Date: Mon, 29 Apr 2019 13:22:32 +0530
14f8ab
Subject: [PATCH 139/141] ec/shd: Cleanup self heal daemon resources during ec
14f8ab
 fini
14f8ab
14f8ab
We were not properly cleaning self-heal daemon resources
14f8ab
during ec fini. With shd multiplexing, it is absolutely
14f8ab
necessary to cleanup all the resources during ec fini.
14f8ab
14f8ab
Back port of
14f8ab
 upstream patch: https://review.gluster.org/#/c/glusterfs/+/22644/
14f8ab
 >Change-Id: Iae4f1bce7d8c2e1da51ac568700a51088f3cc7f2
14f8ab
 >fixes: bz#1703948
14f8ab
 >Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
14f8ab
14f8ab
BUG: 1703434
14f8ab
Change-Id: I98ae03178d3176772c62e34baa08a5c35b8f7217
14f8ab
Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
14f8ab
Reviewed-on: https://code.engineering.redhat.com/gerrit/169994
14f8ab
Tested-by: RHGS Build Bot <nigelb@redhat.com>
14f8ab
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
14f8ab
---
14f8ab
 libglusterfs/src/syncop-utils.c          |  2 +
14f8ab
 xlators/cluster/afr/src/afr-self-heald.c |  5 +++
14f8ab
 xlators/cluster/ec/src/ec-heald.c        | 77 +++++++++++++++++++++++++++-----
14f8ab
 xlators/cluster/ec/src/ec-heald.h        |  3 ++
14f8ab
 xlators/cluster/ec/src/ec-messages.h     |  3 +-
14f8ab
 xlators/cluster/ec/src/ec.c              | 47 +++++++++++++++++++
14f8ab
 6 files changed, 124 insertions(+), 13 deletions(-)
14f8ab
14f8ab
diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c
14f8ab
index b842142..4167db4 100644
14f8ab
--- a/libglusterfs/src/syncop-utils.c
14f8ab
+++ b/libglusterfs/src/syncop-utils.c
14f8ab
@@ -354,6 +354,8 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
14f8ab
 
14f8ab
     if (frame) {
14f8ab
         this = frame->this;
14f8ab
+    } else {
14f8ab
+        this = THIS;
14f8ab
     }
14f8ab
 
14f8ab
     /*For this functionality to be implemented in general, we need
14f8ab
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
14f8ab
index 8bc4720..522fe5d 100644
14f8ab
--- a/xlators/cluster/afr/src/afr-self-heald.c
14f8ab
+++ b/xlators/cluster/afr/src/afr-self-heald.c
14f8ab
@@ -524,6 +524,11 @@ afr_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
14f8ab
     afr_private_t *priv = NULL;
14f8ab
 
14f8ab
     priv = this->private;
14f8ab
+
14f8ab
+    if (this->cleanup_starting) {
14f8ab
+        return -ENOTCONN;
14f8ab
+    }
14f8ab
+
14f8ab
     if (!priv->shd.enabled)
14f8ab
         return -EBUSY;
14f8ab
 
14f8ab
diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
14f8ab
index cba111a..edf5e11 100644
14f8ab
--- a/xlators/cluster/ec/src/ec-heald.c
14f8ab
+++ b/xlators/cluster/ec/src/ec-heald.c
14f8ab
@@ -71,6 +71,11 @@ disabled_loop:
14f8ab
             break;
14f8ab
     }
14f8ab
 
14f8ab
+    if (ec->shutdown) {
14f8ab
+        healer->running = _gf_false;
14f8ab
+        return -1;
14f8ab
+    }
14f8ab
+
14f8ab
     ret = healer->rerun;
14f8ab
     healer->rerun = 0;
14f8ab
 
14f8ab
@@ -241,9 +246,11 @@ ec_shd_index_sweep(struct subvol_healer *healer)
14f8ab
         goto out;
14f8ab
     }
14f8ab
 
14f8ab
+    _mask_cancellation();
14f8ab
     ret = syncop_mt_dir_scan(NULL, subvol, &loc, GF_CLIENT_PID_SELF_HEALD,
14f8ab
                              healer, ec_shd_index_heal, xdata,
14f8ab
                              ec->shd.max_threads, ec->shd.wait_qlength);
14f8ab
+    _unmask_cancellation();
14f8ab
 out:
14f8ab
     if (xdata)
14f8ab
         dict_unref(xdata);
14f8ab
@@ -263,6 +270,11 @@ ec_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
14f8ab
     int ret = 0;
14f8ab
 
14f8ab
     ec = this->private;
14f8ab
+
14f8ab
+    if (this->cleanup_starting) {
14f8ab
+        return -ENOTCONN;
14f8ab
+    }
14f8ab
+
14f8ab
     if (ec->xl_up_count <= ec->fragments) {
14f8ab
         return -ENOTCONN;
14f8ab
     }
14f8ab
@@ -305,11 +317,15 @@ ec_shd_full_sweep(struct subvol_healer *healer, inode_t *inode)
14f8ab
 {
14f8ab
     ec_t *ec = NULL;
14f8ab
     loc_t loc = {0};
14f8ab
+    int ret = -1;
14f8ab
 
14f8ab
     ec = healer->this->private;
14f8ab
     loc.inode = inode;
14f8ab
-    return syncop_ftw(ec->xl_list[healer->subvol], &loc,
14f8ab
-                      GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
14f8ab
+    _mask_cancellation();
14f8ab
+    ret = syncop_ftw(ec->xl_list[healer->subvol], &loc,
14f8ab
+                     GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
14f8ab
+    _unmask_cancellation();
14f8ab
+    return ret;
14f8ab
 }
14f8ab
 
14f8ab
 void *
14f8ab
@@ -317,13 +333,16 @@ ec_shd_index_healer(void *data)
14f8ab
 {
14f8ab
     struct subvol_healer *healer = NULL;
14f8ab
     xlator_t *this = NULL;
14f8ab
+    int run = 0;
14f8ab
 
14f8ab
     healer = data;
14f8ab
     THIS = this = healer->this;
14f8ab
     ec_t *ec = this->private;
14f8ab
 
14f8ab
     for (;;) {
14f8ab
-        ec_shd_healer_wait(healer);
14f8ab
+        run = ec_shd_healer_wait(healer);
14f8ab
+        if (run == -1)
14f8ab
+            break;
14f8ab
 
14f8ab
         if (ec->xl_up_count > ec->fragments) {
14f8ab
             gf_msg_debug(this->name, 0, "starting index sweep on subvol %s",
14f8ab
@@ -352,16 +371,12 @@ ec_shd_full_healer(void *data)
14f8ab
 
14f8ab
     rootloc.inode = this->itable->root;
14f8ab
     for (;;) {
14f8ab
-        pthread_mutex_lock(&healer->mutex);
14f8ab
-        {
14f8ab
-            run = __ec_shd_healer_wait(healer);
14f8ab
-            if (!run)
14f8ab
-                healer->running = _gf_false;
14f8ab
-        }
14f8ab
-        pthread_mutex_unlock(&healer->mutex);
14f8ab
-
14f8ab
-        if (!run)
14f8ab
+        run = ec_shd_healer_wait(healer);
14f8ab
+        if (run < 0) {
14f8ab
             break;
14f8ab
+        } else if (run == 0) {
14f8ab
+            continue;
14f8ab
+        }
14f8ab
 
14f8ab
         if (ec->xl_up_count > ec->fragments) {
14f8ab
             gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_FULL_SWEEP_START,
14f8ab
@@ -562,3 +577,41 @@ out:
14f8ab
     dict_del(output, this->name);
14f8ab
     return ret;
14f8ab
 }
14f8ab
+
14f8ab
+void
14f8ab
+ec_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
14f8ab
+{
14f8ab
+    if (!healer)
14f8ab
+        return;
14f8ab
+
14f8ab
+    pthread_cond_destroy(&healer->cond);
14f8ab
+    pthread_mutex_destroy(&healer->mutex);
14f8ab
+}
14f8ab
+
14f8ab
+void
14f8ab
+ec_selfheal_daemon_fini(xlator_t *this)
14f8ab
+{
14f8ab
+    struct subvol_healer *healer = NULL;
14f8ab
+    ec_self_heald_t *shd = NULL;
14f8ab
+    ec_t *priv = NULL;
14f8ab
+    int i = 0;
14f8ab
+
14f8ab
+    priv = this->private;
14f8ab
+    if (!priv)
14f8ab
+        return;
14f8ab
+
14f8ab
+    shd = &priv->shd;
14f8ab
+    if (!shd->iamshd)
14f8ab
+        return;
14f8ab
+
14f8ab
+    for (i = 0; i < priv->nodes; i++) {
14f8ab
+        healer = &shd->index_healers[i];
14f8ab
+        ec_destroy_healer_object(this, healer);
14f8ab
+
14f8ab
+        healer = &shd->full_healers[i];
14f8ab
+        ec_destroy_healer_object(this, healer);
14f8ab
+    }
14f8ab
+
14f8ab
+    GF_FREE(shd->index_healers);
14f8ab
+    GF_FREE(shd->full_healers);
14f8ab
+}
14f8ab
diff --git a/xlators/cluster/ec/src/ec-heald.h b/xlators/cluster/ec/src/ec-heald.h
14f8ab
index 2eda2a7..8184cf4 100644
14f8ab
--- a/xlators/cluster/ec/src/ec-heald.h
14f8ab
+++ b/xlators/cluster/ec/src/ec-heald.h
14f8ab
@@ -24,4 +24,7 @@ ec_selfheal_daemon_init(xlator_t *this);
14f8ab
 void
14f8ab
 ec_shd_index_healer_wake(ec_t *ec);
14f8ab
 
14f8ab
+void
14f8ab
+ec_selfheal_daemon_fini(xlator_t *this);
14f8ab
+
14f8ab
 #endif /* __EC_HEALD_H__ */
14f8ab
diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h
14f8ab
index 7c28808..ce299bb 100644
14f8ab
--- a/xlators/cluster/ec/src/ec-messages.h
14f8ab
+++ b/xlators/cluster/ec/src/ec-messages.h
14f8ab
@@ -55,6 +55,7 @@ GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL,
14f8ab
            EC_MSG_CONFIG_XATTR_INVALID, EC_MSG_EXTENSION, EC_MSG_EXTENSION_NONE,
14f8ab
            EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED,
14f8ab
            EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED,
14f8ab
-           EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED);
14f8ab
+           EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED,
14f8ab
+           EC_MSG_THREAD_CLEANUP_FAILED);
14f8ab
 
14f8ab
 #endif /* !_EC_MESSAGES_H_ */
14f8ab
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
14f8ab
index 3c8013e..264582a 100644
14f8ab
--- a/xlators/cluster/ec/src/ec.c
14f8ab
+++ b/xlators/cluster/ec/src/ec.c
14f8ab
@@ -429,6 +429,51 @@ ec_disable_delays(ec_t *ec)
14f8ab
 }
14f8ab
 
14f8ab
 void
14f8ab
+ec_cleanup_healer_object(ec_t *ec)
14f8ab
+{
14f8ab
+    struct subvol_healer *healer = NULL;
14f8ab
+    ec_self_heald_t *shd = NULL;
14f8ab
+    void *res = NULL;
14f8ab
+    int i = 0;
14f8ab
+    gf_boolean_t is_join = _gf_false;
14f8ab
+
14f8ab
+    shd = &ec->shd;
14f8ab
+    if (!shd->iamshd)
14f8ab
+        return;
14f8ab
+
14f8ab
+    for (i = 0; i < ec->nodes; i++) {
14f8ab
+        healer = &shd->index_healers[i];
14f8ab
+        pthread_mutex_lock(&healer->mutex);
14f8ab
+        {
14f8ab
+            healer->rerun = 1;
14f8ab
+            if (healer->running) {
14f8ab
+                pthread_cond_signal(&healer->cond);
14f8ab
+                is_join = _gf_true;
14f8ab
+            }
14f8ab
+        }
14f8ab
+        pthread_mutex_unlock(&healer->mutex);
14f8ab
+        if (is_join) {
14f8ab
+            pthread_join(healer->thread, &res);
14f8ab
+            is_join = _gf_false;
14f8ab
+        }
14f8ab
+
14f8ab
+        healer = &shd->full_healers[i];
14f8ab
+        pthread_mutex_lock(&healer->mutex);
14f8ab
+        {
14f8ab
+            healer->rerun = 1;
14f8ab
+            if (healer->running) {
14f8ab
+                pthread_cond_signal(&healer->cond);
14f8ab
+                is_join = _gf_true;
14f8ab
+            }
14f8ab
+        }
14f8ab
+        pthread_mutex_unlock(&healer->mutex);
14f8ab
+        if (is_join) {
14f8ab
+            pthread_join(healer->thread, &res);
14f8ab
+            is_join = _gf_false;
14f8ab
+        }
14f8ab
+    }
14f8ab
+}
14f8ab
+void
14f8ab
 ec_pending_fops_completed(ec_t *ec)
14f8ab
 {
14f8ab
     if (ec->shutdown) {
14f8ab
@@ -544,6 +589,7 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
14f8ab
         /* If there aren't pending fops running after we have waken up
14f8ab
          * them, we immediately propagate the notification. */
14f8ab
         propagate = ec_disable_delays(ec);
14f8ab
+        ec_cleanup_healer_object(ec);
14f8ab
         goto unlock;
14f8ab
     }
14f8ab
 
14f8ab
@@ -759,6 +805,7 @@ failed:
14f8ab
 void
14f8ab
 fini(xlator_t *this)
14f8ab
 {
14f8ab
+    ec_selfheal_daemon_fini(this);
14f8ab
     __ec_destroy_private(this);
14f8ab
 }
14f8ab
 
14f8ab
-- 
14f8ab
1.8.3.1
14f8ab