17b94a
From edc238e40060773f5f5fd59fcdad8ae27d65749f Mon Sep 17 00:00:00 2001
17b94a
From: Mohammed Rafi KC <rkavunga@redhat.com>
17b94a
Date: Mon, 29 Apr 2019 13:22:32 +0530
17b94a
Subject: [PATCH 139/141] ec/shd: Cleanup self heal daemon resources during ec
17b94a
 fini
17b94a
17b94a
We were not properly cleaning self-heal daemon resources
17b94a
during ec fini. With shd multiplexing, it is absolutely
17b94a
necessary to cleanup all the resources during ec fini.
17b94a
17b94a
Back port of
17b94a
 upstream patch: https://review.gluster.org/#/c/glusterfs/+/22644/
17b94a
 >Change-Id: Iae4f1bce7d8c2e1da51ac568700a51088f3cc7f2
17b94a
 >fixes: bz#1703948
17b94a
 >Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
17b94a
17b94a
BUG: 1703434
17b94a
Change-Id: I98ae03178d3176772c62e34baa08a5c35b8f7217
17b94a
Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
17b94a
Reviewed-on: https://code.engineering.redhat.com/gerrit/169994
17b94a
Tested-by: RHGS Build Bot <nigelb@redhat.com>
17b94a
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
17b94a
---
17b94a
 libglusterfs/src/syncop-utils.c          |  2 +
17b94a
 xlators/cluster/afr/src/afr-self-heald.c |  5 +++
17b94a
 xlators/cluster/ec/src/ec-heald.c        | 77 +++++++++++++++++++++++++++-----
17b94a
 xlators/cluster/ec/src/ec-heald.h        |  3 ++
17b94a
 xlators/cluster/ec/src/ec-messages.h     |  3 +-
17b94a
 xlators/cluster/ec/src/ec.c              | 47 +++++++++++++++++++
17b94a
 6 files changed, 124 insertions(+), 13 deletions(-)
17b94a
17b94a
diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c
17b94a
index b842142..4167db4 100644
17b94a
--- a/libglusterfs/src/syncop-utils.c
17b94a
+++ b/libglusterfs/src/syncop-utils.c
17b94a
@@ -354,6 +354,8 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
17b94a
 
17b94a
     if (frame) {
17b94a
         this = frame->this;
17b94a
+    } else {
17b94a
+        this = THIS;
17b94a
     }
17b94a
 
17b94a
     /*For this functionality to be implemented in general, we need
17b94a
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
17b94a
index 8bc4720..522fe5d 100644
17b94a
--- a/xlators/cluster/afr/src/afr-self-heald.c
17b94a
+++ b/xlators/cluster/afr/src/afr-self-heald.c
17b94a
@@ -524,6 +524,11 @@ afr_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
17b94a
     afr_private_t *priv = NULL;
17b94a
 
17b94a
     priv = this->private;
17b94a
+
17b94a
+    if (this->cleanup_starting) {
17b94a
+        return -ENOTCONN;
17b94a
+    }
17b94a
+
17b94a
     if (!priv->shd.enabled)
17b94a
         return -EBUSY;
17b94a
 
17b94a
diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
17b94a
index cba111a..edf5e11 100644
17b94a
--- a/xlators/cluster/ec/src/ec-heald.c
17b94a
+++ b/xlators/cluster/ec/src/ec-heald.c
17b94a
@@ -71,6 +71,11 @@ disabled_loop:
17b94a
             break;
17b94a
     }
17b94a
 
17b94a
+    if (ec->shutdown) {
17b94a
+        healer->running = _gf_false;
17b94a
+        return -1;
17b94a
+    }
17b94a
+
17b94a
     ret = healer->rerun;
17b94a
     healer->rerun = 0;
17b94a
 
17b94a
@@ -241,9 +246,11 @@ ec_shd_index_sweep(struct subvol_healer *healer)
17b94a
         goto out;
17b94a
     }
17b94a
 
17b94a
+    _mask_cancellation();
17b94a
     ret = syncop_mt_dir_scan(NULL, subvol, &loc, GF_CLIENT_PID_SELF_HEALD,
17b94a
                              healer, ec_shd_index_heal, xdata,
17b94a
                              ec->shd.max_threads, ec->shd.wait_qlength);
17b94a
+    _unmask_cancellation();
17b94a
 out:
17b94a
     if (xdata)
17b94a
         dict_unref(xdata);
17b94a
@@ -263,6 +270,11 @@ ec_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
17b94a
     int ret = 0;
17b94a
 
17b94a
     ec = this->private;
17b94a
+
17b94a
+    if (this->cleanup_starting) {
17b94a
+        return -ENOTCONN;
17b94a
+    }
17b94a
+
17b94a
     if (ec->xl_up_count <= ec->fragments) {
17b94a
         return -ENOTCONN;
17b94a
     }
17b94a
@@ -305,11 +317,15 @@ ec_shd_full_sweep(struct subvol_healer *healer, inode_t *inode)
17b94a
 {
17b94a
     ec_t *ec = NULL;
17b94a
     loc_t loc = {0};
17b94a
+    int ret = -1;
17b94a
 
17b94a
     ec = healer->this->private;
17b94a
     loc.inode = inode;
17b94a
-    return syncop_ftw(ec->xl_list[healer->subvol], &loc,
17b94a
-                      GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
17b94a
+    _mask_cancellation();
17b94a
+    ret = syncop_ftw(ec->xl_list[healer->subvol], &loc,
17b94a
+                     GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
17b94a
+    _unmask_cancellation();
17b94a
+    return ret;
17b94a
 }
17b94a
 
17b94a
 void *
17b94a
@@ -317,13 +333,16 @@ ec_shd_index_healer(void *data)
17b94a
 {
17b94a
     struct subvol_healer *healer = NULL;
17b94a
     xlator_t *this = NULL;
17b94a
+    int run = 0;
17b94a
 
17b94a
     healer = data;
17b94a
     THIS = this = healer->this;
17b94a
     ec_t *ec = this->private;
17b94a
 
17b94a
     for (;;) {
17b94a
-        ec_shd_healer_wait(healer);
17b94a
+        run = ec_shd_healer_wait(healer);
17b94a
+        if (run == -1)
17b94a
+            break;
17b94a
 
17b94a
         if (ec->xl_up_count > ec->fragments) {
17b94a
             gf_msg_debug(this->name, 0, "starting index sweep on subvol %s",
17b94a
@@ -352,16 +371,12 @@ ec_shd_full_healer(void *data)
17b94a
 
17b94a
     rootloc.inode = this->itable->root;
17b94a
     for (;;) {
17b94a
-        pthread_mutex_lock(&healer->mutex);
17b94a
-        {
17b94a
-            run = __ec_shd_healer_wait(healer);
17b94a
-            if (!run)
17b94a
-                healer->running = _gf_false;
17b94a
-        }
17b94a
-        pthread_mutex_unlock(&healer->mutex);
17b94a
-
17b94a
-        if (!run)
17b94a
+        run = ec_shd_healer_wait(healer);
17b94a
+        if (run < 0) {
17b94a
             break;
17b94a
+        } else if (run == 0) {
17b94a
+            continue;
17b94a
+        }
17b94a
 
17b94a
         if (ec->xl_up_count > ec->fragments) {
17b94a
             gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_FULL_SWEEP_START,
17b94a
@@ -562,3 +577,41 @@ out:
17b94a
     dict_del(output, this->name);
17b94a
     return ret;
17b94a
 }
17b94a
+
17b94a
+void
17b94a
+ec_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
17b94a
+{
17b94a
+    if (!healer)
17b94a
+        return;
17b94a
+
17b94a
+    pthread_cond_destroy(&healer->cond);
17b94a
+    pthread_mutex_destroy(&healer->mutex);
17b94a
+}
17b94a
+
17b94a
+void
17b94a
+ec_selfheal_daemon_fini(xlator_t *this)
17b94a
+{
17b94a
+    struct subvol_healer *healer = NULL;
17b94a
+    ec_self_heald_t *shd = NULL;
17b94a
+    ec_t *priv = NULL;
17b94a
+    int i = 0;
17b94a
+
17b94a
+    priv = this->private;
17b94a
+    if (!priv)
17b94a
+        return;
17b94a
+
17b94a
+    shd = &priv->shd;
17b94a
+    if (!shd->iamshd)
17b94a
+        return;
17b94a
+
17b94a
+    for (i = 0; i < priv->nodes; i++) {
17b94a
+        healer = &shd->index_healers[i];
17b94a
+        ec_destroy_healer_object(this, healer);
17b94a
+
17b94a
+        healer = &shd->full_healers[i];
17b94a
+        ec_destroy_healer_object(this, healer);
17b94a
+    }
17b94a
+
17b94a
+    GF_FREE(shd->index_healers);
17b94a
+    GF_FREE(shd->full_healers);
17b94a
+}
17b94a
diff --git a/xlators/cluster/ec/src/ec-heald.h b/xlators/cluster/ec/src/ec-heald.h
17b94a
index 2eda2a7..8184cf4 100644
17b94a
--- a/xlators/cluster/ec/src/ec-heald.h
17b94a
+++ b/xlators/cluster/ec/src/ec-heald.h
17b94a
@@ -24,4 +24,7 @@ ec_selfheal_daemon_init(xlator_t *this);
17b94a
 void
17b94a
 ec_shd_index_healer_wake(ec_t *ec);
17b94a
 
17b94a
+void
17b94a
+ec_selfheal_daemon_fini(xlator_t *this);
17b94a
+
17b94a
 #endif /* __EC_HEALD_H__ */
17b94a
diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h
17b94a
index 7c28808..ce299bb 100644
17b94a
--- a/xlators/cluster/ec/src/ec-messages.h
17b94a
+++ b/xlators/cluster/ec/src/ec-messages.h
17b94a
@@ -55,6 +55,7 @@ GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL,
17b94a
            EC_MSG_CONFIG_XATTR_INVALID, EC_MSG_EXTENSION, EC_MSG_EXTENSION_NONE,
17b94a
            EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED,
17b94a
            EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED,
17b94a
-           EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED);
17b94a
+           EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED,
17b94a
+           EC_MSG_THREAD_CLEANUP_FAILED);
17b94a
 
17b94a
 #endif /* !_EC_MESSAGES_H_ */
17b94a
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
17b94a
index 3c8013e..264582a 100644
17b94a
--- a/xlators/cluster/ec/src/ec.c
17b94a
+++ b/xlators/cluster/ec/src/ec.c
17b94a
@@ -429,6 +429,51 @@ ec_disable_delays(ec_t *ec)
17b94a
 }
17b94a
 
17b94a
 void
17b94a
+ec_cleanup_healer_object(ec_t *ec)
17b94a
+{
17b94a
+    struct subvol_healer *healer = NULL;
17b94a
+    ec_self_heald_t *shd = NULL;
17b94a
+    void *res = NULL;
17b94a
+    int i = 0;
17b94a
+    gf_boolean_t is_join = _gf_false;
17b94a
+
17b94a
+    shd = &ec->shd;
17b94a
+    if (!shd->iamshd)
17b94a
+        return;
17b94a
+
17b94a
+    for (i = 0; i < ec->nodes; i++) {
17b94a
+        healer = &shd->index_healers[i];
17b94a
+        pthread_mutex_lock(&healer->mutex);
17b94a
+        {
17b94a
+            healer->rerun = 1;
17b94a
+            if (healer->running) {
17b94a
+                pthread_cond_signal(&healer->cond);
17b94a
+                is_join = _gf_true;
17b94a
+            }
17b94a
+        }
17b94a
+        pthread_mutex_unlock(&healer->mutex);
17b94a
+        if (is_join) {
17b94a
+            pthread_join(healer->thread, &res);
17b94a
+            is_join = _gf_false;
17b94a
+        }
17b94a
+
17b94a
+        healer = &shd->full_healers[i];
17b94a
+        pthread_mutex_lock(&healer->mutex);
17b94a
+        {
17b94a
+            healer->rerun = 1;
17b94a
+            if (healer->running) {
17b94a
+                pthread_cond_signal(&healer->cond);
17b94a
+                is_join = _gf_true;
17b94a
+            }
17b94a
+        }
17b94a
+        pthread_mutex_unlock(&healer->mutex);
17b94a
+        if (is_join) {
17b94a
+            pthread_join(healer->thread, &res);
17b94a
+            is_join = _gf_false;
17b94a
+        }
17b94a
+    }
17b94a
+}
17b94a
+void
17b94a
 ec_pending_fops_completed(ec_t *ec)
17b94a
 {
17b94a
     if (ec->shutdown) {
17b94a
@@ -544,6 +589,7 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
17b94a
         /* If there aren't pending fops running after we have waken up
17b94a
          * them, we immediately propagate the notification. */
17b94a
         propagate = ec_disable_delays(ec);
17b94a
+        ec_cleanup_healer_object(ec);
17b94a
         goto unlock;
17b94a
     }
17b94a
 
17b94a
@@ -759,6 +805,7 @@ failed:
17b94a
 void
17b94a
 fini(xlator_t *this)
17b94a
 {
17b94a
+    ec_selfheal_daemon_fini(this);
17b94a
     __ec_destroy_private(this);
17b94a
 }
17b94a
 
17b94a
-- 
17b94a
1.8.3.1
17b94a