74b1de
From edc238e40060773f5f5fd59fcdad8ae27d65749f Mon Sep 17 00:00:00 2001
74b1de
From: Mohammed Rafi KC <rkavunga@redhat.com>
74b1de
Date: Mon, 29 Apr 2019 13:22:32 +0530
74b1de
Subject: [PATCH 139/141] ec/shd: Cleanup self heal daemon resources during ec
74b1de
 fini
74b1de
74b1de
We were not properly cleaning self-heal daemon resources
74b1de
during ec fini. With shd multiplexing, it is absolutely
74b1de
necessary to cleanup all the resources during ec fini.
74b1de
74b1de
Back port of
74b1de
 upstream patch: https://review.gluster.org/#/c/glusterfs/+/22644/
74b1de
 >Change-Id: Iae4f1bce7d8c2e1da51ac568700a51088f3cc7f2
74b1de
 >fixes: bz#1703948
74b1de
 >Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
74b1de
74b1de
BUG: 1703434
74b1de
Change-Id: I98ae03178d3176772c62e34baa08a5c35b8f7217
74b1de
Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
74b1de
Reviewed-on: https://code.engineering.redhat.com/gerrit/169994
74b1de
Tested-by: RHGS Build Bot <nigelb@redhat.com>
74b1de
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
74b1de
---
74b1de
 libglusterfs/src/syncop-utils.c          |  2 +
74b1de
 xlators/cluster/afr/src/afr-self-heald.c |  5 +++
74b1de
 xlators/cluster/ec/src/ec-heald.c        | 77 +++++++++++++++++++++++++++-----
74b1de
 xlators/cluster/ec/src/ec-heald.h        |  3 ++
74b1de
 xlators/cluster/ec/src/ec-messages.h     |  3 +-
74b1de
 xlators/cluster/ec/src/ec.c              | 47 +++++++++++++++++++
74b1de
 6 files changed, 124 insertions(+), 13 deletions(-)
74b1de
74b1de
diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c
74b1de
index b842142..4167db4 100644
74b1de
--- a/libglusterfs/src/syncop-utils.c
74b1de
+++ b/libglusterfs/src/syncop-utils.c
74b1de
@@ -354,6 +354,8 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
74b1de
 
74b1de
     if (frame) {
74b1de
         this = frame->this;
74b1de
+    } else {
74b1de
+        this = THIS;
74b1de
     }
74b1de
 
74b1de
     /*For this functionality to be implemented in general, we need
74b1de
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
74b1de
index 8bc4720..522fe5d 100644
74b1de
--- a/xlators/cluster/afr/src/afr-self-heald.c
74b1de
+++ b/xlators/cluster/afr/src/afr-self-heald.c
74b1de
@@ -524,6 +524,11 @@ afr_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
74b1de
     afr_private_t *priv = NULL;
74b1de
 
74b1de
     priv = this->private;
74b1de
+
74b1de
+    if (this->cleanup_starting) {
74b1de
+        return -ENOTCONN;
74b1de
+    }
74b1de
+
74b1de
     if (!priv->shd.enabled)
74b1de
         return -EBUSY;
74b1de
 
74b1de
diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
74b1de
index cba111a..edf5e11 100644
74b1de
--- a/xlators/cluster/ec/src/ec-heald.c
74b1de
+++ b/xlators/cluster/ec/src/ec-heald.c
74b1de
@@ -71,6 +71,11 @@ disabled_loop:
74b1de
             break;
74b1de
     }
74b1de
 
74b1de
+    if (ec->shutdown) {
74b1de
+        healer->running = _gf_false;
74b1de
+        return -1;
74b1de
+    }
74b1de
+
74b1de
     ret = healer->rerun;
74b1de
     healer->rerun = 0;
74b1de
 
74b1de
@@ -241,9 +246,11 @@ ec_shd_index_sweep(struct subvol_healer *healer)
74b1de
         goto out;
74b1de
     }
74b1de
 
74b1de
+    _mask_cancellation();
74b1de
     ret = syncop_mt_dir_scan(NULL, subvol, &loc, GF_CLIENT_PID_SELF_HEALD,
74b1de
                              healer, ec_shd_index_heal, xdata,
74b1de
                              ec->shd.max_threads, ec->shd.wait_qlength);
74b1de
+    _unmask_cancellation();
74b1de
 out:
74b1de
     if (xdata)
74b1de
         dict_unref(xdata);
74b1de
@@ -263,6 +270,11 @@ ec_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
74b1de
     int ret = 0;
74b1de
 
74b1de
     ec = this->private;
74b1de
+
74b1de
+    if (this->cleanup_starting) {
74b1de
+        return -ENOTCONN;
74b1de
+    }
74b1de
+
74b1de
     if (ec->xl_up_count <= ec->fragments) {
74b1de
         return -ENOTCONN;
74b1de
     }
74b1de
@@ -305,11 +317,15 @@ ec_shd_full_sweep(struct subvol_healer *healer, inode_t *inode)
74b1de
 {
74b1de
     ec_t *ec = NULL;
74b1de
     loc_t loc = {0};
74b1de
+    int ret = -1;
74b1de
 
74b1de
     ec = healer->this->private;
74b1de
     loc.inode = inode;
74b1de
-    return syncop_ftw(ec->xl_list[healer->subvol], &loc,
74b1de
-                      GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
74b1de
+    _mask_cancellation();
74b1de
+    ret = syncop_ftw(ec->xl_list[healer->subvol], &loc,
74b1de
+                     GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
74b1de
+    _unmask_cancellation();
74b1de
+    return ret;
74b1de
 }
74b1de
 
74b1de
 void *
74b1de
@@ -317,13 +333,16 @@ ec_shd_index_healer(void *data)
74b1de
 {
74b1de
     struct subvol_healer *healer = NULL;
74b1de
     xlator_t *this = NULL;
74b1de
+    int run = 0;
74b1de
 
74b1de
     healer = data;
74b1de
     THIS = this = healer->this;
74b1de
     ec_t *ec = this->private;
74b1de
 
74b1de
     for (;;) {
74b1de
-        ec_shd_healer_wait(healer);
74b1de
+        run = ec_shd_healer_wait(healer);
74b1de
+        if (run == -1)
74b1de
+            break;
74b1de
 
74b1de
         if (ec->xl_up_count > ec->fragments) {
74b1de
             gf_msg_debug(this->name, 0, "starting index sweep on subvol %s",
74b1de
@@ -352,16 +371,12 @@ ec_shd_full_healer(void *data)
74b1de
 
74b1de
     rootloc.inode = this->itable->root;
74b1de
     for (;;) {
74b1de
-        pthread_mutex_lock(&healer->mutex);
74b1de
-        {
74b1de
-            run = __ec_shd_healer_wait(healer);
74b1de
-            if (!run)
74b1de
-                healer->running = _gf_false;
74b1de
-        }
74b1de
-        pthread_mutex_unlock(&healer->mutex);
74b1de
-
74b1de
-        if (!run)
74b1de
+        run = ec_shd_healer_wait(healer);
74b1de
+        if (run < 0) {
74b1de
             break;
74b1de
+        } else if (run == 0) {
74b1de
+            continue;
74b1de
+        }
74b1de
 
74b1de
         if (ec->xl_up_count > ec->fragments) {
74b1de
             gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_FULL_SWEEP_START,
74b1de
@@ -562,3 +577,41 @@ out:
74b1de
     dict_del(output, this->name);
74b1de
     return ret;
74b1de
 }
74b1de
+
74b1de
+void
74b1de
+ec_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
74b1de
+{
74b1de
+    if (!healer)
74b1de
+        return;
74b1de
+
74b1de
+    pthread_cond_destroy(&healer->cond);
74b1de
+    pthread_mutex_destroy(&healer->mutex);
74b1de
+}
74b1de
+
74b1de
+void
74b1de
+ec_selfheal_daemon_fini(xlator_t *this)
74b1de
+{
74b1de
+    struct subvol_healer *healer = NULL;
74b1de
+    ec_self_heald_t *shd = NULL;
74b1de
+    ec_t *priv = NULL;
74b1de
+    int i = 0;
74b1de
+
74b1de
+    priv = this->private;
74b1de
+    if (!priv)
74b1de
+        return;
74b1de
+
74b1de
+    shd = &priv->shd;
74b1de
+    if (!shd->iamshd)
74b1de
+        return;
74b1de
+
74b1de
+    for (i = 0; i < priv->nodes; i++) {
74b1de
+        healer = &shd->index_healers[i];
74b1de
+        ec_destroy_healer_object(this, healer);
74b1de
+
74b1de
+        healer = &shd->full_healers[i];
74b1de
+        ec_destroy_healer_object(this, healer);
74b1de
+    }
74b1de
+
74b1de
+    GF_FREE(shd->index_healers);
74b1de
+    GF_FREE(shd->full_healers);
74b1de
+}
74b1de
diff --git a/xlators/cluster/ec/src/ec-heald.h b/xlators/cluster/ec/src/ec-heald.h
74b1de
index 2eda2a7..8184cf4 100644
74b1de
--- a/xlators/cluster/ec/src/ec-heald.h
74b1de
+++ b/xlators/cluster/ec/src/ec-heald.h
74b1de
@@ -24,4 +24,7 @@ ec_selfheal_daemon_init(xlator_t *this);
74b1de
 void
74b1de
 ec_shd_index_healer_wake(ec_t *ec);
74b1de
 
74b1de
+void
74b1de
+ec_selfheal_daemon_fini(xlator_t *this);
74b1de
+
74b1de
 #endif /* __EC_HEALD_H__ */
74b1de
diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h
74b1de
index 7c28808..ce299bb 100644
74b1de
--- a/xlators/cluster/ec/src/ec-messages.h
74b1de
+++ b/xlators/cluster/ec/src/ec-messages.h
74b1de
@@ -55,6 +55,7 @@ GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL,
74b1de
            EC_MSG_CONFIG_XATTR_INVALID, EC_MSG_EXTENSION, EC_MSG_EXTENSION_NONE,
74b1de
            EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED,
74b1de
            EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED,
74b1de
-           EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED);
74b1de
+           EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED,
74b1de
+           EC_MSG_THREAD_CLEANUP_FAILED);
74b1de
 
74b1de
 #endif /* !_EC_MESSAGES_H_ */
74b1de
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
74b1de
index 3c8013e..264582a 100644
74b1de
--- a/xlators/cluster/ec/src/ec.c
74b1de
+++ b/xlators/cluster/ec/src/ec.c
74b1de
@@ -429,6 +429,51 @@ ec_disable_delays(ec_t *ec)
74b1de
 }
74b1de
 
74b1de
 void
74b1de
+ec_cleanup_healer_object(ec_t *ec)
74b1de
+{
74b1de
+    struct subvol_healer *healer = NULL;
74b1de
+    ec_self_heald_t *shd = NULL;
74b1de
+    void *res = NULL;
74b1de
+    int i = 0;
74b1de
+    gf_boolean_t is_join = _gf_false;
74b1de
+
74b1de
+    shd = &ec->shd;
74b1de
+    if (!shd->iamshd)
74b1de
+        return;
74b1de
+
74b1de
+    for (i = 0; i < ec->nodes; i++) {
74b1de
+        healer = &shd->index_healers[i];
74b1de
+        pthread_mutex_lock(&healer->mutex);
74b1de
+        {
74b1de
+            healer->rerun = 1;
74b1de
+            if (healer->running) {
74b1de
+                pthread_cond_signal(&healer->cond);
74b1de
+                is_join = _gf_true;
74b1de
+            }
74b1de
+        }
74b1de
+        pthread_mutex_unlock(&healer->mutex);
74b1de
+        if (is_join) {
74b1de
+            pthread_join(healer->thread, &res);
74b1de
+            is_join = _gf_false;
74b1de
+        }
74b1de
+
74b1de
+        healer = &shd->full_healers[i];
74b1de
+        pthread_mutex_lock(&healer->mutex);
74b1de
+        {
74b1de
+            healer->rerun = 1;
74b1de
+            if (healer->running) {
74b1de
+                pthread_cond_signal(&healer->cond);
74b1de
+                is_join = _gf_true;
74b1de
+            }
74b1de
+        }
74b1de
+        pthread_mutex_unlock(&healer->mutex);
74b1de
+        if (is_join) {
74b1de
+            pthread_join(healer->thread, &res);
74b1de
+            is_join = _gf_false;
74b1de
+        }
74b1de
+    }
74b1de
+}
74b1de
+void
74b1de
 ec_pending_fops_completed(ec_t *ec)
74b1de
 {
74b1de
     if (ec->shutdown) {
74b1de
@@ -544,6 +589,7 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
74b1de
         /* If there aren't pending fops running after we have waken up
74b1de
          * them, we immediately propagate the notification. */
74b1de
         propagate = ec_disable_delays(ec);
74b1de
+        ec_cleanup_healer_object(ec);
74b1de
         goto unlock;
74b1de
     }
74b1de
 
74b1de
@@ -759,6 +805,7 @@ failed:
74b1de
 void
74b1de
 fini(xlator_t *this)
74b1de
 {
74b1de
+    ec_selfheal_daemon_fini(this);
74b1de
     __ec_destroy_private(this);
74b1de
 }
74b1de
 
74b1de
-- 
74b1de
1.8.3.1
74b1de