From edc238e40060773f5f5fd59fcdad8ae27d65749f Mon Sep 17 00:00:00 2001
From: Mohammed Rafi KC <rkavunga@redhat.com>
Date: Mon, 29 Apr 2019 13:22:32 +0530
Subject: [PATCH 139/141] ec/shd: Cleanup self heal daemon resources during ec
 fini

We were not properly cleaning self-heal daemon resources
during ec fini. With shd multiplexing, it is absolutely
necessary to clean up all the resources during ec fini.

Backport of
 upstream patch: https://review.gluster.org/#/c/glusterfs/+/22644/
 >Change-Id: Iae4f1bce7d8c2e1da51ac568700a51088f3cc7f2
 >fixes: bz#1703948
 >Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>

BUG: 1703434
Change-Id: I98ae03178d3176772c62e34baa08a5c35b8f7217
Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/169994
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 libglusterfs/src/syncop-utils.c          |  2 +
 xlators/cluster/afr/src/afr-self-heald.c |  5 +++
 xlators/cluster/ec/src/ec-heald.c        | 77 +++++++++++++++++++++++++++-----
 xlators/cluster/ec/src/ec-heald.h        |  3 ++
 xlators/cluster/ec/src/ec-messages.h     |  3 +-
 xlators/cluster/ec/src/ec.c              | 47 +++++++++++++++++++
 6 files changed, 124 insertions(+), 13 deletions(-)

diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c
index b842142..4167db4 100644
--- a/libglusterfs/src/syncop-utils.c
+++ b/libglusterfs/src/syncop-utils.c
@@ -354,6 +354,8 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
 
     if (frame) {
         this = frame->this;
+    } else {
+        this = THIS;
     }
 
     /*For this functionality to be implemented in general, we need
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index 8bc4720..522fe5d 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -524,6 +524,11 @@ afr_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
     afr_private_t *priv = NULL;
 
     priv = this->private;
+
+    if (this->cleanup_starting) {
+        return -ENOTCONN;
+    }
+
     if (!priv->shd.enabled)
         return -EBUSY;
 
diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
index cba111a..edf5e11 100644
--- a/xlators/cluster/ec/src/ec-heald.c
+++ b/xlators/cluster/ec/src/ec-heald.c
@@ -71,6 +71,11 @@ disabled_loop:
             break;
     }
 
+    if (ec->shutdown) {
+        healer->running = _gf_false;
+        return -1;
+    }
+
     ret = healer->rerun;
     healer->rerun = 0;
 
@@ -241,9 +246,11 @@ ec_shd_index_sweep(struct subvol_healer *healer)
         goto out;
     }
 
+    _mask_cancellation();
     ret = syncop_mt_dir_scan(NULL, subvol, &loc, GF_CLIENT_PID_SELF_HEALD,
                              healer, ec_shd_index_heal, xdata,
                              ec->shd.max_threads, ec->shd.wait_qlength);
+    _unmask_cancellation();
 out:
     if (xdata)
         dict_unref(xdata);
@@ -263,6 +270,11 @@ ec_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
     int ret = 0;
 
     ec = this->private;
+
+    if (this->cleanup_starting) {
+        return -ENOTCONN;
+    }
+
     if (ec->xl_up_count <= ec->fragments) {
         return -ENOTCONN;
     }
@@ -305,11 +317,15 @@ ec_shd_full_sweep(struct subvol_healer *healer, inode_t *inode)
 {
     ec_t *ec = NULL;
     loc_t loc = {0};
+    int ret = -1;
 
     ec = healer->this->private;
     loc.inode = inode;
-    return syncop_ftw(ec->xl_list[healer->subvol], &loc,
-                      GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
+    _mask_cancellation();
+    ret = syncop_ftw(ec->xl_list[healer->subvol], &loc,
+                     GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
+    _unmask_cancellation();
+    return ret;
 }
 
 void *
@@ -317,13 +333,16 @@ ec_shd_index_healer(void *data)
 {
     struct subvol_healer *healer = NULL;
     xlator_t *this = NULL;
+    int run = 0;
 
     healer = data;
     THIS = this = healer->this;
     ec_t *ec = this->private;
 
     for (;;) {
-        ec_shd_healer_wait(healer);
+        run = ec_shd_healer_wait(healer);
+        if (run == -1)
+            break;
 
         if (ec->xl_up_count > ec->fragments) {
             gf_msg_debug(this->name, 0, "starting index sweep on subvol %s",
@@ -352,16 +371,12 @@ ec_shd_full_healer(void *data)
 
     rootloc.inode = this->itable->root;
     for (;;) {
-        pthread_mutex_lock(&healer->mutex);
-        {
-            run = __ec_shd_healer_wait(healer);
-            if (!run)
-                healer->running = _gf_false;
-        }
-        pthread_mutex_unlock(&healer->mutex);
-
-        if (!run)
+        run = ec_shd_healer_wait(healer);
+        if (run < 0) {
             break;
+        } else if (run == 0) {
+            continue;
+        }
 
         if (ec->xl_up_count > ec->fragments) {
             gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_FULL_SWEEP_START,
@@ -562,3 +577,41 @@ out:
     dict_del(output, this->name);
     return ret;
 }
+
+void
+ec_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
+{
+    if (!healer)
+        return;
+
+    pthread_cond_destroy(&healer->cond);
+    pthread_mutex_destroy(&healer->mutex);
+}
+
+void
+ec_selfheal_daemon_fini(xlator_t *this)
+{
+    struct subvol_healer *healer = NULL;
+    ec_self_heald_t *shd = NULL;
+    ec_t *priv = NULL;
+    int i = 0;
+
+    priv = this->private;
+    if (!priv)
+        return;
+
+    shd = &priv->shd;
+    if (!shd->iamshd)
+        return;
+
+    for (i = 0; i < priv->nodes; i++) {
+        healer = &shd->index_healers[i];
+        ec_destroy_healer_object(this, healer);
+
+        healer = &shd->full_healers[i];
+        ec_destroy_healer_object(this, healer);
+    }
+
+    GF_FREE(shd->index_healers);
+    GF_FREE(shd->full_healers);
+}
diff --git a/xlators/cluster/ec/src/ec-heald.h b/xlators/cluster/ec/src/ec-heald.h
index 2eda2a7..8184cf4 100644
--- a/xlators/cluster/ec/src/ec-heald.h
+++ b/xlators/cluster/ec/src/ec-heald.h
@@ -24,4 +24,7 @@ ec_selfheal_daemon_init(xlator_t *this);
 void
 ec_shd_index_healer_wake(ec_t *ec);
 
+void
+ec_selfheal_daemon_fini(xlator_t *this);
+
 #endif /* __EC_HEALD_H__ */
diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h
index 7c28808..ce299bb 100644
--- a/xlators/cluster/ec/src/ec-messages.h
+++ b/xlators/cluster/ec/src/ec-messages.h
@@ -55,6 +55,7 @@ GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL,
            EC_MSG_CONFIG_XATTR_INVALID, EC_MSG_EXTENSION, EC_MSG_EXTENSION_NONE,
            EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED,
            EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED,
-           EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED);
+           EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED,
+           EC_MSG_THREAD_CLEANUP_FAILED);
 
 #endif /* !_EC_MESSAGES_H_ */
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 3c8013e..264582a 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -429,6 +429,51 @@ ec_disable_delays(ec_t *ec)
 }
 
 void
+ec_cleanup_healer_object(ec_t *ec)
+{
+    struct subvol_healer *healer = NULL;
+    ec_self_heald_t *shd = NULL;
+    void *res = NULL;
+    int i = 0;
+    gf_boolean_t is_join = _gf_false;
+
+    shd = &ec->shd;
+    if (!shd->iamshd)
+        return;
+
+    for (i = 0; i < ec->nodes; i++) {
+        healer = &shd->index_healers[i];
+        pthread_mutex_lock(&healer->mutex);
+        {
+            healer->rerun = 1;
+            if (healer->running) {
+                pthread_cond_signal(&healer->cond);
+                is_join = _gf_true;
+            }
+        }
+        pthread_mutex_unlock(&healer->mutex);
+        if (is_join) {
+            pthread_join(healer->thread, &res);
+            is_join = _gf_false;
+        }
+
+        healer = &shd->full_healers[i];
+        pthread_mutex_lock(&healer->mutex);
+        {
+            healer->rerun = 1;
+            if (healer->running) {
+                pthread_cond_signal(&healer->cond);
+                is_join = _gf_true;
+            }
+        }
+        pthread_mutex_unlock(&healer->mutex);
+        if (is_join) {
+            pthread_join(healer->thread, &res);
+            is_join = _gf_false;
+        }
+    }
+}
+void
 ec_pending_fops_completed(ec_t *ec)
 {
     if (ec->shutdown) {
@@ -544,6 +589,7 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
         /* If there aren't pending fops running after we have waken up
          * them, we immediately propagate the notification. */
         propagate = ec_disable_delays(ec);
+        ec_cleanup_healer_object(ec);
         goto unlock;
     }
 
@@ -759,6 +805,7 @@ failed:
 void
 fini(xlator_t *this)
 {
+    ec_selfheal_daemon_fini(this);
     __ec_destroy_private(this);
 }
 
-- 
1.8.3.1
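
Note: the shutdown handshake this patch relies on (ec->shutdown makes the healer wait loop return -1 so the thread exits, ec_cleanup_healer_object() signals and joins it, and ec_selfheal_daemon_fini()/ec_destroy_healer_object() then destroy the mutex and condition variable) can be modeled in isolation with the minimal sketch below. The "worker" struct and function names are illustrative stand-ins, not the actual ec_t/subvol_healer types; build with -pthread.

#include <pthread.h>
#include <stdio.h>
#include <string.h>

/* Minimal model of the healer shutdown handshake: the notifier flags
 * shutdown and signals the worker, the worker leaves its wait loop,
 * and the fini path joins the thread before destroying its mutex and
 * condition variable. */
struct worker {
    pthread_mutex_t mutex;
    pthread_cond_t cond;
    pthread_t thread;
    int running;
    int shutdown;
};

static void *
worker_loop(void *data)
{
    struct worker *w = data;

    pthread_mutex_lock(&w->mutex);
    while (!w->shutdown)
        pthread_cond_wait(&w->cond, &w->mutex); /* heal work would go here */
    w->running = 0;
    pthread_mutex_unlock(&w->mutex);
    return NULL;
}

int
main(void)
{
    struct worker w;

    memset(&w, 0, sizeof(w));
    pthread_mutex_init(&w.mutex, NULL);
    pthread_cond_init(&w.cond, NULL);
    w.running = 1;
    pthread_create(&w.thread, NULL, worker_loop, &w);

    /* Shutdown path: flag, wake, join, then destroy, mirroring the
     * flag/signal/join order in ec_cleanup_healer_object() and the
     * cond/mutex destruction in ec_destroy_healer_object(). */
    pthread_mutex_lock(&w.mutex);
    w.shutdown = 1;
    if (w.running)
        pthread_cond_signal(&w.cond);
    pthread_mutex_unlock(&w.mutex);

    pthread_join(w.thread, NULL);
    pthread_cond_destroy(&w.cond);
    pthread_mutex_destroy(&w.mutex);
    printf("healer thread joined and resources destroyed\n");
    return 0;
}

Joining before destroying is the point of the ordering: destroying a mutex or condition variable that a still-running healer thread may touch is undefined behavior, which is exactly the resource leak/teardown race this patch closes for shd multiplexing.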