From 68aa01d409abe1ae4096461d020f5ba43e805c82 Mon Sep 17 00:00:00 2001
From: Atin Mukherjee <amukherj@redhat.com>
Date: Fri, 19 May 2017 21:04:53 +0530
Subject: [PATCH 481/486] glusterfsd: process attach and detach request inside
 lock

With brick multiplexing, there is a high possibility that attach and
detach requests might be processed in parallel and to avoid a concurrent
update to the same graph list, a mutex lock is required.

Credits : Rafi (rkavunga@redhat.com) for the RCA of this issue

>Reviewed-on: https://review.gluster.org/17374
>Smoke: Gluster Build System <jenkins@build.gluster.org>
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
>CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
>Reviewed-by: Jeff Darcy <jeff@pl.atyp.us>

Change-Id: Ic8e6d1708655c8a143c5a3690968dfa572a32a9c
BUG: 1448833
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/107687
---
 glusterfsd/src/glusterfsd-mgmt.c               | 173 ++++++++++++++-----------
 xlators/protocol/server/src/server-handshake.c |   9 +-
 2 files changed, 105 insertions(+), 77 deletions(-)

diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index 2d84146..6256030 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -200,8 +200,9 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
 {
         gd1_mgmt_brick_op_req   xlator_req      = {0,};
         ssize_t                 ret;
-        xlator_t                *top;
-        xlator_t                *victim;
+        xlator_t                *top = NULL;
+        xlator_t                *victim = NULL;
+        glusterfs_ctx_t         *ctx    = NULL;
         xlator_list_t           **trav_p;
 
         ret = xdr_to_generic (req->msg[0], &xlator_req,
@@ -210,52 +211,62 @@ glusterfs_handle_terminate (rpcsvc_request_t *req)
                 req->rpc_err = GARBAGE_ARGS;
                 return -1;
         }
+        ctx = glusterfsd_ctx;
 
-        /* Find the xlator_list_t that points to our victim. */
-        top = glusterfsd_ctx->active->first;
-        for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
-                victim = (*trav_p)->xlator;
-                if (strcmp (victim->name, xlator_req.name) == 0) {
-                        break;
+        LOCK (&ctx->volfile_lock);
+        {
+                /* Find the xlator_list_t that points to our victim. */
+                top = glusterfsd_ctx->active->first;
+                for (trav_p = &top->children; *trav_p;
+                     trav_p = &(*trav_p)->next) {
+                        victim = (*trav_p)->xlator;
+                        if (strcmp (victim->name, xlator_req.name) == 0) {
+                                break;
+                        }
                 }
-        }
 
-        if (!*trav_p) {
-                gf_log (THIS->name, GF_LOG_ERROR,
-                        "can't terminate %s - not found", xlator_req.name);
-                /*
-                 * Used to be -ENOENT.  However, the caller asked us to make
-                 * sure it's down and if it's already down that's good enough.
-                 */
-                glusterfs_terminate_response_send (req, 0);
-                goto err;
-        }
+                if (!*trav_p) {
+                        gf_log (THIS->name, GF_LOG_ERROR,
+                                "can't terminate %s - not found",
+                                xlator_req.name);
+                        /*
+                         * Used to be -ENOENT.  However, the caller asked us to
+                         * make sure it's down and if it's already down that's
+                         * good enough.
+                         */
+                        glusterfs_terminate_response_send (req, 0);
+                        goto err;
+                }
 
-        glusterfs_terminate_response_send (req, 0);
-        if ((trav_p == &top->children) && !(*trav_p)->next) {
-                gf_log (THIS->name, GF_LOG_INFO,
-                        "terminating after loss of last child %s",
-                        xlator_req.name);
-                glusterfs_mgmt_pmap_signout (glusterfsd_ctx, xlator_req.name);
-                cleanup_and_exit (SIGTERM);
-        } else {
-                /*
-                 * This is terribly unsafe without quiescing or shutting things
-                 * down properly (or even locking) but it gets us to the point
-                 * where we can test other stuff.
-                 *
-                 * TBD: finish implementing this "detach" code properly
-                 */
-                gf_log (THIS->name, GF_LOG_INFO, "detaching not-only child %s",
-                        xlator_req.name);
-                top->notify (top, GF_EVENT_TRANSPORT_CLEANUP, victim);
-                glusterfs_mgmt_pmap_signout (glusterfsd_ctx, xlator_req.name);
+                glusterfs_terminate_response_send (req, 0);
+                if ((trav_p == &top->children) && !(*trav_p)->next) {
+                        gf_log (THIS->name, GF_LOG_INFO,
+                                "terminating after loss of last child %s",
+                                xlator_req.name);
+                        glusterfs_mgmt_pmap_signout (glusterfsd_ctx,
+                                                     xlator_req.name);
+                        kill (getpid(), SIGTERM);
+                } else {
+                        /*
+                         * This is terribly unsafe without quiescing or shutting
+                         * things down properly but it gets us to the point
+                         * where we can test other stuff.
+                         *
+                         * TBD: finish implementing this "detach" code properly
+                         */
+                        gf_log (THIS->name, GF_LOG_INFO, "detaching not-only"
+                                " child %s", xlator_req.name);
+                        top->notify (top, GF_EVENT_TRANSPORT_CLEANUP, victim);
+                        glusterfs_mgmt_pmap_signout (glusterfsd_ctx,
+                                                     xlator_req.name);
+
+                        *trav_p = (*trav_p)->next;
+                        glusterfs_autoscale_threads (THIS->ctx, -1);
+                }
 
-                *trav_p = (*trav_p)->next;
-                glusterfs_autoscale_threads (THIS->ctx, -1);
         }
-
 err:
+        UNLOCK (&ctx->volfile_lock);
         free (xlator_req.name);
         xlator_req.name = NULL;
         return 0;
@@ -830,6 +841,7 @@ glusterfs_handle_attach (rpcsvc_request_t *req)
         gd1_mgmt_brick_op_req   xlator_req      = {0,};
         xlator_t                *this           = NULL;
         glusterfs_graph_t       *newgraph       = NULL;
+        glusterfs_ctx_t         *ctx            = NULL;
 
         GF_ASSERT (req);
         this = THIS;
@@ -844,32 +856,38 @@ glusterfs_handle_attach (rpcsvc_request_t *req)
                 return -1;
         }
         ret = 0;
+        ctx = this->ctx;
 
-        if (this->ctx->active) {
-                gf_log (this->name, GF_LOG_INFO,
-                        "got attach for %s", xlator_req.name);
-                ret = glusterfs_graph_attach (this->ctx->active,
-                                              xlator_req.name, &newgraph);
-                if (ret == 0) {
-                        ret = glusterfs_graph_parent_up (newgraph);
-                        if (ret) {
-                                gf_msg (this->name, GF_LOG_ERROR, 0,
-                                        LG_MSG_EVENT_NOTIFY_FAILED,
-                                        "Parent up notification "
-                                        "failed");
-                                goto out;
+        LOCK (&ctx->volfile_lock);
+        {
+                if (this->ctx->active) {
+                        gf_log (this->name, GF_LOG_INFO,
+                                "got attach for %s", xlator_req.name);
+                        ret = glusterfs_graph_attach (this->ctx->active,
+                                                      xlator_req.name,
+                                                      &newgraph);
+                        if (ret == 0) {
+                                ret = glusterfs_graph_parent_up (newgraph);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, 0,
+                                                LG_MSG_EVENT_NOTIFY_FAILED,
+                                                "Parent up notification "
+                                                "failed");
+                                        goto out;
+                                }
+                                glusterfs_autoscale_threads (this->ctx, 1);
                         }
-                        glusterfs_autoscale_threads (this->ctx, 1);
+                } else {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "got attach for %s but no active graph",
+                                xlator_req.name);
                 }
-        } else {
-                gf_log (this->name, GF_LOG_WARNING,
-                        "got attach for %s but no active graph",
-                        xlator_req.name);
-        }
 
-        glusterfs_translator_info_response_send (req, ret, NULL, NULL);
+                glusterfs_translator_info_response_send (req, ret, NULL, NULL);
 
 out:
+                UNLOCK (&ctx->volfile_lock);
+        }
         free (xlator_req.input.input_val);
         free (xlator_req.name);
 
@@ -1983,23 +2001,28 @@ glusterfs_volfile_fetch (glusterfs_ctx_t *ctx)
         xlator_list_t   *trav;
         int             ret;
 
-        if (ctx->active) {
-                server_xl = ctx->active->first;
-                if (strcmp (server_xl->type, "protocol/server") != 0) {
-                        server_xl = NULL;
+        LOCK (&ctx->volfile_lock);
+        {
+                if (ctx->active) {
+                        server_xl = ctx->active->first;
+                        if (strcmp (server_xl->type, "protocol/server") != 0) {
+                                server_xl = NULL;
+                        }
+                }
+                if (!server_xl) {
+                        /* Startup (ctx->active not set) or non-server. */
+                        UNLOCK (&ctx->volfile_lock);
+                        return glusterfs_volfile_fetch_one
+                                (ctx, ctx->cmd_args.volfile_id);
                 }
-        }
-        if (!server_xl) {
-                /* Startup (ctx->active not set) or non-server. */
-                return glusterfs_volfile_fetch_one (ctx,
-                                                    ctx->cmd_args.volfile_id);
-        }
 
-        ret = 0;
-        for (trav = server_xl->children; trav; trav = trav->next) {
-                ret |= glusterfs_volfile_fetch_one (ctx,
-                                                    trav->xlator->volfile_id);
+                ret = 0;
+                for (trav = server_xl->children; trav; trav = trav->next) {
+                        ret |= glusterfs_volfile_fetch_one
+                                (ctx, trav->xlator->volfile_id);
+                }
         }
+        UNLOCK (&ctx->volfile_lock);
         return ret;
 }
 
diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c
index 64267f2..f00804a 100644
--- a/xlators/protocol/server/src/server-handshake.c
+++ b/xlators/protocol/server/src/server-handshake.c
@@ -412,7 +412,7 @@ server_setvolume (rpcsvc_request_t *req)
         rpc_transport_t     *xprt          = NULL;
         int32_t              fop_version   = 0;
         int32_t              mgmt_version  = 0;
-
+        glusterfs_ctx_t     *ctx           = NULL;
 
         params = dict_new ();
         reply  = dict_new ();
@@ -423,6 +423,7 @@ server_setvolume (rpcsvc_request_t *req)
                 req->rpc_err = GARBAGE_ARGS;
                 goto fail;
         }
+        ctx = THIS->ctx;
 
         this = req->svc->xl;
         /* this is to ensure config_params is populated with the first brick
@@ -468,7 +469,11 @@ server_setvolume (rpcsvc_request_t *req)
                 goto fail;
         }
 
-        xl = get_xlator_by_name (this, name);
+        LOCK (&ctx->volfile_lock);
+        {
+                xl = get_xlator_by_name (this, name);
+        }
+        UNLOCK (&ctx->volfile_lock);
         if (xl == NULL) {
                 ret = gf_asprintf (&msg, "remote-subvolume \"%s\" is not found",
                                    name);
-- 
1.8.3.1