Blob Blame History Raw
From 893f1428fb7153ea504ba7e85b729be05cb185b8 Mon Sep 17 00:00:00 2001
From: Milind Changire <mchangir@redhat.com>
Date: Tue, 9 May 2017 17:51:18 +0530
Subject: [PATCH 424/426] rpc: fix transport add/remove race on port probing

Problem:
Spurious __gf_free() assertion failures seen all over the place with
header->magic being overwritten when running port probing tests with
'nmap'

Solution:
Fix sequence of:
1. add accept()ed socket connection fd to epoll set
2. add newly created rpc_transport_t object in RPCSVC service list

Correct sequence is #2 followed by #1.

Reason:
Adding new fd returned by accept() to epoll set causes an epoll_wait()
to return immediately with a POLLIN event. This races ahead to a readv()
which returms with errno:104 (Connection reset by peer) during port
probing using 'nmap'. The error is then handled by POLLERR code to
remove the new transport object from RPCSVC service list and later
unref and destroy the rpc transport object.
socket_server_event_handler() then catches up with registering the
unref'd/destroyed rpc transport object. This is later manifest as
assertion failures in __gf_free() with the header->magic field botched
due to invalid address references.
All this does not result in a Segmentation Fault since the address
space continues to be mapped into the process and pages still being
referenced elsewhere.

As a further note:
This race happens only in accept() codepath. Only in this codepath,
the notify will be referring to two transports:
1, listener transport and
2. newly accepted transport
All other notify refer to only one transport i.e., the transport/socket
on which the event is received. Since epoll is ONE_SHOT another event
won't arrive on the same socket till the current event is processed.
However, in the accept() codepath, the current event - ACCEPT - and the
new event - POLLIN/POLLER - arrive on two different sockets:
1. ACCEPT on listener socket and
2. POLLIN/POLLERR on newly registered socket.
Also, note that these two events are handled different thread contexts.

Cleanup:
Critical section in socket_server_event_handler() has been removed.
Instead, an additional ref on new_trans has been used to avoid ref/unref
race when notifying RPCSVC.

mainline:
> BUG: 1438966
> Signed-off-by: Milind Changire <mchangir@redhat.com>
> Reviewed-on: https://review.gluster.org/17139
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Amar Tumballi <amarts@redhat.com>
> Reviewed-by: Oleksandr Natalenko <oleksandr@natalenko.name>
> Reviewed-by: Jeff Darcy <jeff@pl.atyp.us>
(cherry picked from commit 4f7ef3020edcc75cdeb22d8da8a1484f9db77ac9)

Change-Id: I4417924bc9e6277d24bd1a1c5bcb7445bcb226a3
BUG: 1442535
Signed-off-by: Milind Changire <mchangir@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/105653
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
 rpc/rpc-transport/socket/src/socket.c | 359 ++++++++++++++++++----------------
 1 file changed, 195 insertions(+), 164 deletions(-)

diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
index ca939bb..f64e2f0 100644
--- a/rpc/rpc-transport/socket/src/socket.c
+++ b/rpc/rpc-transport/socket/src/socket.c
@@ -2671,107 +2671,106 @@ socket_server_event_handler (int fd, int idx, void *data,
         priv = this->private;
         ctx  = this->ctx;
 
-        pthread_mutex_lock (&priv->lock);
-        {
-                priv->idx = idx;
-
-                if (poll_in) {
-                        new_sock = accept (priv->sock, SA (&new_sockaddr),
-                                           &addrlen);
-
-                        if (new_sock == -1) {
-                                gf_log (this->name, GF_LOG_WARNING,
-                                        "accept on %d failed (%s)",
-                                        priv->sock, strerror (errno));
-                                goto unlock;
-                        }
-
-                        if (priv->nodelay && (new_sockaddr.ss_family != AF_UNIX)) {
-                                ret = __socket_nodelay (new_sock);
-                                if (ret == -1) {
-                                        gf_log (this->name, GF_LOG_WARNING,
-                                                "setsockopt() failed for "
-                                                "NODELAY (%s)",
-                                                strerror (errno));
-                                }
-                        }
+        /* NOTE:
+         * We have done away with the critical section in this function. since
+         * there's little that it helps with. There's no other code that
+         * attempts to unref the listener socket/transport from any other
+         * thread context while we are using it here.
+         */
+        priv->idx = idx;
 
-                        if (priv->keepalive &&
-                            new_sockaddr.ss_family != AF_UNIX) {
-                                ret = __socket_keepalive (new_sock,
-                                                          new_sockaddr.ss_family,
-                                                          priv->keepaliveintvl,
-                                                          priv->keepaliveidle,
-                                                          priv->timeout);
-                                if (ret == -1)
-                                        gf_log (this->name, GF_LOG_WARNING,
-                                                "Failed to set keep-alive: %s",
-                                                strerror (errno));
-                        }
+        if (poll_in) {
+                new_sock = accept (priv->sock, SA (&new_sockaddr), &addrlen);
 
-                        new_trans = GF_CALLOC (1, sizeof (*new_trans),
-                                               gf_common_mt_rpc_trans_t);
-                        if (!new_trans) {
-                                sys_close (new_sock);
-                                goto unlock;
-                        }
+                if (new_sock == -1) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "accept on %d failed (%s)",
+                                priv->sock, strerror (errno));
+                        goto out;
+                }
 
-                        ret = pthread_mutex_init(&new_trans->lock, NULL);
+                if (priv->nodelay && (new_sockaddr.ss_family != AF_UNIX)) {
+                        ret = __socket_nodelay (new_sock);
                         if (ret == -1) {
                                 gf_log (this->name, GF_LOG_WARNING,
-                                        "pthread_mutex_init() failed: %s",
+                                        "setsockopt() failed for "
+                                        "NODELAY (%s)",
                                         strerror (errno));
-                                sys_close (new_sock);
-                                GF_FREE (new_trans);
-                                goto unlock;
                         }
-                        INIT_LIST_HEAD (&new_trans->list);
+                }
 
-                        new_trans->name = gf_strdup (this->name);
+                if (priv->keepalive &&
+                                new_sockaddr.ss_family != AF_UNIX) {
+                        ret = __socket_keepalive (new_sock,
+                                                  new_sockaddr.ss_family,
+                                                  priv->keepaliveintvl,
+                                                  priv->keepaliveidle,
+                                                  priv->timeout);
+                        if (ret == -1)
+                                gf_log (this->name, GF_LOG_WARNING,
+                                        "Failed to set keep-alive: %s",
+                                        strerror (errno));
+                }
 
-                        memcpy (&new_trans->peerinfo.sockaddr, &new_sockaddr,
-                                addrlen);
-                        new_trans->peerinfo.sockaddr_len = addrlen;
+                new_trans = GF_CALLOC (1, sizeof (*new_trans),
+                                gf_common_mt_rpc_trans_t);
+                if (!new_trans) {
+                        sys_close (new_sock);
+                        goto out;
+                }
 
-                        new_trans->myinfo.sockaddr_len =
-                                sizeof (new_trans->myinfo.sockaddr);
+                ret = pthread_mutex_init(&new_trans->lock, NULL);
+                if (ret == -1) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "pthread_mutex_init() failed: %s",
+                                strerror (errno));
+                        sys_close (new_sock);
+                        GF_FREE (new_trans);
+                        goto out;
+                }
+                INIT_LIST_HEAD (&new_trans->list);
 
-                        ret = getsockname (new_sock,
-                                           SA (&new_trans->myinfo.sockaddr),
-                                           &new_trans->myinfo.sockaddr_len);
-                        if (ret == -1) {
-                                gf_log (this->name, GF_LOG_WARNING,
-                                        "getsockname on %d failed (%s)",
-                                        new_sock, strerror (errno));
-                                sys_close (new_sock);
-                                GF_FREE (new_trans->name);
-                                GF_FREE (new_trans);
-                                goto unlock;
-                        }
+                new_trans->name = gf_strdup (this->name);
 
-                        get_transport_identifiers (new_trans);
-                        ret = socket_init(new_trans);
-                        if (ret != 0) {
-                                sys_close (new_sock);
-                                GF_FREE (new_trans->name);
-                                GF_FREE (new_trans);
-                                goto unlock;
-                        }
-                        new_trans->ops = this->ops;
-                        new_trans->init = this->init;
-                        new_trans->fini = this->fini;
-                        new_trans->ctx  = ctx;
-                        new_trans->xl   = this->xl;
-                        new_trans->mydata = this->mydata;
-                        new_trans->notify = this->notify;
-                        new_trans->listener = this;
-                        new_priv = new_trans->private;
-
-                        if (new_sockaddr.ss_family == AF_UNIX) {
-                                new_priv->use_ssl = _gf_false;
-                        }
-                        else {
-                                switch (priv->srvr_ssl) {
+                memcpy (&new_trans->peerinfo.sockaddr, &new_sockaddr, addrlen);
+                new_trans->peerinfo.sockaddr_len = addrlen;
+
+                new_trans->myinfo.sockaddr_len = sizeof (new_trans->myinfo.sockaddr);
+
+                ret = getsockname (new_sock, SA (&new_trans->myinfo.sockaddr),
+                                   &new_trans->myinfo.sockaddr_len);
+                if (ret == -1) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "getsockname on %d failed (%s)",
+                                new_sock, strerror (errno));
+                        sys_close (new_sock);
+                        GF_FREE (new_trans->name);
+                        GF_FREE (new_trans);
+                        goto out;
+                }
+
+                get_transport_identifiers (new_trans);
+                ret = socket_init(new_trans);
+                if (ret != 0) {
+                        sys_close (new_sock);
+                        GF_FREE (new_trans->name);
+                        GF_FREE (new_trans);
+                        goto out;
+                }
+                new_trans->ops = this->ops;
+                new_trans->init = this->init;
+                new_trans->fini = this->fini;
+                new_trans->ctx  = ctx;
+                new_trans->xl   = this->xl;
+                new_trans->mydata = this->mydata;
+                new_trans->notify = this->notify;
+                new_trans->listener = this;
+                new_priv = new_trans->private;
+
+                if (new_sockaddr.ss_family == AF_UNIX) {
+                        new_priv->use_ssl = _gf_false;
+                } else {
+                        switch (priv->srvr_ssl) {
                                 case MGMT_SSL_ALWAYS:
                                         /* Glusterd with secure_mgmt. */
                                         new_priv->use_ssl = _gf_true;
@@ -2782,95 +2781,127 @@ socket_server_event_handler (int fd, int idx, void *data,
                                         break;
                                 default:
                                         new_priv->use_ssl = _gf_false;
-                                }
                         }
+                }
 
-			new_priv->sock = new_sock;
-			new_priv->own_thread = priv->own_thread;
-
-                        new_priv->ssl_ctx = priv->ssl_ctx;
-			if (new_priv->use_ssl && !new_priv->own_thread) {
-				cname = ssl_setup_connection(new_trans,1);
-                                if (!cname) {
-					gf_log(this->name,GF_LOG_ERROR,
-					       "server setup failed");
-					sys_close (new_sock);
-                                        GF_FREE (new_trans->name);
-                                        GF_FREE (new_trans);
-					goto unlock;
-				}
-                                this->ssl_name = cname;
-			}
+                new_priv->sock = new_sock;
+                new_priv->own_thread = priv->own_thread;
 
-                        if (!priv->bio && !priv->own_thread) {
-                                ret = __socket_nonblock (new_sock);
+                new_priv->ssl_ctx = priv->ssl_ctx;
+                if (new_priv->use_ssl && !new_priv->own_thread) {
+                        cname = ssl_setup_connection(new_trans, 1);
+                        if (!cname) {
+                                gf_log(this->name, GF_LOG_ERROR,
+                                       "server setup failed");
+                                sys_close (new_sock);
+                                GF_FREE (new_trans->name);
+                                GF_FREE (new_trans);
+                                goto out;
+                        }
+                        this->ssl_name = cname;
+                }
 
-                                if (ret == -1) {
-                                        gf_log (this->name, GF_LOG_WARNING,
-                                                "NBIO on %d failed (%s)",
-                                                new_sock, strerror (errno));
+                if (!priv->bio && !priv->own_thread) {
+                        ret = __socket_nonblock (new_sock);
 
-                                        sys_close (new_sock);
-                                        GF_FREE (new_trans->name);
-                                        GF_FREE (new_trans);
-                                        goto unlock;
-                                }
+                        if (ret == -1) {
+                                gf_log (this->name, GF_LOG_WARNING,
+                                        "NBIO on %d failed (%s)",
+                                        new_sock, strerror (errno));
+
+                                sys_close (new_sock);
+                                GF_FREE (new_trans->name);
+                                GF_FREE (new_trans);
+                                goto out;
                         }
+                }
 
-                        pthread_mutex_lock (&new_priv->lock);
-                        {
-                                /*
-                                 * In the own_thread case, this is used to
-                                 * indicate that we're initializing a server
-                                 * connection.
-                                 */
-                                new_priv->connected = 1;
-                                new_priv->is_server = _gf_true;
-                                rpc_transport_ref (new_trans);
-
-                                if (new_priv->own_thread) {
-                                        if (pipe(new_priv->pipe) < 0) {
-                                                gf_log(this->name, GF_LOG_ERROR,
-                                                       "could not create pipe");
-                                        }
-                                        ret = socket_spawn(new_trans);
-                                        if (ret) {
-                                                gf_log(this->name, GF_LOG_ERROR,
-                                                       "could not spawn thread");
-                                                sys_close (new_priv->pipe[0]);
-                                                sys_close (new_priv->pipe[1]);
-                                        }
-                                }  else {
-                                        new_priv->idx =
-                                                event_register (ctx->event_pool,
-                                                                new_sock,
-                                                           socket_event_handler,
-                                                                new_trans,
-                                                                1, 0);
-                                        if (new_priv->idx == -1) {
-                                                ret = -1;
-                                                gf_log(this->name, GF_LOG_ERROR,
-                                                       "failed to register the socket with event");
-                                        }
-                                }
+                /*
+                 * In the own_thread case, this is used to
+                 * indicate that we're initializing a server
+                 * connection.
+                 */
+                new_priv->connected = 1;
+                new_priv->is_server = _gf_true;
+                rpc_transport_ref (new_trans);
 
+                if (new_priv->own_thread) {
+                        if (pipe(new_priv->pipe) < 0) {
+                                gf_log(this->name, GF_LOG_ERROR,
+                                       "could not create pipe");
                         }
-                        pthread_mutex_unlock (&new_priv->lock);
-                        if (ret == -1) {
-                                sys_close (new_sock);
-                                rpc_transport_unref (new_trans);
-                                goto unlock;
+                        ret = socket_spawn(new_trans);
+                        if (ret) {
+                                gf_log(this->name, GF_LOG_ERROR,
+                                       "could not spawn thread");
+                                sys_close (new_priv->pipe[0]);
+                                sys_close (new_priv->pipe[1]);
                         }
+                }  else {
+                        /* Take a ref on the new_trans to avoid
+                         * getting deleted when event_register()
+                         * causes socket_event_handler() to race
+                         * ahead of this path to eventually find
+                         * a disconnect and unref the transport
+                         */
+                        rpc_transport_ref (new_trans);
 
-                        if (!priv->own_thread) {
-                                ret = rpc_transport_notify (this,
-                                        RPC_TRANSPORT_ACCEPT, new_trans);
+                        /* Send a notification to RPCSVC layer
+                         * to save the new_trans in its service
+                         * list before we register the new_sock
+                         * with epoll to begin receiving notifications
+                         * for data handling.
+                         */
+                        ret = rpc_transport_notify (this, RPC_TRANSPORT_ACCEPT, new_trans);
+
+                        if (ret != -1) {
+                                new_priv->idx =
+                                        event_register (ctx->event_pool,
+                                                        new_sock,
+                                                        socket_event_handler,
+                                                        new_trans,
+                                                        1, 0);
+                                if (new_priv->idx == -1) {
+                                        ret = -1;
+                                        gf_log(this->name, GF_LOG_ERROR,
+                                               "failed to register the socket "
+                                               "with event");
+
+                                        /* event_register() could have failed for some
+                                         * reason, implying that the new_sock cannot be
+                                         * added to the epoll set. If we wont get any
+                                         * more notifications for new_sock from epoll,
+                                         * then we better remove the corresponding
+                                         * new_trans object from the RPCSVC service list.
+                                         * Since we've notified RPC service of new_trans
+                                         * before we attempted event_register(), we better
+                                         * unlink the new_trans from the RPCSVC service list
+                                         * to cleanup the stateby sending out a DISCONNECT
+                                         * notification.
+                                         */
+                                        rpc_transport_notify (this, RPC_TRANSPORT_DISCONNECT, new_trans);
+                                }
                         }
+
+                        /* this rpc_transport_unref() is for managing race between
+                         * 1. socket_server_event_handler and
+                         * 2. socket_event_handler
+                         * trying to add and remove new_trans from the rpcsvc
+                         * service list
+                         * now that we are done with the notifications, lets
+                         * reduce the reference
+                         */
+                        rpc_transport_unref (new_trans);
                 }
-        }
-unlock:
-        pthread_mutex_unlock (&priv->lock);
 
+                if (ret == -1) {
+                        sys_close (new_sock);
+                        /* this unref is to actually cause the destruction of
+                         * the new_trans since we've failed at everything so far
+                         */
+                        rpc_transport_unref (new_trans);
+                }
+        }
 out:
         if (cname && (cname != this->ssl_name)) {
                 GF_FREE(cname);
-- 
1.8.3.1