|
|
21ab4e |
From 0fa8f17fc07c9b4e3150484be0de7b8395e00c5f Mon Sep 17 00:00:00 2001
|
|
|
21ab4e |
From: Jeff Darcy <jdarcy@redhat.com>
|
|
|
21ab4e |
Date: Wed, 1 Feb 2017 22:00:32 -0500
|
|
|
21ab4e |
Subject: [PATCH 323/361] socket: retry connect immediately if it fails
|
|
|
21ab4e |
|
|
|
21ab4e |
Previously we relied on a complex dance of setting flags, shutting
|
|
|
21ab4e |
down the socket, tearing stuff down, getting an event, tearing more
|
|
|
21ab4e |
stuff down, and waiting for a higher-level retry. What we really
|
|
|
21ab4e |
need, in the case where we're just trying to connect prematurely e.g.
|
|
|
21ab4e |
to a brick that hasn't fully come up yet, is a simple retry of the
|
|
|
21ab4e |
connect(2) call.
|
|
|
21ab4e |
|
|
|
21ab4e |
This was discovered by observing failures in ec-new-entry.t with
|
|
|
21ab4e |
multiplexing enabled, but probably fixes other random failures as
|
|
|
21ab4e |
well.
|
|
|
21ab4e |
|
|
|
21ab4e |
mainline:
|
|
|
21ab4e |
> BUG: 1385758
|
|
|
21ab4e |
> Reviewed-on: https://review.gluster.org/16510
|
|
|
21ab4e |
> Smoke: Gluster Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
|
|
|
21ab4e |
> Reviewed-by: Shyamsundar Ranganathan <srangana@redhat.com>
|
|
|
21ab4e |
(cherry picked from commit f1615e8286ab131f362db217b7ce398e24ca1e6e)
|
|
|
21ab4e |
|
|
|
21ab4e |
BUG: 1417815
|
|
|
21ab4e |
Change-Id: Ibedb8942060bccc96b02272a333c3002c9b77d4c
|
|
|
21ab4e |
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
|
|
|
21ab4e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/101304
|
|
|
21ab4e |
Tested-by: Milind Changire <mchangir@redhat.com>
|
|
|
21ab4e |
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
|
|
|
21ab4e |
---
|
|
|
21ab4e |
rpc/rpc-transport/socket/src/socket.c | 38 +++++++++++++++++++++++++++++++++--
|
|
|
21ab4e |
1 file changed, 36 insertions(+), 2 deletions(-)
|
|
|
21ab4e |
|
|
|
21ab4e |
diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
|
|
|
21ab4e |
index 9062df2..a3b12dd 100644
|
|
|
21ab4e |
--- a/rpc/rpc-transport/socket/src/socket.c
|
|
|
21ab4e |
+++ b/rpc/rpc-transport/socket/src/socket.c
|
|
|
21ab4e |
@@ -2948,6 +2948,33 @@ socket_fix_ssl_opts (rpc_transport_t *this, socket_private_t *priv,
|
|
|
21ab4e |
}
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
+/*
|
|
|
21ab4e |
+ * If we might just be trying to connect prematurely, e.g. to a brick that's
|
|
|
21ab4e |
+ * slow coming up, all we need is a simple retry. Don't worry about sleeping
|
|
|
21ab4e |
+ * in some arbitrary thread. The connect(2) could already have the exact same
|
|
|
21ab4e |
+ * effect, and we deal with it in that case so we can deal with it for sleep(3)
|
|
|
21ab4e |
+ * as well.
|
|
|
21ab4e |
+ */
|
|
|
21ab4e |
+static int
|
|
|
21ab4e |
+connect_loop (int sockfd, const struct sockaddr *addr, socklen_t addrlen)
|
|
|
21ab4e |
+{
|
|
|
21ab4e |
+ int ret;
|
|
|
21ab4e |
+ int connect_fails = 0;
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ for (;;) {
|
|
|
21ab4e |
+ ret = connect (sockfd, addr, addrlen);
|
|
|
21ab4e |
+ if (ret >= 0) {
|
|
|
21ab4e |
+ break;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+ if ((errno != ENOENT) || (++connect_fails >= 5)) {
|
|
|
21ab4e |
+ break;
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+ sleep (1);
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
+
|
|
|
21ab4e |
+ return ret;
|
|
|
21ab4e |
+}
|
|
|
21ab4e |
+
|
|
|
21ab4e |
static int
|
|
|
21ab4e |
socket_connect (rpc_transport_t *this, int port)
|
|
|
21ab4e |
{
|
|
|
21ab4e |
@@ -3109,8 +3136,15 @@ socket_connect (rpc_transport_t *this, int port)
|
|
|
21ab4e |
}
|
|
|
21ab4e |
}
|
|
|
21ab4e |
|
|
|
21ab4e |
- ret = connect (priv->sock, SA (&this->peerinfo.sockaddr),
|
|
|
21ab4e |
- this->peerinfo.sockaddr_len);
|
|
|
21ab4e |
+ if (ign_enoent) {
|
|
|
21ab4e |
+ ret = connect_loop (priv->sock,
|
|
|
21ab4e |
+ SA (&this->peerinfo.sockaddr),
|
|
|
21ab4e |
+ this->peerinfo.sockaddr_len);
|
|
|
21ab4e |
+ } else {
|
|
|
21ab4e |
+ ret = connect (priv->sock,
|
|
|
21ab4e |
+ SA (&this->peerinfo.sockaddr),
|
|
|
21ab4e |
+ this->peerinfo.sockaddr_len);
|
|
|
21ab4e |
+ }
|
|
|
21ab4e |
|
|
|
21ab4e |
if (ret == -1 && errno == ENOENT && ign_enoent) {
|
|
|
21ab4e |
gf_log (this->name, GF_LOG_WARNING,
|
|
|
21ab4e |
--
|
|
|
21ab4e |
1.8.3.1
|
|
|
21ab4e |
|