|
|
3604df |
From 758e5e8448d2694a49f31dee99b793b2f21b66c4 Mon Sep 17 00:00:00 2001
|
|
|
3604df |
From: Atin Mukherjee <amukherj@redhat.com>
|
|
|
3604df |
Date: Thu, 8 Sep 2016 11:33:59 +0530
|
|
|
3604df |
Subject: [PATCH 87/94] socket: pollerr event shouldn't trigger socket_connect_finish
|
|
|
3604df |
|
|
|
3604df |
If connect fails with any error other than EINPROGRESS, we cannot get
|
|
|
3604df |
the error status using getsockopt (... SO_ERROR ... ). Hence we need
|
|
|
3604df |
to remember the state of connect and take appropriate action in the
|
|
|
3604df |
event_handler for the same.
|
|
|
3604df |
|
|
|
3604df |
As an added note, an event can come where poll_err is HUP and we have
|
|
|
3604df |
poll_in as well (i.e some status was written to the socket), so for
|
|
|
3604df |
such cases we need to finish the connect, process the data and then
|
|
|
3604df |
the poll_err as is the case in the current code.
|
|
|
3604df |
|
|
|
3604df |
Special thanks to Kaushal M & Raghavendra G for figuring out the issue.
|
|
|
3604df |
|
|
|
3604df |
>Signed-off-by: Shyam <srangana@redhat.com>
|
|
|
3604df |
>Reviewed-on: http://review.gluster.org/15440
|
|
|
3604df |
>Smoke: Gluster Build System <jenkins@build.gluster.org>
|
|
|
3604df |
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
|
|
|
3604df |
>CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
|
|
|
3604df |
>Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
|
|
|
3604df |
|
|
|
3604df |
>Reviewed-on: http://review.gluster.org/15533
|
|
|
3604df |
>Smoke: Gluster Build System <jenkins@build.gluster.org>
|
|
|
3604df |
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
|
|
|
3604df |
>CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
|
|
|
3604df |
>Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
|
|
|
3604df |
|
|
|
3604df |
Change-Id: Ic45ad59ff8ab1d0a9d2cab2c924ad940b9d38528
|
|
|
3604df |
BUG: 1377387
|
|
|
3604df |
Signed-off-by: Atin Mukherjee <amukherj@redhat.com>
|
|
|
3604df |
Reviewed-on: https://code.engineering.redhat.com/gerrit/85608
|
|
|
3604df |
Reviewed-by: Milind Changire <mchangir@redhat.com>
|
|
|
3604df |
Reviewed-by: Raghavendra Gowdappa <rgowdapp@redhat.com>
|
|
|
3604df |
---
|
|
|
3604df |
rpc/rpc-transport/socket/src/socket.c | 42 ++++++++++++++++++++++++++++++++-
|
|
|
3604df |
rpc/rpc-transport/socket/src/socket.h | 3 ++
|
|
|
3604df |
tests/bugs/changelog/bug-1211327.t | 8 ++----
|
|
|
3604df |
tests/bugs/ec/bug-1236065.t | 4 +-
|
|
|
3604df |
4 files changed, 49 insertions(+), 8 deletions(-)
|
|
|
3604df |
|
|
|
3604df |
diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
|
|
|
3604df |
index deec0cb..d9383c3 100644
|
|
|
3604df |
--- a/rpc/rpc-transport/socket/src/socket.c
|
|
|
3604df |
+++ b/rpc/rpc-transport/socket/src/socket.c
|
|
|
3604df |
@@ -2346,6 +2346,7 @@ out:
|
|
|
3604df |
return ret;
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
+static int socket_disconnect (rpc_transport_t *this);
|
|
|
3604df |
|
|
|
3604df |
/* reads rpc_requests during pollin */
|
|
|
3604df |
static int
|
|
|
3604df |
@@ -2370,7 +2371,23 @@ socket_event_handler (int fd, int idx, void *data,
|
|
|
3604df |
}
|
|
|
3604df |
pthread_mutex_unlock (&priv->lock);
|
|
|
3604df |
|
|
|
3604df |
- ret = (priv->connected == 1) ? 0 : socket_connect_finish(this);
|
|
|
3604df |
+ if (priv->connected != 1) {
|
|
|
3604df |
+ if (priv->connect_failed) {
|
|
|
3604df |
+ /* connect failed with some other error than
|
|
|
3604df |
+ EINPROGRESS or ENOENT, so nothing more to do, fail
|
|
|
3604df |
+ reading/writing anything even if poll_in or poll_out
|
|
|
3604df |
+ is set */
|
|
|
3604df |
+ ret = socket_disconnect (this);
|
|
|
3604df |
+
|
|
|
3604df |
+ /* Force ret to be -1, as we are officially done with
|
|
|
3604df |
+ this socket */
|
|
|
3604df |
+ ret = -1;
|
|
|
3604df |
+ } else {
|
|
|
3604df |
+ ret = socket_connect_finish (this);
|
|
|
3604df |
+ }
|
|
|
3604df |
+ } else {
|
|
|
3604df |
+ ret = 0;
|
|
|
3604df |
+ }
|
|
|
3604df |
|
|
|
3604df |
if (!ret && poll_out) {
|
|
|
3604df |
ret = socket_event_poll_out (this);
|
|
|
3604df |
@@ -3046,6 +3063,16 @@ socket_connect (rpc_transport_t *this, int port)
|
|
|
3604df |
gf_log (this->name, GF_LOG_WARNING,
|
|
|
3604df |
"Ignore failed connection attempt on %s, (%s) ",
|
|
|
3604df |
this->peerinfo.identifier, strerror (errno));
|
|
|
3604df |
+
|
|
|
3604df |
+ /* connect failed with some other error than EINPROGRESS
|
|
|
3604df |
+ so, getsockopt (... SO_ERROR ...), will not catch any
|
|
|
3604df |
+ errors and return them to us, we need to remember this
|
|
|
3604df |
+ state, and take actions in socket_event_handler
|
|
|
3604df |
+ appropriately */
|
|
|
3604df |
+ /* TBD: What about ENOENT, we will do getsockopt there
|
|
|
3604df |
+ as well, so how is that exempt from such a problem? */
|
|
|
3604df |
+ priv->connect_failed = 1;
|
|
|
3604df |
+
|
|
|
3604df |
goto handler;
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
@@ -3058,9 +3085,22 @@ socket_connect (rpc_transport_t *this, int port)
|
|
|
3604df |
GF_LOG_DEBUG : GF_LOG_ERROR),
|
|
|
3604df |
"connection attempt on %s failed, (%s)",
|
|
|
3604df |
this->peerinfo.identifier, strerror (errno));
|
|
|
3604df |
+
|
|
|
3604df |
+ /* connect failed with some other error than EINPROGRESS
|
|
|
3604df |
+ so, getsockopt (... SO_ERROR ...), will not catch any
|
|
|
3604df |
+ errors and return them to us, we need to remember this
|
|
|
3604df |
+ state, and take actions in socket_event_handler
|
|
|
3604df |
+ appropriately */
|
|
|
3604df |
+ /* TBD: What about ENOENT, we will do getsockopt there
|
|
|
3604df |
+ as well, so how is that exempt from such a problem? */
|
|
|
3604df |
+ priv->connect_failed = 1;
|
|
|
3604df |
+
|
|
|
3604df |
goto handler;
|
|
|
3604df |
}
|
|
|
3604df |
else {
|
|
|
3604df |
+ /* reset connect_failed so that any previous attempts
|
|
|
3604df |
+ state is not carried forward */
|
|
|
3604df |
+ priv->connect_failed = 0;
|
|
|
3604df |
ret = 0;
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
diff --git a/rpc/rpc-transport/socket/src/socket.h b/rpc/rpc-transport/socket/src/socket.h
|
|
|
3604df |
index 8395fd2..7c7005b 100644
|
|
|
3604df |
--- a/rpc/rpc-transport/socket/src/socket.h
|
|
|
3604df |
+++ b/rpc/rpc-transport/socket/src/socket.h
|
|
|
3604df |
@@ -200,6 +200,9 @@ typedef struct {
|
|
|
3604df |
int32_t idx;
|
|
|
3604df |
/* -1 = not connected. 0 = in progress. 1 = connected */
|
|
|
3604df |
char connected;
|
|
|
3604df |
+ /* 1 = connect failed for reasons other than EINPROGRESS/ENOENT
|
|
|
3604df |
+ see socket_connect for details */
|
|
|
3604df |
+ char connect_failed;
|
|
|
3604df |
char bio;
|
|
|
3604df |
char connect_finish_log;
|
|
|
3604df |
char submit_log;
|
|
|
3604df |
diff --git a/tests/bugs/changelog/bug-1211327.t b/tests/bugs/changelog/bug-1211327.t
|
|
|
3604df |
index 19d6e76..a849ec3 100644
|
|
|
3604df |
--- a/tests/bugs/changelog/bug-1211327.t
|
|
|
3604df |
+++ b/tests/bugs/changelog/bug-1211327.t
|
|
|
3604df |
@@ -27,15 +27,13 @@ TEST $CLI volume set $V0 changelog.changelog on;
|
|
|
3604df |
sleep 1
|
|
|
3604df |
|
|
|
3604df |
TEST killall_gluster;
|
|
|
3604df |
-sleep 1
|
|
|
3604df |
-EXPECT 0 online_brick_count;
|
|
|
3604df |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "0" online_brick_count
|
|
|
3604df |
|
|
|
3604df |
TEST glusterd;
|
|
|
3604df |
TEST pidof glusterd;
|
|
|
3604df |
-##Let the brick processes starts
|
|
|
3604df |
-sleep 1;
|
|
|
3604df |
|
|
|
3604df |
-EXPECT 1 online_brick_count;
|
|
|
3604df |
+##Let the brick processes starts
|
|
|
3604df |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
|
|
|
3604df |
|
|
|
3604df |
##On brick restart only one HTIME should be found.
|
|
|
3604df |
EXPECT 1 count_htime_files;
|
|
|
3604df |
diff --git a/tests/bugs/ec/bug-1236065.t b/tests/bugs/ec/bug-1236065.t
|
|
|
3604df |
index 9038cb9..e425f3e 100644
|
|
|
3604df |
--- a/tests/bugs/ec/bug-1236065.t
|
|
|
3604df |
+++ b/tests/bugs/ec/bug-1236065.t
|
|
|
3604df |
@@ -48,7 +48,7 @@ TEST ec_test_make
|
|
|
3604df |
|
|
|
3604df |
## step 4
|
|
|
3604df |
TEST $CLI volume start $V0 force
|
|
|
3604df |
-EXPECT '7' online_brick_count
|
|
|
3604df |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "7" online_brick_count
|
|
|
3604df |
|
|
|
3604df |
# active heal
|
|
|
3604df |
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
|
|
|
3604df |
@@ -70,7 +70,7 @@ EXPECT '5' online_brick_count
|
|
|
3604df |
|
|
|
3604df |
## step 6
|
|
|
3604df |
TEST $CLI volume start $V0 force
|
|
|
3604df |
-EXPECT '7' online_brick_count
|
|
|
3604df |
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "7" online_brick_count
|
|
|
3604df |
|
|
|
3604df |
# self-healing
|
|
|
3604df |
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
|
|
|
3604df |
--
|
|
|
3604df |
1.7.1
|
|
|
3604df |
|