From f96f711ddacfe8c758de3a994e339339325a1539 Mon Sep 17 00:00:00 2001
From: Rajesh Joseph <rjoseph@redhat.com>
Date: Sat, 3 Dec 2016 01:10:51 +0530
Subject: [PATCH 227/227] rpc: fix for race between rpc and protocol/client
It is possible that the notification thread which notifies
protocol/client layer about the disconnection is put to sleep
and meanwhile, a fuse thread or a timer thread initiates and
completes reconnection to the brick. The notification thread
is then woken up and protocol/client layer updates its flags
to indicate that network is disconnected. No reconnection is
initiated because reconnection is rpc-lib layer's responsibility
and its flags indicate that connection is connected.
Fix: Serialize connect and disconnect notify
Backport-of: http://review.gluster.org/15916
Credit: Raghavendra Talur <rtalur@redhat.com>
Change-Id: I8ff5d1a3283b47f5c26848a42016a40bc34ffc1d
BUG: 1385605
Signed-off-by: Rajesh Joseph <rjoseph@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/92095
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
rpc/rpc-lib/src/rpc-clnt.c | 98 +++++++++++++++++++++++++++-------------------
rpc/rpc-lib/src/rpc-clnt.h | 1 +
2 files changed, 59 insertions(+), 40 deletions(-)
diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c
index a9e43eb..fe099f9 100644
--- a/rpc/rpc-lib/src/rpc-clnt.c
+++ b/rpc/rpc-lib/src/rpc-clnt.c
@@ -872,6 +872,41 @@ rpc_clnt_destroy (struct rpc_clnt *rpc);
#define RPC_THIS_RESTORE (THIS = old_THIS)
+static int
+rpc_clnt_handle_disconnect (struct rpc_clnt *clnt, rpc_clnt_connection_t *conn)
+{
+ struct timespec ts = {0, };
+ gf_boolean_t unref_clnt = _gf_false;
+
+ rpc_clnt_connection_cleanup (conn);
+
+ pthread_mutex_lock (&conn->lock);
+ {
+ if (!conn->rpc_clnt->disabled && (conn->reconnect == NULL)) {
+ ts.tv_sec = 10;
+ ts.tv_nsec = 0;
+
+ rpc_clnt_ref (clnt);
+ conn->reconnect = gf_timer_call_after (clnt->ctx, ts,
+ rpc_clnt_reconnect, conn);
+ if (conn->reconnect == NULL) {
+ gf_log (conn->name, GF_LOG_WARNING,
+ "Cannot create rpc_clnt_reconnect timer");
+ unref_clnt = _gf_true;
+ }
+ }
+ }
+ pthread_mutex_unlock (&conn->lock);
+
+ if (clnt->notifyfn)
+ clnt->notifyfn (clnt, clnt->mydata, RPC_CLNT_DISCONNECT, NULL);
+
+ if (unref_clnt)
+ rpc_clnt_ref (clnt);
+
+ return 0;
+}
+
int
rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
rpc_transport_event_t event, void *data, ...)
@@ -881,9 +916,7 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
int ret = -1;
rpc_request_info_t *req_info = NULL;
rpc_transport_pollin_t *pollin = NULL;
- struct timespec ts = {0, };
void *clnt_mydata = NULL;
- gf_boolean_t unref_clnt = _gf_false;
DECLARE_OLD_THIS;
conn = mydata;
@@ -899,35 +932,11 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
switch (event) {
case RPC_TRANSPORT_DISCONNECT:
{
- rpc_clnt_connection_cleanup (conn);
-
- pthread_mutex_lock (&conn->lock);
+ pthread_mutex_lock (&clnt->notifylock);
{
- if (!conn->rpc_clnt->disabled
- && (conn->reconnect == NULL)) {
- ts.tv_sec = 10;
- ts.tv_nsec = 0;
-
- rpc_clnt_ref (clnt);
- conn->reconnect =
- gf_timer_call_after (clnt->ctx, ts,
- rpc_clnt_reconnect,
- conn);
- if (conn->reconnect == NULL) {
- gf_log (conn->name, GF_LOG_WARNING,
- "Cannot create rpc_clnt_reconnect timer");
- unref_clnt = _gf_true;
- }
- }
+ rpc_clnt_handle_disconnect (clnt, conn);
}
- pthread_mutex_unlock (&conn->lock);
-
- if (clnt->notifyfn)
- ret = clnt->notifyfn (clnt, clnt->mydata,
- RPC_CLNT_DISCONNECT, NULL);
- if (unref_clnt)
- rpc_clnt_ref (clnt);
-
+ pthread_mutex_unlock (&clnt->notifylock);
break;
}
@@ -982,17 +991,21 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
case RPC_TRANSPORT_CONNECT:
{
- /* Every time there is a disconnection, processes
- should try to connect to 'glusterd' (ie, default
- port) or whichever port given as 'option remote-port'
- in volume file. */
- /* Below code makes sure the (re-)configured port lasts
- for just one successful attempt */
- conn->config.remote_port = 0;
-
- if (clnt->notifyfn)
- ret = clnt->notifyfn (clnt, clnt->mydata,
- RPC_CLNT_CONNECT, NULL);
+ pthread_mutex_lock (&clnt->notifylock);
+ {
+ /* Every time there is a disconnection, processes
+ * should try to connect to 'glusterd' (ie, default
+ * port) or whichever port given as 'option remote-port'
+ * in volume file. */
+ /* Below code makes sure the (re-)configured port lasts
+ * for just one successful attempt */
+ conn->config.remote_port = 0;
+
+ if (clnt->notifyfn)
+ ret = clnt->notifyfn (clnt, clnt->mydata,
+ RPC_CLNT_CONNECT, NULL);
+ }
+ pthread_mutex_unlock (&clnt->notifylock);
break;
}
@@ -1116,6 +1129,7 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,
}
pthread_mutex_init (&rpc->lock, NULL);
+ pthread_mutex_init (&rpc->notifylock, NULL);
rpc->ctx = ctx;
rpc->owner = owner;
@@ -1125,6 +1139,7 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,
rpc->reqpool = mem_pool_new (struct rpc_req, reqpool_size);
if (rpc->reqpool == NULL) {
pthread_mutex_destroy (&rpc->lock);
+ pthread_mutex_destroy (&rpc->notifylock);
GF_FREE (rpc);
rpc = NULL;
goto out;
@@ -1134,6 +1149,7 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,
reqpool_size);
if (rpc->saved_frames_pool == NULL) {
pthread_mutex_destroy (&rpc->lock);
+ pthread_mutex_destroy (&rpc->notifylock);
mem_pool_destroy (rpc->reqpool);
GF_FREE (rpc);
rpc = NULL;
@@ -1143,6 +1159,7 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,
ret = rpc_clnt_connection_init (rpc, ctx, options, name);
if (ret == -1) {
pthread_mutex_destroy (&rpc->lock);
+ pthread_mutex_destroy (&rpc->notifylock);
mem_pool_destroy (rpc->reqpool);
mem_pool_destroy (rpc->saved_frames_pool);
GF_FREE (rpc);
@@ -1738,6 +1755,7 @@ rpc_clnt_destroy (struct rpc_clnt *rpc)
saved_frames_destroy (rpc->conn.saved_frames);
pthread_mutex_destroy (&rpc->lock);
pthread_mutex_destroy (&rpc->conn.lock);
+ pthread_mutex_destroy (&rpc->notifylock);
/* mem-pool should be destroyed, otherwise,
it will cause huge memory leaks */
diff --git a/rpc/rpc-lib/src/rpc-clnt.h b/rpc/rpc-lib/src/rpc-clnt.h
index f84b4cb..3a5b287 100644
--- a/rpc/rpc-lib/src/rpc-clnt.h
+++ b/rpc/rpc-lib/src/rpc-clnt.h
@@ -172,6 +172,7 @@ struct rpc_req {
typedef struct rpc_clnt {
pthread_mutex_t lock;
+ pthread_mutex_t notifylock;
rpc_clnt_notify_t notifyfn;
rpc_clnt_connection_t conn;
void *mydata;
--
2.9.3