From f96f711ddacfe8c758de3a994e339339325a1539 Mon Sep 17 00:00:00 2001 From: Rajesh Joseph Date: Sat, 3 Dec 2016 01:10:51 +0530 Subject: [PATCH 227/227] rpc: fix for race between rpc and protocol/client It is possible that the notification thread which notifies protocol/client layer about the disconnection is put to sleep and meanwhile, a fuse thread or a timer thread initiates and completes reconnection to the brick. The notification thread is then woken up and protocol/client layer updates its flags to indicate that network is disconnected. No reconnection is initiated because reconnection is rpc-lib layer's responsibility and its flags indicate that connection is connected. Fix: Serialize connect and disconnect notify Backport-of: http://review.gluster.org/15916 Credit: Raghavendra Talur Change-Id: I8ff5d1a3283b47f5c26848a42016a40bc34ffc1d BUG: 1385605 Signed-off-by: Rajesh Joseph Reviewed-on: https://code.engineering.redhat.com/gerrit/92095 Reviewed-by: Atin Mukherjee --- rpc/rpc-lib/src/rpc-clnt.c | 98 +++++++++++++++++++++++++++------------------- rpc/rpc-lib/src/rpc-clnt.h | 1 + 2 files changed, 59 insertions(+), 40 deletions(-) diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c index a9e43eb..fe099f9 100644 --- a/rpc/rpc-lib/src/rpc-clnt.c +++ b/rpc/rpc-lib/src/rpc-clnt.c @@ -872,6 +872,41 @@ rpc_clnt_destroy (struct rpc_clnt *rpc); #define RPC_THIS_RESTORE (THIS = old_THIS) +static int +rpc_clnt_handle_disconnect (struct rpc_clnt *clnt, rpc_clnt_connection_t *conn) +{ + struct timespec ts = {0, }; + gf_boolean_t unref_clnt = _gf_false; + + rpc_clnt_connection_cleanup (conn); + + pthread_mutex_lock (&conn->lock); + { + if (!conn->rpc_clnt->disabled && (conn->reconnect == NULL)) { + ts.tv_sec = 10; + ts.tv_nsec = 0; + + rpc_clnt_ref (clnt); + conn->reconnect = gf_timer_call_after (clnt->ctx, ts, + rpc_clnt_reconnect, conn); + if (conn->reconnect == NULL) { + gf_log (conn->name, GF_LOG_WARNING, + "Cannot create rpc_clnt_reconnect timer"); + unref_clnt = _gf_true; + } + } + } + pthread_mutex_unlock (&conn->lock); + + if (clnt->notifyfn) + clnt->notifyfn (clnt, clnt->mydata, RPC_CLNT_DISCONNECT, NULL); + + if (unref_clnt) + rpc_clnt_ref (clnt); + + return 0; +} + int rpc_clnt_notify (rpc_transport_t *trans, void *mydata, rpc_transport_event_t event, void *data, ...) @@ -881,9 +916,7 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata, int ret = -1; rpc_request_info_t *req_info = NULL; rpc_transport_pollin_t *pollin = NULL; - struct timespec ts = {0, }; void *clnt_mydata = NULL; - gf_boolean_t unref_clnt = _gf_false; DECLARE_OLD_THIS; conn = mydata; @@ -899,35 +932,11 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata, switch (event) { case RPC_TRANSPORT_DISCONNECT: { - rpc_clnt_connection_cleanup (conn); - - pthread_mutex_lock (&conn->lock); + pthread_mutex_lock (&clnt->notifylock); { - if (!conn->rpc_clnt->disabled - && (conn->reconnect == NULL)) { - ts.tv_sec = 10; - ts.tv_nsec = 0; - - rpc_clnt_ref (clnt); - conn->reconnect = - gf_timer_call_after (clnt->ctx, ts, - rpc_clnt_reconnect, - conn); - if (conn->reconnect == NULL) { - gf_log (conn->name, GF_LOG_WARNING, - "Cannot create rpc_clnt_reconnect timer"); - unref_clnt = _gf_true; - } - } + rpc_clnt_handle_disconnect (clnt, conn); } - pthread_mutex_unlock (&conn->lock); - - if (clnt->notifyfn) - ret = clnt->notifyfn (clnt, clnt->mydata, - RPC_CLNT_DISCONNECT, NULL); - if (unref_clnt) - rpc_clnt_ref (clnt); - + pthread_mutex_unlock (&clnt->notifylock); break; } @@ -982,17 +991,21 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata, case RPC_TRANSPORT_CONNECT: { - /* Every time there is a disconnection, processes - should try to connect to 'glusterd' (ie, default - port) or whichever port given as 'option remote-port' - in volume file. */ - /* Below code makes sure the (re-)configured port lasts - for just one successful attempt */ - conn->config.remote_port = 0; - - if (clnt->notifyfn) - ret = clnt->notifyfn (clnt, clnt->mydata, - RPC_CLNT_CONNECT, NULL); + pthread_mutex_lock (&clnt->notifylock); + { + /* Every time there is a disconnection, processes + * should try to connect to 'glusterd' (ie, default + * port) or whichever port given as 'option remote-port' + * in volume file. */ + /* Below code makes sure the (re-)configured port lasts + * for just one successful attempt */ + conn->config.remote_port = 0; + + if (clnt->notifyfn) + ret = clnt->notifyfn (clnt, clnt->mydata, + RPC_CLNT_CONNECT, NULL); + } + pthread_mutex_unlock (&clnt->notifylock); break; } @@ -1116,6 +1129,7 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name, } pthread_mutex_init (&rpc->lock, NULL); + pthread_mutex_init (&rpc->notifylock, NULL); rpc->ctx = ctx; rpc->owner = owner; @@ -1125,6 +1139,7 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name, rpc->reqpool = mem_pool_new (struct rpc_req, reqpool_size); if (rpc->reqpool == NULL) { pthread_mutex_destroy (&rpc->lock); + pthread_mutex_destroy (&rpc->notifylock); GF_FREE (rpc); rpc = NULL; goto out; @@ -1134,6 +1149,7 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name, reqpool_size); if (rpc->saved_frames_pool == NULL) { pthread_mutex_destroy (&rpc->lock); + pthread_mutex_destroy (&rpc->notifylock); mem_pool_destroy (rpc->reqpool); GF_FREE (rpc); rpc = NULL; @@ -1143,6 +1159,7 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name, ret = rpc_clnt_connection_init (rpc, ctx, options, name); if (ret == -1) { pthread_mutex_destroy (&rpc->lock); + pthread_mutex_destroy (&rpc->notifylock); mem_pool_destroy (rpc->reqpool); mem_pool_destroy (rpc->saved_frames_pool); GF_FREE (rpc); @@ -1738,6 +1755,7 @@ rpc_clnt_destroy (struct rpc_clnt *rpc) saved_frames_destroy (rpc->conn.saved_frames); pthread_mutex_destroy (&rpc->lock); pthread_mutex_destroy (&rpc->conn.lock); + pthread_mutex_destroy (&rpc->notifylock); /* mem-pool should be destroyed, otherwise, it will cause huge memory leaks */ diff --git a/rpc/rpc-lib/src/rpc-clnt.h b/rpc/rpc-lib/src/rpc-clnt.h index f84b4cb..3a5b287 100644 --- a/rpc/rpc-lib/src/rpc-clnt.h +++ b/rpc/rpc-lib/src/rpc-clnt.h @@ -172,6 +172,7 @@ struct rpc_req { typedef struct rpc_clnt { pthread_mutex_t lock; + pthread_mutex_t notifylock; rpc_clnt_notify_t notifyfn; rpc_clnt_connection_t conn; void *mydata; -- 2.9.3