|
|
3604df |
From 086436a2bf35ac0f1e2a87e5c1ff4169119d3082 Mon Sep 17 00:00:00 2001
|
|
|
3604df |
From: Raghavendra G <rgowdapp@redhat.com>
|
|
|
3604df |
Date: Tue, 28 Feb 2017 13:13:59 +0530
|
|
|
3604df |
Subject: [PATCH 301/302] rpc/clnt: remove locks while notifying
|
|
|
3604df |
CONNECT/DISCONNECT
|
|
|
3604df |
|
|
|
3604df |
Locking during notify was introduced as part of commit
|
|
|
3604df |
aa22f24f5db7659387704998ae01520708869873 [1]. The fix was introduced
|
|
|
3604df |
to fix out-of-order CONNECT/DISCONNECT events from rpc-clnt to parent
|
|
|
3604df |
xlators [2]. However as part of handling DISCONNECT protocol/client
|
|
|
3604df |
does unwind saved frames (with failure) waiting for responses. This
|
|
|
3604df |
saved_frames_unwind can be a costly operation and hence ideally
|
|
|
3604df |
shouldn't be included in the critical section of notifylock, as it
|
|
|
3604df |
unnecessarily delays the reconnection to same brick. Also, its not a
|
|
|
3604df |
good practise to pass control to other xlators holding a lock as it
|
|
|
3604df |
can lead to deadlocks. So, this patch removes locking in rpc-clnt
|
|
|
3604df |
while notifying parent xlators.
|
|
|
3604df |
|
|
|
3604df |
To fix [2], two changes are present in this patch:
|
|
|
3604df |
|
|
|
3604df |
* notify DISCONNECT before cleaning up rpc connection (same as commit
|
|
|
3604df |
a6b63e11b7758cf1bfcb6798, patch [3]).
|
|
|
3604df |
* protocol/client uses rpc_clnt_cleanup_and_start, which cleans up rpc
|
|
|
3604df |
connection and does a start while handling a DISCONNECT event from
|
|
|
3604df |
rpc. Note that patch [3] was reverted as rpc_clnt_start called in
|
|
|
3604df |
quick_reconnect path of protocol/client didn't invoke connect on
|
|
|
3604df |
transport as the connection was not cleaned up _yet_ (as cleanup was
|
|
|
3604df |
moved post notification in rpc-clnt). This resulted in clients never
|
|
|
3604df |
attempting connect to bricks.
|
|
|
3604df |
|
|
|
3604df |
Note that one of the neater ways to fix [2] (without using locks) is
|
|
|
3604df |
to introduce generation numbers to map CONNECT and DISCONNECTS across
|
|
|
3604df |
epochs and ignore DISCONNECT events if they don't belong to current
|
|
|
3604df |
epoch. However, this approach is a bit complex to implement and
|
|
|
3604df |
requires time. So, current patch is a hacky stop-gap fix till we come
|
|
|
3604df |
up with a more cleaner solution.
|
|
|
3604df |
|
|
|
3604df |
[1] http://review.gluster.org/15916
|
|
|
3604df |
[2] https://bugzilla.redhat.com/show_bug.cgi?id=1386626
|
|
|
3604df |
[3] http://review.gluster.org/15681
|
|
|
3604df |
|
|
|
3604df |
>Change-Id: I62daeee8bb1430004e28558f6eb133efd4ccf418
|
|
|
3604df |
>Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
|
|
|
3604df |
>BUG: 1427012
|
|
|
3604df |
>Reviewed-on: https://review.gluster.org/16784
|
|
|
3604df |
>Smoke: Gluster Build System <jenkins@build.gluster.org>
|
|
|
3604df |
>Reviewed-by: Milind Changire <mchangir@redhat.com>
|
|
|
3604df |
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
|
|
|
3604df |
>CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
|
|
|
3604df |
|
|
|
3604df |
Change-Id: I62daeee8bb1430004e28558f6eb133efd4ccf418
|
|
|
3604df |
BUG: 1425740
|
|
|
3604df |
Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
|
|
|
3604df |
Reviewed-on: https://code.engineering.redhat.com/gerrit/99220
|
|
|
3604df |
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
|
|
|
3604df |
---
|
|
|
3604df |
rpc/rpc-lib/src/rpc-clnt.c | 94 ++++++++++++++++++++++++------------
|
|
|
3604df |
rpc/rpc-lib/src/rpc-clnt.h | 4 +-
|
|
|
3604df |
xlators/protocol/client/src/client.c | 2 +-
|
|
|
3604df |
3 files changed, 68 insertions(+), 32 deletions(-)
|
|
|
3604df |
|
|
|
3604df |
diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c
|
|
|
3604df |
index f24c934..7bd4495 100644
|
|
|
3604df |
--- a/rpc/rpc-lib/src/rpc-clnt.c
|
|
|
3604df |
+++ b/rpc/rpc-lib/src/rpc-clnt.c
|
|
|
3604df |
@@ -550,6 +550,7 @@ rpc_clnt_connection_cleanup (rpc_clnt_connection_t *conn)
|
|
|
3604df |
/*reset rpc msgs stats*/
|
|
|
3604df |
conn->pingcnt = 0;
|
|
|
3604df |
conn->msgcnt = 0;
|
|
|
3604df |
+ conn->cleanup_gen++;
|
|
|
3604df |
}
|
|
|
3604df |
pthread_mutex_unlock (&conn->lock);
|
|
|
3604df |
|
|
|
3604df |
@@ -875,10 +876,29 @@ rpc_clnt_destroy (struct rpc_clnt *rpc);
|
|
|
3604df |
static int
|
|
|
3604df |
rpc_clnt_handle_disconnect (struct rpc_clnt *clnt, rpc_clnt_connection_t *conn)
|
|
|
3604df |
{
|
|
|
3604df |
- struct timespec ts = {0, };
|
|
|
3604df |
- gf_boolean_t unref_clnt = _gf_false;
|
|
|
3604df |
+ struct timespec ts = {0, };
|
|
|
3604df |
+ gf_boolean_t unref_clnt = _gf_false;
|
|
|
3604df |
+ uint64_t pre_notify_gen = 0, post_notify_gen = 0;
|
|
|
3604df |
|
|
|
3604df |
- rpc_clnt_connection_cleanup (conn);
|
|
|
3604df |
+ pthread_mutex_lock (&conn->lock);
|
|
|
3604df |
+ {
|
|
|
3604df |
+ pre_notify_gen = conn->cleanup_gen;
|
|
|
3604df |
+ }
|
|
|
3604df |
+ pthread_mutex_unlock (&conn->lock);
|
|
|
3604df |
+
|
|
|
3604df |
+ if (clnt->notifyfn)
|
|
|
3604df |
+ clnt->notifyfn (clnt, clnt->mydata, RPC_CLNT_DISCONNECT, NULL);
|
|
|
3604df |
+
|
|
|
3604df |
+ pthread_mutex_lock (&conn->lock);
|
|
|
3604df |
+ {
|
|
|
3604df |
+ post_notify_gen = conn->cleanup_gen;
|
|
|
3604df |
+ }
|
|
|
3604df |
+ pthread_mutex_unlock (&conn->lock);
|
|
|
3604df |
+
|
|
|
3604df |
+ if (pre_notify_gen == post_notify_gen) {
|
|
|
3604df |
+ /* program didn't invoke cleanup, so rpc has to do it */
|
|
|
3604df |
+ rpc_clnt_connection_cleanup (conn);
|
|
|
3604df |
+ }
|
|
|
3604df |
|
|
|
3604df |
pthread_mutex_lock (&conn->lock);
|
|
|
3604df |
{
|
|
|
3604df |
@@ -898,8 +918,6 @@ rpc_clnt_handle_disconnect (struct rpc_clnt *clnt, rpc_clnt_connection_t *conn)
|
|
|
3604df |
}
|
|
|
3604df |
pthread_mutex_unlock (&conn->lock);
|
|
|
3604df |
|
|
|
3604df |
- if (clnt->notifyfn)
|
|
|
3604df |
- clnt->notifyfn (clnt, clnt->mydata, RPC_CLNT_DISCONNECT, NULL);
|
|
|
3604df |
|
|
|
3604df |
if (unref_clnt)
|
|
|
3604df |
rpc_clnt_ref (clnt);
|
|
|
3604df |
@@ -932,11 +950,7 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
|
|
|
3604df |
switch (event) {
|
|
|
3604df |
case RPC_TRANSPORT_DISCONNECT:
|
|
|
3604df |
{
|
|
|
3604df |
- pthread_mutex_lock (&clnt->notifylock);
|
|
|
3604df |
- {
|
|
|
3604df |
- rpc_clnt_handle_disconnect (clnt, conn);
|
|
|
3604df |
- }
|
|
|
3604df |
- pthread_mutex_unlock (&clnt->notifylock);
|
|
|
3604df |
+ rpc_clnt_handle_disconnect (clnt, conn);
|
|
|
3604df |
break;
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
@@ -991,21 +1005,19 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
|
|
|
3604df |
|
|
|
3604df |
case RPC_TRANSPORT_CONNECT:
|
|
|
3604df |
{
|
|
|
3604df |
- pthread_mutex_lock (&clnt->notifylock);
|
|
|
3604df |
- {
|
|
|
3604df |
- /* Every time there is a disconnection, processes
|
|
|
3604df |
- * should try to connect to 'glusterd' (ie, default
|
|
|
3604df |
- * port) or whichever port given as 'option remote-port'
|
|
|
3604df |
- * in volume file. */
|
|
|
3604df |
- /* Below code makes sure the (re-)configured port lasts
|
|
|
3604df |
- * for just one successful attempt */
|
|
|
3604df |
- conn->config.remote_port = 0;
|
|
|
3604df |
-
|
|
|
3604df |
- if (clnt->notifyfn)
|
|
|
3604df |
- ret = clnt->notifyfn (clnt, clnt->mydata,
|
|
|
3604df |
- RPC_CLNT_CONNECT, NULL);
|
|
|
3604df |
- }
|
|
|
3604df |
- pthread_mutex_unlock (&clnt->notifylock);
|
|
|
3604df |
+
|
|
|
3604df |
+ /* Every time there is a disconnection, processes
|
|
|
3604df |
+ * should try to connect to 'glusterd' (ie, default
|
|
|
3604df |
+ * port) or whichever port given as 'option remote-port'
|
|
|
3604df |
+ * in volume file. */
|
|
|
3604df |
+ /* Below code makes sure the (re-)configured port lasts
|
|
|
3604df |
+ * for just one successful attempt */
|
|
|
3604df |
+ conn->config.remote_port = 0;
|
|
|
3604df |
+
|
|
|
3604df |
+ if (clnt->notifyfn)
|
|
|
3604df |
+ ret = clnt->notifyfn (clnt, clnt->mydata,
|
|
|
3604df |
+ RPC_CLNT_CONNECT, NULL);
|
|
|
3604df |
+
|
|
|
3604df |
break;
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
@@ -1129,7 +1141,6 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,
|
|
|
3604df |
}
|
|
|
3604df |
|
|
|
3604df |
pthread_mutex_init (&rpc->lock, NULL);
|
|
|
3604df |
- pthread_mutex_init (&rpc->notifylock, NULL);
|
|
|
3604df |
rpc->ctx = ctx;
|
|
|
3604df |
rpc->owner = owner;
|
|
|
3604df |
|
|
|
3604df |
@@ -1139,7 +1150,6 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,
|
|
|
3604df |
rpc->reqpool = mem_pool_new (struct rpc_req, reqpool_size);
|
|
|
3604df |
if (rpc->reqpool == NULL) {
|
|
|
3604df |
pthread_mutex_destroy (&rpc->lock);
|
|
|
3604df |
- pthread_mutex_destroy (&rpc->notifylock);
|
|
|
3604df |
GF_FREE (rpc);
|
|
|
3604df |
rpc = NULL;
|
|
|
3604df |
goto out;
|
|
|
3604df |
@@ -1149,7 +1159,6 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,
|
|
|
3604df |
reqpool_size);
|
|
|
3604df |
if (rpc->saved_frames_pool == NULL) {
|
|
|
3604df |
pthread_mutex_destroy (&rpc->lock);
|
|
|
3604df |
- pthread_mutex_destroy (&rpc->notifylock);
|
|
|
3604df |
mem_pool_destroy (rpc->reqpool);
|
|
|
3604df |
GF_FREE (rpc);
|
|
|
3604df |
rpc = NULL;
|
|
|
3604df |
@@ -1159,7 +1168,6 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,
|
|
|
3604df |
ret = rpc_clnt_connection_init (rpc, ctx, options, name);
|
|
|
3604df |
if (ret == -1) {
|
|
|
3604df |
pthread_mutex_destroy (&rpc->lock);
|
|
|
3604df |
- pthread_mutex_destroy (&rpc->notifylock);
|
|
|
3604df |
mem_pool_destroy (rpc->reqpool);
|
|
|
3604df |
mem_pool_destroy (rpc->saved_frames_pool);
|
|
|
3604df |
GF_FREE (rpc);
|
|
|
3604df |
@@ -1205,6 +1213,33 @@ rpc_clnt_start (struct rpc_clnt *rpc)
|
|
|
3604df |
|
|
|
3604df |
|
|
|
3604df |
int
|
|
|
3604df |
+rpc_clnt_cleanup_and_start (struct rpc_clnt *rpc)
|
|
|
3604df |
+{
|
|
|
3604df |
+ struct rpc_clnt_connection *conn = NULL;
|
|
|
3604df |
+
|
|
|
3604df |
+ if (!rpc)
|
|
|
3604df |
+ return -1;
|
|
|
3604df |
+
|
|
|
3604df |
+ conn = &rpc->conn;
|
|
|
3604df |
+
|
|
|
3604df |
+ rpc_clnt_connection_cleanup (conn);
|
|
|
3604df |
+
|
|
|
3604df |
+ pthread_mutex_lock (&conn->lock);
|
|
|
3604df |
+ {
|
|
|
3604df |
+ rpc->disabled = 0;
|
|
|
3604df |
+ }
|
|
|
3604df |
+ pthread_mutex_unlock (&conn->lock);
|
|
|
3604df |
+ /* Corresponding unref will be either on successful timer cancel or last
|
|
|
3604df |
+ * rpc_clnt_reconnect fire event.
|
|
|
3604df |
+ */
|
|
|
3604df |
+ rpc_clnt_ref (rpc);
|
|
|
3604df |
+ rpc_clnt_reconnect (conn);
|
|
|
3604df |
+
|
|
|
3604df |
+ return 0;
|
|
|
3604df |
+}
|
|
|
3604df |
+
|
|
|
3604df |
+
|
|
|
3604df |
+int
|
|
|
3604df |
rpc_clnt_register_notify (struct rpc_clnt *rpc, rpc_clnt_notify_t fn,
|
|
|
3604df |
void *mydata)
|
|
|
3604df |
{
|
|
|
3604df |
@@ -1755,7 +1790,6 @@ rpc_clnt_destroy (struct rpc_clnt *rpc)
|
|
|
3604df |
saved_frames_destroy (rpc->conn.saved_frames);
|
|
|
3604df |
pthread_mutex_destroy (&rpc->lock);
|
|
|
3604df |
pthread_mutex_destroy (&rpc->conn.lock);
|
|
|
3604df |
- pthread_mutex_destroy (&rpc->notifylock);
|
|
|
3604df |
|
|
|
3604df |
/* mem-pool should be destroyed, otherwise,
|
|
|
3604df |
it will cause huge memory leaks */
|
|
|
3604df |
diff --git a/rpc/rpc-lib/src/rpc-clnt.h b/rpc/rpc-lib/src/rpc-clnt.h
|
|
|
3604df |
index 3a5b287..df19a0c 100644
|
|
|
3604df |
--- a/rpc/rpc-lib/src/rpc-clnt.h
|
|
|
3604df |
+++ b/rpc/rpc-lib/src/rpc-clnt.h
|
|
|
3604df |
@@ -150,6 +150,7 @@ struct rpc_clnt_connection {
|
|
|
3604df |
int32_t ping_timeout;
|
|
|
3604df |
uint64_t pingcnt;
|
|
|
3604df |
uint64_t msgcnt;
|
|
|
3604df |
+ uint64_t cleanup_gen;
|
|
|
3604df |
};
|
|
|
3604df |
typedef struct rpc_clnt_connection rpc_clnt_connection_t;
|
|
|
3604df |
|
|
|
3604df |
@@ -172,7 +173,6 @@ struct rpc_req {
|
|
|
3604df |
|
|
|
3604df |
typedef struct rpc_clnt {
|
|
|
3604df |
pthread_mutex_t lock;
|
|
|
3604df |
- pthread_mutex_t notifylock;
|
|
|
3604df |
rpc_clnt_notify_t notifyfn;
|
|
|
3604df |
rpc_clnt_connection_t conn;
|
|
|
3604df |
void *mydata;
|
|
|
3604df |
@@ -199,6 +199,8 @@ struct rpc_clnt *rpc_clnt_new (dict_t *options, xlator_t *owner,
|
|
|
3604df |
|
|
|
3604df |
int rpc_clnt_start (struct rpc_clnt *rpc);
|
|
|
3604df |
|
|
|
3604df |
+int rpc_clnt_cleanup_and_start (struct rpc_clnt *rpc);
|
|
|
3604df |
+
|
|
|
3604df |
int rpc_clnt_register_notify (struct rpc_clnt *rpc, rpc_clnt_notify_t fn,
|
|
|
3604df |
void *mydata);
|
|
|
3604df |
|
|
|
3604df |
diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
|
|
|
3604df |
index 3cb5e23..66f15b8 100644
|
|
|
3604df |
--- a/xlators/protocol/client/src/client.c
|
|
|
3604df |
+++ b/xlators/protocol/client/src/client.c
|
|
|
3604df |
@@ -2314,7 +2314,7 @@ client_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
|
|
|
3604df |
|
|
|
3604df |
if (conf->quick_reconnect) {
|
|
|
3604df |
conf->quick_reconnect = 0;
|
|
|
3604df |
- rpc_clnt_start (rpc);
|
|
|
3604df |
+ rpc_clnt_cleanup_and_start (rpc);
|
|
|
3604df |
|
|
|
3604df |
} else {
|
|
|
3604df |
rpc->conn.config.remote_port = 0;
|
|
|
3604df |
--
|
|
|
3604df |
2.9.3
|
|
|
3604df |
|