3604df
From 086436a2bf35ac0f1e2a87e5c1ff4169119d3082 Mon Sep 17 00:00:00 2001
3604df
From: Raghavendra G <rgowdapp@redhat.com>
3604df
Date: Tue, 28 Feb 2017 13:13:59 +0530
3604df
Subject: [PATCH 301/302] rpc/clnt: remove locks while notifying
3604df
 CONNECT/DISCONNECT
3604df
3604df
Locking during notify was introduced as part of commit
3604df
aa22f24f5db7659387704998ae01520708869873 [1]. The fix was introduced
3604df
to fix out-of-order CONNECT/DISCONNECT events from rpc-clnt to parent
3604df
xlators [2]. However as part of handling DISCONNECT protocol/client
3604df
does unwind saved frames (with failure) waiting for responses. This
3604df
saved_frames_unwind can be a costly operation and hence ideally
3604df
shouldn't be included in the critical section of notifylock, as it
3604df
unnecessarily delays the reconnection to same brick. Also, its not a
3604df
good practise to pass control to other xlators holding a lock as it
3604df
can lead to deadlocks. So, this patch removes locking in rpc-clnt
3604df
while notifying parent xlators.
3604df
3604df
To fix [2], two changes are present in this patch:
3604df
3604df
* notify DISCONNECT before cleaning up rpc connection (same as commit
3604df
  a6b63e11b7758cf1bfcb6798, patch [3]).
3604df
* protocol/client uses rpc_clnt_cleanup_and_start, which cleans up rpc
3604df
  connection and does a start while handling a DISCONNECT event from
3604df
  rpc. Note that patch [3] was reverted as rpc_clnt_start called in
3604df
  quick_reconnect path of protocol/client didn't invoke connect on
3604df
  transport as the connection was not cleaned up _yet_ (as cleanup was
3604df
  moved post notification in rpc-clnt). This resulted in clients never
3604df
  attempting connect to bricks.
3604df
3604df
Note that one of the neater ways to fix [2] (without using locks) is
3604df
to introduce generation numbers to map CONNECT and DISCONNECTS across
3604df
epochs and ignore DISCONNECT events if they don't belong to current
3604df
epoch. However, this approach is a bit complex to implement and
3604df
requires time. So, current patch is a hacky stop-gap fix till we come
3604df
up with a more cleaner solution.
3604df
3604df
[1] http://review.gluster.org/15916
3604df
[2] https://bugzilla.redhat.com/show_bug.cgi?id=1386626
3604df
[3] http://review.gluster.org/15681
3604df
3604df
>Change-Id: I62daeee8bb1430004e28558f6eb133efd4ccf418
3604df
>Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
3604df
>BUG: 1427012
3604df
>Reviewed-on: https://review.gluster.org/16784
3604df
>Smoke: Gluster Build System <jenkins@build.gluster.org>
3604df
>Reviewed-by: Milind Changire <mchangir@redhat.com>
3604df
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
3604df
>CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
3604df
3604df
Change-Id: I62daeee8bb1430004e28558f6eb133efd4ccf418
3604df
BUG: 1425740
3604df
Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
3604df
Reviewed-on: https://code.engineering.redhat.com/gerrit/99220
3604df
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
3604df
---
3604df
 rpc/rpc-lib/src/rpc-clnt.c           | 94 ++++++++++++++++++++++++------------
3604df
 rpc/rpc-lib/src/rpc-clnt.h           |  4 +-
3604df
 xlators/protocol/client/src/client.c |  2 +-
3604df
 3 files changed, 68 insertions(+), 32 deletions(-)
3604df
3604df
diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c
3604df
index f24c934..7bd4495 100644
3604df
--- a/rpc/rpc-lib/src/rpc-clnt.c
3604df
+++ b/rpc/rpc-lib/src/rpc-clnt.c
3604df
@@ -550,6 +550,7 @@ rpc_clnt_connection_cleanup (rpc_clnt_connection_t *conn)
3604df
                 /*reset rpc msgs stats*/
3604df
                 conn->pingcnt = 0;
3604df
                 conn->msgcnt = 0;
3604df
+                conn->cleanup_gen++;
3604df
         }
3604df
         pthread_mutex_unlock (&conn->lock);
3604df
 
3604df
@@ -875,10 +876,29 @@ rpc_clnt_destroy (struct rpc_clnt *rpc);
3604df
 static int
3604df
 rpc_clnt_handle_disconnect (struct rpc_clnt *clnt, rpc_clnt_connection_t *conn)
3604df
 {
3604df
-        struct timespec         ts          = {0, };
3604df
-        gf_boolean_t            unref_clnt  = _gf_false;
3604df
+        struct timespec ts             = {0, };
3604df
+        gf_boolean_t    unref_clnt     = _gf_false;
3604df
+        uint64_t        pre_notify_gen = 0, post_notify_gen = 0;
3604df
 
3604df
-        rpc_clnt_connection_cleanup (conn);
3604df
+        pthread_mutex_lock (&conn->lock);
3604df
+        {
3604df
+                pre_notify_gen = conn->cleanup_gen;
3604df
+        }
3604df
+        pthread_mutex_unlock (&conn->lock);
3604df
+
3604df
+        if (clnt->notifyfn)
3604df
+                clnt->notifyfn (clnt, clnt->mydata, RPC_CLNT_DISCONNECT, NULL);
3604df
+
3604df
+        pthread_mutex_lock (&conn->lock);
3604df
+        {
3604df
+                post_notify_gen = conn->cleanup_gen;
3604df
+        }
3604df
+        pthread_mutex_unlock (&conn->lock);
3604df
+
3604df
+        if (pre_notify_gen == post_notify_gen) {
3604df
+                /* program didn't invoke cleanup, so rpc has to do it */
3604df
+                rpc_clnt_connection_cleanup (conn);
3604df
+        }
3604df
 
3604df
         pthread_mutex_lock (&conn->lock);
3604df
         {
3604df
@@ -898,8 +918,6 @@ rpc_clnt_handle_disconnect (struct rpc_clnt *clnt, rpc_clnt_connection_t *conn)
3604df
         }
3604df
         pthread_mutex_unlock (&conn->lock);
3604df
 
3604df
-        if (clnt->notifyfn)
3604df
-                clnt->notifyfn (clnt, clnt->mydata, RPC_CLNT_DISCONNECT, NULL);
3604df
 
3604df
         if (unref_clnt)
3604df
                 rpc_clnt_ref (clnt);
3604df
@@ -932,11 +950,7 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
3604df
         switch (event) {
3604df
         case RPC_TRANSPORT_DISCONNECT:
3604df
         {
3604df
-                pthread_mutex_lock (&clnt->notifylock);
3604df
-                {
3604df
-                        rpc_clnt_handle_disconnect (clnt, conn);
3604df
-                }
3604df
-                pthread_mutex_unlock (&clnt->notifylock);
3604df
+                rpc_clnt_handle_disconnect (clnt, conn);
3604df
                 break;
3604df
         }
3604df
 
3604df
@@ -991,21 +1005,19 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
3604df
 
3604df
         case RPC_TRANSPORT_CONNECT:
3604df
         {
3604df
-                pthread_mutex_lock (&clnt->notifylock);
3604df
-                {
3604df
-                        /* Every time there is a disconnection, processes
3604df
-                         * should try to connect to 'glusterd' (ie, default
3604df
-                         * port) or whichever port given as 'option remote-port'
3604df
-                         * in volume file. */
3604df
-                        /* Below code makes sure the (re-)configured port lasts
3604df
-                         * for just one successful attempt */
3604df
-                        conn->config.remote_port = 0;
3604df
-
3604df
-                        if (clnt->notifyfn)
3604df
-                                ret = clnt->notifyfn (clnt, clnt->mydata,
3604df
-                                                RPC_CLNT_CONNECT, NULL);
3604df
-                }
3604df
-                pthread_mutex_unlock (&clnt->notifylock);
3604df
+
3604df
+                /* Every time there is a disconnection, processes
3604df
+                 * should try to connect to 'glusterd' (ie, default
3604df
+                 * port) or whichever port given as 'option remote-port'
3604df
+                 * in volume file. */
3604df
+                /* Below code makes sure the (re-)configured port lasts
3604df
+                 * for just one successful attempt */
3604df
+                conn->config.remote_port = 0;
3604df
+
3604df
+                if (clnt->notifyfn)
3604df
+                        ret = clnt->notifyfn (clnt, clnt->mydata,
3604df
+                                              RPC_CLNT_CONNECT, NULL);
3604df
+
3604df
                 break;
3604df
         }
3604df
 
3604df
@@ -1129,7 +1141,6 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,
3604df
         }
3604df
 
3604df
         pthread_mutex_init (&rpc->lock, NULL);
3604df
-        pthread_mutex_init (&rpc->notifylock, NULL);
3604df
         rpc->ctx = ctx;
3604df
         rpc->owner = owner;
3604df
 
3604df
@@ -1139,7 +1150,6 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,
3604df
         rpc->reqpool = mem_pool_new (struct rpc_req, reqpool_size);
3604df
         if (rpc->reqpool == NULL) {
3604df
                 pthread_mutex_destroy (&rpc->lock);
3604df
-                pthread_mutex_destroy (&rpc->notifylock);
3604df
                 GF_FREE (rpc);
3604df
                 rpc = NULL;
3604df
                 goto out;
3604df
@@ -1149,7 +1159,6 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,
3604df
                                                reqpool_size);
3604df
         if (rpc->saved_frames_pool == NULL) {
3604df
                 pthread_mutex_destroy (&rpc->lock);
3604df
-                pthread_mutex_destroy (&rpc->notifylock);
3604df
                 mem_pool_destroy (rpc->reqpool);
3604df
                 GF_FREE (rpc);
3604df
                 rpc = NULL;
3604df
@@ -1159,7 +1168,6 @@ rpc_clnt_new (dict_t *options, xlator_t *owner, char *name,
3604df
         ret = rpc_clnt_connection_init (rpc, ctx, options, name);
3604df
         if (ret == -1) {
3604df
                 pthread_mutex_destroy (&rpc->lock);
3604df
-                pthread_mutex_destroy (&rpc->notifylock);
3604df
                 mem_pool_destroy (rpc->reqpool);
3604df
                 mem_pool_destroy (rpc->saved_frames_pool);
3604df
                 GF_FREE (rpc);
3604df
@@ -1205,6 +1213,33 @@ rpc_clnt_start (struct rpc_clnt *rpc)
3604df
 
3604df
 
3604df
 int
3604df
+rpc_clnt_cleanup_and_start (struct rpc_clnt *rpc)
3604df
+{
3604df
+        struct rpc_clnt_connection *conn = NULL;
3604df
+
3604df
+        if (!rpc)
3604df
+                return -1;
3604df
+
3604df
+        conn = &rpc->conn;
3604df
+
3604df
+        rpc_clnt_connection_cleanup (conn);
3604df
+
3604df
+        pthread_mutex_lock (&conn->lock);
3604df
+        {
3604df
+                rpc->disabled = 0;
3604df
+        }
3604df
+        pthread_mutex_unlock (&conn->lock);
3604df
+        /* Corresponding unref will be either on successful timer cancel or last
3604df
+         * rpc_clnt_reconnect fire event.
3604df
+         */
3604df
+        rpc_clnt_ref (rpc);
3604df
+        rpc_clnt_reconnect (conn);
3604df
+
3604df
+        return 0;
3604df
+}
3604df
+
3604df
+
3604df
+int
3604df
 rpc_clnt_register_notify (struct rpc_clnt *rpc, rpc_clnt_notify_t fn,
3604df
                           void *mydata)
3604df
 {
3604df
@@ -1755,7 +1790,6 @@ rpc_clnt_destroy (struct rpc_clnt *rpc)
3604df
         saved_frames_destroy (rpc->conn.saved_frames);
3604df
         pthread_mutex_destroy (&rpc->lock);
3604df
         pthread_mutex_destroy (&rpc->conn.lock);
3604df
-        pthread_mutex_destroy (&rpc->notifylock);
3604df
 
3604df
         /* mem-pool should be destroyed, otherwise,
3604df
            it will cause huge memory leaks */
3604df
diff --git a/rpc/rpc-lib/src/rpc-clnt.h b/rpc/rpc-lib/src/rpc-clnt.h
3604df
index 3a5b287..df19a0c 100644
3604df
--- a/rpc/rpc-lib/src/rpc-clnt.h
3604df
+++ b/rpc/rpc-lib/src/rpc-clnt.h
3604df
@@ -150,6 +150,7 @@ struct rpc_clnt_connection {
3604df
 	int32_t                  ping_timeout;
3604df
         uint64_t                 pingcnt;
3604df
         uint64_t                 msgcnt;
3604df
+        uint64_t                 cleanup_gen;
3604df
 };
3604df
 typedef struct rpc_clnt_connection rpc_clnt_connection_t;
3604df
 
3604df
@@ -172,7 +173,6 @@ struct rpc_req {
3604df
 
3604df
 typedef struct rpc_clnt {
3604df
         pthread_mutex_t        lock;
3604df
-        pthread_mutex_t        notifylock;
3604df
         rpc_clnt_notify_t      notifyfn;
3604df
         rpc_clnt_connection_t  conn;
3604df
         void                  *mydata;
3604df
@@ -199,6 +199,8 @@ struct rpc_clnt *rpc_clnt_new (dict_t *options, xlator_t *owner,
3604df
 
3604df
 int rpc_clnt_start (struct rpc_clnt *rpc);
3604df
 
3604df
+int rpc_clnt_cleanup_and_start (struct rpc_clnt *rpc);
3604df
+
3604df
 int rpc_clnt_register_notify (struct rpc_clnt *rpc, rpc_clnt_notify_t fn,
3604df
                               void *mydata);
3604df
 
3604df
diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
3604df
index 3cb5e23..66f15b8 100644
3604df
--- a/xlators/protocol/client/src/client.c
3604df
+++ b/xlators/protocol/client/src/client.c
3604df
@@ -2314,7 +2314,7 @@ client_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
3604df
 
3604df
                 if (conf->quick_reconnect) {
3604df
                         conf->quick_reconnect = 0;
3604df
-                        rpc_clnt_start (rpc);
3604df
+                        rpc_clnt_cleanup_and_start (rpc);
3604df
 
3604df
                 } else {
3604df
                         rpc->conn.config.remote_port = 0;
3604df
-- 
3604df
2.9.3
3604df