Blob Blame History Raw
From d79cb2cdff6fe8d962c9ac095a7541ddf500302b Mon Sep 17 00:00:00 2001
From: Mohammed Rafi KC <rkavunga@redhat.com>
Date: Mon, 1 Apr 2019 14:44:20 +0530
Subject: [PATCH 099/124] client/fini: return fini after rpc cleanup

There is a race condition in rpc_transport later
and client fini.

Sequence of events to happen the race condition
1) When we want to destroy a graph, we send a parent down
   event first
2) Once parent down received on a client xlator, we will
   initiates a rpc disconnect
3) This will in turn generates a child down event.
4) When we process child down, we first do fini for
   Every xlator
5) On successful return of fini, we delete the graph

Here after the step 5, there is a chance that the fini
on client might not be finished. Because an rpc_tranpsort
ref can race with the above sequence.

So we have to wait till all rpc's are successfully freed
before returning the fini from client

Backport of: https://review.gluster.org/#/c/glusterfs/+/22468/

>Change-Id: I20145662d71fb837e448a4d3210d1fcb2855f2d4
>fixes: bz#1659708
>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>

Change-Id: I848bcfb9443467caed32bae0717244ab01b407fc
BUG: 1471742
Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/167831
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
---
 xlators/protocol/client/src/client.c | 25 ++++++++++++++++++++-----
 xlators/protocol/client/src/client.h |  6 ++++++
 2 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
index 19f5175..a372807 100644
--- a/xlators/protocol/client/src/client.c
+++ b/xlators/protocol/client/src/client.c
@@ -49,11 +49,12 @@ client_fini_complete(xlator_t *this)
     if (!conf->destroy)
         return 0;
 
-    this->private = NULL;
-
-    pthread_spin_destroy(&conf->fd_lock);
-    pthread_mutex_destroy(&conf->lock);
-    GF_FREE(conf);
+    pthread_mutex_lock(&conf->lock);
+    {
+        conf->fini_completed = _gf_true;
+        pthread_cond_broadcast(&conf->fini_complete_cond);
+    }
+    pthread_mutex_unlock(&conf->lock);
 
 out:
     return 0;
@@ -2721,6 +2722,7 @@ init(xlator_t *this)
         goto out;
 
     pthread_mutex_init(&conf->lock, NULL);
+    pthread_cond_init(&conf->fini_complete_cond, NULL);
     pthread_spin_init(&conf->fd_lock, 0);
     INIT_LIST_HEAD(&conf->saved_fds);
 
@@ -2779,6 +2781,7 @@ fini(xlator_t *this)
     if (!conf)
         return;
 
+    conf->fini_completed = _gf_false;
     conf->destroy = 1;
     if (conf->rpc) {
         /* cleanup the saved-frames before last unref */
@@ -2786,6 +2789,18 @@ fini(xlator_t *this)
         rpc_clnt_unref(conf->rpc);
     }
 
+    pthread_mutex_lock(&conf->lock);
+    {
+        while (!conf->fini_completed)
+            pthread_cond_wait(&conf->fini_complete_cond, &conf->lock);
+    }
+    pthread_mutex_unlock(&conf->lock);
+
+    pthread_spin_destroy(&conf->fd_lock);
+    pthread_mutex_destroy(&conf->lock);
+    pthread_cond_destroy(&conf->fini_complete_cond);
+    GF_FREE(conf);
+
     /* Saved Fds */
     /* TODO: */
 
diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
index f12fa61..8dcd72f 100644
--- a/xlators/protocol/client/src/client.h
+++ b/xlators/protocol/client/src/client.h
@@ -235,6 +235,12 @@ typedef struct clnt_conf {
                                       * up, disconnects can be
                                       * logged
                                       */
+
+    gf_boolean_t old_protocol;         /* used only for old-protocol testing */
+    pthread_cond_t fini_complete_cond; /* Used to wait till we finsh the fini
+                                          compltely, ie client_fini_complete
+                                          to return*/
+    gf_boolean_t fini_completed;
 } clnt_conf_t;
 
 typedef struct _client_fd_ctx {
-- 
1.8.3.1