3604df
From 9ac60df1fdce697e38571d20e23fcd5472790350 Mon Sep 17 00:00:00 2001
3604df
From: Ravishankar N <ravishankar@redhat.com>
3604df
Date: Thu, 2 Feb 2017 16:41:45 +0530
3604df
Subject: [PATCH 288/294] protocol/client: Fix double free of client fdctx
3604df
 destroy
3604df
3604df
Backport of https://review.gluster.org/#/c/16521/
3604df
3604df
This patch fixes the race between fd re-open code and fd release code,
3604df
both of which free the fd context due to a race in certain variable
3604df
checks as explained below:
3604df
3604df
1. client process (shd in the case of this BZ) sends an opendir to its
3604df
children (client xlators) which send the fop to the bricks to get a valid fd.
3604df
3604df
2. Client xlator loses connection to the brick. fdctx->remote_fd is -1.
3604df
3604df
3. Client re-establishes connection. After handshake, it reopens the dir
3604df
and sets fdctx->remote_fd to a valid fd in client3_3_reopendir_cbk().
3604df
3604df
4. Meanwhile, shd sends an fd unref after it is done with the opendir.
3604df
This triggers a releasedir (since fd->refcount becomes 0).
3604df
3604df
5. client3_3_releasedir() sees that fdctx->remote_fd is a valid number
3604df
(i.e. not -1), sets fdctx->released=1 and calls client_fdctx_destroy().
3604df
3604df
6. As a continuation of step 3, client_reopen_done() is called by
3604df
client3_3_reopendir_cbk(), which sees that fdctx->released==1 and
3604df
again calls client_fdctx_destroy().
3604df
3604df
Depending on when step-5 does GF_FREE(fdctx), we may crash at any place in
3604df
step-6 in client3_3_reopendir_cbk() when it tries to access
3604df
fdctx->{whatever}.
3604df
3604df
Change-Id: Ifb668b248e6333a6f1fc09c1a116521f97102353
3604df
BUG: 1415101
3604df
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
3604df
Reviewed-on: https://code.engineering.redhat.com/gerrit/97481
3604df
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
3604df
---
3604df
 xlators/protocol/client/src/client-handshake.c | 37 +++++++++++---------------
3604df
 xlators/protocol/client/src/client-rpc-fops.c  | 25 ++++++++++-------
3604df
 xlators/protocol/client/src/client.h           |  5 ++--
3604df
 3 files changed, 33 insertions(+), 34 deletions(-)
3604df
3604df
diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c
3604df
index 74e7548..7866697 100644
3604df
--- a/xlators/protocol/client/src/client-handshake.c
3604df
+++ b/xlators/protocol/client/src/client-handshake.c
3604df
@@ -348,7 +348,7 @@ clnt_release_reopen_fd_cbk (struct rpc_req *req, struct iovec *iov,
3604df
 
3604df
         clnt_fd_lk_reacquire_failed (this, fdctx, conf);
3604df
 
3604df
-        fdctx->reopen_done (fdctx, this);
3604df
+        fdctx->reopen_done (fdctx, fdctx->remote_fd, this);
3604df
 
3604df
         frame->local = NULL;
3604df
         STACK_DESTROY (frame->root);
3604df
@@ -381,7 +381,7 @@ clnt_release_reopen_fd (xlator_t *this, clnt_fd_ctx_t *fdctx)
3604df
  out:
3604df
         if (ret) {
3604df
                 clnt_fd_lk_reacquire_failed (this, fdctx, conf);
3604df
-                fdctx->reopen_done (fdctx, this);
3604df
+                fdctx->reopen_done (fdctx, fdctx->remote_fd, this);
3604df
         }
3604df
         return 0;
3604df
 }
3604df
@@ -500,7 +500,7 @@ client_reacquire_lock_cbk (struct rpc_req *req, struct iovec *iov,
3604df
                 }
3604df
                 pthread_mutex_unlock (&conf->lock);
3604df
 
3604df
-                fdctx->reopen_done (fdctx, this);
3604df
+                fdctx->reopen_done (fdctx, fdctx->remote_fd, this);
3604df
         }
3604df
 
3604df
         ret = 0;
3604df
@@ -610,7 +610,7 @@ client_reacquire_lock (xlator_t *this, clnt_fd_ctx_t *fdctx)
3604df
         if (client_fd_lk_list_empty (fdctx->lk_ctx, _gf_false)) {
3604df
                 gf_msg_debug (this->name, 0,
3604df
                               "fd lock list is empty");
3604df
-                fdctx->reopen_done (fdctx, this);
3604df
+                fdctx->reopen_done (fdctx, fdctx->remote_fd, this);
3604df
         } else {
3604df
                 lk_ctx = fdctx->lk_ctx;
3604df
 
3604df
@@ -626,14 +626,14 @@ out:
3604df
 }
3604df
 
3604df
 void
3604df
-client_default_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this)
3604df
+client_default_reopen_done (clnt_fd_ctx_t *fdctx, int64_t rfd, xlator_t *this)
3604df
 {
3604df
         gf_log_callingfn (this->name, GF_LOG_WARNING,
3604df
                           "This function should never be called");
3604df
 }
3604df
 
3604df
 void
3604df
-client_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this)
3604df
+client_reopen_done (clnt_fd_ctx_t *fdctx, int64_t rfd, xlator_t *this)
3604df
 {
3604df
         clnt_conf_t  *conf    = NULL;
3604df
         gf_boolean_t destroy  = _gf_false;
3604df
@@ -642,21 +642,23 @@ client_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this)
3604df
 
3604df
         pthread_mutex_lock (&conf->lock);
3604df
         {
3604df
+                fdctx->remote_fd = rfd;
3604df
                 fdctx->reopen_attempts = 0;
3604df
+                fdctx->reopen_done = client_default_reopen_done;
3604df
                 if (!fdctx->released)
3604df
                         list_add_tail (&fdctx->sfd_pos, &conf->saved_fds);
3604df
                 else
3604df
                         destroy = _gf_true;
3604df
-                fdctx->reopen_done = client_default_reopen_done;
3604df
         }
3604df
         pthread_mutex_unlock (&conf->lock);
3604df
 
3604df
         if (destroy)
3604df
                 client_fdctx_destroy (this, fdctx);
3604df
+
3604df
 }
3604df
 
3604df
 void
3604df
-client_child_up_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this)
3604df
+client_child_up_reopen_done (clnt_fd_ctx_t *fdctx, int64_t rfd, xlator_t *this)
3604df
 {
3604df
         clnt_conf_t  *conf    = NULL;
3604df
         uint64_t     fd_count = 0;
3604df
@@ -669,7 +671,7 @@ client_child_up_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this)
3604df
         }
3604df
         UNLOCK (&conf->rec_lock);
3604df
 
3604df
-        client_reopen_done (fdctx, this);
3604df
+        client_reopen_done (fdctx, rfd, this);
3604df
         if (fd_count == 0) {
3604df
                 gf_msg (this->name, GF_LOG_INFO, 0, PC_MSG_CHILD_UP_NOTIFY,
3604df
                         "last fd open'd/lock-self-heal'd - notifying CHILD-UP");
3604df
@@ -732,7 +734,6 @@ client3_3_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count,
3604df
 
3604df
         pthread_mutex_lock (&conf->lock);
3604df
         {
3604df
-                fdctx->remote_fd = rsp.fd;
3604df
                 if (!fdctx->released) {
3604df
                         if (conf->lk_heal &&
3604df
                             !client_fd_lk_list_empty (fdctx->lk_ctx,
3604df
@@ -762,7 +763,7 @@ client3_3_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count,
3604df
 
3604df
 out:
3604df
         if (!attempt_lock_recovery)
3604df
-                fdctx->reopen_done (fdctx, this);
3604df
+                fdctx->reopen_done (fdctx, (rsp.op_ret) ? -1 : rsp.fd, this);
3604df
 
3604df
         frame->local = NULL;
3604df
         STACK_DESTROY (frame->root);
3604df
@@ -779,14 +780,12 @@ client3_3_reopendir_cbk (struct rpc_req *req, struct iovec *iov, int count,
3604df
         int32_t        ret   = -1;
3604df
         gfs3_open_rsp  rsp   = {0,};
3604df
         clnt_local_t  *local = NULL;
3604df
-        clnt_conf_t   *conf  = NULL;
3604df
         clnt_fd_ctx_t *fdctx = NULL;
3604df
         call_frame_t  *frame = NULL;
3604df
 
3604df
         frame = myframe;
3604df
         local = frame->local;
3604df
         fdctx = local->fdctx;
3604df
-        conf  = frame->this->private;
3604df
 
3604df
 
3604df
         if (-1 == req->rpc_status) {
3604df
@@ -822,14 +821,8 @@ client3_3_reopendir_cbk (struct rpc_req *req, struct iovec *iov, int count,
3604df
                 goto out;
3604df
         }
3604df
 
3604df
-        pthread_mutex_lock (&conf->lock);
3604df
-        {
3604df
-                fdctx->remote_fd = rsp.fd;
3604df
-        }
3604df
-        pthread_mutex_unlock (&conf->lock);
3604df
-
3604df
 out:
3604df
-        fdctx->reopen_done (fdctx, frame->this);
3604df
+        fdctx->reopen_done (fdctx, (rsp.op_ret) ? -1 : rsp.fd, frame->this);
3604df
 
3604df
         frame->local = NULL;
3604df
         STACK_DESTROY (frame->root);
3604df
@@ -890,7 +883,7 @@ out:
3604df
         if (local)
3604df
                 client_local_wipe (local);
3604df
 
3604df
-        fdctx->reopen_done (fdctx, this);
3604df
+        fdctx->reopen_done (fdctx, fdctx->remote_fd, this);
3604df
 
3604df
         return 0;
3604df
 
3604df
@@ -954,7 +947,7 @@ out:
3604df
         if (local)
3604df
                 client_local_wipe (local);
3604df
 
3604df
-        fdctx->reopen_done (fdctx, this);
3604df
+        fdctx->reopen_done (fdctx, fdctx->remote_fd, this);
3604df
 
3604df
         return 0;
3604df
 
3604df
diff --git a/xlators/protocol/client/src/client-rpc-fops.c b/xlators/protocol/client/src/client-rpc-fops.c
3604df
index 11a4e85..2eda19c 100644
3604df
--- a/xlators/protocol/client/src/client-rpc-fops.c
3604df
+++ b/xlators/protocol/client/src/client-rpc-fops.c
3604df
@@ -3315,6 +3315,7 @@ client3_3_releasedir (call_frame_t *frame, xlator_t *this,
3604df
         clnt_fd_ctx_t       *fdctx       = NULL;
3604df
         clnt_args_t         *args        = NULL;
3604df
         int64_t              remote_fd   = -1;
3604df
+        gf_boolean_t         destroy     = _gf_false;
3604df
 
3604df
         if (!this || !data)
3604df
                 goto out;
3604df
@@ -3333,16 +3334,19 @@ client3_3_releasedir (call_frame_t *frame, xlator_t *this,
3604df
                            reopen_cbk handle releasing
3604df
                         */
3604df
 
3604df
-                        if (remote_fd != -1)
3604df
+                        if (remote_fd == -1) {
3604df
+                                fdctx->released = 1;
3604df
+                        } else {
3604df
                                 list_del_init (&fdctx->sfd_pos);
3604df
-
3604df
-                        fdctx->released = 1;
3604df
+                                destroy = _gf_true;
3604df
+                        }
3604df
                 }
3604df
         }
3604df
         pthread_mutex_unlock (&conf->lock);
3604df
 
3604df
-        if (remote_fd != -1)
3604df
+        if (destroy)
3604df
                 client_fdctx_destroy (this, fdctx);
3604df
+
3604df
 out:
3604df
 
3604df
         return 0;
3604df
@@ -3357,6 +3361,7 @@ client3_3_release (call_frame_t *frame, xlator_t *this,
3604df
         clnt_fd_ctx_t    *fdctx         = NULL;
3604df
         clnt_args_t      *args          = NULL;
3604df
         lk_heal_state_t   lk_heal_state = GF_LK_HEAL_DONE;
3604df
+        gf_boolean_t      destroy       = _gf_false;
3604df
 
3604df
         if (!this || !data)
3604df
                 goto out;
3604df
@@ -3375,17 +3380,17 @@ client3_3_release (call_frame_t *frame, xlator_t *this,
3604df
                            in progress. Just mark ->released = 1 and let
3604df
                            reopen_cbk handle releasing
3604df
                         */
3604df
-
3604df
-                        if (remote_fd != -1 &&
3604df
-                            lk_heal_state == GF_LK_HEAL_DONE)
3604df
+                        if (remote_fd == -1) {
3604df
+                                fdctx->released = 1;
3604df
+                        } else if (lk_heal_state == GF_LK_HEAL_DONE) {
3604df
                                 list_del_init (&fdctx->sfd_pos);
3604df
-
3604df
-                        fdctx->released = 1;
3604df
+                                destroy = _gf_true;
3604df
+                        }
3604df
                 }
3604df
         }
3604df
         pthread_mutex_unlock (&conf->lock);
3604df
 
3604df
-        if (remote_fd != -1 && lk_heal_state == GF_LK_HEAL_DONE)
3604df
+        if (destroy)
3604df
                 client_fdctx_destroy (this, fdctx);
3604df
 out:
3604df
         return 0;
3604df
diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
3604df
index 8854b82..2c3a960 100644
3604df
--- a/xlators/protocol/client/src/client.h
3604df
+++ b/xlators/protocol/client/src/client.h
3604df
@@ -218,7 +218,7 @@ typedef struct _client_fd_ctx {
3604df
         pthread_mutex_t   mutex;
3604df
         lk_heal_state_t   lk_heal_state;
3604df
         uuid_t            gfid;
3604df
-        void (*reopen_done) (struct _client_fd_ctx*, xlator_t *);
3604df
+        void (*reopen_done)(struct _client_fd_ctx*, int64_t rfd, xlator_t *);
3604df
         struct list_head  lock_list;     /* List of all granted locks on this fd */
3604df
         int32_t           reopen_attempts;
3604df
 } clnt_fd_ctx_t;
3604df
@@ -347,7 +347,8 @@ int client_mark_fd_bad (xlator_t *this);
3604df
 int client_set_lk_version (xlator_t *this);
3604df
 
3604df
 int client_fd_lk_list_empty (fd_lk_ctx_t *lk_ctx, gf_boolean_t use_try_lock);
3604df
-void client_default_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this);
3604df
+void client_default_reopen_done (clnt_fd_ctx_t *fdctx, int64_t rfd,
3604df
+                                 xlator_t *this);
3604df
 void client_attempt_reopen (fd_t *fd, xlator_t *this);
3604df
 int client_get_remote_fd (xlator_t *this, fd_t *fd, int flags,
3604df
                           int64_t *remote_fd);
3604df
-- 
3604df
2.9.3
3604df