From 9ac60df1fdce697e38571d20e23fcd5472790350 Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Thu, 2 Feb 2017 16:41:45 +0530 Subject: [PATCH 288/294] protocol/client: Fix double free of client fdctx destroy Backport of https://review.gluster.org/#/c/16521/ This patch fixes the race between fd re-open code and fd release code, both of which free the fd context due to a race in certain variable checks as explained below: 1. client process (shd in the case of this BZ) sends an opendir to its children (client xlators) which send the fop to the bricks to get a valid fd. 2. Client xlator loses connection to the brick. fdctx->remote_fd is -1. 3. Client re-establishes connection. After handshake, it reopens the dir and sets fdctx->remote_fd to a valid fd in client3_3_reopendir_cbk(). 4. Meanwhile, shd sends an fd unref after it is done with the opendir. This triggers a releasedir (since fd->refcount becomes 0). 5. client3_3_releasedir() sees that fdctx->remote_fd is a valid number (i.e. not -1), sets fdctx->released=1 and calls client_fdctx_destroy(). 6. As a continuation of step 3, client_reopen_done() is called by client3_3_reopendir_cbk(), which sees that fdctx->released==1 and again calls client_fdctx_destroy(). Depending on when step 5 does GF_FREE(fdctx), we may crash at any place in step 6 in client3_3_reopendir_cbk() when it tries to access fdctx->{whatever}. 
Change-Id: Ifb668b248e6333a6f1fc09c1a116521f97102353 BUG: 1415101 Signed-off-by: Ravishankar N Reviewed-on: https://code.engineering.redhat.com/gerrit/97481 Reviewed-by: Atin Mukherjee --- xlators/protocol/client/src/client-handshake.c | 37 +++++++++++--------------- xlators/protocol/client/src/client-rpc-fops.c | 25 ++++++++++------- xlators/protocol/client/src/client.h | 5 ++-- 3 files changed, 33 insertions(+), 34 deletions(-) diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c index 74e7548..7866697 100644 --- a/xlators/protocol/client/src/client-handshake.c +++ b/xlators/protocol/client/src/client-handshake.c @@ -348,7 +348,7 @@ clnt_release_reopen_fd_cbk (struct rpc_req *req, struct iovec *iov, clnt_fd_lk_reacquire_failed (this, fdctx, conf); - fdctx->reopen_done (fdctx, this); + fdctx->reopen_done (fdctx, fdctx->remote_fd, this); frame->local = NULL; STACK_DESTROY (frame->root); @@ -381,7 +381,7 @@ clnt_release_reopen_fd (xlator_t *this, clnt_fd_ctx_t *fdctx) out: if (ret) { clnt_fd_lk_reacquire_failed (this, fdctx, conf); - fdctx->reopen_done (fdctx, this); + fdctx->reopen_done (fdctx, fdctx->remote_fd, this); } return 0; } @@ -500,7 +500,7 @@ client_reacquire_lock_cbk (struct rpc_req *req, struct iovec *iov, } pthread_mutex_unlock (&conf->lock); - fdctx->reopen_done (fdctx, this); + fdctx->reopen_done (fdctx, fdctx->remote_fd, this); } ret = 0; @@ -610,7 +610,7 @@ client_reacquire_lock (xlator_t *this, clnt_fd_ctx_t *fdctx) if (client_fd_lk_list_empty (fdctx->lk_ctx, _gf_false)) { gf_msg_debug (this->name, 0, "fd lock list is empty"); - fdctx->reopen_done (fdctx, this); + fdctx->reopen_done (fdctx, fdctx->remote_fd, this); } else { lk_ctx = fdctx->lk_ctx; @@ -626,14 +626,14 @@ out: } void -client_default_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this) +client_default_reopen_done (clnt_fd_ctx_t *fdctx, int64_t rfd, xlator_t *this) { gf_log_callingfn (this->name, GF_LOG_WARNING, "This function 
should never be called"); } void -client_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this) +client_reopen_done (clnt_fd_ctx_t *fdctx, int64_t rfd, xlator_t *this) { clnt_conf_t *conf = NULL; gf_boolean_t destroy = _gf_false; @@ -642,21 +642,23 @@ client_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this) pthread_mutex_lock (&conf->lock); { + fdctx->remote_fd = rfd; fdctx->reopen_attempts = 0; + fdctx->reopen_done = client_default_reopen_done; if (!fdctx->released) list_add_tail (&fdctx->sfd_pos, &conf->saved_fds); else destroy = _gf_true; - fdctx->reopen_done = client_default_reopen_done; } pthread_mutex_unlock (&conf->lock); if (destroy) client_fdctx_destroy (this, fdctx); + } void -client_child_up_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this) +client_child_up_reopen_done (clnt_fd_ctx_t *fdctx, int64_t rfd, xlator_t *this) { clnt_conf_t *conf = NULL; uint64_t fd_count = 0; @@ -669,7 +671,7 @@ client_child_up_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this) } UNLOCK (&conf->rec_lock); - client_reopen_done (fdctx, this); + client_reopen_done (fdctx, rfd, this); if (fd_count == 0) { gf_msg (this->name, GF_LOG_INFO, 0, PC_MSG_CHILD_UP_NOTIFY, "last fd open'd/lock-self-heal'd - notifying CHILD-UP"); @@ -732,7 +734,6 @@ client3_3_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count, pthread_mutex_lock (&conf->lock); { - fdctx->remote_fd = rsp.fd; if (!fdctx->released) { if (conf->lk_heal && !client_fd_lk_list_empty (fdctx->lk_ctx, @@ -762,7 +763,7 @@ client3_3_reopen_cbk (struct rpc_req *req, struct iovec *iov, int count, out: if (!attempt_lock_recovery) - fdctx->reopen_done (fdctx, this); + fdctx->reopen_done (fdctx, (rsp.op_ret) ? 
-1 : rsp.fd, this); frame->local = NULL; STACK_DESTROY (frame->root); @@ -779,14 +780,12 @@ client3_3_reopendir_cbk (struct rpc_req *req, struct iovec *iov, int count, int32_t ret = -1; gfs3_open_rsp rsp = {0,}; clnt_local_t *local = NULL; - clnt_conf_t *conf = NULL; clnt_fd_ctx_t *fdctx = NULL; call_frame_t *frame = NULL; frame = myframe; local = frame->local; fdctx = local->fdctx; - conf = frame->this->private; if (-1 == req->rpc_status) { @@ -822,14 +821,8 @@ client3_3_reopendir_cbk (struct rpc_req *req, struct iovec *iov, int count, goto out; } - pthread_mutex_lock (&conf->lock); - { - fdctx->remote_fd = rsp.fd; - } - pthread_mutex_unlock (&conf->lock); - out: - fdctx->reopen_done (fdctx, frame->this); + fdctx->reopen_done (fdctx, (rsp.op_ret) ? -1 : rsp.fd, frame->this); frame->local = NULL; STACK_DESTROY (frame->root); @@ -890,7 +883,7 @@ out: if (local) client_local_wipe (local); - fdctx->reopen_done (fdctx, this); + fdctx->reopen_done (fdctx, fdctx->remote_fd, this); return 0; @@ -954,7 +947,7 @@ out: if (local) client_local_wipe (local); - fdctx->reopen_done (fdctx, this); + fdctx->reopen_done (fdctx, fdctx->remote_fd, this); return 0; diff --git a/xlators/protocol/client/src/client-rpc-fops.c b/xlators/protocol/client/src/client-rpc-fops.c index 11a4e85..2eda19c 100644 --- a/xlators/protocol/client/src/client-rpc-fops.c +++ b/xlators/protocol/client/src/client-rpc-fops.c @@ -3315,6 +3315,7 @@ client3_3_releasedir (call_frame_t *frame, xlator_t *this, clnt_fd_ctx_t *fdctx = NULL; clnt_args_t *args = NULL; int64_t remote_fd = -1; + gf_boolean_t destroy = _gf_false; if (!this || !data) goto out; @@ -3333,16 +3334,19 @@ client3_3_releasedir (call_frame_t *frame, xlator_t *this, reopen_cbk handle releasing */ - if (remote_fd != -1) + if (remote_fd == -1) { + fdctx->released = 1; + } else { list_del_init (&fdctx->sfd_pos); - - fdctx->released = 1; + destroy = _gf_true; + } } } pthread_mutex_unlock (&conf->lock); - if (remote_fd != -1) + if (destroy) 
client_fdctx_destroy (this, fdctx); + out: return 0; @@ -3357,6 +3361,7 @@ client3_3_release (call_frame_t *frame, xlator_t *this, clnt_fd_ctx_t *fdctx = NULL; clnt_args_t *args = NULL; lk_heal_state_t lk_heal_state = GF_LK_HEAL_DONE; + gf_boolean_t destroy = _gf_false; if (!this || !data) goto out; @@ -3375,17 +3380,17 @@ client3_3_release (call_frame_t *frame, xlator_t *this, in progress. Just mark ->released = 1 and let reopen_cbk handle releasing */ - - if (remote_fd != -1 && - lk_heal_state == GF_LK_HEAL_DONE) + if (remote_fd == -1) { + fdctx->released = 1; + } else if (lk_heal_state == GF_LK_HEAL_DONE) { list_del_init (&fdctx->sfd_pos); - - fdctx->released = 1; + destroy = _gf_true; + } } } pthread_mutex_unlock (&conf->lock); - if (remote_fd != -1 && lk_heal_state == GF_LK_HEAL_DONE) + if (destroy) client_fdctx_destroy (this, fdctx); out: return 0; diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h index 8854b82..2c3a960 100644 --- a/xlators/protocol/client/src/client.h +++ b/xlators/protocol/client/src/client.h @@ -218,7 +218,7 @@ typedef struct _client_fd_ctx { pthread_mutex_t mutex; lk_heal_state_t lk_heal_state; uuid_t gfid; - void (*reopen_done) (struct _client_fd_ctx*, xlator_t *); + void (*reopen_done)(struct _client_fd_ctx*, int64_t rfd, xlator_t *); struct list_head lock_list; /* List of all granted locks on this fd */ int32_t reopen_attempts; } clnt_fd_ctx_t; @@ -347,7 +347,8 @@ int client_mark_fd_bad (xlator_t *this); int client_set_lk_version (xlator_t *this); int client_fd_lk_list_empty (fd_lk_ctx_t *lk_ctx, gf_boolean_t use_try_lock); -void client_default_reopen_done (clnt_fd_ctx_t *fdctx, xlator_t *this); +void client_default_reopen_done (clnt_fd_ctx_t *fdctx, int64_t rfd, + xlator_t *this); void client_attempt_reopen (fd_t *fd, xlator_t *this); int client_get_remote_fd (xlator_t *this, fd_t *fd, int flags, int64_t *remote_fd); -- 2.9.3