From a7ade5267ebaf4bf318ee2aebe48000cee583e3b Mon Sep 17 00:00:00 2001 From: Krutika Dhananjay Date: Fri, 28 Dec 2018 18:53:15 +0530 Subject: [PATCH 505/506] features/shard: Fix launch of multiple synctasks for background deletion > Upstream: https://review.gluster.org/21957 > BUG: 1662368 > Change-Id: Ib33773d27fb4be463c7a8a5a6a4b63689705324e PROBLEM: When multiple sharded files are deleted in quick succession, multiple issues were observed: 1. misleading logs corresponding to a sharded file where while one log message said the shards corresponding to the file were deleted successfully, this was followed by multiple logs suggesting the very same operation failed. This was because of multiple synctasks attempting to clean up shards of the same file and only one of them succeeding (the one that gets ENTRYLK successfully), and the rest of them logging failure. 2. multiple synctasks to do background deletion would be launched, one for each deleted file but all of them could readdir entries from .remove_me at the same time could potentially contend for ENTRYLK on .shard for each of the entry names. This is undesirable and wasteful. FIX: Background deletion will now follow a state machine. In the event that there are multiple attempts to launch synctask for background deletion, one for each file deleted, only the first task is launched. And if while this task is doing the cleanup, more attempts are made to delete other files, the state of the synctask is adjusted so that it restarts the crawl even after reaching end-of-directory to pick up any files it may have missed in the previous iteration. This patch also fixes uninitialized lk-owner during syncop_entrylk() which was leading to multiple background deletion synctasks entering the critical section at the same time and leading to illegal memory access of base inode in the second syntcask after it was destroyed post shard deletion by the first synctask. Change-Id: Ib33773d27fb4be463c7a8a5a6a4b63689705324e BUG: 1662059 Signed-off-by: Krutika Dhananjay Reviewed-on: https://code.engineering.redhat.com/gerrit/160437 Tested-by: RHGS Build Bot Reviewed-by: Xavi Hernandez --- xlators/features/shard/src/shard.c | 199 +++++++++++++++++++---------- xlators/features/shard/src/shard.h | 12 +- 2 files changed, 136 insertions(+), 75 deletions(-) diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c index 5b72399f5..8aed1a386 100644 --- a/xlators/features/shard/src/shard.c +++ b/xlators/features/shard/src/shard.c @@ -1465,16 +1465,45 @@ int shard_start_background_deletion (xlator_t *this) { int ret = 0; + gf_boolean_t i_cleanup = _gf_true; + shard_priv_t *priv = NULL; call_frame_t *cleanup_frame = NULL; + priv = this->private; + + LOCK(&priv->lock); + { + switch (priv->bg_del_state) { + case SHARD_BG_DELETION_NONE: + i_cleanup = _gf_true; + priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; + break; + case SHARD_BG_DELETION_LAUNCHING: + i_cleanup = _gf_false; + break; + case SHARD_BG_DELETION_IN_PROGRESS: + priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING; + i_cleanup = _gf_false; + break; + default: + break; + } + } + UNLOCK(&priv->lock); + if (!i_cleanup) + return 0; + cleanup_frame = create_frame (this, this->ctx->pool); if (!cleanup_frame) { gf_msg (this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED, "Failed to create " "new frame to delete shards"); - return -ENOMEM; + ret = -ENOMEM; + goto err; } + set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root); + ret = synctask_new (this->ctx->env, shard_delete_shards, shard_delete_shards_cbk, cleanup_frame, cleanup_frame); @@ -1484,7 +1513,16 @@ shard_start_background_deletion (xlator_t *this) "failed to create task to do background " "cleanup of shards"); STACK_DESTROY (cleanup_frame->root); + goto err; } + return 0; + +err: + LOCK(&priv->lock); + { + priv->bg_del_state = SHARD_BG_DELETION_NONE; + } + UNLOCK(&priv->lock); return ret; } @@ -1493,7 +1531,7 @@ shard_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata, struct iatt *postparent) { - int ret = 0; + int ret = -1; shard_priv_t *priv = NULL; gf_boolean_t i_start_cleanup = _gf_false; @@ -1526,22 +1564,23 @@ shard_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, LOCK (&priv->lock); { - if (priv->first_lookup == SHARD_FIRST_LOOKUP_PENDING) { - priv->first_lookup = SHARD_FIRST_LOOKUP_IN_PROGRESS; + if (priv->first_lookup_done == _gf_false) { + priv->first_lookup_done = _gf_true; i_start_cleanup = _gf_true; } } UNLOCK (&priv->lock); - if (i_start_cleanup) { - ret = shard_start_background_deletion (this); - if (ret) { - LOCK (&priv->lock); - { - priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING; - } - UNLOCK (&priv->lock); + if (!i_start_cleanup) + goto unwind; + + ret = shard_start_background_deletion(this); + if (ret < 0) { + LOCK(&priv->lock); + { + priv->first_lookup_done = _gf_false; } + UNLOCK(&priv->lock); } unwind: SHARD_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf, @@ -2940,9 +2979,10 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num) if (ctx->fsync_needed) { unref_base_inode++; list_del_init (&ctx->to_fsync_list); - if (base_inode) + if (base_inode) { __shard_inode_ctx_get (base_inode, this, &base_ictx); - base_ictx->fsync_count--; + base_ictx->fsync_count--; + } } } UNLOCK(&inode->lock); @@ -3334,10 +3374,15 @@ shard_delete_shards_of_entry (call_frame_t *cleanup_frame, xlator_t *this, loc.inode = inode_ref (priv->dot_shard_rm_inode); ret = syncop_entrylk (FIRST_CHILD(this), this->name, &loc, - entry->d_name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL, - NULL); - if (ret) + entry->d_name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, + NULL, NULL); + if (ret < 0) { + if (ret == -EAGAIN) { + ret = 0; + } goto out; + } + { ret = __shard_delete_shards_of_entry (cleanup_frame, this, entry, inode); @@ -3352,20 +3397,6 @@ out: int shard_delete_shards_cbk (int ret, call_frame_t *frame, void *data) { - xlator_t *this = NULL; - shard_priv_t *priv = NULL; - - this = frame->this; - priv = this->private; - - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - SHARD_MSG_SHARDS_DELETION_FAILED, - "Background deletion of shards failed"); - priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING; - } else { - priv->first_lookup = SHARD_FIRST_LOOKUP_DONE; - } SHARD_STACK_DESTROY (frame); return 0; } @@ -3482,6 +3513,7 @@ shard_delete_shards (void *opaque) gf_dirent_t entries; gf_dirent_t *entry = NULL; call_frame_t *cleanup_frame = NULL; + gf_boolean_t done = _gf_false; this = THIS; priv = this->private; @@ -3534,52 +3566,81 @@ shard_delete_shards (void *opaque) goto err; } - while ((ret = syncop_readdirp (FIRST_CHILD(this), local->fd, 131072, - offset, &entries, local->xattr_req, - NULL))) { - if (ret > 0) - ret = 0; - list_for_each_entry (entry, &entries.list, list) { - offset = entry->d_off; - - if (!strcmp (entry->d_name, ".") || - !strcmp (entry->d_name, "..")) - continue; + for (;;) { + offset = 0; + LOCK(&priv->lock); + { + if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) { + priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS; + } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) { + priv->bg_del_state = SHARD_BG_DELETION_NONE; + done = _gf_true; + } + } + UNLOCK(&priv->lock); + if (done) + break; + while ((ret = syncop_readdirp (FIRST_CHILD(this), local->fd, + 131072, offset, &entries, + local->xattr_req, NULL))) { + if (ret > 0) + ret = 0; + list_for_each_entry (entry, &entries.list, list) { + offset = entry->d_off; + + if (!strcmp (entry->d_name, ".") || + !strcmp (entry->d_name, "..")) + continue; - if (!entry->inode) { - ret = shard_lookup_marker_entry (this, local, - entry); - if (ret < 0) + if (!entry->inode) { + ret = shard_lookup_marker_entry (this, + local, + entry); + if (ret < 0) + continue; + } + link_inode = inode_link (entry->inode, + local->fd->inode, + entry->d_name, + &entry->d_stat); + + gf_msg_debug (this->name, 0, "Initiating " + "deletion of shards of gfid %s", + entry->d_name); + ret = shard_delete_shards_of_entry (cleanup_frame, + this, + entry, + link_inode); + inode_unlink (link_inode, local->fd->inode, + entry->d_name); + inode_unref (link_inode); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, -ret, + SHARD_MSG_SHARDS_DELETION_FAILED, + "Failed to clean up shards of " + "gfid %s", entry->d_name); continue; - } - link_inode = inode_link (entry->inode, local->fd->inode, - entry->d_name, &entry->d_stat); - - gf_msg_debug (this->name, 0, "Initiating deletion of " - "shards of gfid %s", entry->d_name); - ret = shard_delete_shards_of_entry (cleanup_frame, this, - entry, link_inode); - inode_unlink (link_inode, local->fd->inode, - entry->d_name); - inode_unref (link_inode); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, -ret, - SHARD_MSG_SHARDS_DELETION_FAILED, - "Failed to clean up shards of gfid %s", + } + gf_msg (this->name, GF_LOG_INFO, 0, + SHARD_MSG_SHARDS_DELETION_COMPLETED, + "Deleted shards of gfid=%s from backend", entry->d_name); - continue; } - gf_msg (this->name, GF_LOG_INFO, 0, - SHARD_MSG_SHARDS_DELETION_COMPLETED, "Deleted " - "shards of gfid=%s from backend", - entry->d_name); + gf_dirent_free (&entries); + if (ret) + break; } - gf_dirent_free (&entries); - if (ret) - break; } ret = 0; + loc_wipe(&loc); + return ret; + err: + LOCK(&priv->lock); + { + priv->bg_del_state = SHARD_BG_DELETION_NONE; + } + UNLOCK(&priv->lock); loc_wipe (&loc); return ret; } diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h index ac3813c8c..37934f3a2 100644 --- a/xlators/features/shard/src/shard.h +++ b/xlators/features/shard/src/shard.h @@ -196,11 +196,10 @@ shard_unlock_entrylk (call_frame_t *frame, xlator_t *this); } while (0) typedef enum { - SHARD_FIRST_LOOKUP_PENDING = 0, - SHARD_FIRST_LOOKUP_IN_PROGRESS, - SHARD_FIRST_LOOKUP_DONE, -} shard_first_lookup_state_t; - + SHARD_BG_DELETION_NONE = 0, + SHARD_BG_DELETION_LAUNCHING, + SHARD_BG_DELETION_IN_PROGRESS, +} shard_bg_deletion_state_t; /* rm = "remove me" */ typedef struct shard_priv { @@ -213,7 +212,8 @@ typedef struct shard_priv { int inode_count; struct list_head ilist_head; uint32_t deletion_rate; - shard_first_lookup_state_t first_lookup; + shard_bg_deletion_state_t bg_del_state; + gf_boolean_t first_lookup_done; uint64_t lru_limit; } shard_priv_t; -- 2.20.1