|
|
887953 |
From a7ade5267ebaf4bf318ee2aebe48000cee583e3b Mon Sep 17 00:00:00 2001
|
|
|
887953 |
From: Krutika Dhananjay <kdhananj@redhat.com>
|
|
|
887953 |
Date: Fri, 28 Dec 2018 18:53:15 +0530
|
|
|
887953 |
Subject: [PATCH 505/506] features/shard: Fix launch of multiple synctasks for
|
|
|
887953 |
background deletion
|
|
|
887953 |
|
|
|
887953 |
> Upstream: https://review.gluster.org/21957
|
|
|
887953 |
> BUG: 1662368
|
|
|
887953 |
> Change-Id: Ib33773d27fb4be463c7a8a5a6a4b63689705324e
|
|
|
887953 |
|
|
|
887953 |
PROBLEM:
|
|
|
887953 |
|
|
|
887953 |
When multiple sharded files are deleted in quick succession, multiple
|
|
|
887953 |
issues were observed:
|
|
|
887953 |
1. misleading logs corresponding to a sharded file where while one log
|
|
|
887953 |
message said the shards corresponding to the file were deleted
|
|
|
887953 |
successfully, this was followed by multiple logs suggesting the very
|
|
|
887953 |
same operation failed. This was because of multiple synctasks
|
|
|
887953 |
attempting to clean up shards of the same file and only one of them
|
|
|
887953 |
succeeding (the one that gets ENTRYLK successfully), and the rest of
|
|
|
887953 |
them logging failure.
|
|
|
887953 |
|
|
|
887953 |
2. multiple synctasks to do background deletion would be launched, one
|
|
|
887953 |
for each deleted file but all of them could readdir entries from
|
|
|
887953 |
.remove_me at the same time could potentially contend for ENTRYLK on
|
|
|
887953 |
.shard for each of the entry names. This is undesirable and wasteful.
|
|
|
887953 |
|
|
|
887953 |
FIX:
|
|
|
887953 |
Background deletion will now follow a state machine. In the event that
|
|
|
887953 |
there are multiple attempts to launch synctask for background deletion,
|
|
|
887953 |
one for each file deleted, only the first task is launched. And if while
|
|
|
887953 |
this task is doing the cleanup, more attempts are made to delete other
|
|
|
887953 |
files, the state of the synctask is adjusted so that it restarts the
|
|
|
887953 |
crawl even after reaching end-of-directory to pick up any files it may
|
|
|
887953 |
have missed in the previous iteration.
|
|
|
887953 |
|
|
|
887953 |
This patch also fixes uninitialized lk-owner during syncop_entrylk()
|
|
|
887953 |
which was leading to multiple background deletion synctasks entering
|
|
|
887953 |
the critical section at the same time and leading to illegal memory access
|
|
|
887953 |
of base inode in the second syntcask after it was destroyed post shard deletion
|
|
|
887953 |
by the first synctask.
|
|
|
887953 |
|
|
|
887953 |
Change-Id: Ib33773d27fb4be463c7a8a5a6a4b63689705324e
|
|
|
887953 |
BUG: 1662059
|
|
|
887953 |
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
|
|
|
887953 |
Reviewed-on: https://code.engineering.redhat.com/gerrit/160437
|
|
|
887953 |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
887953 |
Reviewed-by: Xavi Hernandez <xhernandez@redhat.com>
|
|
|
887953 |
---
|
|
|
887953 |
xlators/features/shard/src/shard.c | 199 +++++++++++++++++++----------
|
|
|
887953 |
xlators/features/shard/src/shard.h | 12 +-
|
|
|
887953 |
2 files changed, 136 insertions(+), 75 deletions(-)
|
|
|
887953 |
|
|
|
887953 |
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
|
|
|
887953 |
index 5b72399f5..8aed1a386 100644
|
|
|
887953 |
--- a/xlators/features/shard/src/shard.c
|
|
|
887953 |
+++ b/xlators/features/shard/src/shard.c
|
|
|
887953 |
@@ -1465,16 +1465,45 @@ int
|
|
|
887953 |
shard_start_background_deletion (xlator_t *this)
|
|
|
887953 |
{
|
|
|
887953 |
int ret = 0;
|
|
|
887953 |
+ gf_boolean_t i_cleanup = _gf_true;
|
|
|
887953 |
+ shard_priv_t *priv = NULL;
|
|
|
887953 |
call_frame_t *cleanup_frame = NULL;
|
|
|
887953 |
|
|
|
887953 |
+ priv = this->private;
|
|
|
887953 |
+
|
|
|
887953 |
+ LOCK(&priv->lock);
|
|
|
887953 |
+ {
|
|
|
887953 |
+ switch (priv->bg_del_state) {
|
|
|
887953 |
+ case SHARD_BG_DELETION_NONE:
|
|
|
887953 |
+ i_cleanup = _gf_true;
|
|
|
887953 |
+ priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
|
|
|
887953 |
+ break;
|
|
|
887953 |
+ case SHARD_BG_DELETION_LAUNCHING:
|
|
|
887953 |
+ i_cleanup = _gf_false;
|
|
|
887953 |
+ break;
|
|
|
887953 |
+ case SHARD_BG_DELETION_IN_PROGRESS:
|
|
|
887953 |
+ priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
|
|
|
887953 |
+ i_cleanup = _gf_false;
|
|
|
887953 |
+ break;
|
|
|
887953 |
+ default:
|
|
|
887953 |
+ break;
|
|
|
887953 |
+ }
|
|
|
887953 |
+ }
|
|
|
887953 |
+ UNLOCK(&priv->lock);
|
|
|
887953 |
+ if (!i_cleanup)
|
|
|
887953 |
+ return 0;
|
|
|
887953 |
+
|
|
|
887953 |
cleanup_frame = create_frame (this, this->ctx->pool);
|
|
|
887953 |
if (!cleanup_frame) {
|
|
|
887953 |
gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
|
|
|
887953 |
SHARD_MSG_MEMALLOC_FAILED, "Failed to create "
|
|
|
887953 |
"new frame to delete shards");
|
|
|
887953 |
- return -ENOMEM;
|
|
|
887953 |
+ ret = -ENOMEM;
|
|
|
887953 |
+ goto err;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
+ set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root);
|
|
|
887953 |
+
|
|
|
887953 |
ret = synctask_new (this->ctx->env, shard_delete_shards,
|
|
|
887953 |
shard_delete_shards_cbk, cleanup_frame,
|
|
|
887953 |
cleanup_frame);
|
|
|
887953 |
@@ -1484,7 +1513,16 @@ shard_start_background_deletion (xlator_t *this)
|
|
|
887953 |
"failed to create task to do background "
|
|
|
887953 |
"cleanup of shards");
|
|
|
887953 |
STACK_DESTROY (cleanup_frame->root);
|
|
|
887953 |
+ goto err;
|
|
|
887953 |
}
|
|
|
887953 |
+ return 0;
|
|
|
887953 |
+
|
|
|
887953 |
+err:
|
|
|
887953 |
+ LOCK(&priv->lock);
|
|
|
887953 |
+ {
|
|
|
887953 |
+ priv->bg_del_state = SHARD_BG_DELETION_NONE;
|
|
|
887953 |
+ }
|
|
|
887953 |
+ UNLOCK(&priv->lock);
|
|
|
887953 |
return ret;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
@@ -1493,7 +1531,7 @@ shard_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
|
887953 |
int32_t op_ret, int32_t op_errno, inode_t *inode,
|
|
|
887953 |
struct iatt *buf, dict_t *xdata, struct iatt *postparent)
|
|
|
887953 |
{
|
|
|
887953 |
- int ret = 0;
|
|
|
887953 |
+ int ret = -1;
|
|
|
887953 |
shard_priv_t *priv = NULL;
|
|
|
887953 |
gf_boolean_t i_start_cleanup = _gf_false;
|
|
|
887953 |
|
|
|
887953 |
@@ -1526,22 +1564,23 @@ shard_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
|
887953 |
|
|
|
887953 |
LOCK (&priv->lock);
|
|
|
887953 |
{
|
|
|
887953 |
- if (priv->first_lookup == SHARD_FIRST_LOOKUP_PENDING) {
|
|
|
887953 |
- priv->first_lookup = SHARD_FIRST_LOOKUP_IN_PROGRESS;
|
|
|
887953 |
+ if (priv->first_lookup_done == _gf_false) {
|
|
|
887953 |
+ priv->first_lookup_done = _gf_true;
|
|
|
887953 |
i_start_cleanup = _gf_true;
|
|
|
887953 |
}
|
|
|
887953 |
}
|
|
|
887953 |
UNLOCK (&priv->lock);
|
|
|
887953 |
|
|
|
887953 |
- if (i_start_cleanup) {
|
|
|
887953 |
- ret = shard_start_background_deletion (this);
|
|
|
887953 |
- if (ret) {
|
|
|
887953 |
- LOCK (&priv->lock);
|
|
|
887953 |
- {
|
|
|
887953 |
- priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING;
|
|
|
887953 |
- }
|
|
|
887953 |
- UNLOCK (&priv->lock);
|
|
|
887953 |
+ if (!i_start_cleanup)
|
|
|
887953 |
+ goto unwind;
|
|
|
887953 |
+
|
|
|
887953 |
+ ret = shard_start_background_deletion(this);
|
|
|
887953 |
+ if (ret < 0) {
|
|
|
887953 |
+ LOCK(&priv->lock);
|
|
|
887953 |
+ {
|
|
|
887953 |
+ priv->first_lookup_done = _gf_false;
|
|
|
887953 |
}
|
|
|
887953 |
+ UNLOCK(&priv->lock);
|
|
|
887953 |
}
|
|
|
887953 |
unwind:
|
|
|
887953 |
SHARD_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf,
|
|
|
887953 |
@@ -2940,9 +2979,10 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
|
|
|
887953 |
if (ctx->fsync_needed) {
|
|
|
887953 |
unref_base_inode++;
|
|
|
887953 |
list_del_init (&ctx->to_fsync_list);
|
|
|
887953 |
- if (base_inode)
|
|
|
887953 |
+ if (base_inode) {
|
|
|
887953 |
__shard_inode_ctx_get (base_inode, this, &base_ictx);
|
|
|
887953 |
- base_ictx->fsync_count--;
|
|
|
887953 |
+ base_ictx->fsync_count--;
|
|
|
887953 |
+ }
|
|
|
887953 |
}
|
|
|
887953 |
}
|
|
|
887953 |
UNLOCK(&inode->lock);
|
|
|
887953 |
@@ -3334,10 +3374,15 @@ shard_delete_shards_of_entry (call_frame_t *cleanup_frame, xlator_t *this,
|
|
|
887953 |
loc.inode = inode_ref (priv->dot_shard_rm_inode);
|
|
|
887953 |
|
|
|
887953 |
ret = syncop_entrylk (FIRST_CHILD(this), this->name, &loc,
|
|
|
887953 |
- entry->d_name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL,
|
|
|
887953 |
- NULL);
|
|
|
887953 |
- if (ret)
|
|
|
887953 |
+ entry->d_name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
|
|
|
887953 |
+ NULL, NULL);
|
|
|
887953 |
+ if (ret < 0) {
|
|
|
887953 |
+ if (ret == -EAGAIN) {
|
|
|
887953 |
+ ret = 0;
|
|
|
887953 |
+ }
|
|
|
887953 |
goto out;
|
|
|
887953 |
+ }
|
|
|
887953 |
+
|
|
|
887953 |
{
|
|
|
887953 |
ret = __shard_delete_shards_of_entry (cleanup_frame, this,
|
|
|
887953 |
entry, inode);
|
|
|
887953 |
@@ -3352,20 +3397,6 @@ out:
|
|
|
887953 |
int
|
|
|
887953 |
shard_delete_shards_cbk (int ret, call_frame_t *frame, void *data)
|
|
|
887953 |
{
|
|
|
887953 |
- xlator_t *this = NULL;
|
|
|
887953 |
- shard_priv_t *priv = NULL;
|
|
|
887953 |
-
|
|
|
887953 |
- this = frame->this;
|
|
|
887953 |
- priv = this->private;
|
|
|
887953 |
-
|
|
|
887953 |
- if (ret < 0) {
|
|
|
887953 |
- gf_msg (this->name, GF_LOG_WARNING, -ret,
|
|
|
887953 |
- SHARD_MSG_SHARDS_DELETION_FAILED,
|
|
|
887953 |
- "Background deletion of shards failed");
|
|
|
887953 |
- priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING;
|
|
|
887953 |
- } else {
|
|
|
887953 |
- priv->first_lookup = SHARD_FIRST_LOOKUP_DONE;
|
|
|
887953 |
- }
|
|
|
887953 |
SHARD_STACK_DESTROY (frame);
|
|
|
887953 |
return 0;
|
|
|
887953 |
}
|
|
|
887953 |
@@ -3482,6 +3513,7 @@ shard_delete_shards (void *opaque)
|
|
|
887953 |
gf_dirent_t entries;
|
|
|
887953 |
gf_dirent_t *entry = NULL;
|
|
|
887953 |
call_frame_t *cleanup_frame = NULL;
|
|
|
887953 |
+ gf_boolean_t done = _gf_false;
|
|
|
887953 |
|
|
|
887953 |
this = THIS;
|
|
|
887953 |
priv = this->private;
|
|
|
887953 |
@@ -3534,52 +3566,81 @@ shard_delete_shards (void *opaque)
|
|
|
887953 |
goto err;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
- while ((ret = syncop_readdirp (FIRST_CHILD(this), local->fd, 131072,
|
|
|
887953 |
- offset, &entries, local->xattr_req,
|
|
|
887953 |
- NULL))) {
|
|
|
887953 |
- if (ret > 0)
|
|
|
887953 |
- ret = 0;
|
|
|
887953 |
- list_for_each_entry (entry, &entries.list, list) {
|
|
|
887953 |
- offset = entry->d_off;
|
|
|
887953 |
-
|
|
|
887953 |
- if (!strcmp (entry->d_name, ".") ||
|
|
|
887953 |
- !strcmp (entry->d_name, ".."))
|
|
|
887953 |
- continue;
|
|
|
887953 |
+ for (;;) {
|
|
|
887953 |
+ offset = 0;
|
|
|
887953 |
+ LOCK(&priv->lock);
|
|
|
887953 |
+ {
|
|
|
887953 |
+ if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) {
|
|
|
887953 |
+ priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS;
|
|
|
887953 |
+ } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) {
|
|
|
887953 |
+ priv->bg_del_state = SHARD_BG_DELETION_NONE;
|
|
|
887953 |
+ done = _gf_true;
|
|
|
887953 |
+ }
|
|
|
887953 |
+ }
|
|
|
887953 |
+ UNLOCK(&priv->lock);
|
|
|
887953 |
+ if (done)
|
|
|
887953 |
+ break;
|
|
|
887953 |
+ while ((ret = syncop_readdirp (FIRST_CHILD(this), local->fd,
|
|
|
887953 |
+ 131072, offset, &entries,
|
|
|
887953 |
+ local->xattr_req, NULL))) {
|
|
|
887953 |
+ if (ret > 0)
|
|
|
887953 |
+ ret = 0;
|
|
|
887953 |
+ list_for_each_entry (entry, &entries.list, list) {
|
|
|
887953 |
+ offset = entry->d_off;
|
|
|
887953 |
+
|
|
|
887953 |
+ if (!strcmp (entry->d_name, ".") ||
|
|
|
887953 |
+ !strcmp (entry->d_name, ".."))
|
|
|
887953 |
+ continue;
|
|
|
887953 |
|
|
|
887953 |
- if (!entry->inode) {
|
|
|
887953 |
- ret = shard_lookup_marker_entry (this, local,
|
|
|
887953 |
- entry);
|
|
|
887953 |
- if (ret < 0)
|
|
|
887953 |
+ if (!entry->inode) {
|
|
|
887953 |
+ ret = shard_lookup_marker_entry (this,
|
|
|
887953 |
+ local,
|
|
|
887953 |
+ entry);
|
|
|
887953 |
+ if (ret < 0)
|
|
|
887953 |
+ continue;
|
|
|
887953 |
+ }
|
|
|
887953 |
+ link_inode = inode_link (entry->inode,
|
|
|
887953 |
+ local->fd->inode,
|
|
|
887953 |
+ entry->d_name,
|
|
|
887953 |
+ &entry->d_stat);
|
|
|
887953 |
+
|
|
|
887953 |
+ gf_msg_debug (this->name, 0, "Initiating "
|
|
|
887953 |
+ "deletion of shards of gfid %s",
|
|
|
887953 |
+ entry->d_name);
|
|
|
887953 |
+ ret = shard_delete_shards_of_entry (cleanup_frame,
|
|
|
887953 |
+ this,
|
|
|
887953 |
+ entry,
|
|
|
887953 |
+ link_inode);
|
|
|
887953 |
+ inode_unlink (link_inode, local->fd->inode,
|
|
|
887953 |
+ entry->d_name);
|
|
|
887953 |
+ inode_unref (link_inode);
|
|
|
887953 |
+ if (ret) {
|
|
|
887953 |
+ gf_msg (this->name, GF_LOG_ERROR, -ret,
|
|
|
887953 |
+ SHARD_MSG_SHARDS_DELETION_FAILED,
|
|
|
887953 |
+ "Failed to clean up shards of "
|
|
|
887953 |
+ "gfid %s", entry->d_name);
|
|
|
887953 |
continue;
|
|
|
887953 |
- }
|
|
|
887953 |
- link_inode = inode_link (entry->inode, local->fd->inode,
|
|
|
887953 |
- entry->d_name, &entry->d_stat);
|
|
|
887953 |
-
|
|
|
887953 |
- gf_msg_debug (this->name, 0, "Initiating deletion of "
|
|
|
887953 |
- "shards of gfid %s", entry->d_name);
|
|
|
887953 |
- ret = shard_delete_shards_of_entry (cleanup_frame, this,
|
|
|
887953 |
- entry, link_inode);
|
|
|
887953 |
- inode_unlink (link_inode, local->fd->inode,
|
|
|
887953 |
- entry->d_name);
|
|
|
887953 |
- inode_unref (link_inode);
|
|
|
887953 |
- if (ret) {
|
|
|
887953 |
- gf_msg (this->name, GF_LOG_ERROR, -ret,
|
|
|
887953 |
- SHARD_MSG_SHARDS_DELETION_FAILED,
|
|
|
887953 |
- "Failed to clean up shards of gfid %s",
|
|
|
887953 |
+ }
|
|
|
887953 |
+ gf_msg (this->name, GF_LOG_INFO, 0,
|
|
|
887953 |
+ SHARD_MSG_SHARDS_DELETION_COMPLETED,
|
|
|
887953 |
+ "Deleted shards of gfid=%s from backend",
|
|
|
887953 |
entry->d_name);
|
|
|
887953 |
- continue;
|
|
|
887953 |
}
|
|
|
887953 |
- gf_msg (this->name, GF_LOG_INFO, 0,
|
|
|
887953 |
- SHARD_MSG_SHARDS_DELETION_COMPLETED, "Deleted "
|
|
|
887953 |
- "shards of gfid=%s from backend",
|
|
|
887953 |
- entry->d_name);
|
|
|
887953 |
+ gf_dirent_free (&entries);
|
|
|
887953 |
+ if (ret)
|
|
|
887953 |
+ break;
|
|
|
887953 |
}
|
|
|
887953 |
- gf_dirent_free (&entries);
|
|
|
887953 |
- if (ret)
|
|
|
887953 |
- break;
|
|
|
887953 |
}
|
|
|
887953 |
ret = 0;
|
|
|
887953 |
+        loc_wipe(&loc);
|
|
|
887953 |
+ return ret;
|
|
|
887953 |
+
|
|
|
887953 |
err:
|
|
|
887953 |
+ LOCK(&priv->lock);
|
|
|
887953 |
+ {
|
|
|
887953 |
+ priv->bg_del_state = SHARD_BG_DELETION_NONE;
|
|
|
887953 |
+ }
|
|
|
887953 |
+ UNLOCK(&priv->lock);
|
|
|
887953 |
         loc_wipe (&loc);
|
|
|
887953 |
return ret;
|
|
|
887953 |
}
|
|
|
887953 |
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
|
|
|
887953 |
index ac3813c8c..37934f3a2 100644
|
|
|
887953 |
--- a/xlators/features/shard/src/shard.h
|
|
|
887953 |
+++ b/xlators/features/shard/src/shard.h
|
|
|
887953 |
@@ -196,11 +196,10 @@ shard_unlock_entrylk (call_frame_t *frame, xlator_t *this);
|
|
|
887953 |
} while (0)
|
|
|
887953 |
|
|
|
887953 |
typedef enum {
|
|
|
887953 |
- SHARD_FIRST_LOOKUP_PENDING = 0,
|
|
|
887953 |
- SHARD_FIRST_LOOKUP_IN_PROGRESS,
|
|
|
887953 |
- SHARD_FIRST_LOOKUP_DONE,
|
|
|
887953 |
-} shard_first_lookup_state_t;
|
|
|
887953 |
-
|
|
|
887953 |
+ SHARD_BG_DELETION_NONE = 0,
|
|
|
887953 |
+ SHARD_BG_DELETION_LAUNCHING,
|
|
|
887953 |
+ SHARD_BG_DELETION_IN_PROGRESS,
|
|
|
887953 |
+} shard_bg_deletion_state_t;
|
|
|
887953 |
/* rm = "remove me" */
|
|
|
887953 |
|
|
|
887953 |
typedef struct shard_priv {
|
|
|
887953 |
@@ -213,7 +212,8 @@ typedef struct shard_priv {
|
|
|
887953 |
int inode_count;
|
|
|
887953 |
struct list_head ilist_head;
|
|
|
887953 |
uint32_t deletion_rate;
|
|
|
887953 |
- shard_first_lookup_state_t first_lookup;
|
|
|
887953 |
+ shard_bg_deletion_state_t bg_del_state;
|
|
|
887953 |
+ gf_boolean_t first_lookup_done;
|
|
|
887953 |
uint64_t lru_limit;
|
|
|
887953 |
} shard_priv_t;
|
|
|
887953 |
|
|
|
887953 |
--
|
|
|
887953 |
2.20.1
|
|
|
887953 |
|