Blob Blame History Raw
From a7ade5267ebaf4bf318ee2aebe48000cee583e3b Mon Sep 17 00:00:00 2001
From: Krutika Dhananjay <kdhananj@redhat.com>
Date: Fri, 28 Dec 2018 18:53:15 +0530
Subject: [PATCH 505/506] features/shard: Fix launch of multiple synctasks for
 background deletion

> Upstream: https://review.gluster.org/21957
> BUG: 1662368
> Change-Id: Ib33773d27fb4be463c7a8a5a6a4b63689705324e

PROBLEM:

When multiple sharded files are deleted in quick succession, multiple
issues were observed:
1. misleading logs corresponding to a sharded file where while one log
   message said the shards corresponding to the file were deleted
   successfully, this was followed by multiple logs suggesting the very
   same operation failed. This was because of multiple synctasks
   attempting to clean up shards of the same file and only one of them
   succeeding (the one that gets ENTRYLK successfully), and the rest of
   them logging failure.

2. multiple synctasks to do background deletion would be launched, one
   for each deleted file but all of them could readdir entries from
   .remove_me at the same time could potentially contend for ENTRYLK on
   .shard for each of the entry names. This is undesirable and wasteful.

FIX:
Background deletion will now follow a state machine. In the event that
there are multiple attempts to launch synctask for background deletion,
one for each file deleted, only the first task is launched. And if while
this task is doing the cleanup, more attempts are made to delete other
files, the state of the synctask is adjusted so that it restarts the
crawl even after reaching end-of-directory to pick up any files it may
have missed in the previous iteration.

This patch also fixes uninitialized lk-owner during syncop_entrylk()
which was leading to multiple background deletion synctasks entering
the critical section at the same time and leading to illegal memory access
of base inode in the second syntcask after it was destroyed post shard deletion
by the first synctask.

Change-Id: Ib33773d27fb4be463c7a8a5a6a4b63689705324e
BUG: 1662059
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/160437
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Xavi Hernandez <xhernandez@redhat.com>
---
 xlators/features/shard/src/shard.c | 199 +++++++++++++++++++----------
 xlators/features/shard/src/shard.h |  12 +-
 2 files changed, 136 insertions(+), 75 deletions(-)

diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index 5b72399f5..8aed1a386 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -1465,16 +1465,45 @@ int
 shard_start_background_deletion (xlator_t *this)
 {
         int              ret           = 0;
+        gf_boolean_t     i_cleanup     = _gf_true;
+        shard_priv_t    *priv          = NULL;
         call_frame_t    *cleanup_frame = NULL;
 
+        priv = this->private;
+
+        LOCK(&priv->lock);
+        {
+                switch (priv->bg_del_state) {
+                case SHARD_BG_DELETION_NONE:
+                        i_cleanup = _gf_true;
+                        priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
+                        break;
+                case SHARD_BG_DELETION_LAUNCHING:
+                        i_cleanup = _gf_false;
+                        break;
+                case SHARD_BG_DELETION_IN_PROGRESS:
+                        priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
+                        i_cleanup = _gf_false;
+                        break;
+                default:
+                        break;
+                }
+        }
+        UNLOCK(&priv->lock);
+        if (!i_cleanup)
+                return 0;
+
         cleanup_frame = create_frame (this, this->ctx->pool);
         if (!cleanup_frame) {
                 gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
                         SHARD_MSG_MEMALLOC_FAILED, "Failed to create "
                         "new frame to delete shards");
-                return -ENOMEM;
+                ret = -ENOMEM;
+                goto err;
         }
 
+        set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root);
+
         ret = synctask_new (this->ctx->env, shard_delete_shards,
                             shard_delete_shards_cbk, cleanup_frame,
                             cleanup_frame);
@@ -1484,7 +1513,16 @@ shard_start_background_deletion (xlator_t *this)
                         "failed to create task to do background "
                         "cleanup of shards");
                 STACK_DESTROY (cleanup_frame->root);
+                goto err;
         }
+        return 0;
+
+err:
+        LOCK(&priv->lock);
+        {
+                priv->bg_del_state = SHARD_BG_DELETION_NONE;
+        }
+        UNLOCK(&priv->lock);
         return ret;
 }
 
@@ -1493,7 +1531,7 @@ shard_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                   int32_t op_ret, int32_t op_errno, inode_t *inode,
                   struct iatt *buf, dict_t *xdata, struct iatt *postparent)
 {
-        int             ret             = 0;
+        int             ret             = -1;
         shard_priv_t   *priv            = NULL;
         gf_boolean_t    i_start_cleanup = _gf_false;
 
@@ -1526,22 +1564,23 @@ shard_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
 
         LOCK (&priv->lock);
         {
-                if (priv->first_lookup == SHARD_FIRST_LOOKUP_PENDING) {
-                        priv->first_lookup = SHARD_FIRST_LOOKUP_IN_PROGRESS;
+                if (priv->first_lookup_done == _gf_false) {
+                        priv->first_lookup_done = _gf_true;
                         i_start_cleanup = _gf_true;
                 }
         }
         UNLOCK (&priv->lock);
 
-        if (i_start_cleanup) {
-                ret = shard_start_background_deletion (this);
-                if (ret) {
-                        LOCK (&priv->lock);
-                        {
-                                priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING;
-                        }
-                        UNLOCK (&priv->lock);
+        if (!i_start_cleanup)
+                goto unwind;
+
+        ret = shard_start_background_deletion(this);
+        if (ret < 0) {
+                LOCK(&priv->lock);
+                {
+                        priv->first_lookup_done = _gf_false;
                 }
+                UNLOCK(&priv->lock);
         }
 unwind:
         SHARD_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf,
@@ -2940,9 +2979,10 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num)
                 if (ctx->fsync_needed) {
                         unref_base_inode++;
                         list_del_init (&ctx->to_fsync_list);
-                        if (base_inode)
+                        if (base_inode) {
                                 __shard_inode_ctx_get (base_inode, this, &base_ictx);
-                        base_ictx->fsync_count--;
+                                base_ictx->fsync_count--;
+                        }
                 }
         }
         UNLOCK(&inode->lock);
@@ -3334,10 +3374,15 @@ shard_delete_shards_of_entry (call_frame_t *cleanup_frame, xlator_t *this,
         loc.inode = inode_ref (priv->dot_shard_rm_inode);
 
         ret = syncop_entrylk (FIRST_CHILD(this), this->name, &loc,
-                              entry->d_name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL,
-                              NULL);
-        if (ret)
+                              entry->d_name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
+                              NULL, NULL);
+        if (ret < 0) {
+                if (ret == -EAGAIN) {
+                        ret = 0;
+                }
                 goto out;
+        }
+
         {
                 ret = __shard_delete_shards_of_entry (cleanup_frame, this,
                                                       entry, inode);
@@ -3352,20 +3397,6 @@ out:
 int
 shard_delete_shards_cbk (int ret, call_frame_t *frame, void *data)
 {
-        xlator_t     *this          = NULL;
-        shard_priv_t *priv          = NULL;
-
-        this = frame->this;
-        priv = this->private;
-
-        if (ret < 0) {
-                gf_msg (this->name, GF_LOG_WARNING, -ret,
-                        SHARD_MSG_SHARDS_DELETION_FAILED,
-                        "Background deletion of shards failed");
-                priv->first_lookup = SHARD_FIRST_LOOKUP_PENDING;
-        } else {
-                priv->first_lookup = SHARD_FIRST_LOOKUP_DONE;
-        }
         SHARD_STACK_DESTROY (frame);
         return 0;
 }
@@ -3482,6 +3513,7 @@ shard_delete_shards (void *opaque)
         gf_dirent_t      entries;
         gf_dirent_t     *entry                      = NULL;
         call_frame_t    *cleanup_frame              = NULL;
+        gf_boolean_t done = _gf_false;
 
         this = THIS;
         priv = this->private;
@@ -3534,52 +3566,81 @@ shard_delete_shards (void *opaque)
                 goto err;
         }
 
-        while ((ret = syncop_readdirp (FIRST_CHILD(this), local->fd, 131072,
-                                      offset, &entries, local->xattr_req,
-                                      NULL))) {
-                if (ret > 0)
-                        ret = 0;
-                list_for_each_entry (entry, &entries.list, list) {
-                        offset = entry->d_off;
-
-                        if (!strcmp (entry->d_name, ".") ||
-                            !strcmp (entry->d_name, ".."))
-                                continue;
+        for (;;) {
+                offset = 0;
+                LOCK(&priv->lock);
+                {
+                        if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) {
+                                priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS;
+                        } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) {
+                                priv->bg_del_state = SHARD_BG_DELETION_NONE;
+                                done = _gf_true;
+                        }
+                }
+                UNLOCK(&priv->lock);
+                if (done)
+                        break;
+                while ((ret = syncop_readdirp (FIRST_CHILD(this), local->fd,
+                                               131072, offset, &entries,
+                                               local->xattr_req, NULL))) {
+                        if (ret > 0)
+                                ret = 0;
+                        list_for_each_entry (entry, &entries.list, list) {
+                                offset = entry->d_off;
+
+                                if (!strcmp (entry->d_name, ".") ||
+                                    !strcmp (entry->d_name, ".."))
+                                        continue;
 
-                        if (!entry->inode) {
-                                ret = shard_lookup_marker_entry (this, local,
-                                                                 entry);
-                                if (ret < 0)
+                                if (!entry->inode) {
+                                        ret = shard_lookup_marker_entry (this,
+                                                                         local,
+                                                                         entry);
+                                        if (ret < 0)
+                                                continue;
+                                }
+                                link_inode = inode_link (entry->inode,
+                                                         local->fd->inode,
+                                                         entry->d_name,
+                                                         &entry->d_stat);
+
+                                gf_msg_debug (this->name, 0, "Initiating "
+                                              "deletion of shards of gfid %s",
+                                              entry->d_name);
+                                ret = shard_delete_shards_of_entry (cleanup_frame,
+                                                                    this,
+                                                                    entry,
+                                                                    link_inode);
+                                inode_unlink (link_inode, local->fd->inode,
+                                              entry->d_name);
+                                inode_unref (link_inode);
+                                if (ret) {
+                                        gf_msg (this->name, GF_LOG_ERROR, -ret,
+                                                SHARD_MSG_SHARDS_DELETION_FAILED,
+                                                "Failed to clean up shards of "
+                                                "gfid %s", entry->d_name);
                                         continue;
-                        }
-                        link_inode = inode_link (entry->inode, local->fd->inode,
-                                                 entry->d_name, &entry->d_stat);
-
-                        gf_msg_debug (this->name, 0, "Initiating deletion of "
-                                      "shards of gfid %s", entry->d_name);
-                        ret = shard_delete_shards_of_entry (cleanup_frame, this,
-                                                            entry, link_inode);
-                        inode_unlink (link_inode, local->fd->inode,
-                                      entry->d_name);
-                        inode_unref (link_inode);
-                        if (ret) {
-                                gf_msg (this->name, GF_LOG_ERROR, -ret,
-                                        SHARD_MSG_SHARDS_DELETION_FAILED,
-                                        "Failed to clean up shards of gfid %s",
+                                }
+                                gf_msg (this->name, GF_LOG_INFO, 0,
+                                        SHARD_MSG_SHARDS_DELETION_COMPLETED,
+                                        "Deleted shards of gfid=%s from backend",
                                         entry->d_name);
-                                continue;
                         }
-                        gf_msg (this->name, GF_LOG_INFO, 0,
-                                SHARD_MSG_SHARDS_DELETION_COMPLETED, "Deleted "
-                                "shards of gfid=%s from backend",
-                                entry->d_name);
+                        gf_dirent_free (&entries);
+                        if (ret)
+                                break;
                 }
-                gf_dirent_free (&entries);
-                if (ret)
-                        break;
         }
         ret = 0;
+        loc_wipe(&loc);
+        return ret;
+
 err:
+        LOCK(&priv->lock);
+        {
+                priv->bg_del_state = SHARD_BG_DELETION_NONE;
+        }
+        UNLOCK(&priv->lock);
         loc_wipe (&loc);
         return ret;
 }
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
index ac3813c8c..37934f3a2 100644
--- a/xlators/features/shard/src/shard.h
+++ b/xlators/features/shard/src/shard.h
@@ -196,11 +196,10 @@ shard_unlock_entrylk (call_frame_t *frame, xlator_t *this);
         } while (0)
 
 typedef enum {
-        SHARD_FIRST_LOOKUP_PENDING = 0,
-        SHARD_FIRST_LOOKUP_IN_PROGRESS,
-        SHARD_FIRST_LOOKUP_DONE,
-} shard_first_lookup_state_t;
-
+        SHARD_BG_DELETION_NONE = 0,
+        SHARD_BG_DELETION_LAUNCHING,
+        SHARD_BG_DELETION_IN_PROGRESS,
+} shard_bg_deletion_state_t;
 /* rm = "remove me" */
 
 typedef struct shard_priv {
@@ -213,7 +212,8 @@ typedef struct shard_priv {
         int inode_count;
         struct list_head ilist_head;
         uint32_t deletion_rate;
-        shard_first_lookup_state_t first_lookup;
+        shard_bg_deletion_state_t bg_del_state;
+        gf_boolean_t first_lookup_done;
         uint64_t lru_limit;
 } shard_priv_t;
 
-- 
2.20.1