Blob Blame History Raw
From 586b35b717e884b138de6d2a8715cb7082f0e9ca Mon Sep 17 00:00:00 2001
From: N Balachandran <nbalacha@redhat.com>
Date: Tue, 2 May 2017 13:21:57 +0530
Subject: [PATCH 311/316] cluster/dht: rm -rf fails if dir has stale linkto
 files

rm -rf <dir> fails with ENOENT if dir contains a lot of
stale linkto files. This is because a single
readdirp is sent as part of the rmdir which would return
and delete only as many linkto files on the bricks as would fit
in one readdirp buffer. Running rm -rf <dir> multiple times
will eventually delete all the files. The fix sends readdirp
on each subvol until no more entries are returned.

> BUG: 1444540
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
> Reviewed-on: https://review.gluster.org/17102
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>

Change-Id: I74dfbe90a169421f1fad037152771a9b6faf1b04
BUG: 1447186
Signed-off-by: N Balachandran <nbalacha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/104865
Reviewed-by: Raghavendra Gowdappa <rgowdapp@redhat.com>
---
 xlators/cluster/dht/src/dht-common.c | 223 +++++++++++++++++++++++++++++------
 1 file changed, 188 insertions(+), 35 deletions(-)

diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 62657a0..5d9ab12 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -41,6 +41,9 @@ dht_setxattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame,
                int ret);
 
 
+int
+dht_rmdir_readdirp_do (call_frame_t *readdirp_frame, xlator_t *this);
+
 /* Sets the blocks and size values to fixed values. This is to be called
  * only for dirs. The caller is responsible for checking the type
  */
@@ -7968,8 +7971,8 @@ dht_rmdir_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this
         dht_local_t    *local = NULL;
         call_frame_t   *prev = NULL;
         xlator_t       *src = NULL;
-        call_frame_t   *main_frame = NULL;
-        dht_local_t    *main_local = NULL;
+        call_frame_t   *readdirp_frame = NULL;
+        dht_local_t    *readdirp_local = NULL;
         int             this_call_cnt = 0;
         char gfid[GF_UUID_BUF_SIZE] ={0};
 
@@ -7978,8 +7981,8 @@ dht_rmdir_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this
         prev   = cookie;
         src    = prev->this;
 
-        main_frame = local->main_frame;
-        main_local = main_frame->local;
+        readdirp_frame = local->main_frame;
+        readdirp_local = readdirp_frame->local;
 
         gf_uuid_unparse(local->loc.gfid, gfid);
 
@@ -7988,16 +7991,18 @@ dht_rmdir_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this
                               "Unlinked linkfile %s on %s, gfid = %s",
                               local->loc.path, src->name, gfid);
         } else {
-                main_local->op_ret   = -1;
-                main_local->op_errno = op_errno;
+                if (op_errno != ENOENT) {
+                        readdirp_local->op_ret   = -1;
+                        readdirp_local->op_errno = op_errno;
+                }
                 gf_msg_debug (this->name, op_errno,
                               "Unlink of %s on %s failed. (gfid = %s)",
                               local->loc.path, src->name, gfid);
         }
 
-        this_call_cnt = dht_frame_return (main_frame);
+        this_call_cnt = dht_frame_return (readdirp_frame);
         if (is_last_call (this_call_cnt))
-                dht_rmdir_do (main_frame, this);
+                dht_rmdir_readdirp_do (readdirp_frame, this);
 
         DHT_STACK_DESTROY (frame);
         return 0;
@@ -8012,8 +8017,8 @@ dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
         dht_local_t    *local = NULL;
         call_frame_t   *prev = NULL;
         xlator_t       *src = NULL;
-        call_frame_t   *main_frame = NULL;
-        dht_local_t    *main_local = NULL;
+        call_frame_t   *readdirp_frame = NULL;
+        dht_local_t    *readdirp_local = NULL;
         int             this_call_cnt = 0;
         dht_conf_t     *conf = this->private;
         char               gfid[GF_UUID_BUF_SIZE] = {0};
@@ -8022,15 +8027,23 @@ dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
         prev  = cookie;
         src   = prev->this;
 
-        main_frame = local->main_frame;
-        main_local = main_frame->local;
+gf_log ("dht", GF_LOG_INFO, "dht_rmdir_lookup_cbk %s", local->loc.path);
+
+        readdirp_frame = local->main_frame;
+        readdirp_local = readdirp_frame->local;
+
+        if (op_ret != 0) {
 
-        if (op_ret != 0)
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                        DHT_MSG_NOT_LINK_FILE_ERROR,
+                        "lookup failed for %s on %s  (type=0%o)",
+                        local->loc.path, src->name, stbuf->ia_type);
                 goto err;
+        }
 
         if (!check_is_linkfile (inode, stbuf, xattr, conf->link_xattr_name)) {
-                main_local->op_ret  = -1;
-                main_local->op_errno = ENOTEMPTY;
+                readdirp_local->op_ret  = -1;
+                readdirp_local->op_errno = ENOTEMPTY;
 
                  gf_uuid_unparse(local->loc.gfid, gfid);
 
@@ -8046,9 +8059,9 @@ dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
         return 0;
 err:
 
-        this_call_cnt = dht_frame_return (main_frame);
+        this_call_cnt = dht_frame_return (readdirp_frame);
         if (is_last_call (this_call_cnt))
-                dht_rmdir_do (main_frame, this);
+                dht_rmdir_readdirp_do (readdirp_frame, this);
 
         DHT_STACK_DESTROY (frame);
         return 0;
@@ -8063,8 +8076,8 @@ dht_rmdir_cached_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
 {
         dht_local_t    *local         = NULL;
         xlator_t       *src           = NULL;
-        call_frame_t   *main_frame    = NULL;
-        dht_local_t    *main_local    = NULL;
+        call_frame_t   *readdirp_frame    = NULL;
+        dht_local_t    *readdirp_local    = NULL;
         int             this_call_cnt = 0;
         dht_conf_t     *conf          = this->private;
         dict_t         *xattrs        = NULL;
@@ -8073,12 +8086,17 @@ dht_rmdir_cached_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
         local = frame->local;
         src   = local->hashed_subvol;
 
-        main_frame = local->main_frame;
-        main_local = main_frame->local;
+        /* main_frame here is the readdirp_frame */
+
+        readdirp_frame = local->main_frame;
+        readdirp_local = readdirp_frame->local;
+
+        gf_msg_debug (this->name, 0, "returning for %s ",
+                      local->loc.path);
 
         if (op_ret == 0) {
-                main_local->op_ret  = -1;
-                main_local->op_errno = ENOTEMPTY;
+                readdirp_local->op_ret  = -1;
+                readdirp_local->op_errno = ENOTEMPTY;
 
                 gf_msg (this->name, GF_LOG_WARNING, 0,
                         DHT_MSG_SUBVOL_ERROR,
@@ -8086,8 +8104,13 @@ dht_rmdir_cached_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                         local->loc.path, src->name);
                 goto err;
         } else if (op_errno != ENOENT) {
-                main_local->op_ret  = -1;
-                main_local->op_errno = op_errno;
+                readdirp_local->op_ret  = -1;
+                readdirp_local->op_errno = op_errno;
+
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                        DHT_MSG_SUBVOL_ERROR,
+                        "%s not found on cached subvol %s",
+                        local->loc.path, src->name);
                 goto err;
         }
 
@@ -8117,9 +8140,16 @@ dht_rmdir_cached_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
         return 0;
 err:
 
-        this_call_cnt = dht_frame_return (main_frame);
+        this_call_cnt = dht_frame_return (readdirp_frame);
+
+        /* Once all the lookups/unlinks etc have returned, proceed to wind
+         * readdirp on the subvol again until no entries are returned.
+         * This is required if there are more entries than can be returned
+         * in a single readdirp call.
+         */
+
         if (is_last_call (this_call_cnt))
-                dht_rmdir_do (main_frame, this);
+                dht_rmdir_do (readdirp_frame, this);
 
         DHT_STACK_DESTROY (frame);
         return 0;
@@ -8214,12 +8244,13 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
 
                 gf_uuid_unparse(lookup_local->loc.gfid, gfid);
 
-                gf_msg_trace (this->name, 0,
+                gf_msg_debug (this->name, 0,
                               "looking up %s on subvolume %s, gfid = %s",
                               lookup_local->loc.path, src->name, gfid);
 
                 LOCK (&frame->lock);
                 {
+                        /* Increment the call count for the readdir frame */
                         local->call_cnt++;
                 }
                 UNLOCK (&frame->lock);
@@ -8227,15 +8258,27 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
                 subvol = dht_linkfile_subvol (this, NULL, &trav->d_stat,
                                               trav->dict);
                 if (!subvol) {
+
                         gf_msg (this->name, GF_LOG_INFO, 0,
                                 DHT_MSG_INVALID_LINKFILE,
                                 "Linkfile does not have link subvolume. "
                                 "path = %s, gfid = %s",
                                 lookup_local->loc.path, gfid);
+
+                        gf_msg_debug (this->name, 0,
+                                     "looking up %s on subvolume %s, gfid = %s",
+                                      lookup_local->loc.path, src->name, gfid);
+
                         STACK_WIND (lookup_frame, dht_rmdir_lookup_cbk,
                                     src, src->fops->lookup,
                                     &lookup_local->loc, xattrs);
                 } else {
+                        gf_msg_debug (this->name, 0,
+                                      "Looking up linkfile target %s on "
+                                      " subvolume %s, gfid = %s",
+                                      lookup_local->loc.path, subvol->name,
+                                      gfid);
+
                         STACK_WIND (lookup_frame, dht_rmdir_cached_lookup_cbk,
                                     subvol, subvol->fops->lookup,
                                     &lookup_local->loc, xattrs);
@@ -8257,6 +8300,46 @@ err:
 }
 
 
+
+/*
+ * No more entries on this subvol. Proceed to the actual rmdir operation.
+ */
+
+void
+dht_rmdir_readdirp_done (call_frame_t *readdirp_frame, xlator_t *this)
+{
+
+        call_frame_t      *main_frame    = NULL;
+        dht_local_t       *main_local         = NULL;
+        dht_local_t       *local         = NULL;
+        int                this_call_cnt = 0;
+
+
+        local = readdirp_frame->local;
+        main_frame = local->main_frame;
+        main_local = main_frame->local;
+
+        /* At least one readdirp failed.
+         * This is a bit hit or miss - if readdirp failed on more than
+         * one subvol, we don't know which error is returned.
+         */
+
+        if (local->op_ret == -1) {
+                main_local->op_ret = local->op_ret;
+                main_local->op_errno = local->op_errno;
+        }
+
+        this_call_cnt = dht_frame_return (main_frame);
+
+        if (is_last_call (this_call_cnt))
+                dht_rmdir_do (main_frame, this);
+
+
+        DHT_STACK_DESTROY (readdirp_frame);
+}
+
+
+
 int
 dht_rmdir_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                         int op_ret, int op_errno, gf_dirent_t *entries,
@@ -8268,6 +8351,7 @@ dht_rmdir_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
         xlator_t     *src = NULL;
         int           ret = 0;
 
+
         local = frame->local;
         prev  = cookie;
         src   = prev->this;
@@ -8283,7 +8367,7 @@ dht_rmdir_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                                       local->loc.path, op_ret);
                         local->op_ret = -1;
                         local->op_errno = ENOTEMPTY;
-                        break;
+                        goto done;
                 default:
                         /* @ret number of linkfiles are getting unlinked */
                         gf_msg_trace (this->name, 0,
@@ -8292,15 +8376,49 @@ dht_rmdir_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                                       local->loc.path, ret);
                         break;
                 }
+
         }
 
-        this_call_cnt = dht_frame_return (frame);
+        if (ret) {
+                return 0;
+        }
+done:
+        /* readdirp failed or no linkto files were found on this subvol */
 
-        if (is_last_call (this_call_cnt)) {
-                dht_rmdir_do (frame, this);
+        dht_rmdir_readdirp_done (frame, this);
+        return 0;
+}
+
+/* Keep sending readdirp on the subvol until it returns no more entries
+ * It is possible that not all entries will fit in a single readdirp in which
+ * case the rmdir will keep failing with ENOTEMPTY
+ */
+
+int
+dht_rmdir_readdirp_do (call_frame_t *readdirp_frame, xlator_t *this)
+{
+        dht_local_t *local  = NULL;
+
+        local = readdirp_frame->local;
+
+        if (local->op_ret == -1) {
+                /* there is no point doing another readdirp on this
+                 * subvol . */
+                dht_rmdir_readdirp_done (readdirp_frame, this);
+                return 0;
         }
 
+        gf_msg_debug ("this->name", 0, "Calling dht_rmdir_readdirp_do for %p",
+                      readdirp_frame);
+
+        STACK_WIND (readdirp_frame, dht_rmdir_readdirp_cbk,
+                    local->hashed_subvol,
+                    local->hashed_subvol->fops->readdirp,
+                    local->fd, 4096, 0, local->xattr);
+
+
         return 0;
+
 }
 
 
@@ -8311,11 +8429,13 @@ dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
         dht_local_t  *local         = NULL;
         int           this_call_cnt = -1;
         call_frame_t *prev          = NULL;
-        dict_t       *dict          = NULL;
         int           ret           = 0;
         dht_conf_t   *conf          = this->private;
+        dict_t       *dict          = NULL;
         int           i             = 0;
         char               gfid[GF_UUID_BUF_SIZE] = {0};
+        dht_local_t  *readdirp_local = NULL;
+        call_frame_t *readdirp_frame = NULL;
 
         local = frame->local;
         prev  = cookie;
@@ -8343,6 +8463,7 @@ dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                 goto err;
 
         fd_bind (fd);
+
         dict = dict_new ();
         if (!dict) {
                 local->op_ret = -1;
@@ -8358,16 +8479,48 @@ dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                         local->loc.path, conf->link_xattr_name);
 
         local->call_cnt = conf->subvolume_cnt;
+
+        /* Create a separate frame per subvol as we might need
+         * to resend readdirp multiple times to get all the
+         * entries.
+         */
+
         for (i = 0; i < conf->subvolume_cnt; i++) {
-                STACK_WIND (frame, dht_rmdir_readdirp_cbk,
+                readdirp_frame = copy_frame (frame);
+
+                if (!readdirp_frame) {
+                        local->call_cnt--;
+                        continue;
+                }
+
+                readdirp_local = dht_local_init (readdirp_frame, &local->loc,
+                                                 local->fd, 0);
+
+                if (!readdirp_local) {
+                        DHT_STACK_DESTROY (readdirp_frame);
+                        local->call_cnt--;
+                        continue;
+                }
+                readdirp_local->main_frame = frame;
+                readdirp_local->op_ret = 0;
+                readdirp_local->xattr = dict_ref (dict);
+                /* overload this field to save the subvol info */
+                readdirp_local->hashed_subvol = conf->subvolumes[i];
+
+                STACK_WIND (readdirp_frame, dht_rmdir_readdirp_cbk,
                             conf->subvolumes[i],
                             conf->subvolumes[i]->fops->readdirp,
-                            local->fd, 4096, 0, dict);
+                            readdirp_local->fd, 4096, 0,
+                            readdirp_local->xattr);
         }
 
         if (dict)
                 dict_unref (dict);
 
+        /* Could not wind readdirp to any subvol */
+        if (!local->call_cnt)
+                goto err;
+
         return 0;
 
 err:
-- 
1.8.3.1