21ab4e
From 73d9135664bd3c9ea0fb49b762bc9dff285a5b73 Mon Sep 17 00:00:00 2001
3604df
From: N Balachandran <nbalacha@redhat.com>
21ab4e
Date: Thu, 27 Apr 2017 22:31:24 +0530
21ab4e
Subject: [PATCH 407/426] cluster/dht: rm -rf fails if dir has stale linkto
3604df
 files
3604df
3604df
rm -rf <dir> fails with ENOENT if dir contains a lot of
3604df
stale linkto files. This is because a single
3604df
readdirp is sent as part of the rmdir which would return
3604df
and delete only as many linkto files on the bricks as would fit
3604df
in one readdirp buffer. Running rm -rf <dir> multiple times
3604df
will eventually delete all the files. The fix sends readdirp
3604df
on each subvol until no more entries are returned.
3604df
3604df
> BUG: 1444540
3604df
> Signed-off-by: N Balachandran <nbalacha@redhat.com>
3604df
> Reviewed-on: https://review.gluster.org/17102
3604df
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
3604df
> Smoke: Gluster Build System <jenkins@build.gluster.org>
3604df
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
3604df
> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
3604df
21ab4e
Change-Id: I447f2d193de4bd8ac16e4541c6b919d22250e39e
21ab4e
BUG: 1445195
3604df
Signed-off-by: N Balachandran <nbalacha@redhat.com>
21ab4e
Reviewed-on: https://code.engineering.redhat.com/gerrit/104624
3604df
Reviewed-by: Raghavendra Gowdappa <rgowdapp@redhat.com>
21ab4e
Tested-by: Raghavendra Gowdappa <rgowdapp@redhat.com>
3604df
---
21ab4e
 xlators/cluster/dht/src/dht-common.c | 248 ++++++++++++++++++++++++++++-------
21ab4e
 1 file changed, 203 insertions(+), 45 deletions(-)
3604df
3604df
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
21ab4e
index 1235467..f9a5729 100644
3604df
--- a/xlators/cluster/dht/src/dht-common.c
3604df
+++ b/xlators/cluster/dht/src/dht-common.c
21ab4e
@@ -41,6 +41,11 @@ dht_setxattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame,
3604df
                int ret);
3604df
 
3604df
 
3604df
+int
3604df
+dht_rmdir_readdirp_do (call_frame_t *readdirp_frame, xlator_t *this);
3604df
+
21ab4e
+
21ab4e
+
3604df
 /* Sets the blocks and size values to fixed values. This is to be called
3604df
  * only for dirs. The caller is responsible for checking the type
3604df
  */
21ab4e
@@ -8197,8 +8202,8 @@ dht_rmdir_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this
3604df
         dht_local_t    *local = NULL;
21ab4e
         xlator_t       *prev = NULL;
3604df
         xlator_t       *src = NULL;
3604df
-        call_frame_t   *main_frame = NULL;
3604df
-        dht_local_t    *main_local = NULL;
3604df
+        call_frame_t   *readdirp_frame = NULL;
3604df
+        dht_local_t    *readdirp_local = NULL;
3604df
         int             this_call_cnt = 0;
3604df
         char gfid[GF_UUID_BUF_SIZE] ={0};
3604df
 
21ab4e
@@ -8207,8 +8212,9 @@ dht_rmdir_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this
3604df
         prev   = cookie;
21ab4e
         src    = prev;
3604df
 
3604df
-        main_frame = local->main_frame;
3604df
-        main_local = main_frame->local;
21ab4e
+
3604df
+        readdirp_frame = local->main_frame;
3604df
+        readdirp_local = readdirp_frame->local;
3604df
 
3604df
         gf_uuid_unparse(local->loc.gfid, gfid);
3604df
 
21ab4e
@@ -8217,16 +8223,18 @@ dht_rmdir_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this
3604df
                               "Unlinked linkfile %s on %s, gfid = %s",
3604df
                               local->loc.path, src->name, gfid);
3604df
         } else {
3604df
-                main_local->op_ret   = -1;
3604df
-                main_local->op_errno = op_errno;
3604df
+                if (op_errno != ENOENT) {
3604df
+                        readdirp_local->op_ret   = -1;
3604df
+                        readdirp_local->op_errno = op_errno;
3604df
+                }
3604df
                 gf_msg_debug (this->name, op_errno,
3604df
                               "Unlink of %s on %s failed. (gfid = %s)",
3604df
                               local->loc.path, src->name, gfid);
3604df
         }
3604df
 
3604df
-        this_call_cnt = dht_frame_return (main_frame);
3604df
+        this_call_cnt = dht_frame_return (readdirp_frame);
3604df
         if (is_last_call (this_call_cnt))
3604df
-                dht_rmdir_do (main_frame, this);
3604df
+                dht_rmdir_readdirp_do (readdirp_frame, this);
3604df
 
3604df
         DHT_STACK_DESTROY (frame);
3604df
         return 0;
21ab4e
@@ -8241,8 +8249,8 @@ dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
3604df
         dht_local_t    *local = NULL;
21ab4e
         xlator_t       *prev = NULL;
3604df
         xlator_t       *src = NULL;
3604df
-        call_frame_t   *main_frame = NULL;
3604df
-        dht_local_t    *main_local = NULL;
3604df
+        call_frame_t   *readdirp_frame = NULL;
3604df
+        dht_local_t    *readdirp_local = NULL;
3604df
         int             this_call_cnt = 0;
3604df
         dht_conf_t     *conf = this->private;
3604df
         char               gfid[GF_UUID_BUF_SIZE] = {0};
21ab4e
@@ -8251,17 +8259,24 @@ dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
3604df
         prev  = cookie;
21ab4e
         src   = prev;
3604df
 
3604df
-        main_frame = local->main_frame;
3604df
-        main_local = main_frame->local;
3604df
+gf_log ("dht", GF_LOG_INFO, "dht_rmdir_lookup_cbk %s", local->loc.path);
3604df
+        readdirp_frame = local->main_frame;
3604df
+        readdirp_local = readdirp_frame->local;
3604df
 
3604df
-        if (op_ret != 0)
21ab4e
+        if (op_ret != 0) {
21ab4e
+
3604df
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
3604df
+                        DHT_MSG_NOT_LINK_FILE_ERROR,
3604df
+                        "lookup failed for %s on %s  (type=0%o)",
3604df
+                        local->loc.path, src->name, stbuf->ia_type);
3604df
                 goto err;
3604df
+        }
3604df
 
3604df
         if (!check_is_linkfile (inode, stbuf, xattr, conf->link_xattr_name)) {
3604df
-                main_local->op_ret  = -1;
3604df
-                main_local->op_errno = ENOTEMPTY;
3604df
+                readdirp_local->op_ret  = -1;
3604df
+                readdirp_local->op_errno = ENOTEMPTY;
3604df
 
21ab4e
-                 gf_uuid_unparse(local->loc.gfid, gfid);
21ab4e
+                gf_uuid_unparse(local->loc.gfid, gfid);
3604df
 
21ab4e
                 gf_msg (this->name, GF_LOG_WARNING, 0,
21ab4e
                         DHT_MSG_NOT_LINK_FILE_ERROR,
21ab4e
@@ -8275,9 +8290,9 @@ dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
3604df
         return 0;
3604df
 err:
3604df
 
3604df
-        this_call_cnt = dht_frame_return (main_frame);
3604df
+        this_call_cnt = dht_frame_return (readdirp_frame);
3604df
         if (is_last_call (this_call_cnt))
3604df
-                dht_rmdir_do (main_frame, this);
3604df
+                dht_rmdir_readdirp_do (readdirp_frame, this);
3604df
 
3604df
         DHT_STACK_DESTROY (frame);
3604df
         return 0;
21ab4e
@@ -8290,24 +8305,30 @@ dht_rmdir_cached_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
21ab4e
                              struct iatt *stbuf, dict_t *xattr,
21ab4e
                              struct iatt *parent)
3604df
 {
21ab4e
-        dht_local_t    *local         = NULL;
21ab4e
-        xlator_t       *src           = NULL;
3604df
-        call_frame_t   *main_frame    = NULL;
3604df
-        dht_local_t    *main_local    = NULL;
21ab4e
-        int             this_call_cnt = 0;
21ab4e
-        dht_conf_t     *conf          = this->private;
21ab4e
-        dict_t         *xattrs        = NULL;
21ab4e
-        int             ret           = 0;
21ab4e
+        dht_local_t    *local             = NULL;
21ab4e
+        xlator_t       *src               = NULL;
3604df
+        call_frame_t   *readdirp_frame    = NULL;
3604df
+        dht_local_t    *readdirp_local    = NULL;
21ab4e
+        int             this_call_cnt     = 0;
21ab4e
+        dht_conf_t     *conf              = this->private;
21ab4e
+        dict_t         *xattrs            = NULL;
21ab4e
+        int             ret               = 0;
21ab4e
 
3604df
         local = frame->local;
3604df
         src   = local->hashed_subvol;
3604df
 
3604df
-        main_frame = local->main_frame;
3604df
-        main_local = main_frame->local;
21ab4e
+
3604df
+        /* main_frame here is the readdirp_frame */
3604df
+
3604df
+        readdirp_frame = local->main_frame;
3604df
+        readdirp_local = readdirp_frame->local;
3604df
+
3604df
+        gf_msg_debug (this->name, 0, "returning for %s ",
3604df
+                      local->loc.path);
3604df
 
3604df
         if (op_ret == 0) {
3604df
-                main_local->op_ret  = -1;
3604df
-                main_local->op_errno = ENOTEMPTY;
3604df
+                readdirp_local->op_ret  = -1;
3604df
+                readdirp_local->op_errno = ENOTEMPTY;
3604df
 
3604df
                 gf_msg (this->name, GF_LOG_WARNING, 0,
3604df
                         DHT_MSG_SUBVOL_ERROR,
21ab4e
@@ -8315,8 +8336,13 @@ dht_rmdir_cached_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
3604df
                         local->loc.path, src->name);
3604df
                 goto err;
3604df
         } else if (op_errno != ENOENT) {
3604df
-                main_local->op_ret  = -1;
3604df
-                main_local->op_errno = op_errno;
3604df
+                readdirp_local->op_ret  = -1;
3604df
+                readdirp_local->op_errno = op_errno;
3604df
+
3604df
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
3604df
+                        DHT_MSG_SUBVOL_ERROR,
3604df
+                        "%s not found on cached subvol %s",
3604df
+                        local->loc.path, src->name);
3604df
                 goto err;
3604df
         }
3604df
 
21ab4e
@@ -8337,7 +8363,6 @@ dht_rmdir_cached_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
21ab4e
                         dict_unref (xattrs);
21ab4e
                 goto err;
21ab4e
         }
21ab4e
-
21ab4e
         STACK_WIND_COOKIE (frame, dht_rmdir_lookup_cbk, src, src,
21ab4e
                            src->fops->lookup, &local->loc, xattrs);
21ab4e
         if (xattrs)
21ab4e
@@ -8346,9 +8371,16 @@ dht_rmdir_cached_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
3604df
         return 0;
3604df
 err:
3604df
 
3604df
-        this_call_cnt = dht_frame_return (main_frame);
3604df
+        this_call_cnt = dht_frame_return (readdirp_frame);
3604df
+
3604df
+        /* Once all the lookups/unlinks etc have returned, proceed to wind
3604df
+         * readdirp on the subvol again until no entries are returned.
3604df
+         * This is required if there are more entries than can be returned
3604df
+         * in a single readdirp call.
3604df
+         */
3604df
+
3604df
         if (is_last_call (this_call_cnt))
3604df
-                dht_rmdir_do (main_frame, this);
21ab4e
+                dht_rmdir_readdirp_do (readdirp_frame, this);
3604df
 
3604df
         DHT_STACK_DESTROY (frame);
3604df
         return 0;
21ab4e
@@ -8443,12 +8475,13 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
3604df
 
3604df
                 gf_uuid_unparse(lookup_local->loc.gfid, gfid);
3604df
 
3604df
-                gf_msg_trace (this->name, 0,
3604df
+                gf_msg_debug (this->name, 0,
3604df
                               "looking up %s on subvolume %s, gfid = %s",
3604df
                               lookup_local->loc.path, src->name, gfid);
3604df
 
3604df
                 LOCK (&frame->lock);
3604df
                 {
3604df
+                        /* Increment the call count for the readdir frame */
3604df
                         local->call_cnt++;
3604df
                 }
3604df
                 UNLOCK (&frame->lock);
21ab4e
@@ -8456,15 +8489,27 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
3604df
                 subvol = dht_linkfile_subvol (this, NULL, &trav->d_stat,
3604df
                                               trav->dict);
3604df
                 if (!subvol) {
3604df
+
3604df
                         gf_msg (this->name, GF_LOG_INFO, 0,
3604df
                                 DHT_MSG_INVALID_LINKFILE,
3604df
                                 "Linkfile does not have link subvolume. "
3604df
                                 "path = %s, gfid = %s",
3604df
                                 lookup_local->loc.path, gfid);
3604df
+
3604df
+                        gf_msg_debug (this->name, 0,
3604df
+                                     "looking up %s on subvolume %s, gfid = %s",
3604df
+                                      lookup_local->loc.path, src->name, gfid);
3604df
+
21ab4e
                         STACK_WIND_COOKIE (lookup_frame, dht_rmdir_lookup_cbk,
21ab4e
                                            src, src, src->fops->lookup,
21ab4e
                                            &lookup_local->loc, xattrs);
3604df
                 } else {
3604df
+                        gf_msg_debug (this->name, 0,
3604df
+                                      "Looking up linkfile target %s on "
3604df
+                                      " subvolume %s, gfid = %s",
3604df
+                                      lookup_local->loc.path, subvol->name,
3604df
+                                      gfid);
3604df
+
3604df
                         STACK_WIND (lookup_frame, dht_rmdir_cached_lookup_cbk,
3604df
                                     subvol, subvol->fops->lookup,
3604df
                                     &lookup_local->loc, xattrs);
21ab4e
@@ -8486,17 +8531,56 @@ err:
3604df
 }
3604df
 
3604df
 
3604df
+
3604df
+/*
3604df
+ * No more entries on this subvol. Proceed to the actual rmdir operation.
3604df
+ */
3604df
+
3604df
+void
3604df
+dht_rmdir_readdirp_done (call_frame_t *readdirp_frame, xlator_t *this)
3604df
+{
3604df
+
3604df
+        call_frame_t      *main_frame    = NULL;
3604df
+        dht_local_t       *main_local         = NULL;
3604df
+        dht_local_t       *local         = NULL;
3604df
+        int                this_call_cnt = 0;
3604df
+
3604df
+
3604df
+        local = readdirp_frame->local;
3604df
+        main_frame = local->main_frame;
3604df
+        main_local = main_frame->local;
3604df
+
3604df
+        /* At least one readdirp failed.
3604df
+         * This is a bit hit or miss - if readdirp failed on more than
3604df
+         * one subvol, we don't know which error is returned.
3604df
+         */
3604df
+        if (local->op_ret == -1) {
3604df
+                main_local->op_ret = local->op_ret;
3604df
+                main_local->op_errno = local->op_errno;
3604df
+        }
3604df
+
3604df
+        this_call_cnt = dht_frame_return (main_frame);
3604df
+
3604df
+        if (is_last_call (this_call_cnt))
3604df
+                dht_rmdir_do (main_frame, this);
3604df
+
3604df
+
3604df
+        DHT_STACK_DESTROY (readdirp_frame);
3604df
+}
3604df
+
3604df
+
3604df
+
3604df
 int
3604df
 dht_rmdir_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
3604df
                         int op_ret, int op_errno, gf_dirent_t *entries,
21ab4e
                         dict_t *xdata)
21ab4e
 {
21ab4e
         dht_local_t  *local = NULL;
21ab4e
-        int           this_call_cnt = -1;
21ab4e
         xlator_t     *prev = NULL;
3604df
         xlator_t     *src = NULL;
3604df
         int           ret = 0;
3604df
 
3604df
+
3604df
         local = frame->local;
3604df
         prev  = cookie;
21ab4e
         src   = prev;
21ab4e
@@ -8512,7 +8596,7 @@ dht_rmdir_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
3604df
                                       local->loc.path, op_ret);
3604df
                         local->op_ret = -1;
3604df
                         local->op_errno = ENOTEMPTY;
3604df
-                        break;
3604df
+                        goto done;
3604df
                 default:
3604df
                         /* @ret number of linkfiles are getting unlinked */
3604df
                         gf_msg_trace (this->name, 0,
21ab4e
@@ -8521,17 +8605,54 @@ dht_rmdir_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
3604df
                                       local->loc.path, ret);
3604df
                         break;
3604df
                 }
3604df
+
3604df
         }
3604df
 
3604df
-        this_call_cnt = dht_frame_return (frame);
21ab4e
 
21ab4e
-        if (is_last_call (this_call_cnt)) {
21ab4e
-                dht_rmdir_do (frame, this);
3604df
+        if (ret) {
3604df
+                return 0;
21ab4e
         }
21ab4e
 
3604df
+done:
3604df
+        /* readdirp failed or no linkto files were found on this subvol */
3604df
+
21ab4e
+        dht_rmdir_readdirp_done (frame, this);
21ab4e
         return 0;
21ab4e
 }
21ab4e
 
3604df
+/* Keep sending readdirp on the subvol until it returns no more entries
3604df
+ * It is possible that not all entries will fit in a single readdirp in which
3604df
+ * case the rmdir will keep failing with ENOTEMPTY
3604df
+ */
3604df
+
3604df
+int
3604df
+dht_rmdir_readdirp_do (call_frame_t *readdirp_frame, xlator_t *this)
3604df
+{
3604df
+        dht_local_t *local  = NULL;
3604df
+
3604df
+        local = readdirp_frame->local;
3604df
+
3604df
+        if (local->op_ret == -1) {
3604df
+                /* there is no point doing another readdirp on this
3604df
+                 * subvol . */
3604df
+                dht_rmdir_readdirp_done (readdirp_frame, this);
3604df
+                return 0;
21ab4e
+        }
21ab4e
+
3604df
+        gf_msg_debug ("this->name", 0, "Calling dht_rmdir_readdirp_do for %p",
3604df
+                      readdirp_frame);
3604df
+
21ab4e
+        STACK_WIND_COOKIE (readdirp_frame, dht_rmdir_readdirp_cbk,
21ab4e
+                           local->hashed_subvol,
21ab4e
+                           local->hashed_subvol,
21ab4e
+                           local->hashed_subvol->fops->readdirp,
21ab4e
+                           local->fd, 4096, 0, local->xattr);
3604df
+
3604df
+
21ab4e
+        return 0;
21ab4e
+
21ab4e
+}
3604df
+
3604df
 
21ab4e
 int
21ab4e
 dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
21ab4e
@@ -8540,11 +8661,13 @@ dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
3604df
         dht_local_t  *local         = NULL;
3604df
         int           this_call_cnt = -1;
21ab4e
         xlator_t     *prev          = NULL;
3604df
-        dict_t       *dict          = NULL;
3604df
         int           ret           = 0;
3604df
         dht_conf_t   *conf          = this->private;
3604df
+        dict_t       *dict          = NULL;
3604df
         int           i             = 0;
21ab4e
-        char               gfid[GF_UUID_BUF_SIZE] = {0};
21ab4e
+        char          gfid[GF_UUID_BUF_SIZE] = {0};
3604df
+        dht_local_t  *readdirp_local = NULL;
3604df
+        call_frame_t *readdirp_frame = NULL;
3604df
 
3604df
         local = frame->local;
3604df
         prev  = cookie;
21ab4e
@@ -8572,6 +8695,7 @@ dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
3604df
                 goto err;
3604df
 
3604df
         fd_bind (fd);
3604df
+
3604df
         dict = dict_new ();
3604df
         if (!dict) {
3604df
                 local->op_ret = -1;
21ab4e
@@ -8587,16 +8711,50 @@ dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
3604df
                         local->loc.path, conf->link_xattr_name);
3604df
 
3604df
         local->call_cnt = conf->subvolume_cnt;
3604df
+
21ab4e
+
3604df
+        /* Create a separate frame per subvol as we might need
3604df
+         * to resend readdirp multiple times to get all the
3604df
+         * entries.
3604df
+         */
3604df
+
3604df
         for (i = 0; i < conf->subvolume_cnt; i++) {
21ab4e
-                STACK_WIND_COOKIE (frame, dht_rmdir_readdirp_cbk,
21ab4e
+
3604df
+                readdirp_frame = copy_frame (frame);
3604df
+
3604df
+                if (!readdirp_frame) {
3604df
+                        local->call_cnt--;
3604df
+                        continue;
3604df
+                }
3604df
+
3604df
+                readdirp_local = dht_local_init (readdirp_frame, &local->loc,
3604df
+                                                 local->fd, 0);
3604df
+
3604df
+                if (!readdirp_local) {
3604df
+                        DHT_STACK_DESTROY (readdirp_frame);
3604df
+                        local->call_cnt--;
3604df
+                        continue;
3604df
+                }
3604df
+                readdirp_local->main_frame = frame;
3604df
+                readdirp_local->op_ret = 0;
3604df
+                readdirp_local->xattr = dict_ref (dict);
3604df
+                /* overload this field to save the subvol info */
3604df
+                readdirp_local->hashed_subvol = conf->subvolumes[i];
3604df
+
21ab4e
+                STACK_WIND_COOKIE (readdirp_frame, dht_rmdir_readdirp_cbk,
21ab4e
                                    conf->subvolumes[i], conf->subvolumes[i],
21ab4e
                                    conf->subvolumes[i]->fops->readdirp,
21ab4e
-                                   local->fd, 4096, 0, dict);
21ab4e
+                                   readdirp_local->fd, 4096, 0,
21ab4e
+                                   readdirp_local->xattr);
3604df
         }
3604df
 
3604df
         if (dict)
3604df
                 dict_unref (dict);
3604df
 
3604df
+        /* Could not wind readdirp to any subvol */
3604df
+        if (!local->call_cnt)
3604df
+                goto err;
3604df
+
3604df
         return 0;
3604df
 
3604df
 err:
3604df
-- 
3604df
1.8.3.1
3604df