Blob Blame History Raw
From 7bec1b8efce0d60b45a29b0b1de81def3bc79f6c Mon Sep 17 00:00:00 2001
From: Mohammed Rafi KC <rkavunga@redhat.com>
Date: Tue, 3 May 2016 14:43:20 +0530
Subject: [PATCH 135/139] dht:remember locked subvol and send unlock to the same

During locking we send lock request to cached subvol,
and normally we unlock to the cached subvol
But with parallel fresh lookup on a directory, there
is a race window where the cached subvol can change
and the unlock can go into a different subvol from
which we took lock.

This will result in a stale lock held on one of the
subvol.

So we will store the details of subvol which we took the lock
and will unlock from the same subvol

back port of>
>Change-Id: I47df99491671b10624eb37d1d17e40bacf0b15eb
>BUG: 1311002
>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
>Reviewed-on: http://review.gluster.org/13492
>Reviewed-by: N Balachandran <nbalacha@redhat.com>
>Smoke: Gluster Build System <jenkins@build.gluster.com>
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
>Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
>CentOS-regression: Gluster Build System <jenkins@build.gluster.com>

>Change-Id: Ia847e7115d2296ae9811b14a956f3b6bf39bd86d
>BUG: 1333645

Change-Id: Ief799bdd19086dca614505452ed96493d6f5f8e7
BUG: 1306194
Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
Signed-off-by: N Balachandran <nbalacha@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/73806
---
 xlators/cluster/dht/src/dht-common.c     |    5 +-
 xlators/cluster/dht/src/dht-common.h     |   11 ++
 xlators/cluster/dht/src/dht-helper.c     |  155 ++++++++++++++++++++++++++++++
 xlators/cluster/dht/src/dht-inode-read.c |   62 ++++++++----
 xlators/cluster/dht/src/dht-messages.h   |    8 ++-
 5 files changed, 220 insertions(+), 21 deletions(-)

diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 594eca0..fa053de 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -8406,7 +8406,10 @@ dht_entrylk_cbk (call_frame_t *frame, void *cookie,
         return 0;
 }
 
-
+/* TODO
+ * Sending entrylk to cached subvol can result in stale lock
+ * as described in the bug 1311002.
+ */
 int
 dht_entrylk (call_frame_t *frame, xlator_t *this,
              const char *volume, loc_t *loc, const char *basename,
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 058b149..9792189 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -106,6 +106,7 @@ typedef struct dht_stat_time dht_stat_time_t;
 struct dht_inode_ctx {
         dht_layout_t    *layout;
         dht_stat_time_t  time;
+        xlator_t        *lock_subvol;
 };
 
 typedef struct dht_inode_ctx dht_inode_ctx_t;
@@ -288,6 +289,8 @@ struct dht_local {
                 int                 op_errno;
         } lock;
 
+        short           lock_type;
+
         call_stub_t *stub;
         int32_t      parent_disk_layout[4];
 };
@@ -1208,4 +1211,12 @@ dht_release (xlator_t *this, fd_t *fd);
 
 int32_t
 dht_set_fixed_dir_stat (struct iatt *stat);
+
+xlator_t*
+dht_get_lock_subvolume (xlator_t *this, struct gf_flock *lock,
+                        dht_local_t *local);
+
+int
+dht_lk_inode_unref (call_frame_t *frame, int32_t op_ret);
+
 #endif/* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index 81d49db..ac70a66 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -2300,3 +2300,158 @@ dht_heal_full_path_done (int op_ret, call_frame_t *heal_frame, void *data)
         DHT_STACK_DESTROY (heal_frame);
         return 0;
 }
+
+/* This function must be called inside an inode lock */
+int
+__dht_lock_subvol_set (inode_t *inode, xlator_t *this,
+                       xlator_t *lock_subvol)
+{
+        dht_inode_ctx_t         *ctx            = NULL;
+        int                      ret            = -1;
+        uint64_t                 value          = 0;
+
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+        ret = __inode_ctx_get0 (inode, this, &value);
+        if (ret || !value) {
+                return -1;
+        }
+
+        ctx = (dht_inode_ctx_t *) value;
+        ctx->lock_subvol = lock_subvol;
+out:
+        return ret;
+}
+
+xlator_t*
+dht_get_lock_subvolume (xlator_t *this, struct gf_flock *lock,
+                        dht_local_t *local)
+{
+        xlator_t                *subvol                  = NULL;
+        inode_t                 *inode                   = NULL;
+        int32_t                  ret                     = -1;
+        uint64_t                 value                   = 0;
+        xlator_t                *cached_subvol           = NULL;
+        dht_inode_ctx_t         *ctx                     = NULL;
+        char                     gfid[GF_UUID_BUF_SIZE]  = {0};
+
+        GF_VALIDATE_OR_GOTO (this->name, lock, out);
+        GF_VALIDATE_OR_GOTO (this->name, local, out);
+
+        cached_subvol = local->cached_subvol;
+
+        if (local->loc.inode || local->fd) {
+                inode = local->loc.inode ? local->loc.inode : local->fd->inode;
+        }
+
+        if (!inode)
+                goto out;
+
+        if (!(IA_ISDIR (inode->ia_type) || IA_ISINVAL (inode->ia_type))) {
+                /*
+                 * We may get non-linked inode for directories as part
+                 * of the selfheal code path. So checking  for IA_INVAL
+                 * type also. This will only happen for directory.
+                 */
+                subvol = local->cached_subvol;
+                goto out;
+        }
+
+        if (lock->l_type != F_UNLCK) {
+                /*
+                 * inode purging might happen on NFS between a lk
+                 * and unlk. Due to this lk and unlk might be sent
+                 * to different subvols.
+                 * So during a lock request, taking a ref on inode
+                 * to prevent inode purging. inode unref will happen
+                 * in unlock cbk code path.
+                 */
+                inode_ref (inode);
+        }
+
+        LOCK (&inode->lock);
+                ret = __inode_ctx_get0 (inode, this, &value);
+                if (!ret && value) {
+                        ctx = (dht_inode_ctx_t *) value;
+                        subvol = ctx->lock_subvol;
+                }
+                if (!subvol && lock->l_type != F_UNLCK && cached_subvol) {
+                        ret = __dht_lock_subvol_set (inode, this,
+                                                     cached_subvol);
+                        if (ret) {
+                                gf_uuid_unparse(inode->gfid, gfid);
+                                gf_msg (this->name, GF_LOG_WARNING, 0,
+                                        DHT_MSG_SET_INODE_CTX_FAILED,
+                                        "Failed to set lock_subvol in "
+                                        "inode ctx for gfid %s",
+                                        gfid);
+                                goto unlock;
+                        }
+                        subvol = cached_subvol;
+                }
+unlock:
+        UNLOCK (&inode->lock);
+        if (!subvol && inode && lock->l_type != F_UNLCK) {
+                inode_unref (inode);
+        }
+out:
+        return subvol;
+}
+
+int
+dht_lk_inode_unref (call_frame_t *frame, int32_t op_ret)
+{
+        int                     ret                     = -1;
+        dht_local_t            *local                   = NULL;
+        inode_t                *inode                   = NULL;
+        xlator_t               *this                    = NULL;
+        char                    gfid[GF_UUID_BUF_SIZE]  = {0};
+
+        local = frame->local;
+        this = frame->this;
+
+        if (local->loc.inode || local->fd) {
+                inode = local->loc.inode ? local->loc.inode : local->fd->inode;
+        }
+        if (!inode) {
+                gf_msg (this->name, GF_LOG_WARNING, 0,
+                        DHT_MSG_LOCK_INODE_UNREF_FAILED,
+                        "Found a NULL inode. Failed to unref the inode");
+                goto out;
+        }
+
+        if (!(IA_ISDIR (inode->ia_type) || IA_ISINVAL (inode->ia_type))) {
+                ret = 0;
+                goto out;
+        }
+
+        switch (local->lock_type) {
+        case F_RDLCK:
+        case F_WRLCK:
+                if (op_ret) {
+                        gf_uuid_unparse(inode->gfid, gfid);
+                        gf_msg_debug (this->name, 0,
+                                "lock request failed for gfid %s", gfid);
+                        inode_unref (inode);
+                        goto out;
+                }
+                break;
+
+        case F_UNLCK:
+                if (!op_ret) {
+                        inode_unref (inode);
+                } else {
+                        gf_uuid_unparse(inode->gfid, gfid);
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                DHT_MSG_LOCK_INODE_UNREF_FAILED,
+                                "Unlock request failed for gfid %s."
+                                "Failed to unref the inode", gfid);
+                        goto out;
+                }
+        default:
+                break;
+        }
+        ret = 0;
+out:
+        return ret;
+}
diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
index f038d40..9172cec 100644
--- a/xlators/cluster/dht/src/dht-inode-read.c
+++ b/xlators/cluster/dht/src/dht-inode-read.c
@@ -947,6 +947,7 @@ int
 dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
             int op_ret, int op_errno, struct gf_flock *flock, dict_t *xdata)
 {
+        dht_lk_inode_unref (frame, op_ret);
         DHT_STACK_UNWIND (lk, frame, op_ret, op_errno, flock, xdata);
 
         return 0;
@@ -957,16 +958,24 @@ int
 dht_lk (call_frame_t *frame, xlator_t *this,
         fd_t *fd, int cmd, struct gf_flock *flock, dict_t *xdata)
 {
-        xlator_t     *subvol = NULL;
-        int           op_errno = -1;
+        xlator_t        *lock_subvol       = NULL;
+        dht_local_t     *local             = NULL;
+        int              op_errno          = -1;
 
 
         VALIDATE_OR_GOTO (frame, err);
         VALIDATE_OR_GOTO (this, err);
         VALIDATE_OR_GOTO (fd, err);
 
-        subvol = dht_subvol_get_cached (this, fd->inode);
-        if (!subvol) {
+        local = dht_local_init (frame, NULL, fd, GF_FOP_LK);
+        if (!local) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        local->lock_type = flock->l_type;
+        lock_subvol = dht_get_lock_subvolume (this, flock, local);
+        if (!lock_subvol) {
                 gf_msg_debug (this->name, 0,
                               "no cached subvolume for fd=%p", fd);
                 op_errno = EINVAL;
@@ -974,7 +983,7 @@ dht_lk (call_frame_t *frame, xlator_t *this,
         }
 
         /* TODO: for rebalance, we need to preserve the fop arguments */
-        STACK_WIND (frame, dht_lk_cbk, subvol, subvol->fops->lk, fd,
+        STACK_WIND (frame, dht_lk_cbk, lock_subvol, lock_subvol->fops->lk, fd,
                     cmd, flock, xdata);
 
         return 0;
@@ -1158,6 +1167,7 @@ dht_inodelk_cbk (call_frame_t *frame, void *cookie,
                  xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
 
 {
+        dht_lk_inode_unref (frame, op_ret);
         DHT_STACK_UNWIND (inodelk, frame, op_ret, op_errno, xdata);
         return 0;
 }
@@ -1167,9 +1177,9 @@ int32_t
 dht_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
              loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
 {
-        xlator_t     *subvol = NULL;
-        int           op_errno = -1;
-        dht_local_t  *local = NULL;
+        xlator_t        *lock_subvol            = NULL;
+        int              op_errno               = -1;
+        dht_local_t     *local                  = NULL;
 
 
         VALIDATE_OR_GOTO (frame, err);
@@ -1183,10 +1193,11 @@ dht_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
                 goto err;
         }
 
-        subvol = local->cached_subvol;
-        if (!subvol) {
+        local->lock_type = lock->l_type;
+        lock_subvol = dht_get_lock_subvolume (this, lock, local);
+        if (!lock_subvol) {
                 gf_msg_debug (this->name, 0,
-                              "no cached subvolume for path=%s", loc->path);
+                              "no lock subvolume for path=%s", loc->path);
                 op_errno = EINVAL;
                 goto err;
         }
@@ -1195,7 +1206,7 @@ dht_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
 
         STACK_WIND (frame,
                     dht_inodelk_cbk,
-                    subvol, subvol->fops->inodelk,
+                    lock_subvol, lock_subvol->fops->inodelk,
                     volume, loc, cmd, lock, xdata);
 
         return 0;
@@ -1207,12 +1218,13 @@ err:
         return 0;
 }
 
-
 int
 dht_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                   int32_t op_ret, int32_t op_errno, dict_t *xdata)
 
 {
+
+        dht_lk_inode_unref (frame, op_ret);
         DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata);
         return 0;
 }
@@ -1222,23 +1234,35 @@ int
 dht_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
               fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
 {
-        xlator_t     *subvol = NULL;
-        int           op_errno = -1;
+        xlator_t     *lock_subvol       = NULL;
+        dht_local_t  *local             = NULL;
+        int           op_errno          = -1;
+
 
         VALIDATE_OR_GOTO (frame, err);
         VALIDATE_OR_GOTO (this, err);
         VALIDATE_OR_GOTO (fd, err);
 
-        subvol = dht_subvol_get_cached (this, fd->inode);
-        if (!subvol) {
+        local = dht_local_init (frame, NULL, fd, GF_FOP_INODELK);
+        if (!local) {
+                op_errno = ENOMEM;
+                goto err;
+        }
+
+        local->call_cnt = 1;
+        local->lock_type = lock->l_type;
+
+        lock_subvol = dht_get_lock_subvolume (this, lock, local);
+        if (!lock_subvol) {
                 gf_msg_debug (this->name, 0,
-                              "no cached subvolume for fd=%p", fd);
+                              "no lock subvolume for fd=%p", fd);
                 op_errno = EINVAL;
                 goto err;
         }
 
 
-        STACK_WIND (frame, dht_finodelk_cbk, subvol, subvol->fops->finodelk,
+        STACK_WIND (frame, dht_finodelk_cbk, lock_subvol,
+                    lock_subvol->fops->finodelk,
                     volume, fd, cmd, lock, xdata);
 
         return 0;
diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h
index eb0f1c8..6bc7237 100644
--- a/xlators/cluster/dht/src/dht-messages.h
+++ b/xlators/cluster/dht/src/dht-messages.h
@@ -45,7 +45,7 @@
  */
 
 #define GLFS_DHT_BASE                   GLFS_MSGID_COMP_DHT
-#define GLFS_DHT_NUM_MESSAGES           114
+#define GLFS_DHT_NUM_MESSAGES           116
 #define GLFS_MSGID_END          (GLFS_DHT_BASE + GLFS_DHT_NUM_MESSAGES + 1)
 
 /* Messages with message IDs */
@@ -1057,6 +1057,12 @@
  */
 #define DHT_MSG_PARENT_LAYOUT_CHANGED  (GLFS_DHT_BASE + 114)
 
+/*
+ * @messageid 109116
+ * @diagnosis
+ * @recommendedaction None
+ */
+#define DHT_MSG_LOCK_INODE_UNREF_FAILED  (GLFS_DHT_BASE + 116)
 
 #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
 #endif /* _DHT_MESSAGES_H_ */
-- 
1.7.1