cb8e9e
From 4a5e97e0e726229f8295ad01c1dd2b97f88828b1 Mon Sep 17 00:00:00 2001
cb8e9e
From: Raghavendra Bhat <raghavendra@redhat.com>
cb8e9e
Date: Sat, 27 Jun 2015 13:17:32 +0530
cb8e9e
Subject: [PATCH 283/304] features/bit-rot-stub: fail the fop if inode context get fails
cb8e9e
cb8e9e
         Upstream reivew: Reviewed-on: http://review.gluster.org/11449
cb8e9e
cb8e9e
In stub, for fops like readv, writev etc, if the the object is bad, then the fop
cb8e9e
is denied. But for checking if the object is bad inode context should be
cb8e9e
checked. Now, if the inode context is not there, then the fop is allowed to
cb8e9e
continue. This patch fixes it and the fop is unwound with an error, if the inode
cb8e9e
context is not found.
cb8e9e
cb8e9e
> Change-Id: I5ea4d4fc1a91387f7f9d13ca8cb43c88429f02b0
cb8e9e
> BUG: 1243391
cb8e9e
> Signed-off-by: Raghavendra Bhat <raghavendra@redhat.com>
cb8e9e
> Reviewed-on: http://review.gluster.org/11449
cb8e9e
> Tested-by: NetBSD Build System <jenkins@build.gluster.org>
cb8e9e
> Reviewed-by: Venky Shankar <vshankar@redhat.com>
cb8e9e
> Signed-off-by: Raghavendra Bhat <raghavendra@redhat.com>
cb8e9e
cb8e9e
Change-Id: Icb07ac086b69ced60828e15bac421ef7ed31fb88
cb8e9e
BUG: 1245165
cb8e9e
Signed-off-by: Raghavendra Bhat <raghavendra@redhat.com>
cb8e9e
Reviewed-on: https://code.engineering.redhat.com/gerrit/55760
cb8e9e
---
cb8e9e
 tests/bitrot/bug-internal-xattrs-check-1243391.t |   42 ++++++++++++
cb8e9e
 xlators/features/bit-rot/src/stub/bit-rot-stub.c |   77 ++++++++++++++++++++--
cb8e9e
 xlators/features/bit-rot/src/stub/bit-rot-stub.h |   42 +++++++-----
cb8e9e
 3 files changed, 138 insertions(+), 23 deletions(-)
cb8e9e
 create mode 100644 tests/bitrot/bug-internal-xattrs-check-1243391.t
cb8e9e
cb8e9e
diff --git a/tests/bitrot/bug-internal-xattrs-check-1243391.t b/tests/bitrot/bug-internal-xattrs-check-1243391.t
cb8e9e
new file mode 100644
cb8e9e
index 0000000..bc9c125
cb8e9e
--- /dev/null
cb8e9e
+++ b/tests/bitrot/bug-internal-xattrs-check-1243391.t
cb8e9e
@@ -0,0 +1,42 @@
cb8e9e
+#!/bin/bash
cb8e9e
+
cb8e9e
+. $(dirname $0)/../include.rc
cb8e9e
+. $(dirname $0)/../volume.rc
cb8e9e
+
cb8e9e
+cleanup;
cb8e9e
+
cb8e9e
+TEST glusterd
cb8e9e
+TEST pidof glusterd
cb8e9e
+
cb8e9e
+## Create a distribute volume (B=2)
cb8e9e
+TEST $CLI volume create $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}2;
cb8e9e
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
cb8e9e
+EXPECT 'Created' volinfo_field $V0 'Status';
cb8e9e
+EXPECT '2' brick_count $V0
cb8e9e
+
cb8e9e
+
cb8e9e
+## Start the volume
cb8e9e
+TEST $CLI volume start $V0;
cb8e9e
+EXPECT 'Started' volinfo_field $V0 'Status';
cb8e9e
+
cb8e9e
+## Mount the volume
cb8e9e
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
cb8e9e
+
cb8e9e
+echo "123" >> $M0/file;
cb8e9e
+
cb8e9e
+TEST ! setfattr -n "trusted.glusterfs.set-signature" -v "123" $M0/file;
cb8e9e
+TEST ! setfattr -n "trusted.glusterfs.get-signature" -v "123" $M0/file;
cb8e9e
+
cb8e9e
+# sign xattr
cb8e9e
+TEST ! setfattr -n "trusted.bit-rot.signature" -v "123" $M0/file;
cb8e9e
+TEST ! setfattr -x "trusted.bit-rot.signature" $M0/file;
cb8e9e
+
cb8e9e
+# versioning xattr
cb8e9e
+TEST ! setfattr -n "trusted.bit-rot.version" -v "123" $M0/file;
cb8e9e
+TEST ! setfattr -x "trusted.bit-rot.version" $M0/file;
cb8e9e
+
cb8e9e
+# bad file xattr
cb8e9e
+TEST ! setfattr -n "trusted.bit-rot.bad-file" -v "123" $M0/file;
cb8e9e
+TEST ! setfattr -x "trusted.bit-rot.bad-file" $M0/file;
cb8e9e
+
cb8e9e
+cleanup;
cb8e9e
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
cb8e9e
index 3b3d4d1..52f0d5c 100644
cb8e9e
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
cb8e9e
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
cb8e9e
@@ -457,6 +457,40 @@ br_stub_mark_inode_modified (xlator_t *this, br_stub_local_t *local)
cb8e9e
 }
cb8e9e
 
cb8e9e
 /**
cb8e9e
+ * The possible return values from br_stub_is_bad_object () are:
cb8e9e
+ * 1) 0  => as per the inode context object is not bad
cb8e9e
+ * 2) -1 => Failed to get the inode context itself
cb8e9e
+ * 3) -2 => As per the inode context object is bad
cb8e9e
+ * Both -ve values means the fop which called this function is failed
cb8e9e
+ * and error is returned upwards.
cb8e9e
+ */
cb8e9e
+static int
cb8e9e
+br_stub_check_bad_object (xlator_t *this, inode_t *inode, int32_t *op_ret,
cb8e9e
+                           int32_t *op_errno)
cb8e9e
+{
cb8e9e
+        int ret = -1;
cb8e9e
+
cb8e9e
+        ret = br_stub_is_bad_object (this, inode);
cb8e9e
+        if (ret == -2) {
cb8e9e
+                gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJECT_ACCESS,
cb8e9e
+                        "%s is a bad object. Returning",
cb8e9e
+                        uuid_utoa (inode->gfid));
cb8e9e
+                *op_ret = -1;
cb8e9e
+                *op_errno = EIO;
cb8e9e
+        }
cb8e9e
+
cb8e9e
+        if (ret == -1) {
cb8e9e
+                gf_msg (this->name, GF_LOG_ERROR, 0,
cb8e9e
+                        BRS_MSG_GET_INODE_CONTEXT_FAILED, "could not get inode"
cb8e9e
+                        " context for %s", uuid_utoa (inode->gfid));
cb8e9e
+                *op_ret = -1;
cb8e9e
+                *op_errno = EINVAL;
cb8e9e
+        }
cb8e9e
+
cb8e9e
+        return ret;
cb8e9e
+}
cb8e9e
+
cb8e9e
+/**
cb8e9e
  * callback for inode/fd versioning
cb8e9e
  */
cb8e9e
 int
cb8e9e
@@ -1100,6 +1134,12 @@ br_stub_fsetxattr (call_frame_t *frame, xlator_t *this,
cb8e9e
                 goto done;
cb8e9e
         }
cb8e9e
 
cb8e9e
+        if (dict_get (dict, GLUSTERFS_GET_OBJECT_SIGNATURE)) {
cb8e9e
+                br_stub_handle_internal_xattr (frame, this, fd,
cb8e9e
+                                               GLUSTERFS_GET_OBJECT_SIGNATURE);
cb8e9e
+                goto done;
cb8e9e
+        }
cb8e9e
+
cb8e9e
         /* object reopen request */
cb8e9e
         ret = dict_get_uint32 (dict, BR_REOPEN_SIGN_HINT_KEY, &val;;
cb8e9e
         if (!ret) {
cb8e9e
@@ -1140,6 +1180,7 @@ br_stub_setxattr (call_frame_t *frame, xlator_t *this,
cb8e9e
         char    *format                    = "(%s:%s)";
cb8e9e
 
cb8e9e
         if (dict_get (dict, GLUSTERFS_SET_OBJECT_SIGNATURE) ||
cb8e9e
+            dict_get (dict, GLUSTERFS_GET_OBJECT_SIGNATURE) ||
cb8e9e
             dict_get (dict, BR_REOPEN_SIGN_HINT_KEY) ||
cb8e9e
             dict_get (dict, BITROT_OBJECT_BAD_KEY) ||
cb8e9e
             dict_get (dict, BITROT_SIGNING_VERSION_KEY) ||
cb8e9e
@@ -1582,13 +1623,16 @@ br_stub_readv (call_frame_t *frame, xlator_t *this,
cb8e9e
 {
cb8e9e
         int32_t              op_ret   = -1;
cb8e9e
         int32_t              op_errno = EINVAL;
cb8e9e
+        int32_t              ret      = -1;
cb8e9e
 
cb8e9e
         GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind);
cb8e9e
         GF_VALIDATE_OR_GOTO (this->name, frame, unwind);
cb8e9e
         GF_VALIDATE_OR_GOTO (this->name, fd, unwind);
cb8e9e
         GF_VALIDATE_OR_GOTO (this->name, fd->inode, unwind);
cb8e9e
 
cb8e9e
-        BR_STUB_HANDLE_BAD_OBJECT (this, fd->inode, op_ret, op_errno, unwind);
cb8e9e
+        ret = br_stub_check_bad_object (this, fd->inode, &op_ret, &op_errno);
cb8e9e
+        if (ret)
cb8e9e
+                goto unwind;
cb8e9e
 
cb8e9e
         STACK_WIND_TAIL (frame, FIRST_CHILD(this),
cb8e9e
                          FIRST_CHILD(this)->fops->readv, fd, size, offset,
cb8e9e
@@ -1682,7 +1726,9 @@ br_stub_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
cb8e9e
         if (ret)
cb8e9e
                 goto unwind;
cb8e9e
 
cb8e9e
-        BR_STUB_HANDLE_BAD_OBJECT (this, fd->inode, op_ret, op_errno, unwind);
cb8e9e
+        ret = br_stub_check_bad_object (this, fd->inode, &op_ret, &op_errno);
cb8e9e
+        if (ret)
cb8e9e
+                goto unwind;
cb8e9e
 
cb8e9e
         /**
cb8e9e
          * The inode is not dirty and also witnessed atleast one successful
cb8e9e
@@ -1803,7 +1849,9 @@ br_stub_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
cb8e9e
         if (ret)
cb8e9e
                 goto unwind;
cb8e9e
 
cb8e9e
-        BR_STUB_HANDLE_BAD_OBJECT (this, fd->inode, op_ret, op_errno, unwind);
cb8e9e
+        ret = br_stub_check_bad_object (this, fd->inode, &op_ret, &op_errno);
cb8e9e
+        if (ret)
cb8e9e
+                goto unwind;
cb8e9e
 
cb8e9e
         if (!inc_version && modified)
cb8e9e
                 goto wind;
cb8e9e
@@ -1935,7 +1983,9 @@ br_stub_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
cb8e9e
         if (ret)
cb8e9e
                 goto cleanup_fd;
cb8e9e
 
cb8e9e
-        BR_STUB_HANDLE_BAD_OBJECT (this, fd->inode, op_ret, op_errno, unwind);
cb8e9e
+        ret = br_stub_check_bad_object (this, fd->inode, &op_ret, &op_errno);
cb8e9e
+        if (ret)
cb8e9e
+                goto unwind;
cb8e9e
 
cb8e9e
         if (!inc_version && modified)
cb8e9e
                 goto wind;
cb8e9e
@@ -2029,7 +2079,9 @@ br_stub_open (call_frame_t *frame, xlator_t *this,
cb8e9e
 
cb8e9e
         ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
cb8e9e
 
cb8e9e
-        BR_STUB_HANDLE_BAD_OBJECT (this, loc->inode, op_ret, op_errno, unwind);
cb8e9e
+        ret = br_stub_check_bad_object (this, fd->inode, &op_ret, &op_errno);
cb8e9e
+        if (ret)
cb8e9e
+                goto unwind;
cb8e9e
 
cb8e9e
         if (frame->root->pid == GF_CLIENT_PID_SCRUB)
cb8e9e
                 goto wind;
cb8e9e
@@ -2211,6 +2263,13 @@ unwind:
cb8e9e
  * calls can be avoided and bad objects can be caught instantly. Fetching the
cb8e9e
  * xattr is needed only in lookups when there is a brick restart or inode
cb8e9e
  * forget.
cb8e9e
+ *
cb8e9e
+ * If the dict (@xattr) is NULL, then how should that be handled? Fail the
cb8e9e
+ * lookup operation? Or let it continue with version being initialized to
cb8e9e
+ * BITROT_DEFAULT_CURRENT_VERSION. But what if the version was different
cb8e9e
+ * on disk (and also a right signature was there), but posix failed to
cb8e9e
+ * successfully allocate the dict? Posix does not treat call back xdata
cb8e9e
+ * creattion failure as the lookup failure.
cb8e9e
  */
cb8e9e
 static inline int32_t
cb8e9e
 br_stub_lookup_version (xlator_t *this,
cb8e9e
@@ -2235,6 +2294,14 @@ br_stub_lookup_version (xlator_t *this,
cb8e9e
                    || (status == BR_VXATTR_STATUS_UNSIGNED))
cb8e9e
                         ? obuf->ongoingversion : BITROT_DEFAULT_CURRENT_VERSION;
cb8e9e
 
cb8e9e
+        /**
cb8e9e
+         * If signature is there, but version is not therem then that status is
cb8e9e
+         * is treated as INVALID. So in that case, we should not initialize the
cb8e9e
+         * inode context with wrong version names etc.
cb8e9e
+         */
cb8e9e
+        if (status == BR_VXATTR_STATUS_INVALID)
cb8e9e
+                return -1;
cb8e9e
+
cb8e9e
         return br_stub_init_inode_versions (this, NULL, inode, version,
cb8e9e
                                             _gf_true, bad_object);
cb8e9e
 }
cb8e9e
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.h b/xlators/features/bit-rot/src/stub/bit-rot-stub.h
cb8e9e
index e5649fc..260fe18 100644
cb8e9e
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.h
cb8e9e
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.h
cb8e9e
@@ -390,32 +390,32 @@ br_stub_remove_vxattrs (dict_t *xattr)
cb8e9e
         }
cb8e9e
 }
cb8e9e
 
cb8e9e
-#define BR_STUB_HANDLE_BAD_OBJECT(this, inode, op_ret, op_errno, label) \
cb8e9e
-        do {                                                            \
cb8e9e
-                if (br_stub_is_bad_object (this, inode)) {              \
cb8e9e
-                        gf_msg (this->name, GF_LOG_ERROR, 0,            \
cb8e9e
-                                 BRS_MSG_BAD_OBJECT_ACCESS,             \
cb8e9e
-                                 "%s is a bad object. Returning",       \
cb8e9e
-                                 uuid_utoa (inode->gfid));              \
cb8e9e
-                        op_ret = -1;                                    \
cb8e9e
-                        op_errno = EIO;                                 \
cb8e9e
-                        goto label;                                     \
cb8e9e
-                }                                                       \
cb8e9e
-        } while (0)
cb8e9e
-
cb8e9e
-static inline gf_boolean_t
cb8e9e
+/**
cb8e9e
+ * This function returns the below values for different situations
cb8e9e
+ * 0  => as per the inode context object is not bad
cb8e9e
+ * -1 => Failed to get the inode context itself
cb8e9e
+ * -2 => As per the inode context object is bad
cb8e9e
+ * Both -ve values means the fop which called this function is failed
cb8e9e
+ * and error is returned upwards.
cb8e9e
+ * In future if needed or more errors have to be handled, then those
cb8e9e
+ * errors can be made into enums.
cb8e9e
+ */
cb8e9e
+static inline int
cb8e9e
 br_stub_is_bad_object (xlator_t *this, inode_t *inode)
cb8e9e
 {
cb8e9e
-        gf_boolean_t         bad_object = _gf_false;
cb8e9e
+        int                  bad_object = 0;
cb8e9e
+        gf_boolean_t         tmp        = _gf_false;
cb8e9e
         uint64_t             ctx_addr   = 0;
cb8e9e
         br_stub_inode_ctx_t *ctx        = NULL;
cb8e9e
         int32_t              ret        = -1;
cb8e9e
 
cb8e9e
         ret = br_stub_get_inode_ctx (this, inode, &ctx_addr);
cb8e9e
         if (ret) {
cb8e9e
-                gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_CONTEXT_FAILED,
cb8e9e
+                gf_msg (this->name, GF_LOG_ERROR, 0,
cb8e9e
+                        BRS_MSG_GET_INODE_CONTEXT_FAILED,
cb8e9e
                         "failed to get the inode context for the inode %s",
cb8e9e
                         uuid_utoa (inode->gfid));
cb8e9e
+                bad_object = -1;
cb8e9e
                 goto out;
cb8e9e
         }
cb8e9e
 
cb8e9e
@@ -423,7 +423,9 @@ br_stub_is_bad_object (xlator_t *this, inode_t *inode)
cb8e9e
 
cb8e9e
         LOCK (&inode->lock);
cb8e9e
         {
cb8e9e
-                bad_object = __br_stub_is_bad_object (ctx);
cb8e9e
+                tmp = __br_stub_is_bad_object (ctx);
cb8e9e
+                if (tmp)
cb8e9e
+                        bad_object = -2;
cb8e9e
         }
cb8e9e
         UNLOCK (&inode->lock);
cb8e9e
 
cb8e9e
@@ -459,12 +461,16 @@ out:
cb8e9e
         return ret;
cb8e9e
 }
cb8e9e
 
cb8e9e
+/**
cb8e9e
+ * There is a possibility that dict_set might fail. The o/p of dict_set is
cb8e9e
+ * given to the caller and the caller has to decide what to do.
cb8e9e
+ */
cb8e9e
 static inline int32_t
cb8e9e
 br_stub_mark_xdata_bad_object (xlator_t *this, inode_t *inode, dict_t *xdata)
cb8e9e
 {
cb8e9e
         int32_t    ret = 0;
cb8e9e
 
cb8e9e
-        if (br_stub_is_bad_object (this, inode))
cb8e9e
+        if (br_stub_is_bad_object (this, inode) == -2)
cb8e9e
                 ret = dict_set_int32 (xdata, GLUSTERFS_BAD_INODE, 1);
cb8e9e
 
cb8e9e
         return ret;
cb8e9e
-- 
cb8e9e
1.7.1
cb8e9e