|
|
cb8e9e |
From 4a5e97e0e726229f8295ad01c1dd2b97f88828b1 Mon Sep 17 00:00:00 2001
|
|
|
cb8e9e |
From: Raghavendra Bhat <raghavendra@redhat.com>
|
|
|
cb8e9e |
Date: Sat, 27 Jun 2015 13:17:32 +0530
|
|
|
cb8e9e |
Subject: [PATCH 283/304] features/bit-rot-stub: fail the fop if inode context get fails
|
|
|
cb8e9e |
|
|
|
cb8e9e |
Upstream reivew: Reviewed-on: http://review.gluster.org/11449
|
|
|
cb8e9e |
|
|
|
cb8e9e |
In stub, for fops like readv, writev etc, if the the object is bad, then the fop
|
|
|
cb8e9e |
is denied. But for checking if the object is bad inode context should be
|
|
|
cb8e9e |
checked. Now, if the inode context is not there, then the fop is allowed to
|
|
|
cb8e9e |
continue. This patch fixes it and the fop is unwound with an error, if the inode
|
|
|
cb8e9e |
context is not found.
|
|
|
cb8e9e |
|
|
|
cb8e9e |
> Change-Id: I5ea4d4fc1a91387f7f9d13ca8cb43c88429f02b0
|
|
|
cb8e9e |
> BUG: 1243391
|
|
|
cb8e9e |
> Signed-off-by: Raghavendra Bhat <raghavendra@redhat.com>
|
|
|
cb8e9e |
> Reviewed-on: http://review.gluster.org/11449
|
|
|
cb8e9e |
> Tested-by: NetBSD Build System <jenkins@build.gluster.org>
|
|
|
cb8e9e |
> Reviewed-by: Venky Shankar <vshankar@redhat.com>
|
|
|
cb8e9e |
> Signed-off-by: Raghavendra Bhat <raghavendra@redhat.com>
|
|
|
cb8e9e |
|
|
|
cb8e9e |
Change-Id: Icb07ac086b69ced60828e15bac421ef7ed31fb88
|
|
|
cb8e9e |
BUG: 1245165
|
|
|
cb8e9e |
Signed-off-by: Raghavendra Bhat <raghavendra@redhat.com>
|
|
|
cb8e9e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/55760
|
|
|
cb8e9e |
---
|
|
|
cb8e9e |
tests/bitrot/bug-internal-xattrs-check-1243391.t | 42 ++++++++++++
|
|
|
cb8e9e |
xlators/features/bit-rot/src/stub/bit-rot-stub.c | 77 ++++++++++++++++++++--
|
|
|
cb8e9e |
xlators/features/bit-rot/src/stub/bit-rot-stub.h | 42 +++++++-----
|
|
|
cb8e9e |
3 files changed, 138 insertions(+), 23 deletions(-)
|
|
|
cb8e9e |
create mode 100644 tests/bitrot/bug-internal-xattrs-check-1243391.t
|
|
|
cb8e9e |
|
|
|
cb8e9e |
diff --git a/tests/bitrot/bug-internal-xattrs-check-1243391.t b/tests/bitrot/bug-internal-xattrs-check-1243391.t
|
|
|
cb8e9e |
new file mode 100644
|
|
|
cb8e9e |
index 0000000..bc9c125
|
|
|
cb8e9e |
--- /dev/null
|
|
|
cb8e9e |
+++ b/tests/bitrot/bug-internal-xattrs-check-1243391.t
|
|
|
cb8e9e |
@@ -0,0 +1,42 @@
|
|
|
cb8e9e |
+#!/bin/bash
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+. $(dirname $0)/../include.rc
|
|
|
cb8e9e |
+. $(dirname $0)/../volume.rc
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+cleanup;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+TEST glusterd
|
|
|
cb8e9e |
+TEST pidof glusterd
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+## Create a distribute volume (B=2)
|
|
|
cb8e9e |
+TEST $CLI volume create $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}2;
|
|
|
cb8e9e |
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
|
|
|
cb8e9e |
+EXPECT 'Created' volinfo_field $V0 'Status';
|
|
|
cb8e9e |
+EXPECT '2' brick_count $V0
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+## Start the volume
|
|
|
cb8e9e |
+TEST $CLI volume start $V0;
|
|
|
cb8e9e |
+EXPECT 'Started' volinfo_field $V0 'Status';
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+## Mount the volume
|
|
|
cb8e9e |
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+echo "123" >> $M0/file;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+TEST ! setfattr -n "trusted.glusterfs.set-signature" -v "123" $M0/file;
|
|
|
cb8e9e |
+TEST ! setfattr -n "trusted.glusterfs.get-signature" -v "123" $M0/file;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+# sign xattr
|
|
|
cb8e9e |
+TEST ! setfattr -n "trusted.bit-rot.signature" -v "123" $M0/file;
|
|
|
cb8e9e |
+TEST ! setfattr -x "trusted.bit-rot.signature" $M0/file;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+# versioning xattr
|
|
|
cb8e9e |
+TEST ! setfattr -n "trusted.bit-rot.version" -v "123" $M0/file;
|
|
|
cb8e9e |
+TEST ! setfattr -x "trusted.bit-rot.version" $M0/file;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+# bad file xattr
|
|
|
cb8e9e |
+TEST ! setfattr -n "trusted.bit-rot.bad-file" -v "123" $M0/file;
|
|
|
cb8e9e |
+TEST ! setfattr -x "trusted.bit-rot.bad-file" $M0/file;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+cleanup;
|
|
|
cb8e9e |
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
|
|
|
cb8e9e |
index 3b3d4d1..52f0d5c 100644
|
|
|
cb8e9e |
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
|
|
|
cb8e9e |
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
|
|
|
cb8e9e |
@@ -457,6 +457,40 @@ br_stub_mark_inode_modified (xlator_t *this, br_stub_local_t *local)
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
/**
|
|
|
cb8e9e |
+ * The possible return values from br_stub_is_bad_object () are:
|
|
|
cb8e9e |
+ * 1) 0 => as per the inode context object is not bad
|
|
|
cb8e9e |
+ * 2) -1 => Failed to get the inode context itself
|
|
|
cb8e9e |
+ * 3) -2 => As per the inode context object is bad
|
|
|
cb8e9e |
+ * Both -ve values means the fop which called this function is failed
|
|
|
cb8e9e |
+ * and error is returned upwards.
|
|
|
cb8e9e |
+ */
|
|
|
cb8e9e |
+static int
|
|
|
cb8e9e |
+br_stub_check_bad_object (xlator_t *this, inode_t *inode, int32_t *op_ret,
|
|
|
cb8e9e |
+ int32_t *op_errno)
|
|
|
cb8e9e |
+{
|
|
|
cb8e9e |
+ int ret = -1;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ ret = br_stub_is_bad_object (this, inode);
|
|
|
cb8e9e |
+ if (ret == -2) {
|
|
|
cb8e9e |
+ gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJECT_ACCESS,
|
|
|
cb8e9e |
+ "%s is a bad object. Returning",
|
|
|
cb8e9e |
+ uuid_utoa (inode->gfid));
|
|
|
cb8e9e |
+ *op_ret = -1;
|
|
|
cb8e9e |
+ *op_errno = EIO;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ if (ret == -1) {
|
|
|
cb8e9e |
+ gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
|
cb8e9e |
+ BRS_MSG_GET_INODE_CONTEXT_FAILED, "could not get inode"
|
|
|
cb8e9e |
+ " context for %s", uuid_utoa (inode->gfid));
|
|
|
cb8e9e |
+ *op_ret = -1;
|
|
|
cb8e9e |
+ *op_errno = EINVAL;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ return ret;
|
|
|
cb8e9e |
+}
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+/**
|
|
|
cb8e9e |
* callback for inode/fd versioning
|
|
|
cb8e9e |
*/
|
|
|
cb8e9e |
int
|
|
|
cb8e9e |
@@ -1100,6 +1134,12 @@ br_stub_fsetxattr (call_frame_t *frame, xlator_t *this,
|
|
|
cb8e9e |
goto done;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+ if (dict_get (dict, GLUSTERFS_GET_OBJECT_SIGNATURE)) {
|
|
|
cb8e9e |
+ br_stub_handle_internal_xattr (frame, this, fd,
|
|
|
cb8e9e |
+ GLUSTERFS_GET_OBJECT_SIGNATURE);
|
|
|
cb8e9e |
+ goto done;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
/* object reopen request */
|
|
|
cb8e9e |
ret = dict_get_uint32 (dict, BR_REOPEN_SIGN_HINT_KEY, &val;;
|
|
|
cb8e9e |
if (!ret) {
|
|
|
cb8e9e |
@@ -1140,6 +1180,7 @@ br_stub_setxattr (call_frame_t *frame, xlator_t *this,
|
|
|
cb8e9e |
char *format = "(%s:%s)";
|
|
|
cb8e9e |
|
|
|
cb8e9e |
if (dict_get (dict, GLUSTERFS_SET_OBJECT_SIGNATURE) ||
|
|
|
cb8e9e |
+ dict_get (dict, GLUSTERFS_GET_OBJECT_SIGNATURE) ||
|
|
|
cb8e9e |
dict_get (dict, BR_REOPEN_SIGN_HINT_KEY) ||
|
|
|
cb8e9e |
dict_get (dict, BITROT_OBJECT_BAD_KEY) ||
|
|
|
cb8e9e |
dict_get (dict, BITROT_SIGNING_VERSION_KEY) ||
|
|
|
cb8e9e |
@@ -1582,13 +1623,16 @@ br_stub_readv (call_frame_t *frame, xlator_t *this,
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
int32_t op_ret = -1;
|
|
|
cb8e9e |
int32_t op_errno = EINVAL;
|
|
|
cb8e9e |
+ int32_t ret = -1;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind);
|
|
|
cb8e9e |
GF_VALIDATE_OR_GOTO (this->name, frame, unwind);
|
|
|
cb8e9e |
GF_VALIDATE_OR_GOTO (this->name, fd, unwind);
|
|
|
cb8e9e |
GF_VALIDATE_OR_GOTO (this->name, fd->inode, unwind);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- BR_STUB_HANDLE_BAD_OBJECT (this, fd->inode, op_ret, op_errno, unwind);
|
|
|
cb8e9e |
+ ret = br_stub_check_bad_object (this, fd->inode, &op_ret, &op_errno);
|
|
|
cb8e9e |
+ if (ret)
|
|
|
cb8e9e |
+ goto unwind;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
STACK_WIND_TAIL (frame, FIRST_CHILD(this),
|
|
|
cb8e9e |
FIRST_CHILD(this)->fops->readv, fd, size, offset,
|
|
|
cb8e9e |
@@ -1682,7 +1726,9 @@ br_stub_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
|
|
|
cb8e9e |
if (ret)
|
|
|
cb8e9e |
goto unwind;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- BR_STUB_HANDLE_BAD_OBJECT (this, fd->inode, op_ret, op_errno, unwind);
|
|
|
cb8e9e |
+ ret = br_stub_check_bad_object (this, fd->inode, &op_ret, &op_errno);
|
|
|
cb8e9e |
+ if (ret)
|
|
|
cb8e9e |
+ goto unwind;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
/**
|
|
|
cb8e9e |
* The inode is not dirty and also witnessed atleast one successful
|
|
|
cb8e9e |
@@ -1803,7 +1849,9 @@ br_stub_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
|
|
|
cb8e9e |
if (ret)
|
|
|
cb8e9e |
goto unwind;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- BR_STUB_HANDLE_BAD_OBJECT (this, fd->inode, op_ret, op_errno, unwind);
|
|
|
cb8e9e |
+ ret = br_stub_check_bad_object (this, fd->inode, &op_ret, &op_errno);
|
|
|
cb8e9e |
+ if (ret)
|
|
|
cb8e9e |
+ goto unwind;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
if (!inc_version && modified)
|
|
|
cb8e9e |
goto wind;
|
|
|
cb8e9e |
@@ -1935,7 +1983,9 @@ br_stub_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
|
|
|
cb8e9e |
if (ret)
|
|
|
cb8e9e |
goto cleanup_fd;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- BR_STUB_HANDLE_BAD_OBJECT (this, fd->inode, op_ret, op_errno, unwind);
|
|
|
cb8e9e |
+ ret = br_stub_check_bad_object (this, fd->inode, &op_ret, &op_errno);
|
|
|
cb8e9e |
+ if (ret)
|
|
|
cb8e9e |
+ goto unwind;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
if (!inc_version && modified)
|
|
|
cb8e9e |
goto wind;
|
|
|
cb8e9e |
@@ -2029,7 +2079,9 @@ br_stub_open (call_frame_t *frame, xlator_t *this,
|
|
|
cb8e9e |
|
|
|
cb8e9e |
ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- BR_STUB_HANDLE_BAD_OBJECT (this, loc->inode, op_ret, op_errno, unwind);
|
|
|
cb8e9e |
+ ret = br_stub_check_bad_object (this, fd->inode, &op_ret, &op_errno);
|
|
|
cb8e9e |
+ if (ret)
|
|
|
cb8e9e |
+ goto unwind;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
if (frame->root->pid == GF_CLIENT_PID_SCRUB)
|
|
|
cb8e9e |
goto wind;
|
|
|
cb8e9e |
@@ -2211,6 +2263,13 @@ unwind:
|
|
|
cb8e9e |
* calls can be avoided and bad objects can be caught instantly. Fetching the
|
|
|
cb8e9e |
* xattr is needed only in lookups when there is a brick restart or inode
|
|
|
cb8e9e |
* forget.
|
|
|
cb8e9e |
+ *
|
|
|
cb8e9e |
+ * If the dict (@xattr) is NULL, then how should that be handled? Fail the
|
|
|
cb8e9e |
+ * lookup operation? Or let it continue with version being initialized to
|
|
|
cb8e9e |
+ * BITROT_DEFAULT_CURRENT_VERSION. But what if the version was different
|
|
|
cb8e9e |
+ * on disk (and also a right signature was there), but posix failed to
|
|
|
cb8e9e |
+ * successfully allocate the dict? Posix does not treat call back xdata
|
|
|
cb8e9e |
+ * creattion failure as the lookup failure.
|
|
|
cb8e9e |
*/
|
|
|
cb8e9e |
static inline int32_t
|
|
|
cb8e9e |
br_stub_lookup_version (xlator_t *this,
|
|
|
cb8e9e |
@@ -2235,6 +2294,14 @@ br_stub_lookup_version (xlator_t *this,
|
|
|
cb8e9e |
|| (status == BR_VXATTR_STATUS_UNSIGNED))
|
|
|
cb8e9e |
? obuf->ongoingversion : BITROT_DEFAULT_CURRENT_VERSION;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+ /**
|
|
|
cb8e9e |
+ * If signature is there, but version is not therem then that status is
|
|
|
cb8e9e |
+ * is treated as INVALID. So in that case, we should not initialize the
|
|
|
cb8e9e |
+ * inode context with wrong version names etc.
|
|
|
cb8e9e |
+ */
|
|
|
cb8e9e |
+ if (status == BR_VXATTR_STATUS_INVALID)
|
|
|
cb8e9e |
+ return -1;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
return br_stub_init_inode_versions (this, NULL, inode, version,
|
|
|
cb8e9e |
_gf_true, bad_object);
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.h b/xlators/features/bit-rot/src/stub/bit-rot-stub.h
|
|
|
cb8e9e |
index e5649fc..260fe18 100644
|
|
|
cb8e9e |
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.h
|
|
|
cb8e9e |
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.h
|
|
|
cb8e9e |
@@ -390,32 +390,32 @@ br_stub_remove_vxattrs (dict_t *xattr)
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
-#define BR_STUB_HANDLE_BAD_OBJECT(this, inode, op_ret, op_errno, label) \
|
|
|
cb8e9e |
- do { \
|
|
|
cb8e9e |
- if (br_stub_is_bad_object (this, inode)) { \
|
|
|
cb8e9e |
- gf_msg (this->name, GF_LOG_ERROR, 0, \
|
|
|
cb8e9e |
- BRS_MSG_BAD_OBJECT_ACCESS, \
|
|
|
cb8e9e |
- "%s is a bad object. Returning", \
|
|
|
cb8e9e |
- uuid_utoa (inode->gfid)); \
|
|
|
cb8e9e |
- op_ret = -1; \
|
|
|
cb8e9e |
- op_errno = EIO; \
|
|
|
cb8e9e |
- goto label; \
|
|
|
cb8e9e |
- } \
|
|
|
cb8e9e |
- } while (0)
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
-static inline gf_boolean_t
|
|
|
cb8e9e |
+/**
|
|
|
cb8e9e |
+ * This function returns the below values for different situations
|
|
|
cb8e9e |
+ * 0 => as per the inode context object is not bad
|
|
|
cb8e9e |
+ * -1 => Failed to get the inode context itself
|
|
|
cb8e9e |
+ * -2 => As per the inode context object is bad
|
|
|
cb8e9e |
+ * Both -ve values means the fop which called this function is failed
|
|
|
cb8e9e |
+ * and error is returned upwards.
|
|
|
cb8e9e |
+ * In future if needed or more errors have to be handled, then those
|
|
|
cb8e9e |
+ * errors can be made into enums.
|
|
|
cb8e9e |
+ */
|
|
|
cb8e9e |
+static inline int
|
|
|
cb8e9e |
br_stub_is_bad_object (xlator_t *this, inode_t *inode)
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
- gf_boolean_t bad_object = _gf_false;
|
|
|
cb8e9e |
+ int bad_object = 0;
|
|
|
cb8e9e |
+ gf_boolean_t tmp = _gf_false;
|
|
|
cb8e9e |
uint64_t ctx_addr = 0;
|
|
|
cb8e9e |
br_stub_inode_ctx_t *ctx = NULL;
|
|
|
cb8e9e |
int32_t ret = -1;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
ret = br_stub_get_inode_ctx (this, inode, &ctx_addr);
|
|
|
cb8e9e |
if (ret) {
|
|
|
cb8e9e |
- gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_CONTEXT_FAILED,
|
|
|
cb8e9e |
+ gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
|
cb8e9e |
+ BRS_MSG_GET_INODE_CONTEXT_FAILED,
|
|
|
cb8e9e |
"failed to get the inode context for the inode %s",
|
|
|
cb8e9e |
uuid_utoa (inode->gfid));
|
|
|
cb8e9e |
+ bad_object = -1;
|
|
|
cb8e9e |
goto out;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
@@ -423,7 +423,9 @@ br_stub_is_bad_object (xlator_t *this, inode_t *inode)
|
|
|
cb8e9e |
|
|
|
cb8e9e |
LOCK (&inode->lock);
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
- bad_object = __br_stub_is_bad_object (ctx);
|
|
|
cb8e9e |
+ tmp = __br_stub_is_bad_object (ctx);
|
|
|
cb8e9e |
+ if (tmp)
|
|
|
cb8e9e |
+ bad_object = -2;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
UNLOCK (&inode->lock);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
@@ -459,12 +461,16 @@ out:
|
|
|
cb8e9e |
return ret;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+/**
|
|
|
cb8e9e |
+ * There is a possibility that dict_set might fail. The o/p of dict_set is
|
|
|
cb8e9e |
+ * given to the caller and the caller has to decide what to do.
|
|
|
cb8e9e |
+ */
|
|
|
cb8e9e |
static inline int32_t
|
|
|
cb8e9e |
br_stub_mark_xdata_bad_object (xlator_t *this, inode_t *inode, dict_t *xdata)
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
int32_t ret = 0;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- if (br_stub_is_bad_object (this, inode))
|
|
|
cb8e9e |
+ if (br_stub_is_bad_object (this, inode) == -2)
|
|
|
cb8e9e |
ret = dict_set_int32 (xdata, GLUSTERFS_BAD_INODE, 1);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
return ret;
|
|
|
cb8e9e |
--
|
|
|
cb8e9e |
1.7.1
|
|
|
cb8e9e |
|