From 4a5e97e0e726229f8295ad01c1dd2b97f88828b1 Mon Sep 17 00:00:00 2001 From: Raghavendra Bhat Date: Sat, 27 Jun 2015 13:17:32 +0530 Subject: [PATCH 283/304] features/bit-rot-stub: fail the fop if inode context get fails Upstream reivew: Reviewed-on: http://review.gluster.org/11449 In stub, for fops like readv, writev etc, if the the object is bad, then the fop is denied. But for checking if the object is bad inode context should be checked. Now, if the inode context is not there, then the fop is allowed to continue. This patch fixes it and the fop is unwound with an error, if the inode context is not found. > Change-Id: I5ea4d4fc1a91387f7f9d13ca8cb43c88429f02b0 > BUG: 1243391 > Signed-off-by: Raghavendra Bhat > Reviewed-on: http://review.gluster.org/11449 > Tested-by: NetBSD Build System > Reviewed-by: Venky Shankar > Signed-off-by: Raghavendra Bhat Change-Id: Icb07ac086b69ced60828e15bac421ef7ed31fb88 BUG: 1245165 Signed-off-by: Raghavendra Bhat Reviewed-on: https://code.engineering.redhat.com/gerrit/55760 --- tests/bitrot/bug-internal-xattrs-check-1243391.t | 42 ++++++++++++ xlators/features/bit-rot/src/stub/bit-rot-stub.c | 77 ++++++++++++++++++++-- xlators/features/bit-rot/src/stub/bit-rot-stub.h | 42 +++++++----- 3 files changed, 138 insertions(+), 23 deletions(-) create mode 100644 tests/bitrot/bug-internal-xattrs-check-1243391.t diff --git a/tests/bitrot/bug-internal-xattrs-check-1243391.t b/tests/bitrot/bug-internal-xattrs-check-1243391.t new file mode 100644 index 0000000..bc9c125 --- /dev/null +++ b/tests/bitrot/bug-internal-xattrs-check-1243391.t @@ -0,0 +1,42 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd + +## Create a distribute volume (B=2) +TEST $CLI volume create $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}2; +EXPECT "$V0" volinfo_field $V0 'Volume Name'; +EXPECT 'Created' volinfo_field $V0 'Status'; +EXPECT '2' brick_count $V0 + + +## Start the volume +TEST $CLI volume start $V0; +EXPECT 'Started' volinfo_field $V0 'Status'; + +## Mount the volume +TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0; + +echo "123" >> $M0/file; + +TEST ! setfattr -n "trusted.glusterfs.set-signature" -v "123" $M0/file; +TEST ! setfattr -n "trusted.glusterfs.get-signature" -v "123" $M0/file; + +# sign xattr +TEST ! setfattr -n "trusted.bit-rot.signature" -v "123" $M0/file; +TEST ! setfattr -x "trusted.bit-rot.signature" $M0/file; + +# versioning xattr +TEST ! setfattr -n "trusted.bit-rot.version" -v "123" $M0/file; +TEST ! setfattr -x "trusted.bit-rot.version" $M0/file; + +# bad file xattr +TEST ! setfattr -n "trusted.bit-rot.bad-file" -v "123" $M0/file; +TEST ! setfattr -x "trusted.bit-rot.bad-file" $M0/file; + +cleanup; diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c index 3b3d4d1..52f0d5c 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c @@ -457,6 +457,40 @@ br_stub_mark_inode_modified (xlator_t *this, br_stub_local_t *local) } /** + * The possible return values from br_stub_is_bad_object () are: + * 1) 0 => as per the inode context object is not bad + * 2) -1 => Failed to get the inode context itself + * 3) -2 => As per the inode context object is bad + * Both -ve values means the fop which called this function is failed + * and error is returned upwards. + */ +static int +br_stub_check_bad_object (xlator_t *this, inode_t *inode, int32_t *op_ret, + int32_t *op_errno) +{ + int ret = -1; + + ret = br_stub_is_bad_object (this, inode); + if (ret == -2) { + gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJECT_ACCESS, + "%s is a bad object. Returning", + uuid_utoa (inode->gfid)); + *op_ret = -1; + *op_errno = EIO; + } + + if (ret == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, + BRS_MSG_GET_INODE_CONTEXT_FAILED, "could not get inode" + " context for %s", uuid_utoa (inode->gfid)); + *op_ret = -1; + *op_errno = EINVAL; + } + + return ret; +} + +/** * callback for inode/fd versioning */ int @@ -1100,6 +1134,12 @@ br_stub_fsetxattr (call_frame_t *frame, xlator_t *this, goto done; } + if (dict_get (dict, GLUSTERFS_GET_OBJECT_SIGNATURE)) { + br_stub_handle_internal_xattr (frame, this, fd, + GLUSTERFS_GET_OBJECT_SIGNATURE); + goto done; + } + /* object reopen request */ ret = dict_get_uint32 (dict, BR_REOPEN_SIGN_HINT_KEY, &val); if (!ret) { @@ -1140,6 +1180,7 @@ br_stub_setxattr (call_frame_t *frame, xlator_t *this, char *format = "(%s:%s)"; if (dict_get (dict, GLUSTERFS_SET_OBJECT_SIGNATURE) || + dict_get (dict, GLUSTERFS_GET_OBJECT_SIGNATURE) || dict_get (dict, BR_REOPEN_SIGN_HINT_KEY) || dict_get (dict, BITROT_OBJECT_BAD_KEY) || dict_get (dict, BITROT_SIGNING_VERSION_KEY) || @@ -1582,13 +1623,16 @@ br_stub_readv (call_frame_t *frame, xlator_t *this, { int32_t op_ret = -1; int32_t op_errno = EINVAL; + int32_t ret = -1; GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind); GF_VALIDATE_OR_GOTO (this->name, frame, unwind); GF_VALIDATE_OR_GOTO (this->name, fd, unwind); GF_VALIDATE_OR_GOTO (this->name, fd->inode, unwind); - BR_STUB_HANDLE_BAD_OBJECT (this, fd->inode, op_ret, op_errno, unwind); + ret = br_stub_check_bad_object (this, fd->inode, &op_ret, &op_errno); + if (ret) + goto unwind; STACK_WIND_TAIL (frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv, fd, size, offset, @@ -1682,7 +1726,9 @@ br_stub_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, if (ret) goto unwind; - BR_STUB_HANDLE_BAD_OBJECT (this, fd->inode, op_ret, op_errno, unwind); + ret = br_stub_check_bad_object (this, fd->inode, &op_ret, &op_errno); + if (ret) + goto unwind; /** * The inode is not dirty and also witnessed atleast one successful @@ -1803,7 +1849,9 @@ br_stub_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, if (ret) goto unwind; - BR_STUB_HANDLE_BAD_OBJECT (this, fd->inode, op_ret, op_errno, unwind); + ret = br_stub_check_bad_object (this, fd->inode, &op_ret, &op_errno); + if (ret) + goto unwind; if (!inc_version && modified) goto wind; @@ -1935,7 +1983,9 @@ br_stub_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, if (ret) goto cleanup_fd; - BR_STUB_HANDLE_BAD_OBJECT (this, fd->inode, op_ret, op_errno, unwind); + ret = br_stub_check_bad_object (this, fd->inode, &op_ret, &op_errno); + if (ret) + goto unwind; if (!inc_version && modified) goto wind; @@ -2029,7 +2079,9 @@ br_stub_open (call_frame_t *frame, xlator_t *this, ctx = (br_stub_inode_ctx_t *)(long)ctx_addr; - BR_STUB_HANDLE_BAD_OBJECT (this, loc->inode, op_ret, op_errno, unwind); + ret = br_stub_check_bad_object (this, fd->inode, &op_ret, &op_errno); + if (ret) + goto unwind; if (frame->root->pid == GF_CLIENT_PID_SCRUB) goto wind; @@ -2211,6 +2263,13 @@ unwind: * calls can be avoided and bad objects can be caught instantly. Fetching the * xattr is needed only in lookups when there is a brick restart or inode * forget. + * + * If the dict (@xattr) is NULL, then how should that be handled? Fail the + * lookup operation? Or let it continue with version being initialized to + * BITROT_DEFAULT_CURRENT_VERSION. But what if the version was different + * on disk (and also a right signature was there), but posix failed to + * successfully allocate the dict? Posix does not treat call back xdata + * creattion failure as the lookup failure. */ static inline int32_t br_stub_lookup_version (xlator_t *this, @@ -2235,6 +2294,14 @@ br_stub_lookup_version (xlator_t *this, || (status == BR_VXATTR_STATUS_UNSIGNED)) ? obuf->ongoingversion : BITROT_DEFAULT_CURRENT_VERSION; + /** + * If signature is there, but version is not therem then that status is + * is treated as INVALID. So in that case, we should not initialize the + * inode context with wrong version names etc. + */ + if (status == BR_VXATTR_STATUS_INVALID) + return -1; + return br_stub_init_inode_versions (this, NULL, inode, version, _gf_true, bad_object); } diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.h b/xlators/features/bit-rot/src/stub/bit-rot-stub.h index e5649fc..260fe18 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.h @@ -390,32 +390,32 @@ br_stub_remove_vxattrs (dict_t *xattr) } } -#define BR_STUB_HANDLE_BAD_OBJECT(this, inode, op_ret, op_errno, label) \ - do { \ - if (br_stub_is_bad_object (this, inode)) { \ - gf_msg (this->name, GF_LOG_ERROR, 0, \ - BRS_MSG_BAD_OBJECT_ACCESS, \ - "%s is a bad object. Returning", \ - uuid_utoa (inode->gfid)); \ - op_ret = -1; \ - op_errno = EIO; \ - goto label; \ - } \ - } while (0) - -static inline gf_boolean_t +/** + * This function returns the below values for different situations + * 0 => as per the inode context object is not bad + * -1 => Failed to get the inode context itself + * -2 => As per the inode context object is bad + * Both -ve values means the fop which called this function is failed + * and error is returned upwards. + * In future if needed or more errors have to be handled, then those + * errors can be made into enums. + */ +static inline int br_stub_is_bad_object (xlator_t *this, inode_t *inode) { - gf_boolean_t bad_object = _gf_false; + int bad_object = 0; + gf_boolean_t tmp = _gf_false; uint64_t ctx_addr = 0; br_stub_inode_ctx_t *ctx = NULL; int32_t ret = -1; ret = br_stub_get_inode_ctx (this, inode, &ctx_addr); if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_CONTEXT_FAILED, + gf_msg (this->name, GF_LOG_ERROR, 0, + BRS_MSG_GET_INODE_CONTEXT_FAILED, "failed to get the inode context for the inode %s", uuid_utoa (inode->gfid)); + bad_object = -1; goto out; } @@ -423,7 +423,9 @@ br_stub_is_bad_object (xlator_t *this, inode_t *inode) LOCK (&inode->lock); { - bad_object = __br_stub_is_bad_object (ctx); + tmp = __br_stub_is_bad_object (ctx); + if (tmp) + bad_object = -2; } UNLOCK (&inode->lock); @@ -459,12 +461,16 @@ out: return ret; } +/** + * There is a possibility that dict_set might fail. The o/p of dict_set is + * given to the caller and the caller has to decide what to do. + */ static inline int32_t br_stub_mark_xdata_bad_object (xlator_t *this, inode_t *inode, dict_t *xdata) { int32_t ret = 0; - if (br_stub_is_bad_object (this, inode)) + if (br_stub_is_bad_object (this, inode) == -2) ret = dict_set_int32 (xdata, GLUSTERFS_BAD_INODE, 1); return ret; -- 1.7.1