From 27465e9f8b567db4a5265b1cfd0f08f300667416 Mon Sep 17 00:00:00 2001 From: Raghavendra Bhat Date: Tue, 26 May 2015 19:22:14 +0530 Subject: [PATCH 185/190] features/bit-rot-stub: deny access to bad objects Backport of http://review.gluster.org/11126 * Access to bad objects (especially operations such as open, readv, writev) should be denied to prevent applications from getting wrong data. * Do not allow anyone apart from scrubber to set bad object xattr. * Do not allow bad object xattr to be removed. Change-Id: Id4e43b8318a7b0822231485c60bbc551b9adf7e8 BUG: 1224227 Signed-off-by: Raghavendra Bhat Reviewed-on: https://code.engineering.redhat.com/gerrit/51757 Reviewed-by: Venky Shankar Tested-by: Venky Shankar --- libglusterfs/src/glusterfs.h | 3 + xlators/features/bit-rot/src/bitd/bit-rot.c | 7 +- xlators/features/bit-rot/src/stub/bit-rot-common.h | 15 +- .../bit-rot/src/stub/bit-rot-stub-messages.h | 28 ++ xlators/features/bit-rot/src/stub/bit-rot-stub.c | 310 ++++++++++++++++++-- xlators/features/bit-rot/src/stub/bit-rot-stub.h | 94 ++++++ xlators/performance/quick-read/src/quick-read.c | 5 + 7 files changed, 430 insertions(+), 32 deletions(-) diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index c00bf55..97965ab 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -127,6 +127,9 @@ #define BITROT_CURRENT_VERSION_KEY "trusted.bit-rot.version" #define BITROT_SIGNING_VERSION_KEY "trusted.bit-rot.signature" +/* globally usable bad file marker */ +#define GLUSTERFS_BAD_INODE "glusterfs.bad-inode" + /* on-disk size of signing xattr (not the signature itself) */ #define BITROT_SIGNING_XATTR_SIZE_KEY "trusted.glusterfs.bit-rot.size" diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c index cf9e8e2..94063cb 100644 --- a/xlators/features/bit-rot/src/bitd/bit-rot.c +++ b/xlators/features/bit-rot/src/bitd/bit-rot.c @@ -164,11 +164,10 @@ bitd_is_bad_file (xlator_t *this, 
br_child_t *child, loc_t *loc, fd_t *fd) if (fd) ret = syncop_fgetxattr (child->xl, fd, &xattr, - "trusted.glusterfs.bad-file", NULL, - NULL); + BITROT_OBJECT_BAD_KEY, NULL, NULL); else if (loc) - ret = syncop_getxattr (child->xl, loc, &xattr, - "trusted.glusterfs.bad-file", NULL, + ret = syncop_getxattr (child->xl, loc, + &xattr, BITROT_OBJECT_BAD_KEY, NULL, NULL); if (!ret) { diff --git a/xlators/features/bit-rot/src/stub/bit-rot-common.h b/xlators/features/bit-rot/src/stub/bit-rot-common.h index a8285d2..f8d03de 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-common.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-common.h @@ -41,12 +41,23 @@ typedef enum br_sign_state { } br_sign_state_t; static inline br_vxattr_status_t -br_version_xattr_state (dict_t *xattr, - br_version_t **obuf, br_signature_t **sbuf) +br_version_xattr_state (dict_t *xattr, br_version_t **obuf, + br_signature_t **sbuf, gf_boolean_t *objbad) { int32_t ret = 0; int32_t vxattr = 0; br_vxattr_status_t status; + void *data = NULL; + + /** + * The key being present in the dict indicates the xattr was set on + * disk. The presence of xattr itself as of now is sufficient to say + * the object is bad. + */ + *objbad = _gf_false; + ret = dict_get_bin (xattr, BITROT_OBJECT_BAD_KEY, (void **)&data); + if (!ret) + *objbad = _gf_true; ret = dict_get_bin (xattr, BITROT_CURRENT_VERSION_KEY, (void **)obuf); if (ret) diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h index d940b65..db5736a 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h @@ -153,6 +153,34 @@ * @recommendedaction * */ +#define BRS_MSG_BAD_OBJ_MARK_FAIL (GLFS_BITROT_STUB_BASE + 16) +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define BRS_MSG_NON_SCRUB_BAD_OBJ_MARK (GLFS_BITROT_STUB_BASE + 17) +/*! 
+ * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define BRS_MSG_REMOVE_BAD_OBJECT_XATTR (GLFS_BITROT_STUB_BASE + 18) +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define BRS_MSG_BAD_OBJECT_ACCESS (GLFS_BITROT_STUB_BASE + 20) +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ /*------------*/ #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" #endif /* !_BITROT_STUB_MESSAGES_H_ */ diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c index 600eb80..de81510 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c @@ -237,7 +237,8 @@ br_stub_prepare_signing_request (dict_t *dict, */ static inline int br_stub_init_inode_versions (xlator_t *this, fd_t *fd, inode_t *inode, - unsigned long version, gf_boolean_t markdirty) + unsigned long version, gf_boolean_t markdirty, + gf_boolean_t bad_object) { int32_t ret = 0; br_stub_inode_ctx_t *ctx = NULL; @@ -252,17 +253,21 @@ br_stub_init_inode_versions (xlator_t *this, fd_t *fd, inode_t *inode, : __br_stub_mark_inode_synced (ctx); __br_stub_set_ongoing_version (ctx, version); + if (bad_object) + __br_stub_mark_object_bad (ctx); + if (fd) { ret = br_stub_add_fd_to_inode (this, fd, ctx); if (ret) goto free_ctx; } + ret = br_stub_set_inode_ctx (this, inode, ctx); if (ret) goto free_ctx; return 0; - free_ctx: +free_ctx: GF_FREE (ctx); error_return: return -1; @@ -290,7 +295,7 @@ br_stub_mod_inode_versions (xlator_t *this, ret = 0; } - unblock: +unblock: UNLOCK (&inode->lock); return ret; @@ -623,7 +628,7 @@ int32_t br_stub_perform_objsign (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, int flags, dict_t *xdata) { - STACK_WIND (frame, default_setxattr_cbk, + STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetxattr, fd, dict, flags, xdata); @@ -900,13 +905,101 @@ br_stub_handle_object_reopen 
(call_frame_t *frame, STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL); } +/** + * This function only handles bad file identification. Instead of checking in + * fops like open, readv, writev whether the object is bad or not by doing + * getxattr calls, better to catch them when scrubber marks it as bad. + * So this callback is called only when the fsetxattr is sent by the scrubber + * to mark the object as bad. + */ +int +br_stub_fsetxattr_bad_object_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) +{ + br_stub_local_t *local = NULL; + int32_t ret = -1; + + local = frame->local; + frame->local = NULL; + + if (op_ret < 0) + goto unwind; + + /* + * What to do if marking the object as bad fails? (i.e. in memory + * marking within the inode context. If we are here means fsetxattr + * fop has succeeded on disk and the bad object xattr has been set). + * We can return failure to scrubber, but there is nothing the scrubber + * can do with it (it might assume that the on disk setxattr itself has + * failed). The main purpose of this operation is to help identify the + * bad object by checking the inode context itself (thus avoiding the + * necessity of doing a getxattr fop on the disk). + * + * So as of now, success itself is being returned even though inode + * context set operation fails. + * In future if there is any change in the policy which can handle this, + * then appropriate response should be sent (i.e. success or error). 
+ */ + ret = br_stub_mark_object_bad (this, local->u.context.inode); + if (ret) + gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_MARK_FAIL, + "failed to mark object %s as bad", + uuid_utoa (local->u.context.inode->gfid)); + +unwind: + STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata); + br_stub_cleanup_local (local); + br_stub_dealloc_local (local); + return 0; +} + +static int32_t +br_stub_handle_bad_object_key (call_frame_t *frame, xlator_t *this, fd_t *fd, + dict_t *dict, int flags, dict_t *xdata) +{ + br_stub_local_t *local = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + + if (frame->root->pid != GF_CLIENT_PID_SCRUB) { + gf_msg (this->name, GF_LOG_ERROR, 0, + BRS_MSG_NON_SCRUB_BAD_OBJ_MARK, "bad object marking " + "on %s is not from the scrubber", + uuid_utoa (fd->inode->gfid)); + goto unwind; + } + + local = br_stub_alloc_local (this); + if (!local) { + gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_NO_MEMORY, + "failed to allocate memory for fsetxattr on %s", + uuid_utoa (fd->inode->gfid)); + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + br_stub_fill_local (local, NULL, fd, fd->inode, + fd->inode->gfid, BR_STUB_NO_VERSIONING, 0); + frame->local = local; + + STACK_WIND (frame, br_stub_fsetxattr_bad_object_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->fsetxattr, fd, dict, flags, + xdata); + return 0; +unwind: + STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL); + return 0; +} + int br_stub_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, int flags, dict_t *xdata) { - int32_t ret = 0; - uint32_t val = 0; - br_isignature_t *sign = NULL; + int32_t ret = 0; + uint32_t val = 0; + br_isignature_t *sign = NULL; if (!IA_ISREG (fd->inode->ia_type)) goto wind; @@ -927,11 +1020,18 @@ br_stub_fsetxattr (call_frame_t *frame, xlator_t *this, goto done; } - wind: - STACK_WIND (frame, default_setxattr_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetxattr, fd, - dict, flags, xdata); - 
done: + /* handle bad object */ + if (dict_get (dict, BITROT_OBJECT_BAD_KEY)) { + br_stub_handle_bad_object_key (frame, this, fd, + dict, flags, xdata); + goto done; + } + +wind: + STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->fsetxattr, fd, dict, flags, + xdata); +done: return 0; } @@ -940,6 +1040,59 @@ br_stub_fsetxattr (call_frame_t *frame, xlator_t *this, /** {{{ */ +/* {f}removexattr() */ + +int32_t +br_stub_removexattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *name, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + + if (!strcmp (BITROT_OBJECT_BAD_KEY, name)) { + gf_msg (this->name, GF_LOG_WARNING, 0, + BRS_MSG_REMOVE_BAD_OBJECT_XATTR, "Remove xattr called" + " on bad object xattr for file %s", loc->path); + goto unwind; + } + + STACK_WIND_TAIL (frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, + loc, name, xdata); + return 0; +unwind: + STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, NULL); + return 0; +} + +int32_t +br_stub_fremovexattr (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *name, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + + if (!strcmp (BITROT_OBJECT_BAD_KEY, name)) { + gf_msg (this->name, GF_LOG_WARNING, 0, + BRS_MSG_REMOVE_BAD_OBJECT_XATTR, "Remove xattr called" + " on bad object xattr for inode %s", + uuid_utoa (fd->inode->gfid)); + goto unwind; + } + + STACK_WIND_TAIL (frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, + fd, name, xdata); + return 0; +unwind: + STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, NULL); + return 0; +} + +/** }}} */ + +/** {{{ */ + /* {f}getxattr() */ int @@ -1044,6 +1197,7 @@ br_stub_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, br_vxattr_status_t status; br_stub_local_t *local = NULL; inode_t *inode = NULL; + gf_boolean_t bad_object = _gf_false; if (op_ret < 0) goto unwind; @@ -1055,7 +1209,11 @@ 
br_stub_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, inode = local->u.context.inode; op_ret = -1; - status = br_version_xattr_state (xattr, &obuf, &sbuf); + status = br_version_xattr_state (xattr, &obuf, &sbuf, &bad_object); + + op_errno = EIO; + if (bad_object) + goto delkeys; op_errno = EINVAL; if (status == BR_VXATTR_STATUS_INVALID) @@ -1286,6 +1444,31 @@ unwind: return 0; } +int32_t +br_stub_readv (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + + GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind); + GF_VALIDATE_OR_GOTO (this->name, frame, unwind); + GF_VALIDATE_OR_GOTO (this->name, fd, unwind); + GF_VALIDATE_OR_GOTO (this->name, fd->inode, unwind); + + BR_STUB_HANDLE_BAD_OBJECT (this, fd->inode, op_ret, op_errno, unwind); + + STACK_WIND_TAIL (frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, + flags, xdata); + return 0; + +unwind: + STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, NULL, 0, NULL, + NULL, NULL); + return 0; +} + /** * The first write response on the first fd in the list of fds will set * the flag to indicate that the inode is modified. The subsequent write @@ -1367,6 +1550,8 @@ br_stub_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, if (ret) goto unwind; + BR_STUB_HANDLE_BAD_OBJECT (this, fd->inode, op_ret, op_errno, unwind); + /** * The inode is not dirty and also witnessed atleast one successful * modification operation. 
Therefore, subsequent operations need not @@ -1486,6 +1671,8 @@ br_stub_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, if (ret) goto unwind; + BR_STUB_HANDLE_BAD_OBJECT (this, fd->inode, op_ret, op_errno, unwind); + if (!inc_version && modified) goto wind; @@ -1616,6 +1803,8 @@ br_stub_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, if (ret) goto cleanup_fd; + BR_STUB_HANDLE_BAD_OBJECT (this, fd->inode, op_ret, op_errno, unwind); + if (!inc_version && modified) goto wind; @@ -1689,15 +1878,14 @@ br_stub_open (call_frame_t *frame, xlator_t *this, int32_t ret = -1; br_stub_inode_ctx_t *ctx = NULL; uint64_t ctx_addr = 0; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind); GF_VALIDATE_OR_GOTO (this->name, loc, unwind); GF_VALIDATE_OR_GOTO (this->name, fd, unwind); GF_VALIDATE_OR_GOTO (this->name, fd->inode, unwind); - if (frame->root->pid == GF_CLIENT_PID_SCRUB) - goto wind; - ret = br_stub_get_inode_ctx (this, fd->inode, &ctx_addr); if (ret) { gf_msg (this->name, GF_LOG_ERROR, 0, @@ -1708,6 +1896,12 @@ br_stub_open (call_frame_t *frame, xlator_t *this, } ctx = (br_stub_inode_ctx_t *)(long)ctx_addr; + + BR_STUB_HANDLE_BAD_OBJECT (this, loc->inode, op_ret, op_errno, unwind); + + if (frame->root->pid == GF_CLIENT_PID_SCRUB) + goto wind; + if (flags == O_RDONLY) goto wind; @@ -1725,7 +1919,7 @@ wind: FIRST_CHILD (this)->fops->open, loc, flags, fd, xdata); return 0; unwind: - STACK_UNWIND_STRICT (open, frame, -1, EINVAL, NULL, NULL); + STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, NULL, NULL); return 0; } @@ -1784,7 +1978,7 @@ br_stub_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ret = br_stub_get_inode_ctx (this, fd->inode, &ctx_addr); if (ret < 0) { ret = br_stub_init_inode_versions (this, fd, inode, version, - _gf_true); + _gf_true, _gf_false); if (ret) { op_ret = -1; op_errno = EINVAL; @@ -1834,7 +2028,7 @@ br_stub_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t 
*this, goto unwind; ret = br_stub_init_inode_versions (this, NULL, inode, version, - _gf_true); + _gf_true, _gf_false); /** * Like lookup, if init_inode_versions fail, return EINVAL */ @@ -1869,6 +2063,23 @@ unwind: /** }}} */ +/** + * As of now, only lookup searches for bad object xattr and marks the + * object as bad in its inode context if the xattr is present. But there + * is a possibility that, at the time of the lookup the object was not + * marked bad (i.e. bad object xattr was not set), and later it is marked + * as bad. In this case the inode context does not know the object is bad, + * so when a fop such as open, readv or writev comes on the object, the fop + * will be sent downward instead of being failed with an error upwards. + * The solution for this is to do a getxattr for the below list of fops. + * lookup, readdirp, open, readv, writev. + * But doing getxattr for each of the above fops might be costly. + * So another method followed is to catch the bad file marking by the scrubber + * and set that info within the object's inode context. In this way getxattr + * calls can be avoided and bad objects can be caught instantly. Fetching the + * xattr is needed only in lookups when there is a brick restart or inode + * forget. + */ static inline int32_t br_stub_lookup_version (xlator_t *this, uuid_t gfid, inode_t *inode, dict_t *xattr) @@ -1877,6 +2088,7 @@ br_stub_lookup_version (xlator_t *this, br_version_t *obuf = NULL; br_signature_t *sbuf = NULL; br_vxattr_status_t status; + gf_boolean_t bad_object = _gf_false; /** * versioning xattrs were requested from POSIX. if available, figure @@ -1886,13 +2098,13 @@ br_stub_lookup_version (xlator_t *this, * operation (such as write(), etc..) triggers synchronization to * disk. */ - status = br_version_xattr_state (xattr, &obuf, &sbuf); - + status = br_version_xattr_state (xattr, &obuf, &sbuf, &bad_object); version = ((status == BR_VXATTR_STATUS_FULL) || (status == BR_VXATTR_STATUS_UNSIGNED)) ? 
obuf->ongoingversion : BITROT_DEFAULT_CURRENT_VERSION; - return br_stub_init_inode_versions (this, NULL, - inode, version, _gf_true); + + return br_stub_init_inode_versions (this, NULL, inode, version, + _gf_true, bad_object); } @@ -1975,6 +2187,9 @@ br_stub_readdirp (call_frame_t *frame, xlator_t *this, ret = dict_set_uint32 (dict, BITROT_SIGNING_VERSION_KEY, 0); if (ret) goto unwind; + ret = dict_set_uint32 (dict, BITROT_OBJECT_BAD_KEY, 0); + if (ret) + goto unwind; STACK_WIND (frame, br_stub_readdirp_cbk, FIRST_CHILD (this), FIRST_CHILD(this)->fops->readdirp, fd, size, @@ -2009,18 +2224,51 @@ br_stub_lookup_cbk (call_frame_t *frame, void *cookie, goto unwind; if (!IA_ISREG (stbuf->ia_type)) goto unwind; - if (cookie != (void *) BR_STUB_REQUEST_COOKIE) + + /** + * If the object is bad, then "bad inode" marker has to be sent back + * in response, for revalidated lookups as well. Some xlators such as + * quick-read might cache the data in revalidated lookup as fresh + * lookup would anyway have sent "bad inode" marker. + * In general send bad inode marker for every lookup operation on the + * bad object. + */ + if (cookie != (void *) BR_STUB_REQUEST_COOKIE) { + ret = br_stub_mark_xdata_bad_object (this, inode, xattr); + if (ret) { + op_ret = -1; + op_errno = EIO; + goto unwind; + } + goto delkey; + } ret = br_stub_lookup_version (this, stbuf->ia_gfid, inode, xattr); if (ret < 0) { op_ret = -1; op_errno = EINVAL; + goto delkey; + } + + /** + * If the object is bad, send "bad inode" marker back in response + * for xlator(s) to act accordingly (such as quick-read, etc..) + */ + ret = br_stub_mark_xdata_bad_object (this, inode, xattr); + if (ret) { + /** + * aaha! bad object, but sorry we would not + * satisfy the request on allocation failures. 
+ */ + op_ret = -1; + op_errno = EIO; + goto unwind; } - delkey: +delkey: br_stub_remove_vxattrs (xattr); - unwind: +unwind: STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, stbuf, xattr, postparent); @@ -2037,6 +2285,10 @@ br_stub_lookup (call_frame_t *frame, uint64_t ctx_addr = 0; gf_boolean_t xref = _gf_false; + GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind); + GF_VALIDATE_OR_GOTO (this->name, loc, unwind); + GF_VALIDATE_OR_GOTO (this->name, loc->inode, unwind); + ret = br_stub_get_inode_ctx (this, loc->inode, &ctx_addr); if (ret < 0) ctx_addr = 0; @@ -2069,6 +2321,9 @@ br_stub_lookup (call_frame_t *frame, ret = dict_set_uint32 (xdata, BITROT_SIGNING_VERSION_KEY, 0); if (ret) goto unwind; + ret = dict_set_uint32 (xdata, BITROT_OBJECT_BAD_KEY, 0); + if (ret) + goto unwind; cookie = (void *) BR_STUB_REQUEST_COOKIE; wind: @@ -2335,6 +2590,9 @@ struct xlator_fops fops = { .truncate = br_stub_truncate, .ftruncate = br_stub_ftruncate, .mknod = br_stub_mknod, + .readv = br_stub_readv, + .removexattr = br_stub_removexattr, + .fremovexattr = br_stub_fremovexattr, }; struct xlator_cbks cbks = { diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.h b/xlators/features/bit-rot/src/stub/bit-rot-stub.h index 48c7a37..e5649fc 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.h @@ -37,6 +37,7 @@ typedef struct br_stub_inode_ctx { int info_sign; struct list_head fd_list; /* list of open fds or fds participating in write operations */ + gf_boolean_t bad_object; } br_stub_inode_ctx_t; typedef struct br_stub_fd { @@ -85,6 +86,18 @@ typedef struct br_stub_private { struct mem_pool *local_pool; } br_stub_private_t; +static inline gf_boolean_t +__br_stub_is_bad_object (br_stub_inode_ctx_t *ctx) +{ + return ctx->bad_object; +} + +static inline void +__br_stub_mark_object_bad (br_stub_inode_ctx_t *ctx) +{ + ctx->bad_object = _gf_true; +} + /* inode writeback helpers */ static inline void 
__br_stub_mark_inode_dirty (br_stub_inode_ctx_t *ctx) @@ -370,12 +383,93 @@ static inline void br_stub_remove_vxattrs (dict_t *xattr) { if (xattr) { + dict_del (xattr, BITROT_OBJECT_BAD_KEY); dict_del (xattr, BITROT_CURRENT_VERSION_KEY); dict_del (xattr, BITROT_SIGNING_VERSION_KEY); dict_del (xattr, BITROT_SIGNING_XATTR_SIZE_KEY); } } +#define BR_STUB_HANDLE_BAD_OBJECT(this, inode, op_ret, op_errno, label) \ + do { \ + if (br_stub_is_bad_object (this, inode)) { \ + gf_msg (this->name, GF_LOG_ERROR, 0, \ + BRS_MSG_BAD_OBJECT_ACCESS, \ + "%s is a bad object. Returning", \ + uuid_utoa (inode->gfid)); \ + op_ret = -1; \ + op_errno = EIO; \ + goto label; \ + } \ + } while (0) + +static inline gf_boolean_t +br_stub_is_bad_object (xlator_t *this, inode_t *inode) +{ + gf_boolean_t bad_object = _gf_false; + uint64_t ctx_addr = 0; + br_stub_inode_ctx_t *ctx = NULL; + int32_t ret = -1; + + ret = br_stub_get_inode_ctx (this, inode, &ctx_addr); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED, + "failed to get the inode context for the inode %s", + uuid_utoa (inode->gfid)); + goto out; + } + + ctx = (br_stub_inode_ctx_t *)(long)ctx_addr; + + LOCK (&inode->lock); + { + bad_object = __br_stub_is_bad_object (ctx); + } + UNLOCK (&inode->lock); + +out: + return bad_object; +} + +static inline int32_t +br_stub_mark_object_bad (xlator_t *this, inode_t *inode) +{ + int32_t ret = -1; + uint64_t ctx_addr = 0; + br_stub_inode_ctx_t *ctx = NULL; + + ret = br_stub_get_inode_ctx (this, inode, &ctx_addr); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + BRS_MSG_GET_INODE_CONTEXT_FAILED, "failed to get the " + "inode context for the inode %s", + uuid_utoa (inode->gfid)); + goto out; + } + + ctx = (br_stub_inode_ctx_t *)(long)ctx_addr; + + LOCK (&inode->lock); + { + __br_stub_mark_object_bad (ctx); + } + UNLOCK (&inode->lock); + +out: + return ret; +} + +static inline int32_t +br_stub_mark_xdata_bad_object (xlator_t *this, inode_t *inode, dict_t *xdata) +{ + 
int32_t ret = 0; + + if (br_stub_is_bad_object (this, inode)) + ret = dict_set_int32 (xdata, GLUSTERFS_BAD_INODE, 1); + + return ret; +} + int32_t br_stub_add_fd_to_inode (xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx); diff --git a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c index c6913ee..1426ae5 100644 --- a/xlators/performance/quick-read/src/quick-read.c +++ b/xlators/performance/quick-read/src/quick-read.c @@ -409,6 +409,11 @@ qr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto out; } + if (dict_get (xdata, GLUSTERFS_BAD_INODE)) { + qr_inode_prune (this, inode); + goto out; + } + if (dict_get (xdata, "sh-failed")) { qr_inode_prune (this, inode); goto out; -- 1.7.1