|
|
cb8e9e |
From 27465e9f8b567db4a5265b1cfd0f08f300667416 Mon Sep 17 00:00:00 2001
|
|
|
cb8e9e |
From: Raghavendra Bhat <raghavendra@redhat.com>
|
|
|
cb8e9e |
Date: Tue, 26 May 2015 19:22:14 +0530
|
|
|
cb8e9e |
Subject: [PATCH 185/190] features/bit-rot-stub: deny access to bad objects
|
|
|
cb8e9e |
|
|
|
cb8e9e |
Backport of http://review.gluster.org/11126
|
|
|
cb8e9e |
|
|
|
cb8e9e |
* Access to bad objects (especially operations such as open, readv, writev)
|
|
|
cb8e9e |
should be denied to prevent applications from getting wrong data.
|
|
|
cb8e9e |
|
|
|
cb8e9e |
* Do not allow anyone apart from scrubber to set bad object xattr.
|
|
|
cb8e9e |
|
|
|
cb8e9e |
* Do not allow bad object xattr to be removed.
|
|
|
cb8e9e |
|
|
|
cb8e9e |
Change-Id: Id4e43b8318a7b0822231485c60bbc551b9adf7e8
|
|
|
cb8e9e |
BUG: 1224227
|
|
|
cb8e9e |
Signed-off-by: Raghavendra Bhat <raghavendra@redhat.com>
|
|
|
cb8e9e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/51757
|
|
|
cb8e9e |
Reviewed-by: Venky Shankar <vshankar@redhat.com>
|
|
|
cb8e9e |
Tested-by: Venky Shankar <vshankar@redhat.com>
|
|
|
cb8e9e |
---
|
|
|
cb8e9e |
libglusterfs/src/glusterfs.h | 3 +
|
|
|
cb8e9e |
xlators/features/bit-rot/src/bitd/bit-rot.c | 7 +-
|
|
|
cb8e9e |
xlators/features/bit-rot/src/stub/bit-rot-common.h | 15 +-
|
|
|
cb8e9e |
.../bit-rot/src/stub/bit-rot-stub-messages.h | 28 ++
|
|
|
cb8e9e |
xlators/features/bit-rot/src/stub/bit-rot-stub.c | 310 ++++++++++++++++++--
|
|
|
cb8e9e |
xlators/features/bit-rot/src/stub/bit-rot-stub.h | 94 ++++++
|
|
|
cb8e9e |
xlators/performance/quick-read/src/quick-read.c | 5 +
|
|
|
cb8e9e |
7 files changed, 430 insertions(+), 32 deletions(-)
|
|
|
cb8e9e |
|
|
|
cb8e9e |
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
|
|
|
cb8e9e |
index c00bf55..97965ab 100644
|
|
|
cb8e9e |
--- a/libglusterfs/src/glusterfs.h
|
|
|
cb8e9e |
+++ b/libglusterfs/src/glusterfs.h
|
|
|
cb8e9e |
@@ -127,6 +127,9 @@
|
|
|
cb8e9e |
#define BITROT_CURRENT_VERSION_KEY "trusted.bit-rot.version"
|
|
|
cb8e9e |
#define BITROT_SIGNING_VERSION_KEY "trusted.bit-rot.signature"
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+/* globally usable bad file marker */
|
|
|
cb8e9e |
+#define GLUSTERFS_BAD_INODE "glusterfs.bad-inode"
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
/* on-disk size of signing xattr (not the signature itself) */
|
|
|
cb8e9e |
#define BITROT_SIGNING_XATTR_SIZE_KEY "trusted.glusterfs.bit-rot.size"
|
|
|
cb8e9e |
|
|
|
cb8e9e |
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
|
|
|
cb8e9e |
index cf9e8e2..94063cb 100644
|
|
|
cb8e9e |
--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
|
|
|
cb8e9e |
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
|
|
|
cb8e9e |
@@ -164,11 +164,10 @@ bitd_is_bad_file (xlator_t *this, br_child_t *child, loc_t *loc, fd_t *fd)
|
|
|
cb8e9e |
|
|
|
cb8e9e |
if (fd)
|
|
|
cb8e9e |
ret = syncop_fgetxattr (child->xl, fd, &xattr,
|
|
|
cb8e9e |
- "trusted.glusterfs.bad-file", NULL,
|
|
|
cb8e9e |
- NULL);
|
|
|
cb8e9e |
+ BITROT_OBJECT_BAD_KEY, NULL, NULL);
|
|
|
cb8e9e |
else if (loc)
|
|
|
cb8e9e |
- ret = syncop_getxattr (child->xl, loc, &xattr,
|
|
|
cb8e9e |
- "trusted.glusterfs.bad-file", NULL,
|
|
|
cb8e9e |
+ ret = syncop_getxattr (child->xl, loc,
|
|
|
cb8e9e |
+ &xattr, BITROT_OBJECT_BAD_KEY, NULL,
|
|
|
cb8e9e |
NULL);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
if (!ret) {
|
|
|
cb8e9e |
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-common.h b/xlators/features/bit-rot/src/stub/bit-rot-common.h
|
|
|
cb8e9e |
index a8285d2..f8d03de 100644
|
|
|
cb8e9e |
--- a/xlators/features/bit-rot/src/stub/bit-rot-common.h
|
|
|
cb8e9e |
+++ b/xlators/features/bit-rot/src/stub/bit-rot-common.h
|
|
|
cb8e9e |
@@ -41,12 +41,23 @@ typedef enum br_sign_state {
|
|
|
cb8e9e |
} br_sign_state_t;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
static inline br_vxattr_status_t
|
|
|
cb8e9e |
-br_version_xattr_state (dict_t *xattr,
|
|
|
cb8e9e |
- br_version_t **obuf, br_signature_t **sbuf)
|
|
|
cb8e9e |
+br_version_xattr_state (dict_t *xattr, br_version_t **obuf,
|
|
|
cb8e9e |
+ br_signature_t **sbuf, gf_boolean_t *objbad)
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
int32_t ret = 0;
|
|
|
cb8e9e |
int32_t vxattr = 0;
|
|
|
cb8e9e |
br_vxattr_status_t status;
|
|
|
cb8e9e |
+ void *data = NULL;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ /**
|
|
|
cb8e9e |
+ * The key being present in the dict indicates the xattr was set on
|
|
|
cb8e9e |
+ * disk. The presence of xattr itself as of now is sufficient to say
|
|
|
cb8e9e |
+ * that the object is bad.
|
|
|
cb8e9e |
+ */
|
|
|
cb8e9e |
+ *objbad = _gf_false;
|
|
|
cb8e9e |
+ ret = dict_get_bin (xattr, BITROT_OBJECT_BAD_KEY, (void **)&data);
|
|
|
cb8e9e |
+ if (!ret)
|
|
|
cb8e9e |
+ *objbad = _gf_true;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
ret = dict_get_bin (xattr, BITROT_CURRENT_VERSION_KEY, (void **)obuf);
|
|
|
cb8e9e |
if (ret)
|
|
|
cb8e9e |
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
|
|
|
cb8e9e |
index d940b65..db5736a 100644
|
|
|
cb8e9e |
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
|
|
|
cb8e9e |
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
|
|
|
cb8e9e |
@@ -153,6 +153,34 @@
|
|
|
cb8e9e |
* @recommendedaction
|
|
|
cb8e9e |
*
|
|
|
cb8e9e |
*/
|
|
|
cb8e9e |
+#define BRS_MSG_BAD_OBJ_MARK_FAIL (GLFS_BITROT_STUB_BASE + 16)
|
|
|
cb8e9e |
+/*!
|
|
|
cb8e9e |
+ * @messageid
|
|
|
cb8e9e |
+ * @diagnosis
|
|
|
cb8e9e |
+ * @recommendedaction
|
|
|
cb8e9e |
+ *
|
|
|
cb8e9e |
+ */
|
|
|
cb8e9e |
+#define BRS_MSG_NON_SCRUB_BAD_OBJ_MARK (GLFS_BITROT_STUB_BASE + 17)
|
|
|
cb8e9e |
+/*!
|
|
|
cb8e9e |
+ * @messageid
|
|
|
cb8e9e |
+ * @diagnosis
|
|
|
cb8e9e |
+ * @recommendedaction
|
|
|
cb8e9e |
+ *
|
|
|
cb8e9e |
+ */
|
|
|
cb8e9e |
+#define BRS_MSG_REMOVE_BAD_OBJECT_XATTR (GLFS_BITROT_STUB_BASE + 18)
|
|
|
cb8e9e |
+/*!
|
|
|
cb8e9e |
+ * @messageid
|
|
|
cb8e9e |
+ * @diagnosis
|
|
|
cb8e9e |
+ * @recommendedaction
|
|
|
cb8e9e |
+ *
|
|
|
cb8e9e |
+ */
|
|
|
cb8e9e |
+#define BRS_MSG_BAD_OBJECT_ACCESS (GLFS_BITROT_STUB_BASE + 20)
|
|
|
cb8e9e |
+/*!
|
|
|
cb8e9e |
+ * @messageid
|
|
|
cb8e9e |
+ * @diagnosis
|
|
|
cb8e9e |
+ * @recommendedaction
|
|
|
cb8e9e |
+ *
|
|
|
cb8e9e |
+ */
|
|
|
cb8e9e |
/*------------*/
|
|
|
cb8e9e |
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
|
|
|
cb8e9e |
#endif /* !_BITROT_STUB_MESSAGES_H_ */
|
|
|
cb8e9e |
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
|
|
|
cb8e9e |
index 600eb80..de81510 100644
|
|
|
cb8e9e |
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
|
|
|
cb8e9e |
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
|
|
|
cb8e9e |
@@ -237,7 +237,8 @@ br_stub_prepare_signing_request (dict_t *dict,
|
|
|
cb8e9e |
*/
|
|
|
cb8e9e |
static inline int
|
|
|
cb8e9e |
br_stub_init_inode_versions (xlator_t *this, fd_t *fd, inode_t *inode,
|
|
|
cb8e9e |
- unsigned long version, gf_boolean_t markdirty)
|
|
|
cb8e9e |
+ unsigned long version, gf_boolean_t markdirty,
|
|
|
cb8e9e |
+ gf_boolean_t bad_object)
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
int32_t ret = 0;
|
|
|
cb8e9e |
br_stub_inode_ctx_t *ctx = NULL;
|
|
|
cb8e9e |
@@ -252,17 +253,21 @@ br_stub_init_inode_versions (xlator_t *this, fd_t *fd, inode_t *inode,
|
|
|
cb8e9e |
: __br_stub_mark_inode_synced (ctx);
|
|
|
cb8e9e |
__br_stub_set_ongoing_version (ctx, version);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+ if (bad_object)
|
|
|
cb8e9e |
+ __br_stub_mark_object_bad (ctx);
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
if (fd) {
|
|
|
cb8e9e |
ret = br_stub_add_fd_to_inode (this, fd, ctx);
|
|
|
cb8e9e |
if (ret)
|
|
|
cb8e9e |
goto free_ctx;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
ret = br_stub_set_inode_ctx (this, inode, ctx);
|
|
|
cb8e9e |
if (ret)
|
|
|
cb8e9e |
goto free_ctx;
|
|
|
cb8e9e |
return 0;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- free_ctx:
|
|
|
cb8e9e |
+free_ctx:
|
|
|
cb8e9e |
GF_FREE (ctx);
|
|
|
cb8e9e |
error_return:
|
|
|
cb8e9e |
return -1;
|
|
|
cb8e9e |
@@ -290,7 +295,7 @@ br_stub_mod_inode_versions (xlator_t *this,
|
|
|
cb8e9e |
|
|
|
cb8e9e |
ret = 0;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
- unblock:
|
|
|
cb8e9e |
+unblock:
|
|
|
cb8e9e |
UNLOCK (&inode->lock);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
return ret;
|
|
|
cb8e9e |
@@ -623,7 +628,7 @@ int32_t
|
|
|
cb8e9e |
br_stub_perform_objsign (call_frame_t *frame, xlator_t *this,
|
|
|
cb8e9e |
fd_t *fd, dict_t *dict, int flags, dict_t *xdata)
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
- STACK_WIND (frame, default_setxattr_cbk,
|
|
|
cb8e9e |
+ STACK_WIND (frame, default_fsetxattr_cbk,
|
|
|
cb8e9e |
FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetxattr, fd,
|
|
|
cb8e9e |
dict, flags, xdata);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
@@ -900,13 +905,101 @@ br_stub_handle_object_reopen (call_frame_t *frame,
|
|
|
cb8e9e |
STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL);
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+/**
|
|
|
cb8e9e |
+ * This function only handles bad file identification. Instead of checking in
|
|
|
cb8e9e |
+ * fops like open, readv, writev whether the object is bad or not by doing
|
|
|
cb8e9e |
+ * getxattr calls, better to catch them when scrubber marks it as bad.
|
|
|
cb8e9e |
+ * So this callback is called only when the fsetxattr is sent by the scrubber
|
|
|
cb8e9e |
+ * to mark the object as bad.
|
|
|
cb8e9e |
+ */
|
|
|
cb8e9e |
+int
|
|
|
cb8e9e |
+br_stub_fsetxattr_bad_object_cbk (call_frame_t *frame, void *cookie,
|
|
|
cb8e9e |
+ xlator_t *this, int32_t op_ret,
|
|
|
cb8e9e |
+ int32_t op_errno, dict_t *xdata)
|
|
|
cb8e9e |
+{
|
|
|
cb8e9e |
+ br_stub_local_t *local = NULL;
|
|
|
cb8e9e |
+ int32_t ret = -1;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ local = frame->local;
|
|
|
cb8e9e |
+ frame->local = NULL;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ if (op_ret < 0)
|
|
|
cb8e9e |
+ goto unwind;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ /*
|
|
|
cb8e9e |
+ * What to do if marking the object as bad fails? (i.e. in memory
|
|
|
cb8e9e |
+ * marking within the inode context. If we are here, it means fsetxattr
|
|
|
cb8e9e |
+ * fop has succeeded on disk and the bad object xattr has been set).
|
|
|
cb8e9e |
+ * We can return failure to scrubber, but there is nothing the scrubber
|
|
|
cb8e9e |
+ * can do with it (it might assume that the on disk setxattr itself has
|
|
|
cb8e9e |
+ * failed). The main purpose of this operation is to help identify the
|
|
|
cb8e9e |
+ * bad object by checking the inode context itself (thus avoiding the
|
|
|
cb8e9e |
+ * necessity of doing a getxattr fop on the disk).
|
|
|
cb8e9e |
+ *
|
|
|
cb8e9e |
+ * So as of now, success itself is being returned even though inode
|
|
|
cb8e9e |
+ * context set operation fails.
|
|
|
cb8e9e |
+ * In future if there is any change in the policy which can handle this,
|
|
|
cb8e9e |
+ * then appropriate response should be sent (i.e. success or error).
|
|
|
cb8e9e |
+ */
|
|
|
cb8e9e |
+ ret = br_stub_mark_object_bad (this, local->u.context.inode);
|
|
|
cb8e9e |
+ if (ret)
|
|
|
cb8e9e |
+ gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_MARK_FAIL,
|
|
|
cb8e9e |
+ "failed to mark object %s as bad",
|
|
|
cb8e9e |
+ uuid_utoa (local->u.context.inode->gfid));
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+unwind:
|
|
|
cb8e9e |
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
|
|
|
cb8e9e |
+ br_stub_cleanup_local (local);
|
|
|
cb8e9e |
+ br_stub_dealloc_local (local);
|
|
|
cb8e9e |
+ return 0;
|
|
|
cb8e9e |
+}
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+static int32_t
|
|
|
cb8e9e |
+br_stub_handle_bad_object_key (call_frame_t *frame, xlator_t *this, fd_t *fd,
|
|
|
cb8e9e |
+ dict_t *dict, int flags, dict_t *xdata)
|
|
|
cb8e9e |
+{
|
|
|
cb8e9e |
+ br_stub_local_t *local = NULL;
|
|
|
cb8e9e |
+ int32_t op_ret = -1;
|
|
|
cb8e9e |
+ int32_t op_errno = EINVAL;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ if (frame->root->pid != GF_CLIENT_PID_SCRUB) {
|
|
|
cb8e9e |
+ gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
|
cb8e9e |
+ BRS_MSG_NON_SCRUB_BAD_OBJ_MARK, "bad object marking "
|
|
|
cb8e9e |
+ "on %s is not from the scrubber",
|
|
|
cb8e9e |
+ uuid_utoa (fd->inode->gfid));
|
|
|
cb8e9e |
+ goto unwind;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ local = br_stub_alloc_local (this);
|
|
|
cb8e9e |
+ if (!local) {
|
|
|
cb8e9e |
+ gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_NO_MEMORY,
|
|
|
cb8e9e |
+ "failed to allocate memory for fsetxattr on %s",
|
|
|
cb8e9e |
+ uuid_utoa (fd->inode->gfid));
|
|
|
cb8e9e |
+ op_ret = -1;
|
|
|
cb8e9e |
+ op_errno = ENOMEM;
|
|
|
cb8e9e |
+ goto unwind;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ br_stub_fill_local (local, NULL, fd, fd->inode,
|
|
|
cb8e9e |
+ fd->inode->gfid, BR_STUB_NO_VERSIONING, 0);
|
|
|
cb8e9e |
+ frame->local = local;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ STACK_WIND (frame, br_stub_fsetxattr_bad_object_cbk, FIRST_CHILD (this),
|
|
|
cb8e9e |
+ FIRST_CHILD (this)->fops->fsetxattr, fd, dict, flags,
|
|
|
cb8e9e |
+ xdata);
|
|
|
cb8e9e |
+ return 0;
|
|
|
cb8e9e |
+unwind:
|
|
|
cb8e9e |
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL);
|
|
|
cb8e9e |
+ return 0;
|
|
|
cb8e9e |
+}
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
int
|
|
|
cb8e9e |
br_stub_fsetxattr (call_frame_t *frame, xlator_t *this,
|
|
|
cb8e9e |
fd_t *fd, dict_t *dict, int flags, dict_t *xdata)
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
- int32_t ret = 0;
|
|
|
cb8e9e |
- uint32_t val = 0;
|
|
|
cb8e9e |
- br_isignature_t *sign = NULL;
|
|
|
cb8e9e |
+ int32_t ret = 0;
|
|
|
cb8e9e |
+ uint32_t val = 0;
|
|
|
cb8e9e |
+ br_isignature_t *sign = NULL;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
if (!IA_ISREG (fd->inode->ia_type))
|
|
|
cb8e9e |
goto wind;
|
|
|
cb8e9e |
@@ -927,11 +1020,18 @@ br_stub_fsetxattr (call_frame_t *frame, xlator_t *this,
|
|
|
cb8e9e |
goto done;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- wind:
|
|
|
cb8e9e |
- STACK_WIND (frame, default_setxattr_cbk,
|
|
|
cb8e9e |
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsetxattr, fd,
|
|
|
cb8e9e |
- dict, flags, xdata);
|
|
|
cb8e9e |
- done:
|
|
|
cb8e9e |
+ /* handle bad object */
|
|
|
cb8e9e |
+ if (dict_get (dict, BITROT_OBJECT_BAD_KEY)) {
|
|
|
cb8e9e |
+ br_stub_handle_bad_object_key (frame, this, fd,
|
|
|
cb8e9e |
+ dict, flags, xdata);
|
|
|
cb8e9e |
+ goto done;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+wind:
|
|
|
cb8e9e |
+ STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD (this),
|
|
|
cb8e9e |
+ FIRST_CHILD (this)->fops->fsetxattr, fd, dict, flags,
|
|
|
cb8e9e |
+ xdata);
|
|
|
cb8e9e |
+done:
|
|
|
cb8e9e |
return 0;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
@@ -940,6 +1040,59 @@ br_stub_fsetxattr (call_frame_t *frame, xlator_t *this,
|
|
|
cb8e9e |
|
|
|
cb8e9e |
/** {{{ */
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+/* {f}removexattr() */
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+int32_t
|
|
|
cb8e9e |
+br_stub_removexattr (call_frame_t *frame, xlator_t *this,
|
|
|
cb8e9e |
+ loc_t *loc, const char *name, dict_t *xdata)
|
|
|
cb8e9e |
+{
|
|
|
cb8e9e |
+ int32_t op_ret = -1;
|
|
|
cb8e9e |
+ int32_t op_errno = EINVAL;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ if (!strcmp (BITROT_OBJECT_BAD_KEY, name)) {
|
|
|
cb8e9e |
+ gf_msg (this->name, GF_LOG_WARNING, 0,
|
|
|
cb8e9e |
+ BRS_MSG_REMOVE_BAD_OBJECT_XATTR, "Remove xattr called"
|
|
|
cb8e9e |
+ " on bad object xattr for file %s", loc->path);
|
|
|
cb8e9e |
+ goto unwind;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
|
|
|
cb8e9e |
+ FIRST_CHILD(this)->fops->removexattr,
|
|
|
cb8e9e |
+ loc, name, xdata);
|
|
|
cb8e9e |
+ return 0;
|
|
|
cb8e9e |
+unwind:
|
|
|
cb8e9e |
+ STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, NULL);
|
|
|
cb8e9e |
+ return 0;
|
|
|
cb8e9e |
+}
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+int32_t
|
|
|
cb8e9e |
+br_stub_fremovexattr (call_frame_t *frame, xlator_t *this,
|
|
|
cb8e9e |
+ fd_t *fd, const char *name, dict_t *xdata)
|
|
|
cb8e9e |
+{
|
|
|
cb8e9e |
+ int32_t op_ret = -1;
|
|
|
cb8e9e |
+ int32_t op_errno = EINVAL;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ if (!strcmp (BITROT_OBJECT_BAD_KEY, name)) {
|
|
|
cb8e9e |
+ gf_msg (this->name, GF_LOG_WARNING, 0,
|
|
|
cb8e9e |
+ BRS_MSG_REMOVE_BAD_OBJECT_XATTR, "Remove xattr called"
|
|
|
cb8e9e |
+ " on bad object xattr for inode %s",
|
|
|
cb8e9e |
+ uuid_utoa (fd->inode->gfid));
|
|
|
cb8e9e |
+ goto unwind;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
|
|
|
cb8e9e |
+ FIRST_CHILD(this)->fops->fremovexattr,
|
|
|
cb8e9e |
+ fd, name, xdata);
|
|
|
cb8e9e |
+ return 0;
|
|
|
cb8e9e |
+unwind:
|
|
|
cb8e9e |
+ STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, NULL);
|
|
|
cb8e9e |
+ return 0;
|
|
|
cb8e9e |
+}
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+/** }}} */
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+/** {{{ */
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
/* {f}getxattr() */
|
|
|
cb8e9e |
|
|
|
cb8e9e |
int
|
|
|
cb8e9e |
@@ -1044,6 +1197,7 @@ br_stub_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
|
cb8e9e |
br_vxattr_status_t status;
|
|
|
cb8e9e |
br_stub_local_t *local = NULL;
|
|
|
cb8e9e |
inode_t *inode = NULL;
|
|
|
cb8e9e |
+ gf_boolean_t bad_object = _gf_false;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
if (op_ret < 0)
|
|
|
cb8e9e |
goto unwind;
|
|
|
cb8e9e |
@@ -1055,7 +1209,11 @@ br_stub_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
|
cb8e9e |
inode = local->u.context.inode;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
op_ret = -1;
|
|
|
cb8e9e |
- status = br_version_xattr_state (xattr, &obuf, &sbuf);
|
|
|
cb8e9e |
+ status = br_version_xattr_state (xattr, &obuf, &sbuf, &bad_object);
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ op_errno = EIO;
|
|
|
cb8e9e |
+ if (bad_object)
|
|
|
cb8e9e |
+ goto delkeys;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
op_errno = EINVAL;
|
|
|
cb8e9e |
if (status == BR_VXATTR_STATUS_INVALID)
|
|
|
cb8e9e |
@@ -1286,6 +1444,31 @@ unwind:
|
|
|
cb8e9e |
return 0;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+int32_t
|
|
|
cb8e9e |
+br_stub_readv (call_frame_t *frame, xlator_t *this,
|
|
|
cb8e9e |
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
|
|
|
cb8e9e |
+{
|
|
|
cb8e9e |
+ int32_t op_ret = -1;
|
|
|
cb8e9e |
+ int32_t op_errno = EINVAL;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind);
|
|
|
cb8e9e |
+ GF_VALIDATE_OR_GOTO (this->name, frame, unwind);
|
|
|
cb8e9e |
+ GF_VALIDATE_OR_GOTO (this->name, fd, unwind);
|
|
|
cb8e9e |
+ GF_VALIDATE_OR_GOTO (this->name, fd->inode, unwind);
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ BR_STUB_HANDLE_BAD_OBJECT (this, fd->inode, op_ret, op_errno, unwind);
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ STACK_WIND_TAIL (frame, FIRST_CHILD(this),
|
|
|
cb8e9e |
+ FIRST_CHILD(this)->fops->readv, fd, size, offset,
|
|
|
cb8e9e |
+ flags, xdata);
|
|
|
cb8e9e |
+ return 0;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+unwind:
|
|
|
cb8e9e |
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, NULL, 0, NULL,
|
|
|
cb8e9e |
+ NULL, NULL);
|
|
|
cb8e9e |
+ return 0;
|
|
|
cb8e9e |
+}
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
/**
|
|
|
cb8e9e |
* The first write response on the first fd in the list of fds will set
|
|
|
cb8e9e |
* the flag to indicate that the inode is modified. The subsequent write
|
|
|
cb8e9e |
@@ -1367,6 +1550,8 @@ br_stub_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
|
|
|
cb8e9e |
if (ret)
|
|
|
cb8e9e |
goto unwind;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+ BR_STUB_HANDLE_BAD_OBJECT (this, fd->inode, op_ret, op_errno, unwind);
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
/**
|
|
|
cb8e9e |
* The inode is not dirty and also witnessed atleast one successful
|
|
|
cb8e9e |
* modification operation. Therefore, subsequent operations need not
|
|
|
cb8e9e |
@@ -1486,6 +1671,8 @@ br_stub_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
|
|
|
cb8e9e |
if (ret)
|
|
|
cb8e9e |
goto unwind;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+ BR_STUB_HANDLE_BAD_OBJECT (this, fd->inode, op_ret, op_errno, unwind);
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
if (!inc_version && modified)
|
|
|
cb8e9e |
goto wind;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
@@ -1616,6 +1803,8 @@ br_stub_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
|
|
|
cb8e9e |
if (ret)
|
|
|
cb8e9e |
goto cleanup_fd;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+ BR_STUB_HANDLE_BAD_OBJECT (this, fd->inode, op_ret, op_errno, unwind);
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
if (!inc_version && modified)
|
|
|
cb8e9e |
goto wind;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
@@ -1689,15 +1878,14 @@ br_stub_open (call_frame_t *frame, xlator_t *this,
|
|
|
cb8e9e |
int32_t ret = -1;
|
|
|
cb8e9e |
br_stub_inode_ctx_t *ctx = NULL;
|
|
|
cb8e9e |
uint64_t ctx_addr = 0;
|
|
|
cb8e9e |
+ int32_t op_ret = -1;
|
|
|
cb8e9e |
+ int32_t op_errno = EINVAL;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind);
|
|
|
cb8e9e |
GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
|
|
|
cb8e9e |
GF_VALIDATE_OR_GOTO (this->name, fd, unwind);
|
|
|
cb8e9e |
GF_VALIDATE_OR_GOTO (this->name, fd->inode, unwind);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- if (frame->root->pid == GF_CLIENT_PID_SCRUB)
|
|
|
cb8e9e |
- goto wind;
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
ret = br_stub_get_inode_ctx (this, fd->inode, &ctx_addr);
|
|
|
cb8e9e |
if (ret) {
|
|
|
cb8e9e |
gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
|
cb8e9e |
@@ -1708,6 +1896,12 @@ br_stub_open (call_frame_t *frame, xlator_t *this,
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ BR_STUB_HANDLE_BAD_OBJECT (this, loc->inode, op_ret, op_errno, unwind);
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ if (frame->root->pid == GF_CLIENT_PID_SCRUB)
|
|
|
cb8e9e |
+ goto wind;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
if (flags == O_RDONLY)
|
|
|
cb8e9e |
goto wind;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
@@ -1725,7 +1919,7 @@ wind:
|
|
|
cb8e9e |
FIRST_CHILD (this)->fops->open, loc, flags, fd, xdata);
|
|
|
cb8e9e |
return 0;
|
|
|
cb8e9e |
unwind:
|
|
|
cb8e9e |
- STACK_UNWIND_STRICT (open, frame, -1, EINVAL, NULL, NULL);
|
|
|
cb8e9e |
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, NULL, NULL);
|
|
|
cb8e9e |
return 0;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
@@ -1784,7 +1978,7 @@ br_stub_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
|
cb8e9e |
ret = br_stub_get_inode_ctx (this, fd->inode, &ctx_addr);
|
|
|
cb8e9e |
if (ret < 0) {
|
|
|
cb8e9e |
ret = br_stub_init_inode_versions (this, fd, inode, version,
|
|
|
cb8e9e |
- _gf_true);
|
|
|
cb8e9e |
+ _gf_true, _gf_false);
|
|
|
cb8e9e |
if (ret) {
|
|
|
cb8e9e |
op_ret = -1;
|
|
|
cb8e9e |
op_errno = EINVAL;
|
|
|
cb8e9e |
@@ -1834,7 +2028,7 @@ br_stub_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
|
cb8e9e |
goto unwind;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
ret = br_stub_init_inode_versions (this, NULL, inode, version,
|
|
|
cb8e9e |
- _gf_true);
|
|
|
cb8e9e |
+ _gf_true, _gf_false);
|
|
|
cb8e9e |
/**
|
|
|
cb8e9e |
* Like lookup, if init_inode_versions fail, return EINVAL
|
|
|
cb8e9e |
*/
|
|
|
cb8e9e |
@@ -1869,6 +2063,23 @@ unwind:
|
|
|
cb8e9e |
|
|
|
cb8e9e |
/** }}} */
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+/**
|
|
|
cb8e9e |
+ * As of now, only lookup searches for bad object xattr and marks the
|
|
|
cb8e9e |
+ * object as bad in its inode context if the xattr is present. But there
|
|
|
cb8e9e |
+ * is a possibility that, at the time of the lookup the object was not
|
|
|
cb8e9e |
+ * marked bad (i.e. bad object xattr was not set), and later it is marked
|
|
|
cb8e9e |
+ * as bad. In this case, object is not bad, so when a fop such as open or
|
|
|
cb8e9e |
+ * readv or writev comes on the object, the fop will be sent downward instead
|
|
|
cb8e9e |
+ * of sending as error upwards.
|
|
|
cb8e9e |
+ * The solution for this is to do a getxattr for the below list of fops.
|
|
|
cb8e9e |
+ * lookup, readdirp, open, readv, writev.
|
|
|
cb8e9e |
+ * But doing getxattr for each of the above fops might be costly.
|
|
|
cb8e9e |
+ * So another method followed is to catch the bad file marking by the scrubber
|
|
|
cb8e9e |
+ * and set that info within the object's inode context. In this way getxattr
|
|
|
cb8e9e |
+ * calls can be avoided and bad objects can be caught instantly. Fetching the
|
|
|
cb8e9e |
+ * xattr is needed only in lookups when there is a brick restart or inode
|
|
|
cb8e9e |
+ * forget.
|
|
|
cb8e9e |
+ */
|
|
|
cb8e9e |
static inline int32_t
|
|
|
cb8e9e |
br_stub_lookup_version (xlator_t *this,
|
|
|
cb8e9e |
uuid_t gfid, inode_t *inode, dict_t *xattr)
|
|
|
cb8e9e |
@@ -1877,6 +2088,7 @@ br_stub_lookup_version (xlator_t *this,
|
|
|
cb8e9e |
br_version_t *obuf = NULL;
|
|
|
cb8e9e |
br_signature_t *sbuf = NULL;
|
|
|
cb8e9e |
br_vxattr_status_t status;
|
|
|
cb8e9e |
+ gf_boolean_t bad_object = _gf_false;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
/**
|
|
|
cb8e9e |
* versioning xattrs were requested from POSIX. if available, figure
|
|
|
cb8e9e |
@@ -1886,13 +2098,13 @@ br_stub_lookup_version (xlator_t *this,
|
|
|
cb8e9e |
* operation (such as write(), etc..) triggers synchronization to
|
|
|
cb8e9e |
* disk.
|
|
|
cb8e9e |
*/
|
|
|
cb8e9e |
- status = br_version_xattr_state (xattr, &obuf, &sbuf);
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
+ status = br_version_xattr_state (xattr, &obuf, &sbuf, &bad_object);
|
|
|
cb8e9e |
version = ((status == BR_VXATTR_STATUS_FULL)
|
|
|
cb8e9e |
|| (status == BR_VXATTR_STATUS_UNSIGNED))
|
|
|
cb8e9e |
? obuf->ongoingversion : BITROT_DEFAULT_CURRENT_VERSION;
|
|
|
cb8e9e |
- return br_stub_init_inode_versions (this, NULL,
|
|
|
cb8e9e |
- inode, version, _gf_true);
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ return br_stub_init_inode_versions (this, NULL, inode, version,
|
|
|
cb8e9e |
+ _gf_true, bad_object);
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
|
|
|
cb8e9e |
@@ -1975,6 +2187,9 @@ br_stub_readdirp (call_frame_t *frame, xlator_t *this,
|
|
|
cb8e9e |
ret = dict_set_uint32 (dict, BITROT_SIGNING_VERSION_KEY, 0);
|
|
|
cb8e9e |
if (ret)
|
|
|
cb8e9e |
goto unwind;
|
|
|
cb8e9e |
+ ret = dict_set_uint32 (dict, BITROT_OBJECT_BAD_KEY, 0);
|
|
|
cb8e9e |
+ if (ret)
|
|
|
cb8e9e |
+ goto unwind;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
STACK_WIND (frame, br_stub_readdirp_cbk, FIRST_CHILD (this),
|
|
|
cb8e9e |
FIRST_CHILD(this)->fops->readdirp, fd, size,
|
|
|
cb8e9e |
@@ -2009,18 +2224,51 @@ br_stub_lookup_cbk (call_frame_t *frame, void *cookie,
|
|
|
cb8e9e |
goto unwind;
|
|
|
cb8e9e |
if (!IA_ISREG (stbuf->ia_type))
|
|
|
cb8e9e |
goto unwind;
|
|
|
cb8e9e |
- if (cookie != (void *) BR_STUB_REQUEST_COOKIE)
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ /**
|
|
|
cb8e9e |
+ * If the object is bad, then "bad inode" marker has to be sent back
|
|
|
cb8e9e |
+ * in response, for revalidated lookups as well. Some xlators such as
|
|
|
cb8e9e |
+ * quick-read might cache the data in revalidated lookup as fresh
|
|
|
cb8e9e |
+ * lookup would anyway have sent "bad inode" marker.
|
|
|
cb8e9e |
+ * In general send bad inode marker for every lookup operation on the
|
|
|
cb8e9e |
+ * bad object.
|
|
|
cb8e9e |
+ */
|
|
|
cb8e9e |
+ if (cookie != (void *) BR_STUB_REQUEST_COOKIE) {
|
|
|
cb8e9e |
+ ret = br_stub_mark_xdata_bad_object (this, inode, xattr);
|
|
|
cb8e9e |
+ if (ret) {
|
|
|
cb8e9e |
+ op_ret = -1;
|
|
|
cb8e9e |
+ op_errno = EIO;
|
|
|
cb8e9e |
+ goto unwind;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
goto delkey;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
|
|
|
cb8e9e |
ret = br_stub_lookup_version (this, stbuf->ia_gfid, inode, xattr);
|
|
|
cb8e9e |
if (ret < 0) {
|
|
|
cb8e9e |
op_ret = -1;
|
|
|
cb8e9e |
op_errno = EINVAL;
|
|
|
cb8e9e |
+ goto delkey;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ /**
|
|
|
cb8e9e |
+ * If the object is bad, send "bad inode" marker back in response
|
|
|
cb8e9e |
+ * for xlator(s) to act accordingly (such as quick-read, etc..)
|
|
|
cb8e9e |
+ */
|
|
|
cb8e9e |
+ ret = br_stub_mark_xdata_bad_object (this, inode, xattr);
|
|
|
cb8e9e |
+ if (ret) {
|
|
|
cb8e9e |
+ /**
|
|
|
cb8e9e |
+ * aaha! bad object, but sorry we would not
|
|
|
cb8e9e |
+ * satisfy the request on allocation failures.
|
|
|
cb8e9e |
+ */
|
|
|
cb8e9e |
+ op_ret = -1;
|
|
|
cb8e9e |
+ op_errno = EIO;
|
|
|
cb8e9e |
+ goto unwind;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- delkey:
|
|
|
cb8e9e |
+delkey:
|
|
|
cb8e9e |
br_stub_remove_vxattrs (xattr);
|
|
|
cb8e9e |
- unwind:
|
|
|
cb8e9e |
+unwind:
|
|
|
cb8e9e |
STACK_UNWIND_STRICT (lookup, frame,
|
|
|
cb8e9e |
op_ret, op_errno, inode, stbuf, xattr, postparent);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
@@ -2037,6 +2285,10 @@ br_stub_lookup (call_frame_t *frame,
|
|
|
cb8e9e |
uint64_t ctx_addr = 0;
|
|
|
cb8e9e |
gf_boolean_t xref = _gf_false;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+ GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind);
|
|
|
cb8e9e |
+ GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
|
|
|
cb8e9e |
+ GF_VALIDATE_OR_GOTO (this->name, loc->inode, unwind);
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
ret = br_stub_get_inode_ctx (this, loc->inode, &ctx_addr);
|
|
|
cb8e9e |
if (ret < 0)
|
|
|
cb8e9e |
ctx_addr = 0;
|
|
|
cb8e9e |
@@ -2069,6 +2321,9 @@ br_stub_lookup (call_frame_t *frame,
|
|
|
cb8e9e |
ret = dict_set_uint32 (xdata, BITROT_SIGNING_VERSION_KEY, 0);
|
|
|
cb8e9e |
if (ret)
|
|
|
cb8e9e |
goto unwind;
|
|
|
cb8e9e |
+ ret = dict_set_uint32 (xdata, BITROT_OBJECT_BAD_KEY, 0);
|
|
|
cb8e9e |
+ if (ret)
|
|
|
cb8e9e |
+ goto unwind;
|
|
|
cb8e9e |
cookie = (void *) BR_STUB_REQUEST_COOKIE;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
wind:
|
|
|
cb8e9e |
@@ -2335,6 +2590,9 @@ struct xlator_fops fops = {
|
|
|
cb8e9e |
.truncate = br_stub_truncate,
|
|
|
cb8e9e |
.ftruncate = br_stub_ftruncate,
|
|
|
cb8e9e |
.mknod = br_stub_mknod,
|
|
|
cb8e9e |
+ .readv = br_stub_readv,
|
|
|
cb8e9e |
+ .removexattr = br_stub_removexattr,
|
|
|
cb8e9e |
+ .fremovexattr = br_stub_fremovexattr,
|
|
|
cb8e9e |
};
|
|
|
cb8e9e |
|
|
|
cb8e9e |
struct xlator_cbks cbks = {
|
|
|
cb8e9e |
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.h b/xlators/features/bit-rot/src/stub/bit-rot-stub.h
|
|
|
cb8e9e |
index 48c7a37..e5649fc 100644
|
|
|
cb8e9e |
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.h
|
|
|
cb8e9e |
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.h
|
|
|
cb8e9e |
@@ -37,6 +37,7 @@ typedef struct br_stub_inode_ctx {
|
|
|
cb8e9e |
int info_sign;
|
|
|
cb8e9e |
struct list_head fd_list; /* list of open fds or fds participating in
|
|
|
cb8e9e |
write operations */
|
|
|
cb8e9e |
+ gf_boolean_t bad_object;
|
|
|
cb8e9e |
} br_stub_inode_ctx_t;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
typedef struct br_stub_fd {
|
|
|
cb8e9e |
@@ -85,6 +86,18 @@ typedef struct br_stub_private {
|
|
|
cb8e9e |
struct mem_pool *local_pool;
|
|
|
cb8e9e |
} br_stub_private_t;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+static inline gf_boolean_t
|
|
|
cb8e9e |
+__br_stub_is_bad_object (br_stub_inode_ctx_t *ctx)
|
|
|
cb8e9e |
+{
|
|
|
cb8e9e |
+ return ctx->bad_object;
|
|
|
cb8e9e |
+}
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+static inline void
|
|
|
cb8e9e |
+__br_stub_mark_object_bad (br_stub_inode_ctx_t *ctx)
|
|
|
cb8e9e |
+{
|
|
|
cb8e9e |
+ ctx->bad_object = _gf_true;
|
|
|
cb8e9e |
+}
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
/* inode writeback helpers */
|
|
|
cb8e9e |
static inline void
|
|
|
cb8e9e |
__br_stub_mark_inode_dirty (br_stub_inode_ctx_t *ctx)
|
|
|
cb8e9e |
@@ -370,12 +383,93 @@ static inline void
|
|
|
cb8e9e |
br_stub_remove_vxattrs (dict_t *xattr)
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
if (xattr) {
|
|
|
cb8e9e |
+ dict_del (xattr, BITROT_OBJECT_BAD_KEY);
|
|
|
cb8e9e |
dict_del (xattr, BITROT_CURRENT_VERSION_KEY);
|
|
|
cb8e9e |
dict_del (xattr, BITROT_SIGNING_VERSION_KEY);
|
|
|
cb8e9e |
dict_del (xattr, BITROT_SIGNING_XATTR_SIZE_KEY);
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+#define BR_STUB_HANDLE_BAD_OBJECT(this, inode, op_ret, op_errno, label) \
|
|
|
cb8e9e |
+ do { \
|
|
|
cb8e9e |
+ if (br_stub_is_bad_object (this, inode)) { \
|
|
|
cb8e9e |
+ gf_msg (this->name, GF_LOG_ERROR, 0, \
|
|
|
cb8e9e |
+ BRS_MSG_BAD_OBJECT_ACCESS, \
|
|
|
cb8e9e |
+ "%s is a bad object. Returning", \
|
|
|
cb8e9e |
+ uuid_utoa (inode->gfid)); \
|
|
|
cb8e9e |
+ op_ret = -1; \
|
|
|
cb8e9e |
+ op_errno = EIO; \
|
|
|
cb8e9e |
+ goto label; \
|
|
|
cb8e9e |
+ } \
|
|
|
cb8e9e |
+ } while (0)
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+static inline gf_boolean_t
|
|
|
cb8e9e |
+br_stub_is_bad_object (xlator_t *this, inode_t *inode)
|
|
|
cb8e9e |
+{
|
|
|
cb8e9e |
+ gf_boolean_t bad_object = _gf_false;
|
|
|
cb8e9e |
+ uint64_t ctx_addr = 0;
|
|
|
cb8e9e |
+ br_stub_inode_ctx_t *ctx = NULL;
|
|
|
cb8e9e |
+ int32_t ret = -1;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ ret = br_stub_get_inode_ctx (this, inode, &ctx_addr);
|
|
|
cb8e9e |
+ if (ret) {
|
|
|
cb8e9e |
+ gf_msg (this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_CONTEXT_FAILED,
|
|
|
cb8e9e |
+ "failed to get the inode context for the inode %s",
|
|
|
cb8e9e |
+ uuid_utoa (inode->gfid));
|
|
|
cb8e9e |
+ goto out;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ LOCK (&inode->lock);
|
|
|
cb8e9e |
+ {
|
|
|
cb8e9e |
+ bad_object = __br_stub_is_bad_object (ctx);
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+ UNLOCK (&inode->lock);
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+out:
|
|
|
cb8e9e |
+ return bad_object;
|
|
|
cb8e9e |
+}
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+static inline int32_t
|
|
|
cb8e9e |
+br_stub_mark_object_bad (xlator_t *this, inode_t *inode)
|
|
|
cb8e9e |
+{
|
|
|
cb8e9e |
+ int32_t ret = -1;
|
|
|
cb8e9e |
+ uint64_t ctx_addr = 0;
|
|
|
cb8e9e |
+ br_stub_inode_ctx_t *ctx = NULL;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ ret = br_stub_get_inode_ctx (this, inode, &ctx_addr);
|
|
|
cb8e9e |
+ if (ret) {
|
|
|
cb8e9e |
+ gf_msg (this->name, GF_LOG_ERROR, 0,
|
|
|
cb8e9e |
+ BRS_MSG_GET_INODE_CONTEXT_FAILED, "failed to get the "
|
|
|
cb8e9e |
+ "inode context for the inode %s",
|
|
|
cb8e9e |
+ uuid_utoa (inode->gfid));
|
|
|
cb8e9e |
+ goto out;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ LOCK (&inode->lock);
|
|
|
cb8e9e |
+ {
|
|
|
cb8e9e |
+ __br_stub_mark_object_bad (ctx);
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+ UNLOCK (&inode->lock);
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+out:
|
|
|
cb8e9e |
+ return ret;
|
|
|
cb8e9e |
+}
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+static inline int32_t
|
|
|
cb8e9e |
+br_stub_mark_xdata_bad_object (xlator_t *this, inode_t *inode, dict_t *xdata)
|
|
|
cb8e9e |
+{
|
|
|
cb8e9e |
+ int32_t ret = 0;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ if (br_stub_is_bad_object (this, inode))
|
|
|
cb8e9e |
+ ret = dict_set_int32 (xdata, GLUSTERFS_BAD_INODE, 1);
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ return ret;
|
|
|
cb8e9e |
+}
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
int32_t
|
|
|
cb8e9e |
br_stub_add_fd_to_inode (xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
diff --git a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c
|
|
|
cb8e9e |
index c6913ee..1426ae5 100644
|
|
|
cb8e9e |
--- a/xlators/performance/quick-read/src/quick-read.c
|
|
|
cb8e9e |
+++ b/xlators/performance/quick-read/src/quick-read.c
|
|
|
cb8e9e |
@@ -409,6 +409,11 @@ qr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
|
cb8e9e |
goto out;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+ if (dict_get (xdata, GLUSTERFS_BAD_INODE)) {
|
|
|
cb8e9e |
+ qr_inode_prune (this, inode);
|
|
|
cb8e9e |
+ goto out;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
if (dict_get (xdata, "sh-failed")) {
|
|
|
cb8e9e |
qr_inode_prune (this, inode);
|
|
|
cb8e9e |
goto out;
|
|
|
cb8e9e |
--
|
|
|
cb8e9e |
1.7.1
|
|
|
cb8e9e |
|