|
|
cb8e9e |
From a3b9ad5909923b24bec518565d945003bfecee69 Mon Sep 17 00:00:00 2001
|
|
|
cb8e9e |
From: Raghavendra Bhat <raghavendra@redhat.com>
|
|
|
cb8e9e |
Date: Wed, 27 May 2015 17:00:36 +0530
|
|
|
cb8e9e |
Subject: [PATCH 156/190] features/bit-rot: check for both inmemory and ondisk staleness.
|
|
|
cb8e9e |
|
|
|
cb8e9e |
* Let bit-rot stub check both on disk ongoing version, signed version xattrs and
|
|
|
cb8e9e |
the in memory flags in the inode and then decide whether the inode is stale or
|
|
|
cb8e9e |
not. This information is used by one shot crawler in BitD to decide whether to
|
|
|
cb8e9e |
trigger the sign for the object or skip it.
|
|
|
cb8e9e |
|
|
|
cb8e9e |
NOTE: The above check should be done only for BitD. For scrubber it is still the
|
|
|
cb8e9e |
old way of comparing on disk ongoing version with signed version.
|
|
|
cb8e9e |
|
|
|
cb8e9e |
* BitD's one shot crawler should not sign zero byte objects if they do not contain
|
|
|
cb8e9e |
a signature. (This means the object was just created and nothing was written to it).
|
|
|
cb8e9e |
|
|
|
cb8e9e |
Change-Id: I6941aefc2981bf79a6aeb476e660f79908e165a8
|
|
|
cb8e9e |
BUG: 1232309
|
|
|
cb8e9e |
Signed-off-by: Raghavendra Bhat <raghavendra@redhat.com>
|
|
|
cb8e9e |
Reviewed-on: https://code.engineering.redhat.com/gerrit/51738
|
|
|
cb8e9e |
---
|
|
|
cb8e9e |
xlators/features/bit-rot/src/bitd/bit-rot.c | 14 +-
|
|
|
cb8e9e |
xlators/features/bit-rot/src/stub/bit-rot-stub.c | 143 ++++++++++++++++++++--
|
|
|
cb8e9e |
2 files changed, 138 insertions(+), 19 deletions(-)
|
|
|
cb8e9e |
|
|
|
cb8e9e |
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
|
|
|
cb8e9e |
index a4821ba..228cf34 100644
|
|
|
cb8e9e |
--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
|
|
|
cb8e9e |
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
|
|
|
cb8e9e |
@@ -858,7 +858,6 @@ br_check_object_need_sign (xlator_t *this, dict_t *xattr, br_child_t *child)
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
int32_t ret = -1;
|
|
|
cb8e9e |
gf_boolean_t need_sign = _gf_false;
|
|
|
cb8e9e |
- struct timeval tv = {0,};
|
|
|
cb8e9e |
br_isignature_out_t *sign = NULL;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
GF_VALIDATE_OR_GOTO ("bit-rot", this, out);
|
|
|
cb8e9e |
@@ -873,11 +872,8 @@ br_check_object_need_sign (xlator_t *this, dict_t *xattr, br_child_t *child)
|
|
|
cb8e9e |
goto out;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- tv.tv_sec = ntohl (sign->time[0]);
|
|
|
cb8e9e |
- tv.tv_usec = ntohl (sign->time[1]);
|
|
|
cb8e9e |
-
|
|
|
cb8e9e |
/* Object has been opened and hence dirty. Do not sign it */
|
|
|
cb8e9e |
- if (sign->stale && !br_time_equal (child, &tv))
|
|
|
cb8e9e |
+ if (sign->stale)
|
|
|
cb8e9e |
need_sign = _gf_true;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
out:
|
|
|
cb8e9e |
@@ -1007,7 +1003,11 @@ bitd_oneshot_crawl (xlator_t *subvol,
|
|
|
cb8e9e |
op_errno = -ret;
|
|
|
cb8e9e |
br_log_object (this, "getxattr", linked_inode->gfid, op_errno);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- if (op_errno == ENODATA)
|
|
|
cb8e9e |
+ /**
|
|
|
cb8e9e |
+ * No need to sign the zero byte objects as the signing
|
|
|
cb8e9e |
+ * happens upon first modification of the object.
|
|
|
cb8e9e |
+ */
|
|
|
cb8e9e |
+ if (op_errno == ENODATA && (iatt.ia_size != 0))
|
|
|
cb8e9e |
need_signing = _gf_true;
|
|
|
cb8e9e |
if (op_errno == EINVAL)
|
|
|
cb8e9e |
gf_log (this->name, GF_LOG_WARNING, "Partial version "
|
|
|
cb8e9e |
@@ -1236,7 +1236,7 @@ br_brick_connect (xlator_t *this, br_child_t *child)
|
|
|
cb8e9e |
|
|
|
cb8e9e |
memcpy (child->brick_path, stub->export, strlen (stub->export) + 1);
|
|
|
cb8e9e |
child->tv.tv_sec = ntohl (stub->timebuf[0]);
|
|
|
cb8e9e |
- child->tv.tv_usec = ntohl (stub->timebuf[0]);
|
|
|
cb8e9e |
+ child->tv.tv_usec = ntohl (stub->timebuf[1]);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
if (priv->iamscrubber)
|
|
|
cb8e9e |
ret = br_enact_scrubber (this, child);
|
|
|
cb8e9e |
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
|
|
|
cb8e9e |
index 4f0605d..d4aecdc 100644
|
|
|
cb8e9e |
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c
|
|
|
cb8e9e |
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
|
|
|
cb8e9e |
@@ -949,6 +949,79 @@ br_stub_listxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
|
cb8e9e |
return 0;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+/**
|
|
|
cb8e9e |
+ * ONE SHOT CRAWLER from BitD signs the objects that it encounters while
|
|
|
cb8e9e |
+ * crawling, if the object is identified as stale by the stub. Stub follows
|
|
|
cb8e9e |
+ * the below logic to mark an object as stale or not.
|
|
|
cb8e9e |
+ * If the ongoing version and the signed_version match, then the object is not
|
|
|
cb8e9e |
+ * stale. Just return. Otherwise, if they do not match, then it means one
|
|
|
cb8e9e |
+ * of the below things.
|
|
|
cb8e9e |
+ * 1) If the inode does not need write back of the version and the sign state is
|
|
|
cb8e9e |
+ * NORMAL, then some active i/o is going on the object. So skip it.
|
|
|
cb8e9e |
+ * A notification will be sent to trigger the sign once the release is
|
|
|
cb8e9e |
+ * received on the object.
|
|
|
cb8e9e |
+ * 2) If inode does not need writeback of the version and the sign state is
|
|
|
cb8e9e |
+ * either reopen wait or quick sign, then it means:
|
|
|
cb8e9e |
+ * A) BitD restarted and it is not sure whether the object it encountered
|
|
|
cb8e9e |
+ * while crawling is in its timer wheel or not. Since there is no way to
|
|
|
cb8e9e |
+ * scan the timer wheel as of now, ONE SHOT CRAWLER just goes ahead and
|
|
|
cb8e9e |
+ * signs the object. Since the inode does not need writeback, version will
|
|
|
cb8e9e |
+ * not be incremented and directly the object will be signed.
|
|
|
cb8e9e |
+ * 3) If the inode needs writeback, then it means the inode was forgotten after
|
|
|
cb8e9e |
+ * the versioning and it has to be signed now.
|
|
|
cb8e9e |
+ *
|
|
|
cb8e9e |
+ * This is the algorithm followed:
|
|
|
cb8e9e |
+ * if (ongoing_version == signed_version); then
|
|
|
cb8e9e |
+ * object_is_not_stale;
|
|
|
cb8e9e |
+ * return;
|
|
|
cb8e9e |
+ * else; then
|
|
|
cb8e9e |
+ * if (!inode_needs_writeback && inode_sign_state != NORMAL); then
|
|
|
cb8e9e |
+ * object_is_stale;
|
|
|
cb8e9e |
+ * if (inode_needs_writeback); then
|
|
|
cb8e9e |
+ * object_is_stale;
|
|
|
cb8e9e |
+ *
|
|
|
cb8e9e |
+ * For SCRUBBER, no need to check for the sign state and inode writeback.
|
|
|
cb8e9e |
+ * If the ondisk ongoingversion and the ondisk signed version do not match,
|
|
|
cb8e9e |
+ * then treat the object as stale.
|
|
|
cb8e9e |
+ */
|
|
|
cb8e9e |
+char
|
|
|
cb8e9e |
+br_stub_is_object_stale (xlator_t *this, call_frame_t *frame, inode_t *inode,
|
|
|
cb8e9e |
+ br_version_t *obuf, br_signature_t *sbuf)
|
|
|
cb8e9e |
+{
|
|
|
cb8e9e |
+ uint64_t ctx_addr = 0;
|
|
|
cb8e9e |
+ br_stub_inode_ctx_t *ctx = NULL;
|
|
|
cb8e9e |
+ int32_t ret = -1;
|
|
|
cb8e9e |
+ char stale = 0;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ if (obuf->ongoingversion == sbuf->signedversion)
|
|
|
cb8e9e |
+ goto out;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ if (frame->root->pid == GF_CLIENT_PID_SCRUB) {
|
|
|
cb8e9e |
+ stale = 1;
|
|
|
cb8e9e |
+ goto out;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ ret = br_stub_get_inode_ctx (this, inode, &ctx_addr);
|
|
|
cb8e9e |
+ if (ret) {
|
|
|
cb8e9e |
+ gf_log (this->name, GF_LOG_ERROR, "failed to get the inode "
|
|
|
cb8e9e |
+ "context for %s", uuid_utoa (inode->gfid));
|
|
|
cb8e9e |
+ goto out;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ LOCK (&inode->lock);
|
|
|
cb8e9e |
+ {
|
|
|
cb8e9e |
+ if ((!__br_stub_is_inode_dirty (ctx) &&
|
|
|
cb8e9e |
+ ctx->info_sign != BR_SIGN_NORMAL) ||
|
|
|
cb8e9e |
+ __br_stub_is_inode_dirty (ctx))
|
|
|
cb8e9e |
+ stale = 1;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+ UNLOCK (&inode->lock);
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+out:
|
|
|
cb8e9e |
+ return stale;
|
|
|
cb8e9e |
+}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
int
|
|
|
cb8e9e |
br_stub_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
|
cb8e9e |
@@ -961,12 +1034,18 @@ br_stub_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
|
cb8e9e |
br_signature_t *sbuf = NULL;
|
|
|
cb8e9e |
br_isignature_out_t *sign = NULL;
|
|
|
cb8e9e |
br_vxattr_status_t status;
|
|
|
cb8e9e |
+ br_stub_local_t *local = NULL;
|
|
|
cb8e9e |
+ inode_t *inode = NULL;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
if (op_ret < 0)
|
|
|
cb8e9e |
goto unwind;
|
|
|
cb8e9e |
if (cookie != (void *) BR_STUB_REQUEST_COOKIE)
|
|
|
cb8e9e |
goto unwind;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
+ local = frame->local;
|
|
|
cb8e9e |
+ frame->local = NULL;
|
|
|
cb8e9e |
+ inode = local->u.context.inode;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
op_ret = -1;
|
|
|
cb8e9e |
status = br_version_xattr_state (xattr, &obuf, &sbuf);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
@@ -1005,7 +1084,7 @@ br_stub_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
|
cb8e9e |
|
|
|
cb8e9e |
/* Object's dirty state & current signed version */
|
|
|
cb8e9e |
sign->version = sbuf->signedversion;
|
|
|
cb8e9e |
- sign->stale = (obuf->ongoingversion != sbuf->signedversion) ? 1 : 0;
|
|
|
cb8e9e |
+ sign->stale = br_stub_is_object_stale (this, frame, inode, obuf, sbuf);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
/* Object's signature */
|
|
|
cb8e9e |
sign->signaturelen = signaturelen;
|
|
|
cb8e9e |
@@ -1025,6 +1104,10 @@ br_stub_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|
|
|
cb8e9e |
|
|
|
cb8e9e |
unwind:
|
|
|
cb8e9e |
STACK_UNWIND (frame, op_ret, op_errno, xattr, xdata);
|
|
|
cb8e9e |
+ if (local) {
|
|
|
cb8e9e |
+ br_stub_cleanup_local (local);
|
|
|
cb8e9e |
+ br_stub_dealloc_local (local);
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
return 0;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
@@ -1070,9 +1153,16 @@ int
|
|
|
cb8e9e |
br_stub_getxattr (call_frame_t *frame, xlator_t *this,
|
|
|
cb8e9e |
loc_t *loc, const char *name, dict_t *xdata)
|
|
|
cb8e9e |
{
|
|
|
cb8e9e |
- void *cookie = NULL;
|
|
|
cb8e9e |
- uuid_t rootgfid = {0, };
|
|
|
cb8e9e |
- fop_getxattr_cbk_t cbk = br_stub_getxattr_cbk;
|
|
|
cb8e9e |
+ void *cookie = NULL;
|
|
|
cb8e9e |
+ uuid_t rootgfid = {0, };
|
|
|
cb8e9e |
+ fop_getxattr_cbk_t cbk = br_stub_getxattr_cbk;
|
|
|
cb8e9e |
+ int32_t op_ret = -1;
|
|
|
cb8e9e |
+ int32_t op_errno = EINVAL;
|
|
|
cb8e9e |
+ br_stub_local_t *local = NULL;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ GF_VALIDATE_OR_GOTO ("bit-rot-stub", this, unwind);
|
|
|
cb8e9e |
+ GF_VALIDATE_OR_GOTO (this->name, loc, unwind);
|
|
|
cb8e9e |
+ GF_VALIDATE_OR_GOTO (this->name, loc->inode, unwind);
|
|
|
cb8e9e |
|
|
|
cb8e9e |
rootgfid[15] = 1;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
@@ -1081,10 +1171,8 @@ br_stub_getxattr (call_frame_t *frame, xlator_t *this,
|
|
|
cb8e9e |
goto wind;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- if (br_stub_is_internal_xattr (name)) {
|
|
|
cb8e9e |
- STACK_UNWIND (frame, -1, EINVAL, NULL, NULL);
|
|
|
cb8e9e |
- return 0;
|
|
|
cb8e9e |
- }
|
|
|
cb8e9e |
+ if (br_stub_is_internal_xattr (name))
|
|
|
cb8e9e |
+ goto unwind;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
/**
|
|
|
cb8e9e |
* this special extended attribute is allowed only on root
|
|
|
cb8e9e |
@@ -1104,6 +1192,18 @@ br_stub_getxattr (call_frame_t *frame, xlator_t *this,
|
|
|
cb8e9e |
if (name && (strncmp (name, GLUSTERFS_GET_OBJECT_SIGNATURE,
|
|
|
cb8e9e |
strlen (GLUSTERFS_GET_OBJECT_SIGNATURE)) == 0)) {
|
|
|
cb8e9e |
cookie = (void *) BR_STUB_REQUEST_COOKIE;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ local = br_stub_alloc_local (this);
|
|
|
cb8e9e |
+ if (!local) {
|
|
|
cb8e9e |
+ op_ret = -1;
|
|
|
cb8e9e |
+ op_errno = ENOMEM;
|
|
|
cb8e9e |
+ goto unwind;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ br_stub_fill_local (local, NULL, NULL, loc->inode,
|
|
|
cb8e9e |
+ loc->inode->gfid,
|
|
|
cb8e9e |
+ BR_STUB_NO_VERSIONING, 0);
|
|
|
cb8e9e |
+ frame->local = local;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
wind:
|
|
|
cb8e9e |
@@ -1111,6 +1211,9 @@ br_stub_getxattr (call_frame_t *frame, xlator_t *this,
|
|
|
cb8e9e |
(frame, cbk, cookie, FIRST_CHILD (this),
|
|
|
cb8e9e |
FIRST_CHILD (this)->fops->getxattr, loc, name, xdata);
|
|
|
cb8e9e |
return 0;
|
|
|
cb8e9e |
+unwind:
|
|
|
cb8e9e |
+ STACK_UNWIND (frame, op_ret, op_errno, NULL, NULL);
|
|
|
cb8e9e |
+ return 0;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
int
|
|
|
cb8e9e |
@@ -1120,6 +1223,9 @@ br_stub_fgetxattr (call_frame_t *frame, xlator_t *this,
|
|
|
cb8e9e |
void *cookie = NULL;
|
|
|
cb8e9e |
uuid_t rootgfid = {0, };
|
|
|
cb8e9e |
fop_fgetxattr_cbk_t cbk = br_stub_getxattr_cbk;
|
|
|
cb8e9e |
+ int32_t op_ret = -1;
|
|
|
cb8e9e |
+ int32_t op_errno = EINVAL;
|
|
|
cb8e9e |
+ br_stub_local_t *local = NULL;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
rootgfid[15] = 1;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
@@ -1128,10 +1234,8 @@ br_stub_fgetxattr (call_frame_t *frame, xlator_t *this,
|
|
|
cb8e9e |
goto wind;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
- if (br_stub_is_internal_xattr (name)) {
|
|
|
cb8e9e |
- STACK_UNWIND (frame, -1, EINVAL, NULL, NULL);
|
|
|
cb8e9e |
- return 0;
|
|
|
cb8e9e |
- }
|
|
|
cb8e9e |
+ if (br_stub_is_internal_xattr (name))
|
|
|
cb8e9e |
+ goto unwind;
|
|
|
cb8e9e |
|
|
|
cb8e9e |
/**
|
|
|
cb8e9e |
* this special extended attribute is allowed only on root
|
|
|
cb8e9e |
@@ -1150,6 +1254,18 @@ br_stub_fgetxattr (call_frame_t *frame, xlator_t *this,
|
|
|
cb8e9e |
if (name && (strncmp (name, GLUSTERFS_GET_OBJECT_SIGNATURE,
|
|
|
cb8e9e |
strlen (GLUSTERFS_GET_OBJECT_SIGNATURE)) == 0)) {
|
|
|
cb8e9e |
cookie = (void *) BR_STUB_REQUEST_COOKIE;
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ local = br_stub_alloc_local (this);
|
|
|
cb8e9e |
+ if (!local) {
|
|
|
cb8e9e |
+ op_ret = -1;
|
|
|
cb8e9e |
+ op_errno = ENOMEM;
|
|
|
cb8e9e |
+ goto unwind;
|
|
|
cb8e9e |
+ }
|
|
|
cb8e9e |
+
|
|
|
cb8e9e |
+ br_stub_fill_local (local, NULL, fd, fd->inode,
|
|
|
cb8e9e |
+ fd->inode->gfid,
|
|
|
cb8e9e |
+ BR_STUB_NO_VERSIONING, 0);
|
|
|
cb8e9e |
+ frame->local = local;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
wind:
|
|
|
cb8e9e |
@@ -1157,6 +1273,9 @@ br_stub_fgetxattr (call_frame_t *frame, xlator_t *this,
|
|
|
cb8e9e |
(frame, cbk, cookie, FIRST_CHILD (this),
|
|
|
cb8e9e |
FIRST_CHILD (this)->fops->fgetxattr, fd, name, xdata);
|
|
|
cb8e9e |
return 0;
|
|
|
cb8e9e |
+unwind:
|
|
|
cb8e9e |
+ STACK_UNWIND (frame, op_ret, op_errno, NULL, NULL);
|
|
|
cb8e9e |
+ return 0;
|
|
|
cb8e9e |
}
|
|
|
cb8e9e |
|
|
|
cb8e9e |
/**
|
|
|
cb8e9e |
--
|
|
|
cb8e9e |
1.7.1
|
|
|
cb8e9e |
|