|
|
74096c |
From 80eef2f52bb92ed740ac00eeb11ee7a3e7fffff2 Mon Sep 17 00:00:00 2001
|
|
|
74096c |
From: Raghavendra Bhat <raghavendra@redhat.com>
|
|
|
74096c |
Date: Mon, 11 Mar 2019 12:16:50 -0400
|
|
|
74096c |
Subject: [PATCH 459/465] features/bit-rot: Unconditionally sign the files
|
|
|
74096c |
during oneshot crawl
|
|
|
74096c |
|
|
|
74096c |
Currently bit-rot feature has an issue with disabling and reenabling it
|
|
|
74096c |
on the same volume. Consider enabling bit-rot detection which goes on to
|
|
|
74096c |
crawl and sign all the files present in the volume. Then some files are
|
|
|
74096c |
modified and the bit-rot daemon goes on to sign the modified files with
|
|
|
74096c |
the correct signature. Now, disable bit-rot feature. While, signing and
|
|
|
74096c |
scrubbing are not happening, previous checksums of the files continue to
|
|
|
74096c |
exist as extended attributes. Now, if some files with checksum xattrs get
|
|
|
74096c |
modified, they are not signed with new signature as the feature is off.
|
|
|
74096c |
|
|
|
74096c |
At this point, if the feature is enabled again, the bit rot daemon will
|
|
|
74096c |
go and sign those files which does not have any bit-rot specific xattrs
|
|
|
74096c |
(i.e. those files which were created after bit-rot was disabled). Whereas
|
|
|
74096c |
the files with bit-rot xattrs wont get signed with proper new checksum.
|
|
|
74096c |
At this point if scrubber runs, it finds the on disk checksum and the actual
|
|
|
74096c |
checksum of the file to be different (because the file got modified) and
|
|
|
74096c |
marks the file as corrupted.
|
|
|
74096c |
|
|
|
74096c |
FIX:
|
|
|
74096c |
|
|
|
74096c |
The fix is to unconditionally sign the files when the bit-rot daemon
|
|
|
74096c |
comes up (instead of skipping the files with bit-rot xattrs).
|
|
|
74096c |
|
|
|
74096c |
upstream fix:
|
|
|
74096c |
> patch: https://review.gluster.org/#/c/glusterfs/+/22360/
|
|
|
74096c |
> fixes: #bz1700078
|
|
|
74096c |
> Change-ID: Iadfb47dd39f7e2e77f22d549a4a07a385284f4f5
|
|
|
74096c |
|
|
|
74096c |
Change-Id: Iadfb47dd39f7e2e77f22d549a4a07a385284f4f5
|
|
|
74096c |
BUG: 1851424
|
|
|
74096c |
Signed-off-by: Raghavendra M <raghavendra@redhat.com>
|
|
|
74096c |
Reviewed-on: https://code.engineering.redhat.com/gerrit/208305
|
|
|
74096c |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
74096c |
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
|
|
74096c |
---
|
|
|
74096c |
tests/bitrot/bug-1700078.t | 87 +++++++++++++++++++++++++++++
|
|
|
74096c |
xlators/features/bit-rot/src/bitd/bit-rot.c | 15 ++++-
|
|
|
74096c |
2 files changed, 101 insertions(+), 1 deletion(-)
|
|
|
74096c |
create mode 100644 tests/bitrot/bug-1700078.t
|
|
|
74096c |
|
|
|
74096c |
diff --git a/tests/bitrot/bug-1700078.t b/tests/bitrot/bug-1700078.t
|
|
|
74096c |
new file mode 100644
|
|
|
74096c |
index 0000000..f273742
|
|
|
74096c |
--- /dev/null
|
|
|
74096c |
+++ b/tests/bitrot/bug-1700078.t
|
|
|
74096c |
@@ -0,0 +1,87 @@
|
|
|
74096c |
+#!/bin/bash
|
|
|
74096c |
+
|
|
|
74096c |
+. $(dirname $0)/../include.rc
|
|
|
74096c |
+. $(dirname $0)/../volume.rc
|
|
|
74096c |
+
|
|
|
74096c |
+cleanup;
|
|
|
74096c |
+
|
|
|
74096c |
+## Start glusterd
|
|
|
74096c |
+TEST glusterd;
|
|
|
74096c |
+TEST pidof glusterd;
|
|
|
74096c |
+
|
|
|
74096c |
+## Lets create and start the volume
|
|
|
74096c |
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
|
|
|
74096c |
+TEST $CLI volume start $V0
|
|
|
74096c |
+
|
|
|
74096c |
+## Enable bitrot for volume $V0
|
|
|
74096c |
+TEST $CLI volume bitrot $V0 enable
|
|
|
74096c |
+
|
|
|
74096c |
+## Turn off quick-read so that it wont cache the contents
|
|
|
74096c |
+# of the file in lookup. For corrupted files, it might
|
|
|
74096c |
+# end up in reads being served from the cache instead of
|
|
|
74096c |
+# an error.
|
|
|
74096c |
+TEST $CLI volume set $V0 performance.quick-read off
|
|
|
74096c |
+
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
|
|
|
74096c |
+
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Active' scrub_status $V0 'State of scrub'
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/bitd.log' scrub_status $V0 'Bitrot error log location'
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/scrub.log' scrub_status $V0 'Scrubber error log location'
|
|
|
74096c |
+
|
|
|
74096c |
+## Set expiry-timeout to 1 sec
|
|
|
74096c |
+TEST $CLI volume set $V0 features.expiry-time 1
|
|
|
74096c |
+
|
|
|
74096c |
+##Mount $V0
|
|
|
74096c |
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
|
|
|
74096c |
+
|
|
|
74096c |
+## Turn off quick-read xlator so that, the contents are not served from the
|
|
|
74096c |
+# quick-read cache.
|
|
|
74096c |
+TEST $CLI volume set $V0 performance.quick-read off
|
|
|
74096c |
+
|
|
|
74096c |
+#Create sample file
|
|
|
74096c |
+TEST `echo "1234" > $M0/FILE1`
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' "/$B0/${V0}1/FILE1"
|
|
|
74096c |
+
|
|
|
74096c |
+##disable bitrot
|
|
|
74096c |
+TEST $CLI volume bitrot $V0 disable
|
|
|
74096c |
+
|
|
|
74096c |
+## modify the file
|
|
|
74096c |
+TEST `echo "write" >> $M0/FILE1`
|
|
|
74096c |
+
|
|
|
74096c |
+# unmount and remount when the file has to be accessed.
|
|
|
74096c |
+# This is to ensure that, when the remount happens,
|
|
|
74096c |
+# and the file is read, its contents are served from the
|
|
|
74096c |
+# brick instead of cache.
|
|
|
74096c |
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
|
|
|
74096c |
+
|
|
|
74096c |
+##enable bitrot
|
|
|
74096c |
+TEST $CLI volume bitrot $V0 enable
|
|
|
74096c |
+
|
|
|
74096c |
+# expiry time is set to 1 second. Hence sleep for 2 seconds for the
|
|
|
74096c |
+# oneshot crawler to finish its crawling and sign the file properly.
|
|
|
74096c |
+sleep 2
|
|
|
74096c |
+
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
|
|
|
74096c |
+
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Active' scrub_status $V0 'State of scrub'
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/bitd.log' scrub_status $V0 'Bitrot error log location'
|
|
|
74096c |
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/scrub.log' scrub_status $V0 'Scrubber error log location'
|
|
|
74096c |
+
|
|
|
74096c |
+## Ondemand scrub
|
|
|
74096c |
+TEST $CLI volume bitrot $V0 scrub ondemand
|
|
|
74096c |
+
|
|
|
74096c |
+# the scrub ondemand CLI command, just ensures that
|
|
|
74096c |
+# the scrubber has received the ondemand scrub directive
|
|
|
74096c |
+# and started. sleep for 2 seconds for scrubber to finish
|
|
|
74096c |
+# crawling and marking file(s) as bad (if if finds that
|
|
|
74096c |
+# corruption has happened) which are filesystem operations.
|
|
|
74096c |
+sleep 2
|
|
|
74096c |
+
|
|
|
74096c |
+TEST ! getfattr -n 'trusted.bit-rot.bad-file' $B0/${V0}1/FILE1
|
|
|
74096c |
+
|
|
|
74096c |
+##Mount $V0
|
|
|
74096c |
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
|
|
|
74096c |
+
|
|
|
74096c |
+TEST cat $M0/FILE1
|
|
|
74096c |
+
|
|
|
74096c |
+cleanup;
|
|
|
74096c |
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
|
|
|
74096c |
index b8feef7..424c0d5 100644
|
|
|
74096c |
--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
|
|
|
74096c |
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
|
|
|
74096c |
@@ -973,6 +973,7 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
|
|
|
74096c |
int32_t ret = -1;
|
|
|
74096c |
inode_t *linked_inode = NULL;
|
|
|
74096c |
gf_boolean_t need_signing = _gf_false;
|
|
|
74096c |
+ gf_boolean_t need_reopen = _gf_true;
|
|
|
74096c |
|
|
|
74096c |
GF_VALIDATE_OR_GOTO("bit-rot", subvol, out);
|
|
|
74096c |
GF_VALIDATE_OR_GOTO("bit-rot", data, out);
|
|
|
74096c |
@@ -1046,6 +1047,18 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
|
|
|
74096c |
uuid_utoa(linked_inode->gfid));
|
|
|
74096c |
} else {
|
|
|
74096c |
need_signing = br_check_object_need_sign(this, xattr, child);
|
|
|
74096c |
+
|
|
|
74096c |
+ /*
|
|
|
74096c |
+ * If we are here means, bitrot daemon has started. Is it just
|
|
|
74096c |
+ * a simple restart of the daemon or is it started because the
|
|
|
74096c |
+ * feature is enabled is something hard to determine. Hence,
|
|
|
74096c |
+ * if need_signing is false (because bit-rot version and signature
|
|
|
74096c |
+ * are present), then still go ahead and sign it.
|
|
|
74096c |
+ */
|
|
|
74096c |
+ if (!need_signing) {
|
|
|
74096c |
+ need_signing = _gf_true;
|
|
|
74096c |
+ need_reopen = _gf_true;
|
|
|
74096c |
+ }
|
|
|
74096c |
}
|
|
|
74096c |
|
|
|
74096c |
if (!need_signing)
|
|
|
74096c |
@@ -1054,7 +1067,7 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
|
|
|
74096c |
gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_TRIGGER_SIGN,
|
|
|
74096c |
"Triggering signing for %s [GFID: %s | Brick: %s]", loc.path,
|
|
|
74096c |
uuid_utoa(linked_inode->gfid), child->brick_path);
|
|
|
74096c |
- br_trigger_sign(this, child, linked_inode, &loc, _gf_true);
|
|
|
74096c |
+ br_trigger_sign(this, child, linked_inode, &loc, need_reopen);
|
|
|
74096c |
|
|
|
74096c |
ret = 0;
|
|
|
74096c |
|
|
|
74096c |
--
|
|
|
74096c |
1.8.3.1
|
|
|
74096c |
|