14f8ab
From 80eef2f52bb92ed740ac00eeb11ee7a3e7fffff2 Mon Sep 17 00:00:00 2001
14f8ab
From: Raghavendra Bhat <raghavendra@redhat.com>
14f8ab
Date: Mon, 11 Mar 2019 12:16:50 -0400
14f8ab
Subject: [PATCH 459/465] features/bit-rot: Unconditionally sign the files
14f8ab
 during oneshot crawl
14f8ab
14f8ab
Currently bit-rot feature has an issue with disabling and reenabling it
14f8ab
on the same volume. Consider enabling bit-rot detection which goes on to
14f8ab
crawl and sign all the files present in the volume. Then some files are
14f8ab
modified and the bit-rot daemon goes on to sign the modified files with
14f8ab
the correct signature. Now, disable bit-rot feature. While signing and
14f8ab
scrubbing are not happening, previous checksums of the files continue to
14f8ab
exist as extended attributes. Now, if some files with checksum xattrs get
14f8ab
modified, they are not signed with new signature as the feature is off.
14f8ab
14f8ab
At this point, if the feature is enabled again, the bit rot daemon will
14f8ab
go and sign those files which do not have any bit-rot specific xattrs
14f8ab
(i.e. those files which were created after bit-rot was disabled). Whereas
14f8ab
the files with bit-rot xattrs won't get signed with proper new checksum.
14f8ab
At this point if scrubber runs, it finds the on disk checksum and the actual
14f8ab
checksum of the file to be different (because the file got modified) and
14f8ab
marks the file as corrupted.
14f8ab
14f8ab
FIX:
14f8ab
14f8ab
The fix is to unconditionally sign the files when the bit-rot daemon
14f8ab
comes up (instead of skipping the files with bit-rot xattrs).
14f8ab
14f8ab
upstream fix:
14f8ab
	> patch: https://review.gluster.org/#/c/glusterfs/+/22360/
14f8ab
	> fixes: #bz1700078
14f8ab
	> Change-ID: Iadfb47dd39f7e2e77f22d549a4a07a385284f4f5
14f8ab
14f8ab
Change-Id: Iadfb47dd39f7e2e77f22d549a4a07a385284f4f5
14f8ab
BUG: 1851424
14f8ab
Signed-off-by: Raghavendra M <raghavendra@redhat.com>
14f8ab
Reviewed-on: https://code.engineering.redhat.com/gerrit/208305
14f8ab
Tested-by: RHGS Build Bot <nigelb@redhat.com>
14f8ab
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
14f8ab
---
14f8ab
 tests/bitrot/bug-1700078.t                  | 87 +++++++++++++++++++++++++++++
14f8ab
 xlators/features/bit-rot/src/bitd/bit-rot.c | 15 ++++-
14f8ab
 2 files changed, 101 insertions(+), 1 deletion(-)
14f8ab
 create mode 100644 tests/bitrot/bug-1700078.t
14f8ab
14f8ab
diff --git a/tests/bitrot/bug-1700078.t b/tests/bitrot/bug-1700078.t
14f8ab
new file mode 100644
14f8ab
index 0000000..f273742
14f8ab
--- /dev/null
14f8ab
+++ b/tests/bitrot/bug-1700078.t
14f8ab
@@ -0,0 +1,87 @@
14f8ab
+#!/bin/bash
14f8ab
+
14f8ab
+. $(dirname $0)/../include.rc
14f8ab
+. $(dirname $0)/../volume.rc
14f8ab
+
14f8ab
+cleanup;
14f8ab
+
14f8ab
+## Start glusterd
14f8ab
+TEST glusterd;
14f8ab
+TEST pidof glusterd;
14f8ab
+
14f8ab
+## Let's create and start the volume
14f8ab
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
14f8ab
+TEST $CLI volume start $V0
14f8ab
+
14f8ab
+## Enable bitrot for volume $V0
14f8ab
+TEST $CLI volume bitrot $V0 enable
14f8ab
+
14f8ab
+## Turn off quick-read so that it won't cache the contents
14f8ab
+# of the file in lookup. For corrupted files, it might
14f8ab
+# end up in reads being served from the cache instead of
14f8ab
+# an error.
14f8ab
+TEST $CLI volume set $V0 performance.quick-read off
14f8ab
+
14f8ab
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
14f8ab
+
14f8ab
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Active' scrub_status $V0 'State of scrub'
14f8ab
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/bitd.log' scrub_status $V0 'Bitrot error log location'
14f8ab
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/scrub.log' scrub_status $V0 'Scrubber error log location'
14f8ab
+
14f8ab
+## Set expiry-timeout to 1 sec
14f8ab
+TEST $CLI volume set $V0 features.expiry-time 1
14f8ab
+
14f8ab
+##Mount $V0
14f8ab
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
14f8ab
+
14f8ab
+## Turn off quick-read xlator so that the contents are not served from the
14f8ab
+# quick-read cache.
14f8ab
+TEST $CLI volume set $V0 performance.quick-read off
14f8ab
+
14f8ab
+#Create sample file
14f8ab
+TEST `echo "1234" > $M0/FILE1`
14f8ab
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' "/$B0/${V0}1/FILE1"
14f8ab
+
14f8ab
+##disable bitrot
14f8ab
+TEST $CLI volume bitrot $V0 disable
14f8ab
+
14f8ab
+## modify the file
14f8ab
+TEST `echo "write" >> $M0/FILE1`
14f8ab
+
14f8ab
+# unmount and remount when the file has to be accessed.
14f8ab
+# This is to ensure that, when the remount happens,
14f8ab
+# and the file is read, its contents are served from the
14f8ab
+# brick instead of cache.
14f8ab
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
14f8ab
+
14f8ab
+##enable bitrot
14f8ab
+TEST $CLI volume bitrot $V0 enable
14f8ab
+
14f8ab
+# expiry time is set to 1 second. Hence sleep for 2 seconds for the
14f8ab
+# oneshot crawler to finish its crawling and sign the file properly.
14f8ab
+sleep 2
14f8ab
+
14f8ab
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
14f8ab
+
14f8ab
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Active' scrub_status $V0 'State of scrub'
14f8ab
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/bitd.log' scrub_status $V0 'Bitrot error log location'
14f8ab
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/scrub.log' scrub_status $V0 'Scrubber error log location'
14f8ab
+
14f8ab
+## Ondemand scrub
14f8ab
+TEST $CLI volume bitrot $V0 scrub ondemand
14f8ab
+
14f8ab
+# the scrub ondemand CLI command just ensures that
14f8ab
+# the scrubber has received the ondemand scrub directive
14f8ab
+# and started. sleep for 2 seconds for scrubber to finish
14f8ab
+# crawling and marking file(s) as bad (if it finds that
14f8ab
+# corruption has happened) which are filesystem operations.
14f8ab
+sleep 2
14f8ab
+
14f8ab
+TEST ! getfattr -n 'trusted.bit-rot.bad-file' $B0/${V0}1/FILE1
14f8ab
+
14f8ab
+##Mount $V0
14f8ab
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
14f8ab
+
14f8ab
+TEST cat $M0/FILE1
14f8ab
+
14f8ab
+cleanup;
14f8ab
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
14f8ab
index b8feef7..424c0d5 100644
14f8ab
--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
14f8ab
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
14f8ab
@@ -973,6 +973,7 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
14f8ab
     int32_t ret = -1;
14f8ab
     inode_t *linked_inode = NULL;
14f8ab
     gf_boolean_t need_signing = _gf_false;
14f8ab
+    gf_boolean_t need_reopen = _gf_true;
14f8ab
 
14f8ab
     GF_VALIDATE_OR_GOTO("bit-rot", subvol, out);
14f8ab
     GF_VALIDATE_OR_GOTO("bit-rot", data, out);
14f8ab
@@ -1046,6 +1047,18 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
14f8ab
                    uuid_utoa(linked_inode->gfid));
14f8ab
     } else {
14f8ab
         need_signing = br_check_object_need_sign(this, xattr, child);
14f8ab
+
14f8ab
+        /*
14f8ab
+         * If we are here, the bitrot daemon has started. Whether it is
14f8ab
+         * a simple restart of the daemon or a start triggered by the
14f8ab
+         * feature being enabled is hard to determine. Hence,
14f8ab
+         * if need_signing is false (because bit-rot version and signature
14f8ab
+         * are present), then still go ahead and sign it.
14f8ab
+         */
14f8ab
+        if (!need_signing) {
14f8ab
+            need_signing = _gf_true;
14f8ab
+            need_reopen = _gf_true;
14f8ab
+        }
14f8ab
     }
14f8ab
 
14f8ab
     if (!need_signing)
14f8ab
@@ -1054,7 +1067,7 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
14f8ab
     gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_TRIGGER_SIGN,
14f8ab
            "Triggering signing for %s [GFID: %s | Brick: %s]", loc.path,
14f8ab
            uuid_utoa(linked_inode->gfid), child->brick_path);
14f8ab
-    br_trigger_sign(this, child, linked_inode, &loc, _gf_true);
14f8ab
+    br_trigger_sign(this, child, linked_inode, &loc, need_reopen);
14f8ab
 
14f8ab
     ret = 0;
14f8ab
 
14f8ab
-- 
14f8ab
1.8.3.1
14f8ab