From 40ef45a975c7b94ccf710203d92cbee3d4616631 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Nov 18 2022 20:34:45 +0000 Subject: Backport fixes for corruption caused by compressed receive fallback Patch name in the btrfs-progs repo: btrfs-progs: receive: fix a silent data loss bug with encoded writes --- diff --git a/SOURCES/1112-btrfs-progs-receive-add-debug-messages-when-processi.patch b/SOURCES/1112-btrfs-progs-receive-add-debug-messages-when-processi.patch new file mode 100644 index 0000000..84e6086 --- /dev/null +++ b/SOURCES/1112-btrfs-progs-receive-add-debug-messages-when-processi.patch @@ -0,0 +1,60 @@ +From 6c27db46dacb827f46dfbd5fa83c828fd81db991 Mon Sep 17 00:00:00 2001 +Message-Id: <6c27db46dacb827f46dfbd5fa83c828fd81db991.1668799549.git.boris@bur.io> +In-Reply-To: +References: +From: Filipe Manana +Date: Tue, 15 Nov 2022 16:25:24 +0000 +Subject: [PATCH 1/3] btrfs-progs: receive: add debug messages when processing + encoded writes + +Unlike for commands from the v1 stream, we have no debug messages logged +when processing encoded write commands, which makes it harder to debug +issues. + +So add log messages, when the log verbosity level is >= 3, for encoded +write commands, mentioning the value of all fields and also log when we +fallback from an encoded write to the decompress and write approach. + +The log messages look like this: + + encoded_write f3 - offset=33792, len=4096, unencoded_offset=33792, unencoded_file_len=31744, unencoded_len=65536, compression=1, encryption=0 + encoded_write f3 - falling back to decompress and write due to errno 22 ("Invalid argument") + +Signed-off-by: Filipe Manana +Reviewed-by: Boris Burkov +--- + cmds/receive.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/cmds/receive.c b/cmds/receive.c +index af3138d5..ce615e7c 100644 +--- a/cmds/receive.c ++++ b/cmds/receive.c +@@ -1246,6 +1246,13 @@ static int process_encoded_write(const char *path, const void *data, u64 offset, + .encryption = encryption, + }; + ++ if (bconf.verbose >= 3) ++ fprintf(stderr, ++"encoded_write %s - offset=%llu, len=%llu, unencoded_offset=%llu, unencoded_file_len=%llu, unencoded_len=%llu, compression=%u, encryption=%u\n", ++ path, offset, len, unencoded_offset, unencoded_file_len, ++ unencoded_len, compression, encryption); ++ ++ + if (encryption) { + error("encoded_write: encryption not supported"); + return -EOPNOTSUPP; +@@ -1271,6 +1278,10 @@ static int process_encoded_write(const char *path, const void *data, u64 offset, + error("encoded_write: writing to %s failed: %m", path); + return ret; + } ++ if (bconf.verbose >= 3) ++ fprintf(stderr, ++"encoded_write %s - falling back to decompress and write due to errno %d (\"%m\")\n", ++ path, errno); + } + + return decompress_and_write(rctx, data, offset, len, unencoded_file_len, +-- +2.38.1 + diff --git a/SOURCES/1113-btrfs-progs-receive-add-debug-messages-when-processi.patch b/SOURCES/1113-btrfs-progs-receive-add-debug-messages-when-processi.patch new file mode 100644 index 0000000..614e7f0 --- /dev/null +++ b/SOURCES/1113-btrfs-progs-receive-add-debug-messages-when-processi.patch @@ -0,0 +1,40 @@ +From 4e6d18589dc8cba4c8414a2f03c14c31bd400a6c Mon Sep 17 00:00:00 2001 +Message-Id: <4e6d18589dc8cba4c8414a2f03c14c31bd400a6c.1668799549.git.boris@bur.io> +In-Reply-To: +References: +From: Filipe Manana +Date: Tue, 15 Nov 2022 16:25:25 +0000 +Subject: [PATCH 2/3] btrfs-progs: receive: add debug messages when processing + fallocate + +Unlike for commands from the v1 stream, we have no debug messages logged +when processing fallocate commands, which makes it harder to debug issues. + +So add log messages, when the log verbosity level is >= 3, for fallocate +commands, mentioning the value of all fields. + +Signed-off-by: Filipe Manana +Reviewed-by: Boris Burkov +--- + cmds/receive.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/cmds/receive.c b/cmds/receive.c +index ce615e7c..9f73b072 100644 +--- a/cmds/receive.c ++++ b/cmds/receive.c +@@ -1296,6 +1296,11 @@ static int process_fallocate(const char *path, int mode, u64 offset, u64 len, + struct btrfs_receive *rctx = user; + char full_path[PATH_MAX]; + ++ if (bconf.verbose >= 3) ++ fprintf(stderr, ++ "fallocate %s - offset=%llu, len=%llu, mode=%d\n", ++ path, offset, len, mode); ++ + ret = path_cat_out(full_path, rctx->full_subvol_path, path); + if (ret < 0) { + error("fallocate: path invalid: %s", path); +-- +2.38.1 + diff --git a/SOURCES/1114-btrfs-progs-receive-fix-silent-data-loss-after-fall-.patch b/SOURCES/1114-btrfs-progs-receive-fix-silent-data-loss-after-fall-.patch new file mode 100644 index 0000000..f77ddd6 --- /dev/null +++ b/SOURCES/1114-btrfs-progs-receive-fix-silent-data-loss-after-fall-.patch @@ -0,0 +1,165 @@ +From bbd4bb4fd663da72d0a9806e53d84601d03532b2 Mon Sep 17 00:00:00 2001 +Message-Id: +In-Reply-To: +References: +From: Filipe Manana +Date: Tue, 15 Nov 2022 16:25:26 +0000 +Subject: [PATCH 3/3] btrfs-progs: receive: fix silent data loss after fall + back from encoded write + +When attempting an encoded write, if it fails for some specific reason +like -EINVAL (when an offset is not sector size aligned) or -ENOSPC, we +then fallback into decompressing the data and writing it using regular +buffered IO. This logic however is not correct, one of the reasons is +that it assumes the encoded offset is smaller than the unencoded file +length and that they can be compared, but one is an offset and the other +is a length, not an end offset, so they can't be compared to get correct +results. This bad logic will often result in not copying all data, or even +no data at all, resulting in a silent data loss. This is easily seen in +with the following reproducer: + + $ cat test.sh + #!/bin/bash + + DEV=/dev/sdj + MNT=/mnt/sdj + + umount $DEV &> /dev/null + mkfs.btrfs -f $DEV > /dev/null + mount -o compress $DEV $MNT + + # File foo has a size of 33K, not aligned to the sector size. + xfs_io -f -c "pwrite -S 0xab 0 33K" $MNT/foo + + xfs_io -f -c "pwrite -S 0xcd 0 64K" $MNT/bar + + # Now clone the first 32K of file bar into foo at offset 0. + xfs_io -c "reflink $MNT/bar 0 0 32K" $MNT/foo + + # Snapshot the default subvolume and create a full send stream (v2). + btrfs subvolume snapshot -r $MNT $MNT/snap + + btrfs send --compressed-data -f /tmp/test.send $MNT/snap + + echo -e "\nFile bar in the original filesystem:" + od -A d -t x1 $MNT/snap/bar + + umount $MNT + mkfs.btrfs -f $DEV > /dev/null + mount $DEV $MNT + + echo -e "\nReceiving stream in a new filesystem..." + btrfs receive -f /tmp/test.send $MNT + + echo -e "\nFile bar in the new filesystem:" + od -A d -t x1 $MNT/snap/bar + + umount $MNT + +Running the test without this patch: + + $ ./test.sh + (...) + File bar in the original filesystem: + 0000000 cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd + * + 0065536 + + Receiving stream in a new filesystem... + At subvol snap + + File bar in the new filesystem: + 0000000 cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd + * + 0033792 + +We end up with file bar having less data, and a smaller size, than in the +original filesystem. + +This happens because when processing file bar, send issues the following +commands: + + clone bar - source=foo source offset=0 offset=0 length=32768 + write bar - offset=32768 length=1024 + encoded_write bar - offset=33792, len=4096, unencoded_offset=33792, unencoded_file_len=31744, unencoded_len=65536, compression=1, encryption=0 + +The first 32K are cloned from file foo, as that file ranged is shared +between the files. + +Then there's a regular write operation for the file range [32K, 33K[, +since file foo has different data from bar for that file range. + +Finally for the remainder of file bar, the send side issues an encoded +write since the extent is compressed in the source filesystem, for the +file offset 33792 (33K), remaining 31K of data. The receiver will try the +encoded write, but that fails with -EINVAL since the offset 33K is not +sector size aligned, so it will fallback to decompressing the data and +writing it using regular buffered writes. However that results in doing +no writes at decompress_and_write() because 'pos' is initialized to the +value of 33K (unencoded_offset) and unencoded_file_len is 31K, so the +while loop has no iterations. + +Another case where we can fallback to decompression plus regular buffered +writes is when the destination filesystem has a sector size larger then +the sector size of the source filesystem (for example when the source +filesystem is on x86_64 with a 4K sector size and the destination +filesystem is PowerPC with a 64K sector size). In that scenario encoded +write attempts wil fail with -EINVAL due to offsets not being aligned with +the sector size of the destination filesystem, and the receive will +attempt the fallback of decompressing the buffer and writing the +decompressed using regular buffered IO. + +Fix this by tracking the number of written bytes instead, and increment +it, and the unencoded offset, after each write. + +Fixes: d20e759fc917 ("btrfs-progs: receive: encoded_write fallback to explicit decode and write") +Signed-off-by: Filipe Manana +Reviewed-by: Boris Burkov +--- + cmds/receive.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +diff --git a/cmds/receive.c b/cmds/receive.c +index 9f73b072..7756b42c 100644 +--- a/cmds/receive.c ++++ b/cmds/receive.c +@@ -1155,10 +1155,9 @@ static int decompress_and_write(struct btrfs_receive *rctx, + u32 compression) + { + int ret = 0; +- size_t pos; +- ssize_t w; + char *unencoded_data; + int sector_shift = 0; ++ u64 written = 0; + + unencoded_data = calloc(unencoded_len, 1); + if (!unencoded_data) { +@@ -1209,17 +1208,19 @@ static int decompress_and_write(struct btrfs_receive *rctx, + goto out; + } + +- pos = unencoded_offset; +- while (pos < unencoded_file_len) { +- w = pwrite(rctx->write_fd, unencoded_data + pos, +- unencoded_file_len - pos, offset); ++ while (written < unencoded_file_len) { ++ ssize_t w; ++ ++ w = pwrite(rctx->write_fd, unencoded_data + unencoded_offset, ++ unencoded_file_len - written, offset); + if (w < 0) { + ret = -errno; + error("writing unencoded data failed: %m"); + goto out; + } +- pos += w; ++ written += w; + offset += w; ++ unencoded_offset += w; + } + out: + free(unencoded_data); +-- +2.38.1 + diff --git a/SPECS/btrfs-progs.spec b/SPECS/btrfs-progs.spec index fb94cb8..f4755dd 100644 --- a/SPECS/btrfs-progs.spec +++ b/SPECS/btrfs-progs.spec @@ -1,6 +1,6 @@ Name: btrfs-progs Version: 5.16.2 -Release: 1.1%{?dist} +Release: 1.2%{?dist} Summary: Userspace programs for btrfs License: GPLv2 @@ -27,6 +27,9 @@ Patch1108: 1108-btrfs-progs-receive-process-fallocate-commands.patch Patch1109: 1109-btrfs-progs-receive-process-setflags-ioctl-commands.patch Patch1110: 1110-btrfs-progs-send-stream-v2-ioctl-flags.patch Patch1111: 1111-btrfs-progs-receive-add-tests-for-basic-encoded_writ.patch +Patch1112: 1112-btrfs-progs-receive-add-debug-messages-when-processi.patch +Patch1113: 1113-btrfs-progs-receive-add-debug-messages-when-processi.patch +Patch1114: 1114-btrfs-progs-receive-fix-silent-data-loss-after-fall-.patch %endif BuildRequires: gnupg2 @@ -162,6 +165,9 @@ popd %{python3_sitearch}/btrfsutil-*.egg-info/ %changelog +* Fri Nov 18 2022 Boris Burkov - 5.16.2-1.2 +- Backport compressed receive backport corruption fix + * Mon Apr 04 2022 Omar Sandoval - 5.16.2-1.1 - Adapt for CentOS Hyperscale - Add compressed send/receive patches to Facebook build