#3 Backport fixes for corruption caused by compressed receive fallback
Merged a year ago by dcavalca. Opened a year ago by borisb.
rpms/ borisb/btrfs-progs recv-fix-c9  into  c9s-sig-hyperscale

@@ -0,0 +1,60 @@ 

+ From 6c27db46dacb827f46dfbd5fa83c828fd81db991 Mon Sep 17 00:00:00 2001

+ Message-Id: <6c27db46dacb827f46dfbd5fa83c828fd81db991.1668799549.git.boris@bur.io>

+ In-Reply-To: <cover.1668799549.git.boris@bur.io>

+ References: <cover.1668799549.git.boris@bur.io>

+ From: Filipe Manana <fdmanana@suse.com>

+ Date: Tue, 15 Nov 2022 16:25:24 +0000

+ Subject: [PATCH 1/3] btrfs-progs: receive: add debug messages when processing

+  encoded writes

+ 

+ Unlike for commands from the v1 stream, we have no debug messages logged

+ when processing encoded write commands, which makes it harder to debug

+ issues.

+ 

+ So add log messages, when the log verbosity level is >= 3, for encoded

+ write commands, mentioning the value of all fields and also log when we

+ fallback from an encoded write to the decompress and write approach.

+ 

+ The log messages look like this:

+ 

+   encoded_write f3 - offset=33792, len=4096, unencoded_offset=33792, unencoded_file_len=31744, unencoded_len=65536, compression=1, encryption=0

+   encoded_write f3 - falling back to decompress and write due to errno 22 ("Invalid argument")

+ 

+ Signed-off-by: Filipe Manana <fdmanana@suse.com>

+ Reviewed-by: Boris Burkov <boris@bur.io>

+ ---

+  cmds/receive.c | 11 +++++++++++

+  1 file changed, 11 insertions(+)

+ 

+ diff --git a/cmds/receive.c b/cmds/receive.c

+ index af3138d5..ce615e7c 100644

+ --- a/cmds/receive.c

+ +++ b/cmds/receive.c

+ @@ -1246,6 +1246,13 @@ static int process_encoded_write(const char *path, const void *data, u64 offset,

+  		.encryption = encryption,

+  	};

+  

+ +	if (bconf.verbose >= 3)

+ +		fprintf(stderr,

+ +"encoded_write %s - offset=%llu, len=%llu, unencoded_offset=%llu, unencoded_file_len=%llu, unencoded_len=%llu, compression=%u, encryption=%u\n",

+ +			path, offset, len, unencoded_offset, unencoded_file_len,

+ +			unencoded_len, compression, encryption);

+ +

+ +

+  	if (encryption) {

+  		error("encoded_write: encryption not supported");

+  		return -EOPNOTSUPP;

+ @@ -1271,6 +1278,10 @@ static int process_encoded_write(const char *path, const void *data, u64 offset,

+  			error("encoded_write: writing to %s failed: %m", path);

+  			return ret;

+  		}

+ +		if (bconf.verbose >= 3)

+ +			fprintf(stderr,

+ +"encoded_write %s - falling back to decompress and write due to errno %d (\"%m\")\n",

+ +				path, errno);

+  	}

+  

+  	return decompress_and_write(rctx, data, offset, len, unencoded_file_len,

+ -- 

+ 2.38.1

+ 

@@ -0,0 +1,40 @@ 

+ From 4e6d18589dc8cba4c8414a2f03c14c31bd400a6c Mon Sep 17 00:00:00 2001

+ Message-Id: <4e6d18589dc8cba4c8414a2f03c14c31bd400a6c.1668799549.git.boris@bur.io>

+ In-Reply-To: <cover.1668799549.git.boris@bur.io>

+ References: <cover.1668799549.git.boris@bur.io>

+ From: Filipe Manana <fdmanana@suse.com>

+ Date: Tue, 15 Nov 2022 16:25:25 +0000

+ Subject: [PATCH 2/3] btrfs-progs: receive: add debug messages when processing

+  fallocate

+ 

+ Unlike for commands from the v1 stream, we have no debug messages logged

+ when processing fallocate commands, which makes it harder to debug issues.

+ 

+ So add log messages, when the log verbosity level is >= 3, for fallocate

+ commands, mentioning the value of all fields.

+ 

+ Signed-off-by: Filipe Manana <fdmanana@suse.com>

+ Reviewed-by: Boris Burkov <boris@bur.io>

+ ---

+  cmds/receive.c | 5 +++++

+  1 file changed, 5 insertions(+)

+ 

+ diff --git a/cmds/receive.c b/cmds/receive.c

+ index ce615e7c..9f73b072 100644

+ --- a/cmds/receive.c

+ +++ b/cmds/receive.c

+ @@ -1296,6 +1296,11 @@ static int process_fallocate(const char *path, int mode, u64 offset, u64 len,

+  	struct btrfs_receive *rctx = user;

+  	char full_path[PATH_MAX];

+  

+ +	if (bconf.verbose >= 3)

+ +		fprintf(stderr,

+ +			"fallocate %s - offset=%llu, len=%llu, mode=%d\n",

+ +			path, offset, len, mode);

+ +

+  	ret = path_cat_out(full_path, rctx->full_subvol_path, path);

+  	if (ret < 0) {

+  		error("fallocate: path invalid: %s", path);

+ -- 

+ 2.38.1

+ 

@@ -0,0 +1,165 @@ 

+ From bbd4bb4fd663da72d0a9806e53d84601d03532b2 Mon Sep 17 00:00:00 2001

+ Message-Id: <bbd4bb4fd663da72d0a9806e53d84601d03532b2.1668799549.git.boris@bur.io>

+ In-Reply-To: <cover.1668799549.git.boris@bur.io>

+ References: <cover.1668799549.git.boris@bur.io>

+ From: Filipe Manana <fdmanana@suse.com>

+ Date: Tue, 15 Nov 2022 16:25:26 +0000

+ Subject: [PATCH 3/3] btrfs-progs: receive: fix silent data loss after fall

+  back from encoded write

+ 

+ When attempting an encoded write, if it fails for some specific reason

+ like -EINVAL (when an offset is not sector size aligned) or -ENOSPC, we

+ then fallback into decompressing the data and writing it using regular

+ buffered IO. This logic however is not correct, one of the reasons is

+ that it assumes the encoded offset is smaller than the unencoded file

+ length and that they can be compared, but one is an offset and the other

+ is a length, not an end offset, so they can't be compared to get correct

+ results. This bad logic will often result in not copying all data, or even

+ no data at all, resulting in a silent data loss. This is easily seen in

+ with the following reproducer:

+ 

+    $ cat test.sh

+    #!/bin/bash

+ 

+    DEV=/dev/sdj

+    MNT=/mnt/sdj

+ 

+    umount $DEV &> /dev/null

+    mkfs.btrfs -f $DEV > /dev/null

+    mount -o compress $DEV $MNT

+ 

+    # File foo has a size of 33K, not aligned to the sector size.

+    xfs_io -f -c "pwrite -S 0xab 0 33K" $MNT/foo

+ 

+    xfs_io -f -c "pwrite -S 0xcd 0 64K" $MNT/bar

+ 

+    # Now clone the first 32K of file bar into foo at offset 0.

+    xfs_io -c "reflink $MNT/bar 0 0 32K" $MNT/foo

+ 

+    # Snapshot the default subvolume and create a full send stream (v2).

+    btrfs subvolume snapshot -r $MNT $MNT/snap

+ 

+    btrfs send --compressed-data -f /tmp/test.send $MNT/snap

+ 

+    echo -e "\nFile bar in the original filesystem:"

+    od -A d -t x1 $MNT/snap/bar

+ 

+    umount $MNT

+    mkfs.btrfs -f $DEV > /dev/null

+    mount $DEV $MNT

+ 

+    echo -e "\nReceiving stream in a new filesystem..."

+    btrfs receive -f /tmp/test.send $MNT

+ 

+    echo -e "\nFile bar in the new filesystem:"

+    od -A d -t x1 $MNT/snap/bar

+ 

+    umount $MNT

+ 

+ Running the test without this patch:

+ 

+    $ ./test.sh

+    (...)

+    File bar in the original filesystem:

+    0000000 cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd

+    *

+    0065536

+ 

+    Receiving stream in a new filesystem...

+    At subvol snap

+ 

+    File bar in the new filesystem:

+    0000000 cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd

+    *

+    0033792

+ 

+ We end up with file bar having less data, and a smaller size, than in the

+ original filesystem.

+ 

+ This happens because when processing file bar, send issues the following

+ commands:

+ 

+    clone bar - source=foo source offset=0 offset=0 length=32768

+    write bar - offset=32768 length=1024

+    encoded_write bar - offset=33792, len=4096, unencoded_offset=33792, unencoded_file_len=31744, unencoded_len=65536, compression=1, encryption=0

+ 

+ The first 32K are cloned from file foo, as that file ranged is shared

+ between the files.

+ 

+ Then there's a regular write operation for the file range [32K, 33K[,

+ since file foo has different data from bar for that file range.

+ 

+ Finally for the remainder of file bar, the send side issues an encoded

+ write since the extent is compressed in the source filesystem, for the

+ file offset 33792 (33K), remaining 31K of data. The receiver will try the

+ encoded write, but that fails with -EINVAL since the offset 33K is not

+ sector size aligned, so it will fallback to decompressing the data and

+ writing it using regular buffered writes. However that results in doing

+ no writes at decompress_and_write() because 'pos' is initialized to the

+ value of 33K (unencoded_offset) and unencoded_file_len is 31K, so the

+ while loop has no iterations.

+ 

+ Another case where we can fallback to decompression plus regular buffered

+ writes is when the destination filesystem has a sector size larger then

+ the sector size of the source filesystem (for example when the source

+ filesystem is on x86_64 with a 4K sector size and the destination

+ filesystem is PowerPC with a 64K sector size). In that scenario encoded

+ write attempts wil fail with -EINVAL due to offsets not being aligned with

+ the sector size of the destination filesystem, and the receive will

+ attempt the fallback of decompressing the buffer and writing the

+ decompressed using regular buffered IO.

+ 

+ Fix this by tracking the number of written bytes instead, and increment

+ it, and the unencoded offset, after each write.

+ 

+ Fixes: d20e759fc917 ("btrfs-progs: receive: encoded_write fallback to explicit decode and write")

+ Signed-off-by: Filipe Manana <fdmanana@suse.com>

+ Reviewed-by: Boris Burkov <boris@bur.io>

+ ---

+  cmds/receive.c | 15 ++++++++-------

+  1 file changed, 8 insertions(+), 7 deletions(-)

+ 

+ diff --git a/cmds/receive.c b/cmds/receive.c

+ index 9f73b072..7756b42c 100644

+ --- a/cmds/receive.c

+ +++ b/cmds/receive.c

+ @@ -1155,10 +1155,9 @@ static int decompress_and_write(struct btrfs_receive *rctx,

+  				u32 compression)

+  {

+  	int ret = 0;

+ -	size_t pos;

+ -	ssize_t w;

+  	char *unencoded_data;

+  	int sector_shift = 0;

+ +	u64 written = 0;

+  

+  	unencoded_data = calloc(unencoded_len, 1);

+  	if (!unencoded_data) {

+ @@ -1209,17 +1208,19 @@ static int decompress_and_write(struct btrfs_receive *rctx,

+  		goto out;

+  	}

+  

+ -	pos = unencoded_offset;

+ -	while (pos < unencoded_file_len) {

+ -		w = pwrite(rctx->write_fd, unencoded_data + pos,

+ -			   unencoded_file_len - pos, offset);

+ +	while (written < unencoded_file_len) {

+ +		ssize_t w;

+ +

+ +		w = pwrite(rctx->write_fd, unencoded_data + unencoded_offset,

+ +			   unencoded_file_len - written, offset);

+  		if (w < 0) {

+  			ret = -errno;

+  			error("writing unencoded data failed: %m");

+  			goto out;

+  		}

+ -		pos += w;

+ +		written += w;

+  		offset += w;

+ +		unencoded_offset += w;

+  	}

+  out:

+  	free(unencoded_data);

+ -- 

+ 2.38.1

+ 

file modified
+7 -1
@@ -1,6 +1,6 @@ 

  Name:           btrfs-progs

  Version:        5.16.2

- Release:        1.1%{?dist}

+ Release:        1.2%{?dist}

  Summary:        Userspace programs for btrfs

  

  License:        GPLv2
@@ -27,6 +27,9 @@ 

  Patch1109:      1109-btrfs-progs-receive-process-setflags-ioctl-commands.patch

  Patch1110:      1110-btrfs-progs-send-stream-v2-ioctl-flags.patch

  Patch1111:      1111-btrfs-progs-receive-add-tests-for-basic-encoded_writ.patch

+ Patch1112:      1112-btrfs-progs-receive-add-debug-messages-when-processi.patch

+ Patch1113:      1113-btrfs-progs-receive-add-debug-messages-when-processi.patch

+ Patch1114:      1114-btrfs-progs-receive-fix-silent-data-loss-after-fall-.patch

  %endif

  

  BuildRequires:  gnupg2
@@ -162,6 +165,9 @@ 

  %{python3_sitearch}/btrfsutil-*.egg-info/

  

  %changelog

+ * Fri Nov 18 2022 Boris Burkov <boris@bur.io> - 5.16.2-1.2

+ - Backport compressed receive backport corruption fix

+ 

  * Mon Apr 04 2022 Omar Sandoval <osandov@osandov.com> - 5.16.2-1.1

  - Adapt for CentOS Hyperscale

  - Add compressed send/receive patches to Facebook build