Blame SOURCES/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch

29b115
From ed69e01352b5e9a06173daab53bfa373c8535732 Mon Sep 17 00:00:00 2001
29b115
From: Hanna Reitz <hreitz@redhat.com>
29b115
Date: Tue, 5 Apr 2022 15:46:51 +0200
29b115
Subject: [PATCH 05/16] iotests/108: Test new refcount rebuild algorithm
29b115
29b115
RH-Author: Hanna Reitz <hreitz@redhat.com>
29b115
RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding
29b115
RH-Commit: [2/4] b68310a9fee8465dd3f568c8e867e1b7ae52bdaf (hreitz/qemu-kvm-c-9-s)
29b115
RH-Bugzilla: 2072379
29b115
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
29b115
RH-Acked-by: Eric Blake <eblake@redhat.com>
29b115
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
29b115
29b115
One clear problem with how qcow2's refcount structure rebuild algorithm
29b115
used to be before "qcow2: Improve refcount structure rebuilding" was
29b115
that it was prone to failure for qcow2 images on block devices: There is
29b115
generally unused space after the actual image, and if that exceeds what
29b115
one refblock covers, the old algorithm would invariably write the
29b115
reftable past the block device's end, which cannot work.  The new
29b115
algorithm does not have this problem.
29b115
29b115
Test it with three tests:
29b115
(1) Create an image with more empty space at the end than what one
29b115
    refblock covers, see whether rebuilding the refcount structures
29b115
    results in a change in the image file length.  (It should not.)
29b115
29b115
(2) Leave precisely enough space somewhere at the beginning of the image
29b115
    for the new reftable (and the refblock for that place), see whether
29b115
    the new algorithm puts the reftable there.  (It should.)
29b115
29b115
(3) Test the original problem: Create (something like) a block device
29b115
    with a fixed size, then create a qcow2 image in there, write some
29b115
    data, and then have qemu-img check rebuild the refcount structures.
29b115
    Before HEAD^, the reftable would have been written past the image
29b115
    file end, i.e. outside of what the block device provides, which
29b115
    cannot work.  HEAD^ should have fixed that.
29b115
    ("Something like a block device" means a loop device if we can use
29b115
    one ("sudo -n losetup" works), or a FUSE block export with
29b115
    growable=false otherwise.)
29b115
29b115
Reviewed-by: Eric Blake <eblake@redhat.com>
29b115
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
29b115
Message-Id: <20220405134652.19278-3-hreitz@redhat.com>
29b115
(cherry picked from commit 9ffd6d646d1d5ee9087a8cbf0b7d2f96c5656162)
29b115
29b115
Conflicts:
29b115
- 108: The downstream qemu-storage-daemon does not support --daemonize,
29b115
  so this switch has been replaced by a loop waiting for the PID file to
29b115
  appear
29b115
29b115
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
29b115
---
29b115
 tests/qemu-iotests/108     | 263 ++++++++++++++++++++++++++++++++++++-
29b115
 tests/qemu-iotests/108.out |  81 ++++++++++++
29b115
 2 files changed, 343 insertions(+), 1 deletion(-)
29b115
29b115
diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108
29b115
index 56339ab2c5..a3090e2875 100755
29b115
--- a/tests/qemu-iotests/108
29b115
+++ b/tests/qemu-iotests/108
29b115
@@ -30,13 +30,20 @@ status=1	# failure is the default!
29b115
 
29b115
 _cleanup()
29b115
 {
29b115
-	_cleanup_test_img
29b115
+    _cleanup_test_img
29b115
+    if [ -f "$TEST_DIR/qsd.pid" ]; then
29b115
+        qsd_pid=$(cat "$TEST_DIR/qsd.pid")
29b115
+        kill -KILL "$qsd_pid"
29b115
+        fusermount -u "$TEST_DIR/fuse-export" &>/dev/null
29b115
+    fi
29b115
+    rm -f "$TEST_DIR/fuse-export"
29b115
 }
29b115
 trap "_cleanup; exit \$status" 0 1 2 3 15
29b115
 
29b115
 # get standard environment, filters and checks
29b115
 . ./common.rc
29b115
 . ./common.filter
29b115
+. ./common.qemu
29b115
 
29b115
 # This tests qcow2-specific low-level functionality
29b115
 _supported_fmt qcow2
29b115
@@ -47,6 +54,22 @@ _supported_os Linux
29b115
 # files
29b115
 _unsupported_imgopts 'refcount_bits=\([^1]\|.\([^6]\|$\)\)' data_file
29b115
 
29b115
+# This test either needs sudo -n losetup or FUSE exports to work
29b115
+if sudo -n losetup &>/dev/null; then
29b115
+    loopdev=true
29b115
+else
29b115
+    loopdev=false
29b115
+
29b115
+    # QSD --export fuse will either yield "Parameter 'id' is missing"
29b115
+    # or "Invalid parameter 'fuse'", depending on whether there is
29b115
+    # FUSE support or not.
29b115
+    error=$($QSD --export fuse 2>&1)
29b115
+    if [[ $error = *"'fuse'"* ]]; then
29b115
+        _notrun 'Passwordless sudo for losetup or FUSE support required, but' \
29b115
+                'neither is available'
29b115
+    fi
29b115
+fi
29b115
+
29b115
 echo
29b115
 echo '=== Repairing an image without any refcount table ==='
29b115
 echo
29b115
@@ -138,6 +161,244 @@ _make_test_img 64M
29b115
 poke_file "$TEST_IMG" $((0x10008)) "\xff\xff\xff\xff\xff\xff\x00\x00"
29b115
 _check_test_img -r all
29b115
 
29b115
+echo
29b115
+echo '=== Check rebuilt reftable location ==='
29b115
+
29b115
+# In an earlier version of the refcount rebuild algorithm, the
29b115
+# reftable was generally placed at the image end (unless something was
29b115
+# allocated in the area covered by the refblock right before the image
29b115
+# file end, then we would try to place the reftable in that refblock).
29b115
+# This was later changed so the reftable would be placed in the
29b115
+# earliest possible location.  Test this.
29b115
+
29b115
+echo
29b115
+echo '--- Does the image size increase? ---'
29b115
+echo
29b115
+
29b115
+# First test: Just create some image, write some data to it, and
29b115
+# resize it so there is free space at the end of the image (enough
29b115
+# that it spans at least one full refblock, which for cluster_size=512
29b115
+# images, spans 128k).  With the old algorithm, the reftable would
29b115
+# have then been placed at the end of the image file, but with the new
29b115
+# one, it will be put in that free space.
29b115
+# We want to check whether the size of the image file increases due to
29b115
+# rebuilding the refcount structures (it should not).
29b115
+
29b115
+_make_test_img -o 'cluster_size=512' 1M
29b115
+# Write something
29b115
+$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
29b115
+
29b115
+# Add free space
29b115
+file_len=$(stat -c '%s' "$TEST_IMG")
29b115
+truncate -s $((file_len + 256 * 1024)) "$TEST_IMG"
29b115
+
29b115
+# Corrupt the image by saying the image header was not allocated
29b115
+rt_offset=$(peek_file_be "$TEST_IMG" 48 8)
29b115
+rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8)
29b115
+poke_file "$TEST_IMG" $rb_offset "\x00\x00"
29b115
+
29b115
+# Check whether rebuilding the refcount structures increases the image
29b115
+# file size
29b115
+file_len=$(stat -c '%s' "$TEST_IMG")
29b115
+echo
29b115
+# The only leaks there can be are the old refcount structures that are
29b115
+# leaked during rebuilding, no need to clutter the output with them
29b115
+_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0'
29b115
+echo
29b115
+post_repair_file_len=$(stat -c '%s' "$TEST_IMG")
29b115
+
29b115
+if [[ $file_len -eq $post_repair_file_len ]]; then
29b115
+    echo 'OK: Image size did not change'
29b115
+else
29b115
+    echo 'ERROR: Image size differs' \
29b115
+        "($file_len before, $post_repair_file_len after)"
29b115
+fi
29b115
+
29b115
+echo
29b115
+echo '--- Will the reftable occupy a hole specifically left for it?  ---'
29b115
+echo
29b115
+
29b115
+# Note: With cluster_size=512, every refblock covers 128k.
29b115
+# The reftable covers 8M per reftable cluster.
29b115
+
29b115
+# Create an image that requires two reftable clusters (just because
29b115
+# this is more interesting than a single-clustered reftable).
29b115
+_make_test_img -o 'cluster_size=512' 9M
29b115
+$QEMU_IO -c 'write 0 8M' "$TEST_IMG" | _filter_qemu_io
29b115
+
29b115
+# Writing 8M will have resized the reftable.  Unfortunately, doing so
29b115
+# will leave holes in the file, so we need to fill them up so we can
29b115
+# be sure the whole file is allocated.  Do that by writing
29b115
+# consecutively smaller chunks starting from 8 MB, until the file
29b115
+# length increases even with a chunk size of 512.  Then we must have
29b115
+# filled all holes.
29b115
+ofs=$((8 * 1024 * 1024))
29b115
+block_len=$((16 * 1024))
29b115
+while [[ $block_len -ge 512 ]]; do
29b115
+    file_len=$(stat -c '%s' "$TEST_IMG")
29b115
+    while [[ $(stat -c '%s' "$TEST_IMG") -eq $file_len ]]; do
29b115
+        # Do not include this in the reference output, it does not
29b115
+        # really matter which qemu-io calls we do here exactly
29b115
+        $QEMU_IO -c "write $ofs $block_len" "$TEST_IMG" >/dev/null
29b115
+        ofs=$((ofs + block_len))
29b115
+    done
29b115
+    block_len=$((block_len / 2))
29b115
+done
29b115
+
29b115
+# Fill up to 9M (do not include this in the reference output either,
29b115
+# $ofs is random for all we know)
29b115
+$QEMU_IO -c "write $ofs $((9 * 1024 * 1024 - ofs))" "$TEST_IMG" >/dev/null
29b115
+
29b115
+# Make space as follows:
29b115
+# - For the first refblock: Right at the beginning of the image (this
29b115
+#   refblock is placed in the first place possible),
29b115
+# - For the reftable somewhere soon afterwards, still near the
29b115
+#   beginning of the image (i.e. covered by the first refblock); the
29b115
+#   reftable too is placed in the first place possible, but only after
29b115
+#   all refblocks have been placed.
29b115
+# No space is needed for the other refblocks, because no refblock is
29b115
+# put before the space it covers.  In this test case, we do not mind
29b115
+# if they are placed at the image file's end.
29b115
+
29b115
+# Before we make that space, we have to find out the host offset of
29b115
+# the area that belonged to the two data clusters at guest offset 4k,
29b115
+# because we expect the reftable to be placed there, and we will have
29b115
+# to verify that it is.
29b115
+
29b115
+l1_offset=$(peek_file_be "$TEST_IMG" 40 8)
29b115
+l2_offset=$(peek_file_be "$TEST_IMG" $l1_offset 8)
29b115
+l2_offset=$((l2_offset & 0x00fffffffffffe00))
29b115
+data_4k_offset=$(peek_file_be "$TEST_IMG" \
29b115
+                 $((l2_offset + 4096 / 512 * 8)) 8)
29b115
+data_4k_offset=$((data_4k_offset & 0x00fffffffffffe00))
29b115
+
29b115
+$QEMU_IO -c "discard 0 512" -c "discard 4k 1k" "$TEST_IMG" | _filter_qemu_io
29b115
+
29b115
+# Corrupt the image by saying the image header was not allocated
29b115
+rt_offset=$(peek_file_be "$TEST_IMG" 48 8)
29b115
+rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8)
29b115
+poke_file "$TEST_IMG" $rb_offset "\x00\x00"
29b115
+
29b115
+echo
29b115
+# The only leaks there can be are the old refcount structures that are
29b115
+# leaked during rebuilding, no need to clutter the output with them
29b115
+_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0'
29b115
+echo
29b115
+
29b115
+# Check whether the reftable was put where we expected
29b115
+rt_offset=$(peek_file_be "$TEST_IMG" 48 8)
29b115
+if [[ $rt_offset -eq $data_4k_offset ]]; then
29b115
+    echo 'OK: Reftable is where we expect it'
29b115
+else
29b115
+    echo "ERROR: Reftable is at $rt_offset, but was expected at $data_4k_offset"
29b115
+fi
29b115
+
29b115
+echo
29b115
+echo '--- Rebuilding refcount structures on block devices ---'
29b115
+echo
29b115
+
29b115
+# A block device cannot really grow, at least not during qemu-img
29b115
+# check.  As mentioned in the above cases, rebuilding the refcount
29b115
+# structure may lead to new refcount structures being written after
29b115
+# the end of the image, and in the past that happened even if there
29b115
+# was more than sufficient space in the image.  Such post-EOF writes
29b115
+# will not work on block devices, so test that the new algorithm
29b115
+# avoids it.
29b115
+
29b115
+# If we have passwordless sudo and losetup, we can use those to create
29b115
+# a block device.  Otherwise, we can resort to qemu's FUSE export to
29b115
+# create a file that isn't growable, which effectively tests the same
29b115
+# thing.
29b115
+
29b115
+_cleanup_test_img
29b115
+truncate -s $((64 * 1024 * 1024)) "$TEST_IMG"
29b115
+
29b115
+if $loopdev; then
29b115
+    export_mp=$(sudo -n losetup --show -f "$TEST_IMG")
29b115
+    export_mp_driver=host_device
29b115
+    sudo -n chmod go+rw "$export_mp"
29b115
+else
29b115
+    # Create non-growable FUSE export that is a bit like an empty
29b115
+    # block device
29b115
+    export_mp="$TEST_DIR/fuse-export"
29b115
+    export_mp_driver=file
29b115
+    touch "$export_mp"
29b115
+
29b115
+    $QSD \
29b115
+        --blockdev file,node-name=export-node,filename="$TEST_IMG" \
29b115
+        --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off \
29b115
+        --pidfile "$TEST_DIR/qsd.pid" \
29b115
+        &
29b115
+
29b115
+    while [ ! -f "$TEST_DIR/qsd.pid" ]; do
29b115
+        sleep 0.1
29b115
+    done
29b115
+fi
29b115
+
29b115
+# Now create a qcow2 image on the device -- unfortunately, qemu-img
29b115
+# create force-creates the file, so we have to resort to the
29b115
+# blockdev-create job.
29b115
+_launch_qemu \
29b115
+    --blockdev $export_mp_driver,node-name=file,filename="$export_mp"
29b115
+
29b115
+_send_qemu_cmd \
29b115
+    $QEMU_HANDLE \
29b115
+    '{ "execute": "qmp_capabilities" }' \
29b115
+    'return'
29b115
+
29b115
+# Small cluster size again, so the image needs multiple refblocks
29b115
+_send_qemu_cmd \
29b115
+    $QEMU_HANDLE \
29b115
+    '{ "execute": "blockdev-create",
29b115
+       "arguments": {
29b115
+           "job-id": "create",
29b115
+           "options": {
29b115
+               "driver": "qcow2",
29b115
+               "file": "file",
29b115
+               "size": '$((64 * 1024 * 1024))',
29b115
+               "cluster-size": 512
29b115
+           } } }' \
29b115
+    '"concluded"'
29b115
+
29b115
+_send_qemu_cmd \
29b115
+    $QEMU_HANDLE \
29b115
+    '{ "execute": "job-dismiss", "arguments": { "id": "create" } }' \
29b115
+    'return'
29b115
+
29b115
+_send_qemu_cmd \
29b115
+    $QEMU_HANDLE \
29b115
+    '{ "execute": "quit" }' \
29b115
+    'return'
29b115
+
29b115
+wait=y _cleanup_qemu
29b115
+echo
29b115
+
29b115
+# Write some data
29b115
+$QEMU_IO -c 'write 0 64k' "$export_mp" | _filter_qemu_io
29b115
+
29b115
+# Corrupt the image by saying the image header was not allocated
29b115
+rt_offset=$(peek_file_be "$export_mp" 48 8)
29b115
+rb_offset=$(peek_file_be "$export_mp" $rt_offset 8)
29b115
+poke_file "$export_mp" $rb_offset "\x00\x00"
29b115
+
29b115
+# Repairing such a simple case should just work
29b115
+# (We used to put the reftable at the end of the image file, which can
29b115
+# never work for non-growable devices.)
29b115
+echo
29b115
+TEST_IMG="$export_mp" _check_test_img -r all \
29b115
+    | grep -v '^Repairing cluster.*refcount=1 reference=0'
29b115
+
29b115
+if $loopdev; then
29b115
+    sudo -n losetup -d "$export_mp"
29b115
+else
29b115
+    qsd_pid=$(cat "$TEST_DIR/qsd.pid")
29b115
+    kill -TERM "$qsd_pid"
29b115
+    # Wait for the backgrounded QSD to exit (polls until its PID file is gone)
29b115
+    while [ -f "$TEST_DIR/qsd.pid" ]; do
29b115
+        true
29b115
+    done
29b115
+fi
29b115
+
29b115
 # success, all done
29b115
 echo '*** done'
29b115
 rm -f $seq.full
29b115
diff --git a/tests/qemu-iotests/108.out b/tests/qemu-iotests/108.out
29b115
index 75bab8dc84..b5401d788d 100644
29b115
--- a/tests/qemu-iotests/108.out
29b115
+++ b/tests/qemu-iotests/108.out
29b115
@@ -105,6 +105,87 @@ The following inconsistencies were found and repaired:
29b115
     0 leaked clusters
29b115
     1 corruptions
29b115
 
29b115
+Double checking the fixed image now...
29b115
+No errors were found on the image.
29b115
+
29b115
+=== Check rebuilt reftable location ===
29b115
+
29b115
+--- Does the image size increase? ---
29b115
+
29b115
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576
29b115
+wrote 65536/65536 bytes at offset 0
29b115
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
29b115
+
29b115
+ERROR cluster 0 refcount=0 reference=1
29b115
+Rebuilding refcount structure
29b115
+The following inconsistencies were found and repaired:
29b115
+
29b115
+    0 leaked clusters
29b115
+    1 corruptions
29b115
+
29b115
+Double checking the fixed image now...
29b115
+No errors were found on the image.
29b115
+
29b115
+OK: Image size did not change
29b115
+
29b115
+--- Will the reftable occupy a hole specifically left for it?  ---
29b115
+
29b115
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=9437184
29b115
+wrote 8388608/8388608 bytes at offset 0
29b115
+8 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
29b115
+discard 512/512 bytes at offset 0
29b115
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
29b115
+discard 1024/1024 bytes at offset 4096
29b115
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
29b115
+
29b115
+ERROR cluster 0 refcount=0 reference=1
29b115
+Rebuilding refcount structure
29b115
+The following inconsistencies were found and repaired:
29b115
+
29b115
+    0 leaked clusters
29b115
+    1 corruptions
29b115
+
29b115
+Double checking the fixed image now...
29b115
+No errors were found on the image.
29b115
+
29b115
+OK: Reftable is where we expect it
29b115
+
29b115
+--- Rebuilding refcount structures on block devices ---
29b115
+
29b115
+{ "execute": "qmp_capabilities" }
29b115
+{"return": {}}
29b115
+{ "execute": "blockdev-create",
29b115
+       "arguments": {
29b115
+           "job-id": "create",
29b115
+           "options": {
29b115
+               "driver": "IMGFMT",
29b115
+               "file": "file",
29b115
+               "size": 67108864,
29b115
+               "cluster-size": 512
29b115
+           } } }
29b115
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "create"}}
29b115
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "create"}}
29b115
+{"return": {}}
29b115
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "create"}}
29b115
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "create"}}
29b115
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "create"}}
29b115
+{ "execute": "job-dismiss", "arguments": { "id": "create" } }
29b115
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "create"}}
29b115
+{"return": {}}
29b115
+{ "execute": "quit" }
29b115
+{"return": {}}
29b115
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
29b115
+
29b115
+wrote 65536/65536 bytes at offset 0
29b115
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
29b115
+
29b115
+ERROR cluster 0 refcount=0 reference=1
29b115
+Rebuilding refcount structure
29b115
+The following inconsistencies were found and repaired:
29b115
+
29b115
+    0 leaked clusters
29b115
+    1 corruptions
29b115
+
29b115
 Double checking the fixed image now...
29b115
 No errors were found on the image.
29b115
 *** done
29b115
-- 
29b115
2.31.1
29b115