60061b
From b56684f6c1bef4fb5bf87ac5a1106d3830c05ad0 Mon Sep 17 00:00:00 2001
60061b
From: Hanna Reitz <hreitz@redhat.com>
60061b
Date: Fri, 4 Feb 2022 12:10:10 +0100
60061b
Subject: [PATCH 4/6] iotests/281: Test lingering timers
60061b
60061b
RH-Author: Hanna Reitz <hreitz@redhat.com>
60061b
RH-MergeRequest: 117: block/nbd: Handle AioContext changes
60061b
RH-Commit: [4/6] aaad466941637a34224dc037bbea37d128b5676b
60061b
RH-Bugzilla: 2035185
60061b
RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
60061b
RH-Acked-by: Eric Blake <eblake@redhat.com>
60061b
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
60061b
60061b
Prior to "block/nbd: Delete reconnect delay timer when done" and
60061b
"block/nbd: Delete open timer when done", both of those timers would
60061b
remain scheduled even after successfully (re-)connecting to the server,
60061b
and they would not even be deleted when the BDS is deleted.
60061b
60061b
This test constructs exactly this situation:
60061b
(1) Configure an @open-timeout, so the open timer is armed, and
60061b
(2) Configure a @reconnect-delay and trigger a reconnect situation
60061b
    (which succeeds immediately), so the reconnect delay timer is armed.
60061b
Then we immediately delete the BDS, and sleep for longer than the
60061b
@open-timeout and @reconnect-delay.  Prior to said patches, this caused
60061b
one (or both) of the timer CBs to access already-freed data.
60061b
60061b
Accessing freed data may or may not crash, so this test can produce
60061b
false successes, but I do not know how to show the problem in a better
60061b
or more reliable way.  If you run this test on "block/nbd: Assert there
60061b
are no timers when closed" and without the fix patches mentioned above,
60061b
you should reliably see an assertion failure.
60061b
(But all other tests that use the reconnect delay timer (264 and 277)
60061b
will fail in that configuration, too; as will nbd-reconnect-on-open,
60061b
which uses the open timer.)
60061b
60061b
Remove this test from the quick group because of the two second sleep
60061b
this patch introduces.
60061b
60061b
(I decided to put this test case into 281, because the main bug this
60061b
series addresses is in the interaction of the NBD block driver and I/O
60061b
threads, which is precisely the scope of 281.  The test case for that
60061b
other bug will also be put into the test class added here.
60061b
60061b
Also, excuse the test class's name, I couldn't come up with anything
60061b
better.  The "yield" part will make sense two patches from now.)
60061b
60061b
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
60061b
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
60061b
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
60061b
(cherry picked from commit eaf1e85d4ddefdbd197f393fa9c5acc7ba8133b0)
60061b
60061b
Conflict:
60061b
- @open-timeout was introduced after the 6.2 release, and has not been
60061b
  backported.  Consequently, there is no open_timer, and we can (and
60061b
  must) drop the respective parts of the test here.
60061b
60061b
Signed-off-by: Hanna Reitz <hreitz@redhat.com>
60061b
---
60061b
 tests/qemu-iotests/281     | 73 ++++++++++++++++++++++++++++++++++++--
60061b
 tests/qemu-iotests/281.out |  4 +--
60061b
 2 files changed, 73 insertions(+), 4 deletions(-)
60061b
60061b
diff --git a/tests/qemu-iotests/281 b/tests/qemu-iotests/281
60061b
index 956698083f..13c588be75 100755
60061b
--- a/tests/qemu-iotests/281
60061b
+++ b/tests/qemu-iotests/281
60061b
@@ -1,5 +1,5 @@
60061b
 #!/usr/bin/env python3
60061b
-# group: rw quick
60061b
+# group: rw
60061b
 #
60061b
 # Test cases for blockdev + IOThread interactions
60061b
 #
60061b
@@ -20,8 +20,9 @@
60061b
 #
60061b
 
60061b
 import os
60061b
+import time
60061b
 import iotests
60061b
-from iotests import qemu_img
60061b
+from iotests import qemu_img, QemuStorageDaemon
60061b
 
60061b
 image_len = 64 * 1024 * 1024
60061b
 
60061b
@@ -243,6 +244,74 @@ class TestBlockdevBackupAbort(iotests.QMPTestCase):
60061b
         # Hangs on failure, we expect this error.
60061b
         self.assert_qmp(result, 'error/class', 'GenericError')
60061b
 
60061b
+# Test for RHBZ#2033626
60061b
+class TestYieldingAndTimers(iotests.QMPTestCase):
60061b
+    sock = os.path.join(iotests.sock_dir, 'nbd.sock')
60061b
+    qsd = None
60061b
+
60061b
+    def setUp(self):
60061b
+        self.create_nbd_export()
60061b
+
60061b
+        # Simple VM with an NBD block device connected to the NBD export
60061b
+        # provided by the QSD
60061b
+        self.vm = iotests.VM()
60061b
+        self.vm.add_blockdev('nbd,node-name=nbd,server.type=unix,' +
60061b
+                             f'server.path={self.sock},export=exp,' +
60061b
+                             'reconnect-delay=1')
60061b
+
60061b
+        self.vm.launch()
60061b
+
60061b
+    def tearDown(self):
60061b
+        self.stop_nbd_export()
60061b
+        self.vm.shutdown()
60061b
+
60061b
+    def test_timers_with_blockdev_del(self):
60061b
+        # Stop and restart the NBD server, and do some I/O on the client to
60061b
+        # trigger a reconnect and start the reconnect delay timer
60061b
+        self.stop_nbd_export()
60061b
+        self.create_nbd_export()
60061b
+
60061b
+        result = self.vm.qmp('human-monitor-command',
60061b
+                             command_line='qemu-io nbd "write 0 512"')
60061b
+        self.assert_qmp(result, 'return', '')
60061b
+
60061b
+        # Reconnect is done, so the reconnect delay timer should be gone.
60061b
+        # (But there used to be a bug where it remained active, for which this
60061b
+        # is a regression test.)
60061b
+
60061b
+        # Delete the BDS to see whether the timer is gone.  If it is not,
60061b
+        # it will remain active, fire later, and then access freed data.
60061b
+        # (Or, with "block/nbd: Assert there are no timers when closed"
60061b
+        # applied, the assertion added in that patch will fail.)
60061b
+        result = self.vm.qmp('blockdev-del', node_name='nbd')
60061b
+        self.assert_qmp(result, 'return', {})
60061b
+
60061b
+        # Give the timer some time to fire (it has a timeout of 1 s).
60061b
+        # (Sleeping in an iotest may ring some alarm bells, but note that if
60061b
+        # the timing is off here, the test will just always pass.  If we kill
60061b
+        # the VM too early, then we just kill the timer before it can fire,
60061b
+        # thus not see the error, and so the test will pass.)
60061b
+        time.sleep(2)
60061b
+
60061b
+    def create_nbd_export(self):
60061b
+        assert self.qsd is None
60061b
+
60061b
+        # Simple NBD export of a null-co BDS
60061b
+        self.qsd = QemuStorageDaemon(
60061b
+            '--blockdev',
60061b
+            'null-co,node-name=null,read-zeroes=true',
60061b
+
60061b
+            '--nbd-server',
60061b
+            f'addr.type=unix,addr.path={self.sock}',
60061b
+
60061b
+            '--export',
60061b
+            'nbd,id=exp,node-name=null,name=exp,writable=true'
60061b
+        )
60061b
+
60061b
+    def stop_nbd_export(self):
60061b
+        self.qsd.stop()
60061b
+        self.qsd = None
60061b
+
60061b
 if __name__ == '__main__':
60061b
     iotests.main(supported_fmts=['qcow2'],
60061b
                  supported_protocols=['file'])
60061b
diff --git a/tests/qemu-iotests/281.out b/tests/qemu-iotests/281.out
60061b
index 89968f35d7..914e3737bd 100644
60061b
--- a/tests/qemu-iotests/281.out
60061b
+++ b/tests/qemu-iotests/281.out
60061b
@@ -1,5 +1,5 @@
60061b
-....
60061b
+.....
60061b
 ----------------------------------------------------------------------
60061b
-Ran 4 tests
60061b
+Ran 5 tests
60061b
 
60061b
 OK
60061b
-- 
60061b
2.27.0
60061b