|
|
357786 |
From f94e1c832c510a3ca1b8bf0d383e7be2dce4909c Mon Sep 17 00:00:00 2001
|
|
|
357786 |
From: Markus Armbruster <armbru@redhat.com>
|
|
|
357786 |
Date: Wed, 9 May 2018 14:42:21 +0200
|
|
|
357786 |
Subject: [PATCH 13/13] cpus: Fix event order on resume of stopped guest
|
|
|
357786 |
|
|
|
357786 |
RH-Author: Markus Armbruster <armbru@redhat.com>
|
|
|
357786 |
Message-id: <20180509144221.14799-2-armbru@redhat.com>
|
|
|
357786 |
Patchwork-id: 80191
|
|
|
357786 |
O-Subject: [RHEL-7.6 qemu-kvm-rhev PATCH 1/1] cpus: Fix event order on resume of stopped guest
|
|
|
357786 |
Bugzilla: 1566153
|
|
|
357786 |
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
|
357786 |
RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
|
|
|
357786 |
RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
|
|
|
357786 |
|
|
|
357786 |
When resume of a stopped guest immediately runs into block device
|
|
|
357786 |
errors, the BLOCK_IO_ERROR event is sent before the RESUME event.
|
|
|
357786 |
|
|
|
357786 |
Reproducer:
|
|
|
357786 |
|
|
|
357786 |
1. Create a scratch image
|
|
|
357786 |
$ dd if=/dev/zero of=scratch.img bs=1M count=100
|
|
|
357786 |
|
|
|
357786 |
Size doesn't actually matter.
|
|
|
357786 |
|
|
|
357786 |
2. Prepare blkdebug configuration:
|
|
|
357786 |
|
|
|
357786 |
$ cat >blkdebug.conf <<EOF
|
|
|
357786 |
[inject-error]
|
|
|
357786 |
event = "write_aio"
|
|
|
357786 |
errno = "5"
|
|
|
357786 |
EOF
|
|
|
357786 |
|
|
|
357786 |
Note that errno 5 is EIO.
|
|
|
357786 |
|
|
|
357786 |
3. Run a guest with an additional scratch disk, i.e. with additional
|
|
|
357786 |
arguments
|
|
|
357786 |
-drive if=none,id=scratch-drive,format=raw,werror=stop,file=blkdebug:blkdebug.conf:scratch.img
|
|
|
357786 |
-device virtio-blk-pci,id=scratch,drive=scratch-drive
|
|
|
357786 |
|
|
|
357786 |
The blkdebug part makes all writes to the scratch drive fail with
|
|
|
357786 |
EIO. The werror=stop pauses the guest on write errors.
|
|
|
357786 |
|
|
|
357786 |
4. Connect to the QMP socket e.g. like this:
|
|
|
357786 |
$ socat UNIX:/your/qmp/socket READLINE,history=$HOME/.qmp_history,prompt='QMP> '
|
|
|
357786 |
|
|
|
357786 |
Issue QMP command 'qmp_capabilities':
|
|
|
357786 |
QMP> { "execute": "qmp_capabilities" }
|
|
|
357786 |
|
|
|
357786 |
5. Boot the guest.
|
|
|
357786 |
|
|
|
357786 |
6. In the guest, write to the scratch disk, e.g. like this:
|
|
|
357786 |
|
|
|
357786 |
# dd if=/dev/zero of=/dev/vdb count=1
|
|
|
357786 |
|
|
|
357786 |
Do double-check the device specified with of= is actually the
|
|
|
357786 |
scratch device!
|
|
|
357786 |
|
|
|
357786 |
7. Issue QMP command 'cont':
|
|
|
357786 |
QMP> { "execute": "cont" }
|
|
|
357786 |
|
|
|
357786 |
After step 6, I get a BLOCK_IO_ERROR event followed by a STOP event. Good.
|
|
|
357786 |
|
|
|
357786 |
After step 7, I get BLOCK_IO_ERROR, then RESUME, then STOP. Not so
|
|
|
357786 |
good; I'd expect RESUME, then BLOCK_IO_ERROR, then STOP.
|
|
|
357786 |
|
|
|
357786 |
The funny event order confuses libvirt: virsh -r domstate DOMAIN
|
|
|
357786 |
--reason reports "paused (unknown)" rather than "paused (I/O error)".
|
|
|
357786 |
|
|
|
357786 |
The culprit is vm_prepare_start().
|
|
|
357786 |
|
|
|
357786 |
/* Ensure that a STOP/RESUME pair of events is emitted if a
|
|
|
357786 |
* vmstop request was pending. The BLOCK_IO_ERROR event, for
|
|
|
357786 |
* example, according to documentation is always followed by
|
|
|
357786 |
* the STOP event.
|
|
|
357786 |
*/
|
|
|
357786 |
if (runstate_is_running()) {
|
|
|
357786 |
qapi_event_send_stop(&error_abort);
|
|
|
357786 |
res = -1;
|
|
|
357786 |
} else {
|
|
|
357786 |
replay_enable_events();
|
|
|
357786 |
cpu_enable_ticks();
|
|
|
357786 |
runstate_set(RUN_STATE_RUNNING);
|
|
|
357786 |
vm_state_notify(1, RUN_STATE_RUNNING);
|
|
|
357786 |
}
|
|
|
357786 |
|
|
|
357786 |
/* We are sending this now, but the CPUs will be resumed shortly later */
|
|
|
357786 |
qapi_event_send_resume(&error_abort);
|
|
|
357786 |
return res;
|
|
|
357786 |
|
|
|
357786 |
When resuming a stopped guest, we take the else branch before we get
|
|
|
357786 |
to sending RESUME. vm_state_notify() runs virtio_vmstate_change(),
|
|
|
357786 |
among other things. This restarts I/O, triggering the BLOCK_IO_ERROR
|
|
|
357786 |
event.
|
|
|
357786 |
|
|
|
357786 |
Reshuffle vm_prepare_start() to send the RESUME event earlier.
|
|
|
357786 |
|
|
|
357786 |
Fixes RHBZ 1566153.
|
|
|
357786 |
|
|
|
357786 |
Cc: Paolo Bonzini <pbonzini@redhat.com>
|
|
|
357786 |
Signed-off-by: Markus Armbruster <armbru@redhat.com>
|
|
|
357786 |
Message-Id: <20180423084518.2426-1-armbru@redhat.com>
|
|
|
357786 |
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
|
|
357786 |
(cherry picked from commit f056158d694d2adc63ff120ca71c73ae8b14426c)
|
|
|
357786 |
Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
|
357786 |
---
|
|
|
357786 |
cpus.c | 16 ++++++++--------
|
|
|
357786 |
1 file changed, 8 insertions(+), 8 deletions(-)
|
|
|
357786 |
|
|
|
357786 |
diff --git a/cpus.c b/cpus.c
|
|
|
357786 |
index 38eba8b..398392b 100644
|
|
|
357786 |
--- a/cpus.c
|
|
|
357786 |
+++ b/cpus.c
|
|
|
357786 |
@@ -2043,7 +2043,6 @@ int vm_stop(RunState state)
|
|
|
357786 |
int vm_prepare_start(void)
|
|
|
357786 |
{
|
|
|
357786 |
RunState requested;
|
|
|
357786 |
- int res = 0;
|
|
|
357786 |
|
|
|
357786 |
qemu_vmstop_requested(&requested);
|
|
|
357786 |
if (runstate_is_running() && requested == RUN_STATE__MAX) {
|
|
|
357786 |
@@ -2057,17 +2056,18 @@ int vm_prepare_start(void)
|
|
|
357786 |
*/
|
|
|
357786 |
if (runstate_is_running()) {
|
|
|
357786 |
qapi_event_send_stop(&error_abort);
|
|
|
357786 |
- res = -1;
|
|
|
357786 |
- } else {
|
|
|
357786 |
- replay_enable_events();
|
|
|
357786 |
- cpu_enable_ticks();
|
|
|
357786 |
- runstate_set(RUN_STATE_RUNNING);
|
|
|
357786 |
- vm_state_notify(1, RUN_STATE_RUNNING);
|
|
|
357786 |
+ qapi_event_send_resume(&error_abort);
|
|
|
357786 |
+ return -1;
|
|
|
357786 |
}
|
|
|
357786 |
|
|
|
357786 |
/* We are sending this now, but the CPUs will be resumed shortly later */
|
|
|
357786 |
qapi_event_send_resume(&error_abort);
|
|
|
357786 |
- return res;
|
|
|
357786 |
+
|
|
|
357786 |
+ replay_enable_events();
|
|
|
357786 |
+ cpu_enable_ticks();
|
|
|
357786 |
+ runstate_set(RUN_STATE_RUNNING);
|
|
|
357786 |
+ vm_state_notify(1, RUN_STATE_RUNNING);
|
|
|
357786 |
+ return 0;
|
|
|
357786 |
}
|
|
|
357786 |
|
|
|
357786 |
void vm_start(void)
|
|
|
357786 |
--
|
|
|
357786 |
1.8.3.1
|
|
|
357786 |
|