|
|
fce154 |
From 3bd625a0c183e6c729b2fcbd934b36de14ce4a2f Mon Sep 17 00:00:00 2001
|
|
|
0e7ea9 |
From: Uri Lublin <uril@redhat.com>
|
|
|
0e7ea9 |
Date: Wed, 16 Jul 2014 17:02:04 +0300
|
|
|
0e7ea9 |
Subject: [PATCH] migration: Don't assert() if MIGRATE_DATA comes before
|
|
|
0e7ea9 |
attaching the agent
|
|
|
0e7ea9 |
|
|
|
0e7ea9 |
During seamless migration, after switching host, if a client was connected
|
|
|
0e7ea9 |
during the migration, it will have data to send back to the new
|
|
|
0e7ea9 |
qemu/spice-server instance. This is handled through MIGRATE_DATA messages.
|
|
|
0e7ea9 |
SPICE char devices use such MIGRATE_DATA messages to restore their state.
|
|
|
0e7ea9 |
|
|
|
0e7ea9 |
However, the MIGRATE_DATA message can arrive any time after the new qemu
|
|
|
0e7ea9 |
instance has started, this can happen before or after the SPICE char
|
|
|
0e7ea9 |
devices have been created. In order to handle this, if the migrate data
|
|
|
0e7ea9 |
arrives early, it's stored in reds->agent_state.mig_data, and
|
|
|
0e7ea9 |
attach_to_red_agent() will restore the agent state as appropriate.
|
|
|
0e7ea9 |
|
|
|
0e7ea9 |
Unfortunately this does not work as expected, for main
|
|
|
0e7ea9 |
channel (agent messages).
|
|
|
0e7ea9 |
If attach_to_red_agent() is called before the MIGRATE_DATA
|
|
|
0e7ea9 |
message reaches the server, all goes well,
|
|
|
0e7ea9 |
but if MIGRATE_DATA reaches the server before
|
|
|
0e7ea9 |
attach_to_red_agent() gets called, then some assert() gets
|
|
|
0e7ea9 |
triggered in spice_char_device_state_restore():
|
|
|
0e7ea9 |
|
|
|
0e7ea9 |
((null):32507): Spice-ERROR **: char_device.c:937:spice_char_device_state_restore: assertion `dev->num_clients == 1 && dev->wait_for_migrate_data' failed
|
|
|
0e7ea9 |
Thread 3 (Thread 0x7f406b543700 (LWP 32543)):
|
|
|
0e7ea9 |
Thread 2 (Thread 0x7f40697ff700 (LWP 32586)):
|
|
|
0e7ea9 |
Thread 1 (Thread 0x7f4079b45a40 (LWP 32507)):
|
|
|
0e7ea9 |
|
|
|
0e7ea9 |
When restoring state, a client must already be added to the
|
|
|
0e7ea9 |
spice-char-device.
|
|
|
0e7ea9 |
What happens is that a client is not being added to the char-device
|
|
|
0e7ea9 |
when when MIGRATE_DATA arrives first, which leaves both
|
|
|
0e7ea9 |
dev->num_clients and dev->wait_for_migrate_data value at 0.
|
|
|
0e7ea9 |
|
|
|
0e7ea9 |
This commit changes the logic in spice_server_char_device_add_interface(),
|
|
|
0e7ea9 |
such that if there is migrate data pending in reds->agent_state.mig_data
|
|
|
0e7ea9 |
but no client was added to the spice-char-device yet,
|
|
|
0e7ea9 |
then first the client is added to the device by calling
|
|
|
0e7ea9 |
spice_char_device_client_add(), and only then the state is restored.
|
|
|
0e7ea9 |
|
|
|
0e7ea9 |
=== How to Reproduce
|
|
|
0e7ea9 |
To reproduce, add delays to the migration connection between
|
|
|
0e7ea9 |
qmeu-kvm on the source host (SRC) and on the destination (DST).
|
|
|
0e7ea9 |
|
|
|
0e7ea9 |
Specifically I added a man in the middle DLY host between
|
|
|
0e7ea9 |
migration ports from SRC to DST.
|
|
|
0e7ea9 |
|
|
|
0e7ea9 |
+-----+ +-----+ +-----+
|
|
|
0e7ea9 |
| SRC |--> | DLY | --> | DST |
|
|
|
0e7ea9 |
+-----+ +-----+ +-----+
|
|
|
0e7ea9 |
|
|
|
0e7ea9 |
DLY listens on port P1 (e.g. 4444) and DST listens on port
|
|
|
0e7ea9 |
PINCOMING (e.g. 4444, from qemu-kvm '-incoming' command line option)
|
|
|
0e7ea9 |
|
|
|
0e7ea9 |
Precondition: make sure port P1 on DLY is accessible in iptables.
|
|
|
0e7ea9 |
Option 1: use ssh tcp port forwarding
|
|
|
0e7ea9 |
On DLY host run ssh:
|
|
|
0e7ea9 |
ssh DLY:P1:DST:PINCOMING DST
|
|
|
0e7ea9 |
Then use the following migration command (on qemu-kvm monitor):
|
|
|
0e7ea9 |
client_migrate_info spice DST PSPICE
|
|
|
0e7ea9 |
migrate -d tcp:DLY:P1
|
|
|
0e7ea9 |
|
|
|
0e7ea9 |
Option 2: Use a simple proxy program that forwards
|
|
|
0e7ea9 |
packets from SRC to DST while adding some delays.
|
|
|
0e7ea9 |
The program runs on DLY, listens to port D1, upon
|
|
|
0e7ea9 |
accept connects to DST:PINCOMING and forward all
|
|
|
0e7ea9 |
packets from DLY:D1 to DST:PINCOMING.
|
|
|
0e7ea9 |
Then use the same migrate command as in option 1:
|
|
|
0e7ea9 |
client_migrate_info spice DST PSPICE
|
|
|
0e7ea9 |
migrate -d tcp:DLY:P1
|
|
|
0e7ea9 |
|
|
|
0e7ea9 |
=== How to Reproduce Ends
|
|
|
0e7ea9 |
|
|
|
0e7ea9 |
This fixes https://bugzilla.redhat.com/show_bug.cgi?id=1035184
|
|
|
0e7ea9 |
|
|
|
0e7ea9 |
Based-on-a-patch-by: Christophe Fergeau <cfergeau@redhat.com>
|
|
|
0e7ea9 |
(cherry picked from commit 2d1c00a659cd1b3998f5d1f90fc5ee6abb7519bb)
|
|
|
0e7ea9 |
---
|
|
|
0e7ea9 |
server/reds.c | 39 ++++++++++++++++++++++++++++-----------
|
|
|
0e7ea9 |
1 file changed, 28 insertions(+), 11 deletions(-)
|
|
|
0e7ea9 |
|
|
|
0e7ea9 |
diff --git a/server/reds.c b/server/reds.c
|
|
|
fce154 |
index 464552a..06e9a3e 100644
|
|
|
0e7ea9 |
--- a/server/reds.c
|
|
|
0e7ea9 |
+++ b/server/reds.c
|
|
|
0e7ea9 |
@@ -1373,6 +1373,7 @@ int reds_handle_migrate_data(MainChannelClient *mcc, SpiceMigrateDataMain *mig_d
|
|
|
0e7ea9 |
{
|
|
|
0e7ea9 |
VDIPortState *agent_state = &reds->agent_state;
|
|
|
0e7ea9 |
|
|
|
0e7ea9 |
+ spice_debug("main-channel: got migrate data");
|
|
|
0e7ea9 |
/*
|
|
|
0e7ea9 |
* Now that the client has switched to the target server, if main_channel
|
|
|
0e7ea9 |
* controls the mm-time, we update the client's mm-time.
|
|
|
0e7ea9 |
@@ -1394,15 +1395,18 @@ int reds_handle_migrate_data(MainChannelClient *mcc, SpiceMigrateDataMain *mig_d
|
|
|
0e7ea9 |
main_channel_push_agent_disconnected(reds->main_channel);
|
|
|
0e7ea9 |
main_channel_push_agent_connected(reds->main_channel);
|
|
|
0e7ea9 |
} else {
|
|
|
0e7ea9 |
+ spice_debug("restoring state from mig_data");
|
|
|
0e7ea9 |
return reds_agent_state_restore(mig_data);
|
|
|
0e7ea9 |
}
|
|
|
0e7ea9 |
}
|
|
|
0e7ea9 |
} else {
|
|
|
0e7ea9 |
/* restore agent starte when the agent gets attached */
|
|
|
0e7ea9 |
+ spice_debug("saving mig_data");
|
|
|
0e7ea9 |
spice_assert(agent_state->plug_generation == 0);
|
|
|
0e7ea9 |
agent_state->mig_data = spice_memdup(mig_data, size);
|
|
|
0e7ea9 |
}
|
|
|
0e7ea9 |
} else {
|
|
|
0e7ea9 |
+ spice_debug("agent was not attached on the source host");
|
|
|
0e7ea9 |
if (vdagent) {
|
|
|
0e7ea9 |
/* spice_char_device_client_remove disables waiting for migration data */
|
|
|
0e7ea9 |
spice_char_device_client_remove(agent_state->base,
|
|
|
fce154 |
@@ -3589,17 +3593,15 @@ static SpiceCharDeviceState *attach_to_red_agent(SpiceCharDeviceInstance *sin)
|
|
|
0e7ea9 |
state->read_filter.discard_all = FALSE;
|
|
|
0e7ea9 |
reds->agent_state.plug_generation++;
|
|
|
0e7ea9 |
|
|
|
0e7ea9 |
- if (reds->agent_state.mig_data) {
|
|
|
0e7ea9 |
- spice_assert(reds->agent_state.plug_generation == 1);
|
|
|
0e7ea9 |
- reds_agent_state_restore(reds->agent_state.mig_data);
|
|
|
0e7ea9 |
- free(reds->agent_state.mig_data);
|
|
|
0e7ea9 |
- reds->agent_state.mig_data = NULL;
|
|
|
0e7ea9 |
- } else if (!red_channel_waits_for_migrate_data(&reds->main_channel->base)) {
|
|
|
0e7ea9 |
- /* we will assoicate the client with the char device, upon reds_on_main_agent_start,
|
|
|
0e7ea9 |
- * in response to MSGC_AGENT_START */
|
|
|
0e7ea9 |
- main_channel_push_agent_connected(reds->main_channel);
|
|
|
0e7ea9 |
- } else {
|
|
|
0e7ea9 |
- spice_debug("waiting for migration data");
|
|
|
0e7ea9 |
+ if (reds->agent_state.mig_data ||
|
|
|
0e7ea9 |
+ red_channel_waits_for_migrate_data(&reds->main_channel->base)) {
|
|
|
0e7ea9 |
+ /* Migration in progress (code is running on the destination host):
|
|
|
0e7ea9 |
+ * 1. Add the client to spice char device, if it was not already added.
|
|
|
0e7ea9 |
+ * 2.a If this (qemu-kvm state load side of migration) happens first
|
|
|
0e7ea9 |
+ * then wait for spice migration data to arrive. Otherwise
|
|
|
0e7ea9 |
+ * 2.b If this happens second ==> we already have spice migrate data
|
|
|
0e7ea9 |
+ * then restore state
|
|
|
0e7ea9 |
+ */
|
|
|
0e7ea9 |
if (!spice_char_device_client_exists(reds->agent_state.base, reds_get_client())) {
|
|
|
0e7ea9 |
int client_added;
|
|
|
0e7ea9 |
|
|
|
fce154 |
@@ -3615,9 +3617,24 @@ static SpiceCharDeviceState *attach_to_red_agent(SpiceCharDeviceInstance *sin)
|
|
|
0e7ea9 |
spice_warning("failed to add client to agent");
|
|
|
0e7ea9 |
reds_disconnect();
|
|
|
0e7ea9 |
}
|
|
|
0e7ea9 |
+ }
|
|
|
0e7ea9 |
|
|
|
0e7ea9 |
+ if (reds->agent_state.mig_data) {
|
|
|
0e7ea9 |
+ spice_debug("restoring state from stored migration data");
|
|
|
0e7ea9 |
+ spice_assert(reds->agent_state.plug_generation == 1);
|
|
|
0e7ea9 |
+ reds_agent_state_restore(reds->agent_state.mig_data);
|
|
|
0e7ea9 |
+ free(reds->agent_state.mig_data);
|
|
|
0e7ea9 |
+ reds->agent_state.mig_data = NULL;
|
|
|
0e7ea9 |
}
|
|
|
0e7ea9 |
+ else {
|
|
|
0e7ea9 |
+ spice_debug("waiting for migration data");
|
|
|
0e7ea9 |
+ }
|
|
|
0e7ea9 |
+ } else {
|
|
|
0e7ea9 |
+ /* we will associate the client with the char device, upon reds_on_main_agent_start,
|
|
|
0e7ea9 |
+ * in response to MSGC_AGENT_START */
|
|
|
0e7ea9 |
+ main_channel_push_agent_connected(reds->main_channel);
|
|
|
0e7ea9 |
}
|
|
|
0e7ea9 |
+
|
|
|
0e7ea9 |
return state->base;
|
|
|
0e7ea9 |
}
|
|
|
0e7ea9 |
|