43fe83
From a8a37a6d612e710f4c63c21e83529e6c9ebd7806 Mon Sep 17 00:00:00 2001
43fe83
Message-Id: <a8a37a6d612e710f4c63c21e83529e6c9ebd7806.1381871412.git.jdenemar@redhat.com>
43fe83
From: Michal Privoznik <mprivozn@redhat.com>
43fe83
Date: Fri, 11 Oct 2013 17:18:10 +0200
43fe83
Subject: [PATCH] qemu_migration: Avoid crashing if domain dies too quickly
43fe83
43fe83
https://bugzilla.redhat.com/show_bug.cgi?id=1018267
43fe83
43fe83
I've noticed a SIGSEGV-ing libvirtd on the destination when the qemu
43fe83
died too quickly = in Prepare phase. What is happening here is:
43fe83
43fe83
1) [Thread 3493] We are in qemuMigrationPrepareAny() and calling
43fe83
qemuProcessStart() which subsequently calls qemuProcessWaitForMonitor()
43fe83
and qemuConnectMonitor(). So far so good. The qemuMonitorOpen()
43fe83
succeeds, however switching monitor to QMP mode fails as qemu died
43fe83
meanwhile. That is qemuMonitorSetCapabilities() returns -1.
43fe83
43fe83
2013-10-08 15:54:10.629+0000: 3493: debug : qemuMonitorSetCapabilities:1356 : mon=0x14a53da0
43fe83
2013-10-08 15:54:10.630+0000: 3493: debug : qemuMonitorJSONCommandWithFd:262 : Send command '{"execute":"qmp_capabilities","id":"libvirt-1"}' for write with FD -1
43fe83
2013-10-08 15:54:10.630+0000: 3493: debug : virEventPollUpdateHandle:147 : EVENT_POLL_UPDATE_HANDLE: watch=17 events=13
43fe83
...
43fe83
2013-10-08 15:54:10.631+0000: 3493: debug : qemuMonitorSend:956 : QEMU_MONITOR_SEND_MSG: mon=0x14a53da0 msg={"execute":"qmp_capabilities","id":"libvirt-1"}
43fe83
 fd=-1
43fe83
2013-10-08 15:54:10.631+0000: 3262: debug : virEventPollRunOnce:641 : Poll got 1 event(s)
43fe83
43fe83
2) [Thread 3262] The event loop is trying to do the talking to monitor.
43fe83
However, qemu is dead already, remember?
43fe83
43fe83
2013-10-08 15:54:13.436+0000: 3262: error : qemuMonitorIORead:551 : Unable to read from monitor: Connection reset by peer
43fe83
2013-10-08 15:54:13.516+0000: 3262: debug : virFileClose:90 : Closed fd 25
43fe83
...
43fe83
2013-10-08 15:54:13.533+0000: 3493: debug : qemuMonitorSend:968 : Send command resulted in error internal error: early end of file from monitor: possible problem:
43fe83
43fe83
3) [Thread 3493] qemuProcessStart() failed. No big deal. Go to the
43fe83
'endjob' label and subsequently to the 'cleanup'. Since the domain is
43fe83
not persistent and ret is -1, the qemuDomainRemoveInactive() is called.
43fe83
This has an (unpleasant) effect of virObjectUnref()-in the @vm object.
43fe83
Unpleasant because the event loop which is about to trigger EOF callback
43fe83
still holds a pointer to the @vm (not the reference). See the valgrind
43fe83
output below.
43fe83
43fe83
4) [Thread 3262] So the event loop starts triggering EOF:
43fe83
43fe83
2013-10-08 15:54:13.542+0000: 3262: debug : qemuMonitorIO:729 : Triggering EOF callback
43fe83
2013-10-08 15:54:13.543+0000: 3262: debug : qemuProcessHandleMonitorEOF:294 : Received EOF on 0x14549110 'migt10'
43fe83
43fe83
And the monitor is cleaned up. This results in calling
43fe83
qemuProcessHandleMonitorEOF with the @vm pointer passed. The pointer is
43fe83
kept in qemuMonitor struct.
43fe83
43fe83
==3262== Thread 1:
43fe83
==3262== Invalid read of size 4
43fe83
==3262==    at 0x77ECCAA: pthread_mutex_lock (in /lib64/libpthread-2.15.so)
43fe83
==3262==    by 0x52FAA06: virMutexLock (virthreadpthread.c:85)
43fe83
==3262==    by 0x52E3891: virObjectLock (virobject.c:320)
43fe83
==3262==    by 0x11626743: qemuProcessHandleMonitorEOF (qemu_process.c:296)
43fe83
==3262==    by 0x11642593: qemuMonitorIO (qemu_monitor.c:730)
43fe83
==3262==    by 0x52BD526: virEventPollDispatchHandles (vireventpoll.c:501)
43fe83
==3262==    by 0x52BDD49: virEventPollRunOnce (vireventpoll.c:648)
43fe83
==3262==    by 0x52BBC68: virEventRunDefaultImpl (virevent.c:274)
43fe83
==3262==    by 0x542D3D9: virNetServerRun (virnetserver.c:1112)
43fe83
==3262==    by 0x11F368: main (libvirtd.c:1513)
43fe83
==3262==  Address 0x14549128 is 24 bytes inside a block of size 136 free'd
43fe83
==3262==    at 0x4C2AF5C: free (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
43fe83
==3262==    by 0x529B1FF: virFree (viralloc.c:580)
43fe83
==3262==    by 0x52E3703: virObjectUnref (virobject.c:270)
43fe83
==3262==    by 0x531557E: virDomainObjListRemove (domain_conf.c:2355)
43fe83
==3262==    by 0x1160E899: qemuDomainRemoveInactive (qemu_domain.c:2061)
43fe83
==3262==    by 0x1163A0C6: qemuMigrationPrepareAny (qemu_migration.c:2450)
43fe83
==3262==    by 0x1163A923: qemuMigrationPrepareDirect (qemu_migration.c:2626)
43fe83
==3262==    by 0x11682D71: qemuDomainMigratePrepare3Params (qemu_driver.c:10309)
43fe83
==3262==    by 0x53B0976: virDomainMigratePrepare3Params (libvirt.c:7266)
43fe83
==3262==    by 0x1502D3: remoteDispatchDomainMigratePrepare3Params (remote.c:4797)
43fe83
==3262==    by 0x12DECA: remoteDispatchDomainMigratePrepare3ParamsHelper (remote_dispatch.h:5741)
43fe83
==3262==    by 0x54322EB: virNetServerProgramDispatchCall (virnetserverprogram.c:435)
43fe83
43fe83
The mon->vm is set in qemuMonitorOpenInternal() which is the correct
43fe83
place to increase @vm ref counter. The correct place to decrease the ref
43fe83
counter is then qemuMonitorDispose().
43fe83
43fe83
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
43fe83
(cherry picked from commit c7ac2519b7fe8b4f9c0ecc05cf7e46dea64b5e15)
43fe83
43fe83
Conflicts:
43fe83
	src/qemu/qemu_capabilities.c: Context
43fe83
	src/qemu/qemu_monitor.c: Context
43fe83
    Both due to 809ee6ba not being backported yet.
43fe83
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
43fe83
---
43fe83
 src/qemu/qemu_capabilities.c | 14 ++++++++++----
43fe83
 src/qemu/qemu_monitor.c      |  4 +++-
43fe83
 2 files changed, 13 insertions(+), 5 deletions(-)
43fe83
43fe83
diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
43fe83
index 9b1d9f5..b95a984 100644
43fe83
--- a/src/qemu/qemu_capabilities.c
43fe83
+++ b/src/qemu/qemu_capabilities.c
43fe83
@@ -2482,7 +2482,8 @@ virQEMUCapsInitQMP(virQEMUCapsPtr qemuCaps,
43fe83
     char *monpath = NULL;
43fe83
     char *pidfile = NULL;
43fe83
     pid_t pid = 0;
43fe83
-    virDomainObj vm;
43fe83
+    virDomainObjPtr vm = NULL;
43fe83
+    virDomainXMLOptionPtr xmlopt = NULL;
43fe83
 
43fe83
     /* the ".sock" sufix is important to avoid a possible clash with a qemu
43fe83
      * domain called "capabilities"
43fe83
@@ -2545,10 +2546,13 @@ virQEMUCapsInitQMP(virQEMUCapsPtr qemuCaps,
43fe83
         goto cleanup;
43fe83
     }
43fe83
 
43fe83
-    memset(&vm, 0, sizeof(vm));
43fe83
-    vm.pid = pid;
43fe83
+    if (!(xmlopt = virDomainXMLOptionNew(NULL, NULL, NULL)) ||
43fe83
+        !(vm = virDomainObjNew(xmlopt)))
43fe83
+        goto cleanup;
43fe83
+
43fe83
+    vm->pid = pid;
43fe83
 
43fe83
-    if (!(mon = qemuMonitorOpen(&vm, &config, true, &callbacks))) {
43fe83
+    if (!(mon = qemuMonitorOpen(vm, &config, true, &callbacks))) {
43fe83
         ret = 0;
43fe83
         goto cleanup;
43fe83
     }
43fe83
@@ -2629,6 +2633,8 @@ cleanup:
43fe83
     VIR_FREE(monarg);
43fe83
     VIR_FREE(monpath);
43fe83
     VIR_FREE(package);
43fe83
+    virObjectUnref(vm);
43fe83
+    virObjectUnref(xmlopt);
43fe83
 
43fe83
     if (pid != 0) {
43fe83
         char ebuf[1024];
43fe83
diff --git a/src/qemu/qemu_monitor.c b/src/qemu/qemu_monitor.c
43fe83
index 5b2fb04..e22a2b2 100644
43fe83
--- a/src/qemu/qemu_monitor.c
43fe83
+++ b/src/qemu/qemu_monitor.c
43fe83
@@ -249,6 +249,8 @@ static void qemuMonitorDispose(void *obj)
43fe83
     VIR_DEBUG("mon=%p", mon);
43fe83
     if (mon->cb && mon->cb->destroy)
43fe83
         (mon->cb->destroy)(mon, mon->vm);
43fe83
+    virObjectUnref(mon->vm);
43fe83
+
43fe83
     virCondDestroy(&mon->notify);
43fe83
     VIR_FREE(mon->buffer);
43fe83
     virJSONValueFree(mon->options);
43fe83
@@ -722,7 +724,7 @@ qemuMonitorOpenInternal(virDomainObjPtr vm,
43fe83
     }
43fe83
     mon->fd = fd;
43fe83
     mon->hasSendFD = hasSendFD;
43fe83
-    mon->vm = vm;
43fe83
+    mon->vm = virObjectRef(vm);
43fe83
     mon->json = json;
43fe83
     if (json)
43fe83
         mon->waitGreeting = true;
43fe83
-- 
43fe83
1.8.3.2
43fe83