d76c62
From 16ba1d0258765d9c3b5e2da666ed6d4d933e26d9 Mon Sep 17 00:00:00 2001
d76c62
Message-Id: <16ba1d0258765d9c3b5e2da666ed6d4d933e26d9@dist-git>
d76c62
From: Laine Stump <laine@redhat.com>
d76c62
Date: Thu, 30 Jan 2020 14:12:41 -0500
d76c62
Subject: [PATCH] qemu: support interface <teaming> functionality
d76c62
MIME-Version: 1.0
d76c62
Content-Type: text/plain; charset=UTF-8
d76c62
Content-Transfer-Encoding: 8bit
d76c62
d76c62
The QEMU driver uses the <teaming type='persistent|transient'
d76c62
persistent='blah'/> element to set up a "failover" pair of devices -
d76c62
the persistent device must be a virtio emulated NIC, with the only
d76c62
extra configuration being the addition of ",failover=on" to the device
d76c62
commandline, and the transient device must be a hostdev NIC
d76c62
(<interface type='hostdev'> or <interface type='network'> with a
d76c62
network that is a pool of SRIOV VFs) where the extra configuration is
d76c62
the addition of ",failover_pair_id=$aliasOfVirtio" to the device
d76c62
commandline. These new options are supported in QEMU 4.2.0 and later.
d76c62
d76c62
Extra qemu-specific validation is added to ensure that the device
d76c62
type/model is appropriate and that the qemu binary supports these
d76c62
commandline options.
d76c62
d76c62
The result of this will be:
d76c62
d76c62
1) The virtio device presented to the guest will have an extra bit set
d76c62
in its PCI capabilities indicating that it can be used as a failover
d76c62
backup device. The virtio guest driver will need to be equipped to do
d76c62
something with this information - this is included in the Linux
d76c62
virtio-net driver in kernel 4.18 and above (and also backported to
d76c62
some older distro kernels). Unfortunately there is no way for libvirt
d76c62
to learn whether or not the guest driver supports failover - if it
d76c62
doesn't then the extra PCI capability will be ignored and the guest OS
d76c62
will just see two independent devices. (NB: the current virtio guest
d76c62
driver also requires that the MAC addresses of the two NICs match in
d76c62
order to pair them into a bond).
d76c62
d76c62
2) When a migration is requested, QEMU will automatically unplug the
d76c62
transient/hostdev NIC from the guest on the source host before
d76c62
starting migration, and automatically re-plug a similar device after
d76c62
restarting the guest CPUs on the destination host. While the transient
d76c62
NIC is unplugged, all network traffic will go through the
d76c62
persistent/virtio device, but when the hostdev NIC is plugged in, it
d76c62
will get all the traffic. This means that in normal circumstances the
d76c62
guest gets the performance advantage of vfio-assigned "real hardware"
d76c62
networking, but it can still be migrated with the only downside being
d76c62
a performance penalty (due to using an emulated NIC) during the
d76c62
migration.
d76c62
d76c62
Signed-off-by: Laine Stump <laine@redhat.com>
d76c62
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
d76c62
(cherry picked from commit eb9f6cc4b3464707cf689fda9812e5129003bf27)
d76c62
d76c62
https://bugzilla.redhat.com/1693587
d76c62
Signed-off-by: Laine Stump <laine@redhat.com>
d76c62
Message-Id: <20200130191244.24174-4-laine@redhat.com>
d76c62
Reviewed-by: Jiri Denemark <jdenemar@redhat.com>
d76c62
---
d76c62
 src/qemu/qemu_command.c                       |  9 +++++
d76c62
 src/qemu/qemu_domain.c                        | 36 +++++++++++++++--
d76c62
 .../qemuxml2argvdata/net-virtio-teaming.args  | 40 +++++++++++++++++++
d76c62
 tests/qemuxml2argvtest.c                      |  4 ++
d76c62
 4 files changed, 86 insertions(+), 3 deletions(-)
d76c62
 create mode 100644 tests/qemuxml2argvdata/net-virtio-teaming.args
d76c62
d76c62
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
d76c62
index 7a184c229e..d144855b0d 100644
d76c62
--- a/src/qemu/qemu_command.c
d76c62
+++ b/src/qemu/qemu_command.c
d76c62
@@ -3833,6 +3833,8 @@ qemuBuildNicDevStr(virDomainDefPtr def,
d76c62
         }
d76c62
         virBufferAsprintf(&buf, ",host_mtu=%u", net->mtu);
d76c62
     }
d76c62
+    if (usingVirtio && net->teaming.type == VIR_DOMAIN_NET_TEAMING_TYPE_PERSISTENT)
d76c62
+       virBufferAddLit(&buf, ",failover=on");
d76c62
 
d76c62
     virBufferAsprintf(&buf, ",netdev=host%s", net->info.alias);
d76c62
     virBufferAsprintf(&buf, ",id=%s", net->info.alias);
d76c62
@@ -4704,6 +4706,13 @@ qemuBuildPCIHostdevDevStr(const virDomainDef *def,
d76c62
     if (qemuBuildRomStr(&buf, dev->info) < 0)
d76c62
         return NULL;
d76c62
 
d76c62
+    if (dev->parentnet &&
d76c62
+        dev->parentnet->teaming.type == VIR_DOMAIN_NET_TEAMING_TYPE_TRANSIENT &&
d76c62
+        dev->parentnet->teaming.persistent) {
d76c62
+        virBufferAsprintf(&buf,  ",failover_pair_id=%s",
d76c62
+                          dev->parentnet->teaming.persistent);
d76c62
+    }
d76c62
+
d76c62
     return virBufferContentAndReset(&buf);
d76c62
 }
d76c62
 
d76c62
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
d76c62
index 91a9f0481b..e37404340f 100644
d76c62
--- a/src/qemu/qemu_domain.c
d76c62
+++ b/src/qemu/qemu_domain.c
d76c62
@@ -6391,12 +6391,20 @@ qemuDomainValidateActualNetDef(const virDomainNetDef *net,
d76c62
         return -1;
d76c62
     }
d76c62
 
d76c62
+    if (net->teaming.type == VIR_DOMAIN_NET_TEAMING_TYPE_TRANSIENT &&
d76c62
+        actualType != VIR_DOMAIN_NET_TYPE_HOSTDEV) {
d76c62
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
d76c62
+                       _("interface %s - teaming transient device must be type='hostdev', not '%s'"),
d76c62
+                       macstr, virDomainNetTypeToString(actualType));
d76c62
+        return -1;
d76c62
+    }
d76c62
     return 0;
d76c62
 }
d76c62
 
d76c62
 
d76c62
 static int
d76c62
-qemuDomainDeviceDefValidateNetwork(const virDomainNetDef *net)
d76c62
+qemuDomainDeviceDefValidateNetwork(const virDomainNetDef *net,
d76c62
+                                   virQEMUCapsPtr qemuCaps)
d76c62
 {
d76c62
     bool hasIPv4 = false;
d76c62
     bool hasIPv6 = false;
d76c62
@@ -6481,7 +6489,29 @@ qemuDomainDeviceDefValidateNetwork(const virDomainNetDef *net)
d76c62
         return -1;
d76c62
     }
d76c62
 
d76c62
-    if (net->coalesce && !qemuDomainNetSupportsCoalesce(net->type)) {
d76c62
+    if (net->teaming.type != VIR_DOMAIN_NET_TEAMING_TYPE_NONE &&
d76c62
+        !virQEMUCapsGet(qemuCaps, QEMU_CAPS_VIRTIO_NET_FAILOVER)) {
d76c62
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
d76c62
+                       _("virtio-net failover (teaming) is not supported with this QEMU binary"));
d76c62
+        return -1;
d76c62
+    }
d76c62
+    if (net->teaming.type == VIR_DOMAIN_NET_TEAMING_TYPE_PERSISTENT
d76c62
+        && !virDomainNetIsVirtioModel(net)) {
d76c62
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
d76c62
+                       _("virtio-net teaming persistent interface must be <model type='virtio'/>, not '%s'"),
d76c62
+                       virDomainNetGetModelString(net));
d76c62
+        return -1;
d76c62
+    }
d76c62
+    if (net->teaming.type == VIR_DOMAIN_NET_TEAMING_TYPE_TRANSIENT &&
d76c62
+        net->type != VIR_DOMAIN_NET_TYPE_HOSTDEV &&
d76c62
+        net->type != VIR_DOMAIN_NET_TYPE_NETWORK) {
d76c62
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
d76c62
+                       _("virtio-net teaming transient interface must be type='hostdev', not '%s'"),
d76c62
+                       virDomainNetTypeToString(net->type));
d76c62
+        return -1;
d76c62
+    }
d76c62
+
d76c62
+   if (net->coalesce && !qemuDomainNetSupportsCoalesce(net->type)) {
d76c62
         virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
d76c62
                        _("coalesce settings on interface type %s are not supported"),
d76c62
                        virDomainNetTypeToString(net->type));
d76c62
@@ -8377,7 +8407,7 @@ qemuDomainDeviceDefValidate(const virDomainDeviceDef *dev,
d76c62
 
d76c62
     switch ((virDomainDeviceType)dev->type) {
d76c62
     case VIR_DOMAIN_DEVICE_NET:
d76c62
-        ret = qemuDomainDeviceDefValidateNetwork(dev->data.net);
d76c62
+        ret = qemuDomainDeviceDefValidateNetwork(dev->data.net, qemuCaps);
d76c62
         break;
d76c62
 
d76c62
     case VIR_DOMAIN_DEVICE_CHR:
d76c62
diff --git a/tests/qemuxml2argvdata/net-virtio-teaming.args b/tests/qemuxml2argvdata/net-virtio-teaming.args
d76c62
new file mode 100644
d76c62
index 0000000000..19e7260843
d76c62
--- /dev/null
d76c62
+++ b/tests/qemuxml2argvdata/net-virtio-teaming.args
d76c62
@@ -0,0 +1,40 @@
d76c62
+LC_ALL=C \
d76c62
+PATH=/bin \
d76c62
+HOME=/tmp/lib/domain--1-QEMUGuest1 \
d76c62
+USER=test \
d76c62
+LOGNAME=test \
d76c62
+XDG_DATA_HOME=/tmp/lib/domain--1-QEMUGuest1/.local/share \
d76c62
+XDG_CACHE_HOME=/tmp/lib/domain--1-QEMUGuest1/.cache \
d76c62
+XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest1/.config \
d76c62
+QEMU_AUDIO_DRV=none \
d76c62
+/usr/bin/qemu-system-i386 \
d76c62
+-name QEMUGuest1 \
d76c62
+-S \
d76c62
+-machine pc,accel=tcg,usb=off,dump-guest-core=off \
d76c62
+-m 214 \
d76c62
+-realtime mlock=off \
d76c62
+-smp 1,sockets=1,cores=1,threads=1 \
d76c62
+-uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \
d76c62
+-display none \
d76c62
+-no-user-config \
d76c62
+-nodefaults \
d76c62
+-chardev socket,id=charmonitor,path=/tmp/lib/domain--1-QEMUGuest1/monitor.sock,\
d76c62
+server,nowait \
d76c62
+-mon chardev=charmonitor,id=monitor,mode=control \
d76c62
+-rtc base=utc \
d76c62
+-no-shutdown \
d76c62
+-no-acpi \
d76c62
+-usb \
d76c62
+-drive file=/dev/HostVG/QEMUGuest1,format=raw,if=none,id=drive-ide0-0-0 \
d76c62
+-device ide-hd,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0,bootindex=1 \
d76c62
+-netdev user,id=hostua-backup0 \
d76c62
+-device virtio-net-pci,failover=on,netdev=hostua-backup0,id=ua-backup0,\
d76c62
+mac=00:11:22:33:44:55,bus=pci.0,addr=0x3 \
d76c62
+-netdev user,id=hostua-backup1 \
d76c62
+-device virtio-net-pci,failover=on,netdev=hostua-backup1,id=ua-backup1,\
d76c62
+mac=66:44:33:22:11:00,bus=pci.0,addr=0x4 \
d76c62
+-device vfio-pci,host=0000:03:07.1,id=hostdev0,bus=pci.0,addr=0x5,\
d76c62
+failover_pair_id=ua-backup0 \
d76c62
+-device vfio-pci,host=0000:03:07.2,id=hostdev1,bus=pci.0,addr=0x6,\
d76c62
+failover_pair_id=ua-backup1 \
d76c62
+-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x7
d76c62
diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
d76c62
index b923590930..4d26fe0b55 100644
d76c62
--- a/tests/qemuxml2argvtest.c
d76c62
+++ b/tests/qemuxml2argvtest.c
d76c62
@@ -1308,6 +1308,10 @@ mymain(void)
d76c62
             QEMU_CAPS_VIRTIO_NET_RX_QUEUE_SIZE,
d76c62
             QEMU_CAPS_VIRTIO_NET_TX_QUEUE_SIZE);
d76c62
     DO_TEST_PARSE_ERROR("net-virtio-rxqueuesize-invalid-size", NONE);
d76c62
+    DO_TEST("net-virtio-teaming",
d76c62
+            QEMU_CAPS_VIRTIO_NET_FAILOVER,
d76c62
+            QEMU_CAPS_DEVICE_VFIO_PCI);
d76c62
+    DO_TEST_PARSE_ERROR("net-virtio-teaming", NONE);
d76c62
     DO_TEST("net-eth", NONE);
d76c62
     DO_TEST("net-eth-ifname", NONE);
d76c62
     DO_TEST("net-eth-names", NONE);
d76c62
-- 
d76c62
2.25.0
d76c62