a41c76
From 16ba1d0258765d9c3b5e2da666ed6d4d933e26d9 Mon Sep 17 00:00:00 2001
a41c76
Message-Id: <16ba1d0258765d9c3b5e2da666ed6d4d933e26d9@dist-git>
a41c76
From: Laine Stump <laine@redhat.com>
a41c76
Date: Thu, 30 Jan 2020 14:12:41 -0500
a41c76
Subject: [PATCH] qemu: support interface <teaming> functionality
a41c76
MIME-Version: 1.0
a41c76
Content-Type: text/plain; charset=UTF-8
a41c76
Content-Transfer-Encoding: 8bit
a41c76
a41c76
The QEMU driver uses the <teaming type='persistent|transient'
a41c76
persistent='blah'/> element to set up a "failover" pair of devices -
a41c76
the persistent device must be a virtio emulated NIC, with the only
a41c76
extra configuration being the addition of ",failover=on" to the device
a41c76
commandline, and the transient device must be a hostdev NIC
a41c76
(<interface type='hostdev'> or <interface type='network'> with a
a41c76
network that is a pool of SRIOV VFs) where the extra configuration is
a41c76
the addition of ",failover_pair_id=$aliasOfVirtio" to the device
a41c76
commandline. These new options are supported in QEMU 4.2.0 and later.
a41c76
a41c76
Extra qemu-specific validation is added to ensure that the device
a41c76
type/model is appropriate and that the qemu binary supports these
a41c76
commandline options.
a41c76
a41c76
The result of this will be:
a41c76
a41c76
1) The virtio device presented to the guest will have an extra bit set
a41c76
in its PCI capabilities indicating that it can be used as a failover
a41c76
backup device. The virtio guest driver will need to be equipped to do
a41c76
something with this information - this is included in the Linux
a41c76
virtio-net driver in kernel 4.18 and above (and also backported to
a41c76
some older distro kernels). Unfortunately there is no way for libvirt
a41c76
to learn whether or not the guest driver supports failover - if it
a41c76
doesn't then the extra PCI capability will be ignored and the guest OS
a41c76
will just see two independent devices. (NB: the current virtio guest
a41c76
driver also requires that the MAC addresses of the two NICs match in
a41c76
order to pair them into a bond).
a41c76
a41c76
2) When a migration is requested, QEMU will automatically unplug the
a41c76
transient/hostdev NIC from the guest on the source host before
a41c76
starting migration, and automatically re-plug a similar device after
a41c76
restarting the guest CPUs on the destination host. While the transient
a41c76
NIC is unplugged, all network traffic will go through the
a41c76
persistent/virtio device, but when the hostdev NIC is plugged in, it
a41c76
will get all the traffic. This means that in normal circumstances the
a41c76
guest gets the performance advantage of vfio-assigned "real hardware"
a41c76
networking, but it can still be migrated with the only downside being
a41c76
a performance penalty (due to using an emulated NIC) during the
a41c76
migration.
a41c76
a41c76
Signed-off-by: Laine Stump <laine@redhat.com>
a41c76
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
a41c76
(cherry picked from commit eb9f6cc4b3464707cf689fda9812e5129003bf27)
a41c76
a41c76
https://bugzilla.redhat.com/1693587
a41c76
Signed-off-by: Laine Stump <laine@redhat.com>
a41c76
Message-Id: <20200130191244.24174-4-laine@redhat.com>
a41c76
Reviewed-by: Jiri Denemark <jdenemar@redhat.com>
a41c76
---
a41c76
 src/qemu/qemu_command.c                       |  9 +++++
a41c76
 src/qemu/qemu_domain.c                        | 36 +++++++++++++++--
a41c76
 .../qemuxml2argvdata/net-virtio-teaming.args  | 40 +++++++++++++++++++
a41c76
 tests/qemuxml2argvtest.c                      |  4 ++
a41c76
 4 files changed, 86 insertions(+), 3 deletions(-)
a41c76
 create mode 100644 tests/qemuxml2argvdata/net-virtio-teaming.args
a41c76
a41c76
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
a41c76
index 7a184c229e..d144855b0d 100644
a41c76
--- a/src/qemu/qemu_command.c
a41c76
+++ b/src/qemu/qemu_command.c
a41c76
@@ -3833,6 +3833,8 @@ qemuBuildNicDevStr(virDomainDefPtr def,
a41c76
         }
a41c76
         virBufferAsprintf(&buf, ",host_mtu=%u", net->mtu);
a41c76
     }
a41c76
+    if (usingVirtio && net->teaming.type == VIR_DOMAIN_NET_TEAMING_TYPE_PERSISTENT)
a41c76
+       virBufferAddLit(&buf, ",failover=on");
a41c76
 
a41c76
     virBufferAsprintf(&buf, ",netdev=host%s", net->info.alias);
a41c76
     virBufferAsprintf(&buf, ",id=%s", net->info.alias);
a41c76
@@ -4704,6 +4706,13 @@ qemuBuildPCIHostdevDevStr(const virDomainDef *def,
a41c76
     if (qemuBuildRomStr(&buf, dev->info) < 0)
a41c76
         return NULL;
a41c76
 
a41c76
+    if (dev->parentnet &&
a41c76
+        dev->parentnet->teaming.type == VIR_DOMAIN_NET_TEAMING_TYPE_TRANSIENT &&
a41c76
+        dev->parentnet->teaming.persistent) {
a41c76
+        virBufferAsprintf(&buf,  ",failover_pair_id=%s",
a41c76
+                          dev->parentnet->teaming.persistent);
a41c76
+    }
a41c76
+
a41c76
     return virBufferContentAndReset(&buf);
a41c76
 }
a41c76
 
a41c76
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
a41c76
index 91a9f0481b..e37404340f 100644
a41c76
--- a/src/qemu/qemu_domain.c
a41c76
+++ b/src/qemu/qemu_domain.c
a41c76
@@ -6391,12 +6391,20 @@ qemuDomainValidateActualNetDef(const virDomainNetDef *net,
a41c76
         return -1;
a41c76
     }
a41c76
 
a41c76
+    if (net->teaming.type == VIR_DOMAIN_NET_TEAMING_TYPE_TRANSIENT &&
a41c76
+        actualType != VIR_DOMAIN_NET_TYPE_HOSTDEV) {
a41c76
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
a41c76
+                       _("interface %s - teaming transient device must be type='hostdev', not '%s'"),
a41c76
+                       macstr, virDomainNetTypeToString(actualType));
a41c76
+        return -1;
a41c76
+    }
a41c76
     return 0;
a41c76
 }
a41c76
 
a41c76
 
a41c76
 static int
a41c76
-qemuDomainDeviceDefValidateNetwork(const virDomainNetDef *net)
a41c76
+qemuDomainDeviceDefValidateNetwork(const virDomainNetDef *net,
a41c76
+                                   virQEMUCapsPtr qemuCaps)
a41c76
 {
a41c76
     bool hasIPv4 = false;
a41c76
     bool hasIPv6 = false;
a41c76
@@ -6481,7 +6489,29 @@ qemuDomainDeviceDefValidateNetwork(const virDomainNetDef *net)
a41c76
         return -1;
a41c76
     }
a41c76
 
a41c76
-    if (net->coalesce && !qemuDomainNetSupportsCoalesce(net->type)) {
a41c76
+    if (net->teaming.type != VIR_DOMAIN_NET_TEAMING_TYPE_NONE &&
a41c76
+        !virQEMUCapsGet(qemuCaps, QEMU_CAPS_VIRTIO_NET_FAILOVER)) {
a41c76
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
a41c76
+                       _("virtio-net failover (teaming) is not supported with this QEMU binary"));
a41c76
+        return -1;
a41c76
+    }
a41c76
+    if (net->teaming.type == VIR_DOMAIN_NET_TEAMING_TYPE_PERSISTENT
a41c76
+        && !virDomainNetIsVirtioModel(net)) {
a41c76
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
a41c76
+                       _("virtio-net teaming persistent interface must be <model type='virtio'/>, not '%s'"),
a41c76
+                       virDomainNetGetModelString(net));
a41c76
+        return -1;
a41c76
+    }
a41c76
+    if (net->teaming.type == VIR_DOMAIN_NET_TEAMING_TYPE_TRANSIENT &&
a41c76
+        net->type != VIR_DOMAIN_NET_TYPE_HOSTDEV &&
a41c76
+        net->type != VIR_DOMAIN_NET_TYPE_NETWORK) {
a41c76
+        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
a41c76
+                       _("virtio-net teaming transient interface must be type='hostdev', not '%s'"),
a41c76
+                       virDomainNetTypeToString(net->type));
a41c76
+        return -1;
a41c76
+    }
a41c76
+
a41c76
+   if (net->coalesce && !qemuDomainNetSupportsCoalesce(net->type)) {
a41c76
         virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
a41c76
                        _("coalesce settings on interface type %s are not supported"),
a41c76
                        virDomainNetTypeToString(net->type));
a41c76
@@ -8377,7 +8407,7 @@ qemuDomainDeviceDefValidate(const virDomainDeviceDef *dev,
a41c76
 
a41c76
     switch ((virDomainDeviceType)dev->type) {
a41c76
     case VIR_DOMAIN_DEVICE_NET:
a41c76
-        ret = qemuDomainDeviceDefValidateNetwork(dev->data.net);
a41c76
+        ret = qemuDomainDeviceDefValidateNetwork(dev->data.net, qemuCaps);
a41c76
         break;
a41c76
 
a41c76
     case VIR_DOMAIN_DEVICE_CHR:
a41c76
diff --git a/tests/qemuxml2argvdata/net-virtio-teaming.args b/tests/qemuxml2argvdata/net-virtio-teaming.args
a41c76
new file mode 100644
a41c76
index 0000000000..19e7260843
a41c76
--- /dev/null
a41c76
+++ b/tests/qemuxml2argvdata/net-virtio-teaming.args
a41c76
@@ -0,0 +1,40 @@
a41c76
+LC_ALL=C \
a41c76
+PATH=/bin \
a41c76
+HOME=/tmp/lib/domain--1-QEMUGuest1 \
a41c76
+USER=test \
a41c76
+LOGNAME=test \
a41c76
+XDG_DATA_HOME=/tmp/lib/domain--1-QEMUGuest1/.local/share \
a41c76
+XDG_CACHE_HOME=/tmp/lib/domain--1-QEMUGuest1/.cache \
a41c76
+XDG_CONFIG_HOME=/tmp/lib/domain--1-QEMUGuest1/.config \
a41c76
+QEMU_AUDIO_DRV=none \
a41c76
+/usr/bin/qemu-system-i386 \
a41c76
+-name QEMUGuest1 \
a41c76
+-S \
a41c76
+-machine pc,accel=tcg,usb=off,dump-guest-core=off \
a41c76
+-m 214 \
a41c76
+-realtime mlock=off \
a41c76
+-smp 1,sockets=1,cores=1,threads=1 \
a41c76
+-uuid c7a5fdbd-edaf-9455-926a-d65c16db1809 \
a41c76
+-display none \
a41c76
+-no-user-config \
a41c76
+-nodefaults \
a41c76
+-chardev socket,id=charmonitor,path=/tmp/lib/domain--1-QEMUGuest1/monitor.sock,\
a41c76
+server,nowait \
a41c76
+-mon chardev=charmonitor,id=monitor,mode=control \
a41c76
+-rtc base=utc \
a41c76
+-no-shutdown \
a41c76
+-no-acpi \
a41c76
+-usb \
a41c76
+-drive file=/dev/HostVG/QEMUGuest1,format=raw,if=none,id=drive-ide0-0-0 \
a41c76
+-device ide-hd,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0,bootindex=1 \
a41c76
+-netdev user,id=hostua-backup0 \
a41c76
+-device virtio-net-pci,failover=on,netdev=hostua-backup0,id=ua-backup0,\
a41c76
+mac=00:11:22:33:44:55,bus=pci.0,addr=0x3 \
a41c76
+-netdev user,id=hostua-backup1 \
a41c76
+-device virtio-net-pci,failover=on,netdev=hostua-backup1,id=ua-backup1,\
a41c76
+mac=66:44:33:22:11:00,bus=pci.0,addr=0x4 \
a41c76
+-device vfio-pci,host=0000:03:07.1,id=hostdev0,bus=pci.0,addr=0x5,\
a41c76
+failover_pair_id=ua-backup0 \
a41c76
+-device vfio-pci,host=0000:03:07.2,id=hostdev1,bus=pci.0,addr=0x6,\
a41c76
+failover_pair_id=ua-backup1 \
a41c76
+-device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x7
a41c76
diff --git a/tests/qemuxml2argvtest.c b/tests/qemuxml2argvtest.c
a41c76
index b923590930..4d26fe0b55 100644
a41c76
--- a/tests/qemuxml2argvtest.c
a41c76
+++ b/tests/qemuxml2argvtest.c
a41c76
@@ -1308,6 +1308,10 @@ mymain(void)
a41c76
             QEMU_CAPS_VIRTIO_NET_RX_QUEUE_SIZE,
a41c76
             QEMU_CAPS_VIRTIO_NET_TX_QUEUE_SIZE);
a41c76
     DO_TEST_PARSE_ERROR("net-virtio-rxqueuesize-invalid-size", NONE);
a41c76
+    DO_TEST("net-virtio-teaming",
a41c76
+            QEMU_CAPS_VIRTIO_NET_FAILOVER,
a41c76
+            QEMU_CAPS_DEVICE_VFIO_PCI);
a41c76
+    DO_TEST_PARSE_ERROR("net-virtio-teaming", NONE);
a41c76
     DO_TEST("net-eth", NONE);
a41c76
     DO_TEST("net-eth-ifname", NONE);
a41c76
     DO_TEST("net-eth-names", NONE);
a41c76
-- 
a41c76
2.25.0
a41c76