9119d9
From f3a81fae20b792b7905a13514aa072678929460a Mon Sep 17 00:00:00 2001
9119d9
Message-Id: <f3a81fae20b792b7905a13514aa072678929460a@dist-git>
9119d9
From: Laine Stump <laine@laine.org>
9119d9
Date: Mon, 15 Dec 2014 10:51:27 -0500
9119d9
Subject: [PATCH] conf: new network bridge device attribute macTableManager
9119d9
9119d9
This is part of the fix for:
9119d9
9119d9
  https://bugzilla.redhat.com/show_bug.cgi?id=1099210
9119d9
9119d9
The macTableManager attribute of a network's bridge subelement tells
9119d9
libvirt how the bridge's MAC address table (used to determine the
9119d9
egress port for packets) is managed. In the default mode, "kernel",
9119d9
management is left to the kernel, which usually determines entries in
9119d9
part by turning on promiscuous mode on all ports of the bridge,
9119d9
flooding packets to all ports when the correct destination is unknown,
9119d9
and adding/removing entries to the fdb as it sees incoming traffic
9119d9
from particular MAC addresses.  In "libvirt" mode, libvirt turns off
9119d9
learning and flooding on all the bridge ports connected to guest
9119d9
domain interfaces, and adds/removes entries according to the MAC
9119d9
addresses in the domain interface configurations. A side effect of
9119d9
turning off learning and unicast_flood on the ports of a bridge is
9119d9
that (with Linux kernel 3.17 and newer), the kernel can automatically
9119d9
turn off promiscuous mode on one or more of the bridge's ports
9119d9
(usually only the one interface that is used to connect the bridge to
9119d9
the physical network). The result is better performance (because
9119d9
packets aren't being flooded to all ports, and can be dropped earlier
9119d9
when they are of no interest) and slightly better security (a guest
9119d9
can still send out packets with a spoofed source MAC address, but will
9119d9
only receive traffic intended for the guest interface's configured MAC
9119d9
address).
9119d9
9119d9
The attribute looks like this in the configuration:
9119d9
9119d9
  <network>
9119d9
    <name>test</name>
9119d9
    <bridge name='br0' macTableManager='libvirt'/>
9119d9
    ...
9119d9
9119d9
This patch only adds the config knob, documentation, and test
9119d9
cases. The functionality behind this knob is added in later patches.
9119d9
9119d9
(cherry picked from commit 40961978ee1f498f3a87baf158c7392e1ba48489)
9119d9
9119d9
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
9119d9
---
9119d9
 docs/formatnetwork.html.in                         | 50 ++++++++++++++++++---
9119d9
 docs/schemas/network.rng                           |  9 ++++
9119d9
 src/conf/network_conf.c                            | 51 +++++++++++++++++-----
9119d9
 src/conf/network_conf.h                            | 11 +++++
9119d9
 src/libvirt_private.syms                           |  2 +
9119d9
 tests/networkxml2xmlin/host-bridge-no-flood.xml    |  6 +++
9119d9
 .../nat-network-explicit-flood.xml                 | 21 +++++++++
9119d9
 tests/networkxml2xmlout/host-bridge-no-flood.xml   |  6 +++
9119d9
 .../nat-network-explicit-flood.xml                 | 23 ++++++++++
9119d9
 tests/networkxml2xmltest.c                         |  2 +
9119d9
 10 files changed, 164 insertions(+), 17 deletions(-)
9119d9
 create mode 100644 tests/networkxml2xmlin/host-bridge-no-flood.xml
9119d9
 create mode 100644 tests/networkxml2xmlin/nat-network-explicit-flood.xml
9119d9
 create mode 100644 tests/networkxml2xmlout/host-bridge-no-flood.xml
9119d9
 create mode 100644 tests/networkxml2xmlout/nat-network-explicit-flood.xml
9119d9
9119d9
diff --git a/docs/formatnetwork.html.in b/docs/formatnetwork.html.in
9119d9
index dc438ae..0851aa1 100644
9119d9
--- a/docs/formatnetwork.html.in
9119d9
+++ b/docs/formatnetwork.html.in
9119d9
@@ -81,7 +81,7 @@
9119d9
 
9119d9
     
9119d9
         ...
9119d9
-        <bridge name="virbr0" stp="on" delay="5"/>
9119d9
+        <bridge name="virbr0" stp="on" delay="5" macTableManager="libvirt"/>
9119d9
         <domain name="example.com"/>
9119d9
         <forward mode="nat" dev="eth0"/>
9119d9
         ...
9119d9
@@ -92,18 +92,56 @@
9119d9
         defines the name of a bridge device which will be used to construct
9119d9
         the virtual network. The virtual machines will be connected to this
9119d9
         bridge device allowing them to talk to each other. The bridge device
9119d9
-        may also be connected to the LAN. It is recommended that bridge
9119d9
-        device names started with the prefix vir, but the name
9119d9
-        virbr0 is reserved for the "default" virtual
9119d9
-        network.  This element should always be provided when defining
9119d9
+        may also be connected to the LAN. When defining
9119d9
         a new network with a <forward> mode of
9119d9
+
9119d9
         "nat" or "route" (or an isolated network with
9119d9
-        no <forward> element).
9119d9
+        no <forward> element), libvirt will
9119d9
+        automatically generate a unique name for the bridge device if
9119d9
+        none is given, and this name will be permanently stored in the
9119d9
+        network configuration so that that the same name will be used
9119d9
+        every time the network is started. For these types of networks
9119d9
+        (nat, routed, and isolated), a bridge name beginning with the
9119d9
+        prefix "virbr" is recommended (and that is what is
9119d9
+        auto-generated), but not enforced.
9119d9
         Attribute stp specifies if Spanning Tree Protocol
9119d9
         is 'on' or 'off' (default is
9119d9
         'on'). Attribute delay sets the bridge's forward
9119d9
         delay value in seconds (default is 0).
9119d9
         Since 0.3.0
9119d9
+
9119d9
+        

9119d9
+          The macTableManager attribute of the bridge
9119d9
+          element is used to tell libvirt how the bridge's MAC address
9119d9
+          table (used to determine the correct egress port for packets
9119d9
+          based on destination MAC address) will be managed. In the
9119d9
+          default kernel setting, the kernel
9119d9
+          automatically adds and removes entries, typically using
9119d9
+          learning, flooding, and promiscuous mode on the bridge's
9119d9
+          ports in order to determine the proper egress port for
9119d9
+          packets.  When macTableManager is set
9119d9
+          to libvirt, libvirt disables kernel management
9119d9
+          of the MAC table (in the case of the Linux host bridge, this
9119d9
+          means enabling vlan_filtering on the bridge, and disabling
9119d9
+          learning and unicast_filter for all bridge ports), and
9119d9
+          explicitly adds/removes entries to the table according to
9119d9
+          the MAC addresses in the domain interface configurations.
9119d9
+          Allowing libvirt to manage the MAC table can improve
9119d9
+          performance - with a Linux host bridge, for example, turning
9119d9
+          off learning and unicast_flood on ports has its own
9119d9
+          performance advantage, and can also lead to an additional
9119d9
+          boost by permitting the kernel to automatically turn off
9119d9
+          promiscuous mode on some ports of the bridge (in particular,
9119d9
+          the port attaching the bridge to the physical
9119d9
+          network). However, it can also cause some networking setups
9119d9
+          to stop working (e.g. vlan tagging, multicast,
9119d9
+          guest-initiated changes to MAC address) and is not supported
9119d9
+          by older kernels.
9119d9
+          Since 1.2.11, requires kernel 3.17 or
9119d9
+          newer
9119d9
+        

9119d9
+
9119d9
+
9119d9
       
9119d9
       
domain
9119d9
       
9119d9
diff --git a/docs/schemas/network.rng b/docs/schemas/network.rng
9119d9
index 2783f86..c0ef554 100644
9119d9
--- a/docs/schemas/network.rng
9119d9
+++ b/docs/schemas/network.rng
9119d9
@@ -65,6 +65,15 @@
9119d9
               </attribute>
9119d9
             </optional>
9119d9
 
9119d9
+            <optional>
9119d9
+              <attribute name="macTableManager">
9119d9
+                <choice>
9119d9
+                  <value>kernel</value>
9119d9
+                  <value>libvirt</value>
9119d9
+                </choice>
9119d9
+              </attribute>
9119d9
+            </optional>
9119d9
+
9119d9
           </element>
9119d9
         </optional>
9119d9
 
9119d9
diff --git a/src/conf/network_conf.c b/src/conf/network_conf.c
9119d9
index b253fec..8015bf3 100644
9119d9
--- a/src/conf/network_conf.c
9119d9
+++ b/src/conf/network_conf.c
9119d9
@@ -55,6 +55,10 @@ VIR_ENUM_IMPL(virNetworkForward,
9119d9
               VIR_NETWORK_FORWARD_LAST,
9119d9
               "none", "nat", "route", "bridge", "private", "vepa", "passthrough", "hostdev")
9119d9
 
9119d9
+VIR_ENUM_IMPL(virNetworkBridgeMACTableManager,
9119d9
+              VIR_NETWORK_BRIDGE_MAC_TABLE_MANAGER_LAST,
9119d9
+              "default", "kernel", "libvirt")
9119d9
+
9119d9
 VIR_ENUM_DECL(virNetworkForwardHostdevDevice)
9119d9
 VIR_ENUM_IMPL(virNetworkForwardHostdevDevice,
9119d9
               VIR_NETWORK_FORWARD_HOSTDEV_DEVICE_LAST,
9119d9
@@ -2110,6 +2114,18 @@ virNetworkDefParseXML(xmlXPathContextPtr ctxt)
9119d9
     if (virXPathULong("string(./bridge[1]/@delay)", ctxt, &def->delay) < 0)
9119d9
         def->delay = 0;
9119d9
 
9119d9
+    tmp = virXPathString("string(./bridge[1]/@macTableManager)", ctxt);
9119d9
+    if (tmp) {
9119d9
+        if ((def->macTableManager
9119d9
+             = virNetworkBridgeMACTableManagerTypeFromString(tmp)) <= 0) {
9119d9
+            virReportError(VIR_ERR_XML_ERROR,
9119d9
+                           _("Invalid macTableManager setting '%s' "
9119d9
+                             "in network '%s'"), tmp, def->name);
9119d9
+            goto error;
9119d9
+        }
9119d9
+        VIR_FREE(tmp);
9119d9
+    }
9119d9
+
9119d9
     tmp = virXPathString("string(./mac[1]/@address)", ctxt);
9119d9
     if (tmp) {
9119d9
         if (virMacAddrParse(tmp, &def->mac) < 0) {
9119d9
@@ -2292,6 +2308,14 @@ virNetworkDefParseXML(xmlXPathContextPtr ctxt)
9119d9
                            def->name);
9119d9
             goto error;
9119d9
         }
9119d9
+        if (def->macTableManager) {
9119d9
+            virReportError(VIR_ERR_XML_ERROR,
9119d9
+                           _("bridge macTableManager setting not allowed "
9119d9
+                             "in %s mode (network '%s')"),
9119d9
+                           virNetworkForwardTypeToString(def->forward.type),
9119d9
+                           def->name);
9119d9
+            goto error;
9119d9
+        }
9119d9
         /* fall through to next case */
9119d9
     case VIR_NETWORK_FORWARD_BRIDGE:
9119d9
         if (def->delay || stp) {
9119d9
@@ -2791,22 +2815,27 @@ virNetworkDefFormatBuf(virBufferPtr buf,
9119d9
             virBufferAddLit(buf, "</forward>\n");
9119d9
     }
9119d9
 
9119d9
+
9119d9
     if (def->forward.type == VIR_NETWORK_FORWARD_NONE ||
9119d9
-         def->forward.type == VIR_NETWORK_FORWARD_NAT ||
9119d9
-         def->forward.type == VIR_NETWORK_FORWARD_ROUTE) {
9119d9
+        def->forward.type == VIR_NETWORK_FORWARD_NAT ||
9119d9
+        def->forward.type == VIR_NETWORK_FORWARD_ROUTE ||
9119d9
+        def->bridge || def->macTableManager) {
9119d9
 
9119d9
         virBufferAddLit(buf, "
9119d9
-        if (def->bridge)
9119d9
-            virBufferEscapeString(buf, " name='%s'", def->bridge);
9119d9
-        virBufferAsprintf(buf, " stp='%s' delay='%ld'/>\n",
9119d9
-                          def->stp ? "on" : "off",
9119d9
-                          def->delay);
9119d9
-    } else if (def->forward.type == VIR_NETWORK_FORWARD_BRIDGE &&
9119d9
-               def->bridge) {
9119d9
-        virBufferEscapeString(buf, "<bridge name='%s'/>\n", def->bridge);
9119d9
+        virBufferEscapeString(buf, " name='%s'", def->bridge);
9119d9
+        if (def->forward.type == VIR_NETWORK_FORWARD_NONE ||
9119d9
+            def->forward.type == VIR_NETWORK_FORWARD_NAT ||
9119d9
+            def->forward.type == VIR_NETWORK_FORWARD_ROUTE) {
9119d9
+            virBufferAsprintf(buf, " stp='%s' delay='%ld'",
9119d9
+                              def->stp ? "on" : "off", def->delay);
9119d9
+        }
9119d9
+        if (def->macTableManager) {
9119d9
+            virBufferAsprintf(buf, " macTableManager='%s'",
9119d9
+                             virNetworkBridgeMACTableManagerTypeToString(def->macTableManager));
9119d9
+        }
9119d9
+        virBufferAddLit(buf, "/>\n");
9119d9
     }
9119d9
 
9119d9
-
9119d9
     if (def->mac_specified) {
9119d9
         char macaddr[VIR_MAC_STRING_BUFLEN];
9119d9
         virMacAddrFormat(&def->mac, macaddr);
9119d9
diff --git a/src/conf/network_conf.h b/src/conf/network_conf.h
9119d9
index 660cd2d..8110028 100644
9119d9
--- a/src/conf/network_conf.h
9119d9
+++ b/src/conf/network_conf.h
9119d9
@@ -54,6 +54,16 @@ typedef enum {
9119d9
 } virNetworkForwardType;
9119d9
 
9119d9
 typedef enum {
9119d9
+   VIR_NETWORK_BRIDGE_MAC_TABLE_MANAGER_DEFAULT = 0,
9119d9
+   VIR_NETWORK_BRIDGE_MAC_TABLE_MANAGER_KERNEL,
9119d9
+   VIR_NETWORK_BRIDGE_MAC_TABLE_MANAGER_LIBVIRT,
9119d9
+
9119d9
+   VIR_NETWORK_BRIDGE_MAC_TABLE_MANAGER_LAST,
9119d9
+} virNetworkBridgeMACTableManagerType;
9119d9
+
9119d9
+VIR_ENUM_DECL(virNetworkBridgeMACTableManager)
9119d9
+
9119d9
+typedef enum {
9119d9
     VIR_NETWORK_FORWARD_HOSTDEV_DEVICE_NONE = 0,
9119d9
     VIR_NETWORK_FORWARD_HOSTDEV_DEVICE_PCI,
9119d9
     VIR_NETWORK_FORWARD_HOSTDEV_DEVICE_NETDEV,
9119d9
@@ -231,6 +241,7 @@ struct _virNetworkDef {
9119d9
     int   connections; /* # of guest interfaces connected to this network */
9119d9
 
9119d9
     char *bridge;       /* Name of bridge device */
9119d9
+    int  macTableManager; /* enum virNetworkBridgeMACTableManager */
9119d9
     char *domain;
9119d9
     unsigned long delay;   /* Bridge forward delay (ms) */
9119d9
     bool stp; /* Spanning tree protocol */
9119d9
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
9119d9
index ec795aa..e4da444 100644
9119d9
--- a/src/libvirt_private.syms
9119d9
+++ b/src/libvirt_private.syms
9119d9
@@ -525,6 +525,8 @@ virNetDevVPortTypeToString;
9119d9
 
9119d9
 # conf/network_conf.h
9119d9
 virNetworkAssignDef;
9119d9
+virNetworkBridgeMACTableManagerTypeFromString;
9119d9
+virNetworkBridgeMACTableManagerTypeToString;
9119d9
 virNetworkConfigChangeSetup;
9119d9
 virNetworkConfigFile;
9119d9
 virNetworkDefCopy;
9119d9
diff --git a/tests/networkxml2xmlin/host-bridge-no-flood.xml b/tests/networkxml2xmlin/host-bridge-no-flood.xml
9119d9
new file mode 100644
9119d9
index 0000000..562e697
9119d9
--- /dev/null
9119d9
+++ b/tests/networkxml2xmlin/host-bridge-no-flood.xml
9119d9
@@ -0,0 +1,6 @@
9119d9
+<network>
9119d9
+  <name>host-bridge-net</name>
9119d9
+  <uuid>81ff0d90-c91e-6742-64da-4a736edb9a8e</uuid>
9119d9
+  <forward mode="bridge"/>
9119d9
+  <bridge name="br0" macTableManager="libvirt"/>
9119d9
+</network>
9119d9
diff --git a/tests/networkxml2xmlin/nat-network-explicit-flood.xml b/tests/networkxml2xmlin/nat-network-explicit-flood.xml
9119d9
new file mode 100644
9119d9
index 0000000..9738b81
9119d9
--- /dev/null
9119d9
+++ b/tests/networkxml2xmlin/nat-network-explicit-flood.xml
9119d9
@@ -0,0 +1,21 @@
9119d9
+<network>
9119d9
+  <name>default</name>
9119d9
+  <uuid>81ff0d90-c91e-6742-64da-4a736edb9a9b</uuid>
9119d9
+  <bridge name="virbr0" macTableManager="kernel"/>
9119d9
+  <forward mode="nat" dev="eth1"/>
9119d9
+  <ip address="192.168.122.1" netmask="255.255.255.0">
9119d9
+    <dhcp>
9119d9
+      <range start="192.168.122.2" end="192.168.122.254"/>
9119d9
+      <host mac="00:16:3e:77:e2:ed" name="a.example.com" ip="192.168.122.10"/>
9119d9
+      <host mac="00:16:3e:3e:a9:1a" name="b.example.com" ip="192.168.122.11"/>
9119d9
+    </dhcp>
9119d9
+  </ip>
9119d9
+  <ip family="ipv4" address="192.168.123.1" netmask="255.255.255.0">
9119d9
+  </ip>
9119d9
+  <ip family="ipv6" address="2001:db8:ac10:fe01::1" prefix="64">
9119d9
+  </ip>
9119d9
+  <ip family="ipv6" address="2001:db8:ac10:fd01::1" prefix="64">
9119d9
+  </ip>
9119d9
+  <ip family="ipv4" address="10.24.10.1">
9119d9
+  </ip>
9119d9
+</network>
9119d9
diff --git a/tests/networkxml2xmlout/host-bridge-no-flood.xml b/tests/networkxml2xmlout/host-bridge-no-flood.xml
9119d9
new file mode 100644
9119d9
index 0000000..bdbbf3b
9119d9
--- /dev/null
9119d9
+++ b/tests/networkxml2xmlout/host-bridge-no-flood.xml
9119d9
@@ -0,0 +1,6 @@
9119d9
+<network>
9119d9
+  <name>host-bridge-net</name>
9119d9
+  <uuid>81ff0d90-c91e-6742-64da-4a736edb9a8e</uuid>
9119d9
+  <forward mode='bridge'/>
9119d9
+  <bridge name='br0' macTableManager='libvirt'/>
9119d9
+</network>
9119d9
diff --git a/tests/networkxml2xmlout/nat-network-explicit-flood.xml b/tests/networkxml2xmlout/nat-network-explicit-flood.xml
9119d9
new file mode 100644
9119d9
index 0000000..305c3b7
9119d9
--- /dev/null
9119d9
+++ b/tests/networkxml2xmlout/nat-network-explicit-flood.xml
9119d9
@@ -0,0 +1,23 @@
9119d9
+<network>
9119d9
+  <name>default</name>
9119d9
+  <uuid>81ff0d90-c91e-6742-64da-4a736edb9a9b</uuid>
9119d9
+  <forward dev='eth1' mode='nat'>
9119d9
+    <interface dev='eth1'/>
9119d9
+  </forward>
9119d9
+  <bridge name='virbr0' stp='on' delay='0' macTableManager='kernel'/>
9119d9
+  <ip address='192.168.122.1' netmask='255.255.255.0'>
9119d9
+    <dhcp>
9119d9
+      <range start='192.168.122.2' end='192.168.122.254'/>
9119d9
+      <host mac='00:16:3e:77:e2:ed' name='a.example.com' ip='192.168.122.10'/>
9119d9
+      <host mac='00:16:3e:3e:a9:1a' name='b.example.com' ip='192.168.122.11'/>
9119d9
+    </dhcp>
9119d9
+  </ip>
9119d9
+  <ip family='ipv4' address='192.168.123.1' netmask='255.255.255.0'>
9119d9
+  </ip>
9119d9
+  <ip family='ipv6' address='2001:db8:ac10:fe01::1' prefix='64'>
9119d9
+  </ip>
9119d9
+  <ip family='ipv6' address='2001:db8:ac10:fd01::1' prefix='64'>
9119d9
+  </ip>
9119d9
+  <ip family='ipv4' address='10.24.10.1'>
9119d9
+  </ip>
9119d9
+</network>
9119d9
diff --git a/tests/networkxml2xmltest.c b/tests/networkxml2xmltest.c
9119d9
index 65ac591..34a5211 100644
9119d9
--- a/tests/networkxml2xmltest.c
9119d9
+++ b/tests/networkxml2xmltest.c
9119d9
@@ -120,6 +120,8 @@ mymain(void)
9119d9
     DO_TEST("hostdev");
9119d9
     DO_TEST_FULL("hostdev-pf", VIR_NETWORK_XML_INACTIVE);
9119d9
     DO_TEST("passthrough-address-crash");
9119d9
+    DO_TEST("nat-network-explicit-flood");
9119d9
+    DO_TEST("host-bridge-no-flood");
9119d9
 
9119d9
     return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
9119d9
 }
9119d9
-- 
9119d9
2.2.0
9119d9