diff --git a/.ci/linux-build.sh b/.ci/linux-build.sh
index 863f023888..c06e88c577 100755
--- a/.ci/linux-build.sh
+++ b/.ci/linux-build.sh
@@ -216,7 +216,7 @@ fi
if [ "$DPDK" ] || [ "$DPDK_SHARED" ]; then
if [ -z "$DPDK_VER" ]; then
- DPDK_VER="20.11.1"
+ DPDK_VER="20.11.4"
fi
install_dpdk $DPDK_VER
if [ "$CC" = "clang" ]; then
@@ -246,8 +246,8 @@ if [ "$ASAN" ]; then
export ASAN_OPTIONS='detect_leaks=1'
# -O2 generates few false-positive memory leak reports in test-ovsdb
# application, so lowering optimizations to -O1 here.
- CLFAGS_ASAN="-O1 -fno-omit-frame-pointer -fno-common -fsanitize=address"
- CFLAGS_FOR_OVS="${CFLAGS_FOR_OVS} ${CLFAGS_ASAN}"
+ CFLAGS_ASAN="-O1 -fno-omit-frame-pointer -fno-common -fsanitize=address"
+ CFLAGS_FOR_OVS="${CFLAGS_FOR_OVS} ${CFLAGS_ASAN}"
fi
save_OPTS="${OPTS} $*"
diff --git a/.ci/linux-prepare.sh b/.ci/linux-prepare.sh
index c55125cf78..c0b7473eda 100755
--- a/.ci/linux-prepare.sh
+++ b/.ci/linux-prepare.sh
@@ -20,9 +20,13 @@ cd sparse
make -j4 HAVE_LLVM= HAVE_SQLITE= install
cd ..
+# Installing wheel separately because it may be needed to build some
+# of the packages during dependency backtracking and pip >= 22.0 will
+# abort backtracking on build failures:
+# https://github.com/pypa/pip/issues/10655
+pip3 install --disable-pip-version-check --user wheel
pip3 install --disable-pip-version-check --user \
- flake8 hacking sphinx pyOpenSSL wheel setuptools
-pip3 install --user --upgrade docutils
+ flake8 'hacking>=3.0' sphinx setuptools
pip3 install --user 'meson==0.47.1'
if [ "$M32" ]; then
diff --git a/.cirrus.yml b/.cirrus.yml
index 358f2ba256..a4d2a5bbcd 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -2,14 +2,14 @@ freebsd_build_task:
freebsd_instance:
matrix:
- image_family: freebsd-12-2-snap
- image_family: freebsd-11-4-snap
+ image_family: freebsd-12-3-snap
+ image_family: freebsd-13-0-snap
cpu: 4
- memory: 8G
+ memory: 4G
env:
DEPENDENCIES: automake libtool gmake gcc wget openssl python3
- PY_DEPS: sphinx|openssl
+ PY_DEPS: sphinx
matrix:
COMPILER: gcc
COMPILER: clang
diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml
index e2350c6d9d..7434ad18ec 100644
--- a/.github/workflows/build-and-test.yml
+++ b/.github/workflows/build-and-test.yml
@@ -127,7 +127,7 @@ jobs:
- name: set up python
uses: actions/setup-python@v2
with:
- python-version: '3.x'
+ python-version: '3.9'
- name: create ci signature file for the dpdk cache key
if: matrix.dpdk != '' || matrix.dpdk_shared != ''
@@ -215,7 +215,7 @@ jobs:
- name: set up python
uses: actions/setup-python@v2
with:
- python-version: '3.x'
+ python-version: '3.9'
- name: install dependencies
run: brew install automake libtool
- name: prepare
diff --git a/.travis.yml b/.travis.yml
index 51d0511080..c7aeede06e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -17,7 +17,6 @@ addons:
- libjemalloc-dev
- libnuma-dev
- libpcap-dev
- - python3-openssl
- python3-pip
- python3-sphinx
- libelf-dev
diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst
index 68c9867b19..d62d575eba 100644
--- a/Documentation/faq/releases.rst
+++ b/Documentation/faq/releases.rst
@@ -205,10 +205,10 @@ Q: What DPDK version does each Open vSwitch release work with?
2.10.x 17.11.10
2.11.x 18.11.9
2.12.x 18.11.9
- 2.13.x 19.11.8
- 2.14.x 19.11.8
- 2.15.x 20.11.1
- 2.16.x 20.11.1
+ 2.13.x 19.11.10
+ 2.14.x 19.11.10
+ 2.15.x 20.11.4
+ 2.16.x 20.11.4
============ ========
Q: Are all the DPDK releases that OVS versions work with maintained?
diff --git a/Documentation/intro/install/dpdk.rst b/Documentation/intro/install/dpdk.rst
index d8fa931fab..9ce5285c58 100644
--- a/Documentation/intro/install/dpdk.rst
+++ b/Documentation/intro/install/dpdk.rst
@@ -42,7 +42,7 @@ Build requirements
In addition to the requirements described in :doc:`general`, building Open
vSwitch with DPDK will require the following:
-- DPDK 20.11.1
+- DPDK 20.11.4
- A `DPDK supported NIC`_
@@ -73,9 +73,9 @@ Install DPDK
#. Download the `DPDK sources`_, extract the file and set ``DPDK_DIR``::
$ cd /usr/src/
- $ wget https://fast.dpdk.org/rel/dpdk-20.11.1.tar.xz
- $ tar xf dpdk-20.11.1.tar.xz
- $ export DPDK_DIR=/usr/src/dpdk-stable-20.11.1
+ $ wget https://fast.dpdk.org/rel/dpdk-20.11.4.tar.xz
+ $ tar xf dpdk-20.11.4.tar.xz
+ $ export DPDK_DIR=/usr/src/dpdk-stable-20.11.4
$ cd $DPDK_DIR
#. Configure and install DPDK using Meson
@@ -219,7 +219,7 @@ To verify hugepage configuration::
Mount the hugepages, if not already mounted by default::
- $ mount -t hugetlbfs none /dev/hugepages``
+ $ mount -t hugetlbfs none /dev/hugepages
.. note::
diff --git a/Documentation/intro/install/general.rst b/Documentation/intro/install/general.rst
index c4300cd53e..a297aadac8 100644
--- a/Documentation/intro/install/general.rst
+++ b/Documentation/intro/install/general.rst
@@ -169,7 +169,7 @@ other than plain text, only if you have the following:
If you are going to extensively modify Open vSwitch, consider installing the
following to obtain better warnings:
-- "sparse" version 0.5.1 or later
+- "sparse" version 0.6.2 or later
(https://git.kernel.org/pub/scm/devel/sparse/sparse.git/).
- GNU make.
diff --git a/Documentation/topics/dpdk/pmd.rst b/Documentation/topics/dpdk/pmd.rst
index 95fa7af128..c1a35eb13a 100644
--- a/Documentation/topics/dpdk/pmd.rst
+++ b/Documentation/topics/dpdk/pmd.rst
@@ -31,17 +31,19 @@ input ports for packets, classifying packets once received, and executing
actions on the packets once they are classified.
PMD threads utilize Receive (Rx) and Transmit (Tx) queues, commonly known as
-*rxq*\s and *txq*\s. While Tx queue configuration happens automatically, Rx
-queues can be configured by the user. This can happen in one of two ways:
+*rxq*\s and *txq*\s to receive and send packets from/to an interface.
-- For physical interfaces, configuration is done using the
- :program:`ovs-appctl` utility.
+- For physical interfaces, the number of Tx Queues is automatically configured
+ based on the number of PMD thread cores. The number of Rx queues can be
+ configured with::
-- For virtual interfaces, configuration is done using the :program:`ovs-appctl`
- utility, but this configuration must be reflected in the guest configuration
- (e.g. QEMU command line arguments).
+ $ ovs-vsctl set Interface <interface_name> options:n_rxq=N
-The :program:`ovs-appctl` utility also provides a number of commands for
+- For virtual interfaces, the number of Tx and Rx queues are configured by
+ libvirt/QEMU and enabled/disabled in the guest. Refer to :doc:`vhost-user`
+ for more information.
+
+The :program:`ovs-appctl` utility provides a number of commands for
querying PMD threads and their respective queues. This, and all of the above,
is discussed here.
diff --git a/NEWS b/NEWS
index 559a51ba3f..c3c5c16ae6 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,39 @@
+v2.16.5 - xx xxx xxxx
+---------------------
+
+v2.16.4 - 15 Jun 2022
+---------------------
+ - Bug fixes
+
+v2.16.3 - 08 Apr 2022
+---------------------
+ - Bug fixes
+ - libopenvswitch API change:
+ * To fix the Undefined Behavior issue causing the compiler to incorrectly
+ optimize important parts of code, container iteration macros (e.g.,
+ LIST_FOR_EACH) have been re-implemented in a UB-safe way.
+ * Backwards compatibility has mostly been preserved, however the
+ user-provided pointer is now set to NULL after the loop (unless it
+ exited via "break;")
+ * Users of libopenvswitch will need to double-check the use of such loop
+ macros before compiling with a new version.
+ * Since the change is limited to the definitions within the headers, the
+ ABI is not affected.
+ - DPDK:
+ * OVS validated with DPDK 20.11.4. It is recommended to use this version
+ until further releases.
+ - Python:
+ * For SSL support, the use of the pyOpenSSL library has been replaced
+ with the native 'ssl' module.
+
+v2.16.2 - 17 Dec 2021
+---------------------
+ - Bug fixes
+
+v2.16.1 - 21 Oct 2021
+---------------------
+ - Bug fixes
+
v2.16.0 - 16 Aug 2021
---------------------
- Removed support for 1024-bit Diffie-Hellman key exchange, which is now
diff --git a/acinclude.m4 b/acinclude.m4
index dba365ea1a..1b957c3dcd 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -77,7 +77,7 @@ dnl Checks if compiler and binutils supports AVX512.
AC_DEFUN([OVS_CHECK_AVX512], [
OVS_CHECK_BINUTILS_AVX512
OVS_CHECK_CC_OPTION(
- [-mavx512f], [ovs_have_cc_mavx512f=yes], [ovs_have_cc_mavx512f=no])
+ [-mavx512f -mavx512vpopcntdq], [ovs_have_cc_mavx512f=yes], [ovs_have_cc_mavx512f=no])
AM_CONDITIONAL([HAVE_AVX512F], [test $ovs_have_cc_mavx512f = yes])
if test "$ovs_have_cc_mavx512f" = yes; then
AC_DEFINE([HAVE_AVX512F], [1],
@@ -305,6 +305,13 @@ AC_DEFUN([OVS_CHECK_LINUX_TC], [
])],
[AC_DEFINE([HAVE_TCA_SKBEDIT_FLAGS], [1],
[Define to 1 if TCA_SKBEDIT_FLAGS is available.])])
+
+ AC_COMPILE_IFELSE([
+ AC_LANG_PROGRAM([#include <linux/gen_stats.h>], [
+ int x = TCA_STATS_PKT64;
+ ])],
+ [AC_DEFINE([HAVE_TCA_STATS_PKT64], [1],
+ [Define to 1 if TCA_STATS_PKT64 is available.])])
])
dnl OVS_CHECK_LINUX_SCTP_CT
@@ -1417,7 +1424,7 @@ AC_DEFUN([OVS_ENABLE_SPARSE],
: ${SPARSE=sparse}
AC_SUBST([SPARSE])
AC_CONFIG_COMMANDS_PRE(
- [CC='$(if $(C:0=),env REAL_CC="'"$CC"'" CHECK="$(SPARSE) $(SPARSE_WERROR) -I $(top_srcdir)/include/sparse $(SPARSEFLAGS) $(SPARSE_EXTRA_INCLUDES) " cgcc $(CGCCFLAGS),'"$CC"')'])
+ [CC='$(if $(C:0=),env REAL_CC="'"$CC"'" CHECK="$(SPARSE) $(SPARSE_WERROR) -I $(top_srcdir)/include/sparse -I $(top_srcdir)/include $(SPARSEFLAGS) $(SPARSE_EXTRA_INCLUDES) " cgcc $(CGCCFLAGS),'"$CC"')'])
AC_ARG_ENABLE(
[sparse],
diff --git a/configure.ac b/configure.ac
index 16b32be965..406df116ee 100644
--- a/configure.ac
+++ b/configure.ac
@@ -13,7 +13,7 @@
# limitations under the License.
AC_PREREQ(2.63)
-AC_INIT(openvswitch, 2.16.0, bugs@openvswitch.org)
+AC_INIT(openvswitch, 2.16.5, bugs@openvswitch.org)
AC_CONFIG_SRCDIR([datapath/datapath.c])
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_AUX_DIR([build-aux])
diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c
index e130c2f966..218e7db814 100644
--- a/datapath-windows/ovsext/Actions.c
+++ b/datapath-windows/ovsext/Actions.c
@@ -1112,9 +1112,9 @@ OvsPopFieldInPacketBuf(OvsForwardingContext *ovsFwdCtx,
* should split the function and refactor. */
if (!bufferData) {
EthHdr *ethHdr = (EthHdr *)bufferStart;
- /* If the frame is not VLAN make it a no op */
if (ethHdr->Type != ETH_TYPE_802_1PQ_NBO) {
- return NDIS_STATUS_SUCCESS;
+ OVS_LOG_ERROR("Invalid ethHdr type %u, nbl %p", ethHdr->Type, ovsFwdCtx->curNbl);
+ return NDIS_STATUS_INVALID_PACKET;
}
}
RtlMoveMemory(bufferStart + shiftLength, bufferStart, shiftOffset);
@@ -1137,6 +1137,9 @@ OvsPopFieldInPacketBuf(OvsForwardingContext *ovsFwdCtx,
static __inline NDIS_STATUS
OvsPopVlanInPktBuf(OvsForwardingContext *ovsFwdCtx)
{
+ NDIS_STATUS status;
+ OVS_PACKET_HDR_INFO* layers = &ovsFwdCtx->layers;
+
/*
* Declare a dummy vlanTag structure since we need to compute the size
* of shiftLength. The NDIS one is a unionized structure.
@@ -1145,7 +1148,15 @@ OvsPopVlanInPktBuf(OvsForwardingContext *ovsFwdCtx)
UINT32 shiftLength = sizeof(vlanTag.TagHeader);
UINT32 shiftOffset = sizeof(DL_EUI48) + sizeof(DL_EUI48);
- return OvsPopFieldInPacketBuf(ovsFwdCtx, shiftOffset, shiftLength, NULL);
+ status = OvsPopFieldInPacketBuf(ovsFwdCtx, shiftOffset, shiftLength,
+ NULL);
+
+ if (status == NDIS_STATUS_SUCCESS) {
+ layers->l3Offset -= (UINT16) shiftLength;
+ layers->l4Offset -= (UINT16) shiftLength;
+ }
+
+ return status;
}
@@ -1516,6 +1527,7 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx,
csumInfo.Value = NET_BUFFER_LIST_INFO(ovsFwdCtx->curNbl,
TcpIpChecksumNetBufferListInfo);
+
/*
* Adjust the IP header inline as dictated by the action, and also update
* the IP and the TCP checksum for the data modified.
@@ -1524,6 +1536,7 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx,
* ChecksumUpdate32(). Ignoring this for now, since for the most common
* case, we only update the TTL.
*/
+ /* Only in the tx direction is the checksum reset to the pseudo checksum. */
if (isSource) {
addrField = &ipHdr->saddr;
@@ -1540,7 +1553,7 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx,
((BOOLEAN)csumInfo.Receive.UdpChecksumSucceeded ||
(BOOLEAN)csumInfo.Receive.UdpChecksumFailed);
}
- if (l4Offload) {
+ if (isTx && l4Offload) {
*checkField = IPPseudoChecksum(&newAddr, &ipHdr->daddr,
tcpHdr ? IPPROTO_TCP : IPPROTO_UDP,
ntohs(ipHdr->tot_len) - ipHdr->ihl * 4);
@@ -1561,7 +1574,7 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx,
(BOOLEAN)csumInfo.Receive.UdpChecksumFailed);
}
- if (l4Offload) {
+ if (isTx && l4Offload) {
*checkField = IPPseudoChecksum(&ipHdr->saddr, &newAddr,
tcpHdr ? IPPROTO_TCP : IPPROTO_UDP,
ntohs(ipHdr->tot_len) - ipHdr->ihl * 4);
@@ -1570,7 +1583,7 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx,
if (*addrField != newAddr) {
UINT32 oldAddr = *addrField;
- if (checkField && *checkField != 0 && !l4Offload) {
+ if ((checkField && *checkField != 0) && (!l4Offload || !isTx)) {
/* Recompute total checksum. */
*checkField = ChecksumUpdate32(*checkField, oldAddr,
newAddr);
@@ -1579,11 +1592,12 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx,
ipHdr->check = ChecksumUpdate32(ipHdr->check, oldAddr,
newAddr);
}
+
*addrField = newAddr;
}
if (portField && *portField != newPort) {
- if (checkField && !l4Offload) {
+ if ((checkField) && (!l4Offload || !isTx)) {
/* Recompute total checksum. */
*checkField = ChecksumUpdate16(*checkField, *portField,
newPort);
@@ -1698,6 +1712,15 @@ OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx,
ipHdr->ttl = ipAttr->ipv4_ttl;
key->ipKey.nwTtl = ipAttr->ipv4_ttl;
}
+ if (ipHdr->dscp != (ipAttr->ipv4_tos & 0xfc)) {
+ /* ECN + DSCP */
+ UINT8 newTos = (ipHdr->tos & 0x3) | (ipAttr->ipv4_tos & 0xfc);
+ if (ipHdr->check != 0) {
+ ipHdr->check = ChecksumUpdate16(ipHdr->check, ipHdr->tos, newTos);
+ }
+ ipHdr->tos = newTos;
+ key->ipKey.nwTos = newTos;
+ }
return NDIS_STATUS_SUCCESS;
}
@@ -1792,9 +1815,11 @@ OvsExecuteRecirc(OvsForwardingContext *ovsFwdCtx,
}
if (newNbl) {
- deferredAction = OvsAddDeferredActions(newNbl, key, NULL);
+ deferredAction = OvsAddDeferredActions(newNbl, key, &(ovsFwdCtx->layers),
+ NULL);
} else {
- deferredAction = OvsAddDeferredActions(ovsFwdCtx->curNbl, key, NULL);
+ deferredAction = OvsAddDeferredActions(ovsFwdCtx->curNbl, key,
+ &(ovsFwdCtx->layers), NULL);
}
if (deferredAction) {
@@ -1964,7 +1989,7 @@ OvsExecuteSampleAction(OvsForwardingContext *ovsFwdCtx,
return STATUS_SUCCESS;
}
- if (!OvsAddDeferredActions(newNbl, key, a)) {
+ if (!OvsAddDeferredActions(newNbl, key, &(ovsFwdCtx->layers), a)) {
OVS_LOG_INFO(
"Deferred actions limit reached, dropping sample action.");
OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE);
@@ -2100,6 +2125,7 @@ OvsDoExecuteActions(POVS_SWITCH_CONTEXT switchContext,
*/
status = OvsPopVlanInPktBuf(&ovsFwdCtx);
if (status != NDIS_STATUS_SUCCESS) {
+ OVS_LOG_ERROR("OVS-pop vlan action failed status = %lu", status);
dropReason = L"OVS-pop vlan action failed";
goto dropit;
}
@@ -2349,7 +2375,7 @@ OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext,
if (status == STATUS_SUCCESS) {
status = OvsProcessDeferredActions(switchContext, completionList,
- portNo, sendFlags, layers);
+ portNo, sendFlags);
}
return status;
diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c
index 2610d626a0..7f1d2fb412 100644
--- a/datapath-windows/ovsext/Conntrack.c
+++ b/datapath-windows/ovsext/Conntrack.c
@@ -493,15 +493,32 @@ static __inline NDIS_STATUS
OvsDetectCtPacket(OvsForwardingContext *fwdCtx,
OvsFlowKey *key)
{
+ NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+ OvsFlowKey newFlowKey = { 0 };
+
switch (ntohs(key->l2.dlType)) {
case ETH_TYPE_IPV4:
if (key->ipKey.nwFrag != OVS_FRAG_TYPE_NONE) {
- return OvsProcessIpv4Fragment(fwdCtx->switchContext,
+ status = OvsProcessIpv4Fragment(fwdCtx->switchContext,
&fwdCtx->curNbl,
fwdCtx->completionList,
fwdCtx->fwdDetail->SourcePortId,
&fwdCtx->layers,
key->tunKey.tunnelId);
+ if (status == NDIS_STATUS_SUCCESS) {
+ /* After the Ipv4 Fragment is reassembled, update flow key as
+ L3 and L4 headers are not correct */
+ status =
+ OvsExtractFlow(fwdCtx->curNbl, fwdCtx->srcVportNo,
+ &newFlowKey, &fwdCtx->layers,
+ fwdCtx->tunKey.dst != 0 ? &fwdCtx->tunKey : NULL);
+ if (status != NDIS_STATUS_SUCCESS) {
+ OVS_LOG_ERROR("Extract flow failed Nbl %p", fwdCtx->curNbl);
+ return status;
+ }
+ *key = newFlowKey;
+ }
+ return status;
}
if (key->ipKey.nwProto == IPPROTO_TCP
|| key->ipKey.nwProto == IPPROTO_UDP
@@ -609,6 +626,31 @@ OvsReverseIcmpType(UINT8 type)
}
}
+static __inline void
+OvsPickupCtTupleAsLookupKey(POVS_CT_KEY ctKey, UINT16 zone, OvsFlowKey *flowKey)
+{
+ UINT32 ipAddrSrc = 0, ipAddrDst = 0;
+
+ if (!flowKey || !ctKey) return;
+
+ if (flowKey->l2.dlType == htons(ETH_TYPE_IPV4)) {
+ ipAddrSrc = flowKey->ct.tuple_ipv4.ipv4_src;
+ ipAddrDst = flowKey->ct.tuple_ipv4.ipv4_dst;
+
+ if ((ipAddrSrc > 0 && ipAddrDst > 0) &&
+ (zone == flowKey->ct.zone)) {
+ /* If the ct tuple_ipv4 in flowKey has non-zero addresses and ct.zone
+ * matches the zone parameter, pick up tuple_ipv4 as the lookup key.
+ */
+ ctKey->src.addr.ipv4 = flowKey->ct.tuple_ipv4.ipv4_src;
+ ctKey->dst.addr.ipv4 = flowKey->ct.tuple_ipv4.ipv4_dst;
+ ctKey->nw_proto = flowKey->ct.tuple_ipv4.ipv4_proto;
+ ctKey->src.port = flowKey->ct.tuple_ipv4.src_port;
+ ctKey->dst.port = flowKey->ct.tuple_ipv4.dst_port;
+ }
+ }
+}
+
static __inline NDIS_STATUS
OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
UINT16 zone,
@@ -629,6 +671,7 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
ctx->key.src.port = flowKey->ipKey.l4.tpSrc;
ctx->key.dst.port = flowKey->ipKey.l4.tpDst;
+
if (flowKey->ipKey.nwProto == IPPROTO_ICMP) {
ICMPHdr icmpStorage;
const ICMPHdr *icmp;
@@ -683,6 +726,10 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
/* Translate address first for reverse NAT */
ctx->key = natEntry->ctEntry->key;
OvsCtKeyReverse(&ctx->key);
+ } else {
+ if (flowKey->l2.dlType == htons(ETH_TYPE_IPV4)) {
+ OvsPickupCtTupleAsLookupKey(&(ctx->key), zone, flowKey);
+ }
}
ctx->hash = OvsCtHashKey(&ctx->key);
diff --git a/datapath-windows/ovsext/PacketIO.c b/datapath-windows/ovsext/PacketIO.c
index cc0840704a..2a206305ec 100644
--- a/datapath-windows/ovsext/PacketIO.c
+++ b/datapath-windows/ovsext/PacketIO.c
@@ -45,7 +45,9 @@ extern NDIS_STRING ovsExtFriendlyNameUC;
static VOID OvsFinalizeCompletionList(OvsCompletionList *completionList);
static VOID OvsCompleteNBLIngress(POVS_SWITCH_CONTEXT switchContext,
- PNET_BUFFER_LIST netBufferLists, ULONG sendCompleteFlags);
+ PNET_BUFFER_LIST netBufferLists,
+ ULONG sendCompleteFlags,
+ BOOLEAN isSendComplete);
VOID
OvsInitCompletionList(OvsCompletionList *completionList,
@@ -155,7 +157,7 @@ OvsSendNBLIngress(POVS_SWITCH_CONTEXT switchContext,
OvsReportNBLIngressError(switchContext, netBufferLists, &filterReason,
NDIS_STATUS_PAUSED);
OvsCompleteNBLIngress(switchContext, netBufferLists,
- sendCompleteFlags);
+ sendCompleteFlags, FALSE);
return;
}
@@ -175,6 +177,79 @@ OvsSendNBLIngress(POVS_SWITCH_CONTEXT switchContext,
NDIS_DEFAULT_PORT_NUMBER, sendFlags);
}
+static __inline BOOLEAN
+OvsCheckNBLSingleSource(PNET_BUFFER_LIST netBufferLists)
+{
+ UINT32 sourcePortId = 0;
+ BOOLEAN singleSource = TRUE;
+ PNET_BUFFER_LIST curNbl = netBufferLists;
+ PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info;
+
+ while (curNbl != NULL) {
+ info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl);
+ if (info == NULL) {
+ /* We are not able to determine the source port ID */
+ singleSource = FALSE;
+ OVS_LOG_INFO("nbl %p has no source port", curNbl);
+ break;
+ }
+ if (curNbl == netBufferLists) {
+ sourcePortId = info->SourcePortId;
+ } else if (info->SourcePortId != sourcePortId) {
+ singleSource = FALSE;
+ OVS_LOG_INFO("Source port in nbl %p is %u, not from %u",
+ curNbl, info->SourcePortId, sourcePortId);
+ break;
+ }
+ curNbl = NET_BUFFER_LIST_NEXT_NBL(curNbl);
+ }
+
+ return singleSource;
+}
+
+/*
+ * SendNetBufferListsCompleteHandler releases the NetBufferLists with flag
+ * NDIS_SEND_COMPLETE_FLAGS_SWITCH_SINGLE_SOURCE if all the NBLs have the same
+ * source port. For cloned NBLs the source port might be changed: although the
+ * cloned NBLs have the same source port, their parent NBLs may have different
+ * source ports, so we should have a check before passing the flag to
+ * NdisFSendNetBufferListsComplete.
+ */
+static __inline VOID
+OvsCompleteUpperLayerNBL(NDIS_HANDLE ndisHandle,
+ PNET_BUFFER_LIST netBufferLists,
+ ULONG sendCompleteFlags,
+ BOOLEAN isSendComplete)
+{
+ BOOLEAN singleSource = TRUE;
+ PNET_BUFFER_LIST curNbl, nextNbl;
+
+ /* To check whether the NBLs are from the same source port */
+ if (isSendComplete &&
+ (sendCompleteFlags & NDIS_SEND_COMPLETE_FLAGS_SWITCH_SINGLE_SOURCE)) {
+ singleSource = OvsCheckNBLSingleSource(netBufferLists);
+ }
+
+ if (singleSource) {
+ NdisFSendNetBufferListsComplete(ndisHandle,
+ netBufferLists,
+ sendCompleteFlags);
+ } else {
+ /*
+ * Not from a single source port, releasing the NBLs without flag
+ * NDIS_SEND_COMPLETE_FLAGS_SWITCH_SINGLE_SOURCE doesn't help, so
+ * let's release them one by one.
+ */
+ for (curNbl = netBufferLists; curNbl != NULL; curNbl = nextNbl) {
+ nextNbl = NET_BUFFER_LIST_NEXT_NBL(curNbl);
+ NET_BUFFER_LIST_NEXT_NBL(curNbl) = NULL;
+ NdisFSendNetBufferListsComplete(ndisHandle,
+ curNbl,
+ sendCompleteFlags);
+ }
+ }
+}
+
static __inline VOID
OvsStartNBLIngressError(POVS_SWITCH_CONTEXT switchContext,
PNET_BUFFER_LIST nblList,
@@ -184,8 +259,8 @@ OvsStartNBLIngressError(POVS_SWITCH_CONTEXT switchContext,
{
ASSERT(error);
OvsReportNBLIngressError(switchContext, nblList, filterReason, error);
- NdisFSendNetBufferListsComplete(switchContext->NdisFilterHandle, nblList,
- sendCompleteFlags);
+ OvsCompleteUpperLayerNBL(switchContext->NdisFilterHandle, nblList,
+ sendCompleteFlags, FALSE);
}
static VOID
@@ -427,7 +502,8 @@ OvsExtSendNBL(NDIS_HANDLE filterModuleContext,
static VOID
OvsCompleteNBLIngress(POVS_SWITCH_CONTEXT switchContext,
PNET_BUFFER_LIST netBufferLists,
- ULONG sendCompleteFlags)
+ ULONG sendCompleteFlags,
+ BOOLEAN isSendComplete)
{
PNET_BUFFER_LIST curNbl = NULL, nextNbl = NULL;
OvsCompletionList newList;
@@ -449,8 +525,10 @@ OvsCompleteNBLIngress(POVS_SWITCH_CONTEXT switchContext,
/* Complete the NBL's that were sent by the upper layer. */
if (newList.dropNbl != NULL) {
- NdisFSendNetBufferListsComplete(switchContext->NdisFilterHandle, newList.dropNbl,
- sendCompleteFlags);
+ OvsCompleteUpperLayerNBL(switchContext->NdisFilterHandle,
+ newList.dropNbl,
+ sendCompleteFlags,
+ isSendComplete);
}
}
@@ -466,7 +544,7 @@ OvsExtSendNBLComplete(NDIS_HANDLE filterModuleContext,
ULONG sendCompleteFlags)
{
OvsCompleteNBLIngress((POVS_SWITCH_CONTEXT)filterModuleContext,
- netBufferLists, sendCompleteFlags);
+ netBufferLists, sendCompleteFlags, TRUE);
}
@@ -476,7 +554,8 @@ OvsFinalizeCompletionList(OvsCompletionList *completionList)
if (completionList->dropNbl != NULL) {
OvsCompleteNBLIngress(completionList->switchContext,
completionList->dropNbl,
- completionList->sendCompleteFlags);
+ completionList->sendCompleteFlags,
+ FALSE);
completionList->dropNbl = NULL;
completionList->dropNblNext = &completionList->dropNbl;
diff --git a/datapath-windows/ovsext/Recirc.c b/datapath-windows/ovsext/Recirc.c
index 2febf060dd..7a688c8742 100644
--- a/datapath-windows/ovsext/Recirc.c
+++ b/datapath-windows/ovsext/Recirc.c
@@ -277,16 +277,23 @@ OvsDeferredActionsQueuePush(POVS_DEFERRED_ACTION_QUEUE queue)
POVS_DEFERRED_ACTION
OvsAddDeferredActions(PNET_BUFFER_LIST nbl,
OvsFlowKey *key,
+ POVS_PACKET_HDR_INFO layers,
const PNL_ATTR actions)
{
POVS_DEFERRED_ACTION_QUEUE queue = OvsDeferredActionsQueueGet();
POVS_DEFERRED_ACTION deferredAction = NULL;
+ OVS_PACKET_HDR_INFO layersInit = { 0 };
deferredAction = OvsDeferredActionsQueuePush(queue);
if (deferredAction) {
deferredAction->nbl = nbl;
deferredAction->actions = actions;
deferredAction->key = *key;
+ if (layers) {
+ deferredAction->layers = *layers;
+ } else {
+ deferredAction->layers = layersInit;
+ }
}
return deferredAction;
@@ -303,15 +310,17 @@ NDIS_STATUS
OvsProcessDeferredActions(POVS_SWITCH_CONTEXT switchContext,
OvsCompletionList *completionList,
UINT32 portNo,
- ULONG sendFlags,
- OVS_PACKET_HDR_INFO *layers)
+ ULONG sendFlags)
{
NDIS_STATUS status = NDIS_STATUS_SUCCESS;
POVS_DEFERRED_ACTION_QUEUE queue = OvsDeferredActionsQueueGet();
POVS_DEFERRED_ACTION deferredAction = NULL;
+ POVS_PACKET_HDR_INFO layersDeferred = NULL;
/* Process all deferred actions. */
while ((deferredAction = OvsDeferredActionsQueuePop(queue)) != NULL) {
+ layersDeferred = &(deferredAction->layers);
+
if (deferredAction->actions) {
status = OvsDoExecuteActions(switchContext,
completionList,
@@ -319,7 +328,7 @@ OvsProcessDeferredActions(POVS_SWITCH_CONTEXT switchContext,
portNo,
sendFlags,
&deferredAction->key, NULL,
- layers, deferredAction->actions,
+ layersDeferred, deferredAction->actions,
NlAttrGetSize(deferredAction->actions));
} else {
status = OvsDoRecirc(switchContext,
@@ -327,7 +336,7 @@ OvsProcessDeferredActions(POVS_SWITCH_CONTEXT switchContext,
deferredAction->nbl,
&deferredAction->key,
portNo,
- layers);
+ layersDeferred);
}
}
diff --git a/datapath-windows/ovsext/Recirc.h b/datapath-windows/ovsext/Recirc.h
index 2b314ce274..b2d02a65c2 100644
--- a/datapath-windows/ovsext/Recirc.h
+++ b/datapath-windows/ovsext/Recirc.h
@@ -18,6 +18,7 @@
#define __RECIRC_H_ 1
#include "Actions.h"
+#include "NetProto.h"
#define DEFERRED_ACTION_QUEUE_SIZE 10
#define DEFERRED_ACTION_EXEC_LEVEL 4
@@ -26,6 +27,7 @@ typedef struct _OVS_DEFERRED_ACTION {
PNET_BUFFER_LIST nbl;
PNL_ATTR actions;
OvsFlowKey key;
+ OVS_PACKET_HDR_INFO layers;
} OVS_DEFERRED_ACTION, *POVS_DEFERRED_ACTION;
/*
@@ -39,8 +41,7 @@ NDIS_STATUS
OvsProcessDeferredActions(POVS_SWITCH_CONTEXT switchContext,
OvsCompletionList *completionList,
UINT32 portNo,
- ULONG sendFlags,
- OVS_PACKET_HDR_INFO *layers);
+ ULONG sendFlags);
/*
* --------------------------------------------------------------------------
@@ -52,6 +53,7 @@ OvsProcessDeferredActions(POVS_SWITCH_CONTEXT switchContext,
POVS_DEFERRED_ACTION
OvsAddDeferredActions(PNET_BUFFER_LIST packet,
OvsFlowKey *key,
+ POVS_PACKET_HDR_INFO layers,
const PNL_ATTR actions);
/*
diff --git a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h b/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
index 4cce92f66c..bc18c56b81 100644
--- a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
+++ b/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
@@ -108,7 +108,14 @@ static inline bool rpl_nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
static inline unsigned int
rpl_nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
{
- return nf_conntrack_in(state->net, state->pf, state->hook, skb);
+ int err;
+
+ /* Repeat if requested, see nf_iterate(). */
+ do {
+ err = nf_conntrack_in(state->net, state->pf, state->hook, skb);
+ } while (err == NF_REPEAT);
+
+ return err;
}
#define nf_conntrack_in rpl_nf_conntrack_in
#endif /* HAVE_NF_CONNTRACK_IN_TAKES_NF_HOOK_STATE */
diff --git a/debian/changelog b/debian/changelog
index 239d210b96..522e10b0e5 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,33 @@
+openvswitch (2.16.5-1) unstable; urgency=low
+ [ Open vSwitch team ]
+ * New upstream version
+
+ -- Open vSwitch team <dev@openvswitch.org> Wed, 15 Jun 2022 12:03:55 +0200
+
+openvswitch (2.16.4-1) unstable; urgency=low
+ [ Open vSwitch team ]
+ * New upstream version
+
+ -- Open vSwitch team <dev@openvswitch.org> Wed, 15 Jun 2022 12:03:55 +0200
+
+openvswitch (2.16.3-1) unstable; urgency=low
+ [ Open vSwitch team ]
+ * New upstream version
+
+ -- Open vSwitch team <dev@openvswitch.org> Fri, 08 Apr 2022 14:57:43 +0200
+
+openvswitch (2.16.2-1) unstable; urgency=low
+ [ Open vSwitch team ]
+ * New upstream version
+
+ -- Open vSwitch team <dev@openvswitch.org> Fri, 17 Dec 2021 22:14:03 +0100
+
+openvswitch (2.16.1-1) unstable; urgency=low
+ [ Open vSwitch team ]
+ * New upstream version
+
+ -- Open vSwitch team <dev@openvswitch.org> Thu, 21 Oct 2021 23:58:12 +0200
+
openvswitch (2.16.0-1) unstable; urgency=low
* New upstream version
diff --git a/dpdk/lib/librte_vhost/vhost_user.c b/dpdk/lib/librte_vhost/vhost_user.c
index 45c8ac09da..70d206dcf8 100644
--- a/dpdk/lib/librte_vhost/vhost_user.c
+++ b/dpdk/lib/librte_vhost/vhost_user.c
@@ -1416,6 +1416,9 @@ vhost_user_get_inflight_fd(struct virtio_net **pdev,
int fd, i, j;
void *addr;
+ if (validate_msg_fds(msg, 0) != 0)
+ return RTE_VHOST_MSG_RESULT_ERR;
+
if (msg->size != sizeof(msg->payload.inflight)) {
VHOST_LOG_CONFIG(ERR,
"invalid get_inflight_fd message size is %d\n",
@@ -1509,6 +1512,9 @@ vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg,
void *addr;
int fd, i;
+ if (validate_msg_fds(msg, 1) != 0)
+ return RTE_VHOST_MSG_RESULT_ERR;
+
fd = msg->fds[0];
if (msg->size != sizeof(msg->payload.inflight) || fd < 0) {
VHOST_LOG_CONFIG(ERR,
@@ -2652,6 +2658,9 @@ vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev,
case VHOST_USER_SET_VRING_ADDR:
vring_idx = msg->payload.addr.index;
break;
+ case VHOST_USER_SET_INFLIGHT_FD:
+ vring_idx = msg->payload.inflight.num_queues - 1;
+ break;
default:
return 0;
}
diff --git a/include/linux/automake.mk b/include/linux/automake.mk
index 8f063f482e..f857c7e088 100644
--- a/include/linux/automake.mk
+++ b/include/linux/automake.mk
@@ -2,6 +2,7 @@ noinst_HEADERS += \
include/linux/netlink.h \
include/linux/netfilter/nf_conntrack_sctp.h \
include/linux/pkt_cls.h \
+ include/linux/gen_stats.h \
include/linux/tc_act/tc_mpls.h \
include/linux/tc_act/tc_pedit.h \
include/linux/tc_act/tc_skbedit.h \
diff --git a/include/linux/gen_stats.h b/include/linux/gen_stats.h
new file mode 100644
index 0000000000..6fae6f727c
--- /dev/null
+++ b/include/linux/gen_stats.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_GEN_STATS_WRAPPER_H
+#define __LINUX_GEN_STATS_WRAPPER_H 1
+
+#if defined(__KERNEL__) || defined(HAVE_TCA_STATS_PKT64)
+#include_next <linux/gen_stats.h>
+#else
+#include <linux/types.h>
+
+enum {
+ TCA_STATS_UNSPEC,
+ TCA_STATS_BASIC,
+ TCA_STATS_RATE_EST,
+ TCA_STATS_QUEUE,
+ TCA_STATS_APP,
+ TCA_STATS_RATE_EST64,
+ TCA_STATS_PAD,
+ TCA_STATS_BASIC_HW,
+ TCA_STATS_PKT64,
+ __TCA_STATS_MAX,
+};
+#define TCA_STATS_MAX (__TCA_STATS_MAX - 1)
+
+/**
+ * struct gnet_stats_basic - byte/packet throughput statistics
+ * @bytes: number of seen bytes
+ * @packets: number of seen packets
+ */
+struct gnet_stats_basic {
+ __u64 bytes;
+ __u32 packets;
+};
+
+/**
+ * struct gnet_stats_rate_est - rate estimator
+ * @bps: current byte rate
+ * @pps: current packet rate
+ */
+struct gnet_stats_rate_est {
+ __u32 bps;
+ __u32 pps;
+};
+
+/**
+ * struct gnet_stats_rate_est64 - rate estimator
+ * @bps: current byte rate
+ * @pps: current packet rate
+ */
+struct gnet_stats_rate_est64 {
+ __u64 bps;
+ __u64 pps;
+};
+
+/**
+ * struct gnet_stats_queue - queuing statistics
+ * @qlen: queue length
+ * @backlog: backlog size of queue
+ * @drops: number of dropped packets
+ * @requeues: number of requeues
+ * @overlimits: number of enqueues over the limit
+ */
+struct gnet_stats_queue {
+ __u32 qlen;
+ __u32 backlog;
+ __u32 drops;
+ __u32 requeues;
+ __u32 overlimits;
+};
+
+/**
+ * struct gnet_estimator - rate estimator configuration
+ * @interval: sampling period
+ * @ewma_log: the log of measurement window weight
+ */
+struct gnet_estimator {
+ signed char interval;
+ unsigned char ewma_log;
+};
+
+#endif /* __KERNEL__ || HAVE_TCA_STATS_PKT64 */
+#endif /* __LINUX_GEN_STATS_WRAPPER_H */
diff --git a/include/openvswitch/flow.h b/include/openvswitch/flow.h
index 3054015d93..df10cf579e 100644
--- a/include/openvswitch/flow.h
+++ b/include/openvswitch/flow.h
@@ -141,15 +141,14 @@ struct flow {
uint8_t nw_tos; /* IP ToS (including DSCP and ECN). */
uint8_t nw_ttl; /* IP TTL/Hop Limit. */
uint8_t nw_proto; /* IP protocol or low 8 bits of ARP opcode. */
+ /* L4 (64-bit aligned) */
struct in6_addr nd_target; /* IPv6 neighbor discovery (ND) target. */
struct eth_addr arp_sha; /* ARP/ND source hardware address. */
struct eth_addr arp_tha; /* ARP/ND target hardware address. */
- ovs_be16 tcp_flags; /* TCP flags/ICMPv6 ND options type.
- * With L3 to avoid matching L4. */
+ ovs_be16 tcp_flags; /* TCP flags/ICMPv6 ND options type. */
ovs_be16 pad2; /* Pad to 64 bits. */
struct ovs_key_nsh nsh; /* Network Service Header keys */
- /* L4 (64-bit aligned) */
ovs_be16 tp_src; /* TCP/UDP/SCTP source port/ICMP type. */
ovs_be16 tp_dst; /* TCP/UDP/SCTP destination port/ICMP code. */
ovs_be16 ct_tp_src; /* CT original tuple source port/ICMP type. */
@@ -179,7 +178,7 @@ BUILD_ASSERT_DECL(offsetof(struct flow, igmp_group_ip4) + sizeof(uint32_t)
enum {
FLOW_SEGMENT_1_ENDS_AT = offsetof(struct flow, dl_dst),
FLOW_SEGMENT_2_ENDS_AT = offsetof(struct flow, nw_src),
- FLOW_SEGMENT_3_ENDS_AT = offsetof(struct flow, tp_src),
+ FLOW_SEGMENT_3_ENDS_AT = offsetof(struct flow, nd_target),
};
BUILD_ASSERT_DECL(FLOW_SEGMENT_1_ENDS_AT % sizeof(uint64_t) == 0);
BUILD_ASSERT_DECL(FLOW_SEGMENT_2_ENDS_AT % sizeof(uint64_t) == 0);
diff --git a/include/openvswitch/hmap.h b/include/openvswitch/hmap.h
index 4e001cc692..68c284cf14 100644
--- a/include/openvswitch/hmap.h
+++ b/include/openvswitch/hmap.h
@@ -134,17 +134,17 @@ struct hmap_node *hmap_random_node(const struct hmap *);
* without using 'break', NODE will be NULL. This is true for all of the
* HMAP_FOR_EACH_*() macros.
*/
-#define HMAP_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, HMAP) \
- for (INIT_CONTAINER(NODE, hmap_first_with_hash(HMAP, HASH), MEMBER); \
- (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \
- || ((NODE = NULL), false); \
- ASSIGN_CONTAINER(NODE, hmap_next_with_hash(&(NODE)->MEMBER), \
- MEMBER))
-#define HMAP_FOR_EACH_IN_BUCKET(NODE, MEMBER, HASH, HMAP) \
- for (INIT_CONTAINER(NODE, hmap_first_in_bucket(HMAP, HASH), MEMBER); \
- (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \
- || ((NODE = NULL), false); \
- ASSIGN_CONTAINER(NODE, hmap_next_in_bucket(&(NODE)->MEMBER), MEMBER))
+#define HMAP_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, HMAP) \
+ for (INIT_MULTIVAR(NODE, MEMBER, hmap_first_with_hash(HMAP, HASH), \
+ struct hmap_node); \
+ CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \
+ UPDATE_MULTIVAR(NODE, hmap_next_with_hash(ITER_VAR(NODE))))
+
+#define HMAP_FOR_EACH_IN_BUCKET(NODE, MEMBER, HASH, HMAP) \
+ for (INIT_MULTIVAR(NODE, MEMBER, hmap_first_in_bucket(HMAP, HASH), \
+ struct hmap_node); \
+ CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \
+ UPDATE_MULTIVAR(NODE, hmap_next_in_bucket(ITER_VAR(NODE))))
static inline struct hmap_node *hmap_first_with_hash(const struct hmap *,
size_t hash);
@@ -170,54 +170,62 @@ bool hmap_contains(const struct hmap *, const struct hmap_node *);
/* Iterates through every node in HMAP. */
#define HMAP_FOR_EACH(NODE, MEMBER, HMAP) \
HMAP_FOR_EACH_INIT(NODE, MEMBER, HMAP, (void) 0)
-#define HMAP_FOR_EACH_INIT(NODE, MEMBER, HMAP, ...) \
- for (INIT_CONTAINER(NODE, hmap_first(HMAP), MEMBER), __VA_ARGS__; \
- (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \
- || ((NODE = NULL), false); \
- ASSIGN_CONTAINER(NODE, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER))
+#define HMAP_FOR_EACH_INIT(NODE, MEMBER, HMAP, ...) \
+ for (INIT_MULTIVAR_EXP(NODE, MEMBER, hmap_first(HMAP), struct hmap_node, \
+ __VA_ARGS__); \
+ CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \
+ UPDATE_MULTIVAR(NODE, hmap_next(HMAP, ITER_VAR(NODE))))
/* Safe when NODE may be freed (not needed when NODE may be removed from the
* hash map but its members remain accessible and intact). */
#define HMAP_FOR_EACH_SAFE(NODE, NEXT, MEMBER, HMAP) \
- HMAP_FOR_EACH_SAFE_INIT(NODE, NEXT, MEMBER, HMAP, (void) 0)
-#define HMAP_FOR_EACH_SAFE_INIT(NODE, NEXT, MEMBER, HMAP, ...) \
- for (INIT_CONTAINER(NODE, hmap_first(HMAP), MEMBER), __VA_ARGS__; \
- ((NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \
- || ((NODE = NULL), false) \
- ? INIT_CONTAINER(NEXT, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER), 1 \
- : 0); \
- (NODE) = (NEXT))
+ HMAP_FOR_EACH_SAFE_INIT (NODE, NEXT, MEMBER, HMAP, (void) NEXT)
+
+#define HMAP_FOR_EACH_SAFE_INIT(NODE, NEXT, MEMBER, HMAP, ...) \
+ for (INIT_MULTIVAR_SAFE_LONG_EXP(NODE, NEXT, MEMBER, hmap_first(HMAP), \
+ struct hmap_node, __VA_ARGS__); \
+ CONDITION_MULTIVAR_SAFE_LONG(NODE, NEXT, MEMBER, \
+ ITER_VAR(NODE) != NULL, \
+ ITER_VAR(NEXT) = hmap_next(HMAP, ITER_VAR(NODE)), \
+ ITER_VAR(NEXT) != NULL); \
+ UPDATE_MULTIVAR_SAFE_LONG(NODE, NEXT))
/* Continues an iteration from just after NODE. */
#define HMAP_FOR_EACH_CONTINUE(NODE, MEMBER, HMAP) \
HMAP_FOR_EACH_CONTINUE_INIT(NODE, MEMBER, HMAP, (void) 0)
-#define HMAP_FOR_EACH_CONTINUE_INIT(NODE, MEMBER, HMAP, ...) \
- for (ASSIGN_CONTAINER(NODE, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER), \
- __VA_ARGS__; \
- (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \
- || ((NODE = NULL), false); \
- ASSIGN_CONTAINER(NODE, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER))
+#define HMAP_FOR_EACH_CONTINUE_INIT(NODE, MEMBER, HMAP, ...) \
+ for (INIT_MULTIVAR_EXP(NODE, MEMBER, hmap_next(HMAP, &(NODE)->MEMBER), \
+ struct hmap_node, __VA_ARGS__); \
+ CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \
+ UPDATE_MULTIVAR(NODE, hmap_next(HMAP, ITER_VAR(NODE))))
+
+/* Iteration state for HMAP_FOR_EACH_POP, updated by hmap_pop_helper__(). */
+struct hmap_pop_helper_iter__ {
+ size_t bucket; /* Index of the bucket the scan has reached. */
+ struct hmap_node *node; /* Node just removed, or NULL when none is left. */
+};
-static inline struct hmap_node *
-hmap_pop_helper__(struct hmap *hmap, size_t *bucket) {
+static inline void
+hmap_pop_helper__(struct hmap *hmap, struct hmap_pop_helper_iter__ *iter) {
- for (; *bucket <= hmap->mask; (*bucket)++) {
- struct hmap_node *node = hmap->buckets[*bucket];
+ for (; iter->bucket <= hmap->mask; (iter->bucket)++) {
+ struct hmap_node *node = hmap->buckets[iter->bucket];
if (node) {
hmap_remove(hmap, node);
- return node;
+ iter->node = node;
+ return;
}
}
-
- return NULL;
+ iter->node = NULL;
}
-#define HMAP_FOR_EACH_POP(NODE, MEMBER, HMAP) \
- for (size_t bucket__ = 0; \
- INIT_CONTAINER(NODE, hmap_pop_helper__(HMAP, &bucket__), MEMBER), \
- (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \
- || ((NODE = NULL), false);)
+/* Removes the nodes of HMAP one at a time, running the loop body once per
+ * removed node with NODE set to the object that contains it. Every
+ * evaluation of the loop condition calls hmap_pop_helper__(), so there is no
+ * separate update expression. When no node is left, NODE is set to NULL and
+ * the loop ends. */
+#define HMAP_FOR_EACH_POP(NODE, MEMBER, HMAP) \
+ for (struct hmap_pop_helper_iter__ ITER_VAR(NODE) = { 0, NULL }; \
+ hmap_pop_helper__(HMAP, &ITER_VAR(NODE)), \
+ (ITER_VAR(NODE).node != NULL) ? \
+ (((NODE) = OBJECT_CONTAINING(ITER_VAR(NODE).node, \
+ NODE, MEMBER)),1): \
+ (((NODE) = NULL), 0);)
static inline struct hmap_node *hmap_first(const struct hmap *);
static inline struct hmap_node *hmap_next(const struct hmap *,
diff --git a/include/openvswitch/json.h b/include/openvswitch/json.h
index 73b562e03d..0831a9cee1 100644
--- a/include/openvswitch/json.h
+++ b/include/openvswitch/json.h
@@ -50,7 +50,9 @@ enum json_type {
JSON_INTEGER, /* 123. */
JSON_REAL, /* 123.456. */
JSON_STRING, /* "..." */
- JSON_N_TYPES
+ JSON_N_TYPES,
+ JSON_SERIALIZED_OBJECT, /* Internal type to hold serialized version of
+ * data of other types. */
};
const char *json_type_to_string(enum json_type);
@@ -70,7 +72,7 @@ struct json {
struct json_array array;
long long int integer;
double real;
- char *string;
+ char *string; /* JSON_STRING or JSON_SERIALIZED_OBJECT. */
};
};
@@ -78,6 +80,7 @@ struct json *json_null_create(void);
struct json *json_boolean_create(bool);
struct json *json_string_create(const char *);
struct json *json_string_create_nocopy(char *);
+struct json *json_serialized_object_create(const struct json *);
struct json *json_integer_create(long long int);
struct json *json_real_create(double);
@@ -99,6 +102,7 @@ void json_object_put_format(struct json *,
OVS_PRINTF_FORMAT(3, 4);
const char *json_string(const struct json *);
+const char *json_serialized_object(const struct json *);
struct json_array *json_array(const struct json *);
struct shash *json_object(const struct json *);
bool json_boolean(const struct json *);
@@ -125,6 +129,7 @@ struct json *json_parser_finish(struct json_parser *);
void json_parser_abort(struct json_parser *);
struct json *json_from_string(const char *string);
+struct json *json_from_serialized_object(const struct json *);
struct json *json_from_file(const char *file_name);
struct json *json_from_stream(FILE *stream);
diff --git a/include/openvswitch/list.h b/include/openvswitch/list.h
index 8ad5eeb327..bbd2edbd0c 100644
--- a/include/openvswitch/list.h
+++ b/include/openvswitch/list.h
@@ -72,37 +72,48 @@ static inline bool ovs_list_is_empty(const struct ovs_list *);
static inline bool ovs_list_is_singleton(const struct ovs_list *);
static inline bool ovs_list_is_short(const struct ovs_list *);
-#define LIST_FOR_EACH(ITER, MEMBER, LIST) \
- for (INIT_CONTAINER(ITER, (LIST)->next, MEMBER); \
- &(ITER)->MEMBER != (LIST); \
- ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.next, MEMBER))
-#define LIST_FOR_EACH_CONTINUE(ITER, MEMBER, LIST) \
- for (ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.next, MEMBER); \
- &(ITER)->MEMBER != (LIST); \
- ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.next, MEMBER))
-#define LIST_FOR_EACH_REVERSE(ITER, MEMBER, LIST) \
- for (INIT_CONTAINER(ITER, (LIST)->prev, MEMBER); \
- &(ITER)->MEMBER != (LIST); \
- ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER))
-#define LIST_FOR_EACH_REVERSE_SAFE(ITER, PREV, MEMBER, LIST) \
- for (INIT_CONTAINER(ITER, (LIST)->prev, MEMBER); \
- (&(ITER)->MEMBER != (LIST) \
- ? INIT_CONTAINER(PREV, (ITER)->MEMBER.prev, MEMBER), 1 \
- : 0); \
- (ITER) = (PREV))
-#define LIST_FOR_EACH_REVERSE_CONTINUE(ITER, MEMBER, LIST) \
- for (ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER); \
- &(ITER)->MEMBER != (LIST); \
- ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER))
-#define LIST_FOR_EACH_SAFE(ITER, NEXT, MEMBER, LIST) \
- for (INIT_CONTAINER(ITER, (LIST)->next, MEMBER); \
- (&(ITER)->MEMBER != (LIST) \
- ? INIT_CONTAINER(NEXT, (ITER)->MEMBER.next, MEMBER), 1 \
- : 0); \
- (ITER) = (NEXT))
-#define LIST_FOR_EACH_POP(ITER, MEMBER, LIST) \
- while (!ovs_list_is_empty(LIST) \
- && (INIT_CONTAINER(ITER, ovs_list_pop_front(LIST), MEMBER), 1))
+#define LIST_FOR_EACH(VAR, MEMBER, LIST) \
+ for (INIT_MULTIVAR(VAR, MEMBER, (LIST)->next, struct ovs_list); \
+ CONDITION_MULTIVAR(VAR, MEMBER, ITER_VAR(VAR) != (LIST)); \
+ UPDATE_MULTIVAR(VAR, ITER_VAR(VAR)->next))
+
+#define LIST_FOR_EACH_CONTINUE(VAR, MEMBER, LIST) \
+ for (INIT_MULTIVAR(VAR, MEMBER, VAR->MEMBER.next, struct ovs_list); \
+ CONDITION_MULTIVAR(VAR, MEMBER, ITER_VAR(VAR) != (LIST)); \
+ UPDATE_MULTIVAR(VAR, ITER_VAR(VAR)->next))
+
+#define LIST_FOR_EACH_REVERSE(VAR, MEMBER, LIST) \
+ for (INIT_MULTIVAR(VAR, MEMBER, (LIST)->prev, struct ovs_list); \
+ CONDITION_MULTIVAR(VAR, MEMBER, ITER_VAR(VAR) != (LIST)); \
+ UPDATE_MULTIVAR(VAR, ITER_VAR(VAR)->prev))
+
+#define LIST_FOR_EACH_REVERSE_CONTINUE(VAR, MEMBER, LIST) \
+ for (INIT_MULTIVAR(VAR, MEMBER, VAR->MEMBER.prev, struct ovs_list); \
+ CONDITION_MULTIVAR(VAR, MEMBER, ITER_VAR(VAR) != (LIST)); \
+ UPDATE_MULTIVAR(VAR, ITER_VAR(VAR)->prev))
+
+#define LIST_FOR_EACH_REVERSE_SAFE(VAR, PREV, MEMBER, LIST) \
+ for (INIT_MULTIVAR_SAFE_LONG(VAR, PREV, MEMBER, (LIST)->prev, \
+ struct ovs_list); \
+ CONDITION_MULTIVAR_SAFE_LONG(VAR, PREV, MEMBER, \
+ ITER_VAR(VAR) != (LIST), \
+ ITER_VAR(PREV) = ITER_VAR(VAR)->prev, \
+ ITER_VAR(PREV) != (LIST)); \
+ UPDATE_MULTIVAR_SAFE_LONG(VAR, PREV))
+
+#define LIST_FOR_EACH_SAFE(VAR, NEXT, MEMBER, LIST) \
+ for (INIT_MULTIVAR_SAFE_LONG(VAR, NEXT, MEMBER, (LIST)->next, \
+ struct ovs_list); \
+ CONDITION_MULTIVAR_SAFE_LONG(VAR, NEXT, MEMBER, \
+ ITER_VAR(VAR) != (LIST), \
+ ITER_VAR(NEXT) = ITER_VAR(VAR)->next, \
+ ITER_VAR(NEXT) != (LIST)); \
+ UPDATE_MULTIVAR_SAFE_LONG(VAR, NEXT))
+
+#define LIST_FOR_EACH_POP(ITER, MEMBER, LIST) \
+ while (!ovs_list_is_empty(LIST) ? \
+ (INIT_CONTAINER(ITER, ovs_list_pop_front(LIST), MEMBER), 1) : \
+ (ITER = NULL, 0))
/* Inline implementations. */
diff --git a/include/openvswitch/meta-flow.h b/include/openvswitch/meta-flow.h
index 95e52e3587..045dce8f5f 100644
--- a/include/openvswitch/meta-flow.h
+++ b/include/openvswitch/meta-flow.h
@@ -2305,6 +2305,7 @@ void mf_set_flow_value_masked(const struct mf_field *,
const union mf_value *mask,
struct flow *);
bool mf_is_tun_metadata(const struct mf_field *);
+bool mf_is_frozen_metadata(const struct mf_field *);
bool mf_is_pipeline_field(const struct mf_field *);
bool mf_is_set(const struct mf_field *, const struct flow *);
void mf_mask_field(const struct mf_field *, struct flow_wildcards *);
diff --git a/include/openvswitch/util.h b/include/openvswitch/util.h
index 228b185c3a..8e6c46a85f 100644
--- a/include/openvswitch/util.h
+++ b/include/openvswitch/util.h
@@ -145,6 +145,150 @@ OVS_NO_RETURN void ovs_assert_failure(const char *, const char *, const char *);
#define INIT_CONTAINER(OBJECT, POINTER, MEMBER) \
((OBJECT) = NULL, ASSIGN_CONTAINER(OBJECT, POINTER, MEMBER))
+/* Multi-variable container iterators.
+ *
+ * The following macros facilitate safe iteration over data structures
+ * contained in objects. They do so by using an internal iterator variable of
+ * the type of the member object pointer (i.e., pointer to the data structure).
+ */
+
+/* Multi-variable iterator variable name.
+ * Returns the name of the internal iterator variable.
+ */
+#define ITER_VAR(NAME) NAME ## __iterator__
+
+/* Multi-variable initialization. Creates an internal iterator variable that
+ * points to the provided pointer. The type of the iterator variable is
+ * ITER_TYPE*. It must be the same type as &VAR->MEMBER.
+ *
+ * VAR must be a plain identifier because ITER_VAR() pastes it into the name
+ * of the iterator variable. POINTER may be any pointer expression; it is
+ * parenthesized before the cast so that the cast applies to the whole
+ * expression rather than to its first operand.
+ *
+ * The _EXP version evaluates the extra expressions once.
+ */
+#define INIT_MULTIVAR(VAR, MEMBER, POINTER, ITER_TYPE) \
+ INIT_MULTIVAR_EXP(VAR, MEMBER, POINTER, ITER_TYPE, (void) 0)
+
+#define INIT_MULTIVAR_EXP(VAR, MEMBER, POINTER, ITER_TYPE, ...) \
+ ITER_TYPE *ITER_VAR(VAR) = ( __VA_ARGS__ , (ITER_TYPE *) (POINTER))
+
+/* Multi-variable condition.
+ * Evaluates the condition expression (that must be based on the internal
+ * iterator variable). Only if the result of expression is true, the OBJECT is
+ * set to the object containing the current value of the iterator variable.
+ *
+ * It is up to the caller to make sure it is safe to run OBJECT_CONTAINING on
+ * the pointers that verify the condition.
+ */
+#define CONDITION_MULTIVAR(VAR, MEMBER, EXPR) \
+ ((EXPR) ? \
+ (((VAR) = OBJECT_CONTAINING(ITER_VAR(VAR), VAR, MEMBER)), 1) : \
+ (((VAR) = NULL), 0))
+
+/* Multi-variable update.
+ * Sets the iterator value to NEXT_ITER.
+ */
+#define UPDATE_MULTIVAR(VAR, NEXT_ITER) \
+ (ITER_VAR(VAR) = NEXT_ITER)
+
+/* In the safe version of the multi-variable container iteration, the next
+ * value of the iterator is precalculated on the condition expression.
+ * This allows for the iterator to be freed inside the loop.
+ *
+ * Two versions of the macros are provided:
+ *
+ * * In the _SHORT version, the user does not have to provide a variable to
+ * store the next value of the iterator. Instead, a second iterator variable
+ * is declared in the INIT_ macro and its name is determined by
+ * ITER_NEXT_VAR(OBJECT).
+ *
+ * * In the _LONG version, the user provides another variable of the same type
+ * as the iterator object variable to store the next containing object.
+ * We still declare an iterator variable inside the loop but in this case its
+ * name is derived from the name of the next containing variable.
+ * The value of the next containing object will only be set
+ * (via OBJECT_CONTAINING) if an additional condition is satisfied. This
+ * second condition must ensure it is safe to call OBJECT_CONTAINING on the
+ * next iterator variable.
+ * With respect to the value of the next containing object:
+ * - Inside of the loop: the variable is either NULL or safe to use.
+ * - Outside of the loop: the variable is NULL if the loop ends normally.
+ * If the loop ends with a "break;" statement, rules of Inside the loop
+ * apply.
+ */
+#define ITER_NEXT_VAR(NAME) NAME ## __iterator__next__
+
+/* Safe initialization declares both iterators: ITER_VAR(VAR), initialized
+ * from POINTER, and ITER_NEXT_VAR(VAR), initialized to NULL. POINTER is
+ * parenthesized before the cast so that any pointer expression is accepted.
+ * The _EXP version evaluates the extra expressions once. */
+#define INIT_MULTIVAR_SAFE_SHORT(VAR, MEMBER, POINTER, ITER_TYPE) \
+ INIT_MULTIVAR_SAFE_SHORT_EXP(VAR, MEMBER, POINTER, ITER_TYPE, (void) 0)
+
+#define INIT_MULTIVAR_SAFE_SHORT_EXP(VAR, MEMBER, POINTER, ITER_TYPE, ...) \
+ ITER_TYPE *ITER_VAR(VAR) = ( __VA_ARGS__ , (ITER_TYPE *) (POINTER)), \
+ *ITER_NEXT_VAR(VAR) = NULL
+
+/* Evaluate the condition expression and, if satisfied, update the _next_
+ * iterator with the NEXT_EXPR.
+ * Both EXPR and NEXT_EXPR should only use ITER_VAR(VAR) and
+ * ITER_NEXT_VAR(VAR).
+ */
+#define CONDITION_MULTIVAR_SAFE_SHORT(VAR, MEMBER, EXPR, NEXT_EXPR) \
+ ((EXPR) ? \
+ (((VAR) = OBJECT_CONTAINING(ITER_VAR(VAR), VAR, MEMBER)), \
+ (NEXT_EXPR), 1) : \
+ (((VAR) = NULL), 0))
+
+#define UPDATE_MULTIVAR_SAFE_SHORT(VAR) \
+ UPDATE_MULTIVAR(VAR, ITER_NEXT_VAR(VAR))
+
+/* _LONG versions of the macros.
+ *
+ * INIT_MULTIVAR_SAFE_LONG declares two ITER_TYPE pointers: ITER_VAR(VAR),
+ * initialized from POINTER, and ITER_VAR(NEXT_VAR), initialized to NULL.
+ * The _EXP version first evaluates the extra expressions once. POINTER is
+ * parenthesized before the cast so that any pointer expression is accepted.
+ */
+
+#define INIT_MULTIVAR_SAFE_LONG(VAR, NEXT_VAR, MEMBER, POINTER, ITER_TYPE) \
+ INIT_MULTIVAR_SAFE_LONG_EXP(VAR, NEXT_VAR, MEMBER, POINTER, ITER_TYPE, \
+ (void) 0)
+
+#define INIT_MULTIVAR_SAFE_LONG_EXP(VAR, NEXT_VAR, MEMBER, POINTER, \
+ ITER_TYPE, ...) \
+ ITER_TYPE *ITER_VAR(VAR) = ( __VA_ARGS__ , (ITER_TYPE *) (POINTER)), \
+ *ITER_VAR(NEXT_VAR) = NULL
+
+/* Evaluate the condition expression and, if satisfied, update the _next_
+ * iterator with the NEXT_EXPR. After, evaluate the NEXT_COND and, if
+ * satisfied, set the value to NEXT_VAR. NEXT_COND must use ITER_VAR(NEXT_VAR).
+ *
+ * Both EXPR and NEXT_EXPR should only use ITER_VAR(VAR) and
+ * ITER_VAR(NEXT_VAR).
+ */
+#define CONDITION_MULTIVAR_SAFE_LONG(VAR, NEXT_VAR, MEMBER, EXPR, NEXT_EXPR, \
+ NEXT_COND) \
+ ((EXPR) ? \
+ (((VAR) = OBJECT_CONTAINING(ITER_VAR(VAR), VAR, MEMBER)), \
+ (NEXT_EXPR), ((NEXT_COND) ? \
+ ((NEXT_VAR) = \
+ OBJECT_CONTAINING(ITER_VAR(NEXT_VAR), NEXT_VAR, MEMBER)) : \
+ ((NEXT_VAR) = NULL)), 1) : \
+ (((VAR) = NULL), ((NEXT_VAR) = NULL), 0))
+
+#define UPDATE_MULTIVAR_SAFE_LONG(VAR, NEXT_VAR) \
+ UPDATE_MULTIVAR(VAR, ITER_VAR(NEXT_VAR))
+
+/* Helpers to allow overloading the *_SAFE iterator macros and select either
+ * the LONG or the SHORT version depending on the number of arguments.
+ */
+#define GET_SAFE_MACRO2(_1, _2, NAME, ...) NAME
+#define GET_SAFE_MACRO3(_1, _2, _3, NAME, ...) NAME
+#define GET_SAFE_MACRO4(_1, _2, _3, _4, NAME, ...) NAME
+#define GET_SAFE_MACRO5(_1, _2, _3, _4, _5, NAME, ...) NAME
+#define GET_SAFE_MACRO6(_1, _2, _3, _4, _5, _6, NAME, ...) NAME
+#define GET_SAFE_MACRO(MAX_ARGS) GET_SAFE_MACRO ## MAX_ARGS
+
+/* MSVC treats __VA_ARGS__ as a simple token in argument lists. Introduce
+ * a level of indirection to work around that. */
+#define EXPAND_MACRO(name, args) name args
+
+/* Overload the LONG and the SHORT version of the macros. MAX_ARGS is the
+ * maximum number of arguments (i.e: the number of arguments of the LONG
+ * version). */
+#define OVERLOAD_SAFE_MACRO(LONG, SHORT, MAX_ARGS, ...) \
+ EXPAND_MACRO(GET_SAFE_MACRO(MAX_ARGS), \
+ (__VA_ARGS__, LONG, SHORT))(__VA_ARGS__)
+
/* Returns the number of elements in ARRAY. */
#define ARRAY_SIZE(ARRAY) __ARRAY_SIZE(ARRAY)
@@ -285,6 +429,9 @@ is_pow2(uintmax_t x)
* segfault, so it is important to be aware of correct alignment. */
#define ALIGNED_CAST(TYPE, ATTR) ((TYPE) (void *) (ATTR))
+/* True if OBJ is a null pointer or if its address is a multiple of
+ * __alignof__(OVS_TYPEOF(OBJ)). OBJ may be evaluated more than once.
+ *
+ * NOTE(review): OVS_TYPEOF is defined outside this hunk; if it yields the
+ * type of OBJ itself, the alignment tested is that of the pointer type, not
+ * of the pointed-to object -- confirm that this is intended. */
+#define IS_PTR_ALIGNED(OBJ) \
+ (!(OBJ) || (uintptr_t) (OBJ) % __alignof__(OVS_TYPEOF(OBJ)) == 0)
+
#ifdef __cplusplus
}
#endif
diff --git a/ipsec/ovs-monitor-ipsec.in b/ipsec/ovs-monitor-ipsec.in
index 89a36fe17b..a8b0705d9f 100755
--- a/ipsec/ovs-monitor-ipsec.in
+++ b/ipsec/ovs-monitor-ipsec.in
@@ -202,18 +202,18 @@ conn prevent_unencrypted_vxlan
"""
auth_tmpl = {"psk": Template("""\
- left=0.0.0.0
+ left=%any
right=$remote_ip
authby=psk"""),
"pki_remote": Template("""\
- left=0.0.0.0
+ left=%any
right=$remote_ip
leftid=$local_name
rightid=$remote_name
leftcert=$certificate
rightcert=$remote_cert"""),
"pki_ca": Template("""\
- left=0.0.0.0
+ left=%any
right=$remote_ip
leftid=$local_name
rightid=$remote_name
@@ -299,11 +299,11 @@ conn prevent_unencrypted_vxlan
def config_tunnel(self, tunnel):
if tunnel.conf["psk"]:
- self.secrets_file.write('0.0.0.0 %s : PSK "%s"\n' %
+ self.secrets_file.write('%%any %s : PSK "%s"\n' %
(tunnel.conf["remote_ip"], tunnel.conf["psk"]))
auth_section = self.auth_tmpl["psk"].substitute(tunnel.conf)
else:
- self.secrets_file.write("0.0.0.0 %s : RSA %s\n" %
+ self.secrets_file.write("%%any %s : RSA %s\n" %
(tunnel.conf["remote_ip"],
tunnel.conf["private_key"]))
if tunnel.conf["remote_cert"]:
diff --git a/lib/bfd.c b/lib/bfd.c
index 3c965699ac..9698576d07 100644
--- a/lib/bfd.c
+++ b/lib/bfd.c
@@ -131,16 +131,17 @@ enum diag {
* | Required Min Echo RX Interval |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */
struct msg {
- uint8_t vers_diag; /* Version and diagnostic. */
- uint8_t flags; /* 2bit State field followed by flags. */
- uint8_t mult; /* Fault detection multiplier. */
- uint8_t length; /* Length of this BFD message. */
- ovs_be32 my_disc; /* My discriminator. */
- ovs_be32 your_disc; /* Your discriminator. */
- ovs_be32 min_tx; /* Desired minimum tx interval. */
- ovs_be32 min_rx; /* Required minimum rx interval. */
- ovs_be32 min_rx_echo; /* Required minimum echo rx interval. */
+ uint8_t vers_diag; /* Version and diagnostic. */
+ uint8_t flags; /* 2bit State field followed by flags. */
+ uint8_t mult; /* Fault detection multiplier. */
+ uint8_t length; /* Length of this BFD message. */
+ ovs_16aligned_be32 my_disc; /* My discriminator. */
+ ovs_16aligned_be32 your_disc; /* Your discriminator. */
+ ovs_16aligned_be32 min_tx; /* Desired minimum tx interval. */
+ ovs_16aligned_be32 min_rx; /* Required minimum rx interval. */
+ ovs_16aligned_be32 min_rx_echo; /* Required minimum echo rx interval. */
};
+
BUILD_ASSERT_DECL(BFD_PACKET_LEN == sizeof(struct msg));
#define DIAG_MASK 0x1f
@@ -634,9 +635,9 @@ bfd_put_packet(struct bfd *bfd, struct dp_packet *p,
msg->mult = bfd->mult;
msg->length = BFD_PACKET_LEN;
- msg->my_disc = htonl(bfd->disc);
- msg->your_disc = htonl(bfd->rmt_disc);
- msg->min_rx_echo = htonl(0);
+ put_16aligned_be32(&msg->my_disc, htonl(bfd->disc));
+ put_16aligned_be32(&msg->your_disc, htonl(bfd->rmt_disc));
+ put_16aligned_be32(&msg->min_rx_echo, htonl(0));
if (bfd_in_poll(bfd)) {
min_tx = bfd->poll_min_tx;
@@ -646,8 +647,8 @@ bfd_put_packet(struct bfd *bfd, struct dp_packet *p,
min_rx = bfd->min_rx;
}
- msg->min_tx = htonl(min_tx * 1000);
- msg->min_rx = htonl(min_rx * 1000);
+ put_16aligned_be32(&msg->min_tx, htonl(min_tx * 1000));
+ put_16aligned_be32(&msg->min_rx, htonl(min_rx * 1000));
bfd->flags &= ~FLAG_FINAL;
*oam = bfd->oam;
@@ -781,12 +782,12 @@ bfd_process_packet(struct bfd *bfd, const struct flow *flow,
goto out;
}
- if (!msg->my_disc) {
+ if (!get_16aligned_be32(&msg->my_disc)) {
log_msg(VLL_WARN, msg, "NULL my_disc", bfd);
goto out;
}
- pkt_your_disc = ntohl(msg->your_disc);
+ pkt_your_disc = ntohl(get_16aligned_be32(&msg->your_disc));
if (pkt_your_disc) {
/* Technically, we should use the your discriminator field to figure
* out which 'struct bfd' this packet is destined towards. That way a
@@ -806,7 +807,7 @@ bfd_process_packet(struct bfd *bfd, const struct flow *flow,
bfd_status_changed(bfd);
}
- bfd->rmt_disc = ntohl(msg->my_disc);
+ bfd->rmt_disc = ntohl(get_16aligned_be32(&msg->my_disc));
bfd->rmt_state = rmt_state;
bfd->rmt_flags = flags;
bfd->rmt_diag = msg->vers_diag & DIAG_MASK;
@@ -834,7 +835,7 @@ bfd_process_packet(struct bfd *bfd, const struct flow *flow,
bfd->rmt_mult = msg->mult;
}
- rmt_min_rx = MAX(ntohl(msg->min_rx) / 1000, 1);
+ rmt_min_rx = MAX(ntohl(get_16aligned_be32(&msg->min_rx)) / 1000, 1);
if (bfd->rmt_min_rx != rmt_min_rx) {
bfd->rmt_min_rx = rmt_min_rx;
if (bfd->next_tx) {
@@ -843,7 +844,7 @@ bfd_process_packet(struct bfd *bfd, const struct flow *flow,
log_msg(VLL_INFO, msg, "New remote min_rx", bfd);
}
- bfd->rmt_min_tx = MAX(ntohl(msg->min_tx) / 1000, 1);
+ bfd->rmt_min_tx = MAX(ntohl(get_16aligned_be32(&msg->min_tx)) / 1000, 1);
bfd->detect_time = bfd_rx_interval(bfd) * bfd->rmt_mult + time_msec();
if (bfd->state == STATE_ADMIN_DOWN) {
@@ -1105,10 +1106,14 @@ log_msg(enum vlog_level level, const struct msg *p, const char *message,
bfd_diag_str(p->vers_diag & DIAG_MASK),
bfd_state_str(p->flags & STATE_MASK),
p->mult, p->length, bfd_flag_str(p->flags & FLAGS_MASK),
- ntohl(p->my_disc), ntohl(p->your_disc),
- ntohl(p->min_tx), ntohl(p->min_tx) / 1000,
- ntohl(p->min_rx), ntohl(p->min_rx) / 1000,
- ntohl(p->min_rx_echo), ntohl(p->min_rx_echo) / 1000);
+ ntohl(get_16aligned_be32(&p->my_disc)),
+ ntohl(get_16aligned_be32(&p->your_disc)),
+ ntohl(get_16aligned_be32(&p->min_tx)),
+ ntohl(get_16aligned_be32(&p->min_tx)) / 1000,
+ ntohl(get_16aligned_be32(&p->min_rx)),
+ ntohl(get_16aligned_be32(&p->min_rx)) / 1000,
+ ntohl(get_16aligned_be32(&p->min_rx_echo)),
+ ntohl(get_16aligned_be32(&p->min_rx_echo)) / 1000);
bfd_put_details(&ds, bfd);
VLOG(level, "%s", ds_cstr(&ds));
ds_destroy(&ds);
diff --git a/lib/cmap.h b/lib/cmap.h
index c502d23112..72e2ec5f71 100644
--- a/lib/cmap.h
+++ b/lib/cmap.h
@@ -108,6 +108,8 @@ size_t cmap_replace(struct cmap *, struct cmap_node *old_node,
*
* CMAP and HASH are evaluated only once. NODE is evaluated many times.
*
+ * After a normal exit of the loop (not through a "break;" statement) NODE is
+ * NULL.
*
* Thread-safety
* =============
@@ -128,15 +130,15 @@ size_t cmap_replace(struct cmap *, struct cmap_node *old_node,
* CMAP_FOR_EACH_WITH_HASH_PROTECTED may only be used if CMAP is guaranteed not
* to change during iteration. It may be very slightly faster.
*/
-#define CMAP_NODE_FOR_EACH(NODE, MEMBER, CMAP_NODE) \
- for (INIT_CONTAINER(NODE, CMAP_NODE, MEMBER); \
- (NODE) != OBJECT_CONTAINING(NULL, NODE, MEMBER); \
- ASSIGN_CONTAINER(NODE, cmap_node_next(&(NODE)->MEMBER), MEMBER))
-#define CMAP_NODE_FOR_EACH_PROTECTED(NODE, MEMBER, CMAP_NODE) \
- for (INIT_CONTAINER(NODE, CMAP_NODE, MEMBER); \
- (NODE) != OBJECT_CONTAINING(NULL, NODE, MEMBER); \
- ASSIGN_CONTAINER(NODE, cmap_node_next_protected(&(NODE)->MEMBER), \
- MEMBER))
+#define CMAP_NODE_FOR_EACH(NODE, MEMBER, CMAP_NODE) \
+ for (INIT_MULTIVAR(NODE, MEMBER, CMAP_NODE, struct cmap_node); \
+ CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \
+ UPDATE_MULTIVAR(NODE, cmap_node_next(ITER_VAR(NODE))))
+#define CMAP_NODE_FOR_EACH_PROTECTED(NODE, MEMBER, CMAP_NODE) \
+ for (INIT_MULTIVAR(NODE, MEMBER, CMAP_NODE, struct cmap_node); \
+ CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \
+ UPDATE_MULTIVAR(NODE, cmap_node_next_protected(ITER_VAR(NODE))))
+
#define CMAP_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, CMAP) \
CMAP_NODE_FOR_EACH(NODE, MEMBER, cmap_find(CMAP, HASH))
#define CMAP_FOR_EACH_WITH_HASH_PROTECTED(NODE, MEMBER, HASH, CMAP) \
@@ -223,7 +225,7 @@ unsigned long cmap_find_batch(const struct cmap *cmap, unsigned long map,
? (INIT_CONTAINER(NODE, (CURSOR)->node, MEMBER), \
cmap_cursor_advance(CURSOR), \
true) \
- : false)
+ : (NODE = NULL, false))
#define CMAP_CURSOR_FOR_EACH(NODE, MEMBER, CURSOR, CMAP) \
for (*(CURSOR) = cmap_cursor_start(CMAP); \
diff --git a/lib/conntrack.c b/lib/conntrack.c
index 551c2061a7..b8183faa2d 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -2238,7 +2238,7 @@ nat_range_hash(const struct conn *conn, uint32_t basis)
hash = ct_addr_hash_add(hash, &conn->nat_info->min_addr);
hash = ct_addr_hash_add(hash, &conn->nat_info->max_addr);
hash = hash_add(hash,
- (conn->nat_info->max_port << 16)
+ ((uint32_t) conn->nat_info->max_port << 16)
| conn->nat_info->min_port);
hash = ct_endpoint_hash_add(hash, &conn->key.src);
hash = ct_endpoint_hash_add(hash, &conn->key.dst);
diff --git a/lib/db-ctl-base.c b/lib/db-ctl-base.c
index 77cc76a9f6..7074561588 100644
--- a/lib/db-ctl-base.c
+++ b/lib/db-ctl-base.c
@@ -247,15 +247,15 @@ record_id_equals(const union ovsdb_atom *name, enum ovsdb_atomic_type type,
const char *record_id)
{
if (type == OVSDB_TYPE_STRING) {
- if (!strcmp(name->string, record_id)) {
+ if (!strcmp(name->s->string, record_id)) {
return true;
}
struct uuid uuid;
size_t len = strlen(record_id);
if (len >= 4
- && uuid_from_string(&uuid, name->string)
- && !strncmp(name->string, record_id, len)) {
+ && uuid_from_string(&uuid, name->s->string)
+ && !strncmp(name->s->string, record_id, len)) {
return true;
}
@@ -314,15 +314,19 @@ get_row_by_id(struct ctl_context *ctx,
row, id->name_column, key, value);
/* Extract the name from the column. */
- const union ovsdb_atom *name;
+ const union ovsdb_atom *name = NULL;
if (!id->key) {
name = datum->n == 1 ? &datum->keys[0] : NULL;
} else {
- const union ovsdb_atom key_atom
- = { .string = CONST_CAST(char *, id->key) };
- unsigned int i = ovsdb_datum_find_key(datum, &key_atom,
- OVSDB_TYPE_STRING);
- name = i == UINT_MAX ? NULL : &datum->values[i];
+ union ovsdb_atom key_atom = {
+ .s = ovsdb_atom_string_create(CONST_CAST(char *, id->key)) };
+ unsigned int i;
+
+ if (ovsdb_datum_find_key(datum, &key_atom,
+ OVSDB_TYPE_STRING, &i)) {
+ name = &datum->values[i];
+ }
+ ovsdb_atom_destroy(&key_atom, OVSDB_TYPE_STRING);
}
if (!name) {
continue;
@@ -819,14 +823,14 @@ check_condition(const struct ovsdb_idl_table_class *table,
goto out;
}
- idx = ovsdb_datum_find_key(have_datum,
- &want_key, column->type.key.type);
- if (idx == UINT_MAX && !is_set_operator(operator)) {
+ bool found = ovsdb_datum_find_key(have_datum, &want_key,
+ column->type.key.type, &idx);
+ if (!found && !is_set_operator(operator)) {
retval = false;
} else {
struct ovsdb_datum a;
- if (idx != UINT_MAX) {
+ if (found) {
a.n = 1;
a.keys = &have_datum->values[idx];
a.values = NULL;
@@ -992,9 +996,8 @@ cmd_get(struct ctl_context *ctx)
return;
}
- idx = ovsdb_datum_find_key(datum, &key,
- column->type.key.type);
- if (idx == UINT_MAX) {
+ if (!ovsdb_datum_find_key(datum, &key,
+ column->type.key.type, &idx)) {
if (must_exist) {
ctl_error(
ctx, "no key \"%s\" in %s record \"%s\" column %s",
@@ -1375,7 +1378,7 @@ set_column(const struct ovsdb_idl_table_class *table,
ovsdb_atom_destroy(&value, column->type.value.type);
ovsdb_datum_union(&datum, ovsdb_idl_read(row, column),
- &column->type, false);
+ &column->type);
ovsdb_idl_txn_verify(row, column);
ovsdb_idl_txn_write(row, column, &datum);
} else {
@@ -1514,7 +1517,7 @@ cmd_add(struct ctl_context *ctx)
ovsdb_datum_destroy(&old, &column->type);
return;
}
- ovsdb_datum_union(&old, &add, type, false);
+ ovsdb_datum_union(&old, &add, type);
ovsdb_datum_destroy(&add, type);
}
if (old.n > type->n_max) {
diff --git a/lib/dns-resolve.c b/lib/dns-resolve.c
index d344514343..8bcecb90ce 100644
--- a/lib/dns-resolve.c
+++ b/lib/dns-resolve.c
@@ -265,7 +265,7 @@ resolve_callback__(void *req_, int err, struct ub_result *result)
if (err != 0 || (result->qtype == ns_t_aaaa && !result->havedata)) {
ub_resolve_free(result);
req->state = RESOLVE_ERROR;
- VLOG_ERR_RL(&rl, "%s: failed to resolve", req->name);
+ VLOG_WARN_RL(&rl, "%s: failed to resolve", req->name);
return;
}
diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index 72f6d09ac7..35c72542a2 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -294,7 +294,7 @@ dp_packet_resize(struct dp_packet *b, size_t new_headroom, size_t new_tailroom)
void
dp_packet_prealloc_tailroom(struct dp_packet *b, size_t size)
{
- if (size > dp_packet_tailroom(b)) {
+ if ((size && !dp_packet_base(b)) || (size > dp_packet_tailroom(b))) {
dp_packet_resize(b, dp_packet_headroom(b), MAX(size, 64));
}
}
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 08d93c2779..3dc582fbfd 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -199,6 +199,7 @@ struct dp_packet *dp_packet_clone_data_with_headroom(const void *, size_t,
void dp_packet_resize(struct dp_packet *b, size_t new_headroom,
size_t new_tailroom);
static inline void dp_packet_delete(struct dp_packet *);
+static inline void dp_packet_swap(struct dp_packet *, struct dp_packet *);
static inline void *dp_packet_at(const struct dp_packet *, size_t offset,
size_t size);
@@ -256,6 +257,18 @@ dp_packet_delete(struct dp_packet *b)
}
}
+/* Exchanges the full 'struct dp_packet' contents of 'a' and 'b'. Asserts
+ * that the source of each packet is DPBUF_MALLOC or DPBUF_STUB. */
+static inline void
+dp_packet_swap(struct dp_packet *a, struct dp_packet *b)
+{
+ ovs_assert(a->source == DPBUF_MALLOC || a->source == DPBUF_STUB);
+ ovs_assert(b->source == DPBUF_MALLOC || b->source == DPBUF_STUB);
+ struct dp_packet saved_b = *b;
+ *b = *a;
+ *a = saved_b;
+}
+
/* If 'b' contains at least 'offset + size' bytes of data, returns a pointer to
* byte 'offset'. Otherwise, returns a null pointer. */
static inline void *
diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c
index b7d577870d..fe24f9abdf 100644
--- a/lib/dpdk-stub.c
+++ b/lib/dpdk-stub.c
@@ -83,7 +83,7 @@ bool
dpdk_get_cpu_has_isa(const char *arch OVS_UNUSED,
const char *feature OVS_UNUSED)
{
- VLOG_ERR_ONCE("DPDK not supported in this version of Open vSwitch, "
+ VLOG_DBG_ONCE("DPDK not supported in this version of Open vSwitch, "
"cannot use CPU flag based optimizations");
return false;
}
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index ec64419e38..28b54ef2f1 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -157,10 +157,19 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, __m512i idx, __m512i a)
0, 0, 0, 0, /* Src IP */ \
0, 0, 0, 0, /* Dst IP */
-#define PATTERN_IPV4_MASK PATTERN_IPV4_GEN(0xFF, 0xFE, 0xFF, 0xFF)
+#define PATTERN_IPV4_MASK PATTERN_IPV4_GEN(0xFF, 0xBF, 0xFF, 0xFF)
#define PATTERN_IPV4_UDP PATTERN_IPV4_GEN(0x45, 0, 0, 0x11)
#define PATTERN_IPV4_TCP PATTERN_IPV4_GEN(0x45, 0, 0, 0x06)
+#define PATTERN_TCP_GEN(data_offset) \
+ 0, 0, 0, 0, /* Source port, destination port. */ \
+ 0, 0, 0, 0, /* Sequence number. */ \
+ 0, 0, 0, 0, /* Acknowledgment number. */ \
+ data_offset, /* Data offset: probed to equal 5, i.e. no TCP options. */
+
+#define PATTERN_TCP_MASK PATTERN_TCP_GEN(0xF0)
+#define PATTERN_TCP PATTERN_TCP_GEN(0x50)
+
#define NU 0
#define PATTERN_IPV4_UDP_SHUFFLE \
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, NU, NU, /* Ether */ \
@@ -217,6 +226,25 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, __m512i idx, __m512i a)
#define PATTERN_DT1Q_IPV4_TCP_KMASK \
(KMASK_ETHER | (KMASK_DT1Q << 16) | (KMASK_IPV4 << 24) | (KMASK_TCP << 40))
+/* Miniflow Strip post-processing masks.
+ * This allows unsetting specific bits from the resulting miniflow. It is used
+ * for e.g. IPv4 where the "DF" bit is never pushed to the miniflow itself.
+ * The NC define is for "No Change", allowing the bits to pass through.
+ */
+#define NC 0xFF
+
+#define PATTERN_STRIP_IPV4_MASK \
+ NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, \
+ NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, 0xBF, NC, NC, NC, \
+ NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, \
+ NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC
+
+#define PATTERN_STRIP_DOT1Q_IPV4_MASK \
+ NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, \
+ NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, \
+ NC, NC, NC, NC, 0xBF, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, \
+ NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC
+
/* This union allows initializing static data as u8, but easily loading it
* into AVX512 registers too. The union ensures proper alignment for the zmm.
*/
@@ -241,8 +269,9 @@ struct mfex_profile {
union mfex_data probe_mask;
union mfex_data probe_data;
- /* Required for reshaping packet into miniflow. */
+ /* Required for reshaping packet into miniflow and post-processing it. */
union mfex_data store_shuf;
+ union mfex_data strip_mask;
__mmask64 store_kmsk;
/* Constant data to set in mf.bits and dp_packet data on hit. */
@@ -310,6 +339,7 @@ static const struct mfex_profile mfex_profiles[PROFILE_COUNT] =
.probe_data.u8_data = { PATTERN_ETHERTYPE_IPV4 PATTERN_IPV4_UDP},
.store_shuf.u8_data = { PATTERN_IPV4_UDP_SHUFFLE },
+ .strip_mask.u8_data = { PATTERN_STRIP_IPV4_MASK },
.store_kmsk = PATTERN_IPV4_UDP_KMASK,
.mf_bits = { 0x18a0000000000000, 0x0000000000040401},
@@ -320,10 +350,19 @@ static const struct mfex_profile mfex_profiles[PROFILE_COUNT] =
},
[PROFILE_ETH_IPV4_TCP] = {
- .probe_mask.u8_data = { PATTERN_ETHERTYPE_MASK PATTERN_IPV4_MASK },
- .probe_data.u8_data = { PATTERN_ETHERTYPE_IPV4 PATTERN_IPV4_TCP},
+ .probe_mask.u8_data = {
+ PATTERN_ETHERTYPE_MASK
+ PATTERN_IPV4_MASK
+ PATTERN_TCP_MASK
+ },
+ .probe_data.u8_data = {
+ PATTERN_ETHERTYPE_IPV4
+ PATTERN_IPV4_TCP
+ PATTERN_TCP
+ },
.store_shuf.u8_data = { PATTERN_IPV4_TCP_SHUFFLE },
+ .strip_mask.u8_data = { PATTERN_STRIP_IPV4_MASK },
.store_kmsk = PATTERN_IPV4_TCP_KMASK,
.mf_bits = { 0x18a0000000000000, 0x0000000000044401},
@@ -342,6 +381,7 @@ static const struct mfex_profile mfex_profiles[PROFILE_COUNT] =
},
.store_shuf.u8_data = { PATTERN_DT1Q_IPV4_UDP_SHUFFLE },
+ .strip_mask.u8_data = { PATTERN_STRIP_DOT1Q_IPV4_MASK },
.store_kmsk = PATTERN_DT1Q_IPV4_UDP_KMASK,
.mf_bits = { 0x38a0000000000000, 0x0000000000040401},
@@ -353,20 +393,27 @@ static const struct mfex_profile mfex_profiles[PROFILE_COUNT] =
[PROFILE_ETH_VLAN_IPV4_TCP] = {
.probe_mask.u8_data = {
- PATTERN_ETHERTYPE_MASK PATTERN_DT1Q_MASK PATTERN_IPV4_MASK
+ PATTERN_ETHERTYPE_MASK
+ PATTERN_DT1Q_MASK
+ PATTERN_IPV4_MASK
+ PATTERN_TCP_MASK
},
.probe_data.u8_data = {
- PATTERN_ETHERTYPE_DT1Q PATTERN_DT1Q_IPV4 PATTERN_IPV4_TCP
+ PATTERN_ETHERTYPE_DT1Q
+ PATTERN_DT1Q_IPV4
+ PATTERN_IPV4_TCP
+ PATTERN_TCP
},
.store_shuf.u8_data = { PATTERN_DT1Q_IPV4_TCP_SHUFFLE },
+ .strip_mask.u8_data = { PATTERN_STRIP_DOT1Q_IPV4_MASK },
.store_kmsk = PATTERN_DT1Q_IPV4_TCP_KMASK,
.mf_bits = { 0x38a0000000000000, 0x0000000000044401},
.dp_pkt_offs = {
14, UINT16_MAX, 18, 38,
},
- .dp_pkt_min_size = 46,
+ .dp_pkt_min_size = 58,
},
};
@@ -374,16 +421,31 @@ static const struct mfex_profile mfex_profiles[PROFILE_COUNT] =
/* Protocol specific helper functions, for calculating offsets/lenghts. */
static int32_t
mfex_ipv4_set_l2_pad_size(struct dp_packet *pkt, struct ip_header *nh,
- uint32_t len_from_ipv4)
+ uint32_t len_from_ipv4, uint32_t next_proto_len)
{
- /* Handle dynamic l2_pad_size. */
- uint16_t tot_len = ntohs(nh->ip_tot_len);
- if (OVS_UNLIKELY(tot_len > len_from_ipv4 ||
- (len_from_ipv4 - tot_len) > UINT16_MAX)) {
- return -1;
- }
- dp_packet_set_l2_pad_size(pkt, len_from_ipv4 - tot_len);
- return 0;
+ /* Handle dynamic l2_pad_size; note that avx512 has already validated
+ * the IP->ihl field to be 5, so 20 bytes of IP header (no options).
+ */
+ uint16_t ip_tot_len = ntohs(nh->ip_tot_len);
+
+ /* Error if IP total length is greater than remaining packet size. */
+ bool err_ip_tot_len_too_high = ip_tot_len > len_from_ipv4;
+
+ /* Error if IP total length is less than the size of the IP header
+ * itself plus the size of the next protocol this profile matches on.
+ */
+ bool err_ip_tot_len_too_low =
+ (IP_HEADER_LEN + next_proto_len) > ip_tot_len;
+
+ /* Ensure the l2 pad size will not overflow. */
+ bool err_len_u16_overflow = (len_from_ipv4 - ip_tot_len) > UINT16_MAX;
+
+ if (OVS_UNLIKELY(err_ip_tot_len_too_high || err_ip_tot_len_too_low ||
+ err_len_u16_overflow)) {
+ return -1;
+ }
+ dp_packet_set_l2_pad_size(pkt, len_from_ipv4 - ip_tot_len);
+ return 0;
}
/* Fixup the VLAN CFI and PCP, reading the PCP from the input to this function,
@@ -433,6 +495,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
__m512i v_vals = _mm512_loadu_si512(&profile->probe_data);
__m512i v_mask = _mm512_loadu_si512(&profile->probe_mask);
__m512i v_shuf = _mm512_loadu_si512(&profile->store_shuf);
+ __m512i v_strp = _mm512_loadu_si512(&profile->strip_mask);
__mmask64 k_shuf = profile->store_kmsk;
__m128i v_bits = _mm_loadu_si128((void *) &profile->mf_bits);
@@ -450,10 +513,17 @@ mfex_avx512_process(struct dp_packet_batch *packets,
/* Load packet data and probe with AVX512 mask & compare. */
const uint8_t *pkt = dp_packet_data(packet);
- __m512i v_pkt0 = _mm512_loadu_si512(pkt);
+ __m512i v_pkt0;
+ if (size >= 64) {
+ v_pkt0 = _mm512_loadu_si512(pkt);
+ } else {
+ uint64_t load_kmask = (1ULL << size) - 1;
+ v_pkt0 = _mm512_maskz_loadu_epi8(load_kmask, pkt);
+ }
+
__m512i v_pkt0_masked = _mm512_and_si512(v_pkt0, v_mask);
__mmask64 k_cmp = _mm512_cmpeq_epi8_mask(v_pkt0_masked, v_vals);
- if (k_cmp != UINT64_MAX) {
+ if (OVS_UNLIKELY(k_cmp != UINT64_MAX)) {
continue;
}
@@ -474,15 +544,20 @@ mfex_avx512_process(struct dp_packet_batch *packets,
*/
__m512i v512_zeros = _mm512_setzero_si512();
__m512i v_blk0;
+#if __GNUC__ >= 4
if (__builtin_constant_p(use_vbmi) && use_vbmi) {
+#else
+ if (use_vbmi) {
+#endif
v_blk0 = _mm512_maskz_permutexvar_epi8_wrap(k_shuf, v_shuf,
v_pkt0);
} else {
v_blk0 = _mm512_maskz_permutex2var_epi8_skx(k_shuf, v_pkt0,
v_shuf, v512_zeros);
}
- _mm512_storeu_si512(&blocks[2], v_blk0);
+ __m512i v_blk0_strip = _mm512_and_si512(v_blk0, v_strp);
+ _mm512_storeu_si512(&blocks[2], v_blk0_strip);
/* Perform "post-processing" per profile, handling details not easily
* handled in the above generic AVX512 code. Examples include TCP flag
@@ -498,7 +573,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
uint32_t size_from_ipv4 = size - VLAN_ETH_HEADER_LEN;
struct ip_header *nh = (void *)&pkt[VLAN_ETH_HEADER_LEN];
- if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) {
+ if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4,
+ TCP_HEADER_LEN)) {
continue;
}
@@ -512,7 +588,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
uint32_t size_from_ipv4 = size - VLAN_ETH_HEADER_LEN;
struct ip_header *nh = (void *)&pkt[VLAN_ETH_HEADER_LEN];
- if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) {
+ if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4,
+ UDP_HEADER_LEN)) {
continue;
}
} break;
@@ -525,7 +602,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
/* Handle dynamic l2_pad_size. */
uint32_t size_from_ipv4 = size - sizeof(struct eth_header);
struct ip_header *nh = (void *)&pkt[sizeof(struct eth_header)];
- if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) {
+ if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4,
+ TCP_HEADER_LEN)) {
continue;
}
} break;
@@ -534,7 +612,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
/* Handle dynamic l2_pad_size. */
uint32_t size_from_ipv4 = size - sizeof(struct eth_header);
struct ip_header *nh = (void *)&pkt[sizeof(struct eth_header)];
- if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) {
+ if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4,
+ UDP_HEADER_LEN)) {
continue;
}
diff --git a/lib/dpif-netdev-private-dfc.h b/lib/dpif-netdev-private-dfc.h
index 92092ebec9..3dfc91f0fe 100644
--- a/lib/dpif-netdev-private-dfc.h
+++ b/lib/dpif-netdev-private-dfc.h
@@ -59,7 +59,8 @@ extern "C" {
* Thread-safety
* =============
*
- * Each pmd_thread has its own private exact match cache.
+ * Each pmd_thread has its own private exact match cache and signature match
+ * cache.
* If dp_netdev_input is not called from a pmd thread, a mutex is used.
*/
diff --git a/lib/dpif-netdev-private-dpcls.h b/lib/dpif-netdev-private-dpcls.h
index 7c4a840cb1..0d5da73c7a 100644
--- a/lib/dpif-netdev-private-dpcls.h
+++ b/lib/dpif-netdev-private-dpcls.h
@@ -83,8 +83,10 @@ struct dpcls_subtable {
/* The lookup function to use for this subtable. If there is a known
* property of the subtable (eg: only 3 bits of miniflow metadata is
* used for the lookup) then this can point at an optimized version of
- * the lookup function for this particular subtable. */
- dpcls_subtable_lookup_func lookup_func;
+ * the lookup function for this particular subtable. The lookup function
+ * can be used at any time by a PMD thread, so it's declared as an atomic
+ * here to prevent garbage from being read. */
+ ATOMIC(dpcls_subtable_lookup_func) lookup_func;
/* Caches the masks to match a packet to, reducing runtime calculations. */
uint64_t *mf_masks;
diff --git a/lib/dpif-netdev-private-flow.h b/lib/dpif-netdev-private-flow.h
index 3030660675..32ad020d90 100644
--- a/lib/dpif-netdev-private-flow.h
+++ b/lib/dpif-netdev-private-flow.h
@@ -101,6 +101,7 @@ struct dp_netdev_flow {
bool dead;
uint32_t mark; /* Unique flow mark assigned to a flow */
+ odp_port_t orig_in_port;
/* Statistics. */
struct dp_netdev_flow_stats stats;
diff --git a/lib/dpif-netdev-private-thread.h b/lib/dpif-netdev-private-thread.h
index a782d9678a..ac4885538c 100644
--- a/lib/dpif-netdev-private-thread.h
+++ b/lib/dpif-netdev-private-thread.h
@@ -78,10 +78,10 @@ struct dp_netdev_pmd_thread {
struct ovs_refcount ref_cnt; /* Every reference must be refcount'ed. */
struct cmap_node node; /* In 'dp->poll_threads'. */
- /* Per thread exact-match cache. Note, the instance for cpu core
- * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly
- * need to be protected by 'non_pmd_mutex'. Every other instance
- * will only be accessed by its own pmd thread. */
+ /* Per thread exact match cache and signature match cache. Note, the
+ * instance for cpu core NON_PMD_CORE_ID can be accessed by multiple
+ * threads, and thus needs to be protected by 'non_pmd_mutex'. Every
+ * other instance will only be accessed by its own pmd thread. */
OVS_ALIGNED_VAR(CACHE_LINE_SIZE) struct dfc_cache flow_cache;
/* Flow-Table and classifiers
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index bddce75b63..f96d0ecf54 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -984,7 +984,9 @@ dpif_netdev_subtable_lookup_set(struct unixctl_conn *conn, int argc OVS_UNUSED,
if (!cls) {
continue;
}
+ ovs_mutex_lock(&pmd->flow_mutex);
uint32_t subtbl_changes = dpcls_subtable_lookup_reprobe(cls);
+ ovs_mutex_unlock(&pmd->flow_mutex);
if (subtbl_changes) {
lookup_dpcls_changed++;
lookup_subtable_changed += subtbl_changes;
@@ -2221,13 +2223,24 @@ static void
do_del_port(struct dp_netdev *dp, struct dp_netdev_port *port)
OVS_REQUIRES(dp->port_mutex)
{
- netdev_flow_flush(port->netdev);
- netdev_uninit_flow_api(port->netdev);
hmap_remove(&dp->ports, &port->node);
seq_change(dp->port_seq);
reconfigure_datapath(dp);
+ /* Flush and disable offloads only after 'port' has been made
+ * inaccessible through datapath reconfiguration.
+ * This prevents having PMDs enqueuing offload requests after
+ * the flush. However, the flush doesn't provide any synchronization
+ * with the offload thread, so some requests could still be in the
+ * queue.
+ * When only this port is deleted instead of the whole datapath,
+ * revalidator threads are still active and can still enqueue
+ * offload modification or deletion. Managing those stray requests
+ * is done in the offload threads. */
+ netdev_flow_flush(port->netdev);
+ netdev_uninit_flow_api(port->netdev);
+
port_destroy(port);
}
@@ -2711,6 +2724,10 @@ queue_netdev_flow_del(struct dp_netdev_pmd_thread *pmd,
ovsthread_once_done(&offload_thread_once);
}
+ if (!netdev_is_flow_api_enabled()) {
+ return;
+ }
+
offload = dp_netdev_alloc_flow_offload(pmd, flow,
DP_NETDEV_FLOW_OFFLOAD_OP_DEL);
dp_netdev_append_flow_offload(offload);
@@ -2720,7 +2737,7 @@ static void
queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd,
struct dp_netdev_flow *flow, struct match *match,
const struct nlattr *actions, size_t actions_len,
- odp_port_t orig_in_port, int op)
+ int op)
{
struct dp_flow_offload_item *offload;
@@ -2740,7 +2757,7 @@ queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd,
offload->actions = xmalloc(actions_len);
memcpy(offload->actions, actions, actions_len);
offload->actions_len = actions_len;
- offload->orig_in_port = orig_in_port;
+ offload->orig_in_port = flow->orig_in_port;
dp_netdev_append_flow_offload(offload);
}
@@ -2758,9 +2775,7 @@ dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd,
ovs_assert(cls != NULL);
dpcls_remove(cls, &flow->cr);
cmap_remove(&pmd->flow_table, node, dp_netdev_flow_hash(&flow->ufid));
- if (flow->mark != INVALID_FLOW_MARK) {
- queue_netdev_flow_del(pmd, flow);
- }
+ queue_netdev_flow_del(pmd, flow);
flow->dead = true;
dp_netdev_flow_unref(flow);
@@ -3555,6 +3570,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
flow->dead = false;
flow->batch = NULL;
flow->mark = INVALID_FLOW_MARK;
+ flow->orig_in_port = orig_in_port;
*CONST_CAST(unsigned *, &flow->pmd_id) = pmd->core_id;
*CONST_CAST(struct flow *, &flow->flow) = match->flow;
*CONST_CAST(ovs_u128 *, &flow->ufid) = *ufid;
@@ -3584,7 +3600,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
dp_netdev_flow_hash(&flow->ufid));
queue_netdev_flow_put(pmd, flow, match, actions, actions_len,
- orig_in_port, DP_NETDEV_FLOW_OFFLOAD_OP_ADD);
+ DP_NETDEV_FLOW_OFFLOAD_OP_ADD);
if (OVS_UNLIKELY(!VLOG_DROP_DBG((&upcall_rl)))) {
struct ds ds = DS_EMPTY_INITIALIZER;
@@ -3671,7 +3687,7 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd,
ovsrcu_set(&netdev_flow->actions, new_actions);
queue_netdev_flow_put(pmd, netdev_flow, match,
- put->actions, put->actions_len, ODPP_NONE,
+ put->actions, put->actions_len,
DP_NETDEV_FLOW_OFFLOAD_OP_MOD);
if (stats) {
@@ -4061,7 +4077,10 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
flow_hash_5tuple(execute->flow, 0));
}
- dp_packet_batch_init_packet(&pp, execute->packet);
+ /* Making a copy because the packet might be stolen during the execution
+ * and caller might still need it. */
+ struct dp_packet *packet_clone = dp_packet_clone(execute->packet);
+ dp_packet_batch_init_packet(&pp, packet_clone);
dp_netdev_execute_actions(pmd, &pp, false, execute->flow,
execute->actions, execute->actions_len);
dp_netdev_pmd_flush_output_packets(pmd, true);
@@ -4071,6 +4090,24 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
dp_netdev_pmd_unref(pmd);
}
+ if (dp_packet_batch_size(&pp) == 1) {
+ /* Packet wasn't dropped during the execution. Swapping content with
+ * the original packet, because the caller might expect actions to
+ * modify it. Using the packet from a batch instead of 'packet_clone'
+ * because it may be stolen and replaced by another packet, e.g. by
+ * the fragmentation engine. */
+ dp_packet_swap(execute->packet, pp.packets[0]);
+ dp_packet_delete_batch(&pp, true);
+ } else if (dp_packet_batch_size(&pp)) {
+ /* FIXME: We have more packets than expected. Likely, we got IP
+ * fragments of the reassembled packet. Dropping them here as we have
+ * no way to get them to the caller. It might be that all the required
+ * actions with them are already executed, but it also might not be the
+ * case, e.g. if dpif_netdev_execute() is called to execute a single
+ * tunnel push. */
+ dp_packet_delete_batch(&pp, true);
+ }
+
return 0;
}
@@ -5033,23 +5070,28 @@ sched_numa_list_put_in_place(struct sched_numa_list *numa_list)
}
}
+/* Returns 'true' if OVS rxq scheduling algorithm assigned any unpinned rxq to
+ * a PMD thread core on a non-local numa node. */
static bool
sched_numa_list_cross_numa_polling(struct sched_numa_list *numa_list)
{
struct sched_numa *numa;
- /* For each numa */
HMAP_FOR_EACH (numa, node, &numa_list->numas) {
- /* For each pmd */
for (int i = 0; i < numa->n_pmds; i++) {
struct sched_pmd *sched_pmd;
sched_pmd = &numa->pmds[i];
- /* For each rxq. */
+ if (sched_pmd->isolated) {
+ /* All rxqs on this PMD thread core are pinned. */
+ continue;
+ }
for (unsigned k = 0; k < sched_pmd->n_rxq; k++) {
struct dp_netdev_rxq *rxq = sched_pmd->rxqs[k];
-
- if (!sched_pmd->isolated &&
+ /* Check if the rxq is not pinned to a specific PMD thread core
+ * by the user AND the PMD thread core that OVS assigned is
+ * non-local to the rxq port. */
+ if (rxq->core_id == OVS_CORE_UNSPEC &&
rxq->pmd->numa_id !=
netdev_get_numa_id(rxq->port->netdev)) {
return true;
@@ -5349,10 +5391,10 @@ sched_numa_list_schedule(struct sched_numa_list *numa_list,
/* Find any numa with available PMDs. */
for (int j = 0; j < n_numa; j++) {
numa = sched_numa_list_next(numa_list, last_cross_numa);
+ last_cross_numa = numa;
if (sched_numa_noniso_pmd_count(numa)) {
break;
}
- last_cross_numa = numa;
numa = NULL;
}
}
@@ -6616,15 +6658,15 @@ static struct dp_netdev_pmd_thread *
dp_netdev_get_pmd(struct dp_netdev *dp, unsigned core_id)
{
struct dp_netdev_pmd_thread *pmd;
- const struct cmap_node *pnode;
- pnode = cmap_find(&dp->poll_threads, hash_int(core_id, 0));
- if (!pnode) {
- return NULL;
+ CMAP_FOR_EACH_WITH_HASH (pmd, node, hash_int(core_id, 0),
+ &dp->poll_threads) {
+ if (pmd->core_id == core_id) {
+ return dp_netdev_pmd_try_ref(pmd) ? pmd : NULL;
+ }
}
- pmd = CONTAINER_OF(pnode, struct dp_netdev_pmd_thread, node);
- return dp_netdev_pmd_try_ref(pmd) ? pmd : NULL;
+ return NULL;
}
/* Sets the 'struct dp_netdev_pmd_thread' for non-pmd threads. */
@@ -8942,9 +8984,12 @@ dpcls_create_subtable(struct dpcls *cls, const struct netdev_flow_key *mask)
/* Get the preferred subtable search function for this (u0,u1) subtable.
* The function is guaranteed to always return a valid implementation, and
- * possibly an ISA optimized, and/or specialized implementation.
+ * possibly an ISA optimized, and/or specialized implementation. Initialize
+ * the subtable search function atomically to avoid garbage data being read
+ * by the PMD thread.
*/
- subtable->lookup_func = dpcls_subtable_get_best_impl(unit0, unit1);
+ atomic_init(&subtable->lookup_func,
+ dpcls_subtable_get_best_impl(unit0, unit1));
cmap_insert(&cls->subtables_map, &subtable->cmap_node, mask->hash);
/* Add the new subtable at the end of the pvector (with no hits yet) */
@@ -8973,6 +9018,10 @@ dpcls_find_subtable(struct dpcls *cls, const struct netdev_flow_key *mask)
/* Checks for the best available implementation for each subtable lookup
* function, and assigns it as the lookup function pointer for each subtable.
* Returns the number of subtables that have changed lookup implementation.
+ * This function requires holding a flow_mutex when called. This is to make
+ * sure modifications done by this function are not overwritten. This could
+ * happen if dpcls_sort_subtable_vector() is called at the same time as this
+ * function.
*/
static uint32_t
dpcls_subtable_lookup_reprobe(struct dpcls *cls)
@@ -8985,10 +9034,13 @@ dpcls_subtable_lookup_reprobe(struct dpcls *cls)
uint32_t u0_bits = subtable->mf_bits_set_unit0;
uint32_t u1_bits = subtable->mf_bits_set_unit1;
void *old_func = subtable->lookup_func;
- subtable->lookup_func = dpcls_subtable_get_best_impl(u0_bits, u1_bits);
+
+ /* Set the subtable lookup function atomically to avoid garbage data
+ * being read by the PMD thread. */
+ atomic_store_relaxed(&subtable->lookup_func,
+ dpcls_subtable_get_best_impl(u0_bits, u1_bits));
subtables_changed += (old_func != subtable->lookup_func);
}
- pvector_publish(pvec);
return subtables_changed;
}
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 34fc042373..5f4b60c5a6 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -84,6 +84,8 @@ enum { MAX_PORTS = USHRT_MAX };
#define EPOLLEXCLUSIVE (1u << 28)
#endif
+#define OVS_DP_F_UNSUPPORTED (1u << 31) /* Not supported by any kernel. */
+
/* This PID is not used by the kernel datapath when using dispatch per CPU,
* but it is required to be set (not zero). */
#define DPIF_NETLINK_PER_CPU_PID UINT32_MAX
@@ -382,36 +384,62 @@ dpif_netlink_open(const struct dpif_class *class OVS_UNUSED, const char *name,
dp_request.cmd = OVS_DP_CMD_SET;
}
- /* The Open vSwitch kernel module has two modes for dispatching upcalls:
- * per-vport and per-cpu.
- *
- * When dispatching upcalls per-vport, the kernel will
- * send the upcall via a Netlink socket that has been selected based on the
- * vport that received the packet that is causing the upcall.
- *
- * When dispatching upcall per-cpu, the kernel will send the upcall via
- * a Netlink socket that has been selected based on the cpu that received
- * the packet that is causing the upcall.
- *
- * First we test to see if the kernel module supports per-cpu dispatching
- * (the preferred method). If it does not support per-cpu dispatching, we
- * fall back to the per-vport dispatch mode.
+ /* Some older kernels will not reject unknown features. This will cause
+ * 'ovs-vswitchd' to incorrectly assume a feature is supported. In order to
+ * test for that, we attempt to set a feature that we know is not supported
+ * by any kernel. If this feature is not rejected, we can assume we are
+ * running on one of these older kernels.
*/
dp_request.user_features |= OVS_DP_F_UNALIGNED;
- dp_request.user_features &= ~OVS_DP_F_VPORT_PIDS;
- dp_request.user_features |= OVS_DP_F_DISPATCH_UPCALL_PER_CPU;
+ dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
+ dp_request.user_features |= OVS_DP_F_UNSUPPORTED;
error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
if (error) {
- dp_request.user_features &= ~OVS_DP_F_DISPATCH_UPCALL_PER_CPU;
+ /* The Open vSwitch kernel module has two modes for dispatching
+ * upcalls: per-vport and per-cpu.
+ *
+ * When dispatching upcalls per-vport, the kernel will
+ * send the upcall via a Netlink socket that has been selected based on
+ * the vport that received the packet that is causing the upcall.
+ *
+ * When dispatching upcall per-cpu, the kernel will send the upcall via
+ * a Netlink socket that has been selected based on the cpu that
+ * received the packet that is causing the upcall.
+ *
+ * First we test to see if the kernel module supports per-cpu
+ * dispatching (the preferred method). If it does not support per-cpu
+ * dispatching, we fall back to the per-vport dispatch mode.
+ */
+ dp_request.user_features &= ~OVS_DP_F_UNSUPPORTED;
+ dp_request.user_features |= OVS_DP_F_UNALIGNED;
+ dp_request.user_features &= ~OVS_DP_F_VPORT_PIDS;
+ dp_request.user_features |= OVS_DP_F_DISPATCH_UPCALL_PER_CPU;
+ error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
+ if (error) {
+ dp_request.user_features &= ~OVS_DP_F_DISPATCH_UPCALL_PER_CPU;
+ dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
+ error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
+ }
+ if (error) {
+ return error;
+ }
+
+ error = open_dpif(&dp, dpifp);
+ dpif_netlink_set_features(*dpifp, OVS_DP_F_TC_RECIRC_SHARING);
+ } else {
+ VLOG_INFO("Kernel does not correctly support feature negotiation. "
+ "Using standard features.");
+ dp_request.cmd = OVS_DP_CMD_SET;
+ dp_request.user_features = 0;
+ dp_request.user_features |= OVS_DP_F_UNALIGNED;
dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
- }
- if (error) {
- return error;
+ if (error) {
+ return error;
+ }
+ error = open_dpif(&dp, dpifp);
}
- error = open_dpif(&dp, dpifp);
- dpif_netlink_set_features(*dpifp, OVS_DP_F_TC_RECIRC_SHARING);
ofpbuf_delete(buf);
if (create) {
diff --git a/lib/flow.c b/lib/flow.c
index 89837de95d..a021bc0eba 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -1006,14 +1006,18 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
if (OVS_LIKELY(nw_proto == IPPROTO_TCP)) {
if (OVS_LIKELY(size >= TCP_HEADER_LEN)) {
const struct tcp_header *tcp = data;
-
- miniflow_push_be32(mf, arp_tha.ea[2], 0);
- miniflow_push_be32(mf, tcp_flags,
- TCP_FLAGS_BE32(tcp->tcp_ctl));
- miniflow_push_be16(mf, tp_src, tcp->tcp_src);
- miniflow_push_be16(mf, tp_dst, tcp->tcp_dst);
- miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
- miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
+ size_t tcp_hdr_len = TCP_OFFSET(tcp->tcp_ctl) * 4;
+
+ if (OVS_LIKELY(tcp_hdr_len >= TCP_HEADER_LEN)
+ && OVS_LIKELY(size >= tcp_hdr_len)) {
+ miniflow_push_be32(mf, arp_tha.ea[2], 0);
+ miniflow_push_be32(mf, tcp_flags,
+ TCP_FLAGS_BE32(tcp->tcp_ctl));
+ miniflow_push_be16(mf, tp_src, tcp->tcp_src);
+ miniflow_push_be16(mf, tp_dst, tcp->tcp_dst);
+ miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
+ miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
+ }
}
} else if (OVS_LIKELY(nw_proto == IPPROTO_UDP)) {
if (OVS_LIKELY(size >= UDP_HEADER_LEN)) {
diff --git a/lib/hindex.h b/lib/hindex.h
index 876c5a9e39..f7a30d511a 100644
--- a/lib/hindex.h
+++ b/lib/hindex.h
@@ -128,18 +128,22 @@ void hindex_remove(struct hindex *, struct hindex_node *);
* Evaluates HASH only once.
*/
#define HINDEX_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, HINDEX) \
- for (INIT_CONTAINER(NODE, hindex_node_with_hash(HINDEX, HASH), MEMBER); \
- NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER); \
- ASSIGN_CONTAINER(NODE, (NODE)->MEMBER.s, MEMBER))
+ for (INIT_MULTIVAR(NODE, MEMBER, hindex_node_with_hash(HINDEX, HASH), \
+ struct hindex_node); \
+ CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \
+ UPDATE_MULTIVAR(NODE, ITER_VAR(NODE)->s))
/* Safe when NODE may be freed (not needed when NODE may be removed from the
* hash map but its members remain accessible and intact). */
-#define HINDEX_FOR_EACH_WITH_HASH_SAFE(NODE, NEXT, MEMBER, HASH, HINDEX) \
- for (INIT_CONTAINER(NODE, hindex_node_with_hash(HINDEX, HASH), MEMBER); \
- (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER) \
- ? INIT_CONTAINER(NEXT, (NODE)->MEMBER.s, MEMBER), 1 \
- : 0); \
- (NODE) = (NEXT))
+#define HINDEX_FOR_EACH_WITH_HASH_SAFE(NODE, NEXT, MEMBER, HASH, HINDEX) \
+ for (INIT_MULTIVAR_SAFE_LONG(NODE, NEXT, MEMBER, \
+ hindex_node_with_hash(HINDEX, HASH), \
+ struct hindex_node); \
+ CONDITION_MULTIVAR_SAFE_LONG(NODE, NEXT, MEMBER, \
+ ITER_VAR(NODE) != NULL, \
+ ITER_VAR(NEXT) = ITER_VAR(NODE)->s, \
+ ITER_VAR(NEXT) != NULL); \
+ UPDATE_MULTIVAR_SAFE_LONG(NODE, NEXT))
/* Returns the head node in 'hindex' with the given 'hash', or a null pointer
* if no nodes have that hash value. */
@@ -157,19 +161,22 @@ hindex_node_with_hash(const struct hindex *hindex, size_t hash)
/* Iteration. */
/* Iterates through every node in HINDEX. */
-#define HINDEX_FOR_EACH(NODE, MEMBER, HINDEX) \
- for (INIT_CONTAINER(NODE, hindex_first(HINDEX), MEMBER); \
- NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER); \
- ASSIGN_CONTAINER(NODE, hindex_next(HINDEX, &(NODE)->MEMBER), MEMBER))
+#define HINDEX_FOR_EACH(NODE, MEMBER, HINDEX) \
+ for (INIT_MULTIVAR(NODE, MEMBER, hindex_first(HINDEX), \
+ struct hindex_node); \
+ CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \
+ UPDATE_MULTIVAR(NODE, hindex_next(HINDEX, ITER_VAR(NODE))))
/* Safe when NODE may be freed (not needed when NODE may be removed from the
* hash index but its members remain accessible and intact). */
-#define HINDEX_FOR_EACH_SAFE(NODE, NEXT, MEMBER, HINDEX) \
- for (INIT_CONTAINER(NODE, hindex_first(HINDEX), MEMBER); \
- (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER) \
- ? INIT_CONTAINER(NEXT, hindex_next(HINDEX, &(NODE)->MEMBER), MEMBER), 1 \
- : 0); \
- (NODE) = (NEXT))
+#define HINDEX_FOR_EACH_SAFE(NODE, NEXT, MEMBER, HINDEX) \
+ for (INIT_MULTIVAR_SAFE_LONG(NODE, NEXT, MEMBER, hindex_first(HINDEX), \
+ struct hindex_node); \
+ CONDITION_MULTIVAR_SAFE_LONG(NODE, NEXT, MEMBER, \
+ ITER_VAR(NODE) != NULL, \
+ ITER_VAR(NEXT) = hindex_next(HINDEX, ITER_VAR(NODE)), \
+ ITER_VAR(NEXT) != NULL); \
+ UPDATE_MULTIVAR_SAFE_LONG(NODE, NEXT))
struct hindex_node *hindex_first(const struct hindex *);
struct hindex_node *hindex_next(const struct hindex *,
diff --git a/lib/ipf.c b/lib/ipf.c
index d9f781147a..507db2aea2 100644
--- a/lib/ipf.c
+++ b/lib/ipf.c
@@ -943,6 +943,8 @@ ipf_extract_frags_from_batch(struct ipf *ipf, struct dp_packet_batch *pb,
ovs_mutex_lock(&ipf->ipf_lock);
if (!ipf_handle_frag(ipf, pkt, dl_type, zone, now, hash_basis)) {
dp_packet_batch_refill(pb, pkt, pb_idx);
+ } else {
+ dp_packet_delete(pkt);
}
ovs_mutex_unlock(&ipf->ipf_lock);
} else {
@@ -1152,52 +1154,56 @@ ipf_post_execute_reass_pkts(struct ipf *ipf,
* NETDEV_MAX_BURST. */
DP_PACKET_BATCH_REFILL_FOR_EACH (pb_idx, pb_cnt, pkt, pb) {
if (rp && pkt == rp->list->reass_execute_ctx) {
+ const struct ipf_frag *frag_0 = &rp->list->frag_list[0];
+ void *l4_frag = dp_packet_l4(frag_0->pkt);
+ void *l4_reass = dp_packet_l4(pkt);
+ memcpy(l4_frag, l4_reass, dp_packet_l4_size(frag_0->pkt));
+
for (int i = 0; i <= rp->list->last_inuse_idx; i++) {
- rp->list->frag_list[i].pkt->md.ct_label = pkt->md.ct_label;
- rp->list->frag_list[i].pkt->md.ct_mark = pkt->md.ct_mark;
- rp->list->frag_list[i].pkt->md.ct_state = pkt->md.ct_state;
- rp->list->frag_list[i].pkt->md.ct_zone = pkt->md.ct_zone;
- rp->list->frag_list[i].pkt->md.ct_orig_tuple_ipv6 =
+ const struct ipf_frag *frag_i = &rp->list->frag_list[i];
+
+ frag_i->pkt->md.ct_label = pkt->md.ct_label;
+ frag_i->pkt->md.ct_mark = pkt->md.ct_mark;
+ frag_i->pkt->md.ct_state = pkt->md.ct_state;
+ frag_i->pkt->md.ct_zone = pkt->md.ct_zone;
+ frag_i->pkt->md.ct_orig_tuple_ipv6 =
pkt->md.ct_orig_tuple_ipv6;
if (pkt->md.ct_orig_tuple_ipv6) {
- rp->list->frag_list[i].pkt->md.ct_orig_tuple.ipv6 =
+ frag_i->pkt->md.ct_orig_tuple.ipv6 =
pkt->md.ct_orig_tuple.ipv6;
} else {
- rp->list->frag_list[i].pkt->md.ct_orig_tuple.ipv4 =
+ frag_i->pkt->md.ct_orig_tuple.ipv4 =
pkt->md.ct_orig_tuple.ipv4;
}
- }
-
- const struct ipf_frag *frag_0 = &rp->list->frag_list[0];
- void *l4_frag = dp_packet_l4(frag_0->pkt);
- void *l4_reass = dp_packet_l4(pkt);
- memcpy(l4_frag, l4_reass, dp_packet_l4_size(frag_0->pkt));
-
- if (v6) {
- struct ovs_16aligned_ip6_hdr *l3_frag
- = dp_packet_l3(frag_0->pkt);
- struct ovs_16aligned_ip6_hdr *l3_reass = dp_packet_l3(pkt);
- l3_frag->ip6_src = l3_reass->ip6_src;
- l3_frag->ip6_dst = l3_reass->ip6_dst;
- } else {
- struct ip_header *l3_frag = dp_packet_l3(frag_0->pkt);
- struct ip_header *l3_reass = dp_packet_l3(pkt);
- if (!dp_packet_hwol_is_ipv4(frag_0->pkt)) {
- ovs_be32 reass_ip =
- get_16aligned_be32(&l3_reass->ip_src);
- ovs_be32 frag_ip =
- get_16aligned_be32(&l3_frag->ip_src);
-
- l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum,
- frag_ip, reass_ip);
- reass_ip = get_16aligned_be32(&l3_reass->ip_dst);
- frag_ip = get_16aligned_be32(&l3_frag->ip_dst);
- l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum,
- frag_ip, reass_ip);
+ if (v6) {
+ struct ovs_16aligned_ip6_hdr *l3_frag
+ = dp_packet_l3(frag_i->pkt);
+ struct ovs_16aligned_ip6_hdr *l3_reass
+ = dp_packet_l3(pkt);
+ l3_frag->ip6_src = l3_reass->ip6_src;
+ l3_frag->ip6_dst = l3_reass->ip6_dst;
+ } else {
+ struct ip_header *l3_frag = dp_packet_l3(frag_i->pkt);
+ struct ip_header *l3_reass = dp_packet_l3(pkt);
+ if (!dp_packet_hwol_is_ipv4(frag_i->pkt)) {
+ ovs_be32 reass_ip =
+ get_16aligned_be32(&l3_reass->ip_src);
+ ovs_be32 frag_ip =
+ get_16aligned_be32(&l3_frag->ip_src);
+
+ l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum,
+ frag_ip,
+ reass_ip);
+ reass_ip = get_16aligned_be32(&l3_reass->ip_dst);
+ frag_ip = get_16aligned_be32(&l3_frag->ip_dst);
+ l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum,
+ frag_ip,
+ reass_ip);
+ }
+
+ l3_frag->ip_src = l3_reass->ip_src;
+ l3_frag->ip_dst = l3_reass->ip_dst;
}
-
- l3_frag->ip_src = l3_reass->ip_src;
- l3_frag->ip_dst = l3_reass->ip_dst;
}
ipf_completed_list_add(&ipf->frag_complete_list, rp->list);
diff --git a/lib/json.c b/lib/json.c
index 32d25003b8..0baf7c622c 100644
--- a/lib/json.c
+++ b/lib/json.c
@@ -146,6 +146,7 @@ json_type_to_string(enum json_type type)
case JSON_STRING:
return "string";
+ case JSON_SERIALIZED_OBJECT:
case JSON_N_TYPES:
default:
return "<invalid>";
@@ -180,6 +181,14 @@ json_string_create(const char *s)
return json_string_create_nocopy(xstrdup(s));
}
+struct json *
+json_serialized_object_create(const struct json *src)
+{
+ struct json *json = json_create(JSON_SERIALIZED_OBJECT);
+ json->string = json_to_string(src, JSSF_SORT);
+ return json;
+}
+
struct json *
json_array_create_empty(void)
{
@@ -309,6 +318,13 @@ json_string(const struct json *json)
return json->string;
}
+const char *
+json_serialized_object(const struct json *json)
+{
+ ovs_assert(json->type == JSON_SERIALIZED_OBJECT);
+ return json->string;
+}
+
struct json_array *
json_array(const struct json *json)
{
@@ -362,6 +378,7 @@ json_destroy(struct json *json)
break;
case JSON_STRING:
+ case JSON_SERIALIZED_OBJECT:
free(json->string);
break;
@@ -422,6 +439,9 @@ json_deep_clone(const struct json *json)
case JSON_STRING:
return json_string_create(json->string);
+ case JSON_SERIALIZED_OBJECT:
+ return json_serialized_object_create(json);
+
case JSON_NULL:
case JSON_FALSE:
case JSON_TRUE:
@@ -521,6 +541,7 @@ json_hash(const struct json *json, size_t basis)
return json_hash_array(&json->array, basis);
case JSON_STRING:
+ case JSON_SERIALIZED_OBJECT:
return hash_string(json->string, basis);
case JSON_NULL:
@@ -596,6 +617,7 @@ json_equal(const struct json *a, const struct json *b)
return json_equal_array(&a->array, &b->array);
case JSON_STRING:
+ case JSON_SERIALIZED_OBJECT:
return !strcmp(a->string, b->string);
case JSON_NULL:
@@ -1072,6 +1094,14 @@ json_from_string(const char *string)
return json_parser_finish(p);
}
+/* Parses data of JSON_SERIALIZED_OBJECT to the real JSON. */
+struct json *
+json_from_serialized_object(const struct json *json)
+{
+ ovs_assert(json->type == JSON_SERIALIZED_OBJECT);
+ return json_from_string(json->string);
+}
+
/* Reads the file named 'file_name', parses its contents as a JSON object or
* array, and returns a newly allocated 'struct json'. The caller must free
* the returned structure with json_destroy() when it is no longer needed.
@@ -1563,6 +1593,10 @@ json_serialize(const struct json *json, struct json_serializer *s)
json_serialize_string(json->string, ds);
break;
+ case JSON_SERIALIZED_OBJECT:
+ ds_put_cstr(ds, json->string);
+ break;
+
case JSON_N_TYPES:
default:
OVS_NOT_REACHED();
@@ -1696,14 +1730,30 @@ json_serialize_string(const char *string, struct ds *ds)
{
uint8_t c;
uint8_t c2;
+ size_t count;
const char *escape;
+ const char *start;
ds_put_char(ds, '"');
+ count = 0;
+ start = string;
while ((c = *string++) != '\0') {
- escape = chars_escaping[c];
- while ((c2 = *escape++) != '\0') {
- ds_put_char(ds, c2);
+ if (c >= ' ' && c != '"' && c != '\\') {
+ count++;
+ } else {
+ if (count) {
+ ds_put_buffer(ds, start, count);
+ count = 0;
+ }
+ start = string;
+ escape = chars_escaping[c];
+ while ((c2 = *escape++) != '\0') {
+ ds_put_char(ds, c2);
+ }
}
}
+ if (count) {
+ ds_put_buffer(ds, start, count);
+ }
ds_put_char(ds, '"');
}
diff --git a/lib/lldp/lldp.c b/lib/lldp/lldp.c
index 18afbab9a7..dfeb2a8002 100644
--- a/lib/lldp/lldp.c
+++ b/lib/lldp/lldp.c
@@ -146,7 +146,9 @@ static void
lldp_tlv_end(struct dp_packet *p, unsigned int start)
{
ovs_be16 *tlv = dp_packet_at_assert(p, start, 2);
- *tlv |= htons((dp_packet_size(p) - (start + 2)) & 0x1ff);
+ put_unaligned_be16(tlv,
+ get_unaligned_be16(tlv)
+ | htons((dp_packet_size(p) - (start + 2)) & 0x1ff));
}
int
diff --git a/lib/lldp/lldpd.c b/lib/lldp/lldpd.c
index a024dc5e58..ee1051dde7 100644
--- a/lib/lldp/lldpd.c
+++ b/lib/lldp/lldpd.c
@@ -140,13 +140,9 @@ lldpd_cleanup(struct lldpd *cfg)
VLOG_DBG("cleanup all ports");
LIST_FOR_EACH_SAFE (hw, hw_next, h_entries, &cfg->g_hardware) {
- if (!hw->h_flags) {
- ovs_list_remove(&hw->h_entries);
- lldpd_remote_cleanup(hw, NULL, true);
- lldpd_hardware_cleanup(cfg, hw);
- } else {
- lldpd_remote_cleanup(hw, NULL, false);
- }
+ ovs_list_remove(&hw->h_entries);
+ lldpd_remote_cleanup(hw, NULL, true);
+ lldpd_hardware_cleanup(cfg, hw);
}
VLOG_DBG("cleanup all chassis");
diff --git a/lib/meta-flow.c b/lib/meta-flow.c
index c808d205d5..e03cd8d0c5 100644
--- a/lib/meta-flow.c
+++ b/lib/meta-flow.c
@@ -1788,6 +1788,19 @@ mf_is_tun_metadata(const struct mf_field *mf)
mf->id < MFF_TUN_METADATA0 + TUN_METADATA_NUM_OPTS;
}
+bool
+mf_is_frozen_metadata(const struct mf_field *mf)
+{
+ if (mf->id >= MFF_TUN_ID && mf->id <= MFF_IN_PORT_OXM) {
+ return true;
+ }
+
+ if (mf->id >= MFF_REG0 && mf->id < MFF_ETH_SRC) {
+ return true;
+ }
+ return false;
+}
+
bool
mf_is_pipeline_field(const struct mf_field *mf)
{
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 45a96b9be2..738fb44b3c 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -961,14 +961,6 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
rte_eth_dev_info_get(dev->port_id, &info);
- /* As of DPDK 19.11, it is not allowed to set a mq_mode for
- * virtio PMD driver. */
- if (!strcmp(info.driver_name, "net_virtio")) {
- conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
- } else {
- conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
- }
-
/* As of DPDK 17.11.1 a few PMDs require to explicitly enable
* scatter to support jumbo RX.
* Setting scatter for the device is done after checking for
@@ -1000,6 +992,11 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
/* Limit configured rss hash functions to only those supported
* by the eth device. */
conf.rx_adv_conf.rss_conf.rss_hf &= info.flow_type_rss_offloads;
+ if (conf.rx_adv_conf.rss_conf.rss_hf == 0) {
+ conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
+ } else {
+ conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
+ }
/* A device may report more queues than it makes available (this has
* been observed for Intel xl710, which reserves some of them for
@@ -2867,6 +2864,9 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
bool concurrent_txq)
{
if (OVS_UNLIKELY(!(dev->flags & NETDEV_UP))) {
+ rte_spinlock_lock(&dev->stats_lock);
+ dev->stats.tx_dropped += dp_packet_batch_size(batch);
+ rte_spinlock_unlock(&dev->stats_lock);
dp_packet_delete_batch(batch, true);
return;
}
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 60dd138914..94c9737110 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -627,6 +627,7 @@ netdev_linux_notify_sock(void)
if (!error) {
size_t i;
+ nl_sock_listen_all_nsid(sock, true);
for (i = 0; i < ARRAY_SIZE(mcgroups); i++) {
error = nl_sock_join_mcgroup(sock, mcgroups[i]);
if (error) {
@@ -636,7 +637,6 @@ netdev_linux_notify_sock(void)
}
}
}
- nl_sock_listen_all_nsid(sock, true);
ovsthread_once_done(&once);
}
@@ -6285,7 +6285,14 @@ get_stats_via_netlink(const struct netdev *netdev_, struct netdev_stats *stats)
if (ofpbuf_try_pull(reply, NLMSG_HDRLEN + sizeof(struct ifinfomsg))) {
const struct nlattr *a = nl_attr_find(reply, 0, IFLA_STATS64);
if (a && nl_attr_get_size(a) >= sizeof(struct rtnl_link_stats64)) {
- netdev_stats_from_rtnl_link_stats64(stats, nl_attr_get(a));
+ const struct rtnl_link_stats64 *lstats = nl_attr_get(a);
+ struct rtnl_link_stats64 aligned_lstats;
+
+ if (!IS_PTR_ALIGNED(lstats)) {
+ memcpy(&aligned_lstats, lstats, sizeof aligned_lstats);
+ lstats = &aligned_lstats;
+ }
+ netdev_stats_from_rtnl_link_stats64(stats, lstats);
error = 0;
} else {
a = nl_attr_find(reply, 0, IFLA_STATS);
diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index 9845e8d3fe..e1568e38a0 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -481,10 +481,10 @@ netdev_tc_flow_dump_destroy(struct netdev_flow_dump *dump)
static void
parse_flower_rewrite_to_netlink_action(struct ofpbuf *buf,
- struct tc_flower *flower)
+ struct tc_action *action)
{
- char *mask = (char *) &flower->rewrite.mask;
- char *data = (char *) &flower->rewrite.key;
+ char *mask = (char *) &action->rewrite.mask;
+ char *data = (char *) &action->rewrite.key;
for (int type = 0; type < ARRAY_SIZE(set_flower_map); type++) {
char *put = NULL;
@@ -585,8 +585,10 @@ parse_tc_flower_to_stats(struct tc_flower *flower,
}
memset(stats, 0, sizeof *stats);
- stats->n_packets = get_32aligned_u64(&flower->stats.n_packets);
- stats->n_bytes = get_32aligned_u64(&flower->stats.n_bytes);
+ stats->n_packets = get_32aligned_u64(&flower->stats_sw.n_packets);
+ stats->n_packets += get_32aligned_u64(&flower->stats_hw.n_packets);
+ stats->n_bytes = get_32aligned_u64(&flower->stats_sw.n_bytes);
+ stats->n_bytes += get_32aligned_u64(&flower->stats_hw.n_bytes);
stats->used = flower->lastused;
}
@@ -877,7 +879,7 @@ parse_tc_flower_to_match(struct tc_flower *flower,
}
break;
case TC_ACT_PEDIT: {
- parse_flower_rewrite_to_netlink_action(buf, flower);
+ parse_flower_rewrite_to_netlink_action(buf, action);
}
break;
case TC_ACT_ENCAP: {
@@ -1222,8 +1224,8 @@ parse_put_flow_set_masked_action(struct tc_flower *flower,
uint64_t set_stub[1024 / 8];
struct ofpbuf set_buf = OFPBUF_STUB_INITIALIZER(set_stub);
char *set_data, *set_mask;
- char *key = (char *) &flower->rewrite.key;
- char *mask = (char *) &flower->rewrite.mask;
+ char *key = (char *) &action->rewrite.key;
+ char *mask = (char *) &action->rewrite.mask;
const struct nlattr *attr;
int i, j, type;
size_t size;
@@ -1265,14 +1267,6 @@ parse_put_flow_set_masked_action(struct tc_flower *flower,
}
}
- if (!is_all_zeros(&flower->rewrite, sizeof flower->rewrite)) {
- if (flower->rewrite.rewrite == false) {
- flower->rewrite.rewrite = true;
- action->type = TC_ACT_PEDIT;
- flower->action_count++;
- }
- }
-
if (hasmask && !is_all_zeros(set_mask, size)) {
VLOG_DBG_RL(&rl, "unsupported sub attribute of set action type %d",
type);
@@ -1281,6 +1275,8 @@ parse_put_flow_set_masked_action(struct tc_flower *flower,
}
ofpbuf_uninit(&set_buf);
+ action->type = TC_ACT_PEDIT;
+ flower->action_count++;
return 0;
}
@@ -1541,6 +1537,12 @@ parse_match_ct_state_to_flower(struct tc_flower *flower, struct match *match)
flower->key.ct_state &= ~(TCA_FLOWER_KEY_CT_FLAGS_NEW);
flower->mask.ct_state &= ~(TCA_FLOWER_KEY_CT_FLAGS_NEW);
}
+
+ if (flower->key.ct_state &&
+ !(flower->key.ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED)) {
+ flower->key.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
+ flower->mask.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
+ }
}
if (mask->ct_zone) {
@@ -1841,7 +1843,25 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
VLOG_DBG_RL(&rl, "Can't find netdev for output port %d", port);
return ENODEV;
}
+
+ if (!netdev_flow_api_equals(netdev, outdev)) {
+ VLOG_DBG_RL(&rl,
+ "Flow API provider mismatch between ingress (%s) "
+ "and egress (%s) ports",
+ netdev_get_name(netdev), netdev_get_name(outdev));
+ netdev_close(outdev);
+ return EOPNOTSUPP;
+ }
+
action->out.ifindex_out = netdev_get_ifindex(outdev);
+ if (action->out.ifindex_out < 0) {
+ VLOG_DBG_RL(&rl,
+ "Can't find ifindex for output port %s, error %d",
+ netdev_get_name(outdev), action->out.ifindex_out);
+ netdev_close(outdev);
+ return -action->out.ifindex_out;
+ }
+
action->out.ingress = is_internal_port(netdev_get_type(outdev));
action->type = TC_ACT_OUTPUT;
flower.action_count++;
@@ -2015,9 +2035,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED,
if (stats) {
memset(stats, 0, sizeof *stats);
if (!tc_get_flower(&id, &flower)) {
- stats->n_packets = get_32aligned_u64(&flower.stats.n_packets);
- stats->n_bytes = get_32aligned_u64(&flower.stats.n_bytes);
- stats->used = flower.lastused;
+ parse_tc_flower_to_stats(&flower, stats);
}
}
diff --git a/lib/odp-util.c b/lib/odp-util.c
index 7729a90608..ce3b853e0f 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -2941,7 +2941,7 @@ odp_nsh_key_from_attr__(const struct nlattr *attr, bool is_mask,
const struct ovs_nsh_key_md1 *md1 = nl_attr_get(a);
has_md1 = true;
memcpy(nsh->context, md1->context, sizeof md1->context);
- if (len == 2 * sizeof(*md1)) {
+ if (nsh_mask && (len == 2 * sizeof *md1)) {
const struct ovs_nsh_key_md1 *md1_mask = md1 + 1;
memcpy(nsh_mask->context, md1_mask->context,
sizeof(*md1_mask));
@@ -3212,7 +3212,7 @@ tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key,
opts.flags = tun_key->gtpu_flags;
opts.msgtype = tun_key->gtpu_msgtype;
- nl_msg_put_unspec(a, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
+ nl_msg_put_unspec(a, OVS_TUNNEL_KEY_ATTR_GTPU_OPTS,
&opts, sizeof(opts));
}
nl_msg_end_nested(a, tun_key_ofs);
@@ -3400,16 +3400,16 @@ format_eth(struct ds *ds, const char *name, const struct eth_addr key,
static void
format_be64(struct ds *ds, const char *name, ovs_be64 key,
- const ovs_be64 *mask, bool verbose)
+ const ovs_32aligned_be64 *mask_, bool verbose)
{
- bool mask_empty = mask && !*mask;
+ ovs_be64 mask = mask_ ? get_32aligned_be64(mask_) : htonll(0);
- if (verbose || !mask_empty) {
- bool mask_full = !mask || *mask == OVS_BE64_MAX;
+ if (verbose || mask) {
+ bool mask_full = !mask_ || mask == OVS_BE64_MAX;
ds_put_format(ds, "%s=0x%"PRIx64, name, ntohll(key));
if (!mask_full) { /* Partially masked. */
- ds_put_format(ds, "/%#"PRIx64, ntohll(*mask));
+ ds_put_format(ds, "/%#"PRIx64, ntohll(mask));
}
ds_put_char(ds, ',');
}
@@ -4601,6 +4601,11 @@ odp_flow_format(const struct nlattr *key, size_t key_len,
ds_put_char(ds, ',');
}
ds_put_cstr(ds, "eth()");
+ } else if (attr_type == OVS_KEY_ATTR_PACKET_TYPE && is_wildcard) {
+ /* See the above help text, however in the case where the
+ * packet type is not shown, we still need to display the
+ * eth() header if the packet type is wildcarded. */
+ has_packet_type_key = false;
}
ofpbuf_clear(&ofp);
}
@@ -4618,7 +4623,7 @@ odp_flow_format(const struct nlattr *key, size_t key_len,
}
ds_put_char(ds, ')');
}
- if (!has_ethtype_key) {
+ if (!has_ethtype_key && mask) {
const struct nlattr *ma = nl_attr_find__(mask, mask_len,
OVS_KEY_ATTR_ETHERTYPE);
if (ma) {
@@ -7132,11 +7137,6 @@ parse_l2_5_onward(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1],
}
}
}
- } else if (src_flow->nw_proto == IPPROTO_IGMP
- && src_flow->dl_type == htons(ETH_TYPE_IP)) {
- /* OVS userspace parses the IGMP type, code, and group, but its
- * datapaths do not, so there is always missing information. */
- return ODP_FIT_TOO_LITTLE;
}
if (is_mask && expected_bit != OVS_KEY_ATTR_UNSPEC) {
if ((flow->tp_src || flow->tp_dst) && flow->nw_proto != 0xff) {
@@ -7230,6 +7230,14 @@ parse_8021q_onward(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1],
}
expected_attrs = 0;
+ /* For OVS to be backward compatible with newer datapath
+ * implementations, we should ignore out of range attributes. */
+ if (out_of_range_attr) {
+ VLOG_DBG("Flow key decode found unknown OVS_KEY_ATTR, %d",
+ out_of_range_attr);
+ out_of_range_attr = 0;
+ }
+
if (!parse_ethertype(attrs, present_attrs, &expected_attrs,
flow, src_flow, errorp)) {
return ODP_FIT_ERROR;
@@ -7279,6 +7287,14 @@ odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len,
}
expected_attrs = 0;
+ /* For OVS to be backward compatible with newer datapath implementations,
+ * we should ignore out of range attributes. */
+ if (out_of_range_attr) {
+ VLOG_DBG("Flow key decode found unknown OVS_KEY_ATTR, %d",
+ out_of_range_attr);
+ out_of_range_attr = 0;
+ }
+
/* Metadata. */
if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_RECIRC_ID)) {
flow->recirc_id = nl_attr_get_u32(attrs[OVS_KEY_ATTR_RECIRC_ID]);
@@ -7513,10 +7529,12 @@ parse_key_and_mask_to_match(const struct nlattr *key, size_t key_len,
fitness = odp_flow_key_to_flow(key, key_len, &match->flow, NULL);
if (fitness) {
- /* This should not happen: it indicates that
- * odp_flow_key_from_flow() and odp_flow_key_to_flow() disagree on
- * the acceptable form of a flow. Log the problem as an error,
- * with enough details to enable debugging. */
+ /* This will happen when the odp_flow_key_to_flow() function can't
+ * parse the netlink message to a match structure. It will return
+ * ODP_FIT_TOO_LITTLE if there is not enough information to parse the
+ * content successfully, ODP_FIT_TOO_MUCH if there is too much netlink
+ * data and we do not know how to safely ignore it, and ODP_FIT_ERROR
+ * in any other case. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
if (!VLOG_DROP_ERR(&rl)) {
@@ -7524,7 +7542,8 @@ parse_key_and_mask_to_match(const struct nlattr *key, size_t key_len,
ds_init(&s);
odp_flow_format(key, key_len, NULL, 0, NULL, &s, true);
- VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s));
+ VLOG_ERR("internal error parsing flow key %s (%s)",
+ ds_cstr(&s), odp_key_fitness_to_string(fitness));
ds_destroy(&s);
}
@@ -7534,10 +7553,7 @@ parse_key_and_mask_to_match(const struct nlattr *key, size_t key_len,
fitness = odp_flow_key_to_mask(mask, mask_len, &match->wc, &match->flow,
NULL);
if (fitness) {
- /* This should not happen: it indicates that
- * odp_flow_key_from_mask() and odp_flow_key_to_mask()
- * disagree on the acceptable form of a mask. Log the problem
- * as an error, with enough details to enable debugging. */
+ /* This should not happen, see comment above. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
if (!VLOG_DROP_ERR(&rl)) {
diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c
index ecf914eac1..7ea4b6ed56 100644
--- a/lib/ofp-actions.c
+++ b/lib/ofp-actions.c
@@ -853,7 +853,9 @@ decode_NXAST_RAW_CONTROLLER2(const struct ext_action_header *eah,
case NXAC2PT_REASON: {
uint8_t u8;
error = ofpprop_parse_u8(&payload, &u8);
- oc->reason = u8;
+ if (!error) {
+ oc->reason = u8;
+ }
break;
}
diff --git a/lib/ofp-flow.c b/lib/ofp-flow.c
index ff0396845a..3bc744f78f 100644
--- a/lib/ofp-flow.c
+++ b/lib/ofp-flow.c
@@ -1254,7 +1254,16 @@ ofputil_append_flow_stats_reply(const struct ofputil_flow_stats *fs,
OVS_NOT_REACHED();
}
- ofpmp_postappend(replies, start_ofs);
+ if ((reply->size - start_ofs) > (UINT16_MAX - ofpbuf_headersize(reply))) {
+ /* When this happens, the reply will not fit in a single OFP message,
+ * and we should not append it to the queue. We will log a warning
+ * and continue with the next flow stat entry. */
+ reply->size = start_ofs;
+ VLOG_WARN_RL(&rl, "Flow exceeded the maximum flow statistics reply "
+ "size and was excluded from the response set");
+ } else {
+ ofpmp_postappend(replies, start_ofs);
+ }
fs_->match.flow.tunnel.metadata.tab = orig_tun_table;
}
diff --git a/lib/ofp-packet.c b/lib/ofp-packet.c
index 4579548ee1..9485ddfc93 100644
--- a/lib/ofp-packet.c
+++ b/lib/ofp-packet.c
@@ -133,7 +133,9 @@ decode_nx_packet_in2(const struct ofp_header *oh, bool loose,
case NXPINT_FULL_LEN: {
uint32_t u32;
error = ofpprop_parse_u32(&payload, &u32);
- *total_len = u32;
+ if (!error) {
+ *total_len = u32;
+ }
break;
}
@@ -152,7 +154,9 @@ decode_nx_packet_in2(const struct ofp_header *oh, bool loose,
case NXPINT_REASON: {
uint8_t reason;
error = ofpprop_parse_u8(&payload, &reason);
- pin->reason = reason;
+ if (!error) {
+ pin->reason = reason;
+ }
break;
}
@@ -883,7 +887,9 @@ ofputil_decode_packet_in_private(const struct ofp_header *oh, bool loose,
case NXCPT_ODP_PORT: {
uint32_t value;
error = ofpprop_parse_u32(&payload, &value);
- pin->odp_port = u32_to_odp(value);
+ if (!error) {
+ pin->odp_port = u32_to_odp(value);
+ }
break;
}
diff --git a/lib/ofpbuf.c b/lib/ofpbuf.c
index 4edb3c114a..05c0b5711d 100644
--- a/lib/ofpbuf.c
+++ b/lib/ofpbuf.c
@@ -422,6 +422,10 @@ void
ofpbuf_reserve(struct ofpbuf *b, size_t size)
{
ovs_assert(!b->size);
+
+ if (!size) {
+ return;
+ }
ofpbuf_prealloc_tailroom(b, size);
b->data = (char*)b->data + size;
}
diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h
index ecc251a7ff..83bd10cca5 100644
--- a/lib/ovs-numa.h
+++ b/lib/ovs-numa.h
@@ -68,9 +68,9 @@ void ovs_numa_dump_destroy(struct ovs_numa_dump *);
int ovs_numa_thread_setaffinity_core(unsigned core_id);
#define FOR_EACH_CORE_ON_DUMP(ITER, DUMP) \
- HMAP_FOR_EACH((ITER), hmap_node, &(DUMP)->cores)
+ HMAP_FOR_EACH (ITER, hmap_node, &(DUMP)->cores)
#define FOR_EACH_NUMA_ON_DUMP(ITER, DUMP) \
- HMAP_FOR_EACH((ITER), hmap_node, &(DUMP)->numas)
+ HMAP_FOR_EACH (ITER, hmap_node, &(DUMP)->numas)
#endif /* ovs-numa.h */
diff --git a/lib/ovs-rcu.c b/lib/ovs-rcu.c
index 1866bd3088..946aa04d18 100644
--- a/lib/ovs-rcu.c
+++ b/lib/ovs-rcu.c
@@ -444,3 +444,40 @@ ovsrcu_init_module(void)
ovsthread_once_done(&once);
}
}
+
+static void
+ovsrcu_barrier_func(void *seq_)
+{
+ struct seq *seq = (struct seq *) seq_;
+ seq_change(seq);
+}
+
+/* Similar to the kernel rcu_barrier, ovsrcu_barrier waits for all outstanding
+ * RCU callbacks to complete. However, unlike the kernel rcu_barrier, which
+ * might return immediately if there are no outstanding RCU callbacks,
+ * this API will at least wait for a grace period.
+ *
+ * Callers should also note that the barrier is "one-shot": if an RCU
+ * callback itself registers another RCU callback, this API only
+ * guarantees that the first round of RCU callbacks has been executed
+ * by the time it returns.
+ */
+void
+ovsrcu_barrier(void)
+{
+ struct seq *seq = seq_create();
+ /* First let all threads flush their cbsets. */
+ ovsrcu_synchronize();
+
+ /* Then register a new cbset, ensure this cbset
+ * is at the tail of the global list. */
+ uint64_t seqno = seq_read(seq);
+ ovsrcu_postpone__(ovsrcu_barrier_func, (void *) seq);
+
+ do {
+ seq_wait(seq, seqno);
+ poll_block();
+ } while (seqno == seq_read(seq));
+
+ seq_destroy(seq);
+}
diff --git a/lib/ovs-rcu.h b/lib/ovs-rcu.h
index ecc4c92010..8b397b7fb0 100644
--- a/lib/ovs-rcu.h
+++ b/lib/ovs-rcu.h
@@ -155,6 +155,19 @@
* port_delete(id);
* }
*
+ * Use ovsrcu_barrier() to wait for all the outstanding RCU callbacks to
+ * finish. This is useful when you have to destroy some resources that
+ * are still referenced by outstanding RCU callbacks.
+ *
+ * void rcu_cb(void *A) {
+ * do_something(A);
+ * }
+ *
+ * void destroy_A() {
+ * ovsrcu_postpone(rcu_cb, A); // will use A later
+ * ovsrcu_barrier(); // wait for rcu_cb done
+ * do_destroy_A(); // free A
+ * }
*/
#include "compiler.h"
@@ -310,4 +323,6 @@ void ovsrcu_synchronize(void);
void ovsrcu_exit(void);
+void ovsrcu_barrier(void);
+
#endif /* ovs-rcu.h */
diff --git a/lib/ovsdb-cs.c b/lib/ovsdb-cs.c
index 659d49dbf7..dead31275d 100644
--- a/lib/ovsdb-cs.c
+++ b/lib/ovsdb-cs.c
@@ -1109,6 +1109,23 @@ ovsdb_cs_db_sync_condition(struct ovsdb_cs_db *db)
}
table->req_cond = NULL;
db->cond_changed = true;
+
+ /* There are two cases:
+ * a. either the server already processed the requested monitor
+ * condition change but the FSM was restarted before the
+ * client was notified. In this case the client should
+ * clear its local cache because it's out of sync with the
+ * monitor view on the server side.
+ *
+ * b. OR the server hasn't processed the requested monitor
+ * condition change yet.
+ *
+ * As there's no easy way to differentiate between the two,
+ * and given that this condition should be rare, reset the
+ * 'last_id', essentially flushing the local cached DB
+ * contents.
+ */
+ db->last_id = UUID_ZERO;
}
}
}
@@ -1539,12 +1556,11 @@ ovsdb_cs_db_parse_monitor_reply(struct ovsdb_cs_db *db,
const struct json *table_updates;
bool clear;
if (version == 3) {
- struct uuid last_id;
if (result->type != JSON_ARRAY || result->array.n != 3
|| (result->array.elems[0]->type != JSON_TRUE &&
result->array.elems[0]->type != JSON_FALSE)
|| result->array.elems[1]->type != JSON_STRING
- || !uuid_from_string(&last_id,
+ || !uuid_from_string(&db->last_id,
json_string(result->array.elems[1]))) {
struct ovsdb_error *error = ovsdb_syntax_error(
result, NULL, "bad monitor_cond_since reply format");
@@ -1833,7 +1849,7 @@ server_column_get_string(const struct server_row *row,
{
ovs_assert(server_columns[index].type.key.type == OVSDB_TYPE_STRING);
const struct ovsdb_datum *d = &row->data[index];
- return d->n == 1 ? d->keys[0].string : default_value;
+ return d->n == 1 ? d->keys[0].s->string : default_value;
}
static bool
diff --git a/lib/ovsdb-data.c b/lib/ovsdb-data.c
index c145f5ad97..6654ed6deb 100644
--- a/lib/ovsdb-data.c
+++ b/lib/ovsdb-data.c
@@ -74,7 +74,7 @@ ovsdb_atom_init_default(union ovsdb_atom *atom, enum ovsdb_atomic_type type)
break;
case OVSDB_TYPE_STRING:
- atom->string = xmemdup("", 1);
+ atom->s = ovsdb_atom_string_create_nocopy(xmemdup("", 1));
break;
case OVSDB_TYPE_UUID:
@@ -136,7 +136,7 @@ ovsdb_atom_is_default(const union ovsdb_atom *atom,
return atom->boolean == false;
case OVSDB_TYPE_STRING:
- return atom->string[0] == '\0';
+ return atom->s->string[0] == '\0';
case OVSDB_TYPE_UUID:
return uuid_is_zero(&atom->uuid);
@@ -172,7 +172,8 @@ ovsdb_atom_clone(union ovsdb_atom *new, const union ovsdb_atom *old,
break;
case OVSDB_TYPE_STRING:
- new->string = xstrdup(old->string);
+ new->s = old->s;
+ new->s->n_refs++;
break;
case OVSDB_TYPE_UUID:
@@ -214,7 +215,7 @@ ovsdb_atom_hash(const union ovsdb_atom *atom, enum ovsdb_atomic_type type,
return hash_boolean(atom->boolean, basis);
case OVSDB_TYPE_STRING:
- return hash_string(atom->string, basis);
+ return hash_string(atom->s->string, basis);
case OVSDB_TYPE_UUID:
return hash_int(uuid_hash(&atom->uuid), basis);
@@ -246,7 +247,7 @@ ovsdb_atom_compare_3way(const union ovsdb_atom *a,
return a->boolean - b->boolean;
case OVSDB_TYPE_STRING:
- return strcmp(a->string, b->string);
+ return a->s == b->s ? 0 : strcmp(a->s->string, b->s->string);
case OVSDB_TYPE_UUID:
return uuid_compare_3way(&a->uuid, &b->uuid);
@@ -404,7 +405,7 @@ ovsdb_atom_from_json__(union ovsdb_atom *atom,
case OVSDB_TYPE_STRING:
if (json->type == JSON_STRING) {
- atom->string = xstrdup(json->string);
+ atom->s = ovsdb_atom_string_create(json->string);
return NULL;
}
break;
@@ -473,7 +474,7 @@ ovsdb_atom_to_json(const union ovsdb_atom *atom, enum ovsdb_atomic_type type)
return json_boolean_create(atom->boolean);
case OVSDB_TYPE_STRING:
- return json_string_create(atom->string);
+ return json_string_create(atom->s->string);
case OVSDB_TYPE_UUID:
return wrap_json("uuid", json_string_create_nocopy(
@@ -551,14 +552,18 @@ ovsdb_atom_from_string__(union ovsdb_atom *atom,
if (s_len < 2 || s[s_len - 1] != '"') {
return xasprintf("%s: missing quote at end of "
"quoted string", s);
- } else if (!json_string_unescape(s + 1, s_len - 2,
- &atom->string)) {
- char *error = xasprintf("%s: %s", s, atom->string);
- free(atom->string);
- return error;
+ } else {
+ char *res;
+ if (json_string_unescape(s + 1, s_len - 2, &res)) {
+ atom->s = ovsdb_atom_string_create_nocopy(res);
+ } else {
+ char *error = xasprintf("%s: %s", s, res);
+ free(res);
+ return error;
+ }
}
} else {
- atom->string = xstrdup(s);
+ atom->s = ovsdb_atom_string_create(s);
}
break;
@@ -721,14 +726,14 @@ ovsdb_atom_to_string(const union ovsdb_atom *atom, enum ovsdb_atomic_type type,
break;
case OVSDB_TYPE_STRING:
- if (string_needs_quotes(atom->string)) {
+ if (string_needs_quotes(atom->s->string)) {
struct json json;
json.type = JSON_STRING;
- json.string = atom->string;
+ json.string = atom->s->string;
json_to_ds(&json, 0, out);
} else {
- ds_put_cstr(out, atom->string);
+ ds_put_cstr(out, atom->s->string);
}
break;
@@ -750,7 +755,7 @@ ovsdb_atom_to_bare(const union ovsdb_atom *atom, enum ovsdb_atomic_type type,
struct ds *out)
{
if (type == OVSDB_TYPE_STRING) {
- ds_put_cstr(out, atom->string);
+ ds_put_cstr(out, atom->s->string);
} else {
ovsdb_atom_to_string(atom, type, out);
}
@@ -799,7 +804,7 @@ ovsdb_atom_check_constraints(const union ovsdb_atom *atom,
const struct ovsdb_base_type *base)
{
if (base->enum_
- && ovsdb_datum_find_key(base->enum_, atom, base->type) == UINT_MAX) {
+ && !ovsdb_datum_find_key(base->enum_, atom, base->type, NULL)) {
struct ovsdb_error *error;
struct ds actual = DS_EMPTY_INITIALIZER;
struct ds valid = DS_EMPTY_INITIALIZER;
@@ -877,7 +882,7 @@ ovsdb_atom_check_constraints(const union ovsdb_atom *atom,
return NULL;
case OVSDB_TYPE_STRING:
- return check_string_constraints(atom->string, &base->string);
+ return check_string_constraints(atom->s->string, &base->string);
case OVSDB_TYPE_UUID:
return NULL;
@@ -1691,8 +1696,8 @@ ovsdb_datum_from_smap(struct ovsdb_datum *datum, const struct smap *smap)
struct smap_node *node;
size_t i = 0;
SMAP_FOR_EACH (node, smap) {
- datum->keys[i].string = xstrdup(node->key);
- datum->values[i].string = xstrdup(node->value);
+ datum->keys[i].s = ovsdb_atom_string_create(node->key);
+ datum->values[i].s = ovsdb_atom_string_create(node->value);
i++;
}
ovs_assert(i == datum->n);
@@ -1784,14 +1789,16 @@ ovsdb_datum_compare_3way(const struct ovsdb_datum *a,
a->n));
}
-/* If 'key' is one of the keys in 'datum', returns its index within 'datum',
- * otherwise UINT_MAX. 'key.type' must be the type of the atoms stored in the
- * 'keys' array in 'datum'.
+/* If 'key' is one of the keys in 'datum', returns 'true' and sets '*pos' to
+ * its index within 'datum', otherwise returns 'false' and sets '*pos' to the
+ * index where 'key' should have been.  'pos' may be NULL.  'key_type' must be
+ * the type of the atoms stored in the 'keys' array in 'datum'.
*/
-unsigned int
+bool
ovsdb_datum_find_key(const struct ovsdb_datum *datum,
const union ovsdb_atom *key,
- enum ovsdb_atomic_type key_type)
+ enum ovsdb_atomic_type key_type,
+ unsigned int *pos)
{
unsigned int low = 0;
unsigned int high = datum->n;
@@ -1803,10 +1810,16 @@ ovsdb_datum_find_key(const struct ovsdb_datum *datum,
} else if (cmp > 0) {
low = idx + 1;
} else {
- return idx;
+ if (pos) {
+ *pos = idx;
+ }
+ return true;
}
}
- return UINT_MAX;
+ if (pos) {
+ *pos = low;
+ }
+ return false;
}
/* If 'key' and 'value' is one of the key-value pairs in 'datum', returns its
@@ -1821,10 +1834,11 @@ ovsdb_datum_find_key_value(const struct ovsdb_datum *datum,
const union ovsdb_atom *value,
enum ovsdb_atomic_type value_type)
{
- unsigned int idx = ovsdb_datum_find_key(datum, key, key_type);
- if (idx != UINT_MAX
- && value_type != OVSDB_TYPE_VOID
- && !ovsdb_atom_equals(&datum->values[idx], value, value_type)) {
+ unsigned int idx;
+
+ if (!ovsdb_datum_find_key(datum, key, key_type, &idx)
+ || (value_type != OVSDB_TYPE_VOID
+ && !ovsdb_atom_equals(&datum->values[idx], value, value_type))) {
idx = UINT_MAX;
}
return idx;
@@ -1948,38 +1962,68 @@ ovsdb_datum_add_unsafe(struct ovsdb_datum *datum,
}
}
+/* Adds 'n' atoms starting from index 'start_idx' from 'src' to the end of
+ * 'dst'. 'dst' should have enough memory allocated to hold the additional
+ * 'n' atoms. Atoms are not cloned, i.e. 'dst' will reference the same data.
+ * Caller also should take care of the result being sorted. */
+static void
+ovsdb_datum_push_unsafe(struct ovsdb_datum *dst,
+ const struct ovsdb_datum *src,
+ unsigned int start_idx, unsigned int n,
+ const struct ovsdb_type *type)
+{
+ memcpy(&dst->keys[dst->n], &src->keys[start_idx], n * sizeof src->keys[0]);
+ if (type->value.type != OVSDB_TYPE_VOID) {
+ memcpy(&dst->values[dst->n], &src->values[start_idx],
+ n * sizeof src->values[0]);
+ }
+ dst->n += n;
+}
+
void
ovsdb_datum_union(struct ovsdb_datum *a, const struct ovsdb_datum *b,
- const struct ovsdb_type *type, bool replace)
+ const struct ovsdb_type *type)
{
- unsigned int n;
- size_t bi;
+ struct ovsdb_datum result;
+ unsigned int copied, pos;
- n = a->n;
- for (bi = 0; bi < b->n; bi++) {
- unsigned int ai;
+ ovsdb_datum_init_empty(&result);
- ai = ovsdb_datum_find_key(a, &b->keys[bi], type->key.type);
- if (ai == UINT_MAX) {
- if (n == a->n) {
- ovsdb_datum_reallocate(a, type, a->n + (b->n - bi));
- }
- ovsdb_atom_clone(&a->keys[n], &b->keys[bi], type->key.type);
- if (type->value.type != OVSDB_TYPE_VOID) {
- ovsdb_atom_clone(&a->values[n], &b->values[bi],
- type->value.type);
- }
- n++;
- } else if (replace && type->value.type != OVSDB_TYPE_VOID) {
- ovsdb_atom_destroy(&a->values[ai], type->value.type);
- ovsdb_atom_clone(&a->values[ai], &b->values[bi],
+ copied = 0;
+ for (size_t bi = 0; bi < b->n; bi++) {
+ if (ovsdb_datum_find_key(a, &b->keys[bi], type->key.type, &pos)) {
+ /* Atom with the same key already exists. */
+ continue;
+ }
+ if (!result.keys) {
+ ovsdb_datum_reallocate(&result, type, a->n + (b->n - bi));
+ }
+ if (pos > copied) {
+ /* Need to copy some atoms from 'a' first. */
+ ovsdb_datum_push_unsafe(&result, a, copied, pos - copied, type);
+ copied = pos;
+ }
+ /* Inserting new atom from 'b'. */
+ ovsdb_atom_clone(&result.keys[result.n], &b->keys[bi], type->key.type);
+ if (type->value.type != OVSDB_TYPE_VOID) {
+ ovsdb_atom_clone(&result.values[result.n], &b->values[bi],
type->value.type);
}
+ result.n++;
}
- if (n != a->n) {
- a->n = n;
- ovs_assert(!ovsdb_datum_sort(a, type->key.type));
+ if (!result.keys) {
+ /* 'a' doesn't need to be changed. */
+ return;
+ }
+ if (a->n > copied) {
+ /* Copying remaining atoms. */
+ ovsdb_datum_push_unsafe(&result, a, copied, a->n - copied, type);
}
+ /* All atoms are copied now. */
+ a->n = 0;
+
+ ovsdb_datum_swap(&result, a);
+ ovsdb_datum_destroy(&result, type);
}
void
@@ -1987,26 +2031,55 @@ ovsdb_datum_subtract(struct ovsdb_datum *a, const struct ovsdb_type *a_type,
const struct ovsdb_datum *b,
const struct ovsdb_type *b_type)
{
- bool changed = false;
- size_t i;
+ unsigned int *idx, ai;
+ size_t n_idx;
ovs_assert(a_type->key.type == b_type->key.type);
ovs_assert(a_type->value.type == b_type->value.type
|| b_type->value.type == OVSDB_TYPE_VOID);
- /* XXX The big-O of this could easily be improved. */
- for (i = 0; i < a->n; ) {
- unsigned int idx = ovsdb_datum_find(a, i, b, b_type);
- if (idx != UINT_MAX) {
- changed = true;
- ovsdb_datum_remove_unsafe(a, i, a_type);
- } else {
- i++;
+ idx = xmalloc(b->n * sizeof *idx);
+ n_idx = 0;
+ for (size_t bi = 0; bi < b->n; bi++) {
+ ai = ovsdb_datum_find(b, bi, a, b_type);
+ if (ai == UINT_MAX) {
+ /* No such atom in 'a'. */
+ continue;
}
+ /* Not destroying right away since ovsdb_datum_find() will use them. */
+ idx[n_idx++] = ai;
}
- if (changed) {
- ovsdb_datum_sort_assert(a, a_type->key.type);
+ if (!n_idx) {
+ free(idx);
+ return;
+ }
+
+ struct ovsdb_datum result;
+
+ ovsdb_datum_init_empty(&result);
+ ovsdb_datum_reallocate(&result, a_type, a->n - n_idx);
+
+ unsigned int start_idx = 0;
+ for (size_t i = 0; i < n_idx; i++) {
+ ai = idx[i];
+
+ /* Destroying atom. */
+ ovsdb_atom_destroy(&a->keys[ai], a_type->key.type);
+ if (a_type->value.type != OVSDB_TYPE_VOID) {
+ ovsdb_atom_destroy(&a->values[ai], a_type->value.type);
+ }
+
+ /* Copy non-removed atoms from 'a' to result. */
+ ovsdb_datum_push_unsafe(&result, a, start_idx, ai - start_idx, a_type);
+ start_idx = idx[i] + 1;
}
+ /* Copying remaining atoms. */
+ ovsdb_datum_push_unsafe(&result, a, start_idx, a->n - start_idx, a_type);
+ a->n = 0;
+
+ ovsdb_datum_swap(&result, a);
+ ovsdb_datum_destroy(&result, a_type);
+ free(idx);
}
struct ovsdb_symbol_table *
@@ -2067,6 +2140,64 @@ ovsdb_symbol_table_insert(struct ovsdb_symbol_table *symtab,
/* APIs for Generating and apply diffs. */
+/* Find what needs to be added to and removed from 'old' to construct 'new'.
+ *
+ * The 'added' and 'removed' datums are always safe; the orders of keys are
+ * maintained since they are added in order. */
+void
+ovsdb_datum_added_removed(struct ovsdb_datum *added,
+ struct ovsdb_datum *removed,
+ const struct ovsdb_datum *old,
+ const struct ovsdb_datum *new,
+ const struct ovsdb_type *type)
+{
+ size_t oi, ni;
+
+ ovsdb_datum_init_empty(added);
+ ovsdb_datum_init_empty(removed);
+ if (!ovsdb_type_is_composite(type)) {
+ ovsdb_datum_clone(removed, old, type);
+ ovsdb_datum_clone(added, new, type);
+ return;
+ }
+
+ /* Generate the diff in O(n) time. */
+ for (oi = ni = 0; oi < old->n && ni < new->n;) {
+ int c = ovsdb_atom_compare_3way(&old->keys[oi], &new->keys[ni],
+ type->key.type);
+ if (c < 0) {
+ ovsdb_datum_add_unsafe(removed, &old->keys[oi], &old->values[oi],
+ type, NULL);
+ oi++;
+ } else if (c > 0) {
+ ovsdb_datum_add_unsafe(added, &new->keys[ni], &new->values[ni],
+ type, NULL);
+ ni++;
+ } else {
+ if (type->value.type != OVSDB_TYPE_VOID &&
+ ovsdb_atom_compare_3way(&old->values[oi], &new->values[ni],
+ type->value.type)) {
+ ovsdb_datum_add_unsafe(removed, &old->keys[oi],
+ &old->values[oi], type, NULL);
+ ovsdb_datum_add_unsafe(added, &new->keys[ni], &new->values[ni],
+ type, NULL);
+ }
+ oi++; ni++;
+ }
+ }
+
+ for (; oi < old->n; oi++) {
+ ovsdb_datum_add_unsafe(removed, &old->keys[oi], &old->values[oi],
+ type, NULL);
+ }
+
+ for (; ni < new->n; ni++) {
+ ovsdb_datum_add_unsafe(added, &new->keys[ni], &new->values[ni],
+ type, NULL);
+ }
+}
+
+
/* Generate a difference ovsdb_dataum between 'old' and 'new'.
* 'new' can be regenerated by applying the difference to the 'old'.
*
@@ -2127,6 +2258,106 @@ ovsdb_datum_diff(struct ovsdb_datum *diff,
}
}
+/* Applies 'diff' to 'a' in place, adding, updating or removing atoms.
+ *
+ * Returns NULL if 'a' was successfully updated.  Otherwise returns an
+ * ovsdb_error and leaves 'a' unmodified. */
+struct ovsdb_error *
+ovsdb_datum_apply_diff_in_place(struct ovsdb_datum *a,
+ const struct ovsdb_datum *diff,
+ const struct ovsdb_type *type)
+{
+ struct ovsdb_error *error = NULL;
+ struct ovsdb_datum result;
+ size_t i, new_size;
+ unsigned int *idx, pos;
+ enum {
+ DIFF_OP_ADD,
+ DIFF_OP_REMOVE,
+ DIFF_OP_UPDATE,
+ } *operation;
+
+ if (!ovsdb_type_is_composite(type)) {
+ ovsdb_datum_destroy(a, type);
+ ovsdb_datum_clone(a, diff, type);
+ return NULL;
+ }
+
+ operation = xmalloc(diff->n * sizeof *operation);
+ idx = xmalloc(diff->n * sizeof *idx);
+ new_size = a->n;
+ for (i = 0; i < diff->n; i++) {
+ if (!ovsdb_datum_find_key(a, &diff->keys[i], type->key.type, &pos)) {
+ operation[i] = DIFF_OP_ADD;
+ new_size++;
+ } else if (type->value.type != OVSDB_TYPE_VOID
+ && !ovsdb_atom_equals(&diff->values[i], &a->values[pos],
+ type->value.type)) {
+ operation[i] = DIFF_OP_UPDATE;
+ } else {
+ operation[i] = DIFF_OP_REMOVE;
+ new_size--;
+ }
+ idx[i] = pos;
+ }
+
+ /* Make sure the size of the resulting datum conforms to 'type'. */
+ if (new_size < type->n_min || new_size > type->n_max) {
+ error = ovsdb_error(NULL, "Datum crated by diff has size error");
+ goto exit;
+ }
+
+ ovsdb_datum_init_empty(&result);
+ ovsdb_datum_reallocate(&result, type, new_size);
+
+ unsigned int copied = 0;
+ for (i = 0; i < diff->n; i++) {
+ pos = idx[i];
+
+ if (copied < pos) {
+ /* Copying all atoms that should go before the current one. */
+ ovsdb_datum_push_unsafe(&result, a, copied, pos - copied, type);
+ copied = pos;
+ }
+
+ switch (operation[i]) {
+ case DIFF_OP_UPDATE:
+ case DIFF_OP_ADD:
+ /* Inserting new atom from 'diff'. */
+ ovsdb_atom_clone(&result.keys[result.n],
+ &diff->keys[i], type->key.type);
+ if (type->value.type != OVSDB_TYPE_VOID) {
+ ovsdb_atom_clone(&result.values[result.n],
+ &diff->values[i], type->value.type);
+ }
+ result.n++;
+ if (operation[i] != DIFF_OP_UPDATE) {
+ break;
+ }
+ /* fall through */
+
+ case DIFF_OP_REMOVE:
+ /* Destroying the atom of 'a' that is being replaced or removed. */
+ ovsdb_atom_destroy(&a->keys[pos], type->key.type);
+ if (type->value.type != OVSDB_TYPE_VOID) {
+ ovsdb_atom_destroy(&a->values[pos], type->value.type);
+ }
+ copied++; /* Skipping removed atom. */
+ break;
+ }
+ }
+ /* Copying the atoms of 'a' that follow the last position in 'idx'. */
+ ovsdb_datum_push_unsafe(&result, a, copied, a->n - copied, type);
+ a->n = 0;
+
+ ovsdb_datum_swap(&result, a);
+ ovsdb_datum_destroy(&result, type);
+exit:
+ free(operation);
+ free(idx);
+ return error;
+}
+
+
/* Apply 'diff' to 'old' to regenerate 'new'.
*
* Return NULL if the 'new' is successfully generated, otherwise, return
diff --git a/lib/ovsdb-data.h b/lib/ovsdb-data.h
index c5a80ee39f..f66ed3472c 100644
--- a/lib/ovsdb-data.h
+++ b/lib/ovsdb-data.h
@@ -20,6 +20,7 @@
#include "compiler.h"
#include "ovsdb-types.h"
#include "openvswitch/shash.h"
+#include "util.h"
#ifdef __cplusplus
extern "C" {
@@ -31,12 +32,33 @@ struct ds;
struct ovsdb_symbol_table;
struct smap;
+struct ovsdb_atom_string {
+ char *string;
+ size_t n_refs;
+};
+
+static inline struct ovsdb_atom_string *
+ovsdb_atom_string_create_nocopy(char *str)
+{
+ struct ovsdb_atom_string *s = xzalloc(sizeof *s);
+
+ s->string = str;
+ s->n_refs = 1;
+ return s;
+}
+
+static inline struct ovsdb_atom_string *
+ovsdb_atom_string_create(const char *str)
+{
+ return ovsdb_atom_string_create_nocopy(xstrdup(str));
+}
+
/* One value of an atomic type (given by enum ovs_atomic_type). */
union ovsdb_atom {
int64_t integer;
double real;
bool boolean;
- char *string;
+ struct ovsdb_atom_string *s;
struct uuid uuid;
};
@@ -66,8 +88,9 @@ ovsdb_atom_needs_destruction(enum ovsdb_atomic_type type)
static inline void
ovsdb_atom_destroy(union ovsdb_atom *atom, enum ovsdb_atomic_type type)
{
- if (type == OVSDB_TYPE_STRING) {
- free(atom->string);
+ if (type == OVSDB_TYPE_STRING && !--atom->s->n_refs) {
+ free(atom->s->string);
+ free(atom->s);
}
}
@@ -209,9 +232,10 @@ bool ovsdb_datum_equals(const struct ovsdb_datum *,
const struct ovsdb_type *);
/* Search. */
-unsigned int ovsdb_datum_find_key(const struct ovsdb_datum *,
- const union ovsdb_atom *key,
- enum ovsdb_atomic_type key_type);
+bool ovsdb_datum_find_key(const struct ovsdb_datum *,
+ const union ovsdb_atom *key,
+ enum ovsdb_atomic_type key_type,
+ unsigned int *pos);
unsigned int ovsdb_datum_find_key_value(const struct ovsdb_datum *,
const union ovsdb_atom *key,
enum ovsdb_atomic_type key_type,
@@ -227,14 +251,19 @@ bool ovsdb_datum_excludes_all(const struct ovsdb_datum *,
const struct ovsdb_type *);
void ovsdb_datum_union(struct ovsdb_datum *,
const struct ovsdb_datum *,
- const struct ovsdb_type *,
- bool replace);
+ const struct ovsdb_type *);
void ovsdb_datum_subtract(struct ovsdb_datum *a,
const struct ovsdb_type *a_type,
const struct ovsdb_datum *b,
const struct ovsdb_type *b_type);
/* Generate and apply diffs */
+void ovsdb_datum_added_removed(struct ovsdb_datum *added,
+ struct ovsdb_datum *removed,
+ const struct ovsdb_datum *old,
+ const struct ovsdb_datum *new,
+ const struct ovsdb_type *type);
+
void ovsdb_datum_diff(struct ovsdb_datum *diff,
const struct ovsdb_datum *old_datum,
const struct ovsdb_datum *new_datum,
@@ -246,6 +275,12 @@ struct ovsdb_error *ovsdb_datum_apply_diff(struct ovsdb_datum *new_datum,
const struct ovsdb_type *type)
OVS_WARN_UNUSED_RESULT;
+struct ovsdb_error * ovsdb_datum_apply_diff_in_place(
+ struct ovsdb_datum *a,
+ const struct ovsdb_datum *diff,
+ const struct ovsdb_type *type)
+OVS_WARN_UNUSED_RESULT;
+
/* Raw operations that may not maintain the invariants. */
void ovsdb_datum_remove_unsafe(struct ovsdb_datum *, size_t idx,
const struct ovsdb_type *);
diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
index 2198c69c60..496ec490d3 100644
--- a/lib/ovsdb-idl.c
+++ b/lib/ovsdb-idl.c
@@ -1898,8 +1898,7 @@ ovsdb_idl_index_destroy_row(const struct ovsdb_idl_row *row_)
BITMAP_FOR_EACH_1 (i, class->n_columns, row->written) {
c = &class->columns[i];
(c->unparse) (row);
- free(row->new_datum[i].values);
- free(row->new_datum[i].keys);
+ ovsdb_datum_destroy(&row->new_datum[i], &c->type);
}
free(row->new_datum);
free(row->written);
@@ -2787,9 +2786,8 @@ ovsdb_idl_txn_extract_mutations(struct ovsdb_idl_row *row,
struct ovsdb_datum *new_datum;
unsigned int pos;
new_datum = map_op_datum(map_op);
- pos = ovsdb_datum_find_key(old_datum,
- &new_datum->keys[0],
- key_type);
+ ovsdb_datum_find_key(old_datum, &new_datum->keys[0],
+ key_type, &pos);
if (ovsdb_atom_equals(&new_datum->values[0],
&old_datum->values[pos],
value_type)) {
@@ -2798,11 +2796,9 @@ ovsdb_idl_txn_extract_mutations(struct ovsdb_idl_row *row,
}
} else if (map_op_type(map_op) == MAP_OP_DELETE){
/* Verify that there is a key to delete. */
- unsigned int pos;
- pos = ovsdb_datum_find_key(old_datum,
- &map_op_datum(map_op)->keys[0],
- key_type);
- if (pos == UINT_MAX) {
+ if (!ovsdb_datum_find_key(old_datum,
+ &map_op_datum(map_op)->keys[0],
+ key_type, NULL)) {
/* No key to delete. Move on to next update. */
VLOG_WARN("Trying to delete a key that doesn't "
"exist in the map.");
@@ -2897,11 +2893,9 @@ ovsdb_idl_txn_extract_mutations(struct ovsdb_idl_row *row,
any_ins = true;
} else { /* SETP_OP_DELETE */
/* Verify that there is a key to delete. */
- unsigned int pos;
- pos = ovsdb_datum_find_key(old_datum,
- &set_op_datum(set_op)->keys[0],
- key_type);
- if (pos == UINT_MAX) {
+ if (!ovsdb_datum_find_key(old_datum,
+ &set_op_datum(set_op)->keys[0],
+ key_type, NULL)) {
/* No key to delete. Move on to next update. */
VLOG_WARN("Trying to delete a key that doesn't "
"exist in the set.");
@@ -4066,7 +4060,6 @@ ovsdb_idl_txn_write_partial_map(const struct ovsdb_idl_row *row_,
struct ovsdb_idl_row *row = CONST_CAST(struct ovsdb_idl_row *, row_);
enum ovsdb_atomic_type key_type;
enum map_op_type op_type;
- unsigned int pos;
const struct ovsdb_datum *old_datum;
if (!is_valid_partial_update(row, column, datum)) {
@@ -4078,8 +4071,11 @@ ovsdb_idl_txn_write_partial_map(const struct ovsdb_idl_row *row_,
/* Find out if this is an insert or an update. */
key_type = column->type.key.type;
old_datum = ovsdb_idl_read(row, column);
- pos = ovsdb_datum_find_key(old_datum, &datum->keys[0], key_type);
- op_type = pos == UINT_MAX ? MAP_OP_INSERT : MAP_OP_UPDATE;
+ if (ovsdb_datum_find_key(old_datum, &datum->keys[0], key_type, NULL)) {
+ op_type = MAP_OP_UPDATE;
+ } else {
+ op_type = MAP_OP_INSERT;
+ }
ovsdb_idl_txn_add_map_op(row, column, datum, op_type);
}
@@ -4112,6 +4108,9 @@ void
ovsdb_idl_loop_destroy(struct ovsdb_idl_loop *loop)
{
if (loop) {
+ if (loop->committing_txn) {
+ ovsdb_idl_txn_destroy(loop->committing_txn);
+ }
ovsdb_idl_destroy(loop->idl);
}
}
@@ -4121,8 +4120,8 @@ ovsdb_idl_loop_run(struct ovsdb_idl_loop *loop)
{
ovsdb_idl_run(loop->idl);
- /* See if we can commit the loop->committing_txn. */
- if (loop->committing_txn) {
+ /* See if the 'committing_txn' succeeded in the meantime. */
+ if (loop->committing_txn && loop->committing_txn->status == TXN_SUCCESS) {
ovsdb_idl_try_commit_loop_txn(loop, NULL);
}
diff --git a/lib/pcap-file.c b/lib/pcap-file.c
index b30a11c24b..41835f6f4d 100644
--- a/lib/pcap-file.c
+++ b/lib/pcap-file.c
@@ -89,6 +89,7 @@ ovs_pcap_open(const char *file_name, const char *mode)
: mode[0] == 'w' ? "writing"
: "appending"),
ovs_strerror(errno));
+ free(p_file);
return NULL;
}
diff --git a/lib/rculist.h b/lib/rculist.h
index 1072b87af2..c0d77acf94 100644
--- a/lib/rculist.h
+++ b/lib/rculist.h
@@ -365,35 +365,57 @@ rculist_is_singleton_protected(const struct rculist *list)
return list_next == list->prev && list_next != list;
}
-#define RCULIST_FOR_EACH(ITER, MEMBER, RCULIST) \
- for (INIT_CONTAINER(ITER, rculist_next(RCULIST), MEMBER); \
- &(ITER)->MEMBER != (RCULIST); \
- ASSIGN_CONTAINER(ITER, rculist_next(&(ITER)->MEMBER), MEMBER))
-#define RCULIST_FOR_EACH_CONTINUE(ITER, MEMBER, RCULIST) \
- for (ASSIGN_CONTAINER(ITER, rculist_next(&(ITER)->MEMBER), MEMBER); \
- &(ITER)->MEMBER != (RCULIST); \
- ASSIGN_CONTAINER(ITER, rculist_next(&(ITER)->MEMBER), MEMBER))
-
-#define RCULIST_FOR_EACH_REVERSE_PROTECTED(ITER, MEMBER, RCULIST) \
- for (INIT_CONTAINER(ITER, (RCULIST)->prev, MEMBER); \
- &(ITER)->MEMBER != (RCULIST); \
- ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER))
-#define RCULIST_FOR_EACH_REVERSE_PROTECTED_CONTINUE(ITER, MEMBER, RCULIST) \
- for (ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER); \
- &(ITER)->MEMBER != (RCULIST); \
- ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER))
-
-#define RCULIST_FOR_EACH_PROTECTED(ITER, MEMBER, RCULIST) \
- for (INIT_CONTAINER(ITER, rculist_next_protected(RCULIST), MEMBER); \
- &(ITER)->MEMBER != (RCULIST); \
- ASSIGN_CONTAINER(ITER, rculist_next_protected(&(ITER)->MEMBER), \
- MEMBER))
-
-#define RCULIST_FOR_EACH_SAFE_PROTECTED(ITER, NEXT, MEMBER, RCULIST) \
- for (INIT_CONTAINER(ITER, rculist_next_protected(RCULIST), MEMBER); \
- (&(ITER)->MEMBER != (RCULIST) \
- ? INIT_CONTAINER(NEXT, rculist_next_protected(&(ITER)->MEMBER), \
- MEMBER), 1 : 0); \
- (ITER) = (NEXT))
+#define RCULIST_FOR_EACH(ITER, MEMBER, RCULIST) \
+ for (INIT_MULTIVAR(ITER, MEMBER, rculist_next(RCULIST), \
+ const struct rculist); \
+ CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST)); \
+ UPDATE_MULTIVAR(ITER, rculist_next(ITER_VAR(ITER))))
+
+#define RCULIST_FOR_EACH_CONTINUE(ITER, MEMBER, RCULIST) \
+ for (INIT_MULTIVAR(ITER, MEMBER, rculist_next(&(ITER)->MEMBER), \
+ const struct rculist); \
+ CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST)); \
+ UPDATE_MULTIVAR(ITER, rculist_next(ITER_VAR(ITER))))
+
+#define RCULIST_FOR_EACH_REVERSE_PROTECTED(ITER, MEMBER, RCULIST) \
+ for (INIT_MULTIVAR(ITER, MEMBER, (RCULIST)->prev, struct rculist); \
+ CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST)); \
+ UPDATE_MULTIVAR(ITER, ITER_VAR(ITER)->prev))
+
+#define RCULIST_FOR_EACH_REVERSE_PROTECTED_CONTINUE(ITER, MEMBER, RCULIST) \
+ for (INIT_MULTIVAR(ITER, MEMBER, (ITER)->MEMBER.prev, struct rculist); \
+ CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST)); \
+ UPDATE_MULTIVAR(ITER, ITER_VAR(ITER)->prev))
+
+#define RCULIST_FOR_EACH_PROTECTED(ITER, MEMBER, RCULIST) \
+ for (INIT_MULTIVAR(ITER, MEMBER, rculist_next_protected(RCULIST), \
+ struct rculist); \
+ CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST)); \
+ UPDATE_MULTIVAR(ITER, rculist_next_protected(ITER_VAR(ITER))))
+
+#define RCULIST_FOR_EACH_SAFE_SHORT_PROTECTED(ITER, MEMBER, RCULIST) \
+ for (INIT_MULTIVAR_SAFE_SHORT(ITER, MEMBER, \
+ rculist_next_protected(RCULIST), \
+ struct rculist); \
+ CONDITION_MULTIVAR_SAFE_SHORT(ITER, MEMBER, \
+ ITER_VAR(ITER) != (RCULIST), \
+ ITER_NEXT_VAR(ITER) = rculist_next_protected(ITER_VAR(ITER))); \
+ UPDATE_MULTIVAR_SAFE_SHORT(ITER))
+
+#define RCULIST_FOR_EACH_SAFE_LONG_PROTECTED(ITER, NEXT, MEMBER, RCULIST) \
+ for (INIT_MULTIVAR_SAFE_LONG(ITER, NEXT, MEMBER, \
+ rculist_next_protected(RCULIST), \
+ struct rculist); \
+ CONDITION_MULTIVAR_SAFE_LONG(ITER, NEXT, MEMBER, \
+ ITER_VAR(ITER) != (RCULIST), \
+ ITER_VAR(NEXT) = rculist_next_protected(ITER_VAR(ITER)), \
+ ITER_VAR(NEXT) != (RCULIST)); \
+ UPDATE_MULTIVAR_SAFE_LONG(ITER, NEXT))
+
+#define RCULIST_FOR_EACH_SAFE_PROTECTED(...) \
+ OVERLOAD_SAFE_MACRO(RCULIST_FOR_EACH_SAFE_LONG_PROTECTED, \
+ RCULIST_FOR_EACH_SAFE_SHORT_PROTECTED, \
+ 4, __VA_ARGS__)
+
#endif /* rculist.h */
diff --git a/lib/reconnect.c b/lib/reconnect.c
index a929ddfd2d..89a0bcaf95 100644
--- a/lib/reconnect.c
+++ b/lib/reconnect.c
@@ -75,7 +75,8 @@ struct reconnect {
static void reconnect_transition__(struct reconnect *, long long int now,
enum state state);
-static long long int reconnect_deadline__(const struct reconnect *);
+static long long int reconnect_deadline__(const struct reconnect *,
+ long long int now);
static bool reconnect_may_retry(struct reconnect *);
static const char *
@@ -539,7 +540,7 @@ reconnect_transition__(struct reconnect *fsm, long long int now,
}
static long long int
-reconnect_deadline__(const struct reconnect *fsm)
+reconnect_deadline__(const struct reconnect *fsm, long long int now)
{
ovs_assert(fsm->state_entered != LLONG_MIN);
switch (fsm->state) {
@@ -557,8 +558,18 @@ reconnect_deadline__(const struct reconnect *fsm)
if (fsm->probe_interval) {
long long int base = MAX(fsm->last_activity, fsm->state_entered);
long long int expiration = base + fsm->probe_interval;
- if (fsm->last_receive_attempt >= expiration) {
+ if (now < expiration || fsm->last_receive_attempt >= expiration) {
+ /* We still have time before the expiration or the time has
+ * already passed and there was no activity. In the first case
+ * we need to wait for the expiration, in the second - we're
+ * already past the deadline. */
return expiration;
+ } else {
+ /* Time has already passed, but we didn't attempt to receive
+ * anything. We need to wake up and try to receive even if
+ * nothing is pending, so we can update the expiration time or
+ * transition to a different state. */
+ return now + 1;
}
}
return LLONG_MAX;
@@ -566,8 +577,10 @@ reconnect_deadline__(const struct reconnect *fsm)
case S_IDLE:
if (fsm->probe_interval) {
long long int expiration = fsm->state_entered + fsm->probe_interval;
- if (fsm->last_receive_attempt >= expiration) {
+ if (now < expiration || fsm->last_receive_attempt >= expiration) {
return expiration;
+ } else {
+ return now + 1;
}
}
return LLONG_MAX;
@@ -618,7 +631,7 @@ reconnect_deadline__(const struct reconnect *fsm)
enum reconnect_action
reconnect_run(struct reconnect *fsm, long long int now)
{
- if (now >= reconnect_deadline__(fsm)) {
+ if (now >= reconnect_deadline__(fsm, now)) {
switch (fsm->state) {
case S_VOID:
return 0;
@@ -671,7 +684,7 @@ reconnect_wait(struct reconnect *fsm, long long int now)
int
reconnect_timeout(struct reconnect *fsm, long long int now)
{
- long long int deadline = reconnect_deadline__(fsm);
+ long long int deadline = reconnect_deadline__(fsm, now);
if (deadline != LLONG_MAX) {
long long int remaining = deadline - now;
return MAX(0, MIN(INT_MAX, remaining));
diff --git a/lib/socket-util.c b/lib/socket-util.c
index 4f1ffecf5d..38705cc51e 100644
--- a/lib/socket-util.c
+++ b/lib/socket-util.c
@@ -62,7 +62,8 @@ static bool parse_sockaddr_components(struct sockaddr_storage *ss,
const char *port_s,
uint16_t default_port,
const char *s,
- bool resolve_host);
+ bool resolve_host,
+ bool *dns_failure);
/* Sets 'fd' to non-blocking mode. Returns 0 if successful, otherwise a
* positive errno value. */
@@ -438,7 +439,7 @@ parse_sockaddr_components_dns(struct sockaddr_storage *ss OVS_UNUSED,
dns_resolve(host_s, &tmp_host_s);
if (tmp_host_s != NULL) {
parse_sockaddr_components(ss, tmp_host_s, port_s,
- default_port, s, false);
+ default_port, s, false, NULL);
free(tmp_host_s);
return true;
}
@@ -450,11 +451,15 @@ parse_sockaddr_components(struct sockaddr_storage *ss,
char *host_s,
const char *port_s, uint16_t default_port,
const char *s,
- bool resolve_host)
+ bool resolve_host, bool *dns_failure)
{
struct sockaddr_in *sin = sin_cast(sa_cast(ss));
int port;
+ if (dns_failure) {
+ *dns_failure = false;
+ }
+
if (port_s && port_s[0]) {
if (!str_to_int(port_s, 10, &port) || port < 0 || port > 65535) {
VLOG_ERR("%s: bad port number \"%s\"", s, port_s);
@@ -501,10 +506,15 @@ parse_sockaddr_components(struct sockaddr_storage *ss,
return true;
resolve:
- if (resolve_host && parse_sockaddr_components_dns(ss, host_s, port_s,
- default_port, s)) {
- return true;
- } else if (!resolve_host) {
+ if (resolve_host) {
+ if (parse_sockaddr_components_dns(ss, host_s, port_s,
+ default_port, s)) {
+ return true;
+ }
+ if (dns_failure) {
+ *dns_failure = true;
+ }
+ } else {
VLOG_ERR("%s: bad IP address \"%s\"", s, host_s);
}
exit:
@@ -521,10 +531,12 @@ exit:
* It resolves the host if 'resolve_host' is true.
*
* On success, returns true and stores the parsed remote address into '*ss'.
- * On failure, logs an error, stores zeros into '*ss', and returns false. */
+ * On failure, logs an error, stores zeros into '*ss', and returns false;
+ * '*dns_failure', if nonnull, indicates whether host resolution failed. */
bool
inet_parse_active(const char *target_, int default_port,
- struct sockaddr_storage *ss, bool resolve_host)
+ struct sockaddr_storage *ss,
+ bool resolve_host, bool *dns_failure)
{
char *target = xstrdup(target_);
char *port, *host;
@@ -539,7 +551,7 @@ inet_parse_active(const char *target_, int default_port,
ok = false;
} else {
ok = parse_sockaddr_components(ss, host, port, default_port,
- target_, resolve_host);
+ target_, resolve_host, dns_failure);
}
if (!ok) {
memset(ss, 0, sizeof *ss);
@@ -576,7 +588,7 @@ inet_open_active(int style, const char *target, int default_port,
int error;
/* Parse. */
- if (!inet_parse_active(target, default_port, &ss, true)) {
+ if (!inet_parse_active(target, default_port, &ss, true, NULL)) {
error = EAFNOSUPPORT;
goto exit;
}
@@ -660,7 +672,7 @@ inet_parse_passive(const char *target_, int default_port,
ok = false;
} else {
ok = parse_sockaddr_components(ss, host, port, default_port,
- target_, true);
+ target_, true, NULL);
}
if (!ok) {
memset(ss, 0, sizeof *ss);
@@ -783,7 +795,8 @@ inet_parse_address(const char *target_, struct sockaddr_storage *ss)
{
char *target = xstrdup(target_);
char *host = unbracket(target);
- bool ok = parse_sockaddr_components(ss, host, NULL, 0, target_, false);
+ bool ok = parse_sockaddr_components(ss, host, NULL, 0,
+ target_, false, NULL);
if (!ok) {
memset(ss, 0, sizeof *ss);
}
diff --git a/lib/socket-util.h b/lib/socket-util.h
index 9ccb7d4cc4..bf66393df9 100644
--- a/lib/socket-util.h
+++ b/lib/socket-util.h
@@ -49,7 +49,8 @@ ovs_be32 guess_netmask(ovs_be32 ip);
void inet_parse_host_port_tokens(char *s, char **hostp, char **portp);
void inet_parse_port_host_tokens(char *s, char **portp, char **hostp);
bool inet_parse_active(const char *target, int default_port,
- struct sockaddr_storage *ssp, bool resolve_host);
+ struct sockaddr_storage *ssp,
+ bool resolve_host, bool *dns_failure);
int inet_open_active(int style, const char *target, int default_port,
struct sockaddr_storage *ssp, int *fdp, uint8_t dscp);
diff --git a/lib/stopwatch.c b/lib/stopwatch.c
index f5602163bc..1c71df1a12 100644
--- a/lib/stopwatch.c
+++ b/lib/stopwatch.c
@@ -114,7 +114,6 @@ static void
calc_percentile(unsigned long long n_samples, struct percentile *pctl,
unsigned long long new_sample)
{
-
if (n_samples < P_SQUARE_MIN) {
pctl->samples[n_samples - 1] = new_sample;
}
@@ -228,13 +227,12 @@ add_sample(struct stopwatch *sw, unsigned long long new_sample)
sw->min = new_sample;
}
- calc_percentile(sw->n_samples, &sw->pctl, new_sample);
-
if (sw->n_samples++ == 0) {
sw->short_term.average = sw->long_term.average = new_sample;
return;
}
+ calc_percentile(sw->n_samples, &sw->pctl, new_sample);
calc_average(&sw->short_term, new_sample);
calc_average(&sw->long_term, new_sample);
}
diff --git a/lib/stp.c b/lib/stp.c
index 809b405a52..a869b5f390 100644
--- a/lib/stp.c
+++ b/lib/stp.c
@@ -737,7 +737,7 @@ void
stp_received_bpdu(struct stp_port *p, const void *bpdu, size_t bpdu_size)
{
struct stp *stp = p->stp;
- const struct stp_bpdu_header *header;
+ struct stp_bpdu_header header;
ovs_mutex_lock(&mutex);
if (p->state == STP_DISABLED) {
@@ -750,19 +750,19 @@ stp_received_bpdu(struct stp_port *p, const void *bpdu, size_t bpdu_size)
goto out;
}
- header = bpdu;
- if (header->protocol_id != htons(STP_PROTOCOL_ID)) {
+ memcpy(&header, bpdu, sizeof header);
+ if (header.protocol_id != htons(STP_PROTOCOL_ID)) {
VLOG_WARN("%s: received BPDU with unexpected protocol ID %"PRIu16,
- stp->name, ntohs(header->protocol_id));
+ stp->name, ntohs(header.protocol_id));
p->error_count++;
goto out;
}
- if (header->protocol_version != STP_PROTOCOL_VERSION) {
+ if (header.protocol_version != STP_PROTOCOL_VERSION) {
VLOG_DBG("%s: received BPDU with unexpected protocol version %"PRIu8,
- stp->name, header->protocol_version);
+ stp->name, header.protocol_version);
}
- switch (header->bpdu_type) {
+ switch (header.bpdu_type) {
case STP_TYPE_CONFIG:
if (bpdu_size < sizeof(struct stp_config_bpdu)) {
VLOG_WARN("%s: received config BPDU with invalid size %"PRIuSIZE,
@@ -785,7 +785,7 @@ stp_received_bpdu(struct stp_port *p, const void *bpdu, size_t bpdu_size)
default:
VLOG_WARN("%s: received BPDU of unexpected type %"PRIu8,
- stp->name, header->bpdu_type);
+ stp->name, header.bpdu_type);
p->error_count++;
goto out;
}
diff --git a/lib/stream.c b/lib/stream.c
index fcaddf10ad..71039e24f1 100644
--- a/lib/stream.c
+++ b/lib/stream.c
@@ -788,7 +788,7 @@ stream_parse_target_with_default_port(const char *target, int default_port,
struct sockaddr_storage *ss)
{
return ((!strncmp(target, "tcp:", 4) || !strncmp(target, "ssl:", 4))
- && inet_parse_active(target + 4, default_port, ss, true));
+ && inet_parse_active(target + 4, default_port, ss, true, NULL));
}
/* Attempts to guess the content type of a stream whose first few bytes were
diff --git a/lib/tc.c b/lib/tc.c
index 38a1dfc0eb..df73a43d4c 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -568,16 +568,17 @@ nl_parse_flower_vlan(struct nlattr **attrs, struct tc_flower *flower)
flower->key.encap_eth_type[0] =
nl_attr_get_be16(attrs[TCA_FLOWER_KEY_ETH_TYPE]);
+ flower->mask.encap_eth_type[0] = CONSTANT_HTONS(0xffff);
if (attrs[TCA_FLOWER_KEY_VLAN_ID]) {
flower->key.vlan_id[0] =
nl_attr_get_u16(attrs[TCA_FLOWER_KEY_VLAN_ID]);
- flower->mask.vlan_id[0] = 0xffff;
+ flower->mask.vlan_id[0] = VLAN_VID_MASK >> VLAN_VID_SHIFT;
}
if (attrs[TCA_FLOWER_KEY_VLAN_PRIO]) {
flower->key.vlan_prio[0] =
nl_attr_get_u8(attrs[TCA_FLOWER_KEY_VLAN_PRIO]);
- flower->mask.vlan_prio[0] = 0xff;
+ flower->mask.vlan_prio[0] = VLAN_PCP_MASK >> VLAN_PCP_SHIFT;
}
if (!attrs[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) {
@@ -590,17 +591,18 @@ nl_parse_flower_vlan(struct nlattr **attrs, struct tc_flower *flower)
}
flower->key.encap_eth_type[1] = flower->key.encap_eth_type[0];
+ flower->mask.encap_eth_type[1] = CONSTANT_HTONS(0xffff);
flower->key.encap_eth_type[0] = encap_ethtype;
if (attrs[TCA_FLOWER_KEY_CVLAN_ID]) {
flower->key.vlan_id[1] =
nl_attr_get_u16(attrs[TCA_FLOWER_KEY_CVLAN_ID]);
- flower->mask.vlan_id[1] = 0xffff;
+ flower->mask.vlan_id[1] = VLAN_VID_MASK >> VLAN_VID_SHIFT;
}
if (attrs[TCA_FLOWER_KEY_CVLAN_PRIO]) {
flower->key.vlan_prio[1] =
nl_attr_get_u8(attrs[TCA_FLOWER_KEY_CVLAN_PRIO]);
- flower->mask.vlan_prio[1] = 0xff;
+ flower->mask.vlan_prio[1] = VLAN_PCP_MASK >> VLAN_PCP_SHIFT;
}
}
@@ -937,24 +939,21 @@ nl_parse_flower_ip(struct nlattr **attrs, struct tc_flower *flower) {
key->icmp_code =
nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_CODE]);
mask->icmp_code =
- nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_CODE]);
+ nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_CODE_MASK]);
}
if (attrs[TCA_FLOWER_KEY_ICMPV4_TYPE_MASK]) {
- key->icmp_type =
- nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_TYPE_MASK]);
+ key->icmp_type = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_TYPE]);
mask->icmp_type =
nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_TYPE_MASK]);
}
} else if (ip_proto == IPPROTO_ICMPV6) {
if (attrs[TCA_FLOWER_KEY_ICMPV6_CODE_MASK]) {
- key->icmp_code =
- nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_CODE]);
+ key->icmp_code = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_CODE]);
mask->icmp_code =
- nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_CODE]);
+ nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_CODE_MASK]);
}
if (attrs[TCA_FLOWER_KEY_ICMPV6_TYPE_MASK]) {
- key->icmp_type =
- nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_TYPE_MASK]);
+ key->icmp_type = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_TYPE]);
mask->icmp_type =
nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_TYPE_MASK]);
}
@@ -1006,14 +1005,14 @@ static const struct nl_policy pedit_policy[] = {
static int
nl_parse_act_pedit(struct nlattr *options, struct tc_flower *flower)
{
- struct tc_action *action;
+ struct tc_action *action = &flower->actions[flower->action_count++];
struct nlattr *pe_attrs[ARRAY_SIZE(pedit_policy)];
const struct tc_pedit *pe;
const struct tc_pedit_key *keys;
const struct nlattr *nla, *keys_ex, *ex_type;
const void *keys_attr;
- char *rewrite_key = (void *) &flower->rewrite.key;
- char *rewrite_mask = (void *) &flower->rewrite.mask;
+ char *rewrite_key = (void *) &action->rewrite.key;
+ char *rewrite_mask = (void *) &action->rewrite.mask;
size_t keys_ex_size, left;
int type, i = 0, err;
@@ -1092,7 +1091,6 @@ nl_parse_act_pedit(struct nlattr *options, struct tc_flower *flower)
i++;
}
- action = &flower->actions[flower->action_count++];
action->type = TC_ACT_PEDIT;
return 0;
@@ -1487,7 +1485,9 @@ nl_parse_act_ct(struct nlattr *options, struct tc_flower *flower)
if (ipv4_max) {
ovs_be32 addr = nl_attr_get_be32(ipv4_max);
- action->ct.range.ipv4.max = addr;
+ if (action->ct.range.ipv4.min != addr) {
+ action->ct.range.ipv4.max = addr;
+ }
}
} else if (ipv6_min) {
action->ct.range.ip_family = AF_INET6;
@@ -1496,7 +1496,9 @@ nl_parse_act_ct(struct nlattr *options, struct tc_flower *flower)
if (ipv6_max) {
struct in6_addr addr = nl_attr_get_in6_addr(ipv6_max);
- action->ct.range.ipv6.max = addr;
+ if (!ipv6_addr_equals(&action->ct.range.ipv6.min, &addr)) {
+ action->ct.range.ipv6.max = addr;
+ }
}
}
@@ -1504,6 +1506,10 @@ nl_parse_act_ct(struct nlattr *options, struct tc_flower *flower)
action->ct.range.port.min = nl_attr_get_be16(port_min);
if (port_max) {
action->ct.range.port.max = nl_attr_get_be16(port_max);
+ if (action->ct.range.port.min ==
+ action->ct.range.port.max) {
+ action->ct.range.port.max = 0;
+ }
}
}
}
@@ -1702,6 +1708,9 @@ static const struct nl_policy stats_policy[] = {
[TCA_STATS_BASIC] = { .type = NL_A_UNSPEC,
.min_len = sizeof(struct gnet_stats_basic),
.optional = false, },
+ [TCA_STATS_BASIC_HW] = { .type = NL_A_UNSPEC,
+ .min_len = sizeof(struct gnet_stats_basic),
+ .optional = true, },
};
static int
@@ -1714,8 +1723,11 @@ nl_parse_single_action(struct nlattr *action, struct tc_flower *flower,
const char *act_kind;
struct nlattr *action_attrs[ARRAY_SIZE(act_policy)];
struct nlattr *stats_attrs[ARRAY_SIZE(stats_policy)];
- struct ovs_flow_stats *stats = &flower->stats;
- const struct gnet_stats_basic *bs;
+ struct ovs_flow_stats *stats_sw = &flower->stats_sw;
+ struct ovs_flow_stats *stats_hw = &flower->stats_hw;
+ const struct gnet_stats_basic *bs_all = NULL;
+ const struct gnet_stats_basic *bs_hw = NULL;
+ struct gnet_stats_basic bs_sw = { .packets = 0, .bytes = 0, };
int err = 0;
if (!nl_parse_nested(action, act_policy, action_attrs,
@@ -1771,10 +1783,26 @@ nl_parse_single_action(struct nlattr *action, struct tc_flower *flower,
return EPROTO;
}
- bs = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC], sizeof *bs);
- if (bs->packets) {
- put_32aligned_u64(&stats->n_packets, bs->packets);
- put_32aligned_u64(&stats->n_bytes, bs->bytes);
+ bs_all = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC], sizeof *bs_all);
+ if (stats_attrs[TCA_STATS_BASIC_HW]) {
+ bs_hw = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC_HW],
+ sizeof *bs_hw);
+
+ bs_sw.packets = bs_all->packets - bs_hw->packets;
+ bs_sw.bytes = bs_all->bytes - bs_hw->bytes;
+ } else {
+ bs_sw.packets = bs_all->packets;
+ bs_sw.bytes = bs_all->bytes;
+ }
+
+ if (bs_sw.packets > get_32aligned_u64(&stats_sw->n_packets)) {
+ put_32aligned_u64(&stats_sw->n_packets, bs_sw.packets);
+ put_32aligned_u64(&stats_sw->n_bytes, bs_sw.bytes);
+ }
+
+ if (bs_hw && bs_hw->packets > get_32aligned_u64(&stats_hw->n_packets)) {
+ put_32aligned_u64(&stats_hw->n_packets, bs_hw->packets);
+ put_32aligned_u64(&stats_hw->n_bytes, bs_hw->bytes);
}
return 0;
@@ -2399,14 +2427,14 @@ nl_msg_put_act_flags(struct ofpbuf *request) {
* first_word_mask/last_word_mask - the mask to use for the first/last read
* (as we read entire words). */
static void
-calc_offsets(struct tc_flower *flower, struct flower_key_to_pedit *m,
+calc_offsets(struct tc_action *action, struct flower_key_to_pedit *m,
int *cur_offset, int *cnt, ovs_be32 *last_word_mask,
ovs_be32 *first_word_mask, ovs_be32 **mask, ovs_be32 **data)
{
int start_offset, max_offset, total_size;
int diff, right_zero_bits, left_zero_bits;
- char *rewrite_key = (void *) &flower->rewrite.key;
- char *rewrite_mask = (void *) &flower->rewrite.mask;
+ char *rewrite_key = (void *) &action->rewrite.key;
+ char *rewrite_mask = (void *) &action->rewrite.mask;
max_offset = m->offset + m->size;
start_offset = ROUND_DOWN(m->offset, 4);
@@ -2473,7 +2501,8 @@ csum_update_flag(struct tc_flower *flower,
static int
nl_msg_put_flower_rewrite_pedits(struct ofpbuf *request,
- struct tc_flower *flower)
+ struct tc_flower *flower,
+ struct tc_action *action)
{
struct {
struct tc_pedit sel;
@@ -2497,7 +2526,7 @@ nl_msg_put_flower_rewrite_pedits(struct ofpbuf *request,
continue;
}
- calc_offsets(flower, m, &cur_offset, &cnt, &last_word_mask,
+ calc_offsets(action, m, &cur_offset, &cnt, &last_word_mask,
&first_word_mask, &mask, &data);
for (j = 0; j < cnt; j++, mask++, data++, cur_offset += 4) {
@@ -2545,6 +2574,40 @@ nl_msg_put_flower_rewrite_pedits(struct ofpbuf *request,
return 0;
}
+/* Opens a nested attribute of type 'act_index' in 'request', emits the
+ * tunnel key release action and the action flags into it, then closes the
+ * nested attribute again. */
+static void
+nl_msg_put_flower_acts_release(struct ofpbuf *request, uint16_t act_index)
+{
+    size_t nest_start = nl_msg_start_nested(request, act_index);
+
+    nl_msg_put_act_tunnel_key_release(request);
+    nl_msg_put_act_flags(request);
+    nl_msg_end_nested(request, nest_start);
+}
+
+/* Emits one csum action for the pedit actions accumulated so far.
+ *
+ * The csum update flags of all previous successive pedit actions are
+ * aggregated in flower->csum_update_flags, so only a single csum action is
+ * appended, after the last pedit action of the series.  When the flags are
+ * zero (no pedit actions, or they were processed already) nothing is
+ * written and '*act_index' is left untouched. */
+static void
+nl_msg_put_csum_act(struct ofpbuf *request, struct tc_flower *flower,
+                    uint16_t *act_index)
+{
+    if (flower->csum_update_flags) {
+        size_t nest_start = nl_msg_start_nested(request, *act_index);
+
+        /* The csum action consumed the current index; advance it. */
+        *act_index += 1;
+        nl_msg_put_act_csum(request, flower->csum_update_flags);
+        nl_msg_put_act_flags(request);
+        nl_msg_end_nested(request, nest_start);
+
+        /* Reset, so that another series of pedit actions can follow. */
+        flower->csum_update_flags = 0;
+    }
+}
+
static int
nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower)
{
@@ -2561,24 +2624,31 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower)
action = flower->actions;
for (i = 0; i < flower->action_count; i++, action++) {
+ if (action->type != TC_ACT_PEDIT) {
+ nl_msg_put_csum_act(request, flower, &act_index);
+ }
switch (action->type) {
case TC_ACT_PEDIT: {
act_offset = nl_msg_start_nested(request, act_index++);
- error = nl_msg_put_flower_rewrite_pedits(request, flower);
+ error = nl_msg_put_flower_rewrite_pedits(request, flower,
+ action);
if (error) {
return error;
}
nl_msg_end_nested(request, act_offset);
- if (flower->csum_update_flags) {
- act_offset = nl_msg_start_nested(request, act_index++);
- nl_msg_put_act_csum(request, flower->csum_update_flags);
- nl_msg_put_act_flags(request);
- nl_msg_end_nested(request, act_offset);
+ if (i == flower->action_count - 1) {
+ /* If this is the last action check csum calc again. */
+ nl_msg_put_csum_act(request, flower, &act_index);
}
}
break;
case TC_ACT_ENCAP: {
+ if (!released && flower->tunnel) {
+ nl_msg_put_flower_acts_release(request, act_index++);
+ released = true;
+ }
+
act_offset = nl_msg_start_nested(request, act_index++);
nl_msg_put_act_tunnel_key_set(request, action->encap.id_present,
action->encap.id,
@@ -2636,10 +2706,7 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower)
break;
case TC_ACT_OUTPUT: {
if (!released && flower->tunnel) {
- act_offset = nl_msg_start_nested(request, act_index++);
- nl_msg_put_act_tunnel_key_release(request);
- nl_msg_put_act_flags(request);
- nl_msg_end_nested(request, act_offset);
+ nl_msg_put_flower_acts_release(request, act_index++);
released = true;
}
@@ -2901,13 +2968,13 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
FLOWER_PUT_MASKED_VALUE(icmp_code, TCA_FLOWER_KEY_ICMPV6_CODE);
FLOWER_PUT_MASKED_VALUE(icmp_type, TCA_FLOWER_KEY_ICMPV6_TYPE);
}
-
- FLOWER_PUT_MASKED_VALUE(ct_state, TCA_FLOWER_KEY_CT_STATE);
- FLOWER_PUT_MASKED_VALUE(ct_zone, TCA_FLOWER_KEY_CT_ZONE);
- FLOWER_PUT_MASKED_VALUE(ct_mark, TCA_FLOWER_KEY_CT_MARK);
- FLOWER_PUT_MASKED_VALUE(ct_label, TCA_FLOWER_KEY_CT_LABELS);
}
+ FLOWER_PUT_MASKED_VALUE(ct_state, TCA_FLOWER_KEY_CT_STATE);
+ FLOWER_PUT_MASKED_VALUE(ct_zone, TCA_FLOWER_KEY_CT_ZONE);
+ FLOWER_PUT_MASKED_VALUE(ct_mark, TCA_FLOWER_KEY_CT_MARK);
+ FLOWER_PUT_MASKED_VALUE(ct_label, TCA_FLOWER_KEY_CT_LABELS);
+
if (host_eth_type == ETH_P_IP) {
FLOWER_PUT_MASKED_VALUE(ipv4.ipv4_src, TCA_FLOWER_KEY_IPV4_SRC);
FLOWER_PUT_MASKED_VALUE(ipv4.ipv4_dst, TCA_FLOWER_KEY_IPV4_DST);
@@ -2980,12 +3047,79 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
return 0;
}
+/* Logs 'msg' through VLOG_DBG_RL, followed by hex dumps of the masks, keys
+ * and masked keys of 'a' (labelled "Expected") and 'b' (labelled
+ * "Received").  The actions are appended as well: every action of both
+ * flowers when the action counts differ, otherwise only the pairs whose
+ * bytes differ. */
+static void
+log_tc_flower_match(const char *msg,
+                    const struct tc_flower *a,
+                    const struct tc_flower *b)
+{
+    const uint8_t *key_bytes_a = (const uint8_t *) &a->key;
+    const uint8_t *key_bytes_b = (const uint8_t *) &b->key;
+    const uint8_t *mask_bytes_a = (const uint8_t *) &a->mask;
+    const uint8_t *mask_bytes_b = (const uint8_t *) &b->mask;
+    uint8_t masked_a[sizeof(struct tc_flower_key)];
+    uint8_t masked_b[sizeof(struct tc_flower_key)];
+    struct ds out = DS_EMPTY_INITIALIZER;
+    int n;
+
+    /* Apply each flower's own mask to its key, one byte at a time. */
+    for (n = 0; n < sizeof a->key; n++) {
+        masked_a[n] = key_bytes_a[n] & mask_bytes_a[n];
+        masked_b[n] = key_bytes_b[n] & mask_bytes_b[n];
+    }
+
+    /* Fixed part of the report, in output order. */
+    const struct {
+        const char *title;
+        const void *bytes;
+        size_t len;
+    } dumps[] = {
+        { "\nExpected Mask:\n", &a->mask, sizeof a->mask },
+        { "\nReceived Mask:\n", &b->mask, sizeof b->mask },
+        { "\nExpected Key:\n", &a->key, sizeof a->key },
+        { "\nReceived Key:\n", &b->key, sizeof b->key },
+        { "\nExpected Masked Key:\n", masked_a, sizeof masked_a },
+        { "\nReceived Masked Key:\n", masked_b, sizeof masked_b },
+    };
+
+    for (size_t d = 0; d < ARRAY_SIZE(dumps); d++) {
+        ds_put_cstr(&out, dumps[d].title);
+        ds_put_hex(&out, dumps[d].bytes, dumps[d].len);
+    }
+
+    if (a->action_count == b->action_count) {
+        /* Same number of actions: only dump the pairs that differ. */
+        for (n = 0; n < a->action_count; n++) {
+            const struct tc_action *want = &a->actions[n];
+            const struct tc_action *got = &b->actions[n];
+
+            if (!memcmp(want, got, sizeof *want)) {
+                continue;
+            }
+            ds_put_format(&out,
+                          "\nAction %d mismatch:\n - Expected Action: ",
+                          n);
+            ds_put_hex(&out, want, sizeof *want);
+            ds_put_cstr(&out, "\n - Received Action: ");
+            ds_put_hex(&out, got, sizeof *got);
+        }
+    } else {
+        /* The action counts differ, so print all actions of both sides to
+         * show which ones are missing. */
+        ds_put_cstr(&out, "\nExpected Actions:\n");
+        for (n = 0; n < a->action_count; n++) {
+            ds_put_cstr(&out, " - ");
+            ds_put_hex(&out, &a->actions[n], sizeof a->actions[n]);
+            ds_put_cstr(&out, "\n");
+        }
+        ds_put_cstr(&out, "Received Actions:\n");
+        for (n = 0; n < b->action_count; n++) {
+            ds_put_cstr(&out, " - ");
+            ds_put_hex(&out, &b->actions[n], sizeof b->actions[n]);
+            ds_put_cstr(&out, "\n");
+        }
+    }
+
+    VLOG_DBG_RL(&error_rl, "%s%s", msg, ds_cstr(&out));
+    ds_destroy(&out);
+}
+
static bool
cmp_tc_flower_match_action(const struct tc_flower *a,
const struct tc_flower *b)
{
if (memcmp(&a->mask, &b->mask, sizeof a->mask)) {
- VLOG_DBG_RL(&error_rl, "tc flower compare failed mask compare");
+ log_tc_flower_match("tc flower compare failed mask compare:", a, b);
return false;
}
@@ -2998,8 +3132,8 @@ cmp_tc_flower_match_action(const struct tc_flower *a,
uint8_t key_b = ((uint8_t *)&b->key)[i] & mask;
if (key_a != key_b) {
- VLOG_DBG_RL(&error_rl, "tc flower compare failed key compare at "
- "%d", i);
+ log_tc_flower_match("tc flower compare failed masked key compare:",
+ a, b);
return false;
}
}
@@ -3009,14 +3143,15 @@ cmp_tc_flower_match_action(const struct tc_flower *a,
const struct tc_action *action_b = b->actions;
if (a->action_count != b->action_count) {
- VLOG_DBG_RL(&error_rl, "tc flower compare failed action length check");
+ log_tc_flower_match("tc flower compare failed action length check",
+ a, b);
return false;
}
for (int i = 0; i < a->action_count; i++, action_a++, action_b++) {
if (memcmp(action_a, action_b, sizeof *action_a)) {
- VLOG_DBG_RL(&error_rl, "tc flower compare failed action compare "
- "for %d", i);
+ log_tc_flower_match("tc flower compare failed action compare",
+ a, b);
return false;
}
}
diff --git a/lib/tc.h b/lib/tc.h
index a147ca461d..d6cdddd169 100644
--- a/lib/tc.h
+++ b/lib/tc.h
@@ -256,11 +256,23 @@ struct tc_action {
bool force;
bool commit;
} ct;
+
+ struct {
+ struct tc_flower_key key;
+ struct tc_flower_key mask;
+ } rewrite;
};
enum tc_action_type type;
};
+/* Assert that if a masked uint32_t write to the last byte of action.rewrite
+ * overflows, the overflow stays inside struct tc_action.  The assertion
+ * should only fail if someone moves 'rewrite' to the end of the struct. */
+BUILD_ASSERT_DECL(offsetof(struct tc_action, rewrite)
+ + MEMBER_SIZEOF(struct tc_action, rewrite)
+ + sizeof(uint32_t) - 2 < sizeof(struct tc_action));
+
enum tc_offloaded_state {
TC_OFFLOADED_STATE_UNDEFINED,
TC_OFFLOADED_STATE_IN_HW,
@@ -330,15 +342,10 @@ struct tc_flower {
int action_count;
struct tc_action actions[TCA_ACT_MAX_NUM];
- struct ovs_flow_stats stats;
+ struct ovs_flow_stats stats_sw;
+ struct ovs_flow_stats stats_hw;
uint64_t lastused;
- struct {
- bool rewrite;
- struct tc_flower_key key;
- struct tc_flower_key mask;
- } rewrite;
-
uint32_t csum_update_flags;
bool tunnel;
@@ -352,13 +359,6 @@ struct tc_flower {
enum tc_offload_policy tc_policy;
};
-/* assert that if we overflow with a masked write of uint32_t to the last byte
- * of flower.rewrite we overflow inside struct flower.
- * shouldn't happen unless someone moves rewrite to the end of flower */
-BUILD_ASSERT_DECL(offsetof(struct tc_flower, rewrite)
- + MEMBER_SIZEOF(struct tc_flower, rewrite)
- + sizeof(uint32_t) - 2 < sizeof(struct tc_flower));
-
int tc_replace_flower(struct tcf_id *id, struct tc_flower *flower);
int tc_del_filter(struct tcf_id *id);
int tc_get_flower(struct tcf_id *id, struct tc_flower *flower);
diff --git a/lib/tnl-neigh-cache.c b/lib/tnl-neigh-cache.c
index 5bda4af7e0..995c88bf17 100644
--- a/lib/tnl-neigh-cache.c
+++ b/lib/tnl-neigh-cache.c
@@ -32,6 +32,7 @@
#include "errno.h"
#include "flow.h"
#include "netdev.h"
+#include "ovs-atomic.h"
#include "ovs-thread.h"
#include "packets.h"
#include "openvswitch/poll-loop.h"
@@ -44,14 +45,13 @@
#include "openvswitch/vlog.h"
-/* In seconds */
-#define NEIGH_ENTRY_DEFAULT_IDLE_TIME (15 * 60)
+#define NEIGH_ENTRY_DEFAULT_IDLE_TIME_MS (15 * 60 * 1000)
struct tnl_neigh_entry {
struct cmap_node cmap_node;
struct in6_addr ip;
struct eth_addr mac;
- time_t expires; /* Expiration time. */
+ atomic_llong expires; /* Expiration time in ms. */
char br_name[IFNAMSIZ];
};
@@ -64,6 +64,16 @@ tnl_neigh_hash(const struct in6_addr *ip)
return hash_bytes(ip->s6_addr, 16, 0);
}
+/* Returns true if the expiration time stored in 'neigh' is at or before the
+ * current time reported by time_msec(), false otherwise. */
+static bool
+tnl_neigh_expired(struct tnl_neigh_entry *neigh)
+{
+    long long deadline;
+
+    atomic_read_explicit(&neigh->expires, &deadline, memory_order_acquire);
+    return time_msec() >= deadline;
+}
+
static struct tnl_neigh_entry *
tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const struct in6_addr *dst)
{
@@ -73,11 +83,13 @@ tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const struct in6_addr *dst)
hash = tnl_neigh_hash(dst);
CMAP_FOR_EACH_WITH_HASH (neigh, cmap_node, hash, &table) {
if (ipv6_addr_equals(&neigh->ip, dst) && !strcmp(neigh->br_name, br_name)) {
- if (neigh->expires <= time_now()) {
+ if (tnl_neigh_expired(neigh)) {
return NULL;
}
- neigh->expires = time_now() + NEIGH_ENTRY_DEFAULT_IDLE_TIME;
+ atomic_store_explicit(&neigh->expires, time_msec() +
+ NEIGH_ENTRY_DEFAULT_IDLE_TIME_MS,
+ memory_order_release);
return neigh;
}
}
@@ -113,15 +125,16 @@ tnl_neigh_delete(struct tnl_neigh_entry *neigh)
ovsrcu_postpone(neigh_entry_free, neigh);
}
-static void
-tnl_neigh_set__(const char name[IFNAMSIZ], const struct in6_addr *dst,
- const struct eth_addr mac)
+void
+tnl_neigh_set(const char name[IFNAMSIZ], const struct in6_addr *dst,
+ const struct eth_addr mac)
{
ovs_mutex_lock(&mutex);
struct tnl_neigh_entry *neigh = tnl_neigh_lookup__(name, dst);
if (neigh) {
if (eth_addr_equals(neigh->mac, mac)) {
- neigh->expires = time_now() + NEIGH_ENTRY_DEFAULT_IDLE_TIME;
+ atomic_store_relaxed(&neigh->expires, time_msec() +
+ NEIGH_ENTRY_DEFAULT_IDLE_TIME_MS);
ovs_mutex_unlock(&mutex);
return;
}
@@ -133,7 +146,8 @@ tnl_neigh_set__(const char name[IFNAMSIZ], const struct in6_addr *dst,
neigh->ip = *dst;
neigh->mac = mac;
- neigh->expires = time_now() + NEIGH_ENTRY_DEFAULT_IDLE_TIME;
+ atomic_store_relaxed(&neigh->expires, time_msec() +
+ NEIGH_ENTRY_DEFAULT_IDLE_TIME_MS);
ovs_strlcpy(neigh->br_name, name, sizeof neigh->br_name);
cmap_insert(&table, &neigh->cmap_node, tnl_neigh_hash(&neigh->ip));
ovs_mutex_unlock(&mutex);
@@ -144,12 +158,12 @@ tnl_arp_set(const char name[IFNAMSIZ], ovs_be32 dst,
const struct eth_addr mac)
{
struct in6_addr dst6 = in6_addr_mapped_ipv4(dst);
- tnl_neigh_set__(name, &dst6, mac);
+ tnl_neigh_set(name, &dst6, mac);
}
static int
tnl_arp_snoop(const struct flow *flow, struct flow_wildcards *wc,
- const char name[IFNAMSIZ])
+ const char name[IFNAMSIZ], bool allow_update)
{
/* Snoop normal ARP replies and gratuitous ARP requests/replies only */
if (!is_arp(flow)
@@ -159,13 +173,17 @@ tnl_arp_snoop(const struct flow *flow, struct flow_wildcards *wc,
return EINVAL;
}
- tnl_arp_set(name, FLOW_WC_GET_AND_MASK_WC(flow, wc, nw_src), flow->arp_sha);
+ memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
+
+ if (allow_update) {
+ tnl_arp_set(name, flow->nw_src, flow->arp_sha);
+ }
return 0;
}
static int
tnl_nd_snoop(const struct flow *flow, struct flow_wildcards *wc,
- const char name[IFNAMSIZ])
+ const char name[IFNAMSIZ], bool allow_update)
{
if (!is_nd(flow, wc) || flow->tp_src != htons(ND_NEIGHBOR_ADVERT)) {
return EINVAL;
@@ -184,20 +202,22 @@ tnl_nd_snoop(const struct flow *flow, struct flow_wildcards *wc,
memset(&wc->masks.ipv6_dst, 0xff, sizeof wc->masks.ipv6_dst);
memset(&wc->masks.nd_target, 0xff, sizeof wc->masks.nd_target);
- tnl_neigh_set__(name, &flow->nd_target, flow->arp_tha);
+ if (allow_update) {
+ tnl_neigh_set(name, &flow->nd_target, flow->arp_tha);
+ }
return 0;
}
int
tnl_neigh_snoop(const struct flow *flow, struct flow_wildcards *wc,
- const char name[IFNAMSIZ])
+ const char name[IFNAMSIZ], bool allow_update)
{
int res;
- res = tnl_arp_snoop(flow, wc, name);
+ res = tnl_arp_snoop(flow, wc, name, allow_update);
if (res != EINVAL) {
return res;
}
- return tnl_nd_snoop(flow, wc, name);
+ return tnl_nd_snoop(flow, wc, name, allow_update);
}
void
@@ -208,7 +228,7 @@ tnl_neigh_cache_run(void)
ovs_mutex_lock(&mutex);
CMAP_FOR_EACH(neigh, cmap_node, &table) {
- if (neigh->expires <= time_now()) {
+ if (tnl_neigh_expired(neigh)) {
tnl_neigh_delete(neigh);
changed = true;
}
@@ -294,7 +314,7 @@ tnl_neigh_cache_add(struct unixctl_conn *conn, int argc OVS_UNUSED,
return;
}
- tnl_neigh_set__(br_name, &ip6, mac);
+ tnl_neigh_set(br_name, &ip6, mac);
unixctl_command_reply(conn, "OK");
}
@@ -319,7 +339,7 @@ tnl_neigh_cache_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
ds_put_format(&ds, ETH_ADDR_FMT" %s",
ETH_ADDR_ARGS(neigh->mac), neigh->br_name);
- if (neigh->expires <= time_now()) {
+ if (tnl_neigh_expired(neigh)) {
ds_put_format(&ds, " STALE");
}
ds_put_char(&ds, '\n');
diff --git a/lib/tnl-neigh-cache.h b/lib/tnl-neigh-cache.h
index e4b42b0594..877bca3127 100644
--- a/lib/tnl-neigh-cache.h
+++ b/lib/tnl-neigh-cache.h
@@ -32,7 +32,9 @@
#include "util.h"
int tnl_neigh_snoop(const struct flow *flow, struct flow_wildcards *wc,
- const char dev_name[IFNAMSIZ]);
+ const char dev_name[IFNAMSIZ], bool allow_update);
+void tnl_neigh_set(const char name[IFNAMSIZ], const struct in6_addr *dst,
+ const struct eth_addr mac);
int tnl_neigh_lookup(const char dev_name[IFNAMSIZ], const struct in6_addr *dst,
struct eth_addr *mac);
void tnl_neigh_cache_init(void);
diff --git a/ofproto/bond.c b/ofproto/bond.c
index a4116588f4..2c0ad5ef84 100644
--- a/ofproto/bond.c
+++ b/ofproto/bond.c
@@ -1253,7 +1253,7 @@ insert_bal(struct ovs_list *bals, struct bond_member *member)
break;
}
}
- ovs_list_insert(&pos->bal_node, &member->bal_node);
+ ovs_list_insert(pos ? &pos->bal_node : bals, &member->bal_node);
}
/* Removes 'member' from its current list and then inserts it into 'bals' so
diff --git a/ofproto/ofproto-dpif-ipfix.c b/ofproto/ofproto-dpif-ipfix.c
index 796eb6f881..92692470fd 100644
--- a/ofproto/ofproto-dpif-ipfix.c
+++ b/ofproto/ofproto-dpif-ipfix.c
@@ -926,17 +926,21 @@ dpif_ipfix_bridge_exporter_destroy(struct dpif_ipfix_bridge_exporter *exporter)
static void
dpif_ipfix_bridge_exporter_set_options(
struct dpif_ipfix_bridge_exporter *exporter,
- const struct ofproto_ipfix_bridge_exporter_options *options)
+ const struct ofproto_ipfix_bridge_exporter_options *options,
+ bool *options_changed)
{
- bool options_changed;
-
if (!options || sset_is_empty(&options->targets)) {
/* No point in doing any work if there are no targets. */
- dpif_ipfix_bridge_exporter_clear(exporter);
+ if (exporter->options) {
+ dpif_ipfix_bridge_exporter_clear(exporter);
+ *options_changed = true;
+ } else {
+ *options_changed = false;
+ }
return;
}
- options_changed = (
+ *options_changed = (
!exporter->options
|| !ofproto_ipfix_bridge_exporter_options_equal(
options, exporter->options));
@@ -945,7 +949,7 @@ dpif_ipfix_bridge_exporter_set_options(
* shortchanged in collectors (which indicates that opening one or
* more of the configured collectors failed, so that we should
* retry). */
- if (options_changed
+ if (*options_changed
|| collectors_count(exporter->exporter.collectors)
< sset_count(&options->targets)) {
if (!dpif_ipfix_exporter_set_options(
@@ -957,7 +961,7 @@ dpif_ipfix_bridge_exporter_set_options(
}
/* Avoid reconfiguring if options didn't change. */
- if (!options_changed) {
+ if (!*options_changed) {
return;
}
@@ -1015,17 +1019,21 @@ dpif_ipfix_flow_exporter_destroy(struct dpif_ipfix_flow_exporter *exporter)
static bool
dpif_ipfix_flow_exporter_set_options(
struct dpif_ipfix_flow_exporter *exporter,
- const struct ofproto_ipfix_flow_exporter_options *options)
+ const struct ofproto_ipfix_flow_exporter_options *options,
+ bool *options_changed)
{
- bool options_changed;
-
if (sset_is_empty(&options->targets)) {
/* No point in doing any work if there are no targets. */
- dpif_ipfix_flow_exporter_clear(exporter);
+ if (exporter->options) {
+ dpif_ipfix_flow_exporter_clear(exporter);
+ *options_changed = true;
+ } else {
+ *options_changed = false;
+ }
return true;
}
- options_changed = (
+ *options_changed = (
!exporter->options
|| !ofproto_ipfix_flow_exporter_options_equal(
options, exporter->options));
@@ -1034,7 +1042,7 @@ dpif_ipfix_flow_exporter_set_options(
* shortchanged in collectors (which indicates that opening one or
* more of the configured collectors failed, so that we should
* retry). */
- if (options_changed
+ if (*options_changed
|| collectors_count(exporter->exporter.collectors)
< sset_count(&options->targets)) {
if (!dpif_ipfix_exporter_set_options(
@@ -1046,7 +1054,7 @@ dpif_ipfix_flow_exporter_set_options(
}
/* Avoid reconfiguring if options didn't change. */
- if (!options_changed) {
+ if (!*options_changed) {
return true;
}
@@ -1069,7 +1077,7 @@ remove_flow_exporter(struct dpif_ipfix *di,
free(node);
}
-void
+bool
dpif_ipfix_set_options(
struct dpif_ipfix *di,
const struct ofproto_ipfix_bridge_exporter_options *bridge_exporter_options,
@@ -1077,16 +1085,19 @@ dpif_ipfix_set_options(
size_t n_flow_exporters_options) OVS_EXCLUDED(mutex)
{
int i;
+ bool beo_changed, feo_changed, entry_changed;
struct ofproto_ipfix_flow_exporter_options *options;
struct dpif_ipfix_flow_exporter_map_node *node, *next;
ovs_mutex_lock(&mutex);
dpif_ipfix_bridge_exporter_set_options(&di->bridge_exporter,
- bridge_exporter_options);
+ bridge_exporter_options,
+ &beo_changed);
/* Add new flow exporters and update current flow exporters. */
options = (struct ofproto_ipfix_flow_exporter_options *)
flow_exporters_options;
+ feo_changed = false;
for (i = 0; i < n_flow_exporters_options; i++) {
node = dpif_ipfix_find_flow_exporter_map_node(
di, options->collector_set_id);
@@ -1095,10 +1106,14 @@ dpif_ipfix_set_options(
dpif_ipfix_flow_exporter_init(&node->exporter);
hmap_insert(&di->flow_exporter_map, &node->node,
hash_int(options->collector_set_id, 0));
+ feo_changed = true;
}
- if (!dpif_ipfix_flow_exporter_set_options(&node->exporter, options)) {
+ if (!dpif_ipfix_flow_exporter_set_options(&node->exporter,
+ options,
+ &entry_changed)) {
remove_flow_exporter(di, node);
}
+ feo_changed = entry_changed ? true : feo_changed;
options++;
}
@@ -1117,10 +1132,12 @@ dpif_ipfix_set_options(
}
if (i == n_flow_exporters_options) { /* Not found. */
remove_flow_exporter(di, node);
+ feo_changed = true;
}
}
ovs_mutex_unlock(&mutex);
+ return beo_changed || feo_changed;
}
struct dpif_ipfix *
diff --git a/ofproto/ofproto-dpif-ipfix.h b/ofproto/ofproto-dpif-ipfix.h
index 1f42cd5275..75c0ab81ac 100644
--- a/ofproto/ofproto-dpif-ipfix.h
+++ b/ofproto/ofproto-dpif-ipfix.h
@@ -48,7 +48,7 @@ bool dpif_ipfix_get_bridge_exporter_output_sampling(const struct dpif_ipfix *);
bool dpif_ipfix_get_flow_exporter_tunnel_sampling(const struct dpif_ipfix *,
const uint32_t);
bool dpif_ipfix_is_tunnel_port(const struct dpif_ipfix *, odp_port_t);
-void dpif_ipfix_set_options(
+bool dpif_ipfix_set_options(
struct dpif_ipfix *,
const struct ofproto_ipfix_bridge_exporter_options *,
const struct ofproto_ipfix_flow_exporter_options *, size_t);
diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
index 864c136b5d..0f4a61ac6b 100644
--- a/ofproto/ofproto-dpif-sflow.c
+++ b/ofproto/ofproto-dpif-sflow.c
@@ -468,7 +468,8 @@ sflow_choose_agent_address(const char *agent_device,
const char *target;
SSET_FOR_EACH (target, targets) {
struct sockaddr_storage ss;
- if (inet_parse_active(target, SFL_DEFAULT_COLLECTOR_PORT, &ss, true)) {
+ if (inet_parse_active(target, SFL_DEFAULT_COLLECTOR_PORT,
+ &ss, true, NULL)) {
/* sFlow only supports target in default routing table with
* packet mark zero.
*/
diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
index 1c9c720f04..57f94df544 100644
--- a/ofproto/ofproto-dpif-upcall.c
+++ b/ofproto/ofproto-dpif-upcall.c
@@ -2971,11 +2971,11 @@ upcall_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
}
ds_put_char(&ds, '\n');
- for (i = 0; i < n_revalidators; i++) {
+ for (i = 0; i < udpif->n_revalidators; i++) {
struct revalidator *revalidator = &udpif->revalidators[i];
int j, elements = 0;
- for (j = i; j < N_UMAPS; j += n_revalidators) {
+ for (j = i; j < N_UMAPS; j += udpif->n_revalidators) {
elements += cmap_count(&udpif->ukeys[j].cmap);
}
ds_put_format(&ds, " %u: (keys %d)\n", revalidator->id, elements);
diff --git a/ofproto/ofproto-dpif-xlate-cache.c b/ofproto/ofproto-dpif-xlate-cache.c
index dcc91cb380..9224ee2e6d 100644
--- a/ofproto/ofproto-dpif-xlate-cache.c
+++ b/ofproto/ofproto-dpif-xlate-cache.c
@@ -209,6 +209,7 @@ xlate_cache_clear_entry(struct xc_entry *entry)
{
switch (entry->type) {
case XC_TABLE:
+ ofproto_unref(&(entry->table.ofproto->up));
break;
case XC_RULE:
ofproto_rule_unref(&entry->rule->up);
@@ -231,6 +232,7 @@ xlate_cache_clear_entry(struct xc_entry *entry)
free(entry->learn.ofm);
break;
case XC_NORMAL:
+ ofproto_unref(&(entry->normal.ofproto->up));
break;
case XC_FIN_TIMEOUT:
/* 'u.fin.rule' is always already held as a XC_RULE, which
diff --git a/ofproto/ofproto-dpif-xlate-cache.h b/ofproto/ofproto-dpif-xlate-cache.h
index 114aff8ea3..0fc6d2ea60 100644
--- a/ofproto/ofproto-dpif-xlate-cache.h
+++ b/ofproto/ofproto-dpif-xlate-cache.h
@@ -61,9 +61,8 @@ enum xc_type {
* that a flow relates to, although they may be used for other effects as well
* (for instance, refreshing hard timeouts for learned flows).
*
- * An explicit reference is taken to all pointers other than the ones for
- * struct ofproto_dpif. ofproto_dpif pointers are explicitly protected by
- * destroying all xlate caches before the ofproto is destroyed. */
+ * An explicit reference is taken to all pointers.
+ */
struct xc_entry {
enum xc_type type;
union {
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index a426fcfeb6..b8886105df 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -460,7 +460,7 @@ static void xlate_commit_actions(struct xlate_ctx *ctx);
static void
patch_port_output(struct xlate_ctx *ctx, const struct xport *in_dev,
- struct xport *out_dev);
+ struct xport *out_dev, bool is_last_action);
static void
ctx_trigger_freeze(struct xlate_ctx *ctx)
@@ -865,7 +865,7 @@ xlate_xbridge_init(struct xlate_cfg *xcfg, struct xbridge *xbridge)
ovs_list_init(&xbridge->xbundles);
hmap_init(&xbridge->xports);
hmap_insert(&xcfg->xbridges, &xbridge->hmap_node,
- hash_pointer(xbridge->ofproto, 0));
+ uuid_hash(&xbridge->ofproto->uuid));
}
static void
@@ -1639,7 +1639,7 @@ xbridge_lookup(struct xlate_cfg *xcfg, const struct ofproto_dpif *ofproto)
xbridges = &xcfg->xbridges;
- HMAP_FOR_EACH_IN_BUCKET (xbridge, hmap_node, hash_pointer(ofproto, 0),
+ HMAP_FOR_EACH_IN_BUCKET (xbridge, hmap_node, uuid_hash(&ofproto->uuid),
xbridges) {
if (xbridge->ofproto == ofproto) {
return xbridge;
@@ -1661,6 +1661,23 @@ xbridge_lookup_by_uuid(struct xlate_cfg *xcfg, const struct uuid *uuid)
return NULL;
}
+/* Returns the ofproto_dpif of the xbridge that xbridge_lookup_by_uuid()
+ * finds for 'uuid' in the xlate configuration read from 'xcfgp', or NULL if
+ * there is no configuration or no such xbridge. */
+struct ofproto_dpif *
+xlate_ofproto_lookup(const struct uuid *uuid)
+{
+    struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
+    struct xbridge *xbridge;
+
+    xbridge = xcfg ? xbridge_lookup_by_uuid(xcfg, uuid) : NULL;
+    return xbridge ? xbridge->ofproto : NULL;
+}
+
static struct xbundle *
xbundle_lookup(struct xlate_cfg *xcfg, const struct ofbundle *ofbundle)
{
@@ -2125,9 +2142,14 @@ mirror_packet(struct xlate_ctx *ctx, struct xbundle *xbundle,
int snaplen;
/* Get the details of the mirror represented by the rightmost 1-bit. */
- ovs_assert(mirror_get(xbridge->mbridge, raw_ctz(mirrors),
- &vlans, &dup_mirrors,
- &out, &snaplen, &out_vlan));
+ if (OVS_UNLIKELY(!mirror_get(xbridge->mbridge, raw_ctz(mirrors),
+ &vlans, &dup_mirrors,
+ &out, &snaplen, &out_vlan))) {
+ /* The mirror got reconfigured before we got to read its
+ * configuration. */
+ mirrors = zero_rightmost_1bit(mirrors);
+ continue;
+ }
/* If this mirror selects on the basis of VLAN, and it does not select
@@ -3015,7 +3037,7 @@ xlate_normal(struct xlate_ctx *ctx)
bool is_grat_arp = is_gratuitous_arp(flow, wc);
if (ctx->xin->allow_side_effects
&& flow->packet_type == htonl(PT_ETH)
- && in_port->pt_mode != NETDEV_PT_LEGACY_L3
+ && in_port && in_port->pt_mode != NETDEV_PT_LEGACY_L3
) {
update_learning_table(ctx, in_xbundle, flow->dl_src, vlan,
is_grat_arp);
@@ -3024,12 +3046,14 @@ xlate_normal(struct xlate_ctx *ctx)
struct xc_entry *entry;
/* Save just enough info to update mac learning table later. */
- entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NORMAL);
- entry->normal.ofproto = ctx->xbridge->ofproto;
- entry->normal.in_port = flow->in_port.ofp_port;
- entry->normal.dl_src = flow->dl_src;
- entry->normal.vlan = vlan;
- entry->normal.is_gratuitous_arp = is_grat_arp;
+ if (ofproto_try_ref(&ctx->xbridge->ofproto->up)) {
+ entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NORMAL);
+ entry->normal.ofproto = ctx->xbridge->ofproto;
+ entry->normal.in_port = flow->in_port.ofp_port;
+ entry->normal.dl_src = flow->dl_src;
+ entry->normal.vlan = vlan;
+ entry->normal.is_gratuitous_arp = is_grat_arp;
+ }
}
/* Determine output bundle. */
@@ -3048,7 +3072,6 @@ xlate_normal(struct xlate_ctx *ctx)
*/
ctx->xout->slow |= SLOW_ACTION;
- memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
if (mcast_snooping_is_membership(flow->tp_src) ||
mcast_snooping_is_query(flow->tp_src)) {
if (ctx->xin->allow_side_effects && ctx->xin->packet) {
@@ -3272,7 +3295,9 @@ compose_ipfix_action(struct xlate_ctx *ctx, odp_port_t output_odp_port)
struct dpif_ipfix *ipfix = ctx->xbridge->ipfix;
odp_port_t tunnel_out_port = ODPP_NONE;
- if (!ipfix || ctx->xin->flow.in_port.ofp_port == OFPP_NONE) {
+ if (!ipfix ||
+ (output_odp_port == ODPP_NONE &&
+ ctx->xin->flow.in_port.ofp_port == OFPP_NONE)) {
return;
}
@@ -3521,6 +3546,9 @@ propagate_tunnel_data_to_flow__(struct flow *dst_flow,
dst_flow->dl_dst = dmac;
dst_flow->dl_src = smac;
+ /* Clear VLAN entries which do not apply for tunnel flows. */
+ memset(dst_flow->vlans, 0, sizeof dst_flow->vlans);
+
dst_flow->packet_type = htonl(PT_ETH);
dst_flow->nw_dst = src_flow->tunnel.ip_dst;
dst_flow->nw_src = src_flow->tunnel.ip_src;
@@ -3598,7 +3626,7 @@ propagate_tunnel_data_to_flow(struct xlate_ctx *ctx, struct eth_addr dmac,
static int
native_tunnel_output(struct xlate_ctx *ctx, const struct xport *xport,
const struct flow *flow, odp_port_t tunnel_odp_port,
- bool truncate)
+ bool truncate, bool is_last_action)
{
struct netdev_tnl_build_header_params tnl_params;
struct ovs_action_push_tnl tnl_push_data;
@@ -3728,7 +3756,7 @@ native_tunnel_output(struct xlate_ctx *ctx, const struct xport *xport,
entry->tunnel_hdr.hdr_size = tnl_push_data.header_len;
entry->tunnel_hdr.operation = ADD;
- patch_port_output(ctx, xport, out_dev);
+ patch_port_output(ctx, xport, out_dev, is_last_action);
/* Similar to the stats update in revalidation, the x_cache entries
* are populated by the previous translation are used to update the
@@ -3822,7 +3850,7 @@ xlate_flow_is_protected(const struct xlate_ctx *ctx, const struct flow *flow, co
*/
static void
patch_port_output(struct xlate_ctx *ctx, const struct xport *in_dev,
- struct xport *out_dev)
+ struct xport *out_dev, bool is_last_action)
{
struct flow *flow = &ctx->xin->flow;
struct flow old_flow = ctx->xin->flow;
@@ -3864,8 +3892,9 @@ patch_port_output(struct xlate_ctx *ctx, const struct xport *in_dev,
if (!process_special(ctx, out_dev) && may_receive(out_dev, ctx)) {
if (xport_stp_forward_state(out_dev) &&
xport_rstp_forward_state(out_dev)) {
+
xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true,
- false, true, clone_xlate_actions);
+ false, is_last_action, clone_xlate_actions);
if (!ctx->freezing) {
xlate_action_set(ctx);
}
@@ -3880,7 +3909,7 @@ patch_port_output(struct xlate_ctx *ctx, const struct xport *in_dev,
mirror_mask_t old_mirrors2 = ctx->mirrors;
xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true,
- false, true, clone_xlate_actions);
+ false, is_last_action, clone_xlate_actions);
ctx->mirrors = old_mirrors2;
ctx->base_flow = old_base_flow;
ctx->odp_actions->size = old_size;
@@ -4097,7 +4126,21 @@ terminate_native_tunnel(struct xlate_ctx *ctx, struct flow *flow,
(flow->dl_type == htons(ETH_TYPE_ARP) ||
flow->nw_proto == IPPROTO_ICMPV6) &&
is_neighbor_reply_correct(ctx, flow)) {
- tnl_neigh_snoop(flow, wc, ctx->xbridge->name);
+ tnl_neigh_snoop(flow, wc, ctx->xbridge->name,
+ ctx->xin->allow_side_effects);
+ } else if (*tnl_port != ODPP_NONE &&
+ ctx->xin->allow_side_effects &&
+ dl_type_is_ip_any(flow->dl_type)) {
+ struct eth_addr mac = flow->dl_src;
+ struct in6_addr s_ip6;
+
+ if (flow->dl_type == htons(ETH_TYPE_IP)) {
+ in6_addr_set_mapped_ipv4(&s_ip6, flow->nw_src);
+ } else {
+ s_ip6 = flow->ipv6_src;
+ }
+
+ tnl_neigh_set(ctx->xbridge->name, &s_ip6, mac);
}
}
@@ -4107,7 +4150,7 @@ terminate_native_tunnel(struct xlate_ctx *ctx, struct flow *flow,
static void
compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
const struct xlate_bond_recirc *xr, bool check_stp,
- bool is_last_action OVS_UNUSED, bool truncate)
+ bool is_last_action, bool truncate)
{
const struct xport *xport = get_ofp_port(ctx->xbridge, ofp_port);
struct flow_wildcards *wc = ctx->wc;
@@ -4137,6 +4180,10 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
if (xport->pt_mode == NETDEV_PT_LEGACY_L3) {
flow->packet_type = PACKET_TYPE_BE(OFPHTN_ETHERTYPE,
ntohs(flow->dl_type));
+ if (ctx->pending_encap) {
+ /* The Ethernet header was not actually added yet. */
+ ctx->pending_encap = false;
+ }
}
}
@@ -4144,7 +4191,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
if (truncate) {
xlate_report_error(ctx, "Cannot truncate output to patch port");
}
- patch_port_output(ctx, xport, xport->peer);
+ patch_port_output(ctx, xport, xport->peer, is_last_action);
return;
}
@@ -4239,7 +4286,8 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
xr->recirc_id);
} else if (is_native_tunnel) {
/* Output to native tunnel port. */
- native_tunnel_output(ctx, xport, flow, odp_port, truncate);
+ native_tunnel_output(ctx, xport, flow, odp_port, truncate,
+ is_last_action);
flow->tunnel = flow_tnl; /* Restore tunnel metadata */
} else if (terminate_native_tunnel(ctx, flow, wc,
@@ -6177,11 +6225,32 @@ static void
compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc,
bool is_last_action)
{
- ovs_u128 old_ct_label_mask = ctx->wc->masks.ct_label;
- uint32_t old_ct_mark_mask = ctx->wc->masks.ct_mark;
- size_t ct_offset;
uint16_t zone;
+ if (ofc->zone_src.field) {
+ union mf_subvalue value;
+ memset(&value, 0xff, sizeof(value));
+
+ zone = mf_get_subfield(&ofc->zone_src, &ctx->xin->flow);
+ if (ctx->xin->frozen_state) {
+ /* If the upcall is a resume of a recirculation, we only need to
+ * unwildcard the fields that are not in the frozen_metadata, as
+ * when the rules update, OVS will generate a new recirc_id,
+ * which will invalidate the megaflow with the old recirc_id.
+ */
+ if (!mf_is_frozen_metadata(ofc->zone_src.field)) {
+ mf_write_subfield_flow(&ofc->zone_src, &value,
+ &ctx->wc->masks);
+ }
+ } else {
+ mf_write_subfield_flow(&ofc->zone_src, &value, &ctx->wc->masks);
+ }
+ } else {
+ zone = ofc->zone_imm;
+ }
+ size_t ct_offset;
+ ovs_u128 old_ct_label_mask = ctx->wc->masks.ct_label;
+ uint32_t old_ct_mark_mask = ctx->wc->masks.ct_mark;
/* Ensure that any prior actions are applied before composing the new
* conntrack action. */
xlate_commit_actions(ctx);
@@ -6193,11 +6262,6 @@ compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc,
do_xlate_actions(ofc->actions, ofpact_ct_get_action_len(ofc), ctx,
is_last_action, false);
- if (ofc->zone_src.field) {
- zone = mf_get_subfield(&ofc->zone_src, &ctx->xin->flow);
- } else {
- zone = ofc->zone_imm;
- }
ct_offset = nl_msg_start_nested(ctx->odp_actions, OVS_ACTION_ATTR_CT);
if (ofc->flags & NX_CT_F_COMMIT) {
@@ -6333,6 +6397,7 @@ xlate_check_pkt_larger(struct xlate_ctx *ctx,
* then ctx->exit would be true. Reset to false so that we can
* do flow translation for 'IF_LESS_EQUAL' case. finish_freezing()
* would have taken care of Undoing the changes done for freeze. */
+ bool old_exit = ctx->exit;
ctx->exit = false;
offset_attr = nl_msg_start_nested(
@@ -6357,7 +6422,7 @@ xlate_check_pkt_larger(struct xlate_ctx *ctx,
ctx->was_mpls = old_was_mpls;
ctx->conntracked = old_conntracked;
ctx->xin->flow = old_flow;
- ctx->exit = true;
+ ctx->exit = old_exit;
}
static void
@@ -6738,13 +6803,14 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
return;
}
+ bool exit = false;
OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
struct ofpact_controller *controller;
const struct ofpact_metadata *metadata;
const struct ofpact_set_field *set_field;
const struct mf_field *mf;
bool last = is_last_action && ofpact_last(a, ofpacts, ofpacts_len)
- && ctx->action_set.size;
+ && !ctx->action_set.size;
if (ctx->error) {
break;
@@ -6752,7 +6818,7 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
recirc_for_mpls(a, ctx);
- if (ctx->exit) {
+ if (ctx->exit || exit) {
/* Check if need to store the remaining actions for later
* execution. */
if (ctx->freezing) {
@@ -7149,17 +7215,18 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
break;
case OFPACT_CHECK_PKT_LARGER: {
- if (last) {
- /* If this is last action, then there is no need to
- * translate the action. */
- break;
- }
const struct ofpact *remaining_acts = ofpact_next(a);
size_t remaining_acts_len = ofpact_remaining_len(remaining_acts,
ofpacts,
ofpacts_len);
xlate_check_pkt_larger(ctx, ofpact_get_CHECK_PKT_LARGER(a),
remaining_acts, remaining_acts_len);
+ if (ctx->xbridge->support.check_pkt_len) {
+ /* If datapath supports check_pkt_len, then
+ * xlate_check_pkt_larger() does the translation for the
+ * ofpacts following 'a'. */
+ exit = true;
+ }
break;
}
}
@@ -7623,6 +7690,12 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
goto exit;
}
+ if (!xin->frozen_state
+ && xin->flow.ct_state
+ && xin->flow.ct_state & CS_TRACKED) {
+ ctx.conntracked = true;
+ }
+
/* Tunnel metadata in udpif format must be normalized before translation. */
if (flow->tunnel.flags & FLOW_TNL_F_UDPIF) {
const struct tun_table *tun_tab = ofproto_get_tun_tab(
diff --git a/ofproto/ofproto-dpif-xlate.h b/ofproto/ofproto-dpif-xlate.h
index 851088d794..2ba90e999c 100644
--- a/ofproto/ofproto-dpif-xlate.h
+++ b/ofproto/ofproto-dpif-xlate.h
@@ -176,6 +176,7 @@ void xlate_ofproto_set(struct ofproto_dpif *, const char *name, struct dpif *,
bool forward_bpdu, bool has_in_band,
const struct dpif_backer_support *support);
void xlate_remove_ofproto(struct ofproto_dpif *);
+struct ofproto_dpif *xlate_ofproto_lookup(const struct uuid *uuid);
void xlate_bundle_set(struct ofproto_dpif *, struct ofbundle *,
const char *name, enum port_vlan_mode,
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index cba49a99e1..59eae88d87 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -215,10 +215,6 @@ struct shash all_dpif_backers = SHASH_INITIALIZER(&all_dpif_backers);
static struct hmap all_ofproto_dpifs_by_name =
HMAP_INITIALIZER(&all_ofproto_dpifs_by_name);
-/* All existing ofproto_dpif instances, indexed by ->uuid. */
-static struct hmap all_ofproto_dpifs_by_uuid =
- HMAP_INITIALIZER(&all_ofproto_dpifs_by_uuid);
-
static bool ofproto_use_tnl_push_pop = true;
static void ofproto_unixctl_init(void);
static void ct_zone_config_init(struct dpif_backer *backer);
@@ -1682,9 +1678,6 @@ construct(struct ofproto *ofproto_)
hmap_insert(&all_ofproto_dpifs_by_name,
&ofproto->all_ofproto_dpifs_by_name_node,
hash_string(ofproto->up.name, 0));
- hmap_insert(&all_ofproto_dpifs_by_uuid,
- &ofproto->all_ofproto_dpifs_by_uuid_node,
- uuid_hash(&ofproto->uuid));
memset(&ofproto->stats, 0, sizeof ofproto->stats);
ofproto_init_tables(ofproto_, N_TABLES);
@@ -1782,8 +1775,6 @@ destruct(struct ofproto *ofproto_, bool del)
hmap_remove(&all_ofproto_dpifs_by_name,
&ofproto->all_ofproto_dpifs_by_name_node);
- hmap_remove(&all_ofproto_dpifs_by_uuid,
- &ofproto->all_ofproto_dpifs_by_uuid_node);
OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) {
CLS_FOR_EACH (rule, up.cr, &table->cls) {
@@ -1819,6 +1810,8 @@ destruct(struct ofproto *ofproto_, bool del)
seq_destroy(ofproto->ams_seq);
+ /* Wait for all the meter destroy work to finish. */
+ ovsrcu_barrier();
close_dpif_backer(ofproto->backer, del);
}
@@ -2308,6 +2301,7 @@ set_ipfix(
struct dpif_ipfix *di = ofproto->ipfix;
bool has_options = bridge_exporter_options || flow_exporters_options;
bool new_di = false;
+ bool options_changed = false;
if (has_options && !di) {
di = ofproto->ipfix = dpif_ipfix_create();
@@ -2317,7 +2311,7 @@ set_ipfix(
if (di) {
/* Call set_options in any case to cleanly flush the flow
* caches in the last exporters that are to be destroyed. */
- dpif_ipfix_set_options(
+ options_changed = dpif_ipfix_set_options(
di, bridge_exporter_options, flow_exporters_options,
n_flow_exporters_options);
@@ -2333,6 +2327,10 @@ set_ipfix(
dpif_ipfix_unref(di);
ofproto->ipfix = NULL;
}
+
+ if (new_di || options_changed) {
+ ofproto->backer->need_revalidate = REV_RECONFIGURE;
+ }
}
return 0;
@@ -4433,12 +4431,14 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto,
atomic_add_relaxed(&tbl->n_matched, stats->n_packets, &orig);
}
if (xcache) {
- struct xc_entry *entry;
+ if (ofproto_try_ref(&ofproto->up)) {
+ struct xc_entry *entry;
- entry = xlate_cache_add_entry(xcache, XC_TABLE);
- entry->table.ofproto = ofproto;
- entry->table.id = *table_id;
- entry->table.match = true;
+ entry = xlate_cache_add_entry(xcache, XC_TABLE);
+ entry->table.ofproto = ofproto;
+ entry->table.id = *table_id;
+ entry->table.match = true;
+ }
}
return rule;
}
@@ -4469,12 +4469,14 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto,
stats->n_packets, &orig);
}
if (xcache) {
- struct xc_entry *entry;
+ if (ofproto_try_ref(&ofproto->up)) {
+ struct xc_entry *entry;
- entry = xlate_cache_add_entry(xcache, XC_TABLE);
- entry->table.ofproto = ofproto;
- entry->table.id = next_id;
- entry->table.match = (rule != NULL);
+ entry = xlate_cache_add_entry(xcache, XC_TABLE);
+ entry->table.ofproto = ofproto;
+ entry->table.id = next_id;
+ entry->table.match = (rule != NULL);
+ }
}
if (rule) {
goto out; /* Match. */
@@ -5556,6 +5558,7 @@ ct_set_zone_timeout_policy(const char *datapath_type, uint16_t zone_id,
ct_timeout_policy_unref(backer, ct_zone->ct_tp);
ct_zone->ct_tp = ct_tp;
ct_tp->ref_count++;
+ backer->need_revalidate = REV_RECONFIGURE;
}
} else {
struct ct_zone *new_ct_zone = ct_zone_alloc(zone_id);
@@ -5563,6 +5566,7 @@ ct_set_zone_timeout_policy(const char *datapath_type, uint16_t zone_id,
cmap_insert(&backer->ct_zones, &new_ct_zone->node,
hash_int(zone_id, 0));
ct_tp->ref_count++;
+ backer->need_revalidate = REV_RECONFIGURE;
}
}
@@ -5579,6 +5583,7 @@ ct_del_zone_timeout_policy(const char *datapath_type, uint16_t zone_id)
if (ct_zone) {
ct_timeout_policy_unref(backer, ct_zone->ct_tp);
ct_zone_remove_and_destroy(backer, ct_zone);
+ backer->need_revalidate = REV_RECONFIGURE;
}
}
@@ -5779,15 +5784,7 @@ ofproto_dpif_lookup_by_name(const char *name)
struct ofproto_dpif *
ofproto_dpif_lookup_by_uuid(const struct uuid *uuid)
{
- struct ofproto_dpif *ofproto;
-
- HMAP_FOR_EACH_WITH_HASH (ofproto, all_ofproto_dpifs_by_uuid_node,
- uuid_hash(uuid), &all_ofproto_dpifs_by_uuid) {
- if (uuid_equals(&ofproto->uuid, uuid)) {
- return ofproto;
- }
- }
- return NULL;
+ return xlate_ofproto_lookup(uuid);
}
static void
@@ -6496,6 +6493,7 @@ ofproto_unixctl_dpif_show_dp_features(struct unixctl_conn *conn,
dpif_show_support(&ofproto->backer->bt_support, &ds);
unixctl_command_reply(conn, ds_cstr(&ds));
+ ds_destroy(&ds);
}
static void
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index 57c7d17cb2..47e96e62e1 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -66,6 +66,7 @@ struct bfd_cfg;
struct meter;
struct ofoperation;
struct ofproto_packet_out;
+struct rule_collection;
struct smap;
extern struct ovs_mutex ofproto_mutex;
@@ -115,6 +116,9 @@ struct ofproto {
/* List of expirable flows, in all flow tables. */
struct ovs_list expirable OVS_GUARDED_BY(ofproto_mutex);
+ /* List of flows to remove from flow tables. */
+ struct rule_collection *to_remove OVS_GUARDED_BY(ofproto_mutex);
+
/* Meter table. */
struct ofputil_meter_features meter_features;
struct hmap meters; /* uint32_t indexed 'struct meter *'. */
@@ -139,6 +143,8 @@ struct ofproto {
/* Variable length mf_field mapping. Stores all configured variable length
* meta-flow fields (struct mf_field) in a switch. */
struct vl_mff_map vl_mff_map;
+ /* Refcount to this ofproto, held by rule/group/xlate_caches. */
+ struct ovs_refcount refcount;
};
void ofproto_init_tables(struct ofproto *, int n_tables);
@@ -1962,6 +1968,7 @@ struct ofproto_flow_mod {
bool modify_may_add_flow;
bool modify_keep_counts;
enum nx_flow_update_event event;
+ uint8_t table_id;
/* These are only used during commit execution.
* ofproto_flow_mod_uninit() does NOT clean these up. */
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index bd6103b1c8..7e09a588a2 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -213,6 +213,8 @@ static void ofproto_rule_insert__(struct ofproto *, struct rule *)
OVS_REQUIRES(ofproto_mutex);
static void ofproto_rule_remove__(struct ofproto *, struct rule *)
OVS_REQUIRES(ofproto_mutex);
+static void remove_rules_postponed(struct rule_collection *)
+ OVS_REQUIRES(ofproto_mutex);
/* The source of an OpenFlow request.
*
@@ -530,6 +532,8 @@ ofproto_create(const char *datapath_name, const char *datapath_type,
hindex_init(&ofproto->cookies);
hmap_init(&ofproto->learned_cookies);
ovs_list_init(&ofproto->expirable);
+ ofproto->to_remove = xzalloc(sizeof *ofproto->to_remove);
+ rule_collection_init(ofproto->to_remove);
ofproto->connmgr = connmgr_create(ofproto, datapath_name, datapath_name);
ofproto->min_mtu = INT_MAX;
cmap_init(&ofproto->groups);
@@ -545,6 +549,7 @@ ofproto_create(const char *datapath_name, const char *datapath_type,
ovs_mutex_init(&ofproto->vl_mff_map.mutex);
cmap_init(&ofproto->vl_mff_map.cmap);
+ ovs_refcount_init(&ofproto->refcount);
error = ofproto->ofproto_class->construct(ofproto);
if (error) {
@@ -1631,6 +1636,7 @@ ofproto_flush__(struct ofproto *ofproto, bool del)
}
ofproto_group_delete_all__(ofproto);
meter_delete_all(ofproto);
+ remove_rules_postponed(ofproto->to_remove);
/* XXX: Concurrent handler threads may insert new learned flows based on
* learn actions of the now deleted flows right after we release
* 'ofproto_mutex'. */
@@ -1682,12 +1688,41 @@ ofproto_destroy__(struct ofproto *ofproto)
ovs_assert(hmap_is_empty(&ofproto->learned_cookies));
hmap_destroy(&ofproto->learned_cookies);
+ ovs_mutex_lock(&ofproto_mutex);
+ rule_collection_destroy(ofproto->to_remove);
+ free(ofproto->to_remove);
+ ovs_mutex_unlock(&ofproto_mutex);
+
ofproto->ofproto_class->dealloc(ofproto);
}
-/* Destroying rules is doubly deferred, must have 'ofproto' around for them.
- * - 1st we defer the removal of the rules from the classifier
- * - 2nd we defer the actual destruction of the rules. */
+/*
+ * Rule destruction requires ofproto to remain accessible.
+ * Depending on the rule destruction call (shown below), it can take several
+ * RCU grace periods before the ofproto reference is not needed anymore.
+ * The ofproto destruction callback is thus protected by a refcount,
+ * and such destruction is itself deferred.
+ *
+ * remove_rules_postponed (one grace period)
+ * -> remove_rule_rcu
+ * -> remove_rule_rcu__
+ * -> ofproto_rule_unref -> ref count != 1
+ * -> ... more grace periods.
+ * -> rule_destroy_cb (> 2 grace periods)
+ * -> free
+ *
+ * NOTE: The original ofproto destruction is only deferred by two grace
+ * periods to keep ofproto accessible. By using refcount together the
+ * destruction can be deferred for a longer time. Now ofproto has 3 states:
+ *
+ * state 1: alive, with refcount >= 1
+ * state 2: dying, with refcount == 0, however pointer is valid
+ * state 3: dead, memory freed, pointer might be dangling.
+ *
+ * We only need to add refcount to certain objects whose destruction can
+ * take several RCU grace periods (rule, group, xlate_cache). Other
+ * references to ofproto must be cleared before the 2 RCU grace periods.
+ */
static void
ofproto_destroy_defer__(struct ofproto *ofproto)
OVS_EXCLUDED(ofproto_mutex)
@@ -1695,6 +1730,26 @@ ofproto_destroy_defer__(struct ofproto *ofproto)
ovsrcu_postpone(ofproto_destroy__, ofproto);
}
+void
+ofproto_ref(struct ofproto *ofproto)
+{
+ ovs_refcount_ref(&ofproto->refcount);
+}
+
+bool
+ofproto_try_ref(struct ofproto *ofproto)
+{
+ return ovs_refcount_try_ref_rcu(&ofproto->refcount);
+}
+
+void
+ofproto_unref(struct ofproto *ofproto)
+{
+ if (ofproto && ovs_refcount_unref(&ofproto->refcount) == 1) {
+ ovsrcu_postpone(ofproto_destroy_defer__, ofproto);
+ }
+}
+
void
ofproto_destroy(struct ofproto *p, bool del)
OVS_EXCLUDED(ofproto_mutex)
@@ -1726,8 +1781,7 @@ ofproto_destroy(struct ofproto *p, bool del)
p->connmgr = NULL;
ovs_mutex_unlock(&ofproto_mutex);
- /* Destroying rules is deferred, must have 'ofproto' around for them. */
- ovsrcu_postpone(ofproto_destroy_defer__, p);
+ ofproto_unref(p);
}
/* Destroys the datapath with the respective 'name' and 'type'. With the Linux
@@ -1878,6 +1932,9 @@ ofproto_run(struct ofproto *p)
connmgr_run(p->connmgr, handle_openflow);
+ ovs_mutex_lock(&ofproto_mutex);
+ remove_rules_postponed(p->to_remove);
+ ovs_mutex_unlock(&ofproto_mutex);
return error;
}
@@ -2916,6 +2973,9 @@ ofproto_rule_destroy__(struct rule *rule)
cls_rule_destroy(CONST_CAST(struct cls_rule *, &rule->cr));
rule_actions_destroy(rule_get_actions(rule));
ovs_mutex_destroy(&rule->mutex);
+ /* ofproto_unref() must be called first. It is possible because ofproto
+ * destruction is deferred by an RCU grace period. */
+ ofproto_unref(rule->ofproto);
rule->ofproto->ofproto_class->rule_dealloc(rule);
}
@@ -3056,6 +3116,9 @@ group_destroy_cb(struct ofgroup *group)
&group->props));
ofputil_bucket_list_destroy(CONST_CAST(struct ovs_list *,
&group->buckets));
+ /* ofproto_unref() must be called first. It is possible because ofproto
+ * destruction is deferred by an RCU grace period. */
+ ofproto_unref(group->ofproto);
group->ofproto->ofproto_class->group_dealloc(group);
}
@@ -4437,6 +4500,20 @@ rule_criteria_destroy(struct rule_criteria *criteria)
criteria->version = OVS_VERSION_NOT_REMOVED; /* Mark as destroyed. */
}
+/* Adds rules to the 'to_remove' collection, so they can be destroyed
+ * later all together. Destroys 'rules'. */
+static void
+rules_mark_for_removal(struct ofproto *ofproto, struct rule_collection *rules)
+ OVS_REQUIRES(ofproto_mutex)
+{
+ struct rule *rule;
+
+ RULE_COLLECTION_FOR_EACH (rule, rules) {
+ rule_collection_add(ofproto->to_remove, rule);
+ }
+ rule_collection_destroy(rules);
+}
+
/* Schedules postponed removal of rules, destroys 'rules'. */
static void
remove_rules_postponed(struct rule_collection *rules)
@@ -5244,10 +5321,15 @@ ofproto_rule_create(struct ofproto *ofproto, struct cls_rule *cr,
struct rule *rule;
enum ofperr error;
+ if (!ofproto_try_ref(ofproto)) {
+ return OFPERR_OFPFMFC_UNKNOWN;
+ }
+
/* Allocate new rule. */
rule = ofproto->ofproto_class->rule_alloc();
if (!rule) {
cls_rule_destroy(cr);
+ ofproto_unref(ofproto);
VLOG_WARN_RL(&rl, "%s: failed to allocate a rule.", ofproto->name);
return OFPERR_OFPFMFC_UNKNOWN;
}
@@ -5833,7 +5915,7 @@ modify_flows_finish(struct ofproto *ofproto, struct ofproto_flow_mod *ofm,
}
}
learned_cookies_flush(ofproto, &dead_cookies);
- remove_rules_postponed(old_rules);
+ rules_mark_for_removal(ofproto, old_rules);
}
return error;
@@ -5941,7 +6023,7 @@ delete_flows_finish__(struct ofproto *ofproto,
learned_cookies_dec(ofproto, rule_get_actions(rule),
&dead_cookies);
}
- remove_rules_postponed(rules);
+ rules_mark_for_removal(ofproto, rules);
learned_cookies_flush(ofproto, &dead_cookies);
}
@@ -7312,8 +7394,13 @@ init_group(struct ofproto *ofproto, const struct ofputil_group_mod *gm,
return OFPERR_OFPGMFC_BAD_TYPE;
}
+ if (!ofproto_try_ref(ofproto)) {
+ return OFPERR_OFPFMFC_UNKNOWN;
+ }
+
*ofgroup = ofproto->ofproto_class->group_alloc();
if (!*ofgroup) {
+ ofproto_unref(ofproto);
VLOG_WARN_RL(&rl, "%s: failed to allocate group", ofproto->name);
return OFPERR_OFPGMFC_OUT_OF_GROUPS;
}
@@ -7350,6 +7437,7 @@ init_group(struct ofproto *ofproto, const struct ofputil_group_mod *gm,
&(*ofgroup)->props));
ofputil_bucket_list_destroy(CONST_CAST(struct ovs_list *,
&(*ofgroup)->buckets));
+ ofproto_unref(ofproto);
ofproto->ofproto_class->group_dealloc(*ofgroup);
}
return error;
@@ -7967,6 +8055,7 @@ ofproto_flow_mod_init(struct ofproto *ofproto, struct ofproto_flow_mod *ofm,
ofm->criteria.version = OVS_VERSION_NOT_REMOVED;
ofm->conjs = NULL;
ofm->n_conjs = 0;
+ ofm->table_id = fm->table_id;
bool check_buffer_id = false;
@@ -8104,6 +8193,33 @@ ofproto_flow_mod_finish(struct ofproto *ofproto, struct ofproto_flow_mod *ofm,
return error;
}
+static void
+ofproto_table_classifier_defer(struct ofproto *ofproto,
+ const struct ofproto_flow_mod *ofm)
+{
+ if (check_table_id(ofproto, ofm->table_id)) {
+ if (ofm->table_id == OFPTT_ALL) {
+ struct oftable *table;
+
+ OFPROTO_FOR_EACH_TABLE (table, ofproto) {
+ classifier_defer(&table->cls);
+ }
+ } else {
+ classifier_defer(&ofproto->tables[ofm->table_id].cls);
+ }
+ }
+}
+
+static void
+ofproto_publish_classifiers(struct ofproto *ofproto)
+{
+ struct oftable *table;
+
+ OFPROTO_FOR_EACH_TABLE (table, ofproto) {
+ classifier_publish(&table->cls);
+ }
+}
+
/* Commit phases (all while locking ofproto_mutex):
*
* 1. Begin: Gather resources and make changes visible in the next version.
@@ -8165,6 +8281,10 @@ do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags)
/* Store the version in which the changes should take
* effect. */
be->ofm.version = version;
+ /* Publishing of the classifier update for every flow
+ * modification in a bundle separately is expensive in
+ * CPU time and memory. Deferring. */
+ ofproto_table_classifier_defer(ofproto, &be->ofm);
error = ofproto_flow_mod_start(ofproto, &be->ofm);
} else if (be->type == OFPTYPE_GROUP_MOD) {
/* Store the version in which the changes should take
@@ -8173,6 +8293,9 @@ do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags)
error = ofproto_group_mod_start(ofproto, &be->ogm);
} else if (be->type == OFPTYPE_PACKET_OUT) {
be->opo.version = version;
+ /* Need to use current version of flows for packet-out,
+ * so publishing all classifiers now. */
+ ofproto_publish_classifiers(ofproto);
error = ofproto_packet_out_start(ofproto, &be->opo);
} else {
OVS_NOT_REACHED();
@@ -8183,6 +8306,9 @@ do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags)
}
}
+ /* Publishing all changes made to classifiers. */
+ ofproto_publish_classifiers(ofproto);
+
if (error) {
/* Send error referring to the original message. */
ofconn_send_error(ofconn, be->msg, error);
@@ -8191,14 +8317,23 @@ do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags)
/* 2. Revert. Undo all the changes made above. */
LIST_FOR_EACH_REVERSE_CONTINUE(be, node, &bundle->msg_list) {
if (be->type == OFPTYPE_FLOW_MOD) {
+ /* Publishing of the classifier update for every flow
+ * modification in a bundle separately is expensive in
+ * CPU time and memory. Deferring. */
+ ofproto_table_classifier_defer(ofproto, &be->ofm);
ofproto_flow_mod_revert(ofproto, &be->ofm);
} else if (be->type == OFPTYPE_GROUP_MOD) {
ofproto_group_mod_revert(ofproto, &be->ogm);
} else if (be->type == OFPTYPE_PACKET_OUT) {
+ /* Need to use current version of flows for packet-out,
+ * so publishing all classifiers now. */
+ ofproto_publish_classifiers(ofproto);
ofproto_packet_out_revert(ofproto, &be->opo);
}
/* Nothing needs to be reverted for a port mod. */
}
+ /* Publishing all changes made to classifiers. */
+ ofproto_publish_classifiers(ofproto);
} else {
/* 4. Finish. */
LIST_FOR_EACH (be, node, &bundle->msg_list) {
diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h
index b0262da2df..4e15167ab7 100644
--- a/ofproto/ofproto.h
+++ b/ofproto/ofproto.h
@@ -563,6 +563,10 @@ int ofproto_port_get_cfm_status(const struct ofproto *,
enum ofputil_table_miss ofproto_table_get_miss_config(const struct ofproto *,
uint8_t table_id);
+void ofproto_ref(struct ofproto *);
+void ofproto_unref(struct ofproto *);
+bool ofproto_try_ref(struct ofproto *);
+
#ifdef __cplusplus
}
#endif
diff --git a/ovsdb/file.c b/ovsdb/file.c
index 59220824fa..ca80c28235 100644
--- a/ovsdb/file.c
+++ b/ovsdb/file.c
@@ -113,19 +113,17 @@ ovsdb_file_update_row_from_json(struct ovsdb_row *row, bool converting,
if (row_contains_diff
&& !ovsdb_datum_is_default(&row->fields[column->index],
&column->type)) {
- struct ovsdb_datum new_datum;
-
- error = ovsdb_datum_apply_diff(&new_datum,
+ error = ovsdb_datum_apply_diff_in_place(
&row->fields[column->index],
&datum, &column->type);
ovsdb_datum_destroy(&datum, &column->type);
if (error) {
return error;
}
- ovsdb_datum_swap(&datum, &new_datum);
+ } else {
+ ovsdb_datum_swap(&row->fields[column->index], &datum);
+ ovsdb_datum_destroy(&datum, &column->type);
}
- ovsdb_datum_swap(&row->fields[column->index], &datum);
- ovsdb_datum_destroy(&datum, &column->type);
}
return NULL;
@@ -526,6 +524,7 @@ ovsdb_file_read__(const char *filename, bool rw,
error = ovsdb_txn_replay_commit(txn);
if (error) {
+ ovsdb_error_destroy(error);
ovsdb_storage_unread(storage);
break;
}
diff --git a/ovsdb/monitor.c b/ovsdb/monitor.c
index 532dedcb64..ab814cf20e 100644
--- a/ovsdb/monitor.c
+++ b/ovsdb/monitor.c
@@ -1231,6 +1231,15 @@ ovsdb_monitor_get_update(
condition,
ovsdb_monitor_compose_row_update2);
if (!condition || !condition->conditional) {
+ if (json) {
+ struct json *json_serialized;
+
+ /* Pre-serializing the object to avoid doing this
+ * for every client. */
+ json_serialized = json_serialized_object_create(json);
+ json_destroy(json);
+ json = json_serialized;
+ }
ovsdb_monitor_json_cache_insert(dbmon, version, mcs,
json);
}
diff --git a/ovsdb/mutation.c b/ovsdb/mutation.c
index 56edc5f000..03d1c3499e 100644
--- a/ovsdb/mutation.c
+++ b/ovsdb/mutation.c
@@ -383,7 +383,7 @@ ovsdb_mutation_set_execute(struct ovsdb_row *row,
break;
case OVSDB_M_INSERT:
- ovsdb_datum_union(dst, arg, dst_type, false);
+ ovsdb_datum_union(dst, arg, dst_type);
error = ovsdb_mutation_check_count(dst, dst_type);
break;
diff --git a/ovsdb/ovsdb-idlc.in b/ovsdb/ovsdb-idlc.in
index 61cded16d3..a2ee10af1b 100755
--- a/ovsdb/ovsdb-idlc.in
+++ b/ovsdb/ovsdb-idlc.in
@@ -551,20 +551,20 @@ static void
print(" smap_init(&row->%s);" % columnName)
print(" for (size_t i = 0; i < datum->n; i++) {")
print(" smap_add(&row->%s," % columnName)
- print(" datum->keys[i].string,")
- print(" datum->values[i].string);")
+ print(" datum->keys[i].s->string,")
+ print(" datum->values[i].s->string);")
print(" }")
elif (type.n_min == 1 and type.n_max == 1) or type.is_optional_pointer():
print("")
print(" if (datum->n >= 1) {")
if not type.key.ref_table:
- print(" %s = datum->keys[0].%s;" % (keyVar, type.key.type.to_string()))
+ print(" %s = datum->keys[0].%s;" % (keyVar, type.key.type.to_rvalue_string()))
else:
print(" %s = %s%s_cast(ovsdb_idl_get_row_arc(row_, &%stable_%s, &datum->keys[0].uuid));" % (keyVar, prefix, type.key.ref_table.name.lower(), prefix, type.key.ref_table.name.lower()))
if valueVar:
if not type.value.ref_table:
- print(" %s = datum->values[0].%s;" % (valueVar, type.value.type.to_string()))
+ print(" %s = datum->values[0].%s;" % (valueVar, type.value.type.to_rvalue_string()))
else:
print(" %s = %s%s_cast(ovsdb_idl_get_row_arc(row_, &%stable_%s, &datum->values[0].uuid));" % (valueVar, prefix, type.value.ref_table.name.lower(), prefix, type.value.ref_table.name.lower()))
print(" } else {")
@@ -592,7 +592,7 @@ static void
""" % (prefix, type.key.ref_table.name.lower(), prefix, type.key.ref_table.name.lower(), prefix, type.key.ref_table.name.lower()))
keySrc = "keyRow"
else:
- keySrc = "datum->keys[i].%s" % type.key.type.to_string()
+ keySrc = "datum->keys[i].%s" % type.key.type.to_rvalue_string()
if type.value and type.value.ref_table:
print("""\
struct %s%s *valueRow = %s%s_cast(ovsdb_idl_get_row_arc(row_, &%stable_%s, &datum->values[i].uuid));
@@ -602,7 +602,7 @@ static void
""" % (prefix, type.value.ref_table.name.lower(), prefix, type.value.ref_table.name.lower(), prefix, type.value.ref_table.name.lower()))
valueSrc = "valueRow"
elif valueVar:
- valueSrc = "datum->values[i].%s" % type.value.type.to_string()
+ valueSrc = "datum->values[i].%s" % type.value.type.to_rvalue_string()
print(" if (!row->n_%s) {" % (columnName))
print(" %s = xmalloc(%s * sizeof *%s);" % (
@@ -910,45 +910,45 @@ void
'args': ', '.join(['%(type)s%(name)s'
% m for m in members])})
if type.n_min == 1 and type.n_max == 1:
- print(" union ovsdb_atom key;")
+ print(" union ovsdb_atom *key = xmalloc(sizeof *key);")
if type.value:
- print(" union ovsdb_atom value;")
+ print(" union ovsdb_atom *value = xmalloc(sizeof *value);")
print("")
print(" datum.n = 1;")
- print(" datum.keys = &key;")
- print(" " + type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), keyVar))
+ print(" datum.keys = key;")
+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), keyVar))
if type.value:
- print(" datum.values = &value;")
- print(" "+ type.value.assign_c_value_casting_away_const("value.%s" % type.value.type.to_string(), valueVar))
+ print(" datum.values = value;")
+ print(" " + type.value.copyCValue("value->%s" % type.value.type.to_lvalue_string(), valueVar))
else:
print(" datum.values = NULL;")
- txn_write_func = "ovsdb_idl_txn_write_clone"
+ txn_write_func = "ovsdb_idl_txn_write"
elif type.is_optional_pointer():
- print(" union ovsdb_atom key;")
print("")
print(" if (%s) {" % keyVar)
+ print(" union ovsdb_atom *key = xmalloc(sizeof *key);")
print(" datum.n = 1;")
- print(" datum.keys = &key;")
- print(" " + type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), keyVar))
+ print(" datum.keys = key;")
+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), keyVar))
print(" } else {")
print(" datum.n = 0;")
print(" datum.keys = NULL;")
print(" }")
print(" datum.values = NULL;")
- txn_write_func = "ovsdb_idl_txn_write_clone"
+ txn_write_func = "ovsdb_idl_txn_write"
elif type.n_max == 1:
- print(" union ovsdb_atom key;")
print("")
print(" if (%s) {" % nVar)
+ print(" union ovsdb_atom *key = xmalloc(sizeof *key);")
print(" datum.n = 1;")
- print(" datum.keys = &key;")
- print(" " + type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), "*" + keyVar))
+ print(" datum.keys = key;")
+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), "*" + keyVar))
print(" } else {")
print(" datum.n = 0;")
print(" datum.keys = NULL;")
print(" }")
print(" datum.values = NULL;")
- txn_write_func = "ovsdb_idl_txn_write_clone"
+ txn_write_func = "ovsdb_idl_txn_write"
else:
print("")
print(" datum.n = %s;" % nVar)
@@ -958,9 +958,9 @@ void
else:
print(" datum.values = NULL;")
print(" for (size_t i = 0; i < %s; i++) {" % nVar)
- print(" " + type.key.copyCValue("datum.keys[i].%s" % type.key.type.to_string(), "%s[i]" % keyVar))
+ print(" " + type.key.copyCValue("datum.keys[i].%s" % type.key.type.to_lvalue_string(), "%s[i]" % keyVar))
if type.value:
- print(" " + type.value.copyCValue("datum.values[i].%s" % type.value.type.to_string(), "%s[i]" % valueVar))
+ print(" " + type.value.copyCValue("datum.values[i].%s" % type.value.type.to_lvalue_string(), "%s[i]" % valueVar))
print(" }")
if type.value:
valueType = type.value.toAtomicType()
@@ -996,9 +996,8 @@ void
''' % {'s': structName, 'c': columnName,'coltype':column.type.key.to_const_c_type(prefix),
'valtype':column.type.value.to_const_c_type(prefix), 'S': structName.upper(),
'C': columnName.upper(), 't': tableName})
-
- print(" "+ type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_string(), "new_key"))
- print(" "+ type.value.copyCValue("datum->values[0].%s" % type.value.type.to_string(), "new_value"))
+ print(" " + type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_lvalue_string(), "new_key"))
+ print(" " + type.value.copyCValue("datum->values[0].%s" % type.value.type.to_lvalue_string(), "new_value"))
print('''
ovsdb_idl_txn_write_partial_map(&row->header_,
&%(s)s_col_%(c)s,
@@ -1022,8 +1021,7 @@ void
''' % {'s': structName, 'c': columnName,'coltype':column.type.key.to_const_c_type(prefix),
'valtype':column.type.value.to_const_c_type(prefix), 'S': structName.upper(),
'C': columnName.upper(), 't': tableName})
-
- print(" "+ type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_string(), "delete_key"))
+ print(" " + type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_lvalue_string(), "delete_key"))
print('''
ovsdb_idl_txn_delete_partial_map(&row->header_,
&%(s)s_col_%(c)s,
@@ -1049,8 +1047,7 @@ void
datum->values = NULL;
''' % {'s': structName, 'c': columnName,
'valtype':column.type.key.to_const_c_type(prefix), 't': tableName})
-
- print(" "+ type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_string(), "new_value"))
+ print(" " + type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_lvalue_string(), "new_value"))
print('''
ovsdb_idl_txn_write_partial_set(&row->header_,
&%(s)s_col_%(c)s,
@@ -1074,8 +1071,7 @@ void
''' % {'s': structName, 'c': columnName,'coltype':column.type.key.to_const_c_type(prefix),
'valtype':column.type.key.to_const_c_type(prefix), 'S': structName.upper(),
'C': columnName.upper(), 't': tableName})
-
- print(" "+ type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_string(), "delete_value"))
+ print(" " + type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_lvalue_string(), "delete_value"))
print('''
ovsdb_idl_txn_delete_partial_set(&row->header_,
&%(s)s_col_%(c)s,
@@ -1143,37 +1139,37 @@ void
print(" struct ovsdb_datum datum;")
free = []
if type.n_min == 1 and type.n_max == 1:
- print(" union ovsdb_atom key;")
+ print(" union ovsdb_atom *key = xmalloc(sizeof *key);")
if type.value:
- print(" union ovsdb_atom value;")
+ print(" union ovsdb_atom *value = xmalloc(sizeof *value);")
print("")
print(" datum.n = 1;")
- print(" datum.keys = &key;")
- print(" " + type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), keyVar, refTable=False))
+ print(" datum.keys = key;")
+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), keyVar, refTable=False))
if type.value:
- print(" datum.values = &value;")
- print(" "+ type.value.assign_c_value_casting_away_const("value.%s" % type.value.type.to_string(), valueVar, refTable=False))
+ print(" datum.values = value;")
+ print(" " + type.value.copyCValue("value->%s" % type.value.type.to_lvalue_string(), valueVar, refTable=False))
else:
print(" datum.values = NULL;")
elif type.is_optional_pointer():
- print(" union ovsdb_atom key;")
print("")
print(" if (%s) {" % keyVar)
+ print(" union ovsdb_atom *key = xmalloc(sizeof *key);")
print(" datum.n = 1;")
- print(" datum.keys = &key;")
- print(" " + type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), keyVar, refTable=False))
+ print(" datum.keys = key;")
+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), keyVar, refTable=False))
print(" } else {")
print(" datum.n = 0;")
print(" datum.keys = NULL;")
print(" }")
print(" datum.values = NULL;")
elif type.n_max == 1:
- print(" union ovsdb_atom key;")
print("")
print(" if (%s) {" % nVar)
+ print(" union ovsdb_atom *key = xmalloc(sizeof *key);")
print(" datum.n = 1;")
- print(" datum.keys = &key;")
- print(" " + type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), "*" + keyVar, refTable=False))
+ print(" datum.keys = key;")
+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), "*" + keyVar, refTable=False))
print(" } else {")
print(" datum.n = 0;")
print(" datum.keys = NULL;")
@@ -1182,16 +1177,14 @@ void
else:
print(" datum.n = %s;" % nVar)
print(" datum.keys = %s ? xmalloc(%s * sizeof *datum.keys) : NULL;" % (nVar, nVar))
- free += ['datum.keys']
if type.value:
print(" datum.values = xmalloc(%s * sizeof *datum.values);" % nVar)
- free += ['datum.values']
else:
print(" datum.values = NULL;")
print(" for (size_t i = 0; i < %s; i++) {" % nVar)
- print(" " + type.key.assign_c_value_casting_away_const("datum.keys[i].%s" % type.key.type.to_string(), "%s[i]" % keyVar, refTable=False))
+ print(" " + type.key.copyCValue("datum.keys[i].%s" % type.key.type.to_lvalue_string(), "%s[i]" % keyVar, refTable=False))
if type.value:
- print(" " + type.value.assign_c_value_casting_away_const("datum.values[i].%s" % type.value.type.to_string(), "%s[i]" % valueVar, refTable=False))
+ print(" " + type.value.copyCValue("datum.values[i].%s" % type.value.type.to_lvalue_string(), "%s[i]" % valueVar, refTable=False))
print(" }")
if type.value:
valueType = type.value.toAtomicType()
@@ -1211,8 +1204,8 @@ void
's': structName,
'S': structName.upper(),
'c': columnName})
- for var in free:
- print(" free(%s);" % var)
+ print(" ovsdb_datum_destroy(&datum, &%(s)s_col_%(c)s.type);" \
+ % {'s': structName, 'c': columnName})
print("}")
# Index table related functions
@@ -1272,7 +1265,7 @@ struct ovsdb_idl_cursor
struct ovsdb_idl_index *index, const struct %(s)s *target)
{
ovs_assert(index->table->class_ == &%(p)stable_%(tl)s);
- return ovsdb_idl_cursor_first_ge(index, &target->header_);
+ return ovsdb_idl_cursor_first_ge(index, target ? &target->header_ : NULL);
}
struct %(s)s *
@@ -1309,8 +1302,8 @@ struct %(s)s *
i = 0;
SMAP_FOR_EACH (node, %(c)s) {
- datum->keys[i].string = node->key;
- datum->values[i].string = node->value;
+ datum->keys[i].s = ovsdb_atom_string_create(node->key);
+ datum->values[i].s = ovsdb_atom_string_create(node->value);
i++;
}
ovsdb_datum_sort_unique(datum, OVSDB_TYPE_STRING, OVSDB_TYPE_STRING);
@@ -1359,10 +1352,10 @@ struct %(s)s *
print()
print(" datum.n = 1;")
print(" datum.keys = key;")
- print(" " + type.key.assign_c_value_casting_away_const("key->%s" % type.key.type.to_string(), keyVar))
+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), keyVar))
if type.value:
print(" datum.values = value;")
- print(" "+ type.value.assign_c_value_casting_away_const("value->%s" % type.value.type.to_string(), valueVar))
+ print(" " + type.value.copyCValue("value->%s" % type.value.type.to_lvalue_string(), valueVar))
else:
print(" datum.values = NULL;")
txn_write_func = "ovsdb_idl_index_write"
@@ -1373,7 +1366,7 @@ struct %(s)s *
print(" key = xmalloc(sizeof (union ovsdb_atom));")
print(" datum.n = 1;")
print(" datum.keys = key;")
- print(" " + type.key.assign_c_value_casting_away_const("key->%s" % type.key.type.to_string(), keyVar))
+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), keyVar))
print(" } else {")
print(" datum.n = 0;")
print(" datum.keys = NULL;")
@@ -1387,7 +1380,7 @@ struct %(s)s *
print(" key = xmalloc(sizeof(union ovsdb_atom));")
print(" datum.n = 1;")
print(" datum.keys = key;")
- print(" " + type.key.assign_c_value_casting_away_const("key->%s" % type.key.type.to_string(), "*" + keyVar))
+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), "*" + keyVar))
print(" } else {")
print(" datum.n = 0;")
print(" datum.keys = NULL;")
@@ -1404,9 +1397,9 @@ struct %(s)s *
else:
print(" datum.values = NULL;")
print(" for (i = 0; i < %s; i++) {" % nVar)
- print(" " + type.key.copyCValue("datum.keys[i].%s" % type.key.type.to_string(), "%s[i]" % keyVar))
+ print(" " + type.key.copyCValue("datum.keys[i].%s" % type.key.type.to_lvalue_string(), "%s[i]" % keyVar))
if type.value:
- print(" " + type.value.copyCValue("datum.values[i].%s" % type.value.type.to_string(), "%s[i]" % valueVar))
+ print(" " + type.value.copyCValue("datum.values[i].%s" % type.value.type.to_lvalue_string(), "%s[i]" % valueVar))
print(" }")
if type.value:
valueType = type.value.toAtomicType()
diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c
index 0b3d2bb714..5772955c92 100644
--- a/ovsdb/ovsdb-server.c
+++ b/ovsdb/ovsdb-server.c
@@ -26,6 +26,7 @@
#include "command-line.h"
#include "daemon.h"
#include "dirs.h"
+#include "dns-resolve.h"
#include "openvswitch/dynamic-string.h"
#include "fatal-signal.h"
#include "file.h"
@@ -329,6 +330,7 @@ main(int argc, char *argv[])
service_start(&argc, &argv);
fatal_ignore_sigpipe();
process_init();
+ dns_resolve_init(true);
bool active = false;
parse_options(argc, argv, &db_filenames, &remotes, &unixctl_path,
@@ -511,6 +513,7 @@ main(int argc, char *argv[])
run_command, process_status_msg(status));
}
}
+ dns_resolve_destroy();
perf_counters_destroy();
service_stop();
return 0;
@@ -904,8 +907,8 @@ query_db_string(const struct shash *all_dbs, const char *name,
datum = &row->fields[column->index];
for (i = 0; i < datum->n; i++) {
- if (datum->keys[i].string[0]) {
- return datum->keys[i].string;
+ if (datum->keys[i].s->string[0]) {
+ return datum->keys[i].s->string;
}
}
}
@@ -1018,7 +1021,7 @@ query_db_remotes(const char *name, const struct shash *all_dbs,
datum = &row->fields[column->index];
for (i = 0; i < datum->n; i++) {
- add_remote(remotes, datum->keys[i].string);
+ add_remote(remotes, datum->keys[i].s->string);
}
}
} else if (column->type.key.type == OVSDB_TYPE_UUID
diff --git a/ovsdb/ovsdb-tool.c b/ovsdb/ovsdb-tool.c
index 05a0223e71..d4a9e34cc4 100644
--- a/ovsdb/ovsdb-tool.c
+++ b/ovsdb/ovsdb-tool.c
@@ -919,7 +919,8 @@ print_raft_header(const struct raft_header *h,
if (!uuid_is_zero(&h->snap.eid)) {
printf(" prev_eid: %04x\n", uuid_prefix(&h->snap.eid, 4));
}
- print_data("prev_", h->snap.data, schemap, names);
+ print_data("prev_", raft_entry_get_parsed_data(&h->snap),
+ schemap, names);
}
}
@@ -973,11 +974,13 @@ raft_header_to_standalone_log(const struct raft_header *h,
struct ovsdb_log *db_log_data)
{
if (h->snap_index) {
- if (!h->snap.data || json_array(h->snap.data)->n != 2) {
+ const struct json *data = raft_entry_get_parsed_data(&h->snap);
+
+ if (!data || json_array(data)->n != 2) {
ovs_fatal(0, "Incorrect raft header data array length");
}
- struct json_array *pa = json_array(h->snap.data);
+ struct json_array *pa = json_array(data);
struct json *schema_json = pa->elems[0];
struct ovsdb_error *error = NULL;
@@ -1373,7 +1376,7 @@ do_check_cluster(struct ovs_cmdl_context *ctx)
}
struct raft_entry *e = &s->entries[log_idx];
e->term = r->term;
- e->data = r->entry.data;
+ raft_entry_set_parsed_data_nocopy(e, r->entry.data);
e->eid = r->entry.eid;
e->servers = r->entry.servers;
break;
diff --git a/ovsdb/ovsdb-util.c b/ovsdb/ovsdb-util.c
index c4075cdae3..6d7be066b6 100644
--- a/ovsdb/ovsdb-util.c
+++ b/ovsdb/ovsdb-util.c
@@ -111,13 +111,13 @@ ovsdb_util_read_map_string_column(const struct ovsdb_row *row,
for (i = 0; i < datum->n; i++) {
atom_key = &datum->keys[i];
- if (!strcmp(atom_key->string, key)) {
+ if (!strcmp(atom_key->s->string, key)) {
atom_value = &datum->values[i];
break;
}
}
- return atom_value ? atom_value->string : NULL;
+ return atom_value ? atom_value->s->string : NULL;
}
/* Read string-uuid key-values from a map. Returns the row associated with
@@ -143,7 +143,7 @@ ovsdb_util_read_map_string_uuid_column(const struct ovsdb_row *row,
const struct ovsdb_datum *datum = &row->fields[column->index];
for (size_t i = 0; i < datum->n; i++) {
union ovsdb_atom *atom_key = &datum->keys[i];
- if (!strcmp(atom_key->string, key)) {
+ if (!strcmp(atom_key->s->string, key)) {
const union ovsdb_atom *atom_value = &datum->values[i];
return ovsdb_table_get_row(ref_table, &atom_value->uuid);
}
@@ -181,7 +181,7 @@ ovsdb_util_read_string_column(const struct ovsdb_row *row,
const union ovsdb_atom *atom;
atom = ovsdb_util_read_column(row, column_name, OVSDB_TYPE_STRING);
- *stringp = atom ? atom->string : NULL;
+ *stringp = atom ? atom->s->string : NULL;
return atom != NULL;
}
@@ -269,8 +269,10 @@ ovsdb_util_write_string_column(struct ovsdb_row *row, const char *column_name,
const char *string)
{
if (string) {
- const union ovsdb_atom atom = { .string = CONST_CAST(char *, string) };
+ union ovsdb_atom atom = {
+ .s = ovsdb_atom_string_create(CONST_CAST(char *, string)) };
ovsdb_util_write_singleton(row, column_name, &atom, OVSDB_TYPE_STRING);
+ ovsdb_atom_destroy(&atom, OVSDB_TYPE_STRING);
} else {
ovsdb_util_clear_column(row, column_name);
}
@@ -305,8 +307,8 @@ ovsdb_util_write_string_string_column(struct ovsdb_row *row,
datum->values = xmalloc(n * sizeof *datum->values);
for (i = 0; i < n; ++i) {
- datum->keys[i].string = keys[i];
- datum->values[i].string = values[i];
+ datum->keys[i].s = ovsdb_atom_string_create_nocopy(keys[i]);
+ datum->values[i].s = ovsdb_atom_string_create_nocopy(values[i]);
}
/* Sort and check constraints. */
diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c
index 126d16a2f5..e6d866182c 100644
--- a/ovsdb/ovsdb.c
+++ b/ovsdb/ovsdb.c
@@ -422,6 +422,8 @@ ovsdb_create(struct ovsdb_schema *schema, struct ovsdb_storage *storage)
ovs_list_init(&db->triggers);
db->run_triggers_now = db->run_triggers = false;
+ db->n_atoms = 0;
+
db->is_relay = false;
ovs_list_init(&db->txn_forward_new);
hmap_init(&db->txn_forward_sent);
@@ -518,6 +520,9 @@ ovsdb_get_memory_usage(const struct ovsdb *db, struct simap *usage)
}
simap_increase(usage, "cells", cells);
+ simap_increase(usage, "atoms", db->n_atoms);
+ simap_increase(usage, "txn-history", db->n_txn_history);
+ simap_increase(usage, "txn-history-atoms", db->n_txn_history_atoms);
if (db->storage) {
ovsdb_storage_get_memory_usage(db->storage, usage);
diff --git a/ovsdb/ovsdb.h b/ovsdb/ovsdb.h
index 4a7bd0f0ec..ec2d235ec2 100644
--- a/ovsdb/ovsdb.h
+++ b/ovsdb/ovsdb.h
@@ -90,8 +90,11 @@ struct ovsdb {
/* History trasanctions for incremental monitor transfer. */
bool need_txn_history; /* Need to maintain history of transactions. */
unsigned int n_txn_history; /* Current number of history transactions. */
+ unsigned int n_txn_history_atoms; /* Total number of atoms in history. */
struct ovs_list txn_history; /* Contains "struct ovsdb_txn_history_node. */
+ size_t n_atoms; /* Total number of ovsdb atoms in the database. */
+
/* Relay mode. */
bool is_relay; /* True, if database is in relay mode. */
/* List that holds transactions waiting to be forwarded to the server. */
diff --git a/ovsdb/raft-private.c b/ovsdb/raft-private.c
index 26d39a087f..4145c8729f 100644
--- a/ovsdb/raft-private.c
+++ b/ovsdb/raft-private.c
@@ -18,11 +18,14 @@
#include "raft-private.h"
+#include "coverage.h"
#include "openvswitch/dynamic-string.h"
#include "ovsdb-error.h"
#include "ovsdb-parser.h"
#include "socket-util.h"
#include "sset.h"
+
+COVERAGE_DEFINE(raft_entry_serialize);
/* Addresses of Raft servers. */
@@ -33,7 +36,10 @@ raft_address_validate(const char *address)
return NULL;
} else if (!strncmp(address, "ssl:", 4) || !strncmp(address, "tcp:", 4)) {
struct sockaddr_storage ss;
- if (!inet_parse_active(address + 4, -1, &ss, true)) {
+ bool dns_failure = false;
+
+ if (!inet_parse_active(address + 4, -1, &ss, true, &dns_failure)
+ && !dns_failure) {
return ovsdb_error(NULL, "%s: syntax error in address", address);
}
return NULL;
@@ -281,7 +287,8 @@ void
raft_entry_clone(struct raft_entry *dst, const struct raft_entry *src)
{
dst->term = src->term;
- dst->data = json_nullable_clone(src->data);
+ dst->data.full_json = json_nullable_clone(src->data.full_json);
+ dst->data.serialized = json_nullable_clone(src->data.serialized);
dst->eid = src->eid;
dst->servers = json_nullable_clone(src->servers);
dst->election_timer = src->election_timer;
@@ -291,7 +298,8 @@ void
raft_entry_uninit(struct raft_entry *e)
{
if (e) {
- json_destroy(e->data);
+ json_destroy(e->data.full_json);
+ json_destroy(e->data.serialized);
json_destroy(e->servers);
}
}
@@ -301,8 +309,9 @@ raft_entry_to_json(const struct raft_entry *e)
{
struct json *json = json_object_create();
raft_put_uint64(json, "term", e->term);
- if (e->data) {
- json_object_put(json, "data", json_clone(e->data));
+ if (raft_entry_has_data(e)) {
+ json_object_put(json, "data",
+ json_clone(raft_entry_get_serialized_data(e)));
json_object_put_format(json, "eid", UUID_FMT, UUID_ARGS(&e->eid));
}
if (e->servers) {
@@ -323,9 +332,10 @@ raft_entry_from_json(struct json *json, struct raft_entry *e)
struct ovsdb_parser p;
ovsdb_parser_init(&p, json, "raft log entry");
e->term = raft_parse_required_uint64(&p, "term");
- e->data = json_nullable_clone(
+ raft_entry_set_parsed_data(e,
ovsdb_parser_member(&p, "data", OP_OBJECT | OP_ARRAY | OP_OPTIONAL));
- e->eid = e->data ? raft_parse_required_uuid(&p, "eid") : UUID_ZERO;
+ e->eid = raft_entry_has_data(e)
+ ? raft_parse_required_uuid(&p, "eid") : UUID_ZERO;
e->servers = json_nullable_clone(
ovsdb_parser_member(&p, "servers", OP_OBJECT | OP_OPTIONAL));
if (e->servers) {
@@ -344,9 +354,72 @@ bool
raft_entry_equals(const struct raft_entry *a, const struct raft_entry *b)
{
return (a->term == b->term
- && json_equal(a->data, b->data)
&& uuid_equals(&a->eid, &b->eid)
- && json_equal(a->servers, b->servers));
+ && json_equal(a->servers, b->servers)
+ && json_equal(raft_entry_get_parsed_data(a),
+ raft_entry_get_parsed_data(b)));
+}
+
+bool
+raft_entry_has_data(const struct raft_entry *e)
+{
+ return e->data.full_json || e->data.serialized;
+}
+
+static void
+raft_entry_data_serialize(struct raft_entry *e)
+{
+ if (!raft_entry_has_data(e) || e->data.serialized) {
+ return;
+ }
+ COVERAGE_INC(raft_entry_serialize);
+ e->data.serialized = json_serialized_object_create(e->data.full_json);
+}
+
+void
+raft_entry_set_parsed_data_nocopy(struct raft_entry *e, struct json *json)
+{
+ ovs_assert(!json || json->type != JSON_SERIALIZED_OBJECT);
+ e->data.full_json = json;
+ e->data.serialized = NULL;
+}
+
+void
+raft_entry_set_parsed_data(struct raft_entry *e, const struct json *json)
+{
+ raft_entry_set_parsed_data_nocopy(e, json_nullable_clone(json));
+}
+
+/* Returns the fully parsed JSON object of the entry's data, or NULL if the
+ * entry holds none.  The caller takes ownership of the result.
+ *
+ * The serialized form is created first if it is missing.  Afterwards the
+ * entry no longer holds a parsed object, so later calls return NULL. */
+struct json * OVS_WARN_UNUSED_RESULT
+raft_entry_steal_parsed_data(struct raft_entry *e)
+{
+ /* Ensure that serialized version exists. */
+ raft_entry_data_serialize(e);
+
+ struct json *json = e->data.full_json;
+ e->data.full_json = NULL;
+
+ return json;
+}
+
+/* Returns a pointer to the fully parsed json object of the data, if any. */
+const struct json *
+raft_entry_get_parsed_data(const struct raft_entry *e)
+{
+ return e->data.full_json;
+}
+
+/* Returns the JSON_SERIALIZED_OBJECT of the data, creating it if needed. */
+const struct json *
+raft_entry_get_serialized_data(const struct raft_entry *e)
+{
+ raft_entry_data_serialize(CONST_CAST(struct raft_entry *, e));
+ return e->data.serialized;
}
void
@@ -402,8 +475,8 @@ raft_header_from_json__(struct raft_header *h, struct ovsdb_parser *p)
* present, all of them must be. */
h->snap_index = raft_parse_optional_uint64(p, "prev_index");
if (h->snap_index) {
- h->snap.data = json_nullable_clone(
- ovsdb_parser_member(p, "prev_data", OP_ANY));
+ raft_entry_set_parsed_data(
+ &h->snap, ovsdb_parser_member(p, "prev_data", OP_ANY));
h->snap.eid = raft_parse_required_uuid(p, "prev_eid");
h->snap.term = raft_parse_required_uint64(p, "prev_term");
h->snap.election_timer = raft_parse_optional_uint64(
@@ -455,8 +528,9 @@ raft_header_to_json(const struct raft_header *h)
if (h->snap_index) {
raft_put_uint64(json, "prev_index", h->snap_index);
raft_put_uint64(json, "prev_term", h->snap.term);
- if (h->snap.data) {
- json_object_put(json, "prev_data", json_clone(h->snap.data));
+ if (raft_entry_has_data(&h->snap)) {
+ json_object_put(json, "prev_data",
+ json_clone(raft_entry_get_serialized_data(&h->snap)));
}
json_object_put_format(json, "prev_eid",
UUID_FMT, UUID_ARGS(&h->snap.eid));
diff --git a/ovsdb/raft-private.h b/ovsdb/raft-private.h
index a69e37e5c2..48c6df511f 100644
--- a/ovsdb/raft-private.h
+++ b/ovsdb/raft-private.h
@@ -118,7 +118,10 @@ void raft_servers_format(const struct hmap *servers, struct ds *ds);
* entry. */
struct raft_entry {
uint64_t term;
- struct json *data;
+ struct {
+ struct json *full_json; /* Fully parsed JSON object. */
+ struct json *serialized; /* JSON_SERIALIZED_OBJECT version of data. */
+ } data;
struct uuid eid;
struct json *servers;
uint64_t election_timer;
@@ -130,6 +133,13 @@ struct json *raft_entry_to_json(const struct raft_entry *);
struct ovsdb_error *raft_entry_from_json(struct json *, struct raft_entry *)
OVS_WARN_UNUSED_RESULT;
bool raft_entry_equals(const struct raft_entry *, const struct raft_entry *);
+bool raft_entry_has_data(const struct raft_entry *);
+void raft_entry_set_parsed_data(struct raft_entry *, const struct json *);
+void raft_entry_set_parsed_data_nocopy(struct raft_entry *, struct json *);
+struct json *raft_entry_steal_parsed_data(struct raft_entry *)
+ OVS_WARN_UNUSED_RESULT;
+const struct json *raft_entry_get_parsed_data(const struct raft_entry *);
+const struct json *raft_entry_get_serialized_data(const struct raft_entry *);
/* On disk data serialization and deserialization. */
diff --git a/ovsdb/raft.c b/ovsdb/raft.c
index 2fb5156519..b70fbed5d4 100644
--- a/ovsdb/raft.c
+++ b/ovsdb/raft.c
@@ -74,9 +74,12 @@ enum raft_failure_test {
FT_CRASH_BEFORE_SEND_EXEC_REQ,
FT_CRASH_AFTER_SEND_EXEC_REQ,
FT_CRASH_AFTER_RECV_APPEND_REQ_UPDATE,
+ FT_CRASH_BEFORE_SEND_SNAPSHOT_REP,
FT_DELAY_ELECTION,
FT_DONT_SEND_VOTE_REQUEST,
FT_STOP_RAFT_RPC,
+ FT_TRANSFER_LEADERSHIP,
+ FT_TRANSFER_LEADERSHIP_AFTER_SEND_APPEND_REQ,
};
static enum raft_failure_test failure_test;
@@ -379,12 +382,19 @@ static bool raft_handle_write_error(struct raft *, struct ovsdb_error *);
static void raft_run_reconfigure(struct raft *);
static void raft_set_leader(struct raft *, const struct uuid *sid);
+
static struct raft_server *
raft_find_server(const struct raft *raft, const struct uuid *sid)
{
return raft_server_find(&raft->servers, sid);
}
+static struct raft_server *
+raft_find_new_server(struct raft *raft, const struct uuid *uuid)
+{
+ return raft_server_find(&raft->add_servers, uuid);
+}
+
static char *
raft_make_address_passive(const char *address_)
{
@@ -494,11 +504,11 @@ raft_create_cluster(const char *file_name, const char *name,
.snap_index = index++,
.snap = {
.term = term,
- .data = json_nullable_clone(data),
.eid = uuid_random(),
.servers = json_object_create(),
},
};
+ raft_entry_set_parsed_data(&h.snap, data);
shash_add_nocopy(json_object(h.snap.servers),
xasprintf(UUID_FMT, UUID_ARGS(&h.sid)),
json_string_create(local_address));
@@ -727,10 +737,10 @@ raft_add_entry(struct raft *raft,
uint64_t index = raft->log_end++;
struct raft_entry *entry = &raft->entries[index - raft->log_start];
entry->term = term;
- entry->data = data;
entry->eid = eid ? *eid : UUID_ZERO;
entry->servers = servers;
entry->election_timer = election_timer;
+ raft_entry_set_parsed_data_nocopy(entry, data);
return index;
}
@@ -741,13 +751,16 @@ raft_write_entry(struct raft *raft, uint64_t term, struct json *data,
const struct uuid *eid, struct json *servers,
uint64_t election_timer)
{
+ uint64_t index = raft_add_entry(raft, term, data, eid, servers,
+ election_timer);
+ const struct json *entry_data = raft_entry_get_serialized_data(
+ &raft->entries[index - raft->log_start]);
struct raft_record r = {
.type = RAFT_REC_ENTRY,
.term = term,
.entry = {
- .index = raft_add_entry(raft, term, data, eid, servers,
- election_timer),
- .data = data,
+ .index = index,
+ .data = CONST_CAST(struct json *, entry_data),
.servers = servers,
.election_timer = election_timer,
.eid = eid ? *eid : UUID_ZERO,
@@ -1864,6 +1877,8 @@ raft_open_conn(struct raft *raft, const char *address, const struct uuid *sid)
static void
raft_conn_close(struct raft_conn *conn)
{
+ VLOG_DBG("closing connection to server %s (%s)",
+ conn->nickname, jsonrpc_session_get_name(conn->js));
jsonrpc_session_close(conn->js);
ovs_list_remove(&conn->list_node);
free(conn->nickname);
@@ -1918,6 +1933,13 @@ raft_run(struct raft *raft)
return;
}
+ if (failure_test == FT_TRANSFER_LEADERSHIP) {
+ /* Using this function as it conveniently implements all we need and
+ * snapshotting is the main test scenario for leadership transfer. */
+ raft_notify_snapshot_recommended(raft);
+ failure_test = FT_NO_TEST;
+ }
+
raft_waiters_run(raft);
if (!raft->listener && time_msec() >= raft->listen_backoff) {
@@ -1954,16 +1976,30 @@ raft_run(struct raft *raft)
}
/* Close unneeded sessions. */
+ struct raft_server *server;
struct raft_conn *next;
LIST_FOR_EACH_SAFE (conn, next, list_node, &raft->conns) {
if (!raft_conn_should_stay_open(raft, conn)) {
+ server = raft_find_new_server(raft, &conn->sid);
+ if (server) {
+ /* We only have one incoming connection from joining servers,
+ * so if it's closed, we need to destroy the record about the
+ * server. This way the process can be started over on the
+ * next join request. */
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
+ VLOG_INFO_RL(&rl, "cluster "CID_FMT": server %s (%s) "
+ "disconnected while joining",
+ CID_ARGS(&raft->cid),
+ server->nickname, server->address);
+ hmap_remove(&raft->add_servers, &server->hmap_node);
+ raft_server_destroy(server);
+ }
raft->n_disconnections++;
raft_conn_close(conn);
}
}
/* Open needed sessions. */
- struct raft_server *server;
HMAP_FOR_EACH (server, hmap_node, &raft->servers) {
raft_open_conn(raft, server->address, &server->sid);
}
@@ -2040,7 +2076,14 @@ raft_run(struct raft *raft)
HMAP_FOR_EACH_SAFE (cmd, next_cmd, hmap_node, &raft->commands) {
if (cmd->timestamp
&& now - cmd->timestamp > raft->election_timer * 2) {
- raft_command_complete(raft, cmd, RAFT_CMD_TIMEOUT);
+ if (cmd->index && raft->role != RAFT_LEADER) {
+ /* This server lost leadership and command didn't complete
+ * in time. Likely, it wasn't replicated to the majority
+ * of servers before losing the leadership. */
+ raft_command_complete(raft, cmd, RAFT_CMD_LOST_LEADERSHIP);
+ } else {
+ raft_command_complete(raft, cmd, RAFT_CMD_TIMEOUT);
+ }
}
}
raft_reset_ping_timer(raft);
@@ -2161,7 +2204,7 @@ raft_get_eid(const struct raft *raft, uint64_t index)
{
for (; index >= raft->log_start; index--) {
const struct raft_entry *e = raft_get_entry(raft, index);
- if (e->data) {
+ if (raft_entry_has_data(e)) {
return &e->eid;
}
}
@@ -2232,6 +2275,9 @@ raft_command_initiate(struct raft *raft,
if (failure_test == FT_CRASH_AFTER_SEND_APPEND_REQ) {
ovs_fatal(0, "Raft test: crash after sending append_request.");
}
+ if (failure_test == FT_TRANSFER_LEADERSHIP_AFTER_SEND_APPEND_REQ) {
+ failure_test = FT_TRANSFER_LEADERSHIP;
+ }
raft_reset_ping_timer(raft);
return cmd;
@@ -2598,7 +2644,13 @@ raft_become_follower(struct raft *raft)
* configuration is already part of the log. Possibly the configuration
* log entry will not be committed, but until we know that we must use the
* new configuration. Our AppendEntries processing will properly update
- * the server configuration later, if necessary. */
+ * the server configuration later, if necessary.
+ *
+ * Also we do not complete commands here, as they can still be completed
+ * if their log entries have already been replicated to other servers.
+ * If the entries were actually committed according to the new leader, our
+ * AppendEntries processing will complete the corresponding commands.
+ */
struct raft_server *s;
HMAP_FOR_EACH (s, hmap_node, &raft->add_servers) {
raft_send_add_server_reply__(raft, &s->sid, s->address, false,
@@ -2612,8 +2664,6 @@ raft_become_follower(struct raft *raft)
raft_server_destroy(raft->remove_server);
raft->remove_server = NULL;
}
-
- raft_complete_all_commands(raft, RAFT_CMD_LOST_LEADERSHIP);
}
static void
@@ -2826,8 +2876,8 @@ raft_truncate(struct raft *raft, uint64_t new_end)
return servers_changed;
}
-static const struct json *
-raft_peek_next_entry(struct raft *raft, struct uuid *eid)
+static const struct raft_entry *
+raft_peek_next_entry(struct raft *raft)
{
/* Invariant: log_start - 2 <= last_applied <= commit_index < log_end. */
ovs_assert(raft->log_start <= raft->last_applied + 2);
@@ -2839,32 +2889,20 @@ raft_peek_next_entry(struct raft *raft, struct uuid *eid)
}
if (raft->log_start == raft->last_applied + 2) {
- *eid = raft->snap.eid;
- return raft->snap.data;
+ return &raft->snap;
}
while (raft->last_applied < raft->commit_index) {
const struct raft_entry *e = raft_get_entry(raft,
raft->last_applied + 1);
- if (e->data) {
- *eid = e->eid;
- return e->data;
+ if (raft_entry_has_data(e)) {
+ return e;
}
raft->last_applied++;
}
return NULL;
}
-static const struct json *
-raft_get_next_entry(struct raft *raft, struct uuid *eid)
-{
- const struct json *data = raft_peek_next_entry(raft, eid);
- if (data) {
- raft->last_applied++;
- }
- return data;
-}
-
/* Updates commit index in raft log. If commit index is already up-to-date
* it does nothing and return false, otherwise, returns true. */
static bool
@@ -2874,61 +2912,56 @@ raft_update_commit_index(struct raft *raft, uint64_t new_commit_index)
return false;
}
- if (raft->role == RAFT_LEADER) {
- while (raft->commit_index < new_commit_index) {
- uint64_t index = ++raft->commit_index;
- const struct raft_entry *e = raft_get_entry(raft, index);
- if (e->data) {
- struct raft_command *cmd
- = raft_find_command_by_eid(raft, &e->eid);
- if (cmd) {
- if (!cmd->index) {
- VLOG_DBG("Command completed after role change from"
- " follower to leader "UUID_FMT,
- UUID_ARGS(&e->eid));
- cmd->index = index;
- }
- raft_command_complete(raft, cmd, RAFT_CMD_SUCCESS);
+ while (raft->commit_index < new_commit_index) {
+ uint64_t index = ++raft->commit_index;
+ const struct raft_entry *e = raft_get_entry(raft, index);
+
+ if (raft_entry_has_data(e)) {
+ struct raft_command *cmd = raft_find_command_by_eid(raft, &e->eid);
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
+
+ if (cmd) {
+ if (!cmd->index && raft->role == RAFT_LEADER) {
+ VLOG_INFO_RL(&rl,
+ "command completed after role change from "
+ "follower to leader (eid: "UUID_FMT", "
+ "commit index: %"PRIu64")", UUID_ARGS(&e->eid), index);
+ } else if (!cmd->index && raft->role != RAFT_LEADER) {
+ /* This can happen when the leader fails over before
+ * sending execute_command_reply. */
+ VLOG_INFO_RL(&rl,
+ "command completed without reply (eid: "UUID_FMT", "
+ "commit index: %"PRIu64")", UUID_ARGS(&e->eid), index);
+ } else if (cmd->index && raft->role != RAFT_LEADER) {
+ /* This can happen if current server lost leadership after
+ * sending append requests to the majority of servers, but
+ * before receiving majority of append replies. */
+ VLOG_INFO_RL(&rl,
+ "command completed after role change from "
+ "leader to follower (eid: "UUID_FMT", "
+ "commit index: %"PRIu64")", UUID_ARGS(&e->eid), index);
+ /* Clearing 'sid' to avoid sending cmd execution reply. */
+ cmd->sid = UUID_ZERO;
+ } else {
+ /* (cmd->index && raft->role == RAFT_LEADER)
+ * Normal command completion on a leader. */
}
- }
- if (e->election_timer) {
- VLOG_INFO("Election timer changed from %"PRIu64" to %"PRIu64,
- raft->election_timer, e->election_timer);
- raft->election_timer = e->election_timer;
- raft->election_timer_new = 0;
- raft_update_probe_intervals(raft);
- }
- if (e->servers) {
- /* raft_run_reconfigure() can write a new Raft entry, which can
- * reallocate raft->entries, which would invalidate 'e', so
- * this case must be last, after the one for 'e->data'. */
- raft_run_reconfigure(raft);
+ cmd->index = index;
+ raft_command_complete(raft, cmd, RAFT_CMD_SUCCESS);
}
}
- } else {
- while (raft->commit_index < new_commit_index) {
- uint64_t index = ++raft->commit_index;
- const struct raft_entry *e = raft_get_entry(raft, index);
- if (e->election_timer) {
- VLOG_INFO("Election timer changed from %"PRIu64" to %"PRIu64,
- raft->election_timer, e->election_timer);
- raft->election_timer = e->election_timer;
- raft_update_probe_intervals(raft);
- }
+ if (e->election_timer) {
+ VLOG_INFO("Election timer changed from %"PRIu64" to %"PRIu64,
+ raft->election_timer, e->election_timer);
+ raft->election_timer = e->election_timer;
+ raft->election_timer_new = 0;
+ raft_update_probe_intervals(raft);
}
- /* Check if any pending command can be completed, and complete it.
- * This can happen when leader fail-over before sending
- * execute_command_reply. */
- const struct uuid *eid = raft_get_eid(raft, new_commit_index);
- struct raft_command *cmd = raft_find_command_by_eid(raft, eid);
- if (cmd) {
- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
- VLOG_INFO_RL(&rl,
- "Command completed without reply (eid: "UUID_FMT", "
- "commit index: %"PRIu64")",
- UUID_ARGS(eid), new_commit_index);
- cmd->index = new_commit_index;
- raft_command_complete(raft, cmd, RAFT_CMD_SUCCESS);
+ if (e->servers && raft->role == RAFT_LEADER) {
+ /* raft_run_reconfigure() can write a new Raft entry, which can
+ * reallocate raft->entries, which would invalidate 'e', so
+ * this case must be last, after the one for 'e->data'. */
+ raft_run_reconfigure(raft);
}
}
@@ -3059,7 +3092,9 @@ raft_handle_append_entries(struct raft *raft,
for (; i < n_entries; i++) {
const struct raft_entry *e = &entries[i];
error = raft_write_entry(raft, e->term,
- json_nullable_clone(e->data), &e->eid,
+ json_nullable_clone(
+ raft_entry_get_parsed_data(e)),
+ &e->eid,
json_nullable_clone(e->servers),
e->election_timer);
if (error) {
@@ -3314,20 +3349,29 @@ bool
raft_has_next_entry(const struct raft *raft_)
{
struct raft *raft = CONST_CAST(struct raft *, raft_);
- struct uuid eid;
- return raft_peek_next_entry(raft, &eid) != NULL;
+ return raft_peek_next_entry(raft) != NULL;
}
/* Returns the next log entry or snapshot from 'raft', or NULL if there are
- * none left to read. Stores the entry ID of the log entry in '*eid'. Stores
- * true in '*is_snapshot' if the returned data is a snapshot, false if it is a
- * log entry. */
-const struct json *
-raft_next_entry(struct raft *raft, struct uuid *eid, bool *is_snapshot)
+ * none left to read. Stores the entry ID of the log entry in '*eid'.
+ *
+ * The caller takes ownership of the result. */
+struct json * OVS_WARN_UNUSED_RESULT
+raft_next_entry(struct raft *raft, struct uuid *eid)
{
- const struct json *data = raft_get_next_entry(raft, eid);
- *is_snapshot = data == raft->snap.data;
- return data;
+ const struct raft_entry *e = raft_peek_next_entry(raft);
+
+ if (!e) {
+ return NULL;
+ }
+
+ raft->last_applied++;
+ *eid = e->eid;
+
+ /* DB will only read each entry once, so we don't need to store the fully
+ * parsed json object any longer. The serialized version is sufficient
+ * for sending to other cluster members or writing to the log. */
+ return raft_entry_steal_parsed_data(CONST_CAST(struct raft_entry *, e));
}
/* Returns the log index of the last-read snapshot or log entry. */
@@ -3352,12 +3396,6 @@ raft_find_peer(struct raft *raft, const struct uuid *uuid)
return s && !uuid_equals(&raft->sid, &s->sid) ? s : NULL;
}
-static struct raft_server *
-raft_find_new_server(struct raft *raft, const struct uuid *uuid)
-{
- return raft_server_find(&raft->add_servers, uuid);
-}
-
/* Figure 3.1: "If there exists an N such that N > commitIndex, a
* majority of matchIndex[i] >= N, and log[N].term == currentTerm, set
* commitIndex = N (sections 3.5 and 3.6)." */
@@ -3420,6 +3458,7 @@ raft_send_install_snapshot_request(struct raft *raft,
const struct raft_server *s,
const char *comment)
{
+ const struct json *data = raft_entry_get_serialized_data(&raft->snap);
union raft_rpc rpc = {
.install_snapshot_request = {
.common = {
@@ -3432,7 +3471,7 @@ raft_send_install_snapshot_request(struct raft *raft,
.last_term = raft->snap.term,
.last_servers = raft->snap.servers,
.last_eid = raft->snap.eid,
- .data = raft->snap.data,
+ .data = CONST_CAST(struct json *, data),
.election_timer = raft->election_timer, /* use latest value */
}
};
@@ -3980,6 +4019,10 @@ raft_write_snapshot(struct raft *raft, struct ovsdb_log *log,
uint64_t new_log_start,
const struct raft_entry *new_snapshot)
{
+ /* Ensure that new snapshot contains serialized data object, so it will
+ * not be allocated while serializing the on-stack raft header object. */
+ ovs_assert(raft_entry_get_serialized_data(new_snapshot));
+
struct raft_header h = {
.sid = raft->sid,
.cid = raft->cid,
@@ -3998,12 +4041,13 @@ raft_write_snapshot(struct raft *raft, struct ovsdb_log *log,
/* Write log records. */
for (uint64_t index = new_log_start; index < raft->log_end; index++) {
const struct raft_entry *e = &raft->entries[index - raft->log_start];
+ const struct json *log_data = raft_entry_get_serialized_data(e);
struct raft_record r = {
.type = RAFT_REC_ENTRY,
.term = e->term,
.entry = {
.index = index,
- .data = e->data,
+ .data = CONST_CAST(struct json *, log_data),
.servers = e->servers,
.election_timer = e->election_timer,
.eid = e->eid,
@@ -4093,19 +4137,21 @@ raft_handle_install_snapshot_request__(
/* Case 3: The new snapshot starts past the end of our current log, so
* discard all of our current log. */
- const struct raft_entry new_snapshot = {
+ struct raft_entry new_snapshot = {
.term = rq->last_term,
- .data = rq->data,
.eid = rq->last_eid,
- .servers = rq->last_servers,
+ .servers = json_clone(rq->last_servers),
.election_timer = rq->election_timer,
};
+ raft_entry_set_parsed_data(&new_snapshot, rq->data);
+
struct ovsdb_error *error = raft_save_snapshot(raft, new_log_start,
&new_snapshot);
if (error) {
char *error_s = ovsdb_error_to_string_free(error);
VLOG_WARN("could not save snapshot: %s", error_s);
free(error_s);
+ raft_entry_uninit(&new_snapshot);
return false;
}
@@ -4120,7 +4166,7 @@ raft_handle_install_snapshot_request__(
}
raft_entry_uninit(&raft->snap);
- raft_entry_clone(&raft->snap, &new_snapshot);
+ raft->snap = new_snapshot;
raft_get_servers_from_log(raft, VLL_INFO);
raft_get_election_timer_from_log(raft);
@@ -4132,6 +4178,10 @@ static void
raft_handle_install_snapshot_request(
struct raft *raft, const struct raft_install_snapshot_request *rq)
{
+ if (failure_test == FT_CRASH_BEFORE_SEND_SNAPSHOT_REP) {
+ ovs_fatal(0, "Raft test: crash before sending install_snapshot_reply");
+ }
+
if (raft_handle_install_snapshot_request__(raft, rq)) {
union raft_rpc rpy = {
.install_snapshot_reply = {
@@ -4216,7 +4266,7 @@ raft_may_snapshot(const struct raft *raft)
&& !raft->leaving
&& !raft->left
&& !raft->failed
- && raft->role != RAFT_LEADER
+ && (raft->role == RAFT_FOLLOWER || hmap_count(&raft->servers) == 1)
&& raft->last_applied >= raft->log_start);
}
@@ -4265,11 +4315,12 @@ raft_store_snapshot(struct raft *raft, const struct json *new_snapshot_data)
uint64_t new_log_start = raft->last_applied + 1;
struct raft_entry new_snapshot = {
.term = raft_get_term(raft, new_log_start - 1),
- .data = json_clone(new_snapshot_data),
.eid = *raft_get_eid(raft, new_log_start - 1),
.servers = json_clone(raft_servers_for_index(raft, new_log_start - 1)),
.election_timer = raft->election_timer,
};
+ raft_entry_set_parsed_data(&new_snapshot, new_snapshot_data);
+
struct ovsdb_error *error = raft_save_snapshot(raft, new_log_start,
&new_snapshot);
if (error) {
@@ -4286,6 +4337,9 @@ raft_store_snapshot(struct raft *raft, const struct json *new_snapshot_data)
memmove(&raft->entries[0], &raft->entries[new_log_start - raft->log_start],
(raft->log_end - new_log_start) * sizeof *raft->entries);
raft->log_start = new_log_start;
+ /* It's a snapshot of the current database state, ovsdb-server will not
+ * read it back. Destroying the parsed json object to not waste memory. */
+ json_destroy(raft_entry_steal_parsed_data(&raft->snap));
return NULL;
}
@@ -4926,6 +4980,8 @@ raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED,
failure_test = FT_CRASH_AFTER_SEND_EXEC_REQ;
} else if (!strcmp(test, "crash-after-receiving-append-request-update")) {
failure_test = FT_CRASH_AFTER_RECV_APPEND_REQ_UPDATE;
+ } else if (!strcmp(test, "crash-before-sending-install-snapshot-reply")) {
+ failure_test = FT_CRASH_BEFORE_SEND_SNAPSHOT_REP;
} else if (!strcmp(test, "delay-election")) {
failure_test = FT_DELAY_ELECTION;
struct raft *raft;
@@ -4938,6 +4994,11 @@ raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED,
failure_test = FT_DONT_SEND_VOTE_REQUEST;
} else if (!strcmp(test, "stop-raft-rpc")) {
failure_test = FT_STOP_RAFT_RPC;
+ } else if (!strcmp(test,
+ "transfer-leadership-after-sending-append-request")) {
+ failure_test = FT_TRANSFER_LEADERSHIP_AFTER_SEND_APPEND_REQ;
+ } else if (!strcmp(test, "transfer-leadership")) {
+ failure_test = FT_TRANSFER_LEADERSHIP;
} else if (!strcmp(test, "clear")) {
failure_test = FT_NO_TEST;
unixctl_command_reply(conn, "test dismissed");
diff --git a/ovsdb/raft.h b/ovsdb/raft.h
index 3545c41c2c..599bc0ae86 100644
--- a/ovsdb/raft.h
+++ b/ovsdb/raft.h
@@ -132,8 +132,8 @@ bool raft_left(const struct raft *);
bool raft_failed(const struct raft *);
/* Reading snapshots and log entries. */
-const struct json *raft_next_entry(struct raft *, struct uuid *eid,
- bool *is_snapshot);
+struct json *raft_next_entry(struct raft *, struct uuid *eid)
+ OVS_WARN_UNUSED_RESULT;
bool raft_has_next_entry(const struct raft *);
uint64_t raft_get_applied_index(const struct raft *);
diff --git a/ovsdb/rbac.c b/ovsdb/rbac.c
index 2986027c90..ff411675f0 100644
--- a/ovsdb/rbac.c
+++ b/ovsdb/rbac.c
@@ -53,8 +53,8 @@ ovsdb_find_row_by_string_key(const struct ovsdb_table *table,
HMAP_FOR_EACH (row, hmap_node, &table->rows) {
const struct ovsdb_datum *datum = &row->fields[column->index];
for (size_t i = 0; i < datum->n; i++) {
- if (datum->keys[i].string[0] &&
- !strcmp(key, datum->keys[i].string)) {
+ if (datum->keys[i].s->string[0] &&
+ !strcmp(key, datum->keys[i].s->string)) {
return row;
}
}
@@ -113,7 +113,7 @@ ovsdb_rbac_authorized(const struct ovsdb_row *perms,
}
for (i = 0; i < datum->n; i++) {
- const char *name = datum->keys[i].string;
+ const char *name = datum->keys[i].s->string;
const char *value = NULL;
bool is_map;
@@ -271,7 +271,7 @@ rbac_column_modification_permitted(const struct ovsdb_column *column,
size_t i;
for (i = 0; i < modifiable->n; i++) {
- char *name = modifiable->keys[i].string;
+ char *name = modifiable->keys[i].s->string;
if (!strcmp(name, column->name)) {
return true;
diff --git a/ovsdb/row.c b/ovsdb/row.c
index 65a0546211..e83c60a218 100644
--- a/ovsdb/row.c
+++ b/ovsdb/row.c
@@ -38,8 +38,7 @@ allocate_row(const struct ovsdb_table *table)
struct ovsdb_row *row = xmalloc(row_size);
row->table = CONST_CAST(struct ovsdb_table *, table);
row->txn_row = NULL;
- ovs_list_init(&row->src_refs);
- ovs_list_init(&row->dst_refs);
+ hmap_init(&row->dst_refs);
row->n_refs = 0;
return row;
}
@@ -61,6 +60,78 @@ ovsdb_row_create(const struct ovsdb_table *table)
return row;
}
+static struct ovsdb_weak_ref *
+ovsdb_weak_ref_clone(struct ovsdb_weak_ref *src)
+{
+ struct ovsdb_weak_ref *weak = xzalloc(sizeof *weak);
+
+ hmap_node_nullify(&weak->dst_node);
+ ovs_list_init(&weak->src_node);
+ weak->src_table = src->src_table;
+ weak->src = src->src;
+ weak->dst_table = src->dst_table;
+ weak->dst = src->dst;
+ ovsdb_atom_clone(&weak->key, &src->key, src->type.key.type);
+ if (src->type.value.type != OVSDB_TYPE_VOID) {
+ ovsdb_atom_clone(&weak->value, &src->value, src->type.value.type);
+ }
+ ovsdb_type_clone(&weak->type, &src->type);
+ weak->column_idx = src->column_idx;
+ weak->by_key = src->by_key;
+ return weak;
+}
+
+uint32_t
+ovsdb_weak_ref_hash(const struct ovsdb_weak_ref *weak)
+{
+ return uuid_hash(&weak->src);
+}
+
+static bool
+ovsdb_weak_ref_equals(const struct ovsdb_weak_ref *a,
+ const struct ovsdb_weak_ref *b)
+{
+ if (a == b) {
+ return true;
+ }
+ return a->src_table == b->src_table
+ && a->dst_table == b->dst_table
+ && uuid_equals(&a->src, &b->src)
+ && uuid_equals(&a->dst, &b->dst)
+ && a->column_idx == b->column_idx
+ && a->by_key == b->by_key
+ && ovsdb_atom_equals(&a->key, &b->key, a->type.key.type);
+}
+
+struct ovsdb_weak_ref *
+ovsdb_row_find_weak_ref(const struct ovsdb_row *row,
+ const struct ovsdb_weak_ref *ref)
+{
+ struct ovsdb_weak_ref *weak;
+ HMAP_FOR_EACH_WITH_HASH (weak, dst_node,
+ ovsdb_weak_ref_hash(ref), &row->dst_refs) {
+ if (ovsdb_weak_ref_equals(weak, ref)) {
+ return weak;
+ }
+ }
+ return NULL;
+}
+
+void
+ovsdb_weak_ref_destroy(struct ovsdb_weak_ref *weak)
+{
+ if (!weak) {
+ return;
+ }
+ ovs_assert(ovs_list_is_empty(&weak->src_node));
+ ovsdb_atom_destroy(&weak->key, weak->type.key.type);
+ if (weak->type.value.type != OVSDB_TYPE_VOID) {
+ ovsdb_atom_destroy(&weak->value, weak->type.value.type);
+ }
+ ovsdb_type_destroy(&weak->type);
+ free(weak);
+}
+
struct ovsdb_row *
ovsdb_row_clone(const struct ovsdb_row *old)
{
@@ -75,6 +146,13 @@ ovsdb_row_clone(const struct ovsdb_row *old)
&old->fields[column->index],
&column->type);
}
+
+ struct ovsdb_weak_ref *weak, *clone;
+ HMAP_FOR_EACH (weak, dst_node, &old->dst_refs) {
+ clone = ovsdb_weak_ref_clone(weak);
+ hmap_insert(&new->dst_refs, &clone->dst_node,
+ ovsdb_weak_ref_hash(clone));
+ }
return new;
}
@@ -85,20 +163,13 @@ ovsdb_row_destroy(struct ovsdb_row *row)
{
if (row) {
const struct ovsdb_table *table = row->table;
- struct ovsdb_weak_ref *weak, *next;
+ struct ovsdb_weak_ref *weak;
const struct shash_node *node;
- LIST_FOR_EACH_SAFE (weak, next, dst_node, &row->dst_refs) {
- ovs_list_remove(&weak->src_node);
- ovs_list_remove(&weak->dst_node);
- free(weak);
- }
-
- LIST_FOR_EACH_SAFE (weak, next, src_node, &row->src_refs) {
- ovs_list_remove(&weak->src_node);
- ovs_list_remove(&weak->dst_node);
- free(weak);
+ HMAP_FOR_EACH_POP (weak, dst_node, &row->dst_refs) {
+ ovsdb_weak_ref_destroy(weak);
}
+ hmap_destroy(&row->dst_refs);
SHASH_FOR_EACH (node, &table->schema->columns) {
const struct ovsdb_column *column = node->data;
diff --git a/ovsdb/row.h b/ovsdb/row.h
index 394ac8eb49..fe04555d0c 100644
--- a/ovsdb/row.h
+++ b/ovsdb/row.h
@@ -36,11 +36,28 @@ struct ovsdb_column_set;
* ovsdb_weak_ref" structures are created for them.
*/
struct ovsdb_weak_ref {
- struct ovs_list src_node; /* In src->src_refs list. */
- struct ovs_list dst_node; /* In destination row's dst_refs list. */
- struct ovsdb_row *src; /* Source row. */
- struct ovsdb_table *dst_table; /* Destination table. */
+ struct hmap_node dst_node; /* In ovsdb_row's 'dst_refs' hmap. */
+ struct ovs_list src_node; /* In txn_row's 'deleted/added_refs'. */
+
+ struct ovsdb_table *src_table; /* Source row table. */
+ struct uuid src; /* Source row uuid. */
+
+ struct ovsdb_table *dst_table; /* Destination row table. */
struct uuid dst; /* Destination row uuid. */
+
+ /* Source row's key-value pair that created this reference.
+ * This information is needed in order to find and delete the reference
+ * from the source row. We need both key and value in order to avoid
+ * accidental deletion of updated data, i.e. if value in datum got
+ * updated and the reference was created by the old value.
+ * Storing column index in order to remove references from the correct
+ * column. 'by_key' flag allows to distinguish 2 references in a corner
+ * case where key and value are the same. */
+ union ovsdb_atom key;
+ union ovsdb_atom value;
+ struct ovsdb_type type; /* Datum type of the key-value pair. */
+ unsigned int column_idx; /* Row column index for this pair. */
+ bool by_key; /* 'true' if reference is a 'key'. */
};
/* A row in a database table. */
@@ -50,8 +67,7 @@ struct ovsdb_row {
struct ovsdb_txn_row *txn_row; /* Transaction that row is in, if any. */
/* Weak references. Updated and checked only at transaction commit. */
- struct ovs_list src_refs; /* Weak references from this row. */
- struct ovs_list dst_refs; /* Weak references to this row. */
+ struct hmap dst_refs; /* Weak references to this row. */
/* Number of strong refs to this row from other rows, in this table or
* other tables, through 'uuid' columns that have a 'refTable' constraint
@@ -69,6 +85,12 @@ struct ovsdb_row {
* index 'i' is contained in hmap table->indexes[i]. */
};
+uint32_t ovsdb_weak_ref_hash(const struct ovsdb_weak_ref *);
+struct ovsdb_weak_ref * ovsdb_row_find_weak_ref(const struct ovsdb_row *,
+ const struct ovsdb_weak_ref *);
+void ovsdb_weak_ref_destroy(struct ovsdb_weak_ref *);
+
+
struct ovsdb_row *ovsdb_row_create(const struct ovsdb_table *);
struct ovsdb_row *ovsdb_row_clone(const struct ovsdb_row *);
void ovsdb_row_destroy(struct ovsdb_row *);
diff --git a/ovsdb/storage.c b/ovsdb/storage.c
index d727b1eacd..d4984be250 100644
--- a/ovsdb/storage.c
+++ b/ovsdb/storage.c
@@ -268,9 +268,7 @@ ovsdb_storage_read(struct ovsdb_storage *storage,
struct json *schema_json = NULL;
struct json *txn_json = NULL;
if (storage->raft) {
- bool is_snapshot;
- json = json_nullable_clone(
- raft_next_entry(storage->raft, txnid, &is_snapshot));
+ json = raft_next_entry(storage->raft, txnid);
if (!json) {
return NULL;
} else if (json->type != JSON_ARRAY || json->array.n != 2) {
@@ -509,7 +507,11 @@ schedule_next_snapshot(struct ovsdb_storage *storage, bool quick)
long long int now = time_msec();
storage->next_snapshot_min = now + base + random_range(range);
- storage->next_snapshot_max = now + 60LL * 60 * 24 * 1000; /* 1 day */
+ if (!quick) {
+ long long int one_day = 60LL * 60 * 24 * 1000;
+
+ storage->next_snapshot_max = now + one_day;
+ }
} else {
storage->next_snapshot_min = LLONG_MAX;
storage->next_snapshot_max = LLONG_MAX;
@@ -517,7 +519,7 @@ schedule_next_snapshot(struct ovsdb_storage *storage, bool quick)
}
bool
-ovsdb_storage_should_snapshot(const struct ovsdb_storage *storage)
+ovsdb_storage_should_snapshot(struct ovsdb_storage *storage)
{
if (storage->raft || storage->log) {
/* If we haven't reached the minimum snapshot time, don't snapshot. */
@@ -546,6 +548,15 @@ ovsdb_storage_should_snapshot(const struct ovsdb_storage *storage)
}
if (!snapshot_recommended) {
+ if (storage->raft) {
+ /* Re-scheduling with a quick retry in order to avoid condition
+ * where all the raft servers passed the minimal time already,
+ * but the log didn't grow a lot, so they are all checking on
+ * every iteration. This will randomize the time of the next
+ * attempt, so all the servers will not start snapshotting at
+ * the same time when the log reaches a critical size. */
+ schedule_next_snapshot(storage, true);
+ }
return false;
}
diff --git a/ovsdb/storage.h b/ovsdb/storage.h
index e120094d7a..ff026b77fa 100644
--- a/ovsdb/storage.h
+++ b/ovsdb/storage.h
@@ -76,7 +76,7 @@ uint64_t ovsdb_write_get_commit_index(const struct ovsdb_write *);
void ovsdb_write_wait(const struct ovsdb_write *);
void ovsdb_write_destroy(struct ovsdb_write *);
-bool ovsdb_storage_should_snapshot(const struct ovsdb_storage *);
+bool ovsdb_storage_should_snapshot(struct ovsdb_storage *);
struct ovsdb_error *ovsdb_storage_store_snapshot(struct ovsdb_storage *storage,
const struct json *schema,
const struct json *snapshot)
diff --git a/ovsdb/transaction.c b/ovsdb/transaction.c
index 8ffefcf7c9..db86d847c3 100644
--- a/ovsdb/transaction.c
+++ b/ovsdb/transaction.c
@@ -41,6 +41,9 @@ struct ovsdb_txn {
struct ovs_list txn_tables; /* Contains "struct ovsdb_txn_table"s. */
struct ds comment;
struct uuid txnid; /* For clustered mode only. It is the eid. */
+ size_t n_atoms; /* Number of atoms in all transaction rows. */
+ ssize_t n_atoms_diff; /* Difference between number of added and
+ * removed atoms. */
};
/* A table modified by a transaction. */
@@ -86,6 +89,10 @@ struct ovsdb_txn_row {
struct uuid uuid;
struct ovsdb_table *table;
+ /* Weak refs that need to be added/deleted to/from destination rows. */
+ struct ovs_list added_refs;
+ struct ovs_list deleted_refs;
+
/* Used by for_each_txn_row(). */
unsigned int serial; /* Serial number of in-progress commit. */
@@ -151,6 +158,23 @@ ovsdb_txn_row_abort(struct ovsdb_txn *txn OVS_UNUSED,
} else {
hmap_replace(&new->table->rows, &new->hmap_node, &old->hmap_node);
}
+
+ struct ovsdb_weak_ref *weak, *next;
+ LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->deleted_refs) {
+ ovs_list_remove(&weak->src_node);
+ ovs_list_init(&weak->src_node);
+ if (hmap_node_is_null(&weak->dst_node)) {
+ ovsdb_weak_ref_destroy(weak);
+ }
+ }
+ LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->added_refs) {
+ ovs_list_remove(&weak->src_node);
+ ovs_list_init(&weak->src_node);
+ if (hmap_node_is_null(&weak->dst_node)) {
+ ovsdb_weak_ref_destroy(weak);
+ }
+ }
+
ovsdb_row_destroy(new);
free(txn_row);
@@ -266,9 +290,9 @@ ovsdb_txn_adjust_atom_refs(struct ovsdb_txn *txn, const struct ovsdb_row *r,
static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
ovsdb_txn_adjust_row_refs(struct ovsdb_txn *txn, const struct ovsdb_row *r,
- const struct ovsdb_column *column, int delta)
+ const struct ovsdb_column *column,
+ const struct ovsdb_datum *field, int delta)
{
- const struct ovsdb_datum *field = &r->fields[column->index];
struct ovsdb_error *error;
error = ovsdb_txn_adjust_atom_refs(txn, r, column, &column->type.key,
@@ -291,14 +315,39 @@ update_row_ref_count(struct ovsdb_txn *txn, struct ovsdb_txn_row *r)
struct ovsdb_error *error;
if (bitmap_is_set(r->changed, column->index)) {
- if (r->old) {
- error = ovsdb_txn_adjust_row_refs(txn, r->old, column, -1);
+ if (r->old && !r->new) {
+ error = ovsdb_txn_adjust_row_refs(
+ txn, r->old, column,
+ &r->old->fields[column->index], -1);
if (error) {
return OVSDB_WRAP_BUG("error decreasing refcount", error);
}
- }
- if (r->new) {
- error = ovsdb_txn_adjust_row_refs(txn, r->new, column, 1);
+ } else if (!r->old && r->new) {
+ error = ovsdb_txn_adjust_row_refs(
+ txn, r->new, column,
+ &r->new->fields[column->index], 1);
+ if (error) {
+ return error;
+ }
+ } else if (r->old && r->new) {
+ struct ovsdb_datum added, removed;
+
+ ovsdb_datum_added_removed(&added, &removed,
+ &r->old->fields[column->index],
+ &r->new->fields[column->index],
+ &column->type);
+
+ error = ovsdb_txn_adjust_row_refs(
+ txn, r->old, column, &removed, -1);
+ ovsdb_datum_destroy(&removed, &column->type);
+ if (error) {
+ ovsdb_datum_destroy(&added, &column->type);
+ return OVSDB_WRAP_BUG("error decreasing refcount", error);
+ }
+
+ error = ovsdb_txn_adjust_row_refs(
+ txn, r->new, column, &added, 1);
+ ovsdb_datum_destroy(&added, &column->type);
if (error) {
return error;
}
@@ -459,93 +508,125 @@ static struct ovsdb_error *
ovsdb_txn_update_weak_refs(struct ovsdb_txn *txn OVS_UNUSED,
struct ovsdb_txn_row *txn_row)
{
- struct ovsdb_weak_ref *weak, *next;
+ struct ovsdb_weak_ref *weak, *next, *dst_weak;
+ struct ovsdb_row *dst_row;
- /* Remove the weak references originating in the old version of the row. */
- if (txn_row->old) {
- LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->old->src_refs) {
- ovs_list_remove(&weak->src_node);
- ovs_list_remove(&weak->dst_node);
- free(weak);
+ /* Find and clean up deleted references from destination rows. */
+ LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->deleted_refs) {
+ dst_row = CONST_CAST(struct ovsdb_row *,
+ ovsdb_table_get_row(weak->dst_table, &weak->dst));
+ if (dst_row) {
+ dst_weak = ovsdb_row_find_weak_ref(dst_row, weak);
+ hmap_remove(&dst_row->dst_refs, &dst_weak->dst_node);
+ ovs_assert(ovs_list_is_empty(&dst_weak->src_node));
+ ovsdb_weak_ref_destroy(dst_weak);
+ }
+ ovs_list_remove(&weak->src_node);
+ ovs_list_init(&weak->src_node);
+ if (hmap_node_is_null(&weak->dst_node)) {
+ ovsdb_weak_ref_destroy(weak);
}
}
- /* Although the originating rows have the responsibility of updating the
- * weak references in the dst, it is possible that some source rows aren't
- * part of the transaction. In that situation this row needs to move the
- * list of incoming weak references from the old row into the new one.
- */
- if (txn_row->old && txn_row->new) {
- /* Move the incoming weak references from old to new. */
- ovs_list_push_back_all(&txn_row->new->dst_refs,
- &txn_row->old->dst_refs);
- }
-
- /* Insert the weak references originating in the new version of the row. */
- struct ovsdb_row *dst_row;
- if (txn_row->new) {
- LIST_FOR_EACH (weak, src_node, &txn_row->new->src_refs) {
- /* dst_row MUST exist. */
- dst_row = CONST_CAST(struct ovsdb_row *,
+ /* Insert the weak references added in the new version of the row. */
+ LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->added_refs) {
+ dst_row = CONST_CAST(struct ovsdb_row *,
ovsdb_table_get_row(weak->dst_table, &weak->dst));
- ovs_list_insert(&dst_row->dst_refs, &weak->dst_node);
- }
+
+ ovs_assert(!ovsdb_row_find_weak_ref(dst_row, weak));
+ hmap_insert(&dst_row->dst_refs, &weak->dst_node,
+ ovsdb_weak_ref_hash(weak));
+ ovs_list_remove(&weak->src_node);
+ ovs_list_init(&weak->src_node);
}
return NULL;
}
static void
-add_weak_ref(const struct ovsdb_row *src_, const struct ovsdb_row *dst_)
+add_weak_ref(struct ovsdb_txn_row *txn_row, const struct ovsdb_row *dst_,
+ struct ovs_list *ref_list,
+ const union ovsdb_atom *key, const union ovsdb_atom *value,
+ bool by_key, const struct ovsdb_column *column)
{
- struct ovsdb_row *src = CONST_CAST(struct ovsdb_row *, src_);
struct ovsdb_row *dst = CONST_CAST(struct ovsdb_row *, dst_);
struct ovsdb_weak_ref *weak;
- if (src == dst) {
+ if (txn_row->new == dst) {
return;
}
- if (!ovs_list_is_empty(&dst->dst_refs)) {
- /* Omit duplicates. */
- weak = CONTAINER_OF(ovs_list_back(&dst->dst_refs),
- struct ovsdb_weak_ref, dst_node);
- if (weak->src == src) {
- return;
- }
- }
-
- weak = xmalloc(sizeof *weak);
- weak->src = src;
+ weak = xzalloc(sizeof *weak);
+ weak->src_table = txn_row->new->table;
+ weak->src = *ovsdb_row_get_uuid(txn_row->new);
weak->dst_table = dst->table;
weak->dst = *ovsdb_row_get_uuid(dst);
- /* The dst_refs list is updated at commit time. */
- ovs_list_init(&weak->dst_node);
- ovs_list_push_back(&src->src_refs, &weak->src_node);
+ ovsdb_type_clone(&weak->type, &column->type);
+ ovsdb_atom_clone(&weak->key, key, column->type.key.type);
+ if (column->type.value.type != OVSDB_TYPE_VOID) {
+ ovsdb_atom_clone(&weak->value, value, column->type.value.type);
+ }
+ weak->by_key = by_key;
+ weak->column_idx = column->index;
+ hmap_node_nullify(&weak->dst_node);
+ ovs_list_push_back(ref_list, &weak->src_node);
+}
+
+static void
+find_and_add_weak_ref(struct ovsdb_txn_row *txn_row,
+ const union ovsdb_atom *key,
+ const union ovsdb_atom *value,
+ const struct ovsdb_column *column,
+ bool by_key, struct ovs_list *ref_list,
+ struct ovsdb_datum *not_found, bool *zero)
+{
+ const struct ovsdb_row *row = by_key
+ ? ovsdb_table_get_row(column->type.key.uuid.refTable, &key->uuid)
+ : ovsdb_table_get_row(column->type.value.uuid.refTable, &value->uuid);
+
+ if (row) {
+ add_weak_ref(txn_row, row, ref_list, key, value, by_key, column);
+ } else if (not_found) {
+ if (uuid_is_zero(by_key ? &key->uuid : &value->uuid)) {
+ *zero = true;
+ }
+ ovsdb_datum_add_unsafe(not_found, key, value, &column->type, NULL);
+ }
}
static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
assess_weak_refs(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row)
{
+ struct ovsdb_weak_ref *weak, *next;
struct ovsdb_table *table;
struct shash_node *node;
if (txn_row->old && !txn_row->new) {
/* Mark rows that have weak references to 'txn_row' as modified, so
- * that their weak references will get reassessed. */
- struct ovsdb_weak_ref *weak, *next;
-
- LIST_FOR_EACH_SAFE (weak, next, dst_node, &txn_row->old->dst_refs) {
- if (!weak->src->txn_row) {
- ovsdb_txn_row_modify(txn, weak->src);
+ * that their weak references will get reassessed. Adding all weak
+ * refs to 'deleted_ref' lists of their source rows, so they will be
+ * cleaned up from datums and deleted on commit. */
+
+ HMAP_FOR_EACH (weak, dst_node, &txn_row->old->dst_refs) {
+ struct ovsdb_txn_row *src_txn_row;
+
+ src_txn_row = find_or_make_txn_row(txn, weak->src_table,
+ &weak->src);
+ if (!src_txn_row) {
+ /* Source row is also removed. */
+ continue;
}
+ ovs_assert(src_txn_row);
+ ovs_assert(ovs_list_is_empty(&weak->src_node));
+ ovs_list_insert(&src_txn_row->deleted_refs, &weak->src_node);
}
}
if (!txn_row->new) {
- /* We don't have to do anything about references that originate at
- * 'txn_row', because ovsdb_row_destroy() will remove those weak
- * references. */
+ /* Since all the atoms will be destroyed by the ovsdb_row_destroy(),
+ * there is no need to check them here. Source references queued
+ * into 'deleted_refs' while removing other rows will be cleaned up at
+ * commit time. */
return NULL;
}
@@ -553,50 +634,94 @@ assess_weak_refs(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row)
SHASH_FOR_EACH (node, &table->schema->columns) {
const struct ovsdb_column *column = node->data;
struct ovsdb_datum *datum = &txn_row->new->fields[column->index];
+ struct ovsdb_datum added, removed, deleted_refs;
unsigned int orig_n, i;
bool zero = false;
orig_n = datum->n;
+ /* Collecting all key-value pairs that reference deleted rows. */
+ ovsdb_datum_init_empty(&deleted_refs);
+ LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->deleted_refs) {
+ if (column->index == weak->column_idx) {
+ ovsdb_datum_add_unsafe(&deleted_refs, &weak->key, &weak->value,
+ &column->type, NULL);
+ ovs_list_remove(&weak->src_node);
+ ovs_list_init(&weak->src_node);
+ }
+ }
+ ovsdb_datum_sort_unique(&deleted_refs, column->type.key.type,
+ column->type.value.type);
+
+ /* Removing elements that reference deleted rows. */
+ ovsdb_datum_subtract(datum, &column->type,
+ &deleted_refs, &column->type);
+ ovsdb_datum_destroy(&deleted_refs, &column->type);
+
+ /* Generating the difference between old and new data. */
+ if (txn_row->old) {
+ ovsdb_datum_added_removed(&added, &removed,
+ &txn_row->old->fields[column->index],
+ datum, &column->type);
+ } else {
+ ovsdb_datum_init_empty(&removed);
+ ovsdb_datum_clone(&added, datum, &column->type);
+ }
+
+ /* Checking added data and creating new references. */
+ ovsdb_datum_init_empty(&deleted_refs);
if (ovsdb_base_type_is_weak_ref(&column->type.key)) {
- for (i = 0; i < datum->n; ) {
- const struct ovsdb_row *row;
-
- row = ovsdb_table_get_row(column->type.key.uuid.refTable,
- &datum->keys[i].uuid);
- if (row) {
- add_weak_ref(txn_row->new, row);
- i++;
- } else {
- if (uuid_is_zero(&datum->keys[i].uuid)) {
- zero = true;
- }
- ovsdb_datum_remove_unsafe(datum, i, &column->type);
- }
+ for (i = 0; i < added.n; i++) {
+ find_and_add_weak_ref(txn_row, &added.keys[i],
+ added.values ? &added.values[i] : NULL,
+ column, true, &txn_row->added_refs,
+ &deleted_refs, &zero);
}
}
if (ovsdb_base_type_is_weak_ref(&column->type.value)) {
- for (i = 0; i < datum->n; ) {
- const struct ovsdb_row *row;
-
- row = ovsdb_table_get_row(column->type.value.uuid.refTable,
- &datum->values[i].uuid);
- if (row) {
- add_weak_ref(txn_row->new, row);
- i++;
- } else {
- if (uuid_is_zero(&datum->values[i].uuid)) {
- zero = true;
- }
- ovsdb_datum_remove_unsafe(datum, i, &column->type);
- }
+ for (i = 0; i < added.n; i++) {
+ find_and_add_weak_ref(txn_row, &added.keys[i],
+ &added.values[i],
+ column, false, &txn_row->added_refs,
+ &deleted_refs, &zero);
+ }
+ }
+ if (deleted_refs.n) {
+ /* Removing all the references that don't point to valid rows. */
+ ovsdb_datum_sort_unique(&deleted_refs, column->type.key.type,
+ column->type.value.type);
+ ovsdb_datum_subtract(datum, &column->type,
+ &deleted_refs, &column->type);
+ ovsdb_datum_destroy(&deleted_refs, &column->type);
+ }
+ ovsdb_datum_destroy(&added, &column->type);
+
+ /* Creating refs that need to be removed on commit. This includes
+ * both: the references that got directly removed from the datum and
+ * references removed due to deletion of a referenced row. */
+ if (ovsdb_base_type_is_weak_ref(&column->type.key)) {
+ for (i = 0; i < removed.n; i++) {
+ find_and_add_weak_ref(txn_row, &removed.keys[i],
+ removed.values
+ ? &removed.values[i] : NULL,
+ column, true, &txn_row->deleted_refs,
+ NULL, NULL);
}
}
+ if (ovsdb_base_type_is_weak_ref(&column->type.value)) {
+ for (i = 0; i < removed.n; i++) {
+ find_and_add_weak_ref(txn_row, &removed.keys[i],
+ &removed.values[i],
+ column, false, &txn_row->deleted_refs,
+ NULL, NULL);
+ }
+ }
+ ovsdb_datum_destroy(&removed, &column->type);
+
if (datum->n != orig_n) {
bitmap_set1(txn_row->changed, column->index);
- ovsdb_datum_sort_assert(datum, column->type.key.type);
if (datum->n < column->type.n_min) {
const struct uuid *row_uuid = ovsdb_row_get_uuid(txn_row->new);
if (zero && !txn_row->old) {
@@ -817,6 +942,37 @@ check_index_uniqueness(struct ovsdb_txn *txn OVS_UNUSED,
return NULL;
}
+static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
+count_atoms(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row)
+{
+ struct ovsdb_table *table = txn_row->table;
+ ssize_t n_atoms_old = 0, n_atoms_new = 0;
+ struct shash_node *node;
+
+ SHASH_FOR_EACH (node, &table->schema->columns) {
+ const struct ovsdb_column *column = node->data;
+ const struct ovsdb_type *type = &column->type;
+ unsigned int idx = column->index;
+
+ if (txn_row->old) {
+ n_atoms_old += txn_row->old->fields[idx].n;
+ if (type->value.type != OVSDB_TYPE_VOID) {
+ n_atoms_old += txn_row->old->fields[idx].n;
+ }
+ }
+ if (txn_row->new) {
+ n_atoms_new += txn_row->new->fields[idx].n;
+ if (type->value.type != OVSDB_TYPE_VOID) {
+ n_atoms_new += txn_row->new->fields[idx].n;
+ }
+ }
+ }
+
+ txn->n_atoms += n_atoms_old + n_atoms_new;
+ txn->n_atoms_diff += n_atoms_new - n_atoms_old;
+ return NULL;
+}
+
static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
update_version(struct ovsdb_txn *txn OVS_UNUSED, struct ovsdb_txn_row *txn_row)
{
@@ -885,6 +1041,12 @@ ovsdb_txn_precommit(struct ovsdb_txn *txn)
return error;
}
+ /* Count atoms. */
+ error = for_each_txn_row(txn, count_atoms);
+ if (error) {
+ return OVSDB_WRAP_BUG("can't happen", error);
+ }
+
/* Update _version for rows that changed. */
error = for_each_txn_row(txn, update_version);
if (error) {
@@ -900,6 +1062,8 @@ ovsdb_txn_clone(const struct ovsdb_txn *txn)
struct ovsdb_txn *txn_cloned = xzalloc(sizeof *txn_cloned);
ovs_list_init(&txn_cloned->txn_tables);
txn_cloned->txnid = txn->txnid;
+ txn_cloned->n_atoms = txn->n_atoms;
+ txn_cloned->n_atoms_diff = txn->n_atoms_diff;
struct ovsdb_txn_table *t;
LIST_FOR_EACH (t, node, &txn->txn_tables) {
@@ -958,6 +1122,7 @@ ovsdb_txn_add_to_history(struct ovsdb_txn *txn)
node->txn = ovsdb_txn_clone(txn);
ovs_list_push_back(&txn->db->txn_history, &node->node);
txn->db->n_txn_history++;
+ txn->db->n_txn_history_atoms += txn->n_atoms;
}
}
@@ -968,6 +1133,7 @@ ovsdb_txn_complete(struct ovsdb_txn *txn)
if (!ovsdb_txn_is_empty(txn)) {
txn->db->run_triggers_now = txn->db->run_triggers = true;
+ txn->db->n_atoms += txn->n_atoms_diff;
ovsdb_monitors_commit(txn->db, txn);
ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_update_weak_refs));
ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_row_commit));
@@ -1215,6 +1381,9 @@ ovsdb_txn_row_create(struct ovsdb_txn *txn, struct ovsdb_table *table,
txn_row->n_refs = old ? old->n_refs : 0;
txn_row->serial = serial - 1;
+ ovs_list_init(&txn_row->added_refs);
+ ovs_list_init(&txn_row->deleted_refs);
+
if (old) {
old->txn_row = txn_row;
}
@@ -1423,12 +1592,20 @@ ovsdb_txn_history_run(struct ovsdb *db)
if (!db->need_txn_history) {
return;
}
- /* Remove old histories to limit the size of the history */
- while (db->n_txn_history > 100) {
+ /* Remove old histories to limit the size of the history. Removing until
+ * the number of ovsdb atoms in history becomes less than the number of
+ * atoms in the database, because it will be faster to just get a database
+ * snapshot than re-constructing changes from the history that big.
+ * Keeping at least one transaction to avoid sending UUID_ZERO as a last id
+ * if all entries got removed due to the size limit. */
+ while (db->n_txn_history > 1 &&
+ (db->n_txn_history > 100 ||
+ db->n_txn_history_atoms > db->n_atoms)) {
struct ovsdb_txn_history_node *txn_h_node = CONTAINER_OF(
ovs_list_pop_front(&db->txn_history),
struct ovsdb_txn_history_node, node);
+ db->n_txn_history_atoms -= txn_h_node->txn->n_atoms;
ovsdb_txn_destroy_cloned(txn_h_node->txn);
free(txn_h_node);
db->n_txn_history--;
@@ -1440,6 +1617,7 @@ ovsdb_txn_history_init(struct ovsdb *db, bool need_txn_history)
{
db->need_txn_history = need_txn_history;
db->n_txn_history = 0;
+ db->n_txn_history_atoms = 0;
ovs_list_init(&db->txn_history);
}
@@ -1458,4 +1636,5 @@ ovsdb_txn_history_destroy(struct ovsdb *db)
free(txn_h_node);
}
db->n_txn_history = 0;
+ db->n_txn_history_atoms = 0;
}
diff --git a/python/ovs/db/data.py b/python/ovs/db/data.py
index 2a2102d6be..99bf80ed62 100644
--- a/python/ovs/db/data.py
+++ b/python/ovs/db/data.py
@@ -204,7 +204,7 @@ class Atom(object):
else:
return '.boolean = false'
elif self.type == ovs.db.types.StringType:
- return '.string = "%s"' % escapeCString(self.value)
+ return '.s = %s' % escapeCString(self.value)
elif self.type == ovs.db.types.UuidType:
return '.uuid = %s' % ovs.ovsuuid.to_c_assignment(self.value)
@@ -563,16 +563,41 @@ class Datum(object):
if n == 0:
return ["static struct ovsdb_datum %s = { .n = 0 };"]
- s = ["static union ovsdb_atom %s_keys[%d] = {" % (name, n)]
- for key in sorted(self.values):
- s += [" { %s }," % key.cInitAtom(key)]
- s += ["};"]
+ s = []
+ if self.type.key.type == ovs.db.types.StringType:
+ s += ["static struct ovsdb_atom_string %s_key_strings[%d] = {"
+ % (name, n)]
+ for key in sorted(self.values):
+ s += [' { .string = "%s", .n_refs = 2 },'
+ % escapeCString(key.value)]
+ s += ["};"]
+ s += ["static union ovsdb_atom %s_keys[%d] = {" % (name, n)]
+ for i in range(n):
+ s += [" { .s = &%s_key_strings[%d] }," % (name, i)]
+ s += ["};"]
+ else:
+ s = ["static union ovsdb_atom %s_keys[%d] = {" % (name, n)]
+ for key in sorted(self.values):
+ s += [" { %s }," % key.cInitAtom(key)]
+ s += ["};"]
if self.type.value:
- s = ["static union ovsdb_atom %s_values[%d] = {" % (name, n)]
- for k, v in sorted(self.values.items()):
- s += [" { %s }," % v.cInitAtom(v)]
- s += ["};"]
+ if self.type.value.type == ovs.db.types.StringType:
+ s += ["static struct ovsdb_atom_string %s_val_strings[%d] = {"
+ % (name, n)]
+ for k, v in sorted(self.values.items()):
+ s += [' { .string = "%s", .n_refs = 2 },'
+ % escapeCString(v.value)]
+ s += ["};"]
+ s += ["static union ovsdb_atom %s_values[%d] = {" % (name, n)]
+ for i in range(n):
+ s += [" { .s = &%s_val_strings[%d] }," % (name, i)]
+ s += ["};"]
+ else:
+ s += ["static union ovsdb_atom %s_values[%d] = {" % (name, n)]
+ for k, v in sorted(self.values.items()):
+ s += [" { %s }," % v.cInitAtom(v)]
+ s += ["};"]
s += ["static struct ovsdb_datum %s = {" % name]
s += [" .n = %d," % n]
diff --git a/python/ovs/db/idl.py b/python/ovs/db/idl.py
index ecae5e1432..87ee06cdef 100644
--- a/python/ovs/db/idl.py
+++ b/python/ovs/db/idl.py
@@ -1505,6 +1505,11 @@ class Transaction(object):
if self != self.idl.txn:
return self._status
+ if self.idl.state != Idl.IDL_S_MONITORING:
+ self._status = Transaction.TRY_AGAIN
+ self.__disassemble()
+ return self._status
+
# If we need a lock but don't have it, give up quickly.
if self.idl.lock_name and not self.idl.has_lock:
self._status = Transaction.NOT_LOCKED
diff --git a/python/ovs/db/types.py b/python/ovs/db/types.py
index 626ae8fc44..3318a3b6f8 100644
--- a/python/ovs/db/types.py
+++ b/python/ovs/db/types.py
@@ -48,6 +48,16 @@ class AtomicType(object):
def to_string(self):
return self.name
+ def to_rvalue_string(self):
+ if self == StringType:
+ return 's->' + self.name
+ return self.name
+
+ def to_lvalue_string(self):
+ if self == StringType:
+ return 's'
+ return self.name
+
def to_json(self):
return self.name
@@ -373,18 +383,7 @@ class BaseType(object):
return "%(dst)s = *%(src)s;" % args
return ("%(dst)s = %(src)s->header_.uuid;") % args
elif self.type == StringType:
- return "%(dst)s = xstrdup(%(src)s);" % args
- else:
- return "%(dst)s = %(src)s;" % args
-
- def assign_c_value_casting_away_const(self, dst, src, refTable=True):
- args = {'dst': dst, 'src': src}
- if self.ref_table_name:
- if not refTable:
- return "%(dst)s = *%(src)s;" % args
- return ("%(dst)s = %(src)s->header_.uuid;") % args
- elif self.type == StringType:
- return "%(dst)s = CONST_CAST(char *, %(src)s);" % args
+ return "%(dst)s = ovsdb_atom_string_create(%(src)s);" % args
else:
return "%(dst)s = %(src)s;" % args
diff --git a/python/ovs/poller.py b/python/ovs/poller.py
index 3624ec8655..157719c3a4 100644
--- a/python/ovs/poller.py
+++ b/python/ovs/poller.py
@@ -26,9 +26,9 @@ if sys.platform == "win32":
import ovs.winutils as winutils
try:
- from OpenSSL import SSL
+ import ssl
except ImportError:
- SSL = None
+ ssl = None
try:
from eventlet import patcher as eventlet_patcher
@@ -73,7 +73,7 @@ class _SelectSelect(object):
def register(self, fd, events):
if isinstance(fd, socket.socket):
fd = fd.fileno()
- if SSL and isinstance(fd, SSL.Connection):
+ if ssl and isinstance(fd, ssl.SSLSocket):
fd = fd.fileno()
if sys.platform != 'win32':
diff --git a/python/ovs/reconnect.py b/python/ovs/reconnect.py
index c4c6c87e9f..6b0d023ae3 100644
--- a/python/ovs/reconnect.py
+++ b/python/ovs/reconnect.py
@@ -44,7 +44,7 @@ class Reconnect(object):
is_connected = False
@staticmethod
- def deadline(fsm):
+ def deadline(fsm, now):
return None
@staticmethod
@@ -56,7 +56,7 @@ class Reconnect(object):
is_connected = False
@staticmethod
- def deadline(fsm):
+ def deadline(fsm, now):
return None
@staticmethod
@@ -68,7 +68,7 @@ class Reconnect(object):
is_connected = False
@staticmethod
- def deadline(fsm):
+ def deadline(fsm, now):
return fsm.state_entered + fsm.backoff
@staticmethod
@@ -80,7 +80,7 @@ class Reconnect(object):
is_connected = False
@staticmethod
- def deadline(fsm):
+ def deadline(fsm, now):
return fsm.state_entered + max(1000, fsm.backoff)
@staticmethod
@@ -92,13 +92,24 @@ class Reconnect(object):
is_connected = True
@staticmethod
- def deadline(fsm):
+ def deadline(fsm, now):
if fsm.probe_interval:
base = max(fsm.last_activity, fsm.state_entered)
expiration = base + fsm.probe_interval
- if (fsm.last_receive_attempt is None or
+ if (now < expiration or
+ fsm.last_receive_attempt is None or
fsm.last_receive_attempt >= expiration):
+ # We still have time before the expiration or the time has
+ # already passed and there was no activity. In the first
+ # case we need to wait for the expiration, in the second -
+ # we're already past the deadline.
return expiration
+ else:
+ # Time has already passed, but we didn't attempt to receive
+ # anything. We need to wake up and try to receive even if
+ # nothing is pending, so we can update the expiration time
+ # or transition to a different state.
+ return now + 1
return None
@staticmethod
@@ -114,12 +125,15 @@ class Reconnect(object):
is_connected = True
@staticmethod
- def deadline(fsm):
+ def deadline(fsm, now):
if fsm.probe_interval:
expiration = fsm.state_entered + fsm.probe_interval
- if (fsm.last_receive_attempt is None or
+ if (now < expiration or
+ fsm.last_receive_attempt is None or
fsm.last_receive_attempt >= expiration):
return expiration
+ else:
+ return now + 1
return None
@staticmethod
@@ -134,7 +148,7 @@ class Reconnect(object):
is_connected = False
@staticmethod
- def deadline(fsm):
+ def deadline(fsm, now):
return fsm.state_entered
@staticmethod
@@ -545,7 +559,7 @@ class Reconnect(object):
returned if the "probe interval" is nonzero--see
self.set_probe_interval())."""
- deadline = self.state.deadline(self)
+ deadline = self.state.deadline(self, now)
if deadline is not None and now >= deadline:
return self.state.run(self, now)
else:
@@ -562,7 +576,7 @@ class Reconnect(object):
"""Returns the number of milliseconds after which self.run() should be
called if nothing else notable happens in the meantime, or None if this
is currently unnecessary."""
- deadline = self.state.deadline(self)
+ deadline = self.state.deadline(self, now)
if deadline is not None:
remaining = deadline - now
return max(0, remaining)
diff --git a/python/ovs/socket_util.py b/python/ovs/socket_util.py
index 3faa64e9d7..651012bf06 100644
--- a/python/ovs/socket_util.py
+++ b/python/ovs/socket_util.py
@@ -222,8 +222,7 @@ def inet_parse_active(target, default_port):
return (host_name, port)
-def inet_open_active(style, target, default_port, dscp):
- address = inet_parse_active(target, default_port)
+def inet_create_socket_active(style, address):
try:
is_addr_inet = is_valid_ipv4_address(address[0])
if is_addr_inet:
@@ -235,23 +234,32 @@ def inet_open_active(style, target, default_port, dscp):
except socket.error as e:
return get_exception_errno(e), None
+ return family, sock
+
+
+def inet_connect_active(sock, address, family, dscp):
try:
set_nonblocking(sock)
set_dscp(sock, family, dscp)
- try:
- sock.connect(address)
- except socket.error as e:
- error = get_exception_errno(e)
- if sys.platform == 'win32' and error == errno.WSAEWOULDBLOCK:
- # WSAEWOULDBLOCK would be the equivalent on Windows
- # for EINPROGRESS on Unix.
- error = errno.EINPROGRESS
- if error != errno.EINPROGRESS:
- raise
- return 0, sock
+ error = sock.connect_ex(address)
+ if error not in (0, errno.EINPROGRESS, errno.EWOULDBLOCK):
+ sock.close()
+ return error
+ return 0
except socket.error as e:
sock.close()
- return get_exception_errno(e), None
+ return get_exception_errno(e)
+
+
+def inet_open_active(style, target, default_port, dscp):
+ address = inet_parse_active(target, default_port)
+ family, sock = inet_create_socket_active(style, address)
+ if sock is None:
+ return family, sock
+ error = inet_connect_active(sock, address, family, dscp)
+ if error:
+ return error, None
+ return 0, sock
def get_exception_errno(e):
diff --git a/python/ovs/stream.py b/python/ovs/stream.py
index f5a520862c..ac5b0fd0c6 100644
--- a/python/ovs/stream.py
+++ b/python/ovs/stream.py
@@ -22,9 +22,9 @@ import ovs.socket_util
import ovs.vlog
try:
- from OpenSSL import SSL
+ import ssl
except ImportError:
- SSL = None
+ ssl = None
if sys.platform == 'win32':
import ovs.winutils as winutils
@@ -322,6 +322,12 @@ class Stream(object):
The recv function will not block waiting for data to arrive. If no
data have been received, it returns (errno.EAGAIN, "") immediately."""
+ try:
+ return self._recv(n)
+ except socket.error as e:
+ return (ovs.socket_util.get_exception_errno(e), "")
+
+ def _recv(self, n):
retval = self.connect()
if retval != 0:
return (retval, "")
@@ -331,10 +337,7 @@ class Stream(object):
if sys.platform == 'win32' and self.socket is None:
return self.__recv_windows(n)
- try:
- return (0, self.socket.recv(n))
- except socket.error as e:
- return (ovs.socket_util.get_exception_errno(e), "")
+ return (0, self.socket.recv(n))
def __recv_windows(self, n):
if self._read_pending:
@@ -396,6 +399,12 @@ class Stream(object):
Will not block. If no bytes can be immediately accepted for
transmission, returns -errno.EAGAIN immediately."""
+ try:
+ return self._send(buf)
+ except socket.error as e:
+ return -ovs.socket_util.get_exception_errno(e)
+
+ def _send(self, buf):
retval = self.connect()
if retval != 0:
return -retval
@@ -409,10 +418,7 @@ class Stream(object):
if sys.platform == 'win32' and self.socket is None:
return self.__send_windows(buf)
- try:
- return self.socket.send(buf)
- except socket.error as e:
- return -ovs.socket_util.get_exception_errno(e)
+ return self.socket.send(buf)
def __send_windows(self, buf):
if self._write_pending:
@@ -769,35 +775,42 @@ class SSLStream(Stream):
def check_connection_completion(sock):
try:
return Stream.check_connection_completion(sock)
- except SSL.SysCallError as e:
+ except ssl.SSLSyscallError as e:
return ovs.socket_util.get_exception_errno(e)
@staticmethod
def needs_probes():
return True
- @staticmethod
- def verify_cb(conn, cert, errnum, depth, ok):
- return ok
-
@staticmethod
def _open(suffix, dscp):
- error, sock = TCPStream._open(suffix, dscp)
- if error:
- return error, None
+ address = ovs.socket_util.inet_parse_active(suffix, 0)
+ family, sock = ovs.socket_util.inet_create_socket_active(
+ socket.SOCK_STREAM, address)
+ if sock is None:
+ return family, sock
# Create an SSL context
- ctx = SSL.Context(SSL.SSLv23_METHOD)
- ctx.set_verify(SSL.VERIFY_PEER, SSLStream.verify_cb)
- ctx.set_options(SSL.OP_NO_SSLv2 | SSL.OP_NO_SSLv3)
+ ctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+ ctx.verify_mode = ssl.CERT_REQUIRED
+ ctx.options |= ssl.OP_NO_SSLv2
+ ctx.options |= ssl.OP_NO_SSLv3
# If the client has not set the SSL configuration files
# exception would be raised.
- ctx.use_privatekey_file(Stream._SSL_private_key_file)
- ctx.use_certificate_file(Stream._SSL_certificate_file)
ctx.load_verify_locations(Stream._SSL_ca_cert_file)
+ ctx.load_cert_chain(Stream._SSL_certificate_file,
+ Stream._SSL_private_key_file)
+ ssl_sock = ctx.wrap_socket(sock, do_handshake_on_connect=False)
- ssl_sock = SSL.Connection(ctx, sock)
- ssl_sock.set_connect_state()
+ # Connect
+ error = ovs.socket_util.inet_connect_active(ssl_sock, address, family,
+ dscp)
+ if not error:
+ try:
+ ssl_sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
+ except socket.error as e:
+ ssl_sock.close()
+ return ovs.socket_util.get_exception_errno(e), None
return error, ssl_sock
def connect(self):
@@ -809,40 +822,44 @@ class SSLStream(Stream):
# TCP Connection is successful. Now do the SSL handshake
try:
self.socket.do_handshake()
- except SSL.WantReadError:
+ except ssl.SSLWantReadError:
return errno.EAGAIN
- except SSL.SysCallError as e:
+ except ssl.SSLSyscallError as e:
return ovs.socket_util.get_exception_errno(e)
return 0
def recv(self, n):
try:
- return super(SSLStream, self).recv(n)
- except SSL.WantReadError:
+ return super(SSLStream, self)._recv(n)
+ except ssl.SSLWantReadError:
return (errno.EAGAIN, "")
- except SSL.SysCallError as e:
+ except ssl.SSLSyscallError as e:
return (ovs.socket_util.get_exception_errno(e), "")
- except SSL.ZeroReturnError:
+ except ssl.SSLZeroReturnError:
return (0, "")
+ except socket.error as e:
+ return (ovs.socket_util.get_exception_errno(e), "")
def send(self, buf):
try:
- return super(SSLStream, self).send(buf)
- except SSL.WantWriteError:
+ return super(SSLStream, self)._send(buf)
+ except ssl.SSLWantWriteError:
return -errno.EAGAIN
- except SSL.SysCallError as e:
+ except ssl.SSLSyscallError as e:
+ return -ovs.socket_util.get_exception_errno(e)
+ except socket.error as e:
return -ovs.socket_util.get_exception_errno(e)
def close(self):
if self.socket:
try:
- self.socket.shutdown()
- except SSL.Error:
+ self.socket.shutdown(socket.SHUT_RDWR)
+ except socket.error:
pass
return super(SSLStream, self).close()
-if SSL:
+if ssl:
# Register SSL only if the OpenSSL module is available
Stream.register_method("ssl", SSLStream)
diff --git a/tests/alb.at b/tests/alb.at
index 903238fcb2..67eb14f473 100644
--- a/tests/alb.at
+++ b/tests/alb.at
@@ -86,6 +86,52 @@ OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance
OVS_VSWITCHD_STOP
AT_CLEANUP
+AT_SETUP([ALB - cross-numa])
+OVS_VSWITCHD_START([add-port br0 p0 \
+ -- set Interface p0 type=dummy-pmd options:n_rxq=4 \
+ -- set Interface p0 options:numa_id=0 \
+ -- set Open_vSwitch . other_config:pmd-cpu-mask=0x3 \
+ -- set open_vswitch . other_config:pmd-rxq-assign=group \
+ -- set open_vswitch . other_config:pmd-rxq-isolate=false \
+ -- set open_vswitch . other_config:pmd-auto-lb="true" \
+ -- set open_vswitch . other_config:pmd-auto-lb-load-threshold=0],
+ [], [], [--dummy-numa 1,2,1,2])
+OVS_WAIT_UNTIL([grep "PMD auto load balance is enabled" ovs-vswitchd.log])
+AT_CHECK([ovs-appctl vlog/set dpif_netdev:dbg])
+
+# no pinned rxqs - cross-numa pmd could change
+get_log_next_line_num
+ovs-appctl time/warp 600000 10000
+OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance performing dry run."])
+OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance detected cross-numa polling"])
+
+# all pinned rxqs - cross-numa pmd will not change
+AT_CHECK([ovs-vsctl set Interface p0 other_config:pmd-rxq-affinity='0:0,1:0,2:1,3:1'])
+get_log_next_line_num
+ovs-appctl time/warp 600000 10000
+OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance performing dry run."])
+OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "Variance improvement 0%."])
+
+# mix of pinned (non-isolated) and non-pinned rxqs - cross-numa pmd could change
+AT_CHECK([ovs-vsctl remove Interface p0 other_config pmd-rxq-affinity])
+AT_CHECK([ovs-vsctl set Interface p0 other_config:pmd-rxq-affinity='0:0,1:0,2:1'])
+get_log_next_line_num
+ovs-appctl time/warp 600000 10000
+OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance performing dry run."])
+OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance detected cross-numa polling"])
+
+# mix of pinned (isolated) and non-pinned rxqs - cross-numa pmd could change
+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0xf])
+AT_CHECK([ovs-vsctl set Interface p0 options:n_rxq=6])
+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-rxq-isolate=true])
+get_log_next_line_num
+ovs-appctl time/warp 600000 10000
+OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance performing dry run."])
+OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance detected cross-numa polling"])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
AT_SETUP([ALB - PMD/RxQ assignment type])
OVS_VSWITCHD_START([add-port br0 p0 \
-- set Interface p0 type=dummy-pmd options:n_rxq=3 \
diff --git a/tests/classifier.at b/tests/classifier.at
index cdcd72c156..f652b59837 100644
--- a/tests/classifier.at
+++ b/tests/classifier.at
@@ -129,6 +129,31 @@ Datapath actions: 3
OVS_VSWITCHD_STOP(["/'prefixes' with incompatible field: ipv6_label/d"])
AT_CLEANUP
+AT_SETUP([flow classifier - ipv6 ND dependency])
+OVS_VSWITCHD_START
+add_of_ports br0 1 2
+AT_DATA([flows.txt], [dnl
+ table=0,priority=100,ipv6,ipv6_src=1000::/10 actions=resubmit(,1)
+ table=0,priority=0 actions=NORMAL
+ table=1,priority=110,ipv6,ipv6_dst=1000::3 actions=resubmit(,2)
+ table=1,priority=100,ipv6,ipv6_dst=1000::4 actions=resubmit(,2)
+ table=1,priority=0 actions=NORMAL
+ table=2,priority=120,icmp6,nw_ttl=255,icmp_type=135,icmp_code=0,nd_target=1000::1 actions=NORMAL
+ table=2,priority=100,tcp actions=NORMAL
+ table=2,priority=100,icmp6 actions=NORMAL
+ table=2,priority=0 actions=NORMAL
+])
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+# test ICMPv6 echo request (which should have no nd_target field)
+AT_CHECK([ovs-appctl ofproto/trace br0 "in_port=1,eth_src=f6:d2:b0:19:5e:7b,eth_dst=d2:49:19:91:78:fe,dl_type=0x86dd,ipv6_src=1000::3,ipv6_dst=1000::4,nw_proto=58,icmpv6_type=128,icmpv6_code=0"], [0], [stdout])
+AT_CHECK([tail -2 stdout], [0],
+ [Megaflow: recirc_id=0,eth,icmp6,in_port=1,dl_src=f6:d2:b0:19:5e:7b,dl_dst=d2:49:19:91:78:fe,ipv6_src=1000::/10,ipv6_dst=1000::4,nw_ttl=0,nw_frag=no
+Datapath actions: 100,2
+])
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
AT_BANNER([conjunctive match])
AT_SETUP([single conjunctive match])
diff --git a/tests/drop-stats.at b/tests/drop-stats.at
index f3e19cd83b..1d3af98dab 100644
--- a/tests/drop-stats.at
+++ b/tests/drop-stats.at
@@ -83,6 +83,9 @@ AT_CHECK([
ovs-ofctl -Oopenflow13 add-flows br0 flows.txt
ovs-ofctl -Oopenflow13 dump-flows br0 | ofctl_strip | sort | grep actions ], [0], [ignore])
+ovs-appctl time/warp 15000
+AT_CHECK([ovs-appctl revalidator/wait])
+
AT_CHECK([
ovs-appctl netdev-dummy/receive p1 'in_port(1),packet_type(ns=0,id=0),eth(src=3a:6d:d2:09:9c:ab,dst=1e:2c:e9:2a:66:9e),ipv4(src=192.168.10.10,dst=192.168.10.30,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'
], [0], [ignore])
diff --git a/tests/flowgen.py b/tests/flowgen.py
index 7ef32d13cb..cb0e9df388 100755
--- a/tests/flowgen.py
+++ b/tests/flowgen.py
@@ -135,7 +135,7 @@ def output(attrs):
12893) # urgent pointer
if attrs['TP_PROTO'] == 'TCP+options':
tcp = (tcp[:12]
- + struct.pack('H', (6 << 12) | 0x02 | 0x10)
+ + struct.pack('>H', (6 << 12) | 0x02 | 0x10)
+ tcp[14:])
tcp += struct.pack('>BBH', 2, 4, 1975) # MSS option
tcp += b'payload'
@@ -166,15 +166,15 @@ def output(attrs):
ip = ip[:2] + struct.pack('>H', len(ip)) + ip[4:]
packet += ip
if attrs['DL_HEADER'].startswith('802.2'):
- packet_len = len(packet)
+ packet_len = len(packet) - 14
if flow['DL_VLAN'] != 0xffff:
packet_len -= 4
packet = (packet[:len_ofs]
+ struct.pack('>H', packet_len)
+ packet[len_ofs + 2:])
- print(' '.join(['%s=%s' for k, v in attrs.items()]))
- print(' '.join(['%s=%s' for k, v in flow.items()]))
+ print(' '.join(['%s=%s' % (k, v) for k, v in attrs.items()]))
+ print(' '.join(['%s=%s' % (k, v) for k, v in flow.items()]))
print()
flows.write(struct.pack('>LH',
diff --git a/tests/library.at b/tests/library.at
index 1702b7556b..e27d9e8bce 100644
--- a/tests/library.at
+++ b/tests/library.at
@@ -247,7 +247,7 @@ AT_CHECK([ovstest test-ofpbuf], [0], [])
AT_CLEANUP
AT_SETUP([rcu])
-AT_CHECK([ovstest test-rcu-quiesce], [0], [])
+AT_CHECK([ovstest test-rcu], [0], [])
AT_CLEANUP
AT_SETUP([stopwatch module])
diff --git a/tests/mcast-snooping.at b/tests/mcast-snooping.at
index 757cf7186e..fe475e7b38 100644
--- a/tests/mcast-snooping.at
+++ b/tests/mcast-snooping.at
@@ -216,3 +216,70 @@ AT_CHECK([ovs-appctl mdb/show br0], [0], [dnl
])
AT_CLEANUP
+
+
+AT_SETUP([mcast - igmp flood for non-snoop enabled])
+OVS_VSWITCHD_START([])
+
+AT_CHECK([
+ ovs-vsctl set bridge br0 \
+ datapath_type=dummy], [0])
+
+add_of_ports br0 1 2
+
+AT_CHECK([ovs-ofctl add-flow br0 action=normal])
+
+ovs-appctl time/stop
+
+dnl Basic scenario - needs to flood for IGMP followed by unicast ICMP
+dnl in reverse direction
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+ '0101000c29a0aa55aa550001080046c00028000040000102d3494565eb4ae0000016940400002200f9020000000104000000e00000fb000000000000'])
+AT_CHECK([ovs-appctl netdev-dummy/receive p2 \
+ 'aa55aa5500010101000c29a008004500001c00010000400164dc0a0101010a0101020800f7ffffffffff'])
+
+
+AT_CHECK([ovs-appctl dpctl/dump-flows | grep -e .*ipv4 | sort | dnl
+ strip_stats | strip_used | strip_recirc | dnl
+ sed -e 's/,packet_type(ns=[[0-9]]*,id=[[0-9]]*),/,/'],
+ [0], [dnl
+recirc_id(<recirc>),in_port(1),eth(src=aa:55:aa:55:00:01,dst=01:01:00:0c:29:a0),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:100,2
+recirc_id(<recirc>),in_port(2),eth(src=01:01:00:0c:29:a0,dst=aa:55:aa:55:00:01),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:1
+])
+
+ovs-appctl time/warp 100000
+
+dnl Next we should clear the flows and install a complex case
+AT_CHECK([ovs-ofctl del-flows br0])
+
+AT_DATA([flows.txt], [dnl
+table=0, arp actions=NORMAL
+table=0, ip,in_port=1 actions=ct(table=1,zone=64000)
+table=0, in_port=2 actions=output:1
+table=1, ip,ct_state=+trk+inv actions=drop
+table=1 ip,in_port=1,icmp,ct_state=+trk+new actions=output:2
+table=1, in_port=1,ip,ct_state=+trk+new actions=controller(userdata=00.de.ad.be.ef.ca.fe.01)
+table=1, in_port=1,ip,ct_state=+trk+est actions=output:2
+])
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+ovs-appctl time/warp 100000
+
+dnl Send the IGMP, followed by a unicast ICMP - ensure we won't black hole
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+ '0101000c29a0aa55aa550001080046c00028000040000102d3494565eb4ae0000016940400002200f9020000000104000000e00000fb000000000000'])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \
+ 'aa55aa550001aa55aa55000208004500001c00010000400164dc0a0101010a0101020800f7ffffffffff'])
+
+
+AT_CHECK([ovs-appctl dpctl/dump-flows | grep -e .*ipv4 | sort | dnl
+ strip_stats | strip_used | strip_recirc | dnl
+ sed 's/pid=[[0-9]]*,//
+ s/,packet_type(ns=[[0-9]]*,id=[[0-9]]*),/,/'],
+ [0], [dnl
+ct_state(+new-inv+trk),recirc_id(<recirc>),in_port(1),eth_type(0x0800),ipv4(proto=1,frag=no), packets:0, bytes:0, used:never, actions:2
+ct_state(+new-inv+trk),recirc_id(<recirc>),in_port(1),eth_type(0x0800),ipv4(proto=2,frag=no), packets:0, bytes:0, used:never, actions:userspace(controller(reason=1,dont_send=0,continuation=0,recirc_id=<recirc>,rule_cookie=0,controller_id=0,max_len=65535))
+recirc_id(<recirc>),in_port(1),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:0.0s, actions:ct(zone=64000),recirc(<recirc>)
+])
+
+AT_CLEANUP
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index 956a69e1fa..43cded03b8 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -81,11 +81,12 @@ recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=ff:
ovs-appctl netdev-dummy/set-admin-state p1 up
ovs-appctl time/warp 100
-OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl
+OVS_WAIT_UNTIL_EQUAL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [dnl
---- bond0 ----
bond_mode: active-backup
bond may use recirculation: no, <del>
bond-hash-basis: 0
+lb_output action: disabled, bond-id: -1
updelay: 0 ms
downdelay: 0 ms
lacp_status: off
@@ -99,7 +100,6 @@ member p1: enabled
member p2: enabled
may_enable: true
-
])
OVS_VSWITCHD_STOP
@@ -129,11 +129,12 @@ ovs-appctl time/warp 100
OVS_WAIT_UNTIL([test -n "`ovs-appctl bond/show | fgrep 'member p1: disabled'`"])
ovs-appctl netdev-dummy/set-admin-state p1 up
ovs-appctl time/warp 100
-OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl
+OVS_WAIT_UNTIL_EQUAL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [dnl
---- bond0 ----
bond_mode: active-backup
bond may use recirculation: no, <del>
bond-hash-basis: 0
+lb_output action: disabled, bond-id: -1
updelay: 0 ms
downdelay: 0 ms
lacp_status: off
@@ -150,7 +151,6 @@ member p2: enabled
member p3: enabled
may_enable: true
-
])
dnl Now delete the primary and verify that the output shows that the
@@ -171,11 +171,12 @@ ovs-vsctl \
--id=@p1 create Interface name=p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 -- \
set Port bond0 interfaces="$uuids, @p1]"
ovs-appctl time/warp 100
-OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl
+OVS_WAIT_UNTIL_EQUAL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [dnl
---- bond0 ----
bond_mode: active-backup
bond may use recirculation: no, <del>
bond-hash-basis: 0
+lb_output action: disabled, bond-id: -1
updelay: 0 ms
downdelay: 0 ms
lacp_status: off
@@ -192,17 +193,17 @@ member p2: enabled
member p3: enabled
may_enable: true
-
])
dnl Switch to another primary
ovs-vsctl set port bond0 other_config:bond-primary=p2
ovs-appctl time/warp 100
-OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl
+OVS_WAIT_UNTIL_EQUAL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [dnl
---- bond0 ----
bond_mode: active-backup
bond may use recirculation: no, <del>
bond-hash-basis: 0
+lb_output action: disabled, bond-id: -1
updelay: 0 ms
downdelay: 0 ms
lacp_status: off
@@ -211,25 +212,25 @@ active-backup primary: p2
<active member mac del>
member p1: enabled
- active member
may_enable: true
member p2: enabled
+ active member
may_enable: true
member p3: enabled
may_enable: true
-
])
dnl Remove the "bond-primary" config directive from the bond.
AT_CHECK([ovs-vsctl remove Port bond0 other_config bond-primary])
ovs-appctl time/warp 100
-OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl
+OVS_WAIT_UNTIL_EQUAL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [dnl
---- bond0 ----
bond_mode: active-backup
bond may use recirculation: no, <del>
bond-hash-basis: 0
+lb_output action: disabled, bond-id: -1
updelay: 0 ms
downdelay: 0 ms
lacp_status: off
@@ -238,15 +239,14 @@ active-backup primary: <none>
<active member mac del>
member p1: enabled
- active member
may_enable: true
member p2: enabled
+ active member
may_enable: true
member p3: enabled
may_enable: true
-
])
OVS_VSWITCHD_STOP
@@ -4862,6 +4862,54 @@ recirc_id(0),in_port(90),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(proto=6,fr
OVS_VSWITCHD_STOP
AT_CLEANUP
+AT_SETUP([ofproto-dpif - handling of malformed TCP packets])
+OVS_VSWITCHD_START
+add_of_ports br0 1 90
+
+dnl Drop packets whose TCP destination port is 0x0-0xf, but allow other TCP packets.
+AT_DATA([flows.txt], [dnl
+priority=75 tcp tp_dst=0/0xfff0 actions=drop
+priority=50 tcp actions=output:1
+])
+AT_CHECK([ovs-ofctl replace-flows br0 flows.txt])
+
+dnl good tcp pkt, tcp(sport=100,dport=16)
+pkt1="be95df40fb57fa163e5ee3570800450000280001000040063e940a0a0a0a141414140064001000000000000000005002200053330000"
+
+dnl malformed tcp pkt(tcp_hdr < 20 byte), tcp(sport=100,dport=16,dataofs=1)
+pkt2="be95df40fb57fa163e5ee3570800450000280001000040063e940a0a0a0a141414140064001000000000000000001002200093330000"
+
+dnl malformed tcp pkt(tcp_hdr > pkt_len), tcp(sport=100,dport=16,dataofs=15)
+pkt3="be95df40fb57fa163e5ee3570800450000280001000040063e940a0a0a0a14141414006400100000000000000000f002200093330000"
+
+AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg])
+
+AT_CHECK([ovs-appctl netdev-dummy/receive p90 "$pkt1"], [0], [stdout])
+dnl for good tcp pkt, ovs can extract the tp_dst=16
+AT_CHECK([ovs-appctl dpctl/dump-flows filter=in_port\(90\),tcp], [0], [dnl
+flow-dump from the main thread:
+recirc_id(0),in_port(90),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(proto=6,frag=no),tcp(dst=16/0xfff0), packets:0, bytes:0, used:never, actions:1
+])
+
+AT_CHECK([ovs-appctl revalidator/purge], [0], [stdout])
+AT_CHECK([ovs-appctl netdev-dummy/receive p90 "$pkt2"], [0], [stdout])
+dnl for malformed tcp pkt(tcp_hdr < 20 byte), ovs uses default value tp_dst=0
+AT_CHECK([ovs-appctl dpctl/dump-flows filter=in_port\(90\),tcp], [0], [dnl
+flow-dump from the main thread:
+recirc_id(0),in_port(90),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(proto=6,frag=no),tcp(dst=0/0xfff0), packets:0, bytes:0, used:never, actions:drop
+])
+
+AT_CHECK([ovs-appctl revalidator/purge], [0], [stdout])
+AT_CHECK([ovs-appctl netdev-dummy/receive p90 "$pkt3"], [0], [stdout])
+dnl for malformed tcp pkt(tcp_hdr > pkt_len), ovs uses default value tp_dst=0
+AT_CHECK([ovs-appctl dpctl/dump-flows filter=in_port\(90\),tcp], [0], [dnl
+flow-dump from the main thread:
+recirc_id(0),in_port(90),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(proto=6,frag=no),tcp(dst=0/0xfff0), packets:0, bytes:0, used:never, actions:drop
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
AT_SETUP([ofproto-dpif - exit])
OVS_VSWITCHD_START
add_of_ports br0 1 2 3 10 11 12 13 14
@@ -5525,7 +5573,36 @@ check_flows () {
echo "n_packets=$n"
test "$n" = 1
}
-OVS_WAIT_UNTIL([check_flows], [ovs dump-flows br0])
+OVS_WAIT_UNTIL([check_flows], [ovs-ofctl dump-flows br0])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
+# Checks for regression against a bug in which OVS crashed when
+# recirculation was involved for a packet with in_port=OFPP_NONE
+# or in_port=OFPP_CONTROLLER.
+AT_SETUP([ofproto-dpif - packet-out recirculation with OFPP_NONE and OFPP_CONTROLLER])
+OVS_VSWITCHD_START
+add_of_ports br0 1 2
+
+AT_DATA([flows.txt], [dnl
+table=0 ip actions=mod_dl_dst:83:83:83:83:83:83,ct(table=1)
+table=1 ip actions=ct(commit),normal
+])
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+packet=ffffffffffff00102030405008004500001c00000000401100000a000002ffffffff0035111100080000
+AT_CHECK([ovs-ofctl packet-out br0 "in_port=none,packet=$packet actions=table"])
+AT_CHECK([ovs-ofctl packet-out br0 "in_port=controller,packet=$packet actions=table"])
+
+# Dumps out the flow table, extracts the number of packets that have gone
+# through the (single) flow in table 1, and returns success if it's exactly 2.
+check_flows () {
+ n=$(ovs-ofctl dump-flows br0 table=1 | sed -n 's/.*n_packets=\([[0-9]]\{1,\}\).*/\1/p')
+ echo "n_packets=$n"
+ test "$n" = 2
+}
+OVS_WAIT_UNTIL([check_flows], [ovs-ofctl dump-flows br0])
OVS_VSWITCHD_STOP
AT_CLEANUP
@@ -7524,13 +7601,28 @@ dnl configure bridge IPFIX and ensure that sample action generation works at the
dnl datapath level.
AT_SETUP([ofproto-dpif - Bridge IPFIX sanity check])
OVS_VSWITCHD_START
-add_of_ports br0 1 2
+dnl first revalidation triggered by add interface
+AT_CHECK([ovs-appctl coverage/read-counter rev_reconfigure], [0], [dnl
+1
+])
+
+add_of_ports br0 1 2 3
+AT_CHECK([ovs-appctl coverage/read-counter rev_reconfigure], [0], [dnl
+2
+])
dnl Sample every packet using bridge-based sampling.
AT_CHECK([ovs-vsctl -- set bridge br0 ipfix=@fix -- \
--id=@fix create ipfix targets=\"127.0.0.1:4739\" \
- sampling=1], [0], [ignore])
+ sampling=2], [0], [ignore])
+AT_CHECK([ovs-appctl coverage/read-counter rev_reconfigure], [0], [dnl
+3
+])
+AT_CHECK([ovs-vsctl set ipfix `ovs-vsctl get bridge br0 ipfix` sampling=1], [0])
+AT_CHECK([ovs-appctl coverage/read-counter rev_reconfigure], [0], [dnl
+4
+])
dnl Send some packets that should be sampled.
for i in `seq 1 3`; do
AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800)'])
@@ -7540,6 +7632,28 @@ flow-dump from the main thread:
packets:2, bytes:68, used:0.001s, actions:userspace(pid=0,ipfix(output_port=4294967295))
])
+AT_CHECK([ovs-appctl revalidator/purge])
+
+dnl Check sample is performed even if only one of the ports is present.
+AT_DATA([flows.txt], [dnl
+table=0,in_port=3,tcp actions=load:0xffff->NXM_OF_IN_PORT[],ct(zone=1,table=1)
+table=1,tcp, actions=output:2
+])
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+for i in `seq 1 3`; do
+ AT_CHECK([ovs-appctl netdev-dummy/receive p3 'in_port(3),eth(src=50:54:00:00:00:08,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=192.168.0.2,dst=192.168.0.1,proto=6,tos=0,ttl=64,frag=no)'])
+done
+
+AT_CHECK([ovs-appctl dpctl/dump-flows | sed 's/.*\(packets:\)/\1/' | sed 's/used:[[0-9]].[[0-9]]*s/used:0.001s/'], [0], [dnl
+flow-dump from the main thread:
+packets:2, bytes:236, used:0.001s, actions:userspace(pid=0,ipfix(output_port=2)),2
+packets:2, bytes:236, used:0.001s, actions:userspace(pid=0,ipfix(output_port=4294967295)),ct(zone=1),recirc(0x1)
+])
+
+AT_CHECK([ovs-ofctl del-flows br0 in_port=3])
+AT_CHECK([ovs-ofctl del-flows br0 table=1])
+
AT_CHECK([ovs-appctl revalidator/purge])
dnl
dnl Add a slowpath meter. The userspace action should be metered.
@@ -8591,6 +8705,34 @@ AT_CHECK([sed -n 's/=[[0-9]][[0-9]]\(\.[[0-9]][[0-9]]*\)\{0,1\}s/=?s/p' stdout],
OVS_VSWITCHD_STOP
AT_CLEANUP
+
+AT_SETUP([ofproto-dpif - patch ports - meter (clone)])
+
+OVS_VSWITCHD_START(
+ [add-port br0 p0 -- set Interface p0 type=dummy ofport_request=1 -- \
+ add-port br0 p1 -- set Interface p1 type=patch \
+ options:peer=p2 ofport_request=2 -- \
+ add-br br1 -- \
+ set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- \
+ set bridge br1 datapath-type=dummy other-config:datapath-id=1234 \
+ fail-mode=secure -- \
+ add-port br1 p2 -- set Interface p2 type=patch \
+ options:peer=p1 -- \
+ add-port br1 p3 -- set Interface p3 type=dummy ofport_request=3])
+
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br1 'meter=1 pktps stats bands=type=drop rate=2'])
+AT_CHECK([ovs-ofctl del-flows br0])
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 in_port=local,ip,actions=2,1])
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br1 in_port=1,ip,actions=meter:1,3])
+
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(100),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=10.1.1.22,dst=10.0.0.3,proto=6,tos=0,ttl=64,frag=no),tcp(src=53295,dst=8080)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+ [Datapath actions: clone(meter(0),3),1
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
dnl ----------------------------------------------------------------------
AT_BANNER([ofproto-dpif -- megaflows])
@@ -9695,6 +9837,26 @@ OFPST_TABLE reply (OF1.3) (xid=0x2):
OVS_VSWITCHD_STOP
AT_CLEANUP
+AT_SETUP([ofproto-dpif packet-out table meter drop])
+OVS_VSWITCHD_START
+add_of_ports br0 1 2
+
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br0 'meter=1 pktps bands=type=drop rate=1'])
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 'in_port=1 action=meter:1,output:2'])
+
+ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000400080000 actions=resubmit(,0)"
+ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000400080000 actions=resubmit(,0)"
+
+# Check that vswitchd hasn't crashed by dumping the meter added above
+AT_CHECK([ovs-ofctl -O OpenFlow13 dump-meters br0 | ofctl_strip], [0], [dnl
+OFPST_METER_CONFIG reply (OF1.3):
+meter=1 pktps bands=
+type=drop rate=1
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
AT_SETUP([ofproto-dpif - ICMPv6])
OVS_VSWITCHD_START
add_of_ports br0 1
@@ -11404,6 +11566,23 @@ Megaflow: recirc_id=0x3,eth,ip,in_port=1,nw_frag=no
Datapath actions: 4
])
+ovs-ofctl del-flows br0
+
+AT_DATA([flows.txt], [dnl
+table=0,in_port=1 actions=load:0x1->NXM_NX_REG1[[]],resubmit(,1),load:0x2->NXM_NX_REG1[[]],resubmit(,1),load:0x3->NXM_NX_REG1[[]],resubmit(,1)
+table=1,in_port=1,reg1=0x1 actions=check_pkt_larger(200)->NXM_NX_REG0[[0]],resubmit(,4)
+table=1,in_port=1,reg1=0x2 actions=output:2
+table=1,in_port=1,reg1=0x3 actions=output:4
+table=4,in_port=1 actions=output:3
+])
+
+AT_CHECK([ovs-ofctl --protocols=OpenFlow10 add-flows br0 flows.txt])
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.10.10.2,dst=10.10.10.1,proto=1,tos=1,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout])
+AT_CHECK([cat stdout | grep Datapath -B1], [0], [dnl
+Megaflow: recirc_id=0,eth,ip,in_port=1,nw_frag=no
+Datapath actions: check_pkt_len(size=200,gt(3),le(3)),2,4
+])
+
OVS_VSWITCHD_STOP
AT_CLEANUP
diff --git a/tests/ofproto-macros.at b/tests/ofproto-macros.at
index 736d9809cb..f906b5c3b5 100644
--- a/tests/ofproto-macros.at
+++ b/tests/ofproto-macros.at
@@ -134,6 +134,21 @@ strip_ufid () {
sed 's/mega_ufid:[[-0-9a-f]]* //
s/ufid:[[-0-9a-f]]* //'
}
+
+# Resets the 'packets:' and 'bytes:' counts in the output to 0.
+strip_stats () {
+ sed 's/packets:[[0-9]]*/packets:0/
+ s/bytes:[[0-9]]*/bytes:0/'
+}
+
+# Rewrites the first 'recirc_id(...)', 'recirc_id=...' and 'recirc(...)' on
+# each line to 'recirc_id(<recirc>)', 'recirc_id=<recirc>' and
+# 'recirc(<recirc>)' respectively.  This makes output easier to compare.
+strip_recirc() {
+ sed 's/recirc_id([[x0-9]]*)/recirc_id(<recirc>)/
+ s/recirc_id=[[x0-9]]*/recirc_id=<recirc>/
+ s/recirc([[x0-9]]*)/recirc(<recirc>)/'
+}
m4_divert_pop([PREPARE_TESTS])
m4_define([TESTABLE_LOG], [-vPATTERN:ANY:'%c|%p|%m'])
diff --git a/tests/ovs-macros.at b/tests/ovs-macros.at
index 66545da572..e6c5bc6e94 100644
--- a/tests/ovs-macros.at
+++ b/tests/ovs-macros.at
@@ -259,7 +259,20 @@ dnl Executes shell COMMAND in a loop until it returns zero. If COMMAND does
dnl not return zero within a reasonable time limit, executes the commands
dnl in IF-FAILED (if provided) and fails the test.
m4_define([OVS_WAIT_UNTIL],
- [OVS_WAIT([$1], [$2], [AT_LINE], [until $1])])
+ [AT_FAIL_IF([test "$#" -ge 3])
+ dnl The second argument should not be a number (confused with AT_CHECK ?).
+ AT_FAIL_IF([test "$#" -eq 2 && test "$2" -eq "$2" 2>/dev/null])
+ OVS_WAIT([$1], [$2], [AT_LINE], [until $1])])
+
+dnl OVS_WAIT_UNTIL_EQUAL(COMMAND, OUTPUT)
+dnl
+dnl Executes shell COMMAND in a loop until its output, as compared by
+dnl diff, equals OUTPUT.  If COMMAND does not produce the desired output
+dnl within a reasonable time limit, fails the test.
+m4_define([OVS_WAIT_UNTIL_EQUAL],
+ [AT_FAIL_IF([test "$#" -ge 3])
+ echo "$2" > wait_until_expected
+ OVS_WAIT_UNTIL([$1 | diff -u wait_until_expected - ])])
dnl OVS_WAIT_WHILE(COMMAND, [IF-FAILED])
dnl
@@ -267,7 +280,10 @@ dnl Executes shell COMMAND in a loop until it returns nonzero. If COMMAND does
dnl not return nonzero within a reasonable time limit, executes the commands
dnl in IF-FAILED (if provided) and fails the test.
m4_define([OVS_WAIT_WHILE],
- [OVS_WAIT([if $1; then return 1; else return 0; fi], [$2],
+ [AT_FAIL_IF([test "$#" -ge 3])
+ dnl The second argument should not be a number (confused with AT_CHECK ?).
+ AT_FAIL_IF([test "$#" -eq 2 && test "$2" -eq "$2" 2>/dev/null])
+ OVS_WAIT([if $1; then return 1; else return 0; fi], [$2],
[AT_LINE], [while $1])])
dnl OVS_APP_EXIT_AND_WAIT(DAEMON)
diff --git a/tests/ovs-ofctl.at b/tests/ovs-ofctl.at
index 604f15c2d1..c93cb9f16c 100644
--- a/tests/ovs-ofctl.at
+++ b/tests/ovs-ofctl.at
@@ -3246,3 +3246,22 @@ dnl because we need ovs-vswitchd to have the controller config before starting
dnl the controller to 'snoop' the OpenFlow messages from beginning
OVS_VSWITCHD_STOP(["/connection failed (No such file or directory)/d"])
AT_CLEANUP
+
+
+AT_SETUP([ovs-ofctl show-flows - Oversized flow])
+OVS_VSWITCHD_START
+
+printf " priority=90,icmp,reg15=0x8005,metadata=0x1,nw_dst=11.0.0.1,icmp_type=8,icmp_code=0 actions=" > flow.txt
+for i in `seq 1 1022`; do printf "set_field:0x399->reg13,set_field:0x$i->reg15,resubmit(,39),"; done >> flow.txt
+printf "resubmit(,39)\n" >> flow.txt
+
+AT_CHECK([ovs-ofctl -O OpenFlow15 add-flows br0 flow.txt])
+
+AT_CHECK([ovs-ofctl -O OpenFlow10 dump-flows br0 | ofctl_strip | sed '/NXST_FLOW/d' | sort], [0], [])
+OVS_WAIT_UNTIL([grep -q "ofp_flow|WARN|Flow exceeded the maximum flow statistics reply size and was excluded from the response set" ovs-vswitchd.log])
+
+cat flow.txt > expout
+AT_CHECK([ovs-ofctl -O OpenFlow15 dump-flows br0 | ofctl_strip | sed '/OFPST_FLOW/d' | sort], [0], [expout])
+
+OVS_VSWITCHD_STOP(["/Flow exceeded the maximum flow statistics reply size and was excluded from the response set/d"])
+AT_CLEANUP
diff --git a/tests/ovsdb-client.at b/tests/ovsdb-client.at
index 06b671df8c..2d14f1ac26 100644
--- a/tests/ovsdb-client.at
+++ b/tests/ovsdb-client.at
@@ -3,6 +3,7 @@ AT_BANNER([OVSDB -- ovsdb-client commands])
AT_SETUP([ovsdb-client get-schema-version and get-schema-cksum])
AT_KEYWORDS([ovsdb client positive])
ordinal_schema > schema
+on_exit 'kill `cat *.pid`'
AT_CHECK([ovsdb-tool create db schema], [0], [ignore], [ignore])
AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=punix:socket db], [0], [ignore], [ignore])
AT_CHECK([ovsdb-client get-schema-version unix:socket ordinals], [0], [5.1.3
@@ -14,6 +15,7 @@ AT_CLEANUP
AT_SETUP([ovsdb-client needs-conversion (no conversion needed)])
AT_KEYWORDS([ovsdb client file positive])
+on_exit 'kill `cat *.pid`'
ordinal_schema > schema
touch .db.~lock~
AT_CHECK([ovsdb-tool create db schema], [0], [], [ignore])
@@ -27,6 +29,7 @@ AT_SETUP([ovsdb-client needs-conversion (conversion needed)])
AT_KEYWORDS([ovsdb client file positive])
ordinal_schema > schema
touch .db.~lock~
+on_exit 'kill `cat *.pid`'
AT_CHECK([ovsdb-tool create db schema], [0], [], [ignore])
AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=punix:socket db], [0], [ignore], [ignore])
sed 's/5\.1\.3/5.1.4/' < schema > schema2
diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at
index fc6253cfe9..0f7076a052 100644
--- a/tests/ovsdb-cluster.at
+++ b/tests/ovsdb-cluster.at
@@ -400,6 +400,61 @@ done
AT_CLEANUP
+AT_BANNER([OVSDB - cluster failure while joining])
+AT_SETUP([OVSDB cluster - follower crash while joining])
+AT_KEYWORDS([ovsdb server negative unix cluster join])
+
+n=3
+schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
+ordinal_schema > schema
+AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db dnl
+ $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
+cid=`ovsdb-tool db-cid s1.db`
+schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
+for i in `seq 2 $n`; do
+ AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft])
+done
+
+on_exit 'kill `cat *.pid`'
+
+dnl Starting followers first, so we can configure them to crash on join.
+for j in `seq $n`; do
+ i=$(($n + 1 - $j))
+ AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off dnl
+ --detach --no-chdir --log-file=s$i.log dnl
+ --pidfile=s$i.pid --unixctl=s$i dnl
+ --remote=punix:s$i.ovsdb s$i.db])
+ if test $i != 1; then
+ OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s$i dnl
+ cluster/failure-test crash-before-sending-install-snapshot-reply dnl
+ | grep -q "engaged"])
+ fi
+done
+
+dnl Make sure that followers really crashed.
+for i in `seq 2 $n`; do
+ OVS_WAIT_WHILE([test -s s$i.pid])
+done
+
+dnl Bring them back.
+for i in `seq 2 $n`; do
+ AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off dnl
+ --detach --no-chdir --log-file=s$i.log dnl
+ --pidfile=s$i.pid --unixctl=s$i dnl
+ --remote=punix:s$i.ovsdb s$i.db])
+done
+
+dnl Make sure that all servers joined the cluster.
+for i in `seq $n`; do
+ AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
+done
+
+for i in `seq $n`; do
+ OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
+done
+
+AT_CLEANUP
+
OVS_START_SHELL_HELPERS
@@ -413,12 +468,12 @@ ovsdb_cluster_failure_test () {
if test "$crash_node" == "1"; then
new_leader=$5
fi
+ log_grep=$6
cp $top_srcdir/vswitchd/vswitch.ovsschema schema
schema=`ovsdb-tool schema-name schema`
- AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [dnl
-ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral to persistent, including 'status' column in 'Manager' table, because clusters do not support ephemeral columns
-])
+ AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [stderr])
+ AT_CHECK([sed < stderr "/ovsdb|WARN|schema: changed .* columns in 'Open_vSwitch' database from ephemeral to persistent/d"])
n=3
join_cluster() {
@@ -434,7 +489,7 @@ ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral
start_server() {
local i=$1
printf "\ns$i: starting\n"
- AT_CHECK([ovsdb-server -vjsonrpc -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
+ AT_CHECK([ovsdb-server -vjsonrpc -vraft -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
}
connect_server() {
local i=$1
@@ -460,14 +515,23 @@ ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral
fi
AT_CHECK([ovs-appctl -t "`pwd`"/s$delay_election_node cluster/failure-test delay-election], [0], [ignore])
fi
+
+ # Initializing the database separately to avoid extra 'wait' operation
+ # in later transactions.
+ AT_CHECK([ovs-vsctl -v --db="$db" --no-leader-only --no-shuffle-remotes --no-wait init], [0], [ignore], [ignore])
+
AT_CHECK([ovs-appctl -t "`pwd`"/s$crash_node cluster/failure-test $crash_command], [0], [ignore])
AT_CHECK([ovs-vsctl -v --db="$db" --no-leader-only --no-shuffle-remotes --no-wait create QoS type=x], [0], [ignore], [ignore])
- # Make sure that the node really crashed.
- AT_CHECK([ls s$crash_node.ovsdb], [2], [ignore], [ignore])
- # XXX: Client will fail if remotes contains unix socket that doesn't exist (killed).
- if test "$remote_1" = "$crash_node"; then
- db=unix:s$remote_2.ovsdb
+ # Make sure that the node really crashed or has specific log message.
+ if test -z "$log_grep"; then
+ AT_CHECK([ls s$crash_node.ovsdb], [2], [ignore], [ignore])
+ # XXX: Client will fail if remotes contains unix socket that doesn't exist (killed).
+ if test "$remote_1" = "$crash_node"; then
+ db=unix:s$remote_2.ovsdb
+ fi
+ else
+ OVS_WAIT_UNTIL([grep -q "$log_grep" s${crash_node}.log])
fi
AT_CHECK([ovs-vsctl --db="$db" --no-leader-only --no-wait --columns=type --bare list QoS], [0], [x
])
@@ -563,6 +627,11 @@ AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 2 3 crash-after-receiving-append-request-update
AT_CLEANUP
+AT_SETUP([OVSDB cluster - txn on leader, leader transfers leadership after sending appendReq])
+AT_KEYWORDS([ovsdb server negative unix cluster pending-txn transfer])
+ovsdb_cluster_failure_test 1 2 1 transfer-leadership-after-sending-append-request -1 "Transferring leadership"
+AT_CLEANUP
+
AT_SETUP([OVSDB cluster - competing candidates])
AT_KEYWORDS([ovsdb server negative unix cluster competing-candidates])
@@ -629,9 +698,8 @@ ovsdb_torture_test () {
local variant=$3 # 'kill' and restart or 'remove' and add
cp $top_srcdir/vswitchd/vswitch.ovsschema schema
schema=`ovsdb-tool schema-name schema`
- AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [dnl
-ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral to persistent, including 'status' column in 'Manager' table, because clusters do not support ephemeral columns
-])
+ AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [stderr])
+ AT_CHECK([sed < stderr "/ovsdb|WARN|schema: changed .* columns in 'Open_vSwitch' database from ephemeral to persistent/d"])
join_cluster() {
local i=$1
diff --git a/tests/ovsdb-data.at b/tests/ovsdb-data.at
index 8cd2a26cb3..25c6acdac6 100644
--- a/tests/ovsdb-data.at
+++ b/tests/ovsdb-data.at
@@ -846,18 +846,21 @@ OVSDB_CHECK_POSITIVE([generate and apply diff -- integer],
[[diff-data '["integer"]' '[0]' '[2]']],
[[diff: 2
apply diff: 2
+apply diff in place: 2
OK]])
OVSDB_CHECK_POSITIVE([generate and apply diff -- boolean],
[[diff-data '["boolean"]' '[true]' '[false]']],
[[diff: false
apply diff: false
+apply diff in place: false
OK]])
OVSDB_CHECK_POSITIVE([generate and apply diff -- string],
[[diff-data '["string"]' '["AAA"]' '["BBB"]']],
[[diff: "BBB"
apply diff: "BBB"
+apply diff in place: "BBB"
OK]])
dnl Test set modifications.
@@ -870,15 +873,19 @@ OVSDB_CHECK_POSITIVE([generate and apply diff -- set],
]],
[[diff: ["set",[0,2]]
apply diff: ["set",[1,2]]
+apply diff in place: ["set",[1,2]]
OK
diff: 0
apply diff: 1
+apply diff in place: 1
OK
diff: ["set",[0,1]]
apply diff: ["set",[0,1]]
+apply diff in place: ["set",[0,1]]
OK
diff: ["set",[0,1]]
apply diff: ["set",[]]
+apply diff in place: ["set",[]]
OK]])
dnl Test set modifications causes data to violate set size constrain.
@@ -898,18 +905,23 @@ OVSDB_CHECK_POSITIVE([generate and apply diff -- map],
]],
[[diff: ["map",[["2 gills","1 chopin"],["2 pints","1 quart"]]]
apply diff: ["map",[["2 pints","1 quart"]]]
+apply diff in place: ["map",[["2 pints","1 quart"]]]
OK
diff: ["map",[]]
apply diff: ["map",[["2 gills","1 chopin"]]]
+apply diff in place: ["map",[["2 gills","1 chopin"]]]
OK
diff: ["map",[["2 gills","1 chopin"]]]
apply diff: ["map",[]]
+apply diff in place: ["map",[]]
OK
diff: ["map",[["2 pints","1 quart"]]]
apply diff: ["map",[["2 pints","1 quart"]]]
+apply diff in place: ["map",[["2 pints","1 quart"]]]
OK
diff: ["map",[["2 gills","1 gallon"]]]
apply diff: ["map",[["2 gills","1 gallon"]]]
+apply diff in place: ["map",[["2 gills","1 gallon"]]]
OK]])
OVSDB_CHECK_NEGATIVE([generate and apply diff with map -- size error],
diff --git a/tests/ovsdb-idl.at b/tests/ovsdb-idl.at
index 1386f13770..91d34d0de6 100644
--- a/tests/ovsdb-idl.at
+++ b/tests/ovsdb-idl.at
@@ -225,7 +225,7 @@ m4_define([OVSDB_CHECK_IDL_TCP6_MULTIPLE_REMOTES_PY],
m4_define([OVSDB_CHECK_IDL_SSL_PY],
[AT_SETUP([$1 - Python3 - SSL])
AT_SKIP_IF([test "$HAVE_OPENSSL" = no])
- $PYTHON3 -c "import OpenSSL.SSL"
+ $PYTHON3 -c "import ssl"
SSL_PRESENT=$?
AT_SKIP_IF([test $SSL_PRESENT != 0])
AT_KEYWORDS([ovsdb server idl positive Python with ssl socket $5])
@@ -2309,7 +2309,7 @@ OVSDB_CHECK_CLUSTER_IDL_C([simple idl, monitor_cond_since, cluster disconnect],
'condition simple [["i","==",2]]' \
'condition simple [["i","==",1]]' \
'+reconnect' \
- '["idltest",
+ '?["idltest",
{"op": "update",
"table": "simple",
"where": [["i", "==", 1]],
@@ -2320,7 +2320,7 @@ OVSDB_CHECK_CLUSTER_IDL_C([simple idl, monitor_cond_since, cluster disconnect],
003: table simple: i=2 r=1 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1>
004: change conditions
005: reconnect
-006: table simple: i=2 r=1 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1>
+006: table simple
007: {"error":null,"result":[{"count":1}]}
008: table simple: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2>
009: done
diff --git a/tests/ovsdb-server.at b/tests/ovsdb-server.at
index ac243d6a79..e672c13b27 100644
--- a/tests/ovsdb-server.at
+++ b/tests/ovsdb-server.at
@@ -4,7 +4,7 @@ m4_define([OVSDB_SERVER_SHUTDOWN],
[OVS_APP_EXIT_AND_WAIT_BY_TARGET([ovsdb-server], [ovsdb-server.pid])])
m4_define([OVSDB_SERVER_SHUTDOWN_N],
- [cp pid$1 savepid$1
+ [cp $1.pid savepid$1
AT_CHECK([ovs-appctl -t "`pwd`"/unixctl$1 -e exit], [0], [ignore], [ignore])
OVS_WAIT_WHILE([kill -0 `cat savepid$1`], [kill `cat savepid$1`])])
@@ -30,14 +30,13 @@ m4_define([OVSDB_CHECK_EXECUTION],
AT_KEYWORDS([ovsdb server positive unix $5])
$2 > schema
AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore])
+ on_exit 'kill `cat *.pid`'
AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=punix:socket db], [0], [ignore], [ignore])
m4_foreach([txn], [$3],
- [AT_CHECK([ovsdb-client transact unix:socket 'txn'], [0], [stdout], [ignore],
- [test ! -e pid || kill `cat pid`])
+ [AT_CHECK([ovsdb-client transact unix:socket 'txn'], [0], [stdout], [ignore])
cat stdout >> output
])
- AT_CHECK([uuidfilt output], [0], [$4], [ignore],
- [test ! -e pid || kill `cat pid`])
+ AT_CHECK([uuidfilt output], [0], [$4], [ignore])
OVSDB_SERVER_SHUTDOWN
AT_CLEANUP])
@@ -88,8 +87,7 @@ AT_CHECK([uuidfilt output], [0],
[[[{"uuid":["uuid","<0>"]}]
[{"uuid":["uuid","<1>"]}]
[{"rows":[{"_uuid":["uuid","<0>"],"_version":["uuid","<2>"],"name":"zero","number":0},{"_uuid":["uuid","<1>"],"_version":["uuid","<3>"],"name":"one","number":1}]}]
-]], [],
- [test ! -e pid || kill `cat pid`])
+]], [])
AT_CLEANUP
AT_SETUP([truncating database log with bad transaction])
@@ -136,8 +134,7 @@ AT_CHECK([uuidfilt output], [0],
[[[{"uuid":["uuid","<0>"]}]
[{"uuid":["uuid","<1>"]}]
[{"rows":[{"_uuid":["uuid","<0>"],"_version":["uuid","<2>"],"name":"zero","number":0},{"_uuid":["uuid","<1>"],"_version":["uuid","<3>"],"name":"one","number":1}]}]
-]], [],
- [test ! -e pid || kill `cat pid`])
+]], [])
AT_CLEANUP
dnl CHECK_DBS([databases])
@@ -159,6 +156,7 @@ ordinal_schema > schema1
constraint_schema > schema2
AT_CHECK([ovsdb-tool create db1 schema1], [0], [ignore], [ignore])
AT_CHECK([ovsdb-tool create db2 schema2], [0], [ignore], [ignore])
+on_exit 'kill `cat *.pid`'
AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=punix:db.sock db1 db2], [0], [ignore], [ignore])
CHECK_DBS([constraints
ordinals
@@ -166,7 +164,7 @@ ordinals
AT_CHECK(
[[ovstest test-jsonrpc request unix:db.sock get_schema [\"nonexistent\"]]], [0],
[[{"error":{"details":"get_schema request specifies unknown database nonexistent","error":"unknown database","syntax":"[\"nonexistent\"]"},"id":0,"result":null}
-]], [], [test ! -e pid || kill `cat pid`])
+]], [])
OVSDB_SERVER_SHUTDOWN
AT_CLEANUP
@@ -393,7 +391,7 @@ AT_CHECK(
"table": "Manager",
"uuid-name": "x",
"row": {"target": "punix:socket2"}}]']], [0], [ignore], [ignore])
-on_exit 'kill `cat ovsdb-server.pid`'
+on_exit 'kill `cat *.pid`'
AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=db:mydb,Root,managers --remote=db:mydb,Root,manager_options --log-file db], [0], [ignore], [ignore])
ovs-appctl -t ovsdb-server time/warp 6000 1000
AT_CHECK(
@@ -686,6 +684,7 @@ ovsdb_check_online_compaction() {
ovsdb-tool create-cluster db schema unix:s1.raft
fi])
dnl Start ovsdb-server.
+ on_exit 'kill `cat *.pid`'
AT_CHECK([ovsdb-server -vvlog:off -vconsole:off --detach --no-chdir --pidfile --remote=punix:socket --log-file db], [0])
AT_CHECK([ovsdb_client_wait unix:socket ordinals connected])
AT_CAPTURE_FILE([ovsdb-server.log])
@@ -832,7 +831,7 @@ _uuid name number
<0> five 5
<1> four 4
<2> three 3
-], [], [test ! -e pid || kill `cat pid`])
+], [])
OVSDB_SERVER_SHUTDOWN
}
OVS_END_SHELL_HELPERS
@@ -1228,6 +1227,71 @@ AT_CHECK([test $logged_updates -lt $logged_nonblock_updates])
AT_CHECK_UNQUOTED([ovs-vsctl get open_vswitch . system_version], [0],
[xyzzy$counter
])
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+AT_CLEANUP
+
+AT_SETUP([ovsdb-server transaction history size])
+on_exit 'kill `cat *.pid`'
+
+dnl Start an ovsdb-server with the clustered vswitchd schema.
+AT_CHECK([ovsdb-tool create-cluster db dnl
+ $abs_top_srcdir/vswitchd/vswitch.ovsschema unix:s1.raft],
+ [0], [ignore], [ignore])
+AT_CHECK([ovsdb-server --detach --no-chdir --pidfile dnl
+ --log-file --remote=punix:db.sock db],
+ [0], [ignore], [ignore])
+AT_CHECK([ovs-vsctl --no-wait init])
+
+dnl Create a bridge with N ports per transaction, increasing N every 4
+dnl iterations, and then remove the bridges.  Growing the transactions
+dnl ensures that they take up a significant percentage of the total
+dnl database size, so the transaction history will not be able to hold
+dnl all of them.
+dnl
+dnl The test verifies that the number of atoms in the transaction history
+dnl never exceeds the number of atoms in the database, except when the
+dnl history holds only one transaction.
+get_memory_value () {  # Print the memory/show counter named by the first argument.
+ n=$(ovs-appctl -t ovsdb-server memory/show dnl
+ | tr ' ' '\n' | grep "^$1:" | cut -d ':' -f 2)
+ if test X"$n" = "X"; then  # Counter absent: report 0.  POSIX test defines only = for string equality.
+ n=0
+ fi
+ echo $n
+}
+
+check_atoms () {  # Check that txn-history-atoms is at most atoms; skipped when txn-history is 1.
+ if test $(get_memory_value txn-history) -eq 1; then return; fi
+ n_db_atoms=$(get_memory_value atoms)
+ n_txn_history_atoms=$(get_memory_value txn-history-atoms)
+ echo "n_db_atoms: $n_db_atoms"
+ echo "n_txn_history_atoms: $n_txn_history_atoms"
+ AT_CHECK([test $n_txn_history_atoms -le $n_db_atoms])
+}
+
+add_ports () {  # Print one add-port clause per port: first argument is the bridge index, second the port count.
+ for j in $(seq 1 $2); do
+ printf " -- add-port br$1 p$1-%d" $j
+ done
+}
+
+initial_db_atoms=$(get_memory_value atoms)
+
+for i in $(seq 1 100); do
+ cmd=$(add_ports $i $(($i / 4 + 1)))
+ AT_CHECK([ovs-vsctl --no-wait add-br br$i $cmd])
+ check_atoms
+done
+
+for i in $(seq 1 100); do
+ AT_CHECK([ovs-vsctl --no-wait del-br br$i])
+ check_atoms
+done
+
+dnl After removing all the bridges, the number of atoms in the database
+dnl should return to its initial value.
+AT_CHECK([test $(get_memory_value atoms) -eq $initial_db_atoms])
+
OVS_APP_EXIT_AND_WAIT([ovsdb-server])
AT_CLEANUP
@@ -1254,15 +1318,14 @@ m4_define([OVSDB_CHECK_EXECUTION],
$2 > schema
PKIDIR=$abs_top_builddir/tests
AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore])
+ on_exit 'kill `cat *.pid`'
AT_CHECK([ovsdb-server --log-file --detach --no-chdir --pidfile --private-key=$PKIDIR/testpki-privkey2.pem --certificate=$PKIDIR/testpki-cert2.pem --ca-cert=$PKIDIR/testpki-cacert.pem --remote=pssl:0:127.0.0.1 db], [0], [ignore], [ignore])
PARSE_LISTENING_PORT([ovsdb-server.log], [SSL_PORT])
m4_foreach([txn], [$3],
- [AT_CHECK([ovsdb-client --private-key=$PKIDIR/testpki-privkey.pem --certificate=$PKIDIR/testpki-cert.pem --ca-cert=$PKIDIR/testpki-cacert.pem transact ssl:127.0.0.1:$SSL_PORT 'txn'], [0], [stdout], [ignore],
- [test ! -e pid || kill `cat pid`])
+ [AT_CHECK([ovsdb-client --private-key=$PKIDIR/testpki-privkey.pem --certificate=$PKIDIR/testpki-cert.pem --ca-cert=$PKIDIR/testpki-cacert.pem transact ssl:127.0.0.1:$SSL_PORT 'txn'], [0], [stdout], [ignore])
cat stdout >> output
])
- AT_CHECK([uuidfilt output], [0], [$4], [ignore],
- [test ! -e pid || kill `cat pid`])
+ AT_CHECK([uuidfilt output], [0], [$4], [ignore])
OVSDB_SERVER_SHUTDOWN
AT_CLEANUP])
@@ -1291,16 +1354,15 @@ m4_define([OVSDB_CHECK_EXECUTION],
AT_SKIP_IF([test $HAVE_IPV6 = no])
$2 > schema
PKIDIR=$abs_top_builddir/tests
+ on_exit 'kill `cat *.pid`'
AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore])
AT_CHECK([ovsdb-server --log-file --detach --no-chdir --pidfile --private-key=$PKIDIR/testpki-privkey2.pem --certificate=$PKIDIR/testpki-cert2.pem --ca-cert=$PKIDIR/testpki-cacert.pem --remote=pssl:0:[[::1]] db], [0], [ignore], [ignore])
PARSE_LISTENING_PORT([ovsdb-server.log], [SSL_PORT])
m4_foreach([txn], [$3],
- [AT_CHECK([ovsdb-client --private-key=$PKIDIR/testpki-privkey.pem --certificate=$PKIDIR/testpki-cert.pem --ca-cert=$PKIDIR/testpki-cacert.pem transact ssl:[[::1]]:$SSL_PORT 'txn'], [0], [stdout], [ignore],
- [test ! -e pid || kill `cat pid`])
+ [AT_CHECK([ovsdb-client --private-key=$PKIDIR/testpki-privkey.pem --certificate=$PKIDIR/testpki-cert.pem --ca-cert=$PKIDIR/testpki-cacert.pem transact ssl:[[::1]]:$SSL_PORT 'txn'], [0], [stdout], [ignore])
cat stdout >> output
])
- AT_CHECK([uuidfilt output], [0], [$4], [ignore],
- [test ! -e pid || kill `cat pid`])
+ AT_CHECK([uuidfilt output], [0], [$4], [ignore])
OVSDB_SERVER_SHUTDOWN
AT_CLEANUP])
@@ -1327,16 +1389,15 @@ m4_define([OVSDB_CHECK_EXECUTION],
AT_KEYWORDS([ovsdb server positive tcp $5])
$2 > schema
PKIDIR=$abs_top_builddir/tests
+ on_exit 'kill `cat *.pid`'
AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore])
AT_CHECK([ovsdb-server --log-file --detach --no-chdir --pidfile --remote=ptcp:0:127.0.0.1 db], [0], [ignore], [ignore])
PARSE_LISTENING_PORT([ovsdb-server.log], [TCP_PORT])
m4_foreach([txn], [$3],
- [AT_CHECK([ovsdb-client transact tcp:127.0.0.1:$TCP_PORT 'txn'], [0], [stdout], [ignore],
- [test ! -e pid || kill `cat pid`])
+ [AT_CHECK([ovsdb-client transact tcp:127.0.0.1:$TCP_PORT 'txn'], [0], [stdout], [ignore])
cat stdout >> output
])
- AT_CHECK([uuidfilt output], [0], [$4], [ignore],
- [test ! -e pid || kill `cat pid`])
+ AT_CHECK([uuidfilt output], [0], [$4], [ignore])
OVSDB_SERVER_SHUTDOWN
AT_CLEANUP])
@@ -1364,16 +1425,15 @@ m4_define([OVSDB_CHECK_EXECUTION],
AT_SKIP_IF([test $HAVE_IPV6 = no])
$2 > schema
PKIDIR=$abs_top_builddir/tests
+ on_exit 'kill `cat *.pid`'
AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore])
AT_CHECK([ovsdb-server --log-file --detach --no-chdir --pidfile --remote=ptcp:0:[[::1]] db], [0], [ignore], [ignore])
PARSE_LISTENING_PORT([ovsdb-server.log], [TCP_PORT])
m4_foreach([txn], [$3],
- [AT_CHECK([ovsdb-client transact tcp:[[::1]]:$TCP_PORT 'txn'], [0], [stdout], [ignore],
- [test ! -e pid || kill `cat pid`])
+ [AT_CHECK([ovsdb-client transact tcp:[[::1]]:$TCP_PORT 'txn'], [0], [stdout], [ignore])
cat stdout >> output
])
- AT_CHECK([uuidfilt output], [0], [$4], [ignore],
- [test ! -e pid || kill `cat pid`])
+ AT_CHECK([uuidfilt output], [0], [$4], [ignore])
OVSDB_SERVER_SHUTDOWN
AT_CLEANUP])
@@ -1453,9 +1513,9 @@ m4_define([OVSDB_CHECK_EXECUTION],
target=4
$2 > schema
schema_name=`ovsdb-tool schema-name schema`
+ on_exit 'kill `cat *.pid`'
AT_CHECK([ovsdb-tool create db1 schema], [0], [stdout], [ignore])
- on_exit 'kill `cat *.pid`'
AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log dnl
--pidfile --remote=punix:db1.sock db1
], [0], [ignore], [ignore])
@@ -1511,12 +1571,11 @@ m4_define([OVSDB_CHECK_EXECUTION],
AT_CHECK([ovsdb-tool create db1 schema], [0], [stdout], [ignore])
AT_CHECK([ovsdb-tool create db2 schema], [0], [stdout], [ignore])
+ on_exit 'kill `cat *.pid`'
AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock db1], [0], [ignore], [ignore])
i
- on_exit 'test ! -e pid || kill `cat pid`'
- AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=pid2 --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore])
- on_exit 'test ! -e pid2 || kill `cat pid2`'
+ AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore])
m4_foreach([txn], [$3],
[AT_CHECK([ovsdb-client transact 'txn'], [0], [stdout], [ignore])
@@ -1557,11 +1616,10 @@ m4_define([OVSDB_CHECK_REPLICATION],
AT_CHECK([ovsdb-tool create db1 schema], [0], [stdout], [ignore])
AT_CHECK([ovsdb-tool create db2 schema], [0], [stdout], [ignore])
+ on_exit 'kill `cat *.pid`'
AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock db1], [0], [ignore], [ignore])
- on_exit 'test ! -e pid || kill `cat pid`'
- AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=pid2 --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock --sync-exclude-tables=mydb:b db2], [0], [ignore], [ignore])
- on_exit 'test ! -e pid2 || kill `cat pid2`'
+ AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock --sync-exclude-tables=mydb:b db2], [0], [ignore], [ignore])
m4_foreach([txn], [$3],
[AT_CHECK([ ovsdb-client transact 'txn' ], [0], [stdout], [ignore])
@@ -1629,6 +1687,7 @@ AT_CLEANUP
#ovsdb-server/set-sync-exclude-tables command
AT_SETUP([ovsdb-server/set-sync-exclude-tables])
+on_exit 'kill `cat *.pid`'
AT_KEYWORDS([ovsdb server replication set-exclude-tables])
AT_SKIP_IF([test $DIFF_SUPPORTS_NORMAL_FORMAT = no])
@@ -1637,12 +1696,10 @@ AT_CHECK([ovsdb-tool create db1 schema], [0], [stdout], [ignore])
AT_CHECK([ovsdb-tool create db2 schema], [0], [stdout], [ignore])
AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock db1], [0], [ignore], [ignore])
-on_exit 'test ! -e pid || kill `cat pid`'
-AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=pid2 --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore])
-on_exit 'test ! -e pid2 || kill `cat pid2`'
+AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore])
-AT_CHECK([ovs-appctl -t "`pwd`"/unixctl2 ovsdb-server/set-sync-exclude-tables mydb:b], [0], [ignore], [ignore], [test ! -e pid || kill `cat pid`; test ! -e pid2 || kill `cat pid2`])
+AT_CHECK([ovs-appctl -t "`pwd`"/unixctl2 ovsdb-server/set-sync-exclude-tables mydb:b], [0], [ignore], [ignore])
AT_CHECK([ovsdb-client transact unix:db.sock \
'[["mydb",
@@ -1651,11 +1708,9 @@ AT_CHECK([ovsdb-client transact unix:db.sock \
"row": {"number": 0, "name": "zero"}},
{"op": "insert",
"table": "b",
- "row": {"number": 1, "name": "one"}}]]'], [0], [stdout], [ignore],
- [test ! -e pid || kill `cat pid`; test ! -e pid2 || kill `cat pid2`])
+ "row": {"number": 1, "name": "one"}}]]'], [0], [stdout], [ignore])
-AT_CHECK([ovsdb-client dump unix:db.sock], [0], [stdout], [ignore],
- [test ! -e pid || kill `cat pid`; test ! -e pid2 || kill `cat pid2`])
+AT_CHECK([ovsdb-client dump unix:db.sock], [0], [stdout], [ignore])
cat stdout > dump1
OVS_WAIT_UNTIL([ ovsdb-client dump unix:db2.sock | grep zero ])
AT_CHECK([ovsdb-client dump unix:db2.sock], [0], [stdout], [ignore])
@@ -1679,16 +1734,15 @@ AT_CLEANUP
#ovsdb-server/connect-active-ovsdb-server
AT_SETUP([ovsdb-server/connect-active-server])
+on_exit 'kill `cat *.pid`'
AT_KEYWORDS([ovsdb server replication connect-active-server])
replication_schema > schema
AT_CHECK([ovsdb-tool create db1 schema], [0], [stdout], [ignore])
AT_CHECK([ovsdb-tool create db2 schema], [0], [stdout], [ignore])
AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock db1], [0], [ignore], [ignore])
-on_exit 'test ! -e pid || kill `cat pid`'
-AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=pid2 --remote=punix:db2.sock --unixctl=unixctl2 db2], [0], [ignore], [ignore])
-on_exit 'test ! -e pid2 || kill `cat pid2`'
+AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl=unixctl2 db2], [0], [ignore], [ignore])
dnl Try to connect without specifying the active server.
AT_CHECK([ovs-appctl -t "`pwd`"/unixctl2 ovsdb-server/connect-active-ovsdb-server], [0],
@@ -1718,6 +1772,7 @@ AT_CLEANUP
#ovsdb-server/disconnect-active-server command
AT_SETUP([ovsdb-server/disconnect-active-server])
+on_exit 'kill `cat *.pid`'
AT_KEYWORDS([ovsdb server replication disconnect-active-server])
AT_SKIP_IF([test $DIFF_SUPPORTS_NORMAL_FORMAT = no])
@@ -1726,10 +1781,8 @@ AT_CHECK([ovsdb-tool create db1 schema], [0], [stdout], [ignore])
AT_CHECK([ovsdb-tool create db2 schema], [0], [stdout], [ignore])
AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock db1], [0], [ignore], [ignore])
-on_exit 'test ! -e pid || kill `cat pid`'
-AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=pid2 --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore])
-on_exit 'test ! -e pid2 || kill `cat pid2`'
+AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore])
AT_CHECK([ovsdb-client transact unix:db.sock \
'[["mydb",
@@ -1775,7 +1828,7 @@ AT_CHECK([uuidfilt output], [0], [7,9c7,8
---
> _uuid name number
> ----- ---- ------
-], [ignore], [test ! -e pid || kill `cat pid`; test ! -e pid2 || kill `cat pid2`])
+], [ignore])
dnl The backup server now become active, and can accept write transactions.
AT_CHECK([ovsdb-client transact unix:db2.sock \
@@ -1826,13 +1879,12 @@ dnl Start both 'db1' and 'db2' in backup mode. Let them backup from each
dnl other. This is not an supported operation state, but to simulate a start
dnl up condition where an HA manger can select which one to be an active
dnl server soon after.
-AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile="`pwd`"/pid --remote=punix:db.sock --unixctl="`pwd`"/unixctl db1 --sync-from=unix:db2.sock --active ], [0], [ignore], [ignore])
-on_exit 'test ! -e pid || kill `cat pid`'
+on_exit 'kill `cat *.pid`'
+AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock --unixctl="`pwd`"/unixctl db1 --sync-from=unix:db2.sock --active ], [0], [ignore], [ignore])
AT_CHECK([ovs-appctl -t "`pwd`"/unixctl ovsdb-server/connect-active-ovsdb-server])
-AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile="`pwd`"/pid2 --remote=punix:db2.sock --unixctl="`pwd`"/unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore])
-on_exit 'test ! -e pid2 || kill `cat pid2`'
+AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl="`pwd`"/unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore])
dnl
dnl make sure both servers reached the replication state
@@ -1900,8 +1952,8 @@ AT_CHECK([ovsdb-tool transact db \
"row": {"number": 9, "name": "nine"}}]]'], [0], [ignore], [ignore])
dnl Start 'db', then try to be a back up server of itself.
-AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server.log --pidfile="`pwd`"/pid --remote=punix:db.sock --unixctl="`pwd`"/unixctl db --sync-from=unix:db.sock --active ], [0], [ignore], [ignore])
-on_exit 'test ! -e pid || kill `cat pid`'
+on_exit 'kill `cat *.pid`'
+AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server.log --pidfile --remote=punix:db.sock --unixctl="`pwd`"/unixctl db --sync-from=unix:db.sock --active ], [0], [ignore], [ignore])
dnl Save the current content
AT_CHECK([ovsdb-client dump unix:db.sock], [0], [stdout])
@@ -1919,6 +1971,7 @@ AT_CHECK([diff dump1 dump2])
AT_CLEANUP
AT_SETUP([ovsdb-server/read-only db:ptcp connection])
+on_exit 'kill `cat *.pid`'
AT_KEYWORDS([ovsdb server read-only])
AT_DATA([schema],
[[{"name": "mydb",
@@ -2007,12 +2060,10 @@ AT_CHECK([ovsdb-tool transact db2 \
"row": {"number": 10, "name": "ten"}}]]'], [0], [ignore], [ignore])
dnl Start both 'db1' and 'db2'.
-AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile="`pwd`"/pid --remote=punix:db.sock --unixctl="`pwd`"/unixctl db1 --active ], [0], [ignore], [ignore])
-on_exit 'test ! -e pid || kill `cat pid`'
-
+on_exit 'kill `cat *.pid`'
+AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock --unixctl="`pwd`"/unixctl db1 --active ], [0], [ignore], [ignore])
-AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile="`pwd`"/pid2 --remote=punix:db2.sock --unixctl="`pwd`"/unixctl2 db2], [0], [ignore], [ignore])
-on_exit 'test ! -e pid2 || kill `cat pid2`'
+AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl="`pwd`"/unixctl2 db2], [0], [ignore], [ignore])
OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/unixctl ovsdb-server/sync-status |grep active])
OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/unixctl2 ovsdb-server/sync-status |grep active])
@@ -2112,7 +2163,7 @@ dnl Starting a dummy server only to reserve some tcp port.
AT_CHECK([cp db db.tmp])
AT_CHECK([ovsdb-server -vfile -vvlog:off --log-file=listener.log dnl
--detach --no-chdir dnl
- --pidfile=pid2 --unixctl=unixctl2 dnl
+ --pidfile=2.pid --unixctl=unixctl2 dnl
--remote=ptcp:0:127.0.0.1 dnl
db.tmp], [0], [stdout], [stderr])
PARSE_LISTENING_PORT([listener.log], [BAD_TCP_PORT])
diff --git a/tests/pmd.at b/tests/pmd.at
index 225d4ee3a4..a7cbf9a81b 100644
--- a/tests/pmd.at
+++ b/tests/pmd.at
@@ -199,7 +199,7 @@ pmd thread numa_id <cleared> core_id <cleared>:
OVS_VSWITCHD_STOP
AT_CLEANUP
-AT_SETUP([PMD - pmd-cpu-mask - NUMA])
+AT_SETUP([PMD - pmd-cpu-mask - dual NUMA])
OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy-pmd options:n_rxq=8 options:numa_id=1 -- set Open_vSwitch . other_config:pmd-cpu-mask=1],
[], [], [--dummy-numa 1,1,0,0])
@@ -359,6 +359,44 @@ pmd thread numa_id 1 core_id 0:
OVS_VSWITCHD_STOP
AT_CLEANUP
+AT_SETUP([PMD - pmd-cpu-mask - multi NUMA])
+OVS_VSWITCHD_START([add-port br0 p0 \
+ -- set Interface p0 type=dummy-pmd options:n_rxq=4 \
+ -- set Interface p0 options:numa_id=0 \
+ -- set Open_vSwitch . other_config:pmd-cpu-mask=0xf \
+ -- set open_vswitch . other_config:pmd-rxq-assign=cycles],
+ [], [], [--dummy-numa 1,2,1,2])
+
+TMP=$(($(cat ovs-vswitchd.log | wc -l | tr -d [[:blank:]])+1))
+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-rxq-assign=group])
+
+OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "Performing pmd to rx queue assignment using group algorithm"])
+OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "There's no available (non-isolated) pmd thread on numa node 0."])
+
+# check all pmds from both non-local numas are assigned an rxq
+AT_CHECK([test `ovs-appctl dpif-netdev/pmd-rxq-show | awk '/AVAIL$/ { printf("%s\t", $0); next } 1' | parse_pmd_rxq_show_group | wc -l` -eq 4])
+
+TMP=$(($(cat ovs-vswitchd.log | wc -l | tr -d [[:blank:]])+1))
+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-rxq-assign=cycles])
+
+OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "Performing pmd to rx queue assignment using cycles algorithm"])
+OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "There's no available (non-isolated) pmd thread on numa node 0."])
+
+# check all pmds from both non-local numas are assigned an rxq
+AT_CHECK([test `ovs-appctl dpif-netdev/pmd-rxq-show | awk '/AVAIL$/ { printf("%s\t", $0); next } 1' | parse_pmd_rxq_show_group | wc -l` -eq 4])
+
+TMP=$(($(cat ovs-vswitchd.log | wc -l | tr -d [[:blank:]])+1))
+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-rxq-assign=roundrobin])
+
+OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "Performing pmd to rx queue assignment using roundrobin algorithm"])
+OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "There's no available (non-isolated) pmd thread on numa node 0."])
+
+# check all pmds from both non-local numas are assigned an rxq
+AT_CHECK([test `ovs-appctl dpif-netdev/pmd-rxq-show | awk '/AVAIL$/ { printf("%s\t", $0); next } 1' | parse_pmd_rxq_show_group | wc -l` -eq 4])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
AT_SETUP([PMD - stats])
OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 ofport_request=7 type=dummy-pmd options:n_rxq=4],
[], [], [DUMMY_NUMA])
diff --git a/tests/reconnect.at b/tests/reconnect.at
index 0f74709f5a..5bca84351c 100644
--- a/tests/reconnect.at
+++ b/tests/reconnect.at
@@ -39,8 +39,19 @@ run
connected
# Try timeout without noting that we tried to receive.
-# (This does nothing since we never timeout in this case.)
+# Timeout should be scheduled to the next probe interval.
timeout
+run
+
+# Once we reached the timeout, it should not expire until the receive actually
+# attempted. However, we still need to wake up as soon as possible in order to
+# have a chance to mark the receive attempt even if nothing was received.
+timeout
+run
+
+# Short time advance past the original probe interval, but not expired still.
+timeout
+run
# Now disable the receive-attempted feature and timeout again.
receive-attempted LLONG_MAX
@@ -67,18 +78,37 @@ connected
last connected 0 ms ago, connected 0 ms total
# Try timeout without noting that we tried to receive.
-# (This does nothing since we never timeout in this case.)
-timeout
- no timeout
-
-# Now disable the receive-attempted feature and timeout again.
-receive-attempted LLONG_MAX
+# Timeout should be scheduled to the next probe interval.
timeout
advance 5000 ms
### t=6000 ###
in ACTIVE for 5000 ms (0 ms backoff)
run
+
+# Once we reached the timeout, it should not expire until the receive actually
+# attempted. However, we still need to wake up as soon as possible in order to
+# have a chance to mark the receive attempt even if nothing was received.
+timeout
+ advance 1 ms
+
+### t=6001 ###
+ in ACTIVE for 5001 ms (0 ms backoff)
+run
+
+# Short time advance past the original probe interval, but not expired still.
+timeout
+ advance 1 ms
+
+### t=6002 ###
+ in ACTIVE for 5002 ms (0 ms backoff)
+run
+
+# Now disable the receive-attempted feature and timeout again.
+receive-attempted LLONG_MAX
+timeout
+ advance 0 ms
+run
should send probe
in IDLE for 0 ms (0 ms backoff)
@@ -86,7 +116,7 @@ run
timeout
advance 5000 ms
-### t=11000 ###
+### t=11002 ###
in IDLE for 5000 ms (0 ms backoff)
run
should disconnect
@@ -94,7 +124,7 @@ disconnected
in BACKOFF for 0 ms (1000 ms backoff)
1 successful connections out of 1 attempts, seqno 2
disconnected
- disconnected at 11000 ms (0 ms ago)
+ disconnected at 11002 ms (0 ms ago)
])
######################################################################
@@ -111,8 +141,19 @@ run
connected
# Try timeout without noting that we tried to receive.
-# (This does nothing since we never timeout in this case.)
+# Timeout should be scheduled to the next probe interval.
+timeout
+run
+
+# Once we reached the timeout, it should not expire until the receive actually
+# attempted. However, we still need to wake up as soon as possible in order to
+# have a chance to mark the receive attempt even if nothing was received.
+timeout
+run
+
+# Short time advance past the original probe interval, but not expired still.
timeout
+run
# Now disable the receive-attempted feature and timeout again.
receive-attempted LLONG_MAX
@@ -148,18 +189,37 @@ connected
last connected 0 ms ago, connected 0 ms total
# Try timeout without noting that we tried to receive.
-# (This does nothing since we never timeout in this case.)
-timeout
- no timeout
-
-# Now disable the receive-attempted feature and timeout again.
-receive-attempted LLONG_MAX
+# Timeout should be scheduled to the next probe interval.
timeout
advance 5000 ms
### t=6500 ###
in ACTIVE for 5000 ms (0 ms backoff)
run
+
+# Once we reached the timeout, it should not expire until the receive actually
+# attempted. However, we still need to wake up as soon as possible in order to
+# have a chance to mark the receive attempt even if nothing was received.
+timeout
+ advance 1 ms
+
+### t=6501 ###
+ in ACTIVE for 5001 ms (0 ms backoff)
+run
+
+# Short time advance past the original probe interval, but not expired still.
+timeout
+ advance 1 ms
+
+### t=6502 ###
+ in ACTIVE for 5002 ms (0 ms backoff)
+run
+
+# Now disable the receive-attempted feature and timeout again.
+receive-attempted LLONG_MAX
+timeout
+ advance 0 ms
+run
should send probe
in IDLE for 0 ms (0 ms backoff)
@@ -167,7 +227,7 @@ run
timeout
advance 5000 ms
-### t=11500 ###
+### t=11502 ###
in IDLE for 5000 ms (0 ms backoff)
run
should disconnect
@@ -175,7 +235,7 @@ disconnected
in BACKOFF for 0 ms (1000 ms backoff)
1 successful connections out of 1 attempts, seqno 2
disconnected
- disconnected at 11500 ms (0 ms ago)
+ disconnected at 11502 ms (0 ms ago)
])
######################################################################
@@ -1271,14 +1331,14 @@ activity
created 1000, last activity 3000, last connected 2000
# Connection times out.
-timeout
- no timeout
-receive-attempted LLONG_MAX
timeout
advance 5000 ms
### t=8000 ###
in ACTIVE for 6000 ms (1000 ms backoff)
+receive-attempted LLONG_MAX
+timeout
+ advance 0 ms
run
should send probe
in IDLE for 0 ms (1000 ms backoff)
diff --git a/tests/system-common-macros.at b/tests/system-common-macros.at
index 19a0b125b9..8b9f5c7525 100644
--- a/tests/system-common-macros.at
+++ b/tests/system-common-macros.at
@@ -281,6 +281,14 @@ m4_define([OVS_START_L7],
#
m4_define([OFPROTO_CLEAR_DURATION_IDLE], [[sed -e 's/duration=.*s,/duration=<cleared>,/g' -e 's/idle_age=[0-9]*,/idle_age=<cleared>,/g']])
+# OVS_CHECK_TUNNEL_TSO()
+#
+# Macro to be used in general tunneling tests that could be also
+# used by system-tso. In that case, tunneling is not supported and
+# the test should be skipped.
+m4_define([OVS_CHECK_TUNNEL_TSO],
+ [m4_ifdef([CHECK_SYSTEM_TSO], [AT_SKIP_IF(:)])])
+
# OVS_CHECK_VXLAN()
#
# Do basic check for vxlan functionality, skip the test if it's not there.
diff --git a/tests/system-dpdk.at b/tests/system-dpdk.at
index e0e750fde5..512aa87d4c 100644
--- a/tests/system-dpdk.at
+++ b/tests/system-dpdk.at
@@ -248,6 +248,10 @@ AT_CHECK([ovs-vsctl show], [], [stdout])
AT_SKIP_IF([! ovs-appctl dpif-netdev/miniflow-parser-get | sed 1,4d | grep "True"], [], [dnl
])
+AT_CHECK([ovs-appctl dpif-netdev/dpif-impl-set dpif_avx512], [0], [dnl
+DPIF implementation set to dpif_avx512.
+])
+
AT_CHECK([ovs-appctl dpif-netdev/miniflow-parser-set autovalidator], [0], [dnl
Miniflow extract implementation set to autovalidator.
])
@@ -275,6 +279,10 @@ AT_CHECK([ovs-vsctl show], [], [stdout])
AT_SKIP_IF([! ovs-appctl dpif-netdev/miniflow-parser-get | sed 1,4d | grep "True"], [], [dnl
])
+AT_CHECK([ovs-appctl dpif-netdev/dpif-impl-set dpif_avx512], [0], [dnl
+DPIF implementation set to dpif_avx512.
+])
+
AT_CHECK([ovs-appctl dpif-netdev/miniflow-parser-set autovalidator], [0], [dnl
Miniflow extract implementation set to autovalidator.
])
diff --git a/tests/system-route.at b/tests/system-route.at
index 1714273e35..270956d13f 100644
--- a/tests/system-route.at
+++ b/tests/system-route.at
@@ -14,10 +14,9 @@ dnl Add ip address.
AT_CHECK([ip addr add 10.0.0.17/24 dev p1-route], [0], [stdout])
dnl Check that OVS catches route updates.
-OVS_WAIT_UNTIL([ovs-appctl ovs/route/show | grep 'p1-route' | sort], [0], [dnl
-Cached: 10.0.0.17/24 dev p1-route SRC 10.0.0.17
-Cached: 10.0.0.17/32 dev p1-route SRC 10.0.0.17 local
-])
+OVS_WAIT_UNTIL_EQUAL([ovs-appctl ovs/route/show | grep 'p1-route' | sort], [dnl
+Cached: 10.0.0.0/24 dev p1-route SRC 10.0.0.17
+Cached: 10.0.0.17/32 dev p1-route SRC 10.0.0.17 local])
dnl Delete ip address.
AT_CHECK([ip addr del 10.0.0.17/24 dev p1-route], [0], [stdout])
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index f400cfabc9..4c368eded4 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -218,6 +218,7 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over vxlan tunnel])
+OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_VXLAN()
OVS_TRAFFIC_VSWITCHD_START()
@@ -258,7 +259,55 @@ NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PI
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
+AT_SETUP([datapath - ping vlan over vxlan tunnel])
+OVS_CHECK_TUNNEL_TSO()
+OVS_CHECK_VXLAN()
+
+OVS_TRAFFIC_VSWITCHD_START()
+ADD_BR([br-underlay])
+
+AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])
+AT_CHECK([ovs-ofctl add-flow br-underlay "actions=normal"])
+
+ADD_NAMESPACES(at_ns0)
+
+dnl Set up underlay link from host into the namespace using veth pair.
+ADD_VETH(p0, at_ns0, br-underlay, "172.31.2.1/24")
+AT_CHECK([ip addr add dev br-underlay "172.31.1.100/24"])
+AT_CHECK([ip link set dev br-underlay up])
+
+dnl Set up tunnel endpoints on OVS outside the namespace and with a native
+dnl linux device inside the namespace.
+ADD_OVS_TUNNEL([vxlan], [br0], [at_vxlan0], [172.31.1.1], [10.1.1.100/24])
+ADD_NATIVE_TUNNEL([vxlan], [at_vxlan1], [at_ns0], [172.31.1.100], [10.2.1.1/24],
+ [id 0 dstport 4789])
+
+AT_CHECK([ovs-vsctl set port br0 tag=100])
+AT_CHECK([ovs-vsctl set port br-underlay tag=42])
+
+ADD_VLAN(at_vxlan1, at_ns0, 100, "10.1.1.1/24")
+ADD_VLAN(p0, at_ns0, 42, "172.31.1.1/24")
+
+dnl First, check the underlay
+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 172.31.1.100 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+dnl Okay, now check the overlay with different packet sizes
+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+NS_CHECK_EXEC([at_ns0], [ping -s 1600 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
AT_SETUP([datapath - ping over vxlan6 tunnel])
+OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_VXLAN_UDP6ZEROCSUM()
OVS_TRAFFIC_VSWITCHD_START()
@@ -302,6 +351,7 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over gre tunnel])
+OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15)
OVS_CHECK_GRE()
@@ -343,6 +393,7 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over ip6gre L2 tunnel])
+OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15)
OVS_CHECK_GRE()
OVS_CHECK_ERSPAN()
@@ -383,6 +434,7 @@ AT_CLEANUP
AT_SETUP([datapath - ping over erspan v1 tunnel])
+OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15)
OVS_CHECK_GRE()
OVS_CHECK_ERSPAN()
@@ -419,6 +471,7 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over erspan v2 tunnel])
+OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15)
OVS_CHECK_GRE()
OVS_CHECK_ERSPAN()
@@ -455,6 +508,7 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over ip6erspan v1 tunnel])
+OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15)
OVS_CHECK_GRE()
OVS_CHECK_ERSPAN()
@@ -494,6 +548,7 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over ip6erspan v2 tunnel])
+OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15)
OVS_CHECK_GRE()
OVS_CHECK_ERSPAN()
@@ -534,6 +589,7 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over geneve tunnel])
+OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_GENEVE()
OVS_TRAFFIC_VSWITCHD_START()
@@ -575,6 +631,7 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over geneve tunnel, delete flow regression])
+OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_GENEVE()
OVS_TRAFFIC_VSWITCHD_START()
@@ -629,6 +686,7 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/|ERR|/d
AT_CLEANUP
AT_SETUP([datapath - flow resume with geneve tun_metadata])
+OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_GENEVE()
OVS_TRAFFIC_VSWITCHD_START()
@@ -680,6 +738,7 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over geneve6 tunnel])
+OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_GENEVE_UDP6ZEROCSUM()
OVS_TRAFFIC_VSWITCHD_START()
@@ -723,6 +782,7 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over gre tunnel by simulated packets])
+OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_MIN_KERNEL(3, 10)
OVS_TRAFFIC_VSWITCHD_START()
@@ -769,6 +829,7 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over erspan v1 tunnel by simulated packets])
+OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_MIN_KERNEL(3, 10)
OVS_TRAFFIC_VSWITCHD_START()
@@ -817,6 +878,7 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over erspan v2 tunnel by simulated packets])
+OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_MIN_KERNEL(3, 10)
OVS_TRAFFIC_VSWITCHD_START()
@@ -870,6 +932,7 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over ip6erspan v1 tunnel by simulated packets])
+OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_MIN_KERNEL(3, 10)
OVS_TRAFFIC_VSWITCHD_START()
@@ -925,6 +988,7 @@ OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([datapath - ping over ip6erspan v2 tunnel by simulated packets])
+OVS_CHECK_TUNNEL_TSO()
OVS_CHECK_MIN_KERNEL(3, 10)
OVS_TRAFFIC_VSWITCHD_START()
@@ -1981,6 +2045,111 @@ tcp,orig=(src=10.1.1.3,dst=10.1.1.4,sport=<cleared>,dport=<cleared>),reply=(src=
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
+AT_SETUP([conntrack - zones from other field])
+CHECK_CONNTRACK()
+OVS_TRAFFIC_VSWITCHD_START()
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+
+dnl Initial flows: ARP/ICMP use NORMAL; TCP is tracked in zone 5 both ways.
+AT_DATA([flows.txt], [dnl
+priority=1,action=drop
+priority=10,arp,action=normal
+priority=10,icmp,action=normal
+priority=100,in_port=1,tcp,ct_state=-trk,action=ct(zone=5,table=0)
+priority=100,in_port=1,tcp,ct_state=+trk,action=ct(commit,zone=NXM_NX_CT_ZONE[]),2
+priority=100,in_port=2,ct_state=-trk,tcp,action=ct(table=0,zone=5)
+priority=100,in_port=2,ct_state=+trk,ct_zone=5,tcp,action=1
+])
+
+AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
+
+OVS_START_L7([at_ns1], [http])
+
+dnl HTTP requests from p0->p1 should work fine.
+NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl
+tcp,dnl
+orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),dnl
+reply=(src=10.1.1.2,dst=10.1.1.1,sport=<cleared>,dport=<cleared>),dnl
+zone=5,protoinfo=(state=<cleared>)
+])
+
+dnl This tests the case where the zone id is taken from a field such as
+dnl NXM_NX_CT_ZONE: the OVS xlate should generate a megaflow of the form
+dnl "ct_zone(5), ... actions: ct(commit, zone=5)". The match "ct_zone(5)"
+dnl is needed because, once the zone id is changed to 15 below, the old
+dnl "ct_zone(5), ... actions: ct(commit, zone=5)" megaflow must not be hit;
+dnl OVS should instead generate a new megaflow matching "ct_zone(0xf)".
+dnl This makes sure that the new packets are committed to zone id 15
+dnl rather than to the old zone id 5.
+AT_CHECK([ovs-appctl dpctl/dump-flows --names filter=in_port=ovs-p0 dnl
+ | grep "+trk" | grep -q "ct_zone(0x5)" ], [0], [])
+
+AT_CHECK([ovs-ofctl mod-flows br0 dnl
+ 'priority=100,ct_state=-trk,tcp,in_port="ovs-p0" actions=ct(table=0,zone=15)'])
+
+NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log])
+
+AT_CHECK([ovs-appctl dpctl/dump-flows --names filter=in_port=ovs-p0 dnl
+ | grep "+trk" | grep -q "ct_zone(0xf)" ], [0], [])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([conntrack - zones from other field, more tests])
+CHECK_CONNTRACK()
+OVS_TRAFFIC_VSWITCHD_START()
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+
+dnl TCP from port 1 is committed to the zone held in ct_label bits 0..15.
+AT_DATA([flows.txt], [dnl
+priority=1,action=drop
+priority=10,arp,action=normal
+priority=10,icmp,action=normal
+priority=100,in_port=1,tcp,ct_state=-trk,action=ct(zone=5,table=0,commit,exec(load:0xffff0005->NXM_NX_CT_LABEL[[0..31]]))
+priority=100,in_port=1,tcp,ct_state=+trk,action=ct(commit,zone=NXM_NX_CT_LABEL[[0..15]]),2
+priority=100,in_port=2,ct_state=-trk,tcp,action=ct(table=0,zone=5)
+priority=100,in_port=2,ct_state=+trk,ct_zone=5,tcp,action=1
+])
+
+AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
+
+OVS_START_L7([at_ns1], [http])
+
+dnl HTTP requests from p0->p1 should work fine.
+NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl
+tcp,dnl
+orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),dnl
+reply=(src=10.1.1.2,dst=10.1.1.1,sport=<cleared>,dport=<cleared>),dnl
+zone=5,labels=0xffff0005,protoinfo=(state=<cleared>)
+])
+dnl The sed maps a label shown as 0xffff0005/0xffff to the form grep expects.
+AT_CHECK([ovs-appctl dpctl/dump-flows --names filter=in_port=ovs-p0 dnl
+ | grep "+trk" | sed 's/0xffff0005\/0xffff/0x5\/0xffff/' dnl
+ | grep -q "ct_label(0x5/0xffff)" ], [0], [])
+
+AT_CHECK([ovs-ofctl mod-flows br0 'priority=100,ct_state=-trk,tcp,in_port="ovs-p0" actions=ct(table=0,zone=15,commit,exec(load:0xffff000f->NXM_NX_CT_LABEL[[0..31]]))'])
+
+NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log])
+
+AT_CHECK([ovs-appctl dpctl/dump-flows --names filter=in_port=ovs-p0 dnl
+ | grep "+trk" | sed 's/0xffff000f\/0xffff/0xf\/0xffff/' dnl
+ | grep -q "ct_label(0xf/0xffff)" ], [0], [])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
AT_SETUP([conntrack - multiple bridges])
CHECK_CONNTRACK()
OVS_TRAFFIC_VSWITCHD_START(
@@ -3305,6 +3474,46 @@ NS_CHECK_EXEC([at_ns0], [ping6 -s 3200 -q -c 3 -i 0.3 -w 2 fc00::2 | FORMAT_PING
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
+AT_SETUP([conntrack - IPv4 Fragmentation + NAT])
+AT_SKIP_IF([test $HAVE_TCPDUMP = no])
+CHECK_CONNTRACK()
+
+OVS_TRAFFIC_VSWITCHD_START(
+ [set-fail-mode br0 secure -- ])
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "10.2.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.2.1.2/24")
+
+dnl Create a dummy route for NAT
+NS_CHECK_EXEC([at_ns1], [ip addr add 10.1.1.2/32 dev lo])
+NS_CHECK_EXEC([at_ns0], [ip route add 10.1.1.0/24 via 10.2.1.2])
+NS_CHECK_EXEC([at_ns1], [ip route add 10.1.1.0/24 via 10.2.1.1])
+
+dnl Solely for debugging when things go wrong; each capture has its own files.
+NS_EXEC([at_ns0], [tcpdump -l -n -xx -U -i p0 -w p0.pcap >tcpdump0.out 2>/dev/null &])
+NS_EXEC([at_ns1], [tcpdump -l -n -xx -U -i p1 -w p1.pcap >tcpdump1.out 2>/dev/null &])
+
+AT_DATA([flows.txt], [dnl
+table=0,arp,actions=normal
+table=0,ct_state=-trk,ip,in_port=ovs-p0, actions=ct(table=1, nat)
+table=0,ct_state=-trk,ip,in_port=ovs-p1, actions=ct(table=1, nat)
+table=1,ct_state=+trk+new,ip,in_port=ovs-p0, actions=ct(commit, nat(src=10.1.1.1)),ovs-p1
+table=1,ct_state=+trk+est,ip,in_port=ovs-p0, actions=ovs-p1
+table=1,ct_state=+trk+est,ip,in_port=ovs-p1, actions=ovs-p0
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+dnl Check connectivity through NAT with a single 4500-byte ping (-M dont).
+NS_CHECK_EXEC([at_ns0], [ping -c 1 10.1.1.2 -M dont -s 4500 | FORMAT_PING], [0], [dnl
+1 packets transmitted, 1 received, 0% packet loss, time 0ms
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
AT_SETUP([conntrack - resubmit to ct multiple times])
CHECK_CONNTRACK()
@@ -3464,15 +3673,15 @@ action=normal
AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
-AT_CHECK([ovs-ofctl packet-out br0 "packet=52540003287c525400444ab586dd6006f70605b02c4020010001000000000000000000000020200100010000000000000000000000101100000134e88deb13891389080803136161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl
+AT_CHECK([ovs-ofctl packet-out br0 "in_port=42,packet=52540003287c525400444ab586dd6006f70605b02c4020010001000000000000000000000020200100010000000000000000000000101100000134e88deb13891389080803136161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl
"16161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161"dnl
"61616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl
"1616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161, actions=ct(table=1)"])
-AT_CHECK([ovs-ofctl packet-out br0 "packet=52540003287c525400444ab586dd6006f70602682c402001000100000000000000000000002020010001000000000000000000000010110005a834e88deb6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl
+AT_CHECK([ovs-ofctl packet-out br0 "in_port=42,packet=52540003287c525400444ab586dd6006f70602682c402001000100000000000000000000002020010001000000000000000000000010110005a834e88deb6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl
"161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161, actions=ct(table=1)"])
-AT_CHECK([ovs-ofctl packet-out br0 "packet=52540003287c525400444ab586dd6006f706033d1140200100010000000000000000000000202001000100000000000000000000001013891389033d923861616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl
+AT_CHECK([ovs-ofctl packet-out br0 "in_port=42,packet=52540003287c525400444ab586dd6006f706033d1140200100010000000000000000000000202001000100000000000000000000001013891389033d923861616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl
"1616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161610a, actions=ct(table=1)"])
AT_CHECK([ovs-appctl dpctl/dump-flows | head -2 | tail -1 | grep -q -e ["]udp[(]src=5001["]])
@@ -5817,7 +6026,7 @@ on_exit 'ovs-appctl revalidator/purge'
on_exit 'ovs-appctl dpif/dump-flows br0'
dnl Should work with the virtual IP address through NAT
-for i in 1 2 3 4 5 6 7 8 9 10 11 12; do
+for i in $(seq 1 50); do
echo Request $i
NS_CHECK_EXEC([at_ns1], [wget 10.1.1.64 -t 5 -T 1 --retry-connrefused -v -o wget$i.log])
done
@@ -6106,6 +6315,132 @@ AT_CHECK([ovs-ofctl dump-flows br0 | grep table=2, | OFPROTO_CLEAR_DURATION_IDLE
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
+AT_SETUP([conntrack - can match and clear ct_state from outside OVS])
+CHECK_CONNTRACK_LOCAL_STACK()
+OVS_CHECK_TUNNEL_TSO()
+OVS_CHECK_GENEVE()
+dnl On br-underlay, +trk packets get ct_clear and are resubmitted to table 0.
+OVS_TRAFFIC_VSWITCHD_START()
+ADD_BR([br-underlay], [set bridge br-underlay other-config:hwaddr=\"f0:00:00:01:01:02\"])
+
+AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])
+AT_CHECK([ovs-ofctl add-flow br-underlay "priority=100,ct_state=+trk,actions=ct_clear,resubmit(,0)"])
+AT_CHECK([ovs-ofctl add-flow br-underlay "priority=10,actions=normal"])
+
+ADD_NAMESPACES(at_ns0)
+
+dnl Set up underlay link from host into the namespace using veth pair.
+ADD_VETH(p0, at_ns0, br-underlay, "172.31.1.1/24", "f0:00:00:01:01:01")
+AT_CHECK([ip addr add dev br-underlay "172.31.1.100/24"])
+AT_CHECK([ip link set dev br-underlay up])
+
+dnl Set up tunnel endpoints on OVS outside the namespace and with a native
+dnl linux device inside the namespace.
+ADD_OVS_TUNNEL([geneve], [br0], [at_gnv0], [172.31.1.1], [10.1.1.100/24])
+ADD_NATIVE_TUNNEL([geneve], [ns_gnv0], [at_ns0], [172.31.1.100], [10.1.1.1/24],
+ [vni 0])
+
+dnl First, check the underlay
+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 172.31.1.100 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+dnl Okay, now check the overlay
+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+dnl Confirm that the ct_state match and ct_clear action reached the datapath.
+AT_CHECK([ovs-appctl dpctl/dump-flows --names | grep ct_clear | sort | dnl
+ grep 'eth(src=f0:00:00:01:01:02,dst=f0:00:00:01:01:01)' | dnl
+ strip_stats | strip_used | dnl
+ sed 's/,packet_type(ns=[[0-9]]*,id=[[0-9]]*),/,/'],
+ [0], [dnl
+recirc_id(0),in_port(br-underlay),ct_state(+trk),eth(src=f0:00:00:01:01:02,dst=f0:00:00:01:01:01),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:0.0s, actions:ct_clear,ovs-p0
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_BANNER([IGMP])
+
+AT_SETUP([IGMP - flood under normal action])
+
+OVS_TRAFFIC_VSWITCHD_START()
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p1, at_ns0, br0, "10.1.1.1/24", "f0:00:00:01:01:01")
+ADD_VETH(p2, at_ns1, br0, "10.1.1.2/24", "f0:00:00:01:01:02")
+
+AT_CHECK([ovs-ofctl add-flow br0 "actions=NORMAL"])
+dnl Send an IGMP packet (IP protocol 2) to multicast MAC 01:00:5e:01:01:03.
+NS_CHECK_EXEC([at_ns0], [$PYTHON3 $srcdir/sendpkt.py p1 01 00 5e 01 01 03 dnl
+f0 00 00 01 01 01 08 00 46 c0 00 28 00 00 40 00 01 02 d3 49 45 65 eb 4a e0 dnl
+00 00 16 94 04 00 00 22 00 f9 02 00 00 00 01 04 00 00 00 e0 00 00 fb 00 00 dnl
+00 00 00 00 > /dev/null])
+dnl The resulting datapath flow must output to both br0 and ovs-p2 (flood).
+AT_CHECK([ovs-appctl dpctl/dump-flows --names | grep -e '.*ipv4' | sort | dnl
+ strip_stats | strip_used | strip_recirc | dnl
+ sed 's/,packet_type(ns=[[0-9]]*,id=[[0-9]]*),/,/'],
+ [0], [dnl
+recirc_id(<recirc>),in_port(ovs-p1),eth(src=f0:00:00:01:01:01,dst=01:00:5e:01:01:03),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:br0,ovs-p2
+])
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([IGMP - forward with ICMP])
+
+OVS_TRAFFIC_VSWITCHD_START()
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p1, at_ns0, br0, "10.1.1.1/24", "f0:00:00:01:01:01")
+ADD_VETH(p2, at_ns1, br0, "10.1.1.2/24", "f0:00:00:01:01:02")
+
+AT_DATA([flows.txt], [dnl
+table=0, arp actions=NORMAL
+table=0, ip,in_port=1 actions=ct(table=1,zone=64000)
+table=0, in_port=2 actions=output:1
+table=1, ip,ct_state=+trk+inv actions=drop
+table=1 ip,in_port=1,icmp,ct_state=+trk+new actions=output:2
+table=1, in_port=1,ip,ct_state=+trk+new actions=controller(userdata=00.de.ad.be.ef.ca.fe.01)
+table=1, in_port=1,ip,ct_state=+trk+est actions=output:2
+])
+AT_CHECK([ovs-ofctl del-flows br0])
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+dnl Send the IGMP, then a unicast ICMP echo request - ensure we do not black hole it
+
+NS_CHECK_EXEC([at_ns0], [$PYTHON3 $srcdir/sendpkt.py p1 f0 00 00 01 01 02 dnl
+f0 00 00 01 01 01 08 00 46 c0 00 28 00 00 40 00 01 02 d3 49 45 65 eb 4a e0 dnl
+00 00 16 94 04 00 00 22 00 f9 02 00 00 00 01 04 00 00 00 e0 00 00 fb 00 00 dnl
+00 00 00 00 > /dev/null])
+
+NS_CHECK_EXEC([at_ns0], [$PYTHON3 $srcdir/sendpkt.py p1 f0 00 00 01 01 02 dnl
+f0 00 00 01 01 01 08 00 45 00 00 1c 00 01 00 00 40 01 64 dc 0a 01 01 01 0a dnl
+01 01 02 08 00 f7 ff ff ff ff ff > /dev/null])
+
+sleep 1
+
+dnl Prefer the OpenFlow rules, because different datapaths will behave slightly
+dnl differently with respect to the exact dp rules.
+dnl
+dnl This is also why n_bytes, idle_age and any non-zero n_packets value (of
+dnl any number of digits) are normalized - some kernels with ipv6 enabled
+dnl will bump some of these counters non-deterministically.
+AT_CHECK([ovs-ofctl dump-flows br0 | grep -v NXST | dnl
+ strip_duration | grep -v arp | grep -v n_packets=0 | dnl
+ grep -v 'in_port=2 actions=output:1' | dnl
+ sed 's/n_bytes=[[0-9]]*/n_bytes=0/
+ s/idle_age=[[0-9]]*/idle_age=0/
+ s/n_packets=[[1-9]][[0-9]]*/n_packets=0/' | sort], [0], [dnl
+ cookie=0x0, table=0, n_packets=0, n_bytes=0, idle_age=0, ip,in_port=1 actions=ct(table=1,zone=64000)
+ cookie=0x0, table=1, n_packets=0, n_bytes=0, idle_age=0, ct_state=+new+trk,icmp,in_port=1 actions=output:2
+ cookie=0x0, table=1, n_packets=0, n_bytes=0, idle_age=0, ct_state=+new+trk,ip,in_port=1 actions=controller(userdata=00.de.ad.be.ef.ca.fe.01)
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
AT_BANNER([802.1ad])
AT_SETUP([802.1ad - vlan_limit])
diff --git a/tests/system-tso-macros.at b/tests/system-tso-macros.at
index 406334f3e0..1a80047619 100644
--- a/tests/system-tso-macros.at
+++ b/tests/system-tso-macros.at
@@ -29,3 +29,5 @@ m4_define([CONFIGURE_VETH_OFFLOADS],
[AT_CHECK([ethtool -K $1 sg on], [0], [ignore], [ignore])]
[AT_CHECK([ethtool -K $1 tso on], [0], [ignore], [ignore])]
)
+dnl CHECK_SYSTEM_TSO is defined as an empty macro here: invoking it is a no-op.
+m4_define([CHECK_SYSTEM_TSO], [])
diff --git a/tests/test-cmap.c b/tests/test-cmap.c
index 0705475606..f8cc4dd80a 100644
--- a/tests/test-cmap.c
+++ b/tests/test-cmap.c
@@ -74,6 +74,7 @@ check_cmap(struct cmap *cmap, const int values[], size_t n,
cmap_values[i++] = e->value;
}
assert(i == n);
+ assert(e == NULL);
/* Here we test iteration with cmap_next_position() */
i = 0;
@@ -107,6 +108,7 @@ check_cmap(struct cmap *cmap, const int values[], size_t n,
count += e->value == values[i];
}
assert(count == 1);
+ assert(e == NULL);
}
/* Check that all the values are there in batched lookup. */
@@ -130,6 +132,7 @@ check_cmap(struct cmap *cmap, const int values[], size_t n,
CMAP_NODE_FOR_EACH (e, node, nodes[k]) {
count += e->value == values[i + k];
}
+ assert(e == NULL);
}
assert(count == j); /* j elements in a batch. */
}
diff --git a/tests/test-hindex.c b/tests/test-hindex.c
index af06be5fcc..95e49284ee 100644
--- a/tests/test-hindex.c
+++ b/tests/test-hindex.c
@@ -265,6 +265,11 @@ test_hindex_for_each_safe(hash_func *hash)
i = 0;
n_remaining = n;
HINDEX_FOR_EACH_SAFE (e, next, node, &hindex) {
+ if (hindex_next(&hindex, &e->node) == NULL) {
+ assert(next == NULL);
+ } else {
+ assert(&next->node == hindex_next(&hindex, &e->node));
+ }
assert(i < n);
if (pattern & (1ul << e->value)) {
size_t j;
@@ -281,6 +286,7 @@ test_hindex_for_each_safe(hash_func *hash)
i++;
}
assert(i == n);
+ assert(next == NULL);
for (i = 0; i < n; i++) {
if (pattern & (1ul << i)) {
diff --git a/tests/test-hmap.c b/tests/test-hmap.c
index 9259b0b3fc..47b4755386 100644
--- a/tests/test-hmap.c
+++ b/tests/test-hmap.c
@@ -62,6 +62,7 @@ check_hmap(struct hmap *hmap, const int values[], size_t n,
hmap_values[i++] = e->value;
}
assert(i == n);
+ assert(e == NULL);
memcpy(sort_values, values, sizeof *sort_values * n);
qsort(sort_values, n, sizeof *sort_values, compare_ints);
@@ -82,6 +83,7 @@ check_hmap(struct hmap *hmap, const int values[], size_t n,
count += e->value == values[i];
}
assert(count == 1);
+ assert(e == NULL);
}
/* Check counters. */
@@ -243,6 +245,11 @@ test_hmap_for_each_safe(hash_func *hash)
i = 0;
n_remaining = n;
HMAP_FOR_EACH_SAFE (e, next, node, &hmap) {
+ if (hmap_next(&hmap, &e->node) == NULL) {
+ assert(next == NULL);
+ } else {
+ assert(&next->node == hmap_next(&hmap, &e->node));
+ }
assert(i < n);
if (pattern & (1ul << e->value)) {
size_t j;
@@ -259,6 +266,8 @@ test_hmap_for_each_safe(hash_func *hash)
i++;
}
assert(i == n);
+ assert(next == NULL);
+ assert(e == NULL);
for (i = 0; i < n; i++) {
if (pattern & (1ul << i)) {
@@ -308,6 +317,7 @@ test_hmap_for_each_pop(hash_func *hash)
i++;
}
assert(i == n);
+ assert(e == NULL);
hmap_destroy(&hmap);
}
diff --git a/tests/test-json.c b/tests/test-json.c
index a7ee595e0b..072a537252 100644
--- a/tests/test-json.c
+++ b/tests/test-json.c
@@ -22,6 +22,8 @@
#include <getopt.h>
#include <stdio.h>
#include "ovstest.h"
+#include "random.h"
+#include "timeval.h"
#include "util.h"
/* --pretty: If set, the JSON output is pretty-printed, instead of printed as
@@ -157,3 +159,78 @@ test_json_main(int argc, char *argv[])
}
OVSTEST_REGISTER("test-json", test_json_main);
+
+/* Benchmarks json_to_string() on large JSON strings made of random bytes.
+ *
+ * Each 'configs' row holds the buffer size 'n', the chance (out of 100, as
+ * compared against random_range(100)) that a byte is a double quote, the
+ * chance that it is a low byte from random_range(' ' - 1) + 1, and the number
+ * of serializations to average over.  Prints one line per row, then exits. */
+static void
+json_string_benchmark_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
+{
+    struct {
+        int n;
+        int quote_probability;
+        int special_probability;
+        int iter;
+    } configs[] = {
+        { 100000, 0, 0, 1000, },
+        { 100000, 2, 1, 1000, },
+        { 100000, 10, 1, 1000, },
+        { 10000000, 0, 0, 100, },
+        { 10000000, 2, 1, 100, },
+        { 10000000, 10, 1, 100, },
+        { 100000000, 0, 0, 10, },
+        { 100000000, 2, 1, 10, },
+        { 100000000, 10, 1, 10, },
+    };
+
+    printf(" SIZE Q S TIME\n");
+    printf("--------------------------------------\n");
+
+    for (size_t i = 0; i < ARRAY_SIZE(configs); i++) {
+        int special = configs[i].special_probability;
+        int quote = configs[i].quote_probability;
+        int iter = configs[i].iter;
+        int n = configs[i].n;
+        char *str = xzalloc(n);
+
+        /* Only the first n - 1 bytes are written; the last byte is left as
+         * xzalloc() returned it (presumably zero, terminating the string). */
+        for (int j = 0; j < n - 1; j++) {
+            int r = random_range(100);
+
+            if (r < special) {
+                str[j] = random_range(' ' - 1) + 1;
+            } else if (r < special + quote) {
+                str[j] = '"';
+            } else {
+                str[j] = random_range(256 - ' ') + ' ';
+            }
+        }
+
+        printf("%-11d %-2d %-2d: ", n, quote, special);
+        fflush(stdout);
+
+        struct json *json = json_string_create_nocopy(str);
+        /* Allocate the result array before starting the clock. */
+        char **res = xzalloc(iter * sizeof *res);
+        uint64_t start = time_msec();
+
+        for (int j = 0; j < iter; j++) {
+            res[j] = json_to_string(json, 0);
+        }
+
+        printf("%16.3lf ms\n", (double) (time_msec() - start) / iter);
+        json_destroy(json);
+        for (int j = 0; j < iter; j++) {
+            free(res[j]);
+        }
+        free(res);
+    }
+
+    exit(0);
+}
+
+OVSTEST_REGISTER("json-string-benchmark", json_string_benchmark_main);
diff --git a/tests/test-list.c b/tests/test-list.c
index 6f1fb059bc..648e02a5e2 100644
--- a/tests/test-list.c
+++ b/tests/test-list.c
@@ -61,7 +61,7 @@ check_list(struct ovs_list *list, const int values[], size_t n)
assert(e->value == values[i]);
i++;
}
- assert(&e->node == list);
+ assert(e == NULL);
assert(i == n);
i = 0;
@@ -70,7 +70,7 @@ check_list(struct ovs_list *list, const int values[], size_t n)
assert(e->value == values[n - i - 1]);
i++;
}
- assert(&e->node == list);
+ assert(e == NULL);
assert(i == n);
assert(ovs_list_is_empty(list) == !n);
@@ -135,6 +135,13 @@ test_list_for_each_safe(void)
values_idx = 0;
n_remaining = n;
LIST_FOR_EACH_SAFE (e, next, node, &list) {
+ /* "next" is valid as long as it's not pointing to &list. */
+ if (&e->node == list.prev) {
+ assert(next == NULL);
+ } else {
+ assert(&next->node == e->node.next);
+ }
+
assert(i < n);
if (pattern & (1ul << i)) {
ovs_list_remove(&e->node);
@@ -148,7 +155,8 @@ test_list_for_each_safe(void)
i++;
}
assert(i == n);
- assert(&e->node == &list);
+ assert(e == NULL);
+ assert(next == NULL);
for (i = 0; i < n; i++) {
if (pattern & (1ul << i)) {
diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c
index daa55dab7b..57572cd3ed 100644
--- a/tests/test-ovsdb.c
+++ b/tests/test-ovsdb.c
@@ -512,6 +512,18 @@ do_diff_data(struct ovs_cmdl_context *ctx)
ovs_fatal(0, "failed to apply diff");
}
+ /* Apply diff to 'old' in place. */
+ error = ovsdb_datum_apply_diff_in_place(&old, &diff, &type);
+ if (error) {
+ char *string = ovsdb_error_to_string_free(error);
+ ovs_fatal(0, "%s", string);
+ }
+
+ /* Test to make sure 'old' equals 'new' now. */
+ if (!ovsdb_datum_equals(&new, &old, &type)) {
+ ovs_fatal(0, "failed to apply diff in place");
+ }
+
/* Print diff */
json = ovsdb_datum_to_json(&diff, &type);
printf ("diff: ");
@@ -522,6 +534,11 @@ do_diff_data(struct ovs_cmdl_context *ctx)
printf ("apply diff: ");
print_and_free_json(json);
+ /* Print updated 'old' */
+ json = ovsdb_datum_to_json(&old, &type);
+ printf ("apply diff in place: ");
+ print_and_free_json(json);
+
ovsdb_datum_destroy(&new, &type);
ovsdb_datum_destroy(&old, &type);
ovsdb_datum_destroy(&diff, &type);
@@ -1862,7 +1879,8 @@ print_and_log(const char *format, ...)
}
static char *
-format_idl_row(const struct ovsdb_idl_row *row, int step, const char *contents)
+format_idl_row(const struct ovsdb_idl_row *row, int step, const char *contents,
+ bool terse)
{
const char *change_str =
!ovsdb_idl_track_is_set(row->table)
@@ -1873,9 +1891,13 @@ format_idl_row(const struct ovsdb_idl_row *row, int step, const char *contents)
? "deleted row: "
: "";
- return xasprintf("%03d: table %s: %s%s uuid=" UUID_FMT,
- step, row->table->class_->name, change_str, contents,
- UUID_ARGS(&row->uuid));
+ if (terse) {
+ return xasprintf("%03d: table %s", step, row->table->class_->name);
+ } else {
+ return xasprintf("%03d: table %s: %s%s uuid=" UUID_FMT,
+ step, row->table->class_->name, change_str,
+ contents, UUID_ARGS(&row->uuid));
+ }
}
static void
@@ -1998,7 +2020,7 @@ print_idl_row_updated_singleton(const struct idltest_singleton *sng, int step)
}
static void
-print_idl_row_simple(const struct idltest_simple *s, int step)
+print_idl_row_simple(const struct idltest_simple *s, int step, bool terse)
{
struct ds msg = DS_EMPTY_INITIALIZER;
ds_put_format(&msg, "i=%"PRId64" r=%g b=%s s=%s u="UUID_FMT" ia=[",
@@ -2025,7 +2047,7 @@ print_idl_row_simple(const struct idltest_simple *s, int step)
}
ds_put_cstr(&msg, "]");
- char *row_msg = format_idl_row(&s->header_, step, ds_cstr(&msg));
+ char *row_msg = format_idl_row(&s->header_, step, ds_cstr(&msg), terse);
print_and_log("%s", row_msg);
ds_destroy(&msg);
free(row_msg);
@@ -2034,7 +2056,7 @@ print_idl_row_simple(const struct idltest_simple *s, int step)
}
static void
-print_idl_row_link1(const struct idltest_link1 *l1, int step)
+print_idl_row_link1(const struct idltest_link1 *l1, int step, bool terse)
{
struct ds msg = DS_EMPTY_INITIALIZER;
ds_put_format(&msg, "i=%"PRId64" k=", l1->i);
@@ -2053,7 +2075,7 @@ print_idl_row_link1(const struct idltest_link1 *l1, int step)
ds_put_format(&msg, "%"PRId64, l1->l2->i);
}
- char *row_msg = format_idl_row(&l1->header_, step, ds_cstr(&msg));
+ char *row_msg = format_idl_row(&l1->header_, step, ds_cstr(&msg), terse);
print_and_log("%s", row_msg);
ds_destroy(&msg);
free(row_msg);
@@ -2062,7 +2084,7 @@ print_idl_row_link1(const struct idltest_link1 *l1, int step)
}
static void
-print_idl_row_link2(const struct idltest_link2 *l2, int step)
+print_idl_row_link2(const struct idltest_link2 *l2, int step, bool terse)
{
struct ds msg = DS_EMPTY_INITIALIZER;
ds_put_format(&msg, "i=%"PRId64" l1=", l2->i);
@@ -2070,7 +2092,7 @@ print_idl_row_link2(const struct idltest_link2 *l2, int step)
ds_put_format(&msg, "%"PRId64, l2->l1->i);
}
- char *row_msg = format_idl_row(&l2->header_, step, ds_cstr(&msg));
+ char *row_msg = format_idl_row(&l2->header_, step, ds_cstr(&msg), terse);
print_and_log("%s", row_msg);
ds_destroy(&msg);
free(row_msg);
@@ -2079,7 +2101,7 @@ print_idl_row_link2(const struct idltest_link2 *l2, int step)
}
static void
-print_idl_row_simple3(const struct idltest_simple3 *s3, int step)
+print_idl_row_simple3(const struct idltest_simple3 *s3, int step, bool terse)
{
struct ds msg = DS_EMPTY_INITIALIZER;
size_t i;
@@ -2098,7 +2120,7 @@ print_idl_row_simple3(const struct idltest_simple3 *s3, int step)
}
ds_put_cstr(&msg, "]");
- char *row_msg = format_idl_row(&s3->header_, step, ds_cstr(&msg));
+ char *row_msg = format_idl_row(&s3->header_, step, ds_cstr(&msg), terse);
print_and_log("%s", row_msg);
ds_destroy(&msg);
free(row_msg);
@@ -2107,12 +2129,12 @@ print_idl_row_simple3(const struct idltest_simple3 *s3, int step)
}
static void
-print_idl_row_simple4(const struct idltest_simple4 *s4, int step)
+print_idl_row_simple4(const struct idltest_simple4 *s4, int step, bool terse)
{
struct ds msg = DS_EMPTY_INITIALIZER;
ds_put_format(&msg, "name=%s", s4->name);
- char *row_msg = format_idl_row(&s4->header_, step, ds_cstr(&msg));
+ char *row_msg = format_idl_row(&s4->header_, step, ds_cstr(&msg), terse);
print_and_log("%s", row_msg);
ds_destroy(&msg);
free(row_msg);
@@ -2121,7 +2143,7 @@ print_idl_row_simple4(const struct idltest_simple4 *s4, int step)
}
static void
-print_idl_row_simple6(const struct idltest_simple6 *s6, int step)
+print_idl_row_simple6(const struct idltest_simple6 *s6, int step, bool terse)
{
struct ds msg = DS_EMPTY_INITIALIZER;
ds_put_format(&msg, "name=%s ", s6->name);
@@ -2132,7 +2154,7 @@ print_idl_row_simple6(const struct idltest_simple6 *s6, int step)
}
ds_put_cstr(&msg, "]");
- char *row_msg = format_idl_row(&s6->header_, step, ds_cstr(&msg));
+ char *row_msg = format_idl_row(&s6->header_, step, ds_cstr(&msg), terse);
print_and_log("%s", row_msg);
ds_destroy(&msg);
free(row_msg);
@@ -2141,12 +2163,13 @@ print_idl_row_simple6(const struct idltest_simple6 *s6, int step)
}
static void
-print_idl_row_singleton(const struct idltest_singleton *sng, int step)
+print_idl_row_singleton(const struct idltest_singleton *sng, int step,
+ bool terse)
{
struct ds msg = DS_EMPTY_INITIALIZER;
ds_put_format(&msg, "name=%s", sng->name);
- char *row_msg = format_idl_row(&sng->header_, step, ds_cstr(&msg));
+ char *row_msg = format_idl_row(&sng->header_, step, ds_cstr(&msg), terse);
print_and_log("%s", row_msg);
ds_destroy(&msg);
free(row_msg);
@@ -2155,7 +2178,7 @@ print_idl_row_singleton(const struct idltest_singleton *sng, int step)
}
static void
-print_idl(struct ovsdb_idl *idl, int step)
+print_idl(struct ovsdb_idl *idl, int step, bool terse)
{
const struct idltest_simple3 *s3;
const struct idltest_simple4 *s4;
@@ -2167,31 +2190,31 @@ print_idl(struct ovsdb_idl *idl, int step)
int n = 0;
IDLTEST_SIMPLE_FOR_EACH (s, idl) {
- print_idl_row_simple(s, step);
+ print_idl_row_simple(s, step, terse);
n++;
}
IDLTEST_LINK1_FOR_EACH (l1, idl) {
- print_idl_row_link1(l1, step);
+ print_idl_row_link1(l1, step, terse);
n++;
}
IDLTEST_LINK2_FOR_EACH (l2, idl) {
- print_idl_row_link2(l2, step);
+ print_idl_row_link2(l2, step, terse);
n++;
}
IDLTEST_SIMPLE3_FOR_EACH (s3, idl) {
- print_idl_row_simple3(s3, step);
+ print_idl_row_simple3(s3, step, terse);
n++;
}
IDLTEST_SIMPLE4_FOR_EACH (s4, idl) {
- print_idl_row_simple4(s4, step);
+ print_idl_row_simple4(s4, step, terse);
n++;
}
IDLTEST_SIMPLE6_FOR_EACH (s6, idl) {
- print_idl_row_simple6(s6, step);
+ print_idl_row_simple6(s6, step, terse);
n++;
}
IDLTEST_SINGLETON_FOR_EACH (sng, idl) {
- print_idl_row_singleton(sng, step);
+ print_idl_row_singleton(sng, step, terse);
n++;
}
if (!n) {
@@ -2200,7 +2223,7 @@ print_idl(struct ovsdb_idl *idl, int step)
}
static void
-print_idl_track(struct ovsdb_idl *idl, int step)
+print_idl_track(struct ovsdb_idl *idl, int step, bool terse)
{
const struct idltest_simple3 *s3;
const struct idltest_simple4 *s4;
@@ -2211,27 +2234,27 @@ print_idl_track(struct ovsdb_idl *idl, int step)
int n = 0;
IDLTEST_SIMPLE_FOR_EACH_TRACKED (s, idl) {
- print_idl_row_simple(s, step);
+ print_idl_row_simple(s, step, terse);
n++;
}
IDLTEST_LINK1_FOR_EACH_TRACKED (l1, idl) {
- print_idl_row_link1(l1, step);
+ print_idl_row_link1(l1, step, terse);
n++;
}
IDLTEST_LINK2_FOR_EACH_TRACKED (l2, idl) {
- print_idl_row_link2(l2, step);
+ print_idl_row_link2(l2, step, terse);
n++;
}
IDLTEST_SIMPLE3_FOR_EACH_TRACKED (s3, idl) {
- print_idl_row_simple3(s3, step);
+ print_idl_row_simple3(s3, step, terse);
n++;
}
IDLTEST_SIMPLE4_FOR_EACH_TRACKED (s4, idl) {
- print_idl_row_simple4(s4, step);
+ print_idl_row_simple4(s4, step, terse);
n++;
}
IDLTEST_SIMPLE6_FOR_EACH_TRACKED (s6, idl) {
- print_idl_row_simple6(s6, step);
+ print_idl_row_simple6(s6, step, terse);
n++;
}
@@ -2634,6 +2657,13 @@ do_idl(struct ovs_cmdl_context *ctx)
char *arg = ctx->argv[i];
struct jsonrpc_msg *request, *reply;
+ bool terse = false;
+ if (*arg == '?') {
+ /* We're only interested in terse table contents. */
+ terse = true;
+ arg++;
+ }
+
if (*arg == '+') {
/* The previous transaction didn't change anything. */
arg++;
@@ -2654,10 +2684,10 @@ do_idl(struct ovs_cmdl_context *ctx)
/* Print update. */
if (track) {
- print_idl_track(idl, step++);
+ print_idl_track(idl, step++, terse);
ovsdb_idl_track_clear(idl);
} else {
- print_idl(idl, step++);
+ print_idl(idl, step++, terse);
}
}
seqno = ovsdb_idl_get_seqno(idl);
@@ -2710,7 +2740,7 @@ do_idl(struct ovs_cmdl_context *ctx)
ovsdb_idl_wait(idl);
poll_block();
}
- print_idl(idl, step++);
+ print_idl(idl, step++, false);
ovsdb_idl_track_clear(idl);
ovsdb_idl_destroy(idl);
print_and_log("%03d: done", step);
@@ -2727,13 +2757,15 @@ print_idl_row_simple2(const struct idltest_simple2 *s, int step)
printf("%03d: name=%s smap=[",
step, s->name);
for (i = 0; i < smap->n; i++) {
- printf("[%s : %s]%s", smap->keys[i].string, smap->values[i].string,
- i < smap->n-1? ",": "");
+ printf("[%s : %s]%s",
+ smap->keys[i].s->string, smap->values[i].s->string,
+ i < smap->n - 1 ? "," : "");
}
printf("] imap=[");
for (i = 0; i < imap->n; i++) {
- printf("[%"PRId64" : %s]%s", imap->keys[i].integer, imap->values[i].string,
- i < imap->n-1? ",":"");
+ printf("[%"PRId64" : %s]%s",
+ imap->keys[i].integer, imap->values[i].s->string,
+ i < imap->n - 1 ? "," : "");
}
printf("]\n");
}
@@ -2802,8 +2834,8 @@ do_idl_partial_update_map_column(struct ovs_cmdl_context *ctx)
myTxn = ovsdb_idl_txn_create(idl);
smap = idltest_simple2_get_smap(myRow, OVSDB_TYPE_STRING,
OVSDB_TYPE_STRING);
- strcpy(key_to_delete, smap->keys[0].string);
- idltest_simple2_update_smap_delkey(myRow, smap->keys[0].string);
+ ovs_strlcpy(key_to_delete, smap->keys[0].s->string, sizeof key_to_delete);
+ idltest_simple2_update_smap_delkey(myRow, smap->keys[0].s->string);
ovsdb_idl_txn_commit_block(myTxn);
ovsdb_idl_txn_destroy(myTxn);
ovsdb_idl_get_initial_snapshot(idl);
@@ -2829,7 +2861,7 @@ dump_simple3(struct ovsdb_idl *idl,
int step)
{
IDLTEST_SIMPLE3_FOR_EACH(myRow, idl) {
- print_idl_row_simple3(myRow, step);
+ print_idl_row_simple3(myRow, step, false);
}
}
@@ -2971,7 +3003,7 @@ do_idl_compound_index_with_ref(struct ovs_cmdl_context *ctx)
idltest_simple3_index_set_uref(equal, &myRow2, 1);
printf("%03d: Query using index with reference\n", step++);
IDLTEST_SIMPLE3_FOR_EACH_EQUAL (myRow, equal, index) {
- print_idl_row_simple3(myRow, step++);
+ print_idl_row_simple3(myRow, step++, false);
}
idltest_simple3_index_destroy_row(equal);
diff --git a/tests/test-ovsdb.py b/tests/test-ovsdb.py
index 5bc0bf6814..853264f22b 100644
--- a/tests/test-ovsdb.py
+++ b/tests/test-ovsdb.py
@@ -232,75 +232,87 @@ def get_singleton_table_printable_row(row):
return "name=%s" % row.name
-def print_row(table, row, step, contents):
- s = "%03d: table %s: %s " % (step, table, contents)
- s += get_simple_printable_row_string(row, ["uuid"])
+def print_row(table, row, step, contents, terse):
+ if terse:
+ s = "%03d: table %s" % (step, table)
+ else:
+ s = "%03d: table %s: %s " % (step, table, contents)
+ s += get_simple_printable_row_string(row, ["uuid"])
print(s)
-def print_idl(idl, step):
+def print_idl(idl, step, terse=False):
n = 0
if "simple" in idl.tables:
simple = idl.tables["simple"].rows
for row in simple.values():
print_row("simple", row, step,
- get_simple_table_printable_row(row))
+ get_simple_table_printable_row(row),
+ terse)
n += 1
if "simple2" in idl.tables:
simple2 = idl.tables["simple2"].rows
for row in simple2.values():
print_row("simple2", row, step,
- get_simple2_table_printable_row(row))
+ get_simple2_table_printable_row(row),
+ terse)
n += 1
if "simple3" in idl.tables:
simple3 = idl.tables["simple3"].rows
for row in simple3.values():
print_row("simple3", row, step,
- get_simple3_table_printable_row(row))
+ get_simple3_table_printable_row(row),
+ terse)
n += 1
if "simple4" in idl.tables:
simple4 = idl.tables["simple4"].rows
for row in simple4.values():
print_row("simple4", row, step,
- get_simple4_table_printable_row(row))
+ get_simple4_table_printable_row(row),
+ terse)
n += 1
if "simple5" in idl.tables:
simple5 = idl.tables["simple5"].rows
for row in simple5.values():
print_row("simple5", row, step,
- get_simple5_table_printable_row(row))
+ get_simple5_table_printable_row(row),
+ terse)
n += 1
if "simple6" in idl.tables:
simple6 = idl.tables["simple6"].rows
for row in simple6.values():
print_row("simple6", row, step,
- get_simple6_table_printable_row(row))
+ get_simple6_table_printable_row(row),
+ terse)
n += 1
if "link1" in idl.tables:
l1 = idl.tables["link1"].rows
for row in l1.values():
print_row("link1", row, step,
- get_link1_table_printable_row(row))
+ get_link1_table_printable_row(row),
+ terse)
n += 1
if "link2" in idl.tables:
l2 = idl.tables["link2"].rows
for row in l2.values():
print_row("link2", row, step,
- get_link2_table_printable_row(row))
+ get_link2_table_printable_row(row),
+ terse)
n += 1
if "singleton" in idl.tables:
sng = idl.tables["singleton"].rows
for row in sng.values():
print_row("singleton", row, step,
- get_singleton_table_printable_row(row))
+ get_singleton_table_printable_row(row),
+ terse)
n += 1
if not n:
@@ -701,6 +713,12 @@ def do_idl(schema_file, remote, *commands):
step += 1
for command in commands:
+ terse = False
+ if command.startswith("?"):
+ # We're only interested in terse table contents.
+ terse = True
+ command = command[1:]
+
if command.startswith("+"):
# The previous transaction didn't change anything.
command = command[1:]
@@ -714,7 +732,7 @@ def do_idl(schema_file, remote, *commands):
rpc.wait(poller)
poller.block()
- print_idl(idl, step)
+ print_idl(idl, step, terse)
step += 1
seqno = idl.change_seqno
diff --git a/tests/test-rcu.c b/tests/test-rcu.c
index 965f3c49f3..bb17092bf0 100644
--- a/tests/test-rcu.c
+++ b/tests/test-rcu.c
@@ -35,7 +35,7 @@ quiescer_main(void *aux OVS_UNUSED)
}
static void
-test_rcu_quiesce(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
+test_rcu_quiesce(void)
{
pthread_t quiescer;
@@ -48,4 +48,29 @@ test_rcu_quiesce(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
xpthread_join(quiescer, NULL);
}
-OVSTEST_REGISTER("test-rcu-quiesce", test_rcu_quiesce);
+static void
+add_count(void *_count)
+{
+ unsigned *count = (unsigned *)_count;
+ (*count) ++;
+}
+
+static void
+test_rcu_barrier(void)
+{
+ unsigned count = 0;
+ for (int i = 0; i < 10; i ++) {
+ ovsrcu_postpone(add_count, &count);
+ }
+
+ ovsrcu_barrier();
+ ovs_assert(count == 10);
+}
+
+static void
+test_rcu(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) {
+ test_rcu_quiesce();
+ test_rcu_barrier();
+}
+
+OVSTEST_REGISTER("test-rcu", test_rcu);
diff --git a/tests/tunnel-push-pop-ipv6.at b/tests/tunnel-push-pop-ipv6.at
index 59723e63b8..c7665a1aeb 100644
--- a/tests/tunnel-push-pop-ipv6.at
+++ b/tests/tunnel-push-pop-ipv6.at
@@ -432,6 +432,42 @@ AT_CHECK([ovs-appctl dpif/dump-flows int-br | grep 'in_port(6081)'], [0], [dnl
tunnel(tun_id=0x7b,ipv6_src=2001:cafe::92,ipv6_dst=2001:cafe::88,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(-df-csum+key)),recirc_id(0),in_port(6081),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=3,rule_cookie=0,controller_id=0,max_len=65535))
])
+dnl Receive VXLAN with different MAC and verify that the neigh cache gets updated
+AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000f8bc1244cafe86dd60000000003a11402001cafe0000000000000000000000922001cafe000000000000000000000088c85312b5003abc700c00000300007b00ffffffffffff00000000000008004500001c0001000040117cce7f0000017f0000010035003500080172'])
+
+ovs-appctl time/warp 1000
+ovs-appctl time/warp 1000
+
+dnl Check VXLAN tunnel push
+AT_CHECK([ovs-ofctl add-flow int-br action=2])
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=36:b1:ee:7c:01:01,dst=36:b1:ee:7c:01:02),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+ [Datapath actions: clone(tnl_push(tnl_port(4789),header(size=70,type=4,eth(dst=f8:bc:12:44:ca:fe,src=aa:55:aa:55:00:00,dl_type=0x86dd),ipv6(src=2001:cafe::88,dst=2001:cafe::92,label=0,proto=17,tclass=0x0,hlimit=64),udp(src=0,dst=4789,csum=0xffff),vxlan(flags=0x8000000,vni=0x7b)),out_port(100)),1)
+])
+
+AT_CHECK([ovs-appctl tnl/arp/show | tail -n+3 | sort], [0], [dnl
+2001:cafe::92 f8:bc:12:44:ca:fe br0
+2001:cafe::93 f8:bc:12:44:34:b7 br0
+])
+
+dnl Restore and check the cache entries
+AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000f8bc124434b686dd60000000003a11402001cafe0000000000000000000000922001cafe000000000000000000000088c85312b5003abc700c00000300007b00ffffffffffff00000000000008004500001c0001000040117cce7f0000017f0000010035003500080172'])
+
+ovs-appctl time/warp 1000
+ovs-appctl time/warp 1000
+
+dnl Check VXLAN tunnel push
+AT_CHECK([ovs-ofctl add-flow int-br action=2])
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=36:b1:ee:7c:01:01,dst=36:b1:ee:7c:01:02),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+ [Datapath actions: clone(tnl_push(tnl_port(4789),header(size=70,type=4,eth(dst=f8:bc:12:44:34:b6,src=aa:55:aa:55:00:00,dl_type=0x86dd),ipv6(src=2001:cafe::88,dst=2001:cafe::92,label=0,proto=17,tclass=0x0,hlimit=64),udp(src=0,dst=4789,csum=0xffff),vxlan(flags=0x8000000,vni=0x7b)),out_port(100)),1)
+])
+
+AT_CHECK([ovs-appctl tnl/arp/show | tail -n+3 | sort], [0], [dnl
+2001:cafe::92 f8:bc:12:44:34:b6 br0
+2001:cafe::93 f8:bc:12:44:34:b7 br0
+])
+
ovs-appctl time/warp 10000
AT_CHECK([ovs-vsctl del-port int-br t3 \
diff --git a/tests/tunnel-push-pop.at b/tests/tunnel-push-pop.at
index 48c5de9d19..a441de3ef2 100644
--- a/tests/tunnel-push-pop.at
+++ b/tests/tunnel-push-pop.at
@@ -499,6 +499,28 @@ AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port [[37]]' | sort], [0], [dnl
port 7: rx pkts=5, bytes=434, drop=?, errs=?, frame=?, over=?, crc=?
])
+dnl Send out packets received from L3GRE tunnel back to L3GRE tunnel
+AT_CHECK([ovs-ofctl del-flows int-br])
+AT_CHECK([ovs-ofctl add-flow int-br "in_port=7,actions=set_field:3->in_port,7"])
+AT_CHECK([ovs-vsctl -- set Interface br0 options:pcap=br0.pcap])
+
+AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637'])
+AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637'])
+AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637'])
+
+ovs-appctl time/warp 1000
+
+AT_CHECK([ovs-pcap p0.pcap > p0.pcap.txt 2>&1])
+AT_CHECK([tail -6 p0.pcap.txt], [0], [dnl
+aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637
+001b213cab64aa55aa55000008004500007000004000402f33aa010102580101025c20000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637
+aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637
+001b213cab64aa55aa55000008004500007000004000402f33aa010102580101025c20000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637
+aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637
+001b213cab64aa55aa55000008004500007000004000402f33aa010102580101025c20000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637
+])
+
+
dnl Check decapsulation of Geneve packet with options
AT_CAPTURE_FILE([ofctl_monitor.log])
AT_CHECK([ovs-ofctl monitor int-br 65534 --detach --no-chdir --pidfile 2> ofctl_monitor.log])
@@ -518,8 +540,43 @@ icmp,vlan_tci=0x0000,dl_src=be:b6:f4:e1:49:4a,dl_dst=fe:71:d8:83:72:4f,nw_src=30
AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port 5'], [0], [dnl
port 5: rx pkts=1, bytes=98, drop=?, errs=?, frame=?, over=?, crc=?
])
-AT_CHECK([ovs-appctl dpif/dump-flows int-br | grep 'in_port(6081)'], [0], [dnl
-tunnel(tun_id=0x7b,src=1.1.2.92,dst=1.1.2.88,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(-df-csum+key)),recirc_id(0),in_port(6081),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=2,rule_cookie=0,controller_id=0,max_len=65535))
+AT_CHECK([ovs-appctl dpif/dump-flows int-br | grep 'in_port(6081)' | sed -e 's/recirc_id=[[0-9]]*/recirc_id=<cleared>/g'], [0], [dnl
+tunnel(tun_id=0x7b,src=1.1.2.92,dst=1.1.2.88,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(-df-csum+key)),recirc_id(0),in_port(6081),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=<cleared>,rule_cookie=0,controller_id=0,max_len=65535))
+])
+
+dnl Receive VXLAN with different MAC and verify that the neigh cache gets updated
+AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000f8bc1244cafe08004500004e00010000401173e90101025c01010258c85312b5003a8cd40c00000300007b00ffffffffffff00000000000008004500001c0001000040117cce7f0000017f0000010035003500080172'])
+
+ovs-appctl time/warp 1000
+ovs-appctl time/warp 1000
+
+dnl Check VXLAN tunnel push
+AT_CHECK([ovs-ofctl add-flow int-br action=2])
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=36:b1:ee:7c:01:01,dst=36:b1:ee:7c:01:02),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+ [Datapath actions: clone(tnl_push(tnl_port(4789),header(size=50,type=4,eth(dst=f8:bc:12:44:ca:fe,src=aa:55:aa:55:00:00,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=17,tos=0,ttl=64,frag=0x4000),udp(src=0,dst=4789,csum=0x0),vxlan(flags=0x8000000,vni=0x7b)),out_port(100)),1)
+])
+
+AT_CHECK([ovs-appctl tnl/neigh/show | tail -n+3 | sort], [0], [dnl
+1.1.2.92 f8:bc:12:44:ca:fe br0
+1.1.2.93 f8:bc:12:44:34:b7 br0
+])
+
+dnl Restore and check the cache entries
+AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000f8bc124434b608004500004e00010000401173e90101025c01010258c85312b5003a8cd40c00000300007b00ffffffffffff00000000000008004500001c0001000040117cce7f0000017f0000010035003500080172'])
+
+ovs-appctl time/warp 1000
+ovs-appctl time/warp 1000
+
+dnl Check VXLAN tunnel push
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=36:b1:ee:7c:01:01,dst=36:b1:ee:7c:01:02),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+ [Datapath actions: clone(tnl_push(tnl_port(4789),header(size=50,type=4,eth(dst=f8:bc:12:44:34:b6,src=aa:55:aa:55:00:00,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=17,tos=0,ttl=64,frag=0x4000),udp(src=0,dst=4789,csum=0x0),vxlan(flags=0x8000000,vni=0x7b)),out_port(100)),1)
+])
+
+AT_CHECK([ovs-appctl tnl/neigh/show | tail -n+3 | sort], [0], [dnl
+1.1.2.92 f8:bc:12:44:34:b6 br0
+1.1.2.93 f8:bc:12:44:34:b7 br0
])
ovs-appctl time/warp 10000
@@ -595,6 +652,64 @@ OVS_WAIT_UNTIL([test `ovs-pcap p0.pcap | grep 50540000000a5054000000091235 | wc
OVS_VSWITCHD_STOP
AT_CLEANUP
+AT_SETUP([tunnel_push_pop - packet_out debug_slow])
+
+OVS_VSWITCHD_START(
+ [add-port br0 p0 dnl
+ -- set Interface p0 type=dummy ofport_request=1 dnl
+ other-config:hwaddr=aa:55:aa:55:00:00])
+AT_CHECK([ovs-appctl vlog/set dpif_netdev:dbg])
+AT_CHECK([ovs-vsctl add-br int-br -- set bridge int-br datapath_type=dummy])
+AT_CHECK([ovs-vsctl add-port int-br t2 dnl
+ -- set Interface t2 type=geneve options:remote_ip=1.1.2.92 dnl
+ options:key=123 ofport_request=2])
+
+dnl First setup dummy interface IP address, then add the route
+dnl so that tnl-port table can get valid IP address for the device.
+AT_CHECK([ovs-appctl netdev-dummy/ip4addr br0 1.1.2.88/24], [0], [OK
+])
+AT_CHECK([ovs-appctl ovs/route/add 1.1.2.92/24 br0], [0], [OK
+])
+AT_CHECK([ovs-ofctl add-flow br0 action=normal])
+
+dnl This ARP reply from p0 has two effects:
+dnl 1. The ARP cache will learn that 1.1.2.92 is at f8:bc:12:44:34:b6.
+dnl 2. The br0 mac learning will learn that f8:bc:12:44:34:b6 is on p0.
+AT_CHECK([
+ ovs-appctl netdev-dummy/receive p0 dnl
+ 'recirc_id(0),in_port(2),dnl
+ eth(src=f8:bc:12:44:34:b6,dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),dnl
+ arp(sip=1.1.2.92,tip=1.1.2.88,op=2,sha=f8:bc:12:44:34:b6,tha=00:00:00:00:00:00)'
+])
+
+AT_CHECK([ovs-vsctl -- set Interface p0 options:tx_pcap=p0.pcap])
+
+packet=50540000000a505400000009123
+dnl Source port is based on a packet hash, so it may differ depending on the
+dnl compiler flags and CPU type. Masked with '....'.
+encap=f8bc124434b6aa55aa5500000800450000320000400040113406010102580101025c....17c1001e00000000655800007b00
+
+dnl Output to tunnel from a int-br internal port.
+dnl Checking that the packet arrived and it was correctly encapsulated.
+AT_CHECK([ovs-ofctl add-flow int-br "in_port=LOCAL,actions=debug_slow,output:2"])
+AT_CHECK([ovs-appctl netdev-dummy/receive int-br "${packet}4"])
+OVS_WAIT_UNTIL([test `ovs-pcap p0.pcap | egrep "${encap}${packet}4" | wc -l` -ge 1])
+dnl Sending again to exercise the non-miss upcall path.
+AT_CHECK([ovs-appctl netdev-dummy/receive int-br "${packet}4"])
+OVS_WAIT_UNTIL([test `ovs-pcap p0.pcap | egrep "${encap}${packet}4" | wc -l` -ge 2])
+
+dnl Output to tunnel from the controller.
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out int-br CONTROLLER "debug_slow,output:2" "${packet}5"])
+OVS_WAIT_UNTIL([test `ovs-pcap p0.pcap | egrep "${encap}${packet}5" | wc -l` -ge 1])
+
+dnl Datapath actions should not have tunnel push action.
+AT_CHECK([ovs-appctl dpctl/dump-flows | grep -q tnl_push], [1])
+dnl There should be slow_path action instead.
+AT_CHECK([ovs-appctl dpctl/dump-flows | grep -q 'slow_path(action)'], [0])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
AT_SETUP([tunnel_push_pop - underlay bridge match])
OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy ofport_request=1 other-config:hwaddr=aa:55:aa:55:00:00])
@@ -645,3 +760,54 @@ NXST_FLOW reply:
OVS_VSWITCHD_STOP
AT_CLEANUP
+
+AT_SETUP([tunnel_push_pop - VXLAN access port])
+
+dnl Create bridge that has a MAC address.
+OVS_VSWITCHD_START([set bridge br0 datapath_type=dummy dnl
+ -- set Interface br0 other-config:hwaddr=aa:55:aa:55:00:00])
+AT_CHECK([ovs-vsctl add-port br0 p8 dnl
+ -- set Interface p8 type=dummy ofport_request=8])
+
+dnl Create another bridge.
+AT_CHECK([ovs-vsctl add-br ovs-tun0 -- set bridge ovs-tun0 datapath_type=dummy])
+
+dnl Add VXLAN port to this bridge.
+AT_CHECK([ovs-vsctl add-port ovs-tun0 tun0 dnl
+ -- set int tun0 type=vxlan options:remote_ip=10.0.0.11 dnl
+ -- add-port ovs-tun0 p7 dnl
+ -- set interface p7 type=dummy ofport_request=7])
+
+dnl Set VLAN tags, so that br0 and its port p8 have the same tag,
+dnl but ovs-tun0's port p7 has a different tag.
+AT_CHECK([ovs-vsctl set port p8 tag=42 dnl
+ -- set port br0 tag=42 dnl
+ -- set port p7 tag=200])
+
+dnl Set IP address and route for br0.
+AT_CHECK([ovs-appctl netdev-dummy/ip4addr br0 10.0.0.2/24], [0], [OK
+])
+AT_CHECK([ovs-appctl ovs/route/add 10.0.0.11/24 br0], [0], [OK
+])
+
+dnl Send an ARP reply to port p8 on br0, so that packets will be forwarded
+dnl to the learned port.
+AT_CHECK([ovs-ofctl add-flow br0 action=normal])
+
+AT_CHECK([ovs-appctl netdev-dummy/receive p8 'in_port(8),dnl
+ eth(src=aa:55:aa:66:00:00,dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),dnl
+ arp(sip=10.0.0.11,tip=10.0.0.2,op=2,sha=aa:55:aa:66:00:00,tha=00:00:00:00:00:00)'])
+
+AT_CHECK([ovs-appctl ofproto/trace ovs-tun0 in_port=p7], [0], [stdout])
+AT_CHECK([tail -2 stdout], [0], [dnl
+Megaflow: recirc_id=0,eth,in_port=7,dl_src=00:00:00:00:00:00,dnl
+dl_dst=00:00:00:00:00:00,dl_type=0x0000
+Datapath actions: push_vlan(vid=200,pcp=0),1,clone(tnl_push(tnl_port(4789),dnl
+header(size=50,type=4,eth(dst=aa:55:aa:66:00:00,src=aa:55:aa:55:00:00,dnl
+dl_type=0x0800),ipv4(src=10.0.0.2,dst=10.0.0.11,proto=17,tos=0,ttl=64,dnl
+frag=0x4000),udp(src=0,dst=4789,csum=0x0),vxlan(flags=0x8000000,vni=0x0)),dnl
+out_port(100)),8)
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
diff --git a/tests/tunnel.at b/tests/tunnel.at
index b8ae7caa9b..fd482aa872 100644
--- a/tests/tunnel.at
+++ b/tests/tunnel.at
@@ -126,7 +126,7 @@ AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl
AT_CHECK([ovs-appctl dpctl/add-flow "tunnel(dst=1.1.1.1,src=3.3.3.200/255.255.255.0,tp_dst=123,tp_src=1,ttl=64),recirc_id(0),in_port(1),eth(),eth_type(0x0800),ipv4()" "2"])
AT_CHECK([ovs-appctl dpctl/dump-flows | tail -1], [0], [dnl
-tunnel(src=3.3.3.200/255.255.255.0,dst=1.1.1.1,ttl=64,tp_src=1,tp_dst=123),recirc_id(0),in_port(1),eth_type(0x0800), packets:0, bytes:0, used:never, actions:2
+tunnel(src=3.3.3.200/255.255.255.0,dst=1.1.1.1,ttl=64,tp_src=1,tp_dst=123),recirc_id(0),in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:2
])
OVS_VSWITCHD_STOP
diff --git a/utilities/ovs-ctl.in b/utilities/ovs-ctl.in
index 71800795c0..e6e07f4763 100644
--- a/utilities/ovs-ctl.in
+++ b/utilities/ovs-ctl.in
@@ -421,7 +421,9 @@ Less important options for "start", "restart" and "force-reload-kmod":
--no-force-corefiles do not force on core dumps for OVS daemons
--no-mlockall do not lock all of ovs-vswitchd into memory
--ovsdb-server-priority=NICE set ovsdb-server's niceness (default: $OVSDB_SERVER_PRIORITY)
+ --ovsdb-server-options=OPTIONS additional options for ovsdb-server (example: '-vconsole:dbg -vfile:dbg')
--ovs-vswitchd-priority=NICE set ovs-vswitchd's niceness (default: $OVS_VSWITCHD_PRIORITY)
+ --ovs-vswitchd-options=OPTIONS additional options for ovs-vswitchd (example: '-vconsole:dbg -vfile:dbg')
--no-full-hostname set short hostname instead of full hostname
--no-record-hostname do not attempt to determine/record system
hostname as part of start command
diff --git a/utilities/ovs-lib.in b/utilities/ovs-lib.in
index 3eda01d3c1..13477a6a9e 100644
--- a/utilities/ovs-lib.in
+++ b/utilities/ovs-lib.in
@@ -519,13 +519,13 @@ join_cluster() {
LOCAL_ADDR="$3"
REMOTE_ADDR="$4"
- if test ! -e "$DB_FILE"; then
- ovsdb_tool join-cluster "$DB_FILE" "$SCHEMA_NAME" "$LOCAL_ADDR" "$REMOTE_ADDR"
- elif ovsdb_tool db-is-standalone "$DB_FILE"; then
- # Backup standalone database and join cluster.
+ if test -e "$DB_FILE" && ovsdb_tool db-is-standalone "$DB_FILE"; then
backup_db || return 1
+ rm "$DB_FILE"  # Quoted: the path may contain spaces or glob characters.
+ fi
+ if test ! -e "$DB_FILE"; then
action "Joining $DB_FILE to cluster" \
- ovsdb_tool join-cluster "$DB_FILE" "$SCHEMA_NAME" "$LOCAL_ADDR"
+ ovsdb_tool join-cluster "$DB_FILE" "$SCHEMA_NAME" "$LOCAL_ADDR" "$REMOTE_ADDR"
fi
}
diff --git a/utilities/ovs-save b/utilities/ovs-save
index 27ce3a9aad..a190902f4d 100755
--- a/utilities/ovs-save
+++ b/utilities/ovs-save
@@ -102,7 +102,7 @@ save_interfaces () {
get_highest_ofp_version() {
ovs-vsctl get bridge "$1" protocols | \
sed 's/[][]//g' | sed 's/\ //g' | \
- awk -F ',' '{ print (NF>1)? $(NF) : "OpenFlow14" }'
+ awk -F ',' '{ print (NF>0)? $(NF) : "OpenFlow14" }'
}
save_flows () {
@@ -150,7 +150,10 @@ save_flows () {
ovs-ofctl -O $ofp_version dump-flows --no-names --no-stats "$bridge" | \
sed -e '/NXST_FLOW/d' \
-e '/OFPST_FLOW/d' \
- -e 's/\(idle\|hard\)_age=[^,]*,//g' > \
+ -e 's/\(idle\|hard\)_age=[^,]*,//g' \
+ -e 's/igmp_type/tp_src/g' \
+ -e 's/igmp_code/tp_dst/g' \
+ -e 's/igmp/ip,nw_proto=2/g' > \
"$workdir/$bridge.flows.dump"
done
echo "rm -rf \"$workdir\""
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index cb7c5cb769..c790a56adf 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -4229,7 +4229,7 @@ bridge_configure_aa(struct bridge *br)
union ovsdb_atom atom;
atom.integer = m->isid;
- if (ovsdb_datum_find_key(mc, &atom, OVSDB_TYPE_INTEGER) == UINT_MAX) {
+ if (!ovsdb_datum_find_key(mc, &atom, OVSDB_TYPE_INTEGER, NULL)) {
VLOG_INFO("Deleting isid=%"PRIu32", vlan=%"PRIu16,
m->isid, m->vlan);
bridge_aa_mapping_destroy(m);
@@ -4826,7 +4826,7 @@ queue_ids_include(const struct ovsdb_datum *queues, int64_t target)
union ovsdb_atom atom;
atom.integer = target;
- return ovsdb_datum_find_key(queues, &atom, OVSDB_TYPE_INTEGER) != UINT_MAX;
+ return ovsdb_datum_find_key(queues, &atom, OVSDB_TYPE_INTEGER, NULL);
}
static void
@@ -5020,7 +5020,7 @@ bridge_configure_mirrors(struct bridge *br)
union ovsdb_atom atom;
atom.uuid = m->uuid;
- if (ovsdb_datum_find_key(mc, &atom, OVSDB_TYPE_UUID) == UINT_MAX) {
+ if (!ovsdb_datum_find_key(mc, &atom, OVSDB_TYPE_UUID, NULL)) {
mirror_destroy(m);
}
}