From 6c6af1cca976b65ca01e7cbb74a3f3387d8b21d4 Mon Sep 17 00:00:00 2001 From: Alfredo Moralejo Date: Jun 17 2022 11:48:07 +0000 Subject: Import openvswitch2.17-2.17.0-15 from FDP --- diff --git a/.openvswitch.metadata b/.openvswitch.metadata index 307e046..9189888 100644 --- a/.openvswitch.metadata +++ b/.openvswitch.metadata @@ -1,5 +1,6 @@ 002450621b33c5690060345b0aac25bc2426d675 SOURCES/docutils-0.12.tar.gz -15b9809476e3235bb8d1644d82a85d8beb325539 SOURCES/openvswitch-2.16.0.tar.gz +722b63cd114c21041abda7b38d7f14e46338e3e0 SOURCES/openvswitch-2.17.0.tar.gz d34f96421a86004aa5d26ecf975edefd09f948b1 SOURCES/Pygments-1.4.tar.gz 3a11f130c63b057532ca37fe49c8967d0cbae1d5 SOURCES/Sphinx-1.2.3.tar.gz -1a6cfbd2cb017ab6915076705d58a37af8fff708 SOURCES/dpdk-20.11.1.tar.xz +17331a86759beba4b6635ed530ce23b0b73c0744 SOURCES/dpdk-21.11.tar.xz +8509a716f9f936526f64fb23f313c5a9baf2f123 SOURCES/pyelftools-0.27.tar.gz diff --git a/SOURCES/openvswitch-2.16.0.patch b/SOURCES/openvswitch-2.16.0.patch deleted file mode 100644 index 471ef8b..0000000 --- a/SOURCES/openvswitch-2.16.0.patch +++ /dev/null @@ -1,12112 +0,0 @@ -diff --git a/.ci/linux-build.sh b/.ci/linux-build.sh -index 863f023888..c06e88c577 100755 ---- a/.ci/linux-build.sh -+++ b/.ci/linux-build.sh -@@ -216,7 +216,7 @@ fi - - if [ "$DPDK" ] || [ "$DPDK_SHARED" ]; then - if [ -z "$DPDK_VER" ]; then -- DPDK_VER="20.11.1" -+ DPDK_VER="20.11.4" - fi - install_dpdk $DPDK_VER - if [ "$CC" = "clang" ]; then -@@ -246,8 +246,8 @@ if [ "$ASAN" ]; then - export ASAN_OPTIONS='detect_leaks=1' - # -O2 generates few false-positive memory leak reports in test-ovsdb - # application, so lowering optimizations to -O1 here. 
-- CLFAGS_ASAN="-O1 -fno-omit-frame-pointer -fno-common -fsanitize=address" -- CFLAGS_FOR_OVS="${CFLAGS_FOR_OVS} ${CLFAGS_ASAN}" -+ CFLAGS_ASAN="-O1 -fno-omit-frame-pointer -fno-common -fsanitize=address" -+ CFLAGS_FOR_OVS="${CFLAGS_FOR_OVS} ${CFLAGS_ASAN}" - fi - - save_OPTS="${OPTS} $*" -diff --git a/.ci/linux-prepare.sh b/.ci/linux-prepare.sh -index c55125cf78..c0b7473eda 100755 ---- a/.ci/linux-prepare.sh -+++ b/.ci/linux-prepare.sh -@@ -20,9 +20,13 @@ cd sparse - make -j4 HAVE_LLVM= HAVE_SQLITE= install - cd .. - -+# Installing wheel separately because it may be needed to build some -+# of the packages during dependency backtracking and pip >= 22.0 will -+# abort backtracking on build failures: -+# https://github.com/pypa/pip/issues/10655 -+pip3 install --disable-pip-version-check --user wheel - pip3 install --disable-pip-version-check --user \ -- flake8 hacking sphinx pyOpenSSL wheel setuptools --pip3 install --user --upgrade docutils -+ flake8 'hacking>=3.0' sphinx setuptools - pip3 install --user 'meson==0.47.1' - - if [ "$M32" ]; then -diff --git a/.cirrus.yml b/.cirrus.yml -index 358f2ba256..a4d2a5bbcd 100644 ---- a/.cirrus.yml -+++ b/.cirrus.yml -@@ -2,14 +2,14 @@ freebsd_build_task: - - freebsd_instance: - matrix: -- image_family: freebsd-12-2-snap -- image_family: freebsd-11-4-snap -+ image_family: freebsd-12-3-snap -+ image_family: freebsd-13-0-snap - cpu: 4 -- memory: 8G -+ memory: 4G - - env: - DEPENDENCIES: automake libtool gmake gcc wget openssl python3 -- PY_DEPS: sphinx|openssl -+ PY_DEPS: sphinx - matrix: - COMPILER: gcc - COMPILER: clang -diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml -index e2350c6d9d..7434ad18ec 100644 ---- a/.github/workflows/build-and-test.yml -+++ b/.github/workflows/build-and-test.yml -@@ -127,7 +127,7 @@ jobs: - - name: set up python - uses: actions/setup-python@v2 - with: -- python-version: '3.x' -+ python-version: '3.9' - - - name: create ci signature file for the dpdk cache 
key - if: matrix.dpdk != '' || matrix.dpdk_shared != '' -@@ -215,7 +215,7 @@ jobs: - - name: set up python - uses: actions/setup-python@v2 - with: -- python-version: '3.x' -+ python-version: '3.9' - - name: install dependencies - run: brew install automake libtool - - name: prepare -diff --git a/.travis.yml b/.travis.yml -index 51d0511080..c7aeede06e 100644 ---- a/.travis.yml -+++ b/.travis.yml -@@ -17,7 +17,6 @@ addons: - - libjemalloc-dev - - libnuma-dev - - libpcap-dev -- - python3-openssl - - python3-pip - - python3-sphinx - - libelf-dev -diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst -index 68c9867b19..d62d575eba 100644 ---- a/Documentation/faq/releases.rst -+++ b/Documentation/faq/releases.rst -@@ -205,10 +205,10 @@ Q: What DPDK version does each Open vSwitch release work with? - 2.10.x 17.11.10 - 2.11.x 18.11.9 - 2.12.x 18.11.9 -- 2.13.x 19.11.8 -- 2.14.x 19.11.8 -- 2.15.x 20.11.1 -- 2.16.x 20.11.1 -+ 2.13.x 19.11.10 -+ 2.14.x 19.11.10 -+ 2.15.x 20.11.4 -+ 2.16.x 20.11.4 - ============ ======== - - Q: Are all the DPDK releases that OVS versions work with maintained? -diff --git a/Documentation/intro/install/dpdk.rst b/Documentation/intro/install/dpdk.rst -index d8fa931fab..9ce5285c58 100644 ---- a/Documentation/intro/install/dpdk.rst -+++ b/Documentation/intro/install/dpdk.rst -@@ -42,7 +42,7 @@ Build requirements - In addition to the requirements described in :doc:`general`, building Open - vSwitch with DPDK will require the following: - --- DPDK 20.11.1 -+- DPDK 20.11.4 - - - A `DPDK supported NIC`_ - -@@ -73,9 +73,9 @@ Install DPDK - #. Download the `DPDK sources`_, extract the file and set ``DPDK_DIR``:: - - $ cd /usr/src/ -- $ wget https://fast.dpdk.org/rel/dpdk-20.11.1.tar.xz -- $ tar xf dpdk-20.11.1.tar.xz -- $ export DPDK_DIR=/usr/src/dpdk-stable-20.11.1 -+ $ wget https://fast.dpdk.org/rel/dpdk-20.11.4.tar.xz -+ $ tar xf dpdk-20.11.4.tar.xz -+ $ export DPDK_DIR=/usr/src/dpdk-stable-20.11.4 - $ cd $DPDK_DIR - - #. 
Configure and install DPDK using Meson -@@ -219,7 +219,7 @@ To verify hugepage configuration:: - - Mount the hugepages, if not already mounted by default:: - -- $ mount -t hugetlbfs none /dev/hugepages`` -+ $ mount -t hugetlbfs none /dev/hugepages - - .. note:: - -diff --git a/Documentation/intro/install/general.rst b/Documentation/intro/install/general.rst -index c4300cd53e..a297aadac8 100644 ---- a/Documentation/intro/install/general.rst -+++ b/Documentation/intro/install/general.rst -@@ -169,7 +169,7 @@ other than plain text, only if you have the following: - If you are going to extensively modify Open vSwitch, consider installing the - following to obtain better warnings: - --- "sparse" version 0.5.1 or later -+- "sparse" version 0.6.2 or later - (https://git.kernel.org/pub/scm/devel/sparse/sparse.git/). - - - GNU make. -diff --git a/Documentation/topics/dpdk/pmd.rst b/Documentation/topics/dpdk/pmd.rst -index 95fa7af128..c1a35eb13a 100644 ---- a/Documentation/topics/dpdk/pmd.rst -+++ b/Documentation/topics/dpdk/pmd.rst -@@ -31,17 +31,19 @@ input ports for packets, classifying packets once received, and executing - actions on the packets once they are classified. - - PMD threads utilize Receive (Rx) and Transmit (Tx) queues, commonly known as --*rxq*\s and *txq*\s. While Tx queue configuration happens automatically, Rx --queues can be configured by the user. This can happen in one of two ways: -+*rxq*\s and *txq*\s to receive and send packets from/to an interface. - --- For physical interfaces, configuration is done using the -- :program:`ovs-appctl` utility. -+- For physical interfaces, the number of Tx Queues is automatically configured -+ based on the number of PMD thread cores. The number of Rx queues can be -+ configured with:: - --- For virtual interfaces, configuration is done using the :program:`ovs-appctl` -- utility, but this configuration must be reflected in the guest configuration -- (e.g. QEMU command line arguments). 
-+ $ ovs-vsctl set Interface options:n_rxq=N - --The :program:`ovs-appctl` utility also provides a number of commands for -+- For virtual interfaces, the number of Tx and Rx queues are configured by -+ libvirt/QEMU and enabled/disabled in the guest. Refer to :doc:'vhost-user' -+ for more information. -+ -+The :program:`ovs-appctl` utility provides a number of commands for - querying PMD threads and their respective queues. This, and all of the above, - is discussed here. - -diff --git a/NEWS b/NEWS -index 559a51ba3f..c3c5c16ae6 100644 ---- a/NEWS -+++ b/NEWS -@@ -1,3 +1,39 @@ -+v2.16.5 - xx xxx xxxx -+--------------------- -+ -+v2.16.4 - 15 Jun 2022 -+--------------------- -+ - Bug fixes -+ -+v2.16.3 - 08 Apr 2022 -+--------------------- -+ - Bug fixes -+ - libopenvswitch API change: -+ * To fix the Undefined Behavior issue causing the compiler to incorrectly -+ optimize important parts of code, container iteration macros (e.g., -+ LIST_FOR_EACH) have been re-implemented in a UB-safe way. -+ * Backwards compatibility has mostly been preserved, however the -+ user-provided pointer is now set to NULL after the loop (unless it -+ exited via "break;") -+ * Users of libopenvswitch will need to double-check the use of such loop -+ macros before compiling with a new version. -+ * Since the change is limited to the definitions within the headers, the -+ ABI is not affected. -+ - DPDK: -+ * OVS validated with DPDK 20.11.4. It is recommended to use this version -+ until further releases. -+ - Python: -+ * For SSL support, the use of the pyOpenSSL library has been replaced -+ with the native 'ssl' module. 
-+ -+v2.16.2 - 17 Dec 2021 -+--------------------- -+ - Bug fixes -+ -+v2.16.1 - 21 Oct 2021 -+--------------------- -+ - Bug fixes -+ - v2.16.0 - 16 Aug 2021 - --------------------- - - Removed support for 1024-bit Diffie-Hellman key exchange, which is now -diff --git a/acinclude.m4 b/acinclude.m4 -index dba365ea1a..1b957c3dcd 100644 ---- a/acinclude.m4 -+++ b/acinclude.m4 -@@ -77,7 +77,7 @@ dnl Checks if compiler and binutils supports AVX512. - AC_DEFUN([OVS_CHECK_AVX512], [ - OVS_CHECK_BINUTILS_AVX512 - OVS_CHECK_CC_OPTION( -- [-mavx512f], [ovs_have_cc_mavx512f=yes], [ovs_have_cc_mavx512f=no]) -+ [-mavx512f -mavx512vpopcntdq], [ovs_have_cc_mavx512f=yes], [ovs_have_cc_mavx512f=no]) - AM_CONDITIONAL([HAVE_AVX512F], [test $ovs_have_cc_mavx512f = yes]) - if test "$ovs_have_cc_mavx512f" = yes; then - AC_DEFINE([HAVE_AVX512F], [1], -@@ -305,6 +305,13 @@ AC_DEFUN([OVS_CHECK_LINUX_TC], [ - ])], - [AC_DEFINE([HAVE_TCA_SKBEDIT_FLAGS], [1], - [Define to 1 if TCA_SKBEDIT_FLAGS is available.])]) -+ -+ AC_COMPILE_IFELSE([ -+ AC_LANG_PROGRAM([#include ], [ -+ int x = TCA_STATS_PKT64; -+ ])], -+ [AC_DEFINE([HAVE_TCA_STATS_PKT64], [1], -+ [Define to 1 if TCA_STATS_PKT64 is available.])]) - ]) - - dnl OVS_CHECK_LINUX_SCTP_CT -@@ -1417,7 +1424,7 @@ AC_DEFUN([OVS_ENABLE_SPARSE], - : ${SPARSE=sparse} - AC_SUBST([SPARSE]) - AC_CONFIG_COMMANDS_PRE( -- [CC='$(if $(C:0=),env REAL_CC="'"$CC"'" CHECK="$(SPARSE) $(SPARSE_WERROR) -I $(top_srcdir)/include/sparse $(SPARSEFLAGS) $(SPARSE_EXTRA_INCLUDES) " cgcc $(CGCCFLAGS),'"$CC"')']) -+ [CC='$(if $(C:0=),env REAL_CC="'"$CC"'" CHECK="$(SPARSE) $(SPARSE_WERROR) -I $(top_srcdir)/include/sparse -I $(top_srcdir)/include $(SPARSEFLAGS) $(SPARSE_EXTRA_INCLUDES) " cgcc $(CGCCFLAGS),'"$CC"')']) - - AC_ARG_ENABLE( - [sparse], -diff --git a/configure.ac b/configure.ac -index 16b32be965..406df116ee 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -13,7 +13,7 @@ - # limitations under the License. 
- - AC_PREREQ(2.63) --AC_INIT(openvswitch, 2.16.0, bugs@openvswitch.org) -+AC_INIT(openvswitch, 2.16.5, bugs@openvswitch.org) - AC_CONFIG_SRCDIR([datapath/datapath.c]) - AC_CONFIG_MACRO_DIR([m4]) - AC_CONFIG_AUX_DIR([build-aux]) -diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c -index e130c2f966..218e7db814 100644 ---- a/datapath-windows/ovsext/Actions.c -+++ b/datapath-windows/ovsext/Actions.c -@@ -1112,9 +1112,9 @@ OvsPopFieldInPacketBuf(OvsForwardingContext *ovsFwdCtx, - * should split the function and refactor. */ - if (!bufferData) { - EthHdr *ethHdr = (EthHdr *)bufferStart; -- /* If the frame is not VLAN make it a no op */ - if (ethHdr->Type != ETH_TYPE_802_1PQ_NBO) { -- return NDIS_STATUS_SUCCESS; -+ OVS_LOG_ERROR("Invalid ethHdr type %u, nbl %p", ethHdr->Type, ovsFwdCtx->curNbl); -+ return NDIS_STATUS_INVALID_PACKET; - } - } - RtlMoveMemory(bufferStart + shiftLength, bufferStart, shiftOffset); -@@ -1137,6 +1137,9 @@ OvsPopFieldInPacketBuf(OvsForwardingContext *ovsFwdCtx, - static __inline NDIS_STATUS - OvsPopVlanInPktBuf(OvsForwardingContext *ovsFwdCtx) - { -+ NDIS_STATUS status; -+ OVS_PACKET_HDR_INFO* layers = &ovsFwdCtx->layers; -+ - /* - * Declare a dummy vlanTag structure since we need to compute the size - * of shiftLength. The NDIS one is a unionized structure. 
-@@ -1145,7 +1148,15 @@ OvsPopVlanInPktBuf(OvsForwardingContext *ovsFwdCtx) - UINT32 shiftLength = sizeof(vlanTag.TagHeader); - UINT32 shiftOffset = sizeof(DL_EUI48) + sizeof(DL_EUI48); - -- return OvsPopFieldInPacketBuf(ovsFwdCtx, shiftOffset, shiftLength, NULL); -+ status = OvsPopFieldInPacketBuf(ovsFwdCtx, shiftOffset, shiftLength, -+ NULL); -+ -+ if (status == NDIS_STATUS_SUCCESS) { -+ layers->l3Offset -= (UINT16) shiftLength; -+ layers->l4Offset -= (UINT16) shiftLength; -+ } -+ -+ return status; - } - - -@@ -1516,6 +1527,7 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx, - - csumInfo.Value = NET_BUFFER_LIST_INFO(ovsFwdCtx->curNbl, - TcpIpChecksumNetBufferListInfo); -+ - /* - * Adjust the IP header inline as dictated by the action, and also update - * the IP and the TCP checksum for the data modified. -@@ -1524,6 +1536,7 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx, - * ChecksumUpdate32(). Ignoring this for now, since for the most common - * case, we only update the TTL. - */ -+ /*Only tx direction the checksum value will be reset to be PseudoChecksum*/ - - if (isSource) { - addrField = &ipHdr->saddr; -@@ -1540,7 +1553,7 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx, - ((BOOLEAN)csumInfo.Receive.UdpChecksumSucceeded || - (BOOLEAN)csumInfo.Receive.UdpChecksumFailed); - } -- if (l4Offload) { -+ if (isTx && l4Offload) { - *checkField = IPPseudoChecksum(&newAddr, &ipHdr->daddr, - tcpHdr ? IPPROTO_TCP : IPPROTO_UDP, - ntohs(ipHdr->tot_len) - ipHdr->ihl * 4); -@@ -1561,7 +1574,7 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx, - (BOOLEAN)csumInfo.Receive.UdpChecksumFailed); - } - -- if (l4Offload) { -+ if (isTx && l4Offload) { - *checkField = IPPseudoChecksum(&ipHdr->saddr, &newAddr, - tcpHdr ? 
IPPROTO_TCP : IPPROTO_UDP, - ntohs(ipHdr->tot_len) - ipHdr->ihl * 4); -@@ -1570,7 +1583,7 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx, - - if (*addrField != newAddr) { - UINT32 oldAddr = *addrField; -- if (checkField && *checkField != 0 && !l4Offload) { -+ if ((checkField && *checkField != 0) && (!l4Offload || !isTx)) { - /* Recompute total checksum. */ - *checkField = ChecksumUpdate32(*checkField, oldAddr, - newAddr); -@@ -1579,11 +1592,12 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx, - ipHdr->check = ChecksumUpdate32(ipHdr->check, oldAddr, - newAddr); - } -+ - *addrField = newAddr; - } - - if (portField && *portField != newPort) { -- if (checkField && !l4Offload) { -+ if ((checkField) && (!l4Offload || !isTx)) { - /* Recompute total checksum. */ - *checkField = ChecksumUpdate16(*checkField, *portField, - newPort); -@@ -1698,6 +1712,15 @@ OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx, - ipHdr->ttl = ipAttr->ipv4_ttl; - key->ipKey.nwTtl = ipAttr->ipv4_ttl; - } -+ if (ipHdr->dscp != (ipAttr->ipv4_tos & 0xfc)) { -+ /* ECN + DSCP */ -+ UINT8 newTos = (ipHdr->tos & 0x3) | (ipAttr->ipv4_tos & 0xfc); -+ if (ipHdr->check != 0) { -+ ipHdr->check = ChecksumUpdate16(ipHdr->check, ipHdr->tos, newTos); -+ } -+ ipHdr->tos = newTos; -+ key->ipKey.nwTos = newTos; -+ } - - return NDIS_STATUS_SUCCESS; - } -@@ -1792,9 +1815,11 @@ OvsExecuteRecirc(OvsForwardingContext *ovsFwdCtx, - } - - if (newNbl) { -- deferredAction = OvsAddDeferredActions(newNbl, key, NULL); -+ deferredAction = OvsAddDeferredActions(newNbl, key, &(ovsFwdCtx->layers), -+ NULL); - } else { -- deferredAction = OvsAddDeferredActions(ovsFwdCtx->curNbl, key, NULL); -+ deferredAction = OvsAddDeferredActions(ovsFwdCtx->curNbl, key, -+ &(ovsFwdCtx->layers), NULL); - } - - if (deferredAction) { -@@ -1964,7 +1989,7 @@ OvsExecuteSampleAction(OvsForwardingContext *ovsFwdCtx, - return STATUS_SUCCESS; - } - -- if (!OvsAddDeferredActions(newNbl, key, a)) { -+ if (!OvsAddDeferredActions(newNbl, 
key, &(ovsFwdCtx->layers), a)) { - OVS_LOG_INFO( - "Deferred actions limit reached, dropping sample action."); - OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE); -@@ -2100,6 +2125,7 @@ OvsDoExecuteActions(POVS_SWITCH_CONTEXT switchContext, - */ - status = OvsPopVlanInPktBuf(&ovsFwdCtx); - if (status != NDIS_STATUS_SUCCESS) { -+ OVS_LOG_ERROR("OVS-pop vlan action failed status = %lu", status); - dropReason = L"OVS-pop vlan action failed"; - goto dropit; - } -@@ -2349,7 +2375,7 @@ OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext, - - if (status == STATUS_SUCCESS) { - status = OvsProcessDeferredActions(switchContext, completionList, -- portNo, sendFlags, layers); -+ portNo, sendFlags); - } - - return status; -diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c -index 2610d626a0..7f1d2fb412 100644 ---- a/datapath-windows/ovsext/Conntrack.c -+++ b/datapath-windows/ovsext/Conntrack.c -@@ -493,15 +493,32 @@ static __inline NDIS_STATUS - OvsDetectCtPacket(OvsForwardingContext *fwdCtx, - OvsFlowKey *key) - { -+ NDIS_STATUS status = NDIS_STATUS_SUCCESS; -+ OvsFlowKey newFlowKey = { 0 }; -+ - switch (ntohs(key->l2.dlType)) { - case ETH_TYPE_IPV4: - if (key->ipKey.nwFrag != OVS_FRAG_TYPE_NONE) { -- return OvsProcessIpv4Fragment(fwdCtx->switchContext, -+ status = OvsProcessIpv4Fragment(fwdCtx->switchContext, - &fwdCtx->curNbl, - fwdCtx->completionList, - fwdCtx->fwdDetail->SourcePortId, - &fwdCtx->layers, - key->tunKey.tunnelId); -+ if (status == NDIS_STATUS_SUCCESS) { -+ /* After the Ipv4 Fragment is reassembled, update flow key as -+ L3 and L4 headers are not correct */ -+ status = -+ OvsExtractFlow(fwdCtx->curNbl, fwdCtx->srcVportNo, -+ &newFlowKey, &fwdCtx->layers, -+ fwdCtx->tunKey.dst != 0 ? 
&fwdCtx->tunKey : NULL); -+ if (status != NDIS_STATUS_SUCCESS) { -+ OVS_LOG_ERROR("Extract flow failed Nbl %p", fwdCtx->curNbl); -+ return status; -+ } -+ *key = newFlowKey; -+ } -+ return status; - } - if (key->ipKey.nwProto == IPPROTO_TCP - || key->ipKey.nwProto == IPPROTO_UDP -@@ -609,6 +626,31 @@ OvsReverseIcmpType(UINT8 type) - } - } - -+static __inline void -+OvsPickupCtTupleAsLookupKey(POVS_CT_KEY ctKey, UINT16 zone, OvsFlowKey *flowKey) -+{ -+ UINT32 ipAddrSrc = 0, ipAddrDst = 0; -+ -+ if (!flowKey || !ctKey) return; -+ -+ if (flowKey->l2.dlType == htons(ETH_TYPE_IPV4)) { -+ ipAddrSrc = flowKey->ct.tuple_ipv4.ipv4_src; -+ ipAddrDst = flowKey->ct.tuple_ipv4.ipv4_dst; -+ -+ if ((ipAddrSrc > 0 && ipAddrDst > 0) && -+ (zone == flowKey->ct.zone)) { -+ /* if the ct tuple_ipv4 in flowKey is not null and ct.zone is same with -+ * zone parameter pickup the tuple_ipv4 value as the lookup key -+ */ -+ ctKey->src.addr.ipv4 = flowKey->ct.tuple_ipv4.ipv4_src; -+ ctKey->dst.addr.ipv4 = flowKey->ct.tuple_ipv4.ipv4_dst; -+ ctKey->nw_proto = flowKey->ct.tuple_ipv4.ipv4_proto; -+ ctKey->src.port = flowKey->ct.tuple_ipv4.src_port; -+ ctKey->dst.port = flowKey->ct.tuple_ipv4.dst_port; -+ } -+ } -+} -+ - static __inline NDIS_STATUS - OvsCtSetupLookupCtx(OvsFlowKey *flowKey, - UINT16 zone, -@@ -629,6 +671,7 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey, - - ctx->key.src.port = flowKey->ipKey.l4.tpSrc; - ctx->key.dst.port = flowKey->ipKey.l4.tpDst; -+ - if (flowKey->ipKey.nwProto == IPPROTO_ICMP) { - ICMPHdr icmpStorage; - const ICMPHdr *icmp; -@@ -683,6 +726,10 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey, - /* Translate address first for reverse NAT */ - ctx->key = natEntry->ctEntry->key; - OvsCtKeyReverse(&ctx->key); -+ } else { -+ if (flowKey->l2.dlType == htons(ETH_TYPE_IPV4)) { -+ OvsPickupCtTupleAsLookupKey(&(ctx->key), zone, flowKey); -+ } - } - - ctx->hash = OvsCtHashKey(&ctx->key); -diff --git a/datapath-windows/ovsext/PacketIO.c b/datapath-windows/ovsext/PacketIO.c -index 
cc0840704a..2a206305ec 100644 ---- a/datapath-windows/ovsext/PacketIO.c -+++ b/datapath-windows/ovsext/PacketIO.c -@@ -45,7 +45,9 @@ extern NDIS_STRING ovsExtFriendlyNameUC; - - static VOID OvsFinalizeCompletionList(OvsCompletionList *completionList); - static VOID OvsCompleteNBLIngress(POVS_SWITCH_CONTEXT switchContext, -- PNET_BUFFER_LIST netBufferLists, ULONG sendCompleteFlags); -+ PNET_BUFFER_LIST netBufferLists, -+ ULONG sendCompleteFlags, -+ BOOLEAN isSendComplete); - - VOID - OvsInitCompletionList(OvsCompletionList *completionList, -@@ -155,7 +157,7 @@ OvsSendNBLIngress(POVS_SWITCH_CONTEXT switchContext, - OvsReportNBLIngressError(switchContext, netBufferLists, &filterReason, - NDIS_STATUS_PAUSED); - OvsCompleteNBLIngress(switchContext, netBufferLists, -- sendCompleteFlags); -+ sendCompleteFlags, FALSE); - return; - } - -@@ -175,6 +177,79 @@ OvsSendNBLIngress(POVS_SWITCH_CONTEXT switchContext, - NDIS_DEFAULT_PORT_NUMBER, sendFlags); - } - -+static __inline BOOLEAN -+OvsCheckNBLSingleSource(PNET_BUFFER_LIST netBufferLists) -+{ -+ UINT32 sourcePortId = 0; -+ BOOLEAN singleSource = TRUE; -+ PNET_BUFFER_LIST curNbl = netBufferLists; -+ PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info; -+ -+ while (curNbl != NULL) { -+ info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl); -+ if (info == NULL) { -+ /* We are not able to determine the source port ID */ -+ singleSource = FALSE; -+ OVS_LOG_INFO("nbl %p has no source port", curNbl); -+ break; -+ } -+ if (curNbl == netBufferLists) { -+ sourcePortId = info->SourcePortId; -+ } else if (info->SourcePortId != sourcePortId) { -+ singleSource = FALSE; -+ OVS_LOG_INFO("Source port in nbl %p is %u, not from %u", -+ curNbl, info->SourcePortId, sourcePortId); -+ break; -+ } -+ curNbl = NET_BUFFER_LIST_NEXT_NBL(curNbl); -+ } -+ -+ return singleSource; -+} -+ -+/* -+ * SendNetBufferListsCompleteHandler releases the NetBufferLists with flag -+ * NDIS_SEND_COMPLETE_FLAGS_SWITCH_SINGLE_SOURCE if all the NBLs have same -+ * 
source port, for cloned NBLs, source port might be changed, although the -+ * cloned NBLs have same source port, there parent NBLs may have different -+ * source ports, so we should have a check before passing the flag to -+ * NdisFSendNetBufferListsComplete. -+ */ -+static __inline VOID -+OvsCompleteUpperLayerNBL(NDIS_HANDLE ndisHandle, -+ PNET_BUFFER_LIST netBufferLists, -+ ULONG sendCompleteFlags, -+ BOOLEAN isSendComplete) -+{ -+ BOOLEAN singleSource = TRUE; -+ PNET_BUFFER_LIST curNbl, nextNbl; -+ -+ /* To check whether the NBLs are from the same source port */ -+ if (isSendComplete && -+ (sendCompleteFlags & NDIS_SEND_COMPLETE_FLAGS_SWITCH_SINGLE_SOURCE)) { -+ singleSource = OvsCheckNBLSingleSource(netBufferLists); -+ } -+ -+ if (singleSource) { -+ NdisFSendNetBufferListsComplete(ndisHandle, -+ netBufferLists, -+ sendCompleteFlags); -+ } else { -+ /* -+ * Not from a single source port, releasing the NBls without flag -+ * NDIS_SEND_COMPLETE_FLAGS_SWITCH_SINGLE_SOURCE doesn't help, so -+ * let's release them one by one. 
-+ */ -+ for (curNbl = netBufferLists; curNbl != NULL; curNbl = nextNbl) { -+ nextNbl = NET_BUFFER_LIST_NEXT_NBL(curNbl); -+ NET_BUFFER_LIST_NEXT_NBL(curNbl) = NULL; -+ NdisFSendNetBufferListsComplete(ndisHandle, -+ curNbl, -+ sendCompleteFlags); -+ } -+ } -+} -+ - static __inline VOID - OvsStartNBLIngressError(POVS_SWITCH_CONTEXT switchContext, - PNET_BUFFER_LIST nblList, -@@ -184,8 +259,8 @@ OvsStartNBLIngressError(POVS_SWITCH_CONTEXT switchContext, - { - ASSERT(error); - OvsReportNBLIngressError(switchContext, nblList, filterReason, error); -- NdisFSendNetBufferListsComplete(switchContext->NdisFilterHandle, nblList, -- sendCompleteFlags); -+ OvsCompleteUpperLayerNBL(switchContext->NdisFilterHandle, nblList, -+ sendCompleteFlags, FALSE); - } - - static VOID -@@ -427,7 +502,8 @@ OvsExtSendNBL(NDIS_HANDLE filterModuleContext, - static VOID - OvsCompleteNBLIngress(POVS_SWITCH_CONTEXT switchContext, - PNET_BUFFER_LIST netBufferLists, -- ULONG sendCompleteFlags) -+ ULONG sendCompleteFlags, -+ BOOLEAN isSendComplete) - { - PNET_BUFFER_LIST curNbl = NULL, nextNbl = NULL; - OvsCompletionList newList; -@@ -449,8 +525,10 @@ OvsCompleteNBLIngress(POVS_SWITCH_CONTEXT switchContext, - - /* Complete the NBL's that were sent by the upper layer. 
*/ - if (newList.dropNbl != NULL) { -- NdisFSendNetBufferListsComplete(switchContext->NdisFilterHandle, newList.dropNbl, -- sendCompleteFlags); -+ OvsCompleteUpperLayerNBL(switchContext->NdisFilterHandle, -+ newList.dropNbl, -+ sendCompleteFlags, -+ isSendComplete); - } - } - -@@ -466,7 +544,7 @@ OvsExtSendNBLComplete(NDIS_HANDLE filterModuleContext, - ULONG sendCompleteFlags) - { - OvsCompleteNBLIngress((POVS_SWITCH_CONTEXT)filterModuleContext, -- netBufferLists, sendCompleteFlags); -+ netBufferLists, sendCompleteFlags, TRUE); - } - - -@@ -476,7 +554,8 @@ OvsFinalizeCompletionList(OvsCompletionList *completionList) - if (completionList->dropNbl != NULL) { - OvsCompleteNBLIngress(completionList->switchContext, - completionList->dropNbl, -- completionList->sendCompleteFlags); -+ completionList->sendCompleteFlags, -+ FALSE); - - completionList->dropNbl = NULL; - completionList->dropNblNext = &completionList->dropNbl; -diff --git a/datapath-windows/ovsext/Recirc.c b/datapath-windows/ovsext/Recirc.c -index 2febf060dd..7a688c8742 100644 ---- a/datapath-windows/ovsext/Recirc.c -+++ b/datapath-windows/ovsext/Recirc.c -@@ -277,16 +277,23 @@ OvsDeferredActionsQueuePush(POVS_DEFERRED_ACTION_QUEUE queue) - POVS_DEFERRED_ACTION - OvsAddDeferredActions(PNET_BUFFER_LIST nbl, - OvsFlowKey *key, -+ POVS_PACKET_HDR_INFO layers, - const PNL_ATTR actions) - { - POVS_DEFERRED_ACTION_QUEUE queue = OvsDeferredActionsQueueGet(); - POVS_DEFERRED_ACTION deferredAction = NULL; -+ OVS_PACKET_HDR_INFO layersInit = { 0 }; - - deferredAction = OvsDeferredActionsQueuePush(queue); - if (deferredAction) { - deferredAction->nbl = nbl; - deferredAction->actions = actions; - deferredAction->key = *key; -+ if (layers) { -+ deferredAction->layers = *layers; -+ } else { -+ deferredAction->layers = layersInit; -+ } - } - - return deferredAction; -@@ -303,15 +310,17 @@ NDIS_STATUS - OvsProcessDeferredActions(POVS_SWITCH_CONTEXT switchContext, - OvsCompletionList *completionList, - UINT32 portNo, -- ULONG 
sendFlags, -- OVS_PACKET_HDR_INFO *layers) -+ ULONG sendFlags) - { - NDIS_STATUS status = NDIS_STATUS_SUCCESS; - POVS_DEFERRED_ACTION_QUEUE queue = OvsDeferredActionsQueueGet(); - POVS_DEFERRED_ACTION deferredAction = NULL; -+ POVS_PACKET_HDR_INFO layersDeferred = NULL; - - /* Process all deferred actions. */ - while ((deferredAction = OvsDeferredActionsQueuePop(queue)) != NULL) { -+ layersDeferred = &(deferredAction->layers); -+ - if (deferredAction->actions) { - status = OvsDoExecuteActions(switchContext, - completionList, -@@ -319,7 +328,7 @@ OvsProcessDeferredActions(POVS_SWITCH_CONTEXT switchContext, - portNo, - sendFlags, - &deferredAction->key, NULL, -- layers, deferredAction->actions, -+ layersDeferred, deferredAction->actions, - NlAttrGetSize(deferredAction->actions)); - } else { - status = OvsDoRecirc(switchContext, -@@ -327,7 +336,7 @@ OvsProcessDeferredActions(POVS_SWITCH_CONTEXT switchContext, - deferredAction->nbl, - &deferredAction->key, - portNo, -- layers); -+ layersDeferred); - } - } - -diff --git a/datapath-windows/ovsext/Recirc.h b/datapath-windows/ovsext/Recirc.h -index 2b314ce274..b2d02a65c2 100644 ---- a/datapath-windows/ovsext/Recirc.h -+++ b/datapath-windows/ovsext/Recirc.h -@@ -18,6 +18,7 @@ - #define __RECIRC_H_ 1 - - #include "Actions.h" -+#include "NetProto.h" - - #define DEFERRED_ACTION_QUEUE_SIZE 10 - #define DEFERRED_ACTION_EXEC_LEVEL 4 -@@ -26,6 +27,7 @@ typedef struct _OVS_DEFERRED_ACTION { - PNET_BUFFER_LIST nbl; - PNL_ATTR actions; - OvsFlowKey key; -+ OVS_PACKET_HDR_INFO layers; - } OVS_DEFERRED_ACTION, *POVS_DEFERRED_ACTION; - - /* -@@ -39,8 +41,7 @@ NDIS_STATUS - OvsProcessDeferredActions(POVS_SWITCH_CONTEXT switchContext, - OvsCompletionList *completionList, - UINT32 portNo, -- ULONG sendFlags, -- OVS_PACKET_HDR_INFO *layers); -+ ULONG sendFlags); - - /* - * -------------------------------------------------------------------------- -@@ -52,6 +53,7 @@ OvsProcessDeferredActions(POVS_SWITCH_CONTEXT switchContext, - 
POVS_DEFERRED_ACTION - OvsAddDeferredActions(PNET_BUFFER_LIST packet, - OvsFlowKey *key, -+ POVS_PACKET_HDR_INFO layers, - const PNL_ATTR actions); - - /* -diff --git a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h b/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h -index 4cce92f66c..bc18c56b81 100644 ---- a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h -+++ b/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h -@@ -108,7 +108,14 @@ static inline bool rpl_nf_ct_delete(struct nf_conn *ct, u32 portid, int report) - static inline unsigned int - rpl_nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state) - { -- return nf_conntrack_in(state->net, state->pf, state->hook, skb); -+ int err; -+ -+ /* Repeat if requested, see nf_iterate(). */ -+ do { -+ err = nf_conntrack_in(state->net, state->pf, state->hook, skb); -+ } while (err == NF_REPEAT); -+ -+ return err; - } - #define nf_conntrack_in rpl_nf_conntrack_in - #endif /* HAVE_NF_CONNTRACK_IN_TAKES_NF_HOOK_STATE */ -diff --git a/debian/changelog b/debian/changelog -index 239d210b96..522e10b0e5 100644 ---- a/debian/changelog -+++ b/debian/changelog -@@ -1,3 +1,33 @@ -+openvswitch (2.16.5-1) unstable; urgency=low -+ [ Open vSwitch team ] -+ * New upstream version -+ -+ -- Open vSwitch team Wed, 15 Jun 2022 12:03:55 +0200 -+ -+openvswitch (2.16.4-1) unstable; urgency=low -+ [ Open vSwitch team ] -+ * New upstream version -+ -+ -- Open vSwitch team Wed, 15 Jun 2022 12:03:55 +0200 -+ -+openvswitch (2.16.3-1) unstable; urgency=low -+ [ Open vSwitch team ] -+ * New upstream version -+ -+ -- Open vSwitch team Fri, 08 Apr 2022 14:57:43 +0200 -+ -+openvswitch (2.16.2-1) unstable; urgency=low -+ [ Open vSwitch team ] -+ * New upstream version -+ -+ -- Open vSwitch team Fri, 17 Dec 2021 22:14:03 +0100 -+ -+openvswitch (2.16.1-1) unstable; urgency=low -+ [ Open vSwitch team ] -+ * New upstream version -+ -+ -- Open vSwitch team Thu, 21 Oct 2021 23:58:12 
+0200 -+ - openvswitch (2.16.0-1) unstable; urgency=low - - * New upstream version -diff --git a/dpdk/lib/librte_vhost/vhost_user.c b/dpdk/lib/librte_vhost/vhost_user.c -index 45c8ac09da..70d206dcf8 100644 ---- a/dpdk/lib/librte_vhost/vhost_user.c -+++ b/dpdk/lib/librte_vhost/vhost_user.c -@@ -1416,6 +1416,9 @@ vhost_user_get_inflight_fd(struct virtio_net **pdev, - int fd, i, j; - void *addr; - -+ if (validate_msg_fds(msg, 0) != 0) -+ return RTE_VHOST_MSG_RESULT_ERR; -+ - if (msg->size != sizeof(msg->payload.inflight)) { - VHOST_LOG_CONFIG(ERR, - "invalid get_inflight_fd message size is %d\n", -@@ -1509,6 +1512,9 @@ vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg, - void *addr; - int fd, i; - -+ if (validate_msg_fds(msg, 1) != 0) -+ return RTE_VHOST_MSG_RESULT_ERR; -+ - fd = msg->fds[0]; - if (msg->size != sizeof(msg->payload.inflight) || fd < 0) { - VHOST_LOG_CONFIG(ERR, -@@ -2652,6 +2658,9 @@ vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev, - case VHOST_USER_SET_VRING_ADDR: - vring_idx = msg->payload.addr.index; - break; -+ case VHOST_USER_SET_INFLIGHT_FD: -+ vring_idx = msg->payload.inflight.num_queues - 1; -+ break; - default: - return 0; - } -diff --git a/include/linux/automake.mk b/include/linux/automake.mk -index 8f063f482e..f857c7e088 100644 ---- a/include/linux/automake.mk -+++ b/include/linux/automake.mk -@@ -2,6 +2,7 @@ noinst_HEADERS += \ - include/linux/netlink.h \ - include/linux/netfilter/nf_conntrack_sctp.h \ - include/linux/pkt_cls.h \ -+ include/linux/gen_stats.h \ - include/linux/tc_act/tc_mpls.h \ - include/linux/tc_act/tc_pedit.h \ - include/linux/tc_act/tc_skbedit.h \ -diff --git a/include/linux/gen_stats.h b/include/linux/gen_stats.h -new file mode 100644 -index 0000000000..6fae6f727c ---- /dev/null -+++ b/include/linux/gen_stats.h -@@ -0,0 +1,81 @@ -+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -+#ifndef __LINUX_GEN_STATS_WRAPPER_H -+#define __LINUX_GEN_STATS_WRAPPER_H 1 -+ -+#if 
defined(__KERNEL__) || defined(HAVE_TCA_STATS_PKT64) -+#include_next -+#else -+#include -+ -+enum { -+ TCA_STATS_UNSPEC, -+ TCA_STATS_BASIC, -+ TCA_STATS_RATE_EST, -+ TCA_STATS_QUEUE, -+ TCA_STATS_APP, -+ TCA_STATS_RATE_EST64, -+ TCA_STATS_PAD, -+ TCA_STATS_BASIC_HW, -+ TCA_STATS_PKT64, -+ __TCA_STATS_MAX, -+}; -+#define TCA_STATS_MAX (__TCA_STATS_MAX - 1) -+ -+/** -+ * struct gnet_stats_basic - byte/packet throughput statistics -+ * @bytes: number of seen bytes -+ * @packets: number of seen packets -+ */ -+struct gnet_stats_basic { -+ __u64 bytes; -+ __u32 packets; -+}; -+ -+/** -+ * struct gnet_stats_rate_est - rate estimator -+ * @bps: current byte rate -+ * @pps: current packet rate -+ */ -+struct gnet_stats_rate_est { -+ __u32 bps; -+ __u32 pps; -+}; -+ -+/** -+ * struct gnet_stats_rate_est64 - rate estimator -+ * @bps: current byte rate -+ * @pps: current packet rate -+ */ -+struct gnet_stats_rate_est64 { -+ __u64 bps; -+ __u64 pps; -+}; -+ -+/** -+ * struct gnet_stats_queue - queuing statistics -+ * @qlen: queue length -+ * @backlog: backlog size of queue -+ * @drops: number of dropped packets -+ * @requeues: number of requeues -+ * @overlimits: number of enqueues over the limit -+ */ -+struct gnet_stats_queue { -+ __u32 qlen; -+ __u32 backlog; -+ __u32 drops; -+ __u32 requeues; -+ __u32 overlimits; -+}; -+ -+/** -+ * struct gnet_estimator - rate estimator configuration -+ * @interval: sampling period -+ * @ewma_log: the log of measurement window weight -+ */ -+struct gnet_estimator { -+ signed char interval; -+ unsigned char ewma_log; -+}; -+ -+#endif /* __KERNEL__ || !HAVE_TCA_STATS_PKT64 */ -+#endif /* __LINUX_GEN_STATS_WRAPPER_H */ -diff --git a/include/openvswitch/flow.h b/include/openvswitch/flow.h -index 3054015d93..df10cf579e 100644 ---- a/include/openvswitch/flow.h -+++ b/include/openvswitch/flow.h -@@ -141,15 +141,14 @@ struct flow { - uint8_t nw_tos; /* IP ToS (including DSCP and ECN). */ - uint8_t nw_ttl; /* IP TTL/Hop Limit. 
*/ - uint8_t nw_proto; /* IP protocol or low 8 bits of ARP opcode. */ -+ /* L4 (64-bit aligned) */ - struct in6_addr nd_target; /* IPv6 neighbor discovery (ND) target. */ - struct eth_addr arp_sha; /* ARP/ND source hardware address. */ - struct eth_addr arp_tha; /* ARP/ND target hardware address. */ -- ovs_be16 tcp_flags; /* TCP flags/ICMPv6 ND options type. -- * With L3 to avoid matching L4. */ -+ ovs_be16 tcp_flags; /* TCP flags/ICMPv6 ND options type. */ - ovs_be16 pad2; /* Pad to 64 bits. */ - struct ovs_key_nsh nsh; /* Network Service Header keys */ - -- /* L4 (64-bit aligned) */ - ovs_be16 tp_src; /* TCP/UDP/SCTP source port/ICMP type. */ - ovs_be16 tp_dst; /* TCP/UDP/SCTP destination port/ICMP code. */ - ovs_be16 ct_tp_src; /* CT original tuple source port/ICMP type. */ -@@ -179,7 +178,7 @@ BUILD_ASSERT_DECL(offsetof(struct flow, igmp_group_ip4) + sizeof(uint32_t) - enum { - FLOW_SEGMENT_1_ENDS_AT = offsetof(struct flow, dl_dst), - FLOW_SEGMENT_2_ENDS_AT = offsetof(struct flow, nw_src), -- FLOW_SEGMENT_3_ENDS_AT = offsetof(struct flow, tp_src), -+ FLOW_SEGMENT_3_ENDS_AT = offsetof(struct flow, nd_target), - }; - BUILD_ASSERT_DECL(FLOW_SEGMENT_1_ENDS_AT % sizeof(uint64_t) == 0); - BUILD_ASSERT_DECL(FLOW_SEGMENT_2_ENDS_AT % sizeof(uint64_t) == 0); -diff --git a/include/openvswitch/hmap.h b/include/openvswitch/hmap.h -index 4e001cc692..68c284cf14 100644 ---- a/include/openvswitch/hmap.h -+++ b/include/openvswitch/hmap.h -@@ -134,17 +134,17 @@ struct hmap_node *hmap_random_node(const struct hmap *); - * without using 'break', NODE will be NULL. This is true for all of the - * HMAP_FOR_EACH_*() macros. 
- */ --#define HMAP_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, HMAP) \ -- for (INIT_CONTAINER(NODE, hmap_first_with_hash(HMAP, HASH), MEMBER); \ -- (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ -- || ((NODE = NULL), false); \ -- ASSIGN_CONTAINER(NODE, hmap_next_with_hash(&(NODE)->MEMBER), \ -- MEMBER)) --#define HMAP_FOR_EACH_IN_BUCKET(NODE, MEMBER, HASH, HMAP) \ -- for (INIT_CONTAINER(NODE, hmap_first_in_bucket(HMAP, HASH), MEMBER); \ -- (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ -- || ((NODE = NULL), false); \ -- ASSIGN_CONTAINER(NODE, hmap_next_in_bucket(&(NODE)->MEMBER), MEMBER)) -+#define HMAP_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, HMAP) \ -+ for (INIT_MULTIVAR(NODE, MEMBER, hmap_first_with_hash(HMAP, HASH), \ -+ struct hmap_node); \ -+ CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ -+ UPDATE_MULTIVAR(NODE, hmap_next_with_hash(ITER_VAR(NODE)))) -+ -+#define HMAP_FOR_EACH_IN_BUCKET(NODE, MEMBER, HASH, HMAP) \ -+ for (INIT_MULTIVAR(NODE, MEMBER, hmap_first_in_bucket(HMAP, HASH), \ -+ struct hmap_node); \ -+ CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ -+ UPDATE_MULTIVAR(NODE, hmap_next_in_bucket(ITER_VAR(NODE)))) - - static inline struct hmap_node *hmap_first_with_hash(const struct hmap *, - size_t hash); -@@ -170,54 +170,62 @@ bool hmap_contains(const struct hmap *, const struct hmap_node *); - /* Iterates through every node in HMAP. */ - #define HMAP_FOR_EACH(NODE, MEMBER, HMAP) \ - HMAP_FOR_EACH_INIT(NODE, MEMBER, HMAP, (void) 0) --#define HMAP_FOR_EACH_INIT(NODE, MEMBER, HMAP, ...) \ -- for (INIT_CONTAINER(NODE, hmap_first(HMAP), MEMBER), __VA_ARGS__; \ -- (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ -- || ((NODE = NULL), false); \ -- ASSIGN_CONTAINER(NODE, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER)) -+#define HMAP_FOR_EACH_INIT(NODE, MEMBER, HMAP, ...) 
\ -+ for (INIT_MULTIVAR_EXP(NODE, MEMBER, hmap_first(HMAP), struct hmap_node, \ -+ __VA_ARGS__); \ -+ CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ -+ UPDATE_MULTIVAR(NODE, hmap_next(HMAP, ITER_VAR(NODE)))) - - /* Safe when NODE may be freed (not needed when NODE may be removed from the - * hash map but its members remain accessible and intact). */ - #define HMAP_FOR_EACH_SAFE(NODE, NEXT, MEMBER, HMAP) \ -- HMAP_FOR_EACH_SAFE_INIT(NODE, NEXT, MEMBER, HMAP, (void) 0) --#define HMAP_FOR_EACH_SAFE_INIT(NODE, NEXT, MEMBER, HMAP, ...) \ -- for (INIT_CONTAINER(NODE, hmap_first(HMAP), MEMBER), __VA_ARGS__; \ -- ((NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ -- || ((NODE = NULL), false) \ -- ? INIT_CONTAINER(NEXT, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER), 1 \ -- : 0); \ -- (NODE) = (NEXT)) -+ HMAP_FOR_EACH_SAFE_INIT (NODE, NEXT, MEMBER, HMAP, (void) NEXT) -+ -+#define HMAP_FOR_EACH_SAFE_INIT(NODE, NEXT, MEMBER, HMAP, ...) \ -+ for (INIT_MULTIVAR_SAFE_LONG_EXP(NODE, NEXT, MEMBER, hmap_first(HMAP), \ -+ struct hmap_node, __VA_ARGS__); \ -+ CONDITION_MULTIVAR_SAFE_LONG(NODE, NEXT, MEMBER, \ -+ ITER_VAR(NODE) != NULL, \ -+ ITER_VAR(NEXT) = hmap_next(HMAP, ITER_VAR(NODE)), \ -+ ITER_VAR(NEXT) != NULL); \ -+ UPDATE_MULTIVAR_SAFE_LONG(NODE, NEXT)) - - /* Continues an iteration from just after NODE. */ - #define HMAP_FOR_EACH_CONTINUE(NODE, MEMBER, HMAP) \ - HMAP_FOR_EACH_CONTINUE_INIT(NODE, MEMBER, HMAP, (void) 0) --#define HMAP_FOR_EACH_CONTINUE_INIT(NODE, MEMBER, HMAP, ...) \ -- for (ASSIGN_CONTAINER(NODE, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER), \ -- __VA_ARGS__; \ -- (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ -- || ((NODE = NULL), false); \ -- ASSIGN_CONTAINER(NODE, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER)) -+#define HMAP_FOR_EACH_CONTINUE_INIT(NODE, MEMBER, HMAP, ...) 
\ -+ for (INIT_MULTIVAR_EXP(NODE, MEMBER, hmap_next(HMAP, &(NODE)->MEMBER), \ -+ struct hmap_node, __VA_ARGS__); \ -+ CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ -+ UPDATE_MULTIVAR(NODE, hmap_next(HMAP, ITER_VAR(NODE)))) -+ -+struct hmap_pop_helper_iter__ { -+ size_t bucket; -+ struct hmap_node *node; -+}; - --static inline struct hmap_node * --hmap_pop_helper__(struct hmap *hmap, size_t *bucket) { -+static inline void -+hmap_pop_helper__(struct hmap *hmap, struct hmap_pop_helper_iter__ *iter) { - -- for (; *bucket <= hmap->mask; (*bucket)++) { -- struct hmap_node *node = hmap->buckets[*bucket]; -+ for (; iter->bucket <= hmap->mask; (iter->bucket)++) { -+ struct hmap_node *node = hmap->buckets[iter->bucket]; - - if (node) { - hmap_remove(hmap, node); -- return node; -+ iter->node = node; -+ return; - } - } -- -- return NULL; -+ iter->node = NULL; - } - --#define HMAP_FOR_EACH_POP(NODE, MEMBER, HMAP) \ -- for (size_t bucket__ = 0; \ -- INIT_CONTAINER(NODE, hmap_pop_helper__(HMAP, &bucket__), MEMBER), \ -- (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ -- || ((NODE = NULL), false);) -+#define HMAP_FOR_EACH_POP(NODE, MEMBER, HMAP) \ -+ for (struct hmap_pop_helper_iter__ ITER_VAR(NODE) = { 0, NULL }; \ -+ hmap_pop_helper__(HMAP, &ITER_VAR(NODE)), \ -+ (ITER_VAR(NODE).node != NULL) ? \ -+ (((NODE) = OBJECT_CONTAINING(ITER_VAR(NODE).node, \ -+ NODE, MEMBER)),1): \ -+ (((NODE) = NULL), 0);) - - static inline struct hmap_node *hmap_first(const struct hmap *); - static inline struct hmap_node *hmap_next(const struct hmap *, -diff --git a/include/openvswitch/json.h b/include/openvswitch/json.h -index 73b562e03d..0831a9cee1 100644 ---- a/include/openvswitch/json.h -+++ b/include/openvswitch/json.h -@@ -50,7 +50,9 @@ enum json_type { - JSON_INTEGER, /* 123. */ - JSON_REAL, /* 123.456. */ - JSON_STRING, /* "..." */ -- JSON_N_TYPES -+ JSON_N_TYPES, -+ JSON_SERIALIZED_OBJECT, /* Internal type to hold serialized version of -+ * data of other types. 
*/ - }; - - const char *json_type_to_string(enum json_type); -@@ -70,7 +72,7 @@ struct json { - struct json_array array; - long long int integer; - double real; -- char *string; -+ char *string; /* JSON_STRING or JSON_SERIALIZED_OBJECT. */ - }; - }; - -@@ -78,6 +80,7 @@ struct json *json_null_create(void); - struct json *json_boolean_create(bool); - struct json *json_string_create(const char *); - struct json *json_string_create_nocopy(char *); -+struct json *json_serialized_object_create(const struct json *); - struct json *json_integer_create(long long int); - struct json *json_real_create(double); - -@@ -99,6 +102,7 @@ void json_object_put_format(struct json *, - OVS_PRINTF_FORMAT(3, 4); - - const char *json_string(const struct json *); -+const char *json_serialized_object(const struct json *); - struct json_array *json_array(const struct json *); - struct shash *json_object(const struct json *); - bool json_boolean(const struct json *); -@@ -125,6 +129,7 @@ struct json *json_parser_finish(struct json_parser *); - void json_parser_abort(struct json_parser *); - - struct json *json_from_string(const char *string); -+struct json *json_from_serialized_object(const struct json *); - struct json *json_from_file(const char *file_name); - struct json *json_from_stream(FILE *stream); - -diff --git a/include/openvswitch/list.h b/include/openvswitch/list.h -index 8ad5eeb327..bbd2edbd0c 100644 ---- a/include/openvswitch/list.h -+++ b/include/openvswitch/list.h -@@ -72,37 +72,48 @@ static inline bool ovs_list_is_empty(const struct ovs_list *); - static inline bool ovs_list_is_singleton(const struct ovs_list *); - static inline bool ovs_list_is_short(const struct ovs_list *); - --#define LIST_FOR_EACH(ITER, MEMBER, LIST) \ -- for (INIT_CONTAINER(ITER, (LIST)->next, MEMBER); \ -- &(ITER)->MEMBER != (LIST); \ -- ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.next, MEMBER)) --#define LIST_FOR_EACH_CONTINUE(ITER, MEMBER, LIST) \ -- for (ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.next, 
MEMBER); \ -- &(ITER)->MEMBER != (LIST); \ -- ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.next, MEMBER)) --#define LIST_FOR_EACH_REVERSE(ITER, MEMBER, LIST) \ -- for (INIT_CONTAINER(ITER, (LIST)->prev, MEMBER); \ -- &(ITER)->MEMBER != (LIST); \ -- ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER)) --#define LIST_FOR_EACH_REVERSE_SAFE(ITER, PREV, MEMBER, LIST) \ -- for (INIT_CONTAINER(ITER, (LIST)->prev, MEMBER); \ -- (&(ITER)->MEMBER != (LIST) \ -- ? INIT_CONTAINER(PREV, (ITER)->MEMBER.prev, MEMBER), 1 \ -- : 0); \ -- (ITER) = (PREV)) --#define LIST_FOR_EACH_REVERSE_CONTINUE(ITER, MEMBER, LIST) \ -- for (ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER); \ -- &(ITER)->MEMBER != (LIST); \ -- ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER)) --#define LIST_FOR_EACH_SAFE(ITER, NEXT, MEMBER, LIST) \ -- for (INIT_CONTAINER(ITER, (LIST)->next, MEMBER); \ -- (&(ITER)->MEMBER != (LIST) \ -- ? INIT_CONTAINER(NEXT, (ITER)->MEMBER.next, MEMBER), 1 \ -- : 0); \ -- (ITER) = (NEXT)) --#define LIST_FOR_EACH_POP(ITER, MEMBER, LIST) \ -- while (!ovs_list_is_empty(LIST) \ -- && (INIT_CONTAINER(ITER, ovs_list_pop_front(LIST), MEMBER), 1)) -+#define LIST_FOR_EACH(VAR, MEMBER, LIST) \ -+ for (INIT_MULTIVAR(VAR, MEMBER, (LIST)->next, struct ovs_list); \ -+ CONDITION_MULTIVAR(VAR, MEMBER, ITER_VAR(VAR) != (LIST)); \ -+ UPDATE_MULTIVAR(VAR, ITER_VAR(VAR)->next)) -+ -+#define LIST_FOR_EACH_CONTINUE(VAR, MEMBER, LIST) \ -+ for (INIT_MULTIVAR(VAR, MEMBER, VAR->MEMBER.next, struct ovs_list); \ -+ CONDITION_MULTIVAR(VAR, MEMBER, ITER_VAR(VAR) != (LIST)); \ -+ UPDATE_MULTIVAR(VAR, ITER_VAR(VAR)->next)) -+ -+#define LIST_FOR_EACH_REVERSE(VAR, MEMBER, LIST) \ -+ for (INIT_MULTIVAR(VAR, MEMBER, (LIST)->prev, struct ovs_list); \ -+ CONDITION_MULTIVAR(VAR, MEMBER, ITER_VAR(VAR) != (LIST)); \ -+ UPDATE_MULTIVAR(VAR, ITER_VAR(VAR)->prev)) -+ -+#define LIST_FOR_EACH_REVERSE_CONTINUE(VAR, MEMBER, LIST) \ -+ for (INIT_MULTIVAR(VAR, MEMBER, VAR->MEMBER.prev, struct ovs_list); \ -+ CONDITION_MULTIVAR(VAR, 
MEMBER, ITER_VAR(VAR) != (LIST)); \ -+ UPDATE_MULTIVAR(VAR, ITER_VAR(VAR)->prev)) -+ -+#define LIST_FOR_EACH_REVERSE_SAFE(VAR, PREV, MEMBER, LIST) \ -+ for (INIT_MULTIVAR_SAFE_LONG(VAR, PREV, MEMBER, (LIST)->prev, \ -+ struct ovs_list); \ -+ CONDITION_MULTIVAR_SAFE_LONG(VAR, PREV, MEMBER, \ -+ ITER_VAR(VAR) != (LIST), \ -+ ITER_VAR(PREV) = ITER_VAR(VAR)->prev, \ -+ ITER_VAR(PREV) != (LIST)); \ -+ UPDATE_MULTIVAR_SAFE_LONG(VAR, PREV)) -+ -+#define LIST_FOR_EACH_SAFE(VAR, NEXT, MEMBER, LIST) \ -+ for (INIT_MULTIVAR_SAFE_LONG(VAR, NEXT, MEMBER, (LIST)->next, \ -+ struct ovs_list); \ -+ CONDITION_MULTIVAR_SAFE_LONG(VAR, NEXT, MEMBER, \ -+ ITER_VAR(VAR) != (LIST), \ -+ ITER_VAR(NEXT) = ITER_VAR(VAR)->next, \ -+ ITER_VAR(NEXT) != (LIST)); \ -+ UPDATE_MULTIVAR_SAFE_LONG(VAR, NEXT)) -+ -+#define LIST_FOR_EACH_POP(ITER, MEMBER, LIST) \ -+ while (!ovs_list_is_empty(LIST) ? \ -+ (INIT_CONTAINER(ITER, ovs_list_pop_front(LIST), MEMBER), 1) : \ -+ (ITER = NULL, 0)) - - /* Inline implementations. */ - -diff --git a/include/openvswitch/meta-flow.h b/include/openvswitch/meta-flow.h -index 95e52e3587..045dce8f5f 100644 ---- a/include/openvswitch/meta-flow.h -+++ b/include/openvswitch/meta-flow.h -@@ -2305,6 +2305,7 @@ void mf_set_flow_value_masked(const struct mf_field *, - const union mf_value *mask, - struct flow *); - bool mf_is_tun_metadata(const struct mf_field *); -+bool mf_is_frozen_metadata(const struct mf_field *); - bool mf_is_pipeline_field(const struct mf_field *); - bool mf_is_set(const struct mf_field *, const struct flow *); - void mf_mask_field(const struct mf_field *, struct flow_wildcards *); -diff --git a/include/openvswitch/util.h b/include/openvswitch/util.h -index 228b185c3a..8e6c46a85f 100644 ---- a/include/openvswitch/util.h -+++ b/include/openvswitch/util.h -@@ -145,6 +145,150 @@ OVS_NO_RETURN void ovs_assert_failure(const char *, const char *, const char *); - #define INIT_CONTAINER(OBJECT, POINTER, MEMBER) \ - ((OBJECT) = NULL, ASSIGN_CONTAINER(OBJECT, 
POINTER, MEMBER)) - -+/* Multi-variable container iterators. -+ * -+ * The following macros facilitate safe iteration over data structures -+ * contained in objects. It does so by using an internal iterator variable of -+ * the type of the member object pointer (i.e: pointer to the data structure). -+ */ -+ -+/* Multi-variable iterator variable name. -+ * Returns the name of the internal iterator variable. -+ */ -+#define ITER_VAR(NAME) NAME ## __iterator__ -+ -+/* Multi-variable initialization. Creates an internal iterator variable that -+ * points to the provided pointer. The type of the iterator variable is -+ * ITER_TYPE*. It must be the same type as &VAR->MEMBER. -+ * -+ * The _EXP version evaluates the extra expressions once. -+ */ -+#define INIT_MULTIVAR(VAR, MEMBER, POINTER, ITER_TYPE) \ -+ INIT_MULTIVAR_EXP(VAR, MEMBER, POINTER, ITER_TYPE, (void) 0) -+ -+#define INIT_MULTIVAR_EXP(VAR, MEMBER, POINTER, ITER_TYPE, ...) \ -+ ITER_TYPE *ITER_VAR(VAR) = ( __VA_ARGS__ , (ITER_TYPE *) POINTER) -+ -+/* Multi-variable condition. -+ * Evaluates the condition expression (that must be based on the internal -+ * iterator variable). Only if the result of expression is true, the OBJECT is -+ * set to the object containing the current value of the iterator variable. -+ * -+ * It is up to the caller to make sure it is safe to run OBJECT_CONTAINING on -+ * the pointers that verify the condition. -+ */ -+#define CONDITION_MULTIVAR(VAR, MEMBER, EXPR) \ -+ ((EXPR) ? \ -+ (((VAR) = OBJECT_CONTAINING(ITER_VAR(VAR), VAR, MEMBER)), 1) : \ -+ (((VAR) = NULL), 0)) -+ -+/* Multi-variable update. -+ * Sets the iterator value to NEXT_ITER. -+ */ -+#define UPDATE_MULTIVAR(VAR, NEXT_ITER) \ -+ (ITER_VAR(VAR) = NEXT_ITER) -+ -+/* In the safe version of the multi-variable container iteration, the next -+ * value of the iterator is precalculated on the condition expression. -+ * This allows for the iterator to be freed inside the loop. 
-+ * -+ * Two versions of the macros are provided: -+ * -+ * * In the _SHORT version, the user does not have to provide a variable to -+ * store the next value of the iterator. Instead, a second iterator variable -+ * is declared in the INIT_ macro and its name is determined by -+ * ITER_NEXT_VAR(OBJECT). -+ * -+ * * In the _LONG version, the user provides another variable of the same type -+ * as the iterator object variable to store the next containing object. -+ * We still declare an iterator variable inside the loop but in this case it's -+ * name is derived from the name of the next containing variable. -+ * The value of the next containing object will only be set -+ * (via OBJECT_CONTAINING) if an additional condition is statisfied. This -+ * second condition must ensure it is safe to call OBJECT_CONTAINING on the -+ * next iterator variable. -+ * With respect to the value of the next containing object: -+ * - Inside of the loop: the variable is either NULL or safe to use. -+ * - Outside of the loop: the variable is NULL if the loop ends normally. -+ * If the loop ends with a "break;" statement, rules of Inside the loop -+ * apply. -+ */ -+#define ITER_NEXT_VAR(NAME) NAME ## __iterator__next__ -+ -+/* Safe initialization declares both iterators. */ -+#define INIT_MULTIVAR_SAFE_SHORT(VAR, MEMBER, POINTER, ITER_TYPE) \ -+ INIT_MULTIVAR_SAFE_SHORT_EXP(VAR, MEMBER, POINTER, ITER_TYPE, (void) 0) -+ -+#define INIT_MULTIVAR_SAFE_SHORT_EXP(VAR, MEMBER, POINTER, ITER_TYPE, ...) \ -+ ITER_TYPE *ITER_VAR(VAR) = ( __VA_ARGS__ , (ITER_TYPE *) POINTER), \ -+ *ITER_NEXT_VAR(VAR) = NULL -+ -+/* Evaluate the condition expression and, if satisfied, update the _next_ -+ * iterator with the NEXT_EXPR. -+ * Both EXPR and NEXT_EXPR should only use ITER_VAR(VAR) and -+ * ITER_NEXT_VAR(VAR). -+ */ -+#define CONDITION_MULTIVAR_SAFE_SHORT(VAR, MEMBER, EXPR, NEXT_EXPR) \ -+ ((EXPR) ? 
\ -+ (((VAR) = OBJECT_CONTAINING(ITER_VAR(VAR), VAR, MEMBER)), \ -+ (NEXT_EXPR), 1) : \ -+ (((VAR) = NULL), 0)) -+ -+#define UPDATE_MULTIVAR_SAFE_SHORT(VAR) \ -+ UPDATE_MULTIVAR(VAR, ITER_NEXT_VAR(VAR)) -+ -+/* _LONG versions of the macros. */ -+ -+#define INIT_MULTIVAR_SAFE_LONG(VAR, NEXT_VAR, MEMBER, POINTER, ITER_TYPE) \ -+ INIT_MULTIVAR_SAFE_LONG_EXP(VAR, NEXT_VAR, MEMBER, POINTER, ITER_TYPE, \ -+ (void) 0) \ -+ -+#define INIT_MULTIVAR_SAFE_LONG_EXP(VAR, NEXT_VAR, MEMBER, POINTER, \ -+ ITER_TYPE, ...) \ -+ ITER_TYPE *ITER_VAR(VAR) = ( __VA_ARGS__ , (ITER_TYPE *) POINTER), \ -+ *ITER_VAR(NEXT_VAR) = NULL -+ -+/* Evaluate the condition expression and, if satisfied, update the _next_ -+ * iterator with the NEXT_EXPR. After, evaluate the NEXT_COND and, if -+ * satisfied, set the value to NEXT_VAR. NEXT_COND must use ITER_VAR(NEXT_VAR). -+ * -+ * Both EXPR and NEXT_EXPR should only use ITER_VAR(VAR) and -+ * ITER_VAR(NEXT_VAR). -+ */ -+#define CONDITION_MULTIVAR_SAFE_LONG(VAR, NEXT_VAR, MEMBER, EXPR, NEXT_EXPR, \ -+ NEXT_COND) \ -+ ((EXPR) ? \ -+ (((VAR) = OBJECT_CONTAINING(ITER_VAR(VAR), VAR, MEMBER)), \ -+ (NEXT_EXPR), ((NEXT_COND) ? \ -+ ((NEXT_VAR) = \ -+ OBJECT_CONTAINING(ITER_VAR(NEXT_VAR), NEXT_VAR, MEMBER)) : \ -+ ((NEXT_VAR) = NULL)), 1) : \ -+ (((VAR) = NULL), ((NEXT_VAR) = NULL), 0)) -+ -+#define UPDATE_MULTIVAR_SAFE_LONG(VAR, NEXT_VAR) \ -+ UPDATE_MULTIVAR(VAR, ITER_VAR(NEXT_VAR)) -+ -+/* Helpers to allow overloading the *_SAFE iterator macros and select either -+ * the LONG or the SHORT version depending on the number of arguments. -+ */ -+#define GET_SAFE_MACRO2(_1, _2, NAME, ...) NAME -+#define GET_SAFE_MACRO3(_1, _2, _3, NAME, ...) NAME -+#define GET_SAFE_MACRO4(_1, _2, _3, _4, NAME, ...) NAME -+#define GET_SAFE_MACRO5(_1, _2, _3, _4, _5, NAME, ...) NAME -+#define GET_SAFE_MACRO6(_1, _2, _3, _4, _5, _6, NAME, ...) NAME -+#define GET_SAFE_MACRO(MAX_ARGS) GET_SAFE_MACRO ## MAX_ARGS -+ -+/* MSVC treats __VA_ARGS__ as a simple token in argument lists. 
Introduce -+ * a level of indirection to work around that. */ -+#define EXPAND_MACRO(name, args) name args -+ -+/* Overload the LONG and the SHORT version of the macros. MAX_ARGS is the -+ * maximum number of arguments (i.e: the number of arguments of the LONG -+ * version). */ -+#define OVERLOAD_SAFE_MACRO(LONG, SHORT, MAX_ARGS, ...) \ -+ EXPAND_MACRO(GET_SAFE_MACRO(MAX_ARGS), \ -+ (__VA_ARGS__, LONG, SHORT))(__VA_ARGS__) -+ - /* Returns the number of elements in ARRAY. */ - #define ARRAY_SIZE(ARRAY) __ARRAY_SIZE(ARRAY) - -@@ -285,6 +429,9 @@ is_pow2(uintmax_t x) - * segfault, so it is important to be aware of correct alignment. */ - #define ALIGNED_CAST(TYPE, ATTR) ((TYPE) (void *) (ATTR)) - -+#define IS_PTR_ALIGNED(OBJ) \ -+ (!(OBJ) || (uintptr_t) (OBJ) % __alignof__(OVS_TYPEOF(OBJ)) == 0) -+ - #ifdef __cplusplus - } - #endif -diff --git a/ipsec/ovs-monitor-ipsec.in b/ipsec/ovs-monitor-ipsec.in -index 89a36fe17b..a8b0705d9f 100755 ---- a/ipsec/ovs-monitor-ipsec.in -+++ b/ipsec/ovs-monitor-ipsec.in -@@ -202,18 +202,18 @@ conn prevent_unencrypted_vxlan - """ - - auth_tmpl = {"psk": Template("""\ -- left=0.0.0.0 -+ left=%any - right=$remote_ip - authby=psk"""), - "pki_remote": Template("""\ -- left=0.0.0.0 -+ left=%any - right=$remote_ip - leftid=$local_name - rightid=$remote_name - leftcert=$certificate - rightcert=$remote_cert"""), - "pki_ca": Template("""\ -- left=0.0.0.0 -+ left=%any - right=$remote_ip - leftid=$local_name - rightid=$remote_name -@@ -299,11 +299,11 @@ conn prevent_unencrypted_vxlan - - def config_tunnel(self, tunnel): - if tunnel.conf["psk"]: -- self.secrets_file.write('0.0.0.0 %s : PSK "%s"\n' % -+ self.secrets_file.write('%%any %s : PSK "%s"\n' % - (tunnel.conf["remote_ip"], tunnel.conf["psk"])) - auth_section = self.auth_tmpl["psk"].substitute(tunnel.conf) - else: -- self.secrets_file.write("0.0.0.0 %s : RSA %s\n" % -+ self.secrets_file.write("%%any %s : RSA %s\n" % - (tunnel.conf["remote_ip"], - tunnel.conf["private_key"])) - if 
tunnel.conf["remote_cert"]: -diff --git a/lib/bfd.c b/lib/bfd.c -index 3c965699ac..9698576d07 100644 ---- a/lib/bfd.c -+++ b/lib/bfd.c -@@ -131,16 +131,17 @@ enum diag { - * | Required Min Echo RX Interval | - * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ - struct msg { -- uint8_t vers_diag; /* Version and diagnostic. */ -- uint8_t flags; /* 2bit State field followed by flags. */ -- uint8_t mult; /* Fault detection multiplier. */ -- uint8_t length; /* Length of this BFD message. */ -- ovs_be32 my_disc; /* My discriminator. */ -- ovs_be32 your_disc; /* Your discriminator. */ -- ovs_be32 min_tx; /* Desired minimum tx interval. */ -- ovs_be32 min_rx; /* Required minimum rx interval. */ -- ovs_be32 min_rx_echo; /* Required minimum echo rx interval. */ -+ uint8_t vers_diag; /* Version and diagnostic. */ -+ uint8_t flags; /* 2bit State field followed by flags. */ -+ uint8_t mult; /* Fault detection multiplier. */ -+ uint8_t length; /* Length of this BFD message. */ -+ ovs_16aligned_be32 my_disc; /* My discriminator. */ -+ ovs_16aligned_be32 your_disc; /* Your discriminator. */ -+ ovs_16aligned_be32 min_tx; /* Desired minimum tx interval. */ -+ ovs_16aligned_be32 min_rx; /* Required minimum rx interval. */ -+ ovs_16aligned_be32 min_rx_echo; /* Required minimum echo rx interval. 
*/ - }; -+ - BUILD_ASSERT_DECL(BFD_PACKET_LEN == sizeof(struct msg)); - - #define DIAG_MASK 0x1f -@@ -634,9 +635,9 @@ bfd_put_packet(struct bfd *bfd, struct dp_packet *p, - - msg->mult = bfd->mult; - msg->length = BFD_PACKET_LEN; -- msg->my_disc = htonl(bfd->disc); -- msg->your_disc = htonl(bfd->rmt_disc); -- msg->min_rx_echo = htonl(0); -+ put_16aligned_be32(&msg->my_disc, htonl(bfd->disc)); -+ put_16aligned_be32(&msg->your_disc, htonl(bfd->rmt_disc)); -+ put_16aligned_be32(&msg->min_rx_echo, htonl(0)); - - if (bfd_in_poll(bfd)) { - min_tx = bfd->poll_min_tx; -@@ -646,8 +647,8 @@ bfd_put_packet(struct bfd *bfd, struct dp_packet *p, - min_rx = bfd->min_rx; - } - -- msg->min_tx = htonl(min_tx * 1000); -- msg->min_rx = htonl(min_rx * 1000); -+ put_16aligned_be32(&msg->min_tx, htonl(min_tx * 1000)); -+ put_16aligned_be32(&msg->min_rx, htonl(min_rx * 1000)); - - bfd->flags &= ~FLAG_FINAL; - *oam = bfd->oam; -@@ -781,12 +782,12 @@ bfd_process_packet(struct bfd *bfd, const struct flow *flow, - goto out; - } - -- if (!msg->my_disc) { -+ if (!get_16aligned_be32(&msg->my_disc)) { - log_msg(VLL_WARN, msg, "NULL my_disc", bfd); - goto out; - } - -- pkt_your_disc = ntohl(msg->your_disc); -+ pkt_your_disc = ntohl(get_16aligned_be32(&msg->your_disc)); - if (pkt_your_disc) { - /* Technically, we should use the your discriminator field to figure - * out which 'struct bfd' this packet is destined towards. 
That way a -@@ -806,7 +807,7 @@ bfd_process_packet(struct bfd *bfd, const struct flow *flow, - bfd_status_changed(bfd); - } - -- bfd->rmt_disc = ntohl(msg->my_disc); -+ bfd->rmt_disc = ntohl(get_16aligned_be32(&msg->my_disc)); - bfd->rmt_state = rmt_state; - bfd->rmt_flags = flags; - bfd->rmt_diag = msg->vers_diag & DIAG_MASK; -@@ -834,7 +835,7 @@ bfd_process_packet(struct bfd *bfd, const struct flow *flow, - bfd->rmt_mult = msg->mult; - } - -- rmt_min_rx = MAX(ntohl(msg->min_rx) / 1000, 1); -+ rmt_min_rx = MAX(ntohl(get_16aligned_be32(&msg->min_rx)) / 1000, 1); - if (bfd->rmt_min_rx != rmt_min_rx) { - bfd->rmt_min_rx = rmt_min_rx; - if (bfd->next_tx) { -@@ -843,7 +844,7 @@ bfd_process_packet(struct bfd *bfd, const struct flow *flow, - log_msg(VLL_INFO, msg, "New remote min_rx", bfd); - } - -- bfd->rmt_min_tx = MAX(ntohl(msg->min_tx) / 1000, 1); -+ bfd->rmt_min_tx = MAX(ntohl(get_16aligned_be32(&msg->min_tx)) / 1000, 1); - bfd->detect_time = bfd_rx_interval(bfd) * bfd->rmt_mult + time_msec(); - - if (bfd->state == STATE_ADMIN_DOWN) { -@@ -1105,10 +1106,14 @@ log_msg(enum vlog_level level, const struct msg *p, const char *message, - bfd_diag_str(p->vers_diag & DIAG_MASK), - bfd_state_str(p->flags & STATE_MASK), - p->mult, p->length, bfd_flag_str(p->flags & FLAGS_MASK), -- ntohl(p->my_disc), ntohl(p->your_disc), -- ntohl(p->min_tx), ntohl(p->min_tx) / 1000, -- ntohl(p->min_rx), ntohl(p->min_rx) / 1000, -- ntohl(p->min_rx_echo), ntohl(p->min_rx_echo) / 1000); -+ ntohl(get_16aligned_be32(&p->my_disc)), -+ ntohl(get_16aligned_be32(&p->your_disc)), -+ ntohl(get_16aligned_be32(&p->min_tx)), -+ ntohl(get_16aligned_be32(&p->min_tx)) / 1000, -+ ntohl(get_16aligned_be32(&p->min_rx)), -+ ntohl(get_16aligned_be32(&p->min_rx)) / 1000, -+ ntohl(get_16aligned_be32(&p->min_rx_echo)), -+ ntohl(get_16aligned_be32(&p->min_rx_echo)) / 1000); - bfd_put_details(&ds, bfd); - VLOG(level, "%s", ds_cstr(&ds)); - ds_destroy(&ds); -diff --git a/lib/cmap.h b/lib/cmap.h -index 
c502d23112..72e2ec5f71 100644 ---- a/lib/cmap.h -+++ b/lib/cmap.h -@@ -108,6 +108,8 @@ size_t cmap_replace(struct cmap *, struct cmap_node *old_node, - * - * CMAP and HASH are evaluated only once. NODE is evaluated many times. - * -+ * After a normal exit of the loop (not through a "break;" statement) NODE is -+ * NULL. - * - * Thread-safety - * ============= -@@ -128,15 +130,15 @@ size_t cmap_replace(struct cmap *, struct cmap_node *old_node, - * CMAP_FOR_EACH_WITH_HASH_PROTECTED may only be used if CMAP is guaranteed not - * to change during iteration. It may be very slightly faster. - */ --#define CMAP_NODE_FOR_EACH(NODE, MEMBER, CMAP_NODE) \ -- for (INIT_CONTAINER(NODE, CMAP_NODE, MEMBER); \ -- (NODE) != OBJECT_CONTAINING(NULL, NODE, MEMBER); \ -- ASSIGN_CONTAINER(NODE, cmap_node_next(&(NODE)->MEMBER), MEMBER)) --#define CMAP_NODE_FOR_EACH_PROTECTED(NODE, MEMBER, CMAP_NODE) \ -- for (INIT_CONTAINER(NODE, CMAP_NODE, MEMBER); \ -- (NODE) != OBJECT_CONTAINING(NULL, NODE, MEMBER); \ -- ASSIGN_CONTAINER(NODE, cmap_node_next_protected(&(NODE)->MEMBER), \ -- MEMBER)) -+#define CMAP_NODE_FOR_EACH(NODE, MEMBER, CMAP_NODE) \ -+ for (INIT_MULTIVAR(NODE, MEMBER, CMAP_NODE, struct cmap_node); \ -+ CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ -+ UPDATE_MULTIVAR(NODE, cmap_node_next(ITER_VAR(NODE)))) -+#define CMAP_NODE_FOR_EACH_PROTECTED(NODE, MEMBER, CMAP_NODE) \ -+ for (INIT_MULTIVAR(NODE, MEMBER, CMAP_NODE, struct cmap_node); \ -+ CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ -+ UPDATE_MULTIVAR(NODE, cmap_node_next_protected(ITER_VAR(NODE)))) -+ - #define CMAP_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, CMAP) \ - CMAP_NODE_FOR_EACH(NODE, MEMBER, cmap_find(CMAP, HASH)) - #define CMAP_FOR_EACH_WITH_HASH_PROTECTED(NODE, MEMBER, HASH, CMAP) \ -@@ -223,7 +225,7 @@ unsigned long cmap_find_batch(const struct cmap *cmap, unsigned long map, - ? 
(INIT_CONTAINER(NODE, (CURSOR)->node, MEMBER), \ - cmap_cursor_advance(CURSOR), \ - true) \ -- : false) -+ : (NODE = NULL, false)) - - #define CMAP_CURSOR_FOR_EACH(NODE, MEMBER, CURSOR, CMAP) \ - for (*(CURSOR) = cmap_cursor_start(CMAP); \ -diff --git a/lib/db-ctl-base.c b/lib/db-ctl-base.c -index 77cc76a9f6..7074561588 100644 ---- a/lib/db-ctl-base.c -+++ b/lib/db-ctl-base.c -@@ -247,15 +247,15 @@ record_id_equals(const union ovsdb_atom *name, enum ovsdb_atomic_type type, - const char *record_id) - { - if (type == OVSDB_TYPE_STRING) { -- if (!strcmp(name->string, record_id)) { -+ if (!strcmp(name->s->string, record_id)) { - return true; - } - - struct uuid uuid; - size_t len = strlen(record_id); - if (len >= 4 -- && uuid_from_string(&uuid, name->string) -- && !strncmp(name->string, record_id, len)) { -+ && uuid_from_string(&uuid, name->s->string) -+ && !strncmp(name->s->string, record_id, len)) { - return true; - } - -@@ -314,15 +314,19 @@ get_row_by_id(struct ctl_context *ctx, - row, id->name_column, key, value); - - /* Extract the name from the column. */ -- const union ovsdb_atom *name; -+ const union ovsdb_atom *name = NULL; - if (!id->key) { - name = datum->n == 1 ? &datum->keys[0] : NULL; - } else { -- const union ovsdb_atom key_atom -- = { .string = CONST_CAST(char *, id->key) }; -- unsigned int i = ovsdb_datum_find_key(datum, &key_atom, -- OVSDB_TYPE_STRING); -- name = i == UINT_MAX ? 
NULL : &datum->values[i]; -+ union ovsdb_atom key_atom = { -+ .s = ovsdb_atom_string_create(CONST_CAST(char *, id->key)) }; -+ unsigned int i; -+ -+ if (ovsdb_datum_find_key(datum, &key_atom, -+ OVSDB_TYPE_STRING, &i)) { -+ name = &datum->values[i]; -+ } -+ ovsdb_atom_destroy(&key_atom, OVSDB_TYPE_STRING); - } - if (!name) { - continue; -@@ -819,14 +823,14 @@ check_condition(const struct ovsdb_idl_table_class *table, - goto out; - } - -- idx = ovsdb_datum_find_key(have_datum, -- &want_key, column->type.key.type); -- if (idx == UINT_MAX && !is_set_operator(operator)) { -+ bool found = ovsdb_datum_find_key(have_datum, &want_key, -+ column->type.key.type, &idx); -+ if (!found && !is_set_operator(operator)) { - retval = false; - } else { - struct ovsdb_datum a; - -- if (idx != UINT_MAX) { -+ if (found) { - a.n = 1; - a.keys = &have_datum->values[idx]; - a.values = NULL; -@@ -992,9 +996,8 @@ cmd_get(struct ctl_context *ctx) - return; - } - -- idx = ovsdb_datum_find_key(datum, &key, -- column->type.key.type); -- if (idx == UINT_MAX) { -+ if (!ovsdb_datum_find_key(datum, &key, -+ column->type.key.type, &idx)) { - if (must_exist) { - ctl_error( - ctx, "no key \"%s\" in %s record \"%s\" column %s", -@@ -1375,7 +1378,7 @@ set_column(const struct ovsdb_idl_table_class *table, - ovsdb_atom_destroy(&value, column->type.value.type); - - ovsdb_datum_union(&datum, ovsdb_idl_read(row, column), -- &column->type, false); -+ &column->type); - ovsdb_idl_txn_verify(row, column); - ovsdb_idl_txn_write(row, column, &datum); - } else { -@@ -1514,7 +1517,7 @@ cmd_add(struct ctl_context *ctx) - ovsdb_datum_destroy(&old, &column->type); - return; - } -- ovsdb_datum_union(&old, &add, type, false); -+ ovsdb_datum_union(&old, &add, type); - ovsdb_datum_destroy(&add, type); - } - if (old.n > type->n_max) { -diff --git a/lib/dns-resolve.c b/lib/dns-resolve.c -index d344514343..8bcecb90ce 100644 ---- a/lib/dns-resolve.c -+++ b/lib/dns-resolve.c -@@ -265,7 +265,7 @@ resolve_callback__(void *req_, 
int err, struct ub_result *result) - if (err != 0 || (result->qtype == ns_t_aaaa && !result->havedata)) { - ub_resolve_free(result); - req->state = RESOLVE_ERROR; -- VLOG_ERR_RL(&rl, "%s: failed to resolve", req->name); -+ VLOG_WARN_RL(&rl, "%s: failed to resolve", req->name); - return; - } - -diff --git a/lib/dp-packet.c b/lib/dp-packet.c -index 72f6d09ac7..35c72542a2 100644 ---- a/lib/dp-packet.c -+++ b/lib/dp-packet.c -@@ -294,7 +294,7 @@ dp_packet_resize(struct dp_packet *b, size_t new_headroom, size_t new_tailroom) - void - dp_packet_prealloc_tailroom(struct dp_packet *b, size_t size) - { -- if (size > dp_packet_tailroom(b)) { -+ if ((size && !dp_packet_base(b)) || (size > dp_packet_tailroom(b))) { - dp_packet_resize(b, dp_packet_headroom(b), MAX(size, 64)); - } - } -diff --git a/lib/dp-packet.h b/lib/dp-packet.h -index 08d93c2779..3dc582fbfd 100644 ---- a/lib/dp-packet.h -+++ b/lib/dp-packet.h -@@ -199,6 +199,7 @@ struct dp_packet *dp_packet_clone_data_with_headroom(const void *, size_t, - void dp_packet_resize(struct dp_packet *b, size_t new_headroom, - size_t new_tailroom); - static inline void dp_packet_delete(struct dp_packet *); -+static inline void dp_packet_swap(struct dp_packet *, struct dp_packet *); - - static inline void *dp_packet_at(const struct dp_packet *, size_t offset, - size_t size); -@@ -256,6 +257,18 @@ dp_packet_delete(struct dp_packet *b) - } - } - -+/* Swaps content of two packets. */ -+static inline void -+dp_packet_swap(struct dp_packet *a, struct dp_packet *b) -+{ -+ ovs_assert(a->source == DPBUF_MALLOC || a->source == DPBUF_STUB); -+ ovs_assert(b->source == DPBUF_MALLOC || b->source == DPBUF_STUB); -+ struct dp_packet c = *a; -+ -+ *a = *b; -+ *b = c; -+} -+ - /* If 'b' contains at least 'offset + size' bytes of data, returns a pointer to - * byte 'offset'. Otherwise, returns a null pointer. 
*/ - static inline void * -diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c -index b7d577870d..fe24f9abdf 100644 ---- a/lib/dpdk-stub.c -+++ b/lib/dpdk-stub.c -@@ -83,7 +83,7 @@ bool - dpdk_get_cpu_has_isa(const char *arch OVS_UNUSED, - const char *feature OVS_UNUSED) - { -- VLOG_ERR_ONCE("DPDK not supported in this version of Open vSwitch, " -+ VLOG_DBG_ONCE("DPDK not supported in this version of Open vSwitch, " - "cannot use CPU flag based optimizations"); - return false; - } -diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c -index ec64419e38..77f28810c7 100644 ---- a/lib/dpif-netdev-extract-avx512.c -+++ b/lib/dpif-netdev-extract-avx512.c -@@ -157,10 +157,19 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, __m512i idx, __m512i a) - 0, 0, 0, 0, /* Src IP */ \ - 0, 0, 0, 0, /* Dst IP */ - --#define PATTERN_IPV4_MASK PATTERN_IPV4_GEN(0xFF, 0xFE, 0xFF, 0xFF) -+#define PATTERN_IPV4_MASK PATTERN_IPV4_GEN(0xFF, 0xBF, 0xFF, 0xFF) - #define PATTERN_IPV4_UDP PATTERN_IPV4_GEN(0x45, 0, 0, 0x11) - #define PATTERN_IPV4_TCP PATTERN_IPV4_GEN(0x45, 0, 0, 0x06) - -+#define PATTERN_TCP_GEN(data_offset) \ -+ 0, 0, 0, 0, /* sport, dport */ \ -+ 0, 0, 0, 0, /* sequence number */ \ -+ 0, 0, 0, 0, /* ack number */ \ -+ data_offset, /* data offset: used to verify = 5, options not supported */ -+ -+#define PATTERN_TCP_MASK PATTERN_TCP_GEN(0xF0) -+#define PATTERN_TCP PATTERN_TCP_GEN(0x50) -+ - #define NU 0 - #define PATTERN_IPV4_UDP_SHUFFLE \ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, NU, NU, /* Ether */ \ -@@ -217,6 +226,25 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, __m512i idx, __m512i a) - #define PATTERN_DT1Q_IPV4_TCP_KMASK \ - (KMASK_ETHER | (KMASK_DT1Q << 16) | (KMASK_IPV4 << 24) | (KMASK_TCP << 40)) - -+/* Miniflow Strip post-processing masks. -+ * This allows unsetting specific bits from the resulting miniflow. It is used -+ * for e.g. IPv4 where the "DF" bit is never pushed to the miniflow itself. 
-+ * The NC define is for "No Change", allowing the bits to pass through. -+ */ -+#define NC 0xFF -+ -+#define PATTERN_STRIP_IPV4_MASK \ -+ NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, \ -+ NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, 0xBF, NC, NC, NC, \ -+ NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, \ -+ NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC -+ -+#define PATTERN_STRIP_DOT1Q_IPV4_MASK \ -+ NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, \ -+ NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, \ -+ NC, NC, NC, NC, 0xBF, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, \ -+ NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC -+ - /* This union allows initializing static data as u8, but easily loading it - * into AVX512 registers too. The union ensures proper alignment for the zmm. - */ -@@ -241,8 +269,9 @@ struct mfex_profile { - union mfex_data probe_mask; - union mfex_data probe_data; - -- /* Required for reshaping packet into miniflow. */ -+ /* Required for reshaping packet into miniflow and post-processing it. */ - union mfex_data store_shuf; -+ union mfex_data strip_mask; - __mmask64 store_kmsk; - - /* Constant data to set in mf.bits and dp_packet data on hit. 
*/ -@@ -310,6 +339,7 @@ static const struct mfex_profile mfex_profiles[PROFILE_COUNT] = - .probe_data.u8_data = { PATTERN_ETHERTYPE_IPV4 PATTERN_IPV4_UDP}, - - .store_shuf.u8_data = { PATTERN_IPV4_UDP_SHUFFLE }, -+ .strip_mask.u8_data = { PATTERN_STRIP_IPV4_MASK }, - .store_kmsk = PATTERN_IPV4_UDP_KMASK, - - .mf_bits = { 0x18a0000000000000, 0x0000000000040401}, -@@ -320,10 +350,19 @@ static const struct mfex_profile mfex_profiles[PROFILE_COUNT] = - }, - - [PROFILE_ETH_IPV4_TCP] = { -- .probe_mask.u8_data = { PATTERN_ETHERTYPE_MASK PATTERN_IPV4_MASK }, -- .probe_data.u8_data = { PATTERN_ETHERTYPE_IPV4 PATTERN_IPV4_TCP}, -+ .probe_mask.u8_data = { -+ PATTERN_ETHERTYPE_MASK -+ PATTERN_IPV4_MASK -+ PATTERN_TCP_MASK -+ }, -+ .probe_data.u8_data = { -+ PATTERN_ETHERTYPE_IPV4 -+ PATTERN_IPV4_TCP -+ PATTERN_TCP -+ }, - - .store_shuf.u8_data = { PATTERN_IPV4_TCP_SHUFFLE }, -+ .strip_mask.u8_data = { PATTERN_STRIP_IPV4_MASK }, - .store_kmsk = PATTERN_IPV4_TCP_KMASK, - - .mf_bits = { 0x18a0000000000000, 0x0000000000044401}, -@@ -342,6 +381,7 @@ static const struct mfex_profile mfex_profiles[PROFILE_COUNT] = - }, - - .store_shuf.u8_data = { PATTERN_DT1Q_IPV4_UDP_SHUFFLE }, -+ .strip_mask.u8_data = { PATTERN_STRIP_DOT1Q_IPV4_MASK }, - .store_kmsk = PATTERN_DT1Q_IPV4_UDP_KMASK, - - .mf_bits = { 0x38a0000000000000, 0x0000000000040401}, -@@ -353,20 +393,27 @@ static const struct mfex_profile mfex_profiles[PROFILE_COUNT] = - - [PROFILE_ETH_VLAN_IPV4_TCP] = { - .probe_mask.u8_data = { -- PATTERN_ETHERTYPE_MASK PATTERN_DT1Q_MASK PATTERN_IPV4_MASK -+ PATTERN_ETHERTYPE_MASK -+ PATTERN_DT1Q_MASK -+ PATTERN_IPV4_MASK -+ PATTERN_TCP_MASK - }, - .probe_data.u8_data = { -- PATTERN_ETHERTYPE_DT1Q PATTERN_DT1Q_IPV4 PATTERN_IPV4_TCP -+ PATTERN_ETHERTYPE_DT1Q -+ PATTERN_DT1Q_IPV4 -+ PATTERN_IPV4_TCP -+ PATTERN_TCP - }, - - .store_shuf.u8_data = { PATTERN_DT1Q_IPV4_TCP_SHUFFLE }, -+ .strip_mask.u8_data = { PATTERN_STRIP_DOT1Q_IPV4_MASK }, - .store_kmsk = PATTERN_DT1Q_IPV4_TCP_KMASK, - - .mf_bits 
= { 0x38a0000000000000, 0x0000000000044401}, - .dp_pkt_offs = { - 14, UINT16_MAX, 18, 38, - }, -- .dp_pkt_min_size = 46, -+ .dp_pkt_min_size = 58, - }, - }; - -@@ -374,16 +421,31 @@ static const struct mfex_profile mfex_profiles[PROFILE_COUNT] = - /* Protocol specific helper functions, for calculating offsets/lenghts. */ - static int32_t - mfex_ipv4_set_l2_pad_size(struct dp_packet *pkt, struct ip_header *nh, -- uint32_t len_from_ipv4) -+ uint32_t len_from_ipv4, uint32_t next_proto_len) - { -- /* Handle dynamic l2_pad_size. */ -- uint16_t tot_len = ntohs(nh->ip_tot_len); -- if (OVS_UNLIKELY(tot_len > len_from_ipv4 || -- (len_from_ipv4 - tot_len) > UINT16_MAX)) { -- return -1; -- } -- dp_packet_set_l2_pad_size(pkt, len_from_ipv4 - tot_len); -- return 0; -+ /* Handle dynamic l2_pad_size; note that avx512 has already validated -+ * the IP->ihl field to be 5, so 20 bytes of IP header (no options). -+ */ -+ uint16_t ip_tot_len = ntohs(nh->ip_tot_len); -+ -+ /* Error if IP total length is greater than remaining packet size. */ -+ bool err_ip_tot_len_too_high = ip_tot_len > len_from_ipv4; -+ -+ /* Error if IP total length is less than the size of the IP header -+ * itself, and the size of the next-protocol this profile matches on. -+ */ -+ bool err_ip_tot_len_too_low = -+ (IP_HEADER_LEN + next_proto_len) > ip_tot_len; -+ -+ /* Ensure the l2 pad size will not overflow. 
*/ -+ bool err_len_u16_overflow = (len_from_ipv4 - ip_tot_len) > UINT16_MAX; -+ -+ if (OVS_UNLIKELY(err_ip_tot_len_too_high || err_ip_tot_len_too_low || -+ err_len_u16_overflow)) { -+ return -1; -+ } -+ dp_packet_set_l2_pad_size(pkt, len_from_ipv4 - ip_tot_len); -+ return 0; - } - - /* Fixup the VLAN CFI and PCP, reading the PCP from the input to this function, -@@ -433,6 +495,7 @@ mfex_avx512_process(struct dp_packet_batch *packets, - __m512i v_vals = _mm512_loadu_si512(&profile->probe_data); - __m512i v_mask = _mm512_loadu_si512(&profile->probe_mask); - __m512i v_shuf = _mm512_loadu_si512(&profile->store_shuf); -+ __m512i v_strp = _mm512_loadu_si512(&profile->strip_mask); - - __mmask64 k_shuf = profile->store_kmsk; - __m128i v_bits = _mm_loadu_si128((void *) &profile->mf_bits); -@@ -450,10 +513,17 @@ mfex_avx512_process(struct dp_packet_batch *packets, - - /* Load packet data and probe with AVX512 mask & compare. */ - const uint8_t *pkt = dp_packet_data(packet); -- __m512i v_pkt0 = _mm512_loadu_si512(pkt); -+ __m512i v_pkt0; -+ if (size >= 64) { -+ v_pkt0 = _mm512_loadu_si512(pkt); -+ } else { -+ uint64_t load_kmask = (1ULL << size) - 1; -+ v_pkt0 = _mm512_maskz_loadu_epi8(load_kmask, pkt); -+ } -+ - __m512i v_pkt0_masked = _mm512_and_si512(v_pkt0, v_mask); - __mmask64 k_cmp = _mm512_cmpeq_epi8_mask(v_pkt0_masked, v_vals); -- if (k_cmp != UINT64_MAX) { -+ if (OVS_UNLIKELY(k_cmp != UINT64_MAX)) { - continue; - } - -@@ -481,8 +551,9 @@ mfex_avx512_process(struct dp_packet_batch *packets, - v_blk0 = _mm512_maskz_permutex2var_epi8_skx(k_shuf, v_pkt0, - v_shuf, v512_zeros); - } -- _mm512_storeu_si512(&blocks[2], v_blk0); - -+ __m512i v_blk0_strip = _mm512_and_si512(v_blk0, v_strp); -+ _mm512_storeu_si512(&blocks[2], v_blk0_strip); - - /* Perform "post-processing" per profile, handling details not easily - * handled in the above generic AVX512 code. 
Examples include TCP flag -@@ -498,7 +569,8 @@ mfex_avx512_process(struct dp_packet_batch *packets, - - uint32_t size_from_ipv4 = size - VLAN_ETH_HEADER_LEN; - struct ip_header *nh = (void *)&pkt[VLAN_ETH_HEADER_LEN]; -- if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) { -+ if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4, -+ TCP_HEADER_LEN)) { - continue; - } - -@@ -512,7 +584,8 @@ mfex_avx512_process(struct dp_packet_batch *packets, - - uint32_t size_from_ipv4 = size - VLAN_ETH_HEADER_LEN; - struct ip_header *nh = (void *)&pkt[VLAN_ETH_HEADER_LEN]; -- if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) { -+ if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4, -+ UDP_HEADER_LEN)) { - continue; - } - } break; -@@ -525,7 +598,8 @@ mfex_avx512_process(struct dp_packet_batch *packets, - /* Handle dynamic l2_pad_size. */ - uint32_t size_from_ipv4 = size - sizeof(struct eth_header); - struct ip_header *nh = (void *)&pkt[sizeof(struct eth_header)]; -- if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) { -+ if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4, -+ TCP_HEADER_LEN)) { - continue; - } - } break; -@@ -534,7 +608,8 @@ mfex_avx512_process(struct dp_packet_batch *packets, - /* Handle dynamic l2_pad_size. */ - uint32_t size_from_ipv4 = size - sizeof(struct eth_header); - struct ip_header *nh = (void *)&pkt[sizeof(struct eth_header)]; -- if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) { -+ if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4, -+ UDP_HEADER_LEN)) { - continue; - } - -diff --git a/lib/dpif-netdev-private-dfc.h b/lib/dpif-netdev-private-dfc.h -index 92092ebec9..3dfc91f0fe 100644 ---- a/lib/dpif-netdev-private-dfc.h -+++ b/lib/dpif-netdev-private-dfc.h -@@ -59,7 +59,8 @@ extern "C" { - * Thread-safety - * ============= - * -- * Each pmd_thread has its own private exact match cache. -+ * Each pmd_thread has its own private exact match cache and signature match -+ * cache. 
- * If dp_netdev_input is not called from a pmd thread, a mutex is used. - */ - -diff --git a/lib/dpif-netdev-private-dpcls.h b/lib/dpif-netdev-private-dpcls.h -index 7c4a840cb1..0d5da73c7a 100644 ---- a/lib/dpif-netdev-private-dpcls.h -+++ b/lib/dpif-netdev-private-dpcls.h -@@ -83,8 +83,10 @@ struct dpcls_subtable { - /* The lookup function to use for this subtable. If there is a known - * property of the subtable (eg: only 3 bits of miniflow metadata is - * used for the lookup) then this can point at an optimized version of -- * the lookup function for this particular subtable. */ -- dpcls_subtable_lookup_func lookup_func; -+ * the lookup function for this particular subtable. The lookup function -+ * can be used at any time by a PMD thread, so it's declared as an atomic -+ * here to prevent garbage from being read. */ -+ ATOMIC(dpcls_subtable_lookup_func) lookup_func; - - /* Caches the masks to match a packet to, reducing runtime calculations. */ - uint64_t *mf_masks; -diff --git a/lib/dpif-netdev-private-flow.h b/lib/dpif-netdev-private-flow.h -index 3030660675..32ad020d90 100644 ---- a/lib/dpif-netdev-private-flow.h -+++ b/lib/dpif-netdev-private-flow.h -@@ -101,6 +101,7 @@ struct dp_netdev_flow { - - bool dead; - uint32_t mark; /* Unique flow mark assigned to a flow */ -+ odp_port_t orig_in_port; - - /* Statistics. */ - struct dp_netdev_flow_stats stats; -diff --git a/lib/dpif-netdev-private-thread.h b/lib/dpif-netdev-private-thread.h -index a782d9678a..ac4885538c 100644 ---- a/lib/dpif-netdev-private-thread.h -+++ b/lib/dpif-netdev-private-thread.h -@@ -78,10 +78,10 @@ struct dp_netdev_pmd_thread { - struct ovs_refcount ref_cnt; /* Every reference must be refcount'ed. */ - struct cmap_node node; /* In 'dp->poll_threads'. */ - -- /* Per thread exact-match cache. Note, the instance for cpu core -- * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly -- * need to be protected by 'non_pmd_mutex'. 
Every other instance -- * will only be accessed by its own pmd thread. */ -+ /* Per thread exact match cache and signature match cache. Note, the -+ * instance for cpu core NON_PMD_CORE_ID can be accessed by multiple -+ * threads, and thusly need to be protected by 'non_pmd_mutex'. Every -+ * other instance will only be accessed by its own pmd thread. */ - OVS_ALIGNED_VAR(CACHE_LINE_SIZE) struct dfc_cache flow_cache; - - /* Flow-Table and classifiers -diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c -index bddce75b63..f96d0ecf54 100644 ---- a/lib/dpif-netdev.c -+++ b/lib/dpif-netdev.c -@@ -984,7 +984,9 @@ dpif_netdev_subtable_lookup_set(struct unixctl_conn *conn, int argc OVS_UNUSED, - if (!cls) { - continue; - } -+ ovs_mutex_lock(&pmd->flow_mutex); - uint32_t subtbl_changes = dpcls_subtable_lookup_reprobe(cls); -+ ovs_mutex_unlock(&pmd->flow_mutex); - if (subtbl_changes) { - lookup_dpcls_changed++; - lookup_subtable_changed += subtbl_changes; -@@ -2221,13 +2223,24 @@ static void - do_del_port(struct dp_netdev *dp, struct dp_netdev_port *port) - OVS_REQUIRES(dp->port_mutex) - { -- netdev_flow_flush(port->netdev); -- netdev_uninit_flow_api(port->netdev); - hmap_remove(&dp->ports, &port->node); - seq_change(dp->port_seq); - - reconfigure_datapath(dp); - -+ /* Flush and disable offloads only after 'port' has been made -+ * inaccessible through datapath reconfiguration. -+ * This prevents having PMDs enqueuing offload requests after -+ * the flush. However, the flush doesn't provide any synchronization -+ * with the offload thread, so some requests could still be in the -+ * queue. -+ * When only this port is deleted instead of the whole datapath, -+ * revalidator threads are still active and can still enqueue -+ * offload modification or deletion. Managing those stray requests -+ * is done in the offload threads. 
*/ -+ netdev_flow_flush(port->netdev); -+ netdev_uninit_flow_api(port->netdev); -+ - port_destroy(port); - } - -@@ -2711,6 +2724,10 @@ queue_netdev_flow_del(struct dp_netdev_pmd_thread *pmd, - ovsthread_once_done(&offload_thread_once); - } - -+ if (!netdev_is_flow_api_enabled()) { -+ return; -+ } -+ - offload = dp_netdev_alloc_flow_offload(pmd, flow, - DP_NETDEV_FLOW_OFFLOAD_OP_DEL); - dp_netdev_append_flow_offload(offload); -@@ -2720,7 +2737,7 @@ static void - queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd, - struct dp_netdev_flow *flow, struct match *match, - const struct nlattr *actions, size_t actions_len, -- odp_port_t orig_in_port, int op) -+ int op) - { - struct dp_flow_offload_item *offload; - -@@ -2740,7 +2757,7 @@ queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd, - offload->actions = xmalloc(actions_len); - memcpy(offload->actions, actions, actions_len); - offload->actions_len = actions_len; -- offload->orig_in_port = orig_in_port; -+ offload->orig_in_port = flow->orig_in_port; - - dp_netdev_append_flow_offload(offload); - } -@@ -2758,9 +2775,7 @@ dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd, - ovs_assert(cls != NULL); - dpcls_remove(cls, &flow->cr); - cmap_remove(&pmd->flow_table, node, dp_netdev_flow_hash(&flow->ufid)); -- if (flow->mark != INVALID_FLOW_MARK) { -- queue_netdev_flow_del(pmd, flow); -- } -+ queue_netdev_flow_del(pmd, flow); - flow->dead = true; - - dp_netdev_flow_unref(flow); -@@ -3555,6 +3570,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, - flow->dead = false; - flow->batch = NULL; - flow->mark = INVALID_FLOW_MARK; -+ flow->orig_in_port = orig_in_port; - *CONST_CAST(unsigned *, &flow->pmd_id) = pmd->core_id; - *CONST_CAST(struct flow *, &flow->flow) = match->flow; - *CONST_CAST(ovs_u128 *, &flow->ufid) = *ufid; -@@ -3584,7 +3600,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, - dp_netdev_flow_hash(&flow->ufid)); - - queue_netdev_flow_put(pmd, flow, match, actions, actions_len, -- 
orig_in_port, DP_NETDEV_FLOW_OFFLOAD_OP_ADD); -+ DP_NETDEV_FLOW_OFFLOAD_OP_ADD); - - if (OVS_UNLIKELY(!VLOG_DROP_DBG((&upcall_rl)))) { - struct ds ds = DS_EMPTY_INITIALIZER; -@@ -3671,7 +3687,7 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd, - ovsrcu_set(&netdev_flow->actions, new_actions); - - queue_netdev_flow_put(pmd, netdev_flow, match, -- put->actions, put->actions_len, ODPP_NONE, -+ put->actions, put->actions_len, - DP_NETDEV_FLOW_OFFLOAD_OP_MOD); - - if (stats) { -@@ -4061,7 +4077,10 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute) - flow_hash_5tuple(execute->flow, 0)); - } - -- dp_packet_batch_init_packet(&pp, execute->packet); -+ /* Making a copy because the packet might be stolen during the execution -+ * and caller might still need it. */ -+ struct dp_packet *packet_clone = dp_packet_clone(execute->packet); -+ dp_packet_batch_init_packet(&pp, packet_clone); - dp_netdev_execute_actions(pmd, &pp, false, execute->flow, - execute->actions, execute->actions_len); - dp_netdev_pmd_flush_output_packets(pmd, true); -@@ -4071,6 +4090,24 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute) - dp_netdev_pmd_unref(pmd); - } - -+ if (dp_packet_batch_size(&pp) == 1) { -+ /* Packet wasn't dropped during the execution. Swapping content with -+ * the original packet, because the caller might expect actions to -+ * modify it. Uisng the packet from a batch instead of 'packet_clone' -+ * because it maybe stolen and replaced by other packet, e.g. by -+ * the fragmentation engine. */ -+ dp_packet_swap(execute->packet, pp.packets[0]); -+ dp_packet_delete_batch(&pp, true); -+ } else if (dp_packet_batch_size(&pp)) { -+ /* FIXME: We have more packets than expected. Likely, we got IP -+ * fragments of the reassembled packet. Dropping them here as we have -+ * no way to get them to the caller. It might be that all the required -+ * actions with them are already executed, but it also might not be a -+ * case, e.g. 
if dpif_netdev_execute() called to execute a single -+ * tunnel push. */ -+ dp_packet_delete_batch(&pp, true); -+ } -+ - return 0; - } - -@@ -5033,23 +5070,28 @@ sched_numa_list_put_in_place(struct sched_numa_list *numa_list) - } - } - -+/* Returns 'true' if OVS rxq scheduling algorithm assigned any unpinned rxq to -+ * a PMD thread core on a non-local numa node. */ - static bool - sched_numa_list_cross_numa_polling(struct sched_numa_list *numa_list) - { - struct sched_numa *numa; - -- /* For each numa */ - HMAP_FOR_EACH (numa, node, &numa_list->numas) { -- /* For each pmd */ - for (int i = 0; i < numa->n_pmds; i++) { - struct sched_pmd *sched_pmd; - - sched_pmd = &numa->pmds[i]; -- /* For each rxq. */ -+ if (sched_pmd->isolated) { -+ /* All rxqs on this PMD thread core are pinned. */ -+ continue; -+ } - for (unsigned k = 0; k < sched_pmd->n_rxq; k++) { - struct dp_netdev_rxq *rxq = sched_pmd->rxqs[k]; -- -- if (!sched_pmd->isolated && -+ /* Check if the rxq is not pinned to a specific PMD thread core -+ * by the user AND the PMD thread core that OVS assigned is -+ * non-local to the rxq port. */ -+ if (rxq->core_id == OVS_CORE_UNSPEC && - rxq->pmd->numa_id != - netdev_get_numa_id(rxq->port->netdev)) { - return true; -@@ -5349,10 +5391,10 @@ sched_numa_list_schedule(struct sched_numa_list *numa_list, - /* Find any numa with available PMDs. 
*/ - for (int j = 0; j < n_numa; j++) { - numa = sched_numa_list_next(numa_list, last_cross_numa); -+ last_cross_numa = numa; - if (sched_numa_noniso_pmd_count(numa)) { - break; - } -- last_cross_numa = numa; - numa = NULL; - } - } -@@ -6616,15 +6658,15 @@ static struct dp_netdev_pmd_thread * - dp_netdev_get_pmd(struct dp_netdev *dp, unsigned core_id) - { - struct dp_netdev_pmd_thread *pmd; -- const struct cmap_node *pnode; - -- pnode = cmap_find(&dp->poll_threads, hash_int(core_id, 0)); -- if (!pnode) { -- return NULL; -+ CMAP_FOR_EACH_WITH_HASH (pmd, node, hash_int(core_id, 0), -+ &dp->poll_threads) { -+ if (pmd->core_id == core_id) { -+ return dp_netdev_pmd_try_ref(pmd) ? pmd : NULL; -+ } - } -- pmd = CONTAINER_OF(pnode, struct dp_netdev_pmd_thread, node); - -- return dp_netdev_pmd_try_ref(pmd) ? pmd : NULL; -+ return NULL; - } - - /* Sets the 'struct dp_netdev_pmd_thread' for non-pmd threads. */ -@@ -8942,9 +8984,12 @@ dpcls_create_subtable(struct dpcls *cls, const struct netdev_flow_key *mask) - - /* Get the preferred subtable search function for this (u0,u1) subtable. - * The function is guaranteed to always return a valid implementation, and -- * possibly an ISA optimized, and/or specialized implementation. -+ * possibly an ISA optimized, and/or specialized implementation. Initialize -+ * the subtable search function atomically to avoid garbage data being read -+ * by the PMD thread. - */ -- subtable->lookup_func = dpcls_subtable_get_best_impl(unit0, unit1); -+ atomic_init(&subtable->lookup_func, -+ dpcls_subtable_get_best_impl(unit0, unit1)); - - cmap_insert(&cls->subtables_map, &subtable->cmap_node, mask->hash); - /* Add the new subtable at the end of the pvector (with no hits yet) */ -@@ -8973,6 +9018,10 @@ dpcls_find_subtable(struct dpcls *cls, const struct netdev_flow_key *mask) - /* Checks for the best available implementation for each subtable lookup - * function, and assigns it as the lookup function pointer for each subtable. 
- * Returns the number of subtables that have changed lookup implementation. -+ * This function requires holding a flow_mutex when called. This is to make -+ * sure modifications done by this function are not overwritten. This could -+ * happen if dpcls_sort_subtable_vector() is called at the same time as this -+ * function. - */ - static uint32_t - dpcls_subtable_lookup_reprobe(struct dpcls *cls) -@@ -8985,10 +9034,13 @@ dpcls_subtable_lookup_reprobe(struct dpcls *cls) - uint32_t u0_bits = subtable->mf_bits_set_unit0; - uint32_t u1_bits = subtable->mf_bits_set_unit1; - void *old_func = subtable->lookup_func; -- subtable->lookup_func = dpcls_subtable_get_best_impl(u0_bits, u1_bits); -+ -+ /* Set the subtable lookup function atomically to avoid garbage data -+ * being read by the PMD thread. */ -+ atomic_store_relaxed(&subtable->lookup_func, -+ dpcls_subtable_get_best_impl(u0_bits, u1_bits)); - subtables_changed += (old_func != subtable->lookup_func); - } -- pvector_publish(pvec); - - return subtables_changed; - } -diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c -index 34fc042373..5f4b60c5a6 100644 ---- a/lib/dpif-netlink.c -+++ b/lib/dpif-netlink.c -@@ -84,6 +84,8 @@ enum { MAX_PORTS = USHRT_MAX }; - #define EPOLLEXCLUSIVE (1u << 28) - #endif - -+#define OVS_DP_F_UNSUPPORTED (1 << 31); -+ - /* This PID is not used by the kernel datapath when using dispatch per CPU, - * but it is required to be set (not zero). */ - #define DPIF_NETLINK_PER_CPU_PID UINT32_MAX -@@ -382,36 +384,62 @@ dpif_netlink_open(const struct dpif_class *class OVS_UNUSED, const char *name, - dp_request.cmd = OVS_DP_CMD_SET; - } - -- /* The Open vSwitch kernel module has two modes for dispatching upcalls: -- * per-vport and per-cpu. -- * -- * When dispatching upcalls per-vport, the kernel will -- * send the upcall via a Netlink socket that has been selected based on the -- * vport that received the packet that is causing the upcall. 
-- * -- * When dispatching upcall per-cpu, the kernel will send the upcall via -- * a Netlink socket that has been selected based on the cpu that received -- * the packet that is causing the upcall. -- * -- * First we test to see if the kernel module supports per-cpu dispatching -- * (the preferred method). If it does not support per-cpu dispatching, we -- * fall back to the per-vport dispatch mode. -+ /* Some older kernels will not reject unknown features. This will cause -+ * 'ovs-vswitchd' to incorrectly assume a feature is supported. In order to -+ * test for that, we attempt to set a feature that we know is not supported -+ * by any kernel. If this feature is not rejected, we can assume we are -+ * running on one of these older kernels. - */ - dp_request.user_features |= OVS_DP_F_UNALIGNED; -- dp_request.user_features &= ~OVS_DP_F_VPORT_PIDS; -- dp_request.user_features |= OVS_DP_F_DISPATCH_UPCALL_PER_CPU; -+ dp_request.user_features |= OVS_DP_F_VPORT_PIDS; -+ dp_request.user_features |= OVS_DP_F_UNSUPPORTED; - error = dpif_netlink_dp_transact(&dp_request, &dp, &buf); - if (error) { -- dp_request.user_features &= ~OVS_DP_F_DISPATCH_UPCALL_PER_CPU; -+ /* The Open vSwitch kernel module has two modes for dispatching -+ * upcalls: per-vport and per-cpu. -+ * -+ * When dispatching upcalls per-vport, the kernel will -+ * send the upcall via a Netlink socket that has been selected based on -+ * the vport that received the packet that is causing the upcall. -+ * -+ * When dispatching upcall per-cpu, the kernel will send the upcall via -+ * a Netlink socket that has been selected based on the cpu that -+ * received the packet that is causing the upcall. -+ * -+ * First we test to see if the kernel module supports per-cpu -+ * dispatching (the preferred method). If it does not support per-cpu -+ * dispatching, we fall back to the per-vport dispatch mode. 
-+ */ -+ dp_request.user_features &= ~OVS_DP_F_UNSUPPORTED; -+ dp_request.user_features |= OVS_DP_F_UNALIGNED; -+ dp_request.user_features &= ~OVS_DP_F_VPORT_PIDS; -+ dp_request.user_features |= OVS_DP_F_DISPATCH_UPCALL_PER_CPU; -+ error = dpif_netlink_dp_transact(&dp_request, &dp, &buf); -+ if (error) { -+ dp_request.user_features &= ~OVS_DP_F_DISPATCH_UPCALL_PER_CPU; -+ dp_request.user_features |= OVS_DP_F_VPORT_PIDS; -+ error = dpif_netlink_dp_transact(&dp_request, &dp, &buf); -+ } -+ if (error) { -+ return error; -+ } -+ -+ error = open_dpif(&dp, dpifp); -+ dpif_netlink_set_features(*dpifp, OVS_DP_F_TC_RECIRC_SHARING); -+ } else { -+ VLOG_INFO("Kernel does not correctly support feature negotiation. " -+ "Using standard features."); -+ dp_request.cmd = OVS_DP_CMD_SET; -+ dp_request.user_features = 0; -+ dp_request.user_features |= OVS_DP_F_UNALIGNED; - dp_request.user_features |= OVS_DP_F_VPORT_PIDS; - error = dpif_netlink_dp_transact(&dp_request, &dp, &buf); -- } -- if (error) { -- return error; -+ if (error) { -+ return error; -+ } -+ error = open_dpif(&dp, dpifp); - } - -- error = open_dpif(&dp, dpifp); -- dpif_netlink_set_features(*dpifp, OVS_DP_F_TC_RECIRC_SHARING); - ofpbuf_delete(buf); - - if (create) { -diff --git a/lib/flow.c b/lib/flow.c -index 89837de95d..a021bc0eba 100644 ---- a/lib/flow.c -+++ b/lib/flow.c -@@ -1006,14 +1006,18 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) - if (OVS_LIKELY(nw_proto == IPPROTO_TCP)) { - if (OVS_LIKELY(size >= TCP_HEADER_LEN)) { - const struct tcp_header *tcp = data; -- -- miniflow_push_be32(mf, arp_tha.ea[2], 0); -- miniflow_push_be32(mf, tcp_flags, -- TCP_FLAGS_BE32(tcp->tcp_ctl)); -- miniflow_push_be16(mf, tp_src, tcp->tcp_src); -- miniflow_push_be16(mf, tp_dst, tcp->tcp_dst); -- miniflow_push_be16(mf, ct_tp_src, ct_tp_src); -- miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst); -+ size_t tcp_hdr_len = TCP_OFFSET(tcp->tcp_ctl) * 4; -+ -+ if (OVS_LIKELY(tcp_hdr_len >= TCP_HEADER_LEN) -+ && 
OVS_LIKELY(size >= tcp_hdr_len)) { -+ miniflow_push_be32(mf, arp_tha.ea[2], 0); -+ miniflow_push_be32(mf, tcp_flags, -+ TCP_FLAGS_BE32(tcp->tcp_ctl)); -+ miniflow_push_be16(mf, tp_src, tcp->tcp_src); -+ miniflow_push_be16(mf, tp_dst, tcp->tcp_dst); -+ miniflow_push_be16(mf, ct_tp_src, ct_tp_src); -+ miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst); -+ } - } - } else if (OVS_LIKELY(nw_proto == IPPROTO_UDP)) { - if (OVS_LIKELY(size >= UDP_HEADER_LEN)) { -diff --git a/lib/hindex.h b/lib/hindex.h -index 876c5a9e39..f7a30d511a 100644 ---- a/lib/hindex.h -+++ b/lib/hindex.h -@@ -128,18 +128,22 @@ void hindex_remove(struct hindex *, struct hindex_node *); - * Evaluates HASH only once. - */ - #define HINDEX_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, HINDEX) \ -- for (INIT_CONTAINER(NODE, hindex_node_with_hash(HINDEX, HASH), MEMBER); \ -- NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER); \ -- ASSIGN_CONTAINER(NODE, (NODE)->MEMBER.s, MEMBER)) -+ for (INIT_MULTIVAR(NODE, MEMBER, hindex_node_with_hash(HINDEX, HASH), \ -+ struct hindex_node); \ -+ CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ -+ UPDATE_MULTIVAR(NODE, ITER_VAR(NODE)->s)) - - /* Safe when NODE may be freed (not needed when NODE may be removed from the - * hash map but its members remain accessible and intact). */ --#define HINDEX_FOR_EACH_WITH_HASH_SAFE(NODE, NEXT, MEMBER, HASH, HINDEX) \ -- for (INIT_CONTAINER(NODE, hindex_node_with_hash(HINDEX, HASH), MEMBER); \ -- (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER) \ -- ? 
INIT_CONTAINER(NEXT, (NODE)->MEMBER.s, MEMBER), 1 \ -- : 0); \ -- (NODE) = (NEXT)) -+#define HINDEX_FOR_EACH_WITH_HASH_SAFE(NODE, NEXT, MEMBER, HASH, HINDEX) \ -+ for (INIT_MULTIVAR_SAFE_LONG(NODE, NEXT, MEMBER, \ -+ hindex_node_with_hash(HINDEX, HASH), \ -+ struct hindex_node); \ -+ CONDITION_MULTIVAR_SAFE_LONG(NODE, NEXT, MEMBER, \ -+ ITER_VAR(NODE) != NULL, \ -+ ITER_VAR(NEXT) = ITER_VAR(NODE)->s, \ -+ ITER_VAR(NEXT) != NULL); \ -+ UPDATE_MULTIVAR_SAFE_LONG(NODE, NEXT)) - - /* Returns the head node in 'hindex' with the given 'hash', or a null pointer - * if no nodes have that hash value. */ -@@ -157,19 +161,22 @@ hindex_node_with_hash(const struct hindex *hindex, size_t hash) - /* Iteration. */ - - /* Iterates through every node in HINDEX. */ --#define HINDEX_FOR_EACH(NODE, MEMBER, HINDEX) \ -- for (INIT_CONTAINER(NODE, hindex_first(HINDEX), MEMBER); \ -- NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER); \ -- ASSIGN_CONTAINER(NODE, hindex_next(HINDEX, &(NODE)->MEMBER), MEMBER)) -+#define HINDEX_FOR_EACH(NODE, MEMBER, HINDEX) \ -+ for (INIT_MULTIVAR(NODE, MEMBER, hindex_first(HINDEX), \ -+ struct hindex_node); \ -+ CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ -+ UPDATE_MULTIVAR(NODE, hindex_next(HINDEX, ITER_VAR(NODE)))) - - /* Safe when NODE may be freed (not needed when NODE may be removed from the - * hash index but its members remain accessible and intact). */ --#define HINDEX_FOR_EACH_SAFE(NODE, NEXT, MEMBER, HINDEX) \ -- for (INIT_CONTAINER(NODE, hindex_first(HINDEX), MEMBER); \ -- (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER) \ -- ? 
INIT_CONTAINER(NEXT, hindex_next(HINDEX, &(NODE)->MEMBER), MEMBER), 1 \ -- : 0); \ -- (NODE) = (NEXT)) -+#define HINDEX_FOR_EACH_SAFE(NODE, NEXT, MEMBER, HINDEX) \ -+ for (INIT_MULTIVAR_SAFE_LONG(NODE, NEXT, MEMBER, hindex_first(HINDEX), \ -+ struct hindex_node); \ -+ CONDITION_MULTIVAR_SAFE_LONG(NODE, NEXT, MEMBER, \ -+ ITER_VAR(NODE) != NULL, \ -+ ITER_VAR(NEXT) = hindex_next(HINDEX, ITER_VAR(NODE)), \ -+ ITER_VAR(NEXT) != NULL); \ -+ UPDATE_MULTIVAR_SAFE_LONG(NODE, NEXT)) - - struct hindex_node *hindex_first(const struct hindex *); - struct hindex_node *hindex_next(const struct hindex *, -diff --git a/lib/ipf.c b/lib/ipf.c -index d9f781147a..507db2aea2 100644 ---- a/lib/ipf.c -+++ b/lib/ipf.c -@@ -943,6 +943,8 @@ ipf_extract_frags_from_batch(struct ipf *ipf, struct dp_packet_batch *pb, - ovs_mutex_lock(&ipf->ipf_lock); - if (!ipf_handle_frag(ipf, pkt, dl_type, zone, now, hash_basis)) { - dp_packet_batch_refill(pb, pkt, pb_idx); -+ } else { -+ dp_packet_delete(pkt); - } - ovs_mutex_unlock(&ipf->ipf_lock); - } else { -@@ -1152,52 +1154,56 @@ ipf_post_execute_reass_pkts(struct ipf *ipf, - * NETDEV_MAX_BURST. 
*/ - DP_PACKET_BATCH_REFILL_FOR_EACH (pb_idx, pb_cnt, pkt, pb) { - if (rp && pkt == rp->list->reass_execute_ctx) { -+ const struct ipf_frag *frag_0 = &rp->list->frag_list[0]; -+ void *l4_frag = dp_packet_l4(frag_0->pkt); -+ void *l4_reass = dp_packet_l4(pkt); -+ memcpy(l4_frag, l4_reass, dp_packet_l4_size(frag_0->pkt)); -+ - for (int i = 0; i <= rp->list->last_inuse_idx; i++) { -- rp->list->frag_list[i].pkt->md.ct_label = pkt->md.ct_label; -- rp->list->frag_list[i].pkt->md.ct_mark = pkt->md.ct_mark; -- rp->list->frag_list[i].pkt->md.ct_state = pkt->md.ct_state; -- rp->list->frag_list[i].pkt->md.ct_zone = pkt->md.ct_zone; -- rp->list->frag_list[i].pkt->md.ct_orig_tuple_ipv6 = -+ const struct ipf_frag *frag_i = &rp->list->frag_list[i]; -+ -+ frag_i->pkt->md.ct_label = pkt->md.ct_label; -+ frag_i->pkt->md.ct_mark = pkt->md.ct_mark; -+ frag_i->pkt->md.ct_state = pkt->md.ct_state; -+ frag_i->pkt->md.ct_zone = pkt->md.ct_zone; -+ frag_i->pkt->md.ct_orig_tuple_ipv6 = - pkt->md.ct_orig_tuple_ipv6; - if (pkt->md.ct_orig_tuple_ipv6) { -- rp->list->frag_list[i].pkt->md.ct_orig_tuple.ipv6 = -+ frag_i->pkt->md.ct_orig_tuple.ipv6 = - pkt->md.ct_orig_tuple.ipv6; - } else { -- rp->list->frag_list[i].pkt->md.ct_orig_tuple.ipv4 = -+ frag_i->pkt->md.ct_orig_tuple.ipv4 = - pkt->md.ct_orig_tuple.ipv4; - } -- } -- -- const struct ipf_frag *frag_0 = &rp->list->frag_list[0]; -- void *l4_frag = dp_packet_l4(frag_0->pkt); -- void *l4_reass = dp_packet_l4(pkt); -- memcpy(l4_frag, l4_reass, dp_packet_l4_size(frag_0->pkt)); -- -- if (v6) { -- struct ovs_16aligned_ip6_hdr *l3_frag -- = dp_packet_l3(frag_0->pkt); -- struct ovs_16aligned_ip6_hdr *l3_reass = dp_packet_l3(pkt); -- l3_frag->ip6_src = l3_reass->ip6_src; -- l3_frag->ip6_dst = l3_reass->ip6_dst; -- } else { -- struct ip_header *l3_frag = dp_packet_l3(frag_0->pkt); -- struct ip_header *l3_reass = dp_packet_l3(pkt); -- if (!dp_packet_hwol_is_ipv4(frag_0->pkt)) { -- ovs_be32 reass_ip = -- get_16aligned_be32(&l3_reass->ip_src); -- ovs_be32 
frag_ip = -- get_16aligned_be32(&l3_frag->ip_src); -- -- l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum, -- frag_ip, reass_ip); -- reass_ip = get_16aligned_be32(&l3_reass->ip_dst); -- frag_ip = get_16aligned_be32(&l3_frag->ip_dst); -- l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum, -- frag_ip, reass_ip); -+ if (v6) { -+ struct ovs_16aligned_ip6_hdr *l3_frag -+ = dp_packet_l3(frag_i->pkt); -+ struct ovs_16aligned_ip6_hdr *l3_reass -+ = dp_packet_l3(pkt); -+ l3_frag->ip6_src = l3_reass->ip6_src; -+ l3_frag->ip6_dst = l3_reass->ip6_dst; -+ } else { -+ struct ip_header *l3_frag = dp_packet_l3(frag_i->pkt); -+ struct ip_header *l3_reass = dp_packet_l3(pkt); -+ if (!dp_packet_hwol_is_ipv4(frag_i->pkt)) { -+ ovs_be32 reass_ip = -+ get_16aligned_be32(&l3_reass->ip_src); -+ ovs_be32 frag_ip = -+ get_16aligned_be32(&l3_frag->ip_src); -+ -+ l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum, -+ frag_ip, -+ reass_ip); -+ reass_ip = get_16aligned_be32(&l3_reass->ip_dst); -+ frag_ip = get_16aligned_be32(&l3_frag->ip_dst); -+ l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum, -+ frag_ip, -+ reass_ip); -+ } -+ -+ l3_frag->ip_src = l3_reass->ip_src; -+ l3_frag->ip_dst = l3_reass->ip_dst; - } -- -- l3_frag->ip_src = l3_reass->ip_src; -- l3_frag->ip_dst = l3_reass->ip_dst; - } - - ipf_completed_list_add(&ipf->frag_complete_list, rp->list); -diff --git a/lib/json.c b/lib/json.c -index 32d25003b8..0baf7c622c 100644 ---- a/lib/json.c -+++ b/lib/json.c -@@ -146,6 +146,7 @@ json_type_to_string(enum json_type type) - case JSON_STRING: - return "string"; - -+ case JSON_SERIALIZED_OBJECT: - case JSON_N_TYPES: - default: - return ""; -@@ -180,6 +181,14 @@ json_string_create(const char *s) - return json_string_create_nocopy(xstrdup(s)); - } - -+struct json * -+json_serialized_object_create(const struct json *src) -+{ -+ struct json *json = json_create(JSON_SERIALIZED_OBJECT); -+ json->string = json_to_string(src, JSSF_SORT); -+ return json; -+} -+ - struct json * - 
json_array_create_empty(void) - { -@@ -309,6 +318,13 @@ json_string(const struct json *json) - return json->string; - } - -+const char * -+json_serialized_object(const struct json *json) -+{ -+ ovs_assert(json->type == JSON_SERIALIZED_OBJECT); -+ return json->string; -+} -+ - struct json_array * - json_array(const struct json *json) - { -@@ -362,6 +378,7 @@ json_destroy(struct json *json) - break; - - case JSON_STRING: -+ case JSON_SERIALIZED_OBJECT: - free(json->string); - break; - -@@ -422,6 +439,9 @@ json_deep_clone(const struct json *json) - case JSON_STRING: - return json_string_create(json->string); - -+ case JSON_SERIALIZED_OBJECT: -+ return json_serialized_object_create(json); -+ - case JSON_NULL: - case JSON_FALSE: - case JSON_TRUE: -@@ -521,6 +541,7 @@ json_hash(const struct json *json, size_t basis) - return json_hash_array(&json->array, basis); - - case JSON_STRING: -+ case JSON_SERIALIZED_OBJECT: - return hash_string(json->string, basis); - - case JSON_NULL: -@@ -596,6 +617,7 @@ json_equal(const struct json *a, const struct json *b) - return json_equal_array(&a->array, &b->array); - - case JSON_STRING: -+ case JSON_SERIALIZED_OBJECT: - return !strcmp(a->string, b->string); - - case JSON_NULL: -@@ -1072,6 +1094,14 @@ json_from_string(const char *string) - return json_parser_finish(p); - } - -+/* Parses data of JSON_SERIALIZED_OBJECT to the real JSON. */ -+struct json * -+json_from_serialized_object(const struct json *json) -+{ -+ ovs_assert(json->type == JSON_SERIALIZED_OBJECT); -+ return json_from_string(json->string); -+} -+ - /* Reads the file named 'file_name', parses its contents as a JSON object or - * array, and returns a newly allocated 'struct json'. The caller must free - * the returned structure with json_destroy() when it is no longer needed. 
-@@ -1563,6 +1593,10 @@ json_serialize(const struct json *json, struct json_serializer *s) - json_serialize_string(json->string, ds); - break; - -+ case JSON_SERIALIZED_OBJECT: -+ ds_put_cstr(ds, json->string); -+ break; -+ - case JSON_N_TYPES: - default: - OVS_NOT_REACHED(); -@@ -1696,14 +1730,30 @@ json_serialize_string(const char *string, struct ds *ds) - { - uint8_t c; - uint8_t c2; -+ size_t count; - const char *escape; -+ const char *start; - - ds_put_char(ds, '"'); -+ count = 0; -+ start = string; - while ((c = *string++) != '\0') { -- escape = chars_escaping[c]; -- while ((c2 = *escape++) != '\0') { -- ds_put_char(ds, c2); -+ if (c >= ' ' && c != '"' && c != '\\') { -+ count++; -+ } else { -+ if (count) { -+ ds_put_buffer(ds, start, count); -+ count = 0; -+ } -+ start = string; -+ escape = chars_escaping[c]; -+ while ((c2 = *escape++) != '\0') { -+ ds_put_char(ds, c2); -+ } - } - } -+ if (count) { -+ ds_put_buffer(ds, start, count); -+ } - ds_put_char(ds, '"'); - } -diff --git a/lib/lldp/lldp.c b/lib/lldp/lldp.c -index 18afbab9a7..dfeb2a8002 100644 ---- a/lib/lldp/lldp.c -+++ b/lib/lldp/lldp.c -@@ -146,7 +146,9 @@ static void - lldp_tlv_end(struct dp_packet *p, unsigned int start) - { - ovs_be16 *tlv = dp_packet_at_assert(p, start, 2); -- *tlv |= htons((dp_packet_size(p) - (start + 2)) & 0x1ff); -+ put_unaligned_be16(tlv, -+ get_unaligned_be16(tlv) -+ | htons((dp_packet_size(p) - (start + 2)) & 0x1ff)); - } - - int -diff --git a/lib/meta-flow.c b/lib/meta-flow.c -index c808d205d5..e03cd8d0c5 100644 ---- a/lib/meta-flow.c -+++ b/lib/meta-flow.c -@@ -1788,6 +1788,19 @@ mf_is_tun_metadata(const struct mf_field *mf) - mf->id < MFF_TUN_METADATA0 + TUN_METADATA_NUM_OPTS; - } - -+bool -+mf_is_frozen_metadata(const struct mf_field *mf) -+{ -+ if (mf->id >= MFF_TUN_ID && mf->id <= MFF_IN_PORT_OXM) { -+ return true; -+ } -+ -+ if (mf->id >= MFF_REG0 && mf->id < MFF_ETH_SRC) { -+ return true; -+ } -+ return false; -+} -+ - bool - mf_is_pipeline_field(const struct 
mf_field *mf) - { -diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c -index 45a96b9be2..738fb44b3c 100644 ---- a/lib/netdev-dpdk.c -+++ b/lib/netdev-dpdk.c -@@ -961,14 +961,6 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) - - rte_eth_dev_info_get(dev->port_id, &info); - -- /* As of DPDK 19.11, it is not allowed to set a mq_mode for -- * virtio PMD driver. */ -- if (!strcmp(info.driver_name, "net_virtio")) { -- conf.rxmode.mq_mode = ETH_MQ_RX_NONE; -- } else { -- conf.rxmode.mq_mode = ETH_MQ_RX_RSS; -- } -- - /* As of DPDK 17.11.1 a few PMDs require to explicitly enable - * scatter to support jumbo RX. - * Setting scatter for the device is done after checking for -@@ -1000,6 +992,11 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) - /* Limit configured rss hash functions to only those supported - * by the eth device. */ - conf.rx_adv_conf.rss_conf.rss_hf &= info.flow_type_rss_offloads; -+ if (conf.rx_adv_conf.rss_conf.rss_hf == 0) { -+ conf.rxmode.mq_mode = ETH_MQ_RX_NONE; -+ } else { -+ conf.rxmode.mq_mode = ETH_MQ_RX_RSS; -+ } - - /* A device may report more queues than it makes available (this has - * been observed for Intel xl710, which reserves some of them for -@@ -2867,6 +2864,9 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid, - bool concurrent_txq) - { - if (OVS_UNLIKELY(!(dev->flags & NETDEV_UP))) { -+ rte_spinlock_lock(&dev->stats_lock); -+ dev->stats.tx_dropped += dp_packet_batch_size(batch); -+ rte_spinlock_unlock(&dev->stats_lock); - dp_packet_delete_batch(batch, true); - return; - } -diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c -index 60dd138914..94c9737110 100644 ---- a/lib/netdev-linux.c -+++ b/lib/netdev-linux.c -@@ -627,6 +627,7 @@ netdev_linux_notify_sock(void) - if (!error) { - size_t i; - -+ nl_sock_listen_all_nsid(sock, true); - for (i = 0; i < ARRAY_SIZE(mcgroups); i++) { - error = nl_sock_join_mcgroup(sock, mcgroups[i]); - if (error) { -@@ -636,7 +637,6 @@ 
netdev_linux_notify_sock(void) - } - } - } -- nl_sock_listen_all_nsid(sock, true); - ovsthread_once_done(&once); - } - -@@ -6285,7 +6285,14 @@ get_stats_via_netlink(const struct netdev *netdev_, struct netdev_stats *stats) - if (ofpbuf_try_pull(reply, NLMSG_HDRLEN + sizeof(struct ifinfomsg))) { - const struct nlattr *a = nl_attr_find(reply, 0, IFLA_STATS64); - if (a && nl_attr_get_size(a) >= sizeof(struct rtnl_link_stats64)) { -- netdev_stats_from_rtnl_link_stats64(stats, nl_attr_get(a)); -+ const struct rtnl_link_stats64 *lstats = nl_attr_get(a); -+ struct rtnl_link_stats64 aligned_lstats; -+ -+ if (!IS_PTR_ALIGNED(lstats)) { -+ memcpy(&aligned_lstats, lstats, sizeof aligned_lstats); -+ lstats = &aligned_lstats; -+ } -+ netdev_stats_from_rtnl_link_stats64(stats, lstats); - error = 0; - } else { - a = nl_attr_find(reply, 0, IFLA_STATS); -diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c -index 9845e8d3fe..e1568e38a0 100644 ---- a/lib/netdev-offload-tc.c -+++ b/lib/netdev-offload-tc.c -@@ -481,10 +481,10 @@ netdev_tc_flow_dump_destroy(struct netdev_flow_dump *dump) - - static void - parse_flower_rewrite_to_netlink_action(struct ofpbuf *buf, -- struct tc_flower *flower) -+ struct tc_action *action) - { -- char *mask = (char *) &flower->rewrite.mask; -- char *data = (char *) &flower->rewrite.key; -+ char *mask = (char *) &action->rewrite.mask; -+ char *data = (char *) &action->rewrite.key; - - for (int type = 0; type < ARRAY_SIZE(set_flower_map); type++) { - char *put = NULL; -@@ -585,8 +585,10 @@ parse_tc_flower_to_stats(struct tc_flower *flower, - } - - memset(stats, 0, sizeof *stats); -- stats->n_packets = get_32aligned_u64(&flower->stats.n_packets); -- stats->n_bytes = get_32aligned_u64(&flower->stats.n_bytes); -+ stats->n_packets = get_32aligned_u64(&flower->stats_sw.n_packets); -+ stats->n_packets += get_32aligned_u64(&flower->stats_hw.n_packets); -+ stats->n_bytes = get_32aligned_u64(&flower->stats_sw.n_bytes); -+ stats->n_bytes += 
get_32aligned_u64(&flower->stats_hw.n_bytes); - stats->used = flower->lastused; - } - -@@ -877,7 +879,7 @@ parse_tc_flower_to_match(struct tc_flower *flower, - } - break; - case TC_ACT_PEDIT: { -- parse_flower_rewrite_to_netlink_action(buf, flower); -+ parse_flower_rewrite_to_netlink_action(buf, action); - } - break; - case TC_ACT_ENCAP: { -@@ -1222,8 +1224,8 @@ parse_put_flow_set_masked_action(struct tc_flower *flower, - uint64_t set_stub[1024 / 8]; - struct ofpbuf set_buf = OFPBUF_STUB_INITIALIZER(set_stub); - char *set_data, *set_mask; -- char *key = (char *) &flower->rewrite.key; -- char *mask = (char *) &flower->rewrite.mask; -+ char *key = (char *) &action->rewrite.key; -+ char *mask = (char *) &action->rewrite.mask; - const struct nlattr *attr; - int i, j, type; - size_t size; -@@ -1265,14 +1267,6 @@ parse_put_flow_set_masked_action(struct tc_flower *flower, - } - } - -- if (!is_all_zeros(&flower->rewrite, sizeof flower->rewrite)) { -- if (flower->rewrite.rewrite == false) { -- flower->rewrite.rewrite = true; -- action->type = TC_ACT_PEDIT; -- flower->action_count++; -- } -- } -- - if (hasmask && !is_all_zeros(set_mask, size)) { - VLOG_DBG_RL(&rl, "unsupported sub attribute of set action type %d", - type); -@@ -1281,6 +1275,8 @@ parse_put_flow_set_masked_action(struct tc_flower *flower, - } - - ofpbuf_uninit(&set_buf); -+ action->type = TC_ACT_PEDIT; -+ flower->action_count++; - return 0; - } - -@@ -1541,6 +1537,12 @@ parse_match_ct_state_to_flower(struct tc_flower *flower, struct match *match) - flower->key.ct_state &= ~(TCA_FLOWER_KEY_CT_FLAGS_NEW); - flower->mask.ct_state &= ~(TCA_FLOWER_KEY_CT_FLAGS_NEW); - } -+ -+ if (flower->key.ct_state && -+ !(flower->key.ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED)) { -+ flower->key.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_TRACKED; -+ flower->mask.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_TRACKED; -+ } - } - - if (mask->ct_zone) { -@@ -1841,7 +1843,25 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, - 
VLOG_DBG_RL(&rl, "Can't find netdev for output port %d", port); - return ENODEV; - } -+ -+ if (!netdev_flow_api_equals(netdev, outdev)) { -+ VLOG_DBG_RL(&rl, -+ "Flow API provider mismatch between ingress (%s) " -+ "and egress (%s) ports", -+ netdev_get_name(netdev), netdev_get_name(outdev)); -+ netdev_close(outdev); -+ return EOPNOTSUPP; -+ } -+ - action->out.ifindex_out = netdev_get_ifindex(outdev); -+ if (action->out.ifindex_out < 0) { -+ VLOG_DBG_RL(&rl, -+ "Can't find ifindex for output port %s, error %d", -+ netdev_get_name(outdev), action->out.ifindex_out); -+ netdev_close(outdev); -+ return -action->out.ifindex_out; -+ } -+ - action->out.ingress = is_internal_port(netdev_get_type(outdev)); - action->type = TC_ACT_OUTPUT; - flower.action_count++; -@@ -2015,9 +2035,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED, - if (stats) { - memset(stats, 0, sizeof *stats); - if (!tc_get_flower(&id, &flower)) { -- stats->n_packets = get_32aligned_u64(&flower.stats.n_packets); -- stats->n_bytes = get_32aligned_u64(&flower.stats.n_bytes); -- stats->used = flower.lastused; -+ parse_tc_flower_to_stats(&flower, stats); - } - } - -diff --git a/lib/odp-util.c b/lib/odp-util.c -index 7729a90608..36e7161714 100644 ---- a/lib/odp-util.c -+++ b/lib/odp-util.c -@@ -2941,7 +2941,7 @@ odp_nsh_key_from_attr__(const struct nlattr *attr, bool is_mask, - const struct ovs_nsh_key_md1 *md1 = nl_attr_get(a); - has_md1 = true; - memcpy(nsh->context, md1->context, sizeof md1->context); -- if (len == 2 * sizeof(*md1)) { -+ if (nsh_mask && (len == 2 * sizeof *md1)) { - const struct ovs_nsh_key_md1 *md1_mask = md1 + 1; - memcpy(nsh_mask->context, md1_mask->context, - sizeof(*md1_mask)); -@@ -3212,7 +3212,7 @@ tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key, - - opts.flags = tun_key->gtpu_flags; - opts.msgtype = tun_key->gtpu_msgtype; -- nl_msg_put_unspec(a, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, -+ nl_msg_put_unspec(a, OVS_TUNNEL_KEY_ATTR_GTPU_OPTS, - &opts, sizeof(opts)); - } 
- nl_msg_end_nested(a, tun_key_ofs); -@@ -4601,6 +4601,11 @@ odp_flow_format(const struct nlattr *key, size_t key_len, - ds_put_char(ds, ','); - } - ds_put_cstr(ds, "eth()"); -+ } else if (attr_type == OVS_KEY_ATTR_PACKET_TYPE && is_wildcard) { -+ /* See the above help text, however in the case where the -+ * packet type is not shown, we still need to display the -+ * eth() header if the packets type is wildcarded. */ -+ has_packet_type_key = false; - } - ofpbuf_clear(&ofp); - } -@@ -4618,7 +4623,7 @@ odp_flow_format(const struct nlattr *key, size_t key_len, - } - ds_put_char(ds, ')'); - } -- if (!has_ethtype_key) { -+ if (!has_ethtype_key && mask) { - const struct nlattr *ma = nl_attr_find__(mask, mask_len, - OVS_KEY_ATTR_ETHERTYPE); - if (ma) { -@@ -7132,11 +7137,6 @@ parse_l2_5_onward(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1], - } - } - } -- } else if (src_flow->nw_proto == IPPROTO_IGMP -- && src_flow->dl_type == htons(ETH_TYPE_IP)) { -- /* OVS userspace parses the IGMP type, code, and group, but its -- * datapaths do not, so there is always missing information. 
*/ -- return ODP_FIT_TOO_LITTLE; - } - if (is_mask && expected_bit != OVS_KEY_ATTR_UNSPEC) { - if ((flow->tp_src || flow->tp_dst) && flow->nw_proto != 0xff) { -diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c -index ecf914eac1..7ea4b6ed56 100644 ---- a/lib/ofp-actions.c -+++ b/lib/ofp-actions.c -@@ -853,7 +853,9 @@ decode_NXAST_RAW_CONTROLLER2(const struct ext_action_header *eah, - case NXAC2PT_REASON: { - uint8_t u8; - error = ofpprop_parse_u8(&payload, &u8); -- oc->reason = u8; -+ if (!error) { -+ oc->reason = u8; -+ } - break; - } - -diff --git a/lib/ofp-flow.c b/lib/ofp-flow.c -index ff0396845a..3bc744f78f 100644 ---- a/lib/ofp-flow.c -+++ b/lib/ofp-flow.c -@@ -1254,7 +1254,16 @@ ofputil_append_flow_stats_reply(const struct ofputil_flow_stats *fs, - OVS_NOT_REACHED(); - } - -- ofpmp_postappend(replies, start_ofs); -+ if ((reply->size - start_ofs) > (UINT16_MAX - ofpbuf_headersize(reply))) { -+ /* When this happens, the reply will not fit in a single OFP message, -+ * and we should not append it to the queue. We will log a warning -+ * and continue with the next flow stat entry. 
*/ -+ reply->size = start_ofs; -+ VLOG_WARN_RL(&rl, "Flow exceeded the maximum flow statistics reply " -+ "size and was excluded from the response set"); -+ } else { -+ ofpmp_postappend(replies, start_ofs); -+ } - fs_->match.flow.tunnel.metadata.tab = orig_tun_table; - } - -diff --git a/lib/ofp-packet.c b/lib/ofp-packet.c -index 4579548ee1..9485ddfc93 100644 ---- a/lib/ofp-packet.c -+++ b/lib/ofp-packet.c -@@ -133,7 +133,9 @@ decode_nx_packet_in2(const struct ofp_header *oh, bool loose, - case NXPINT_FULL_LEN: { - uint32_t u32; - error = ofpprop_parse_u32(&payload, &u32); -- *total_len = u32; -+ if (!error) { -+ *total_len = u32; -+ } - break; - } - -@@ -152,7 +154,9 @@ decode_nx_packet_in2(const struct ofp_header *oh, bool loose, - case NXPINT_REASON: { - uint8_t reason; - error = ofpprop_parse_u8(&payload, &reason); -- pin->reason = reason; -+ if (!error) { -+ pin->reason = reason; -+ } - break; - } - -@@ -883,7 +887,9 @@ ofputil_decode_packet_in_private(const struct ofp_header *oh, bool loose, - case NXCPT_ODP_PORT: { - uint32_t value; - error = ofpprop_parse_u32(&payload, &value); -- pin->odp_port = u32_to_odp(value); -+ if (!error) { -+ pin->odp_port = u32_to_odp(value); -+ } - break; - } - -diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h -index ecc251a7ff..83bd10cca5 100644 ---- a/lib/ovs-numa.h -+++ b/lib/ovs-numa.h -@@ -68,9 +68,9 @@ void ovs_numa_dump_destroy(struct ovs_numa_dump *); - int ovs_numa_thread_setaffinity_core(unsigned core_id); - - #define FOR_EACH_CORE_ON_DUMP(ITER, DUMP) \ -- HMAP_FOR_EACH((ITER), hmap_node, &(DUMP)->cores) -+ HMAP_FOR_EACH (ITER, hmap_node, &(DUMP)->cores) - - #define FOR_EACH_NUMA_ON_DUMP(ITER, DUMP) \ -- HMAP_FOR_EACH((ITER), hmap_node, &(DUMP)->numas) -+ HMAP_FOR_EACH (ITER, hmap_node, &(DUMP)->numas) - - #endif /* ovs-numa.h */ -diff --git a/lib/ovs-rcu.c b/lib/ovs-rcu.c -index 1866bd3088..946aa04d18 100644 ---- a/lib/ovs-rcu.c -+++ b/lib/ovs-rcu.c -@@ -444,3 +444,40 @@ ovsrcu_init_module(void) - 
ovsthread_once_done(&once); - } - } -+ -+static void -+ovsrcu_barrier_func(void *seq_) -+{ -+ struct seq *seq = (struct seq *) seq_; -+ seq_change(seq); -+} -+ -+/* Similar to the kernel rcu_barrier, ovsrcu_barrier waits for all outstanding -+ * RCU callbacks to complete. However, unlike the kernel rcu_barrier, which -+ * might return immediately if there are no outstanding RCU callbacks, -+ * this API will at least wait for a grace period. -+ * -+ * Another issue the caller might need to know is that the barrier is just -+ * for "one-shot", i.e. if inside some RCU callbacks, another RCU callback is -+ * registered, this API only guarantees the first round of RCU callbacks have -+ * been executed after it returns. -+ */ -+void -+ovsrcu_barrier(void) -+{ -+ struct seq *seq = seq_create(); -+ /* First let all threads flush their cbsets. */ -+ ovsrcu_synchronize(); -+ -+ /* Then register a new cbset, ensure this cbset -+ * is at the tail of the global list. */ -+ uint64_t seqno = seq_read(seq); -+ ovsrcu_postpone__(ovsrcu_barrier_func, (void *) seq); -+ -+ do { -+ seq_wait(seq, seqno); -+ poll_block(); -+ } while (seqno == seq_read(seq)); -+ -+ seq_destroy(seq); -+} -diff --git a/lib/ovs-rcu.h b/lib/ovs-rcu.h -index ecc4c92010..8b397b7fb0 100644 ---- a/lib/ovs-rcu.h -+++ b/lib/ovs-rcu.h -@@ -155,6 +155,19 @@ - * port_delete(id); - * } - * -+ * Use ovsrcu_barrier() to wait for all the outstanding RCU callbacks to -+ * finish. This is useful when you have to destroy some resources however -+ * these resources are referenced in the outstanding RCU callbacks. 
-+ * -+ * void rcu_cb(void *A) { -+ * do_something(A); -+ * } -+ * -+ * void destroy_A() { -+ * ovsrcu_postpone(rcu_cb, A); // will use A later -+ * ovsrcu_barrier(); // wait for rcu_cb done -+ * do_destroy_A(); // free A -+ * } - */ - - #include "compiler.h" -@@ -310,4 +323,6 @@ void ovsrcu_synchronize(void); - - void ovsrcu_exit(void); - -+void ovsrcu_barrier(void); -+ - #endif /* ovs-rcu.h */ -diff --git a/lib/ovsdb-cs.c b/lib/ovsdb-cs.c -index 659d49dbf7..dead31275d 100644 ---- a/lib/ovsdb-cs.c -+++ b/lib/ovsdb-cs.c -@@ -1109,6 +1109,23 @@ ovsdb_cs_db_sync_condition(struct ovsdb_cs_db *db) - } - table->req_cond = NULL; - db->cond_changed = true; -+ -+ /* There are two cases: -+ * a. either the server already processed the requested monitor -+ * condition change but the FSM was restarted before the -+ * client was notified. In this case the client should -+ * clear its local cache because it's out of sync with the -+ * monitor view on the server side. -+ * -+ * b. OR the server hasn't processed the requested monitor -+ * condition change yet. -+ * -+ * As there's no easy way to differentiate between the two, -+ * and given that this condition should be rare, reset the -+ * 'last_id', essentially flushing the local cached DB -+ * contents. 
-+ */ -+ db->last_id = UUID_ZERO; - } - } - } -@@ -1539,12 +1556,11 @@ ovsdb_cs_db_parse_monitor_reply(struct ovsdb_cs_db *db, - const struct json *table_updates; - bool clear; - if (version == 3) { -- struct uuid last_id; - if (result->type != JSON_ARRAY || result->array.n != 3 - || (result->array.elems[0]->type != JSON_TRUE && - result->array.elems[0]->type != JSON_FALSE) - || result->array.elems[1]->type != JSON_STRING -- || !uuid_from_string(&last_id, -+ || !uuid_from_string(&db->last_id, - json_string(result->array.elems[1]))) { - struct ovsdb_error *error = ovsdb_syntax_error( - result, NULL, "bad monitor_cond_since reply format"); -@@ -1833,7 +1849,7 @@ server_column_get_string(const struct server_row *row, - { - ovs_assert(server_columns[index].type.key.type == OVSDB_TYPE_STRING); - const struct ovsdb_datum *d = &row->data[index]; -- return d->n == 1 ? d->keys[0].string : default_value; -+ return d->n == 1 ? d->keys[0].s->string : default_value; - } - - static bool -diff --git a/lib/ovsdb-data.c b/lib/ovsdb-data.c -index c145f5ad97..6654ed6deb 100644 ---- a/lib/ovsdb-data.c -+++ b/lib/ovsdb-data.c -@@ -74,7 +74,7 @@ ovsdb_atom_init_default(union ovsdb_atom *atom, enum ovsdb_atomic_type type) - break; - - case OVSDB_TYPE_STRING: -- atom->string = xmemdup("", 1); -+ atom->s = ovsdb_atom_string_create_nocopy(xmemdup("", 1)); - break; - - case OVSDB_TYPE_UUID: -@@ -136,7 +136,7 @@ ovsdb_atom_is_default(const union ovsdb_atom *atom, - return atom->boolean == false; - - case OVSDB_TYPE_STRING: -- return atom->string[0] == '\0'; -+ return atom->s->string[0] == '\0'; - - case OVSDB_TYPE_UUID: - return uuid_is_zero(&atom->uuid); -@@ -172,7 +172,8 @@ ovsdb_atom_clone(union ovsdb_atom *new, const union ovsdb_atom *old, - break; - - case OVSDB_TYPE_STRING: -- new->string = xstrdup(old->string); -+ new->s = old->s; -+ new->s->n_refs++; - break; - - case OVSDB_TYPE_UUID: -@@ -214,7 +215,7 @@ ovsdb_atom_hash(const union ovsdb_atom *atom, enum ovsdb_atomic_type type, - 
return hash_boolean(atom->boolean, basis); - - case OVSDB_TYPE_STRING: -- return hash_string(atom->string, basis); -+ return hash_string(atom->s->string, basis); - - case OVSDB_TYPE_UUID: - return hash_int(uuid_hash(&atom->uuid), basis); -@@ -246,7 +247,7 @@ ovsdb_atom_compare_3way(const union ovsdb_atom *a, - return a->boolean - b->boolean; - - case OVSDB_TYPE_STRING: -- return strcmp(a->string, b->string); -+ return a->s == b->s ? 0 : strcmp(a->s->string, b->s->string); - - case OVSDB_TYPE_UUID: - return uuid_compare_3way(&a->uuid, &b->uuid); -@@ -404,7 +405,7 @@ ovsdb_atom_from_json__(union ovsdb_atom *atom, - - case OVSDB_TYPE_STRING: - if (json->type == JSON_STRING) { -- atom->string = xstrdup(json->string); -+ atom->s = ovsdb_atom_string_create(json->string); - return NULL; - } - break; -@@ -473,7 +474,7 @@ ovsdb_atom_to_json(const union ovsdb_atom *atom, enum ovsdb_atomic_type type) - return json_boolean_create(atom->boolean); - - case OVSDB_TYPE_STRING: -- return json_string_create(atom->string); -+ return json_string_create(atom->s->string); - - case OVSDB_TYPE_UUID: - return wrap_json("uuid", json_string_create_nocopy( -@@ -551,14 +552,18 @@ ovsdb_atom_from_string__(union ovsdb_atom *atom, - if (s_len < 2 || s[s_len - 1] != '"') { - return xasprintf("%s: missing quote at end of " - "quoted string", s); -- } else if (!json_string_unescape(s + 1, s_len - 2, -- &atom->string)) { -- char *error = xasprintf("%s: %s", s, atom->string); -- free(atom->string); -- return error; -+ } else { -+ char *res; -+ if (json_string_unescape(s + 1, s_len - 2, &res)) { -+ atom->s = ovsdb_atom_string_create_nocopy(res); -+ } else { -+ char *error = xasprintf("%s: %s", s, res); -+ free(res); -+ return error; -+ } - } - } else { -- atom->string = xstrdup(s); -+ atom->s = ovsdb_atom_string_create(s); - } - break; - -@@ -721,14 +726,14 @@ ovsdb_atom_to_string(const union ovsdb_atom *atom, enum ovsdb_atomic_type type, - break; - - case OVSDB_TYPE_STRING: -- if 
(string_needs_quotes(atom->string)) { -+ if (string_needs_quotes(atom->s->string)) { - struct json json; - - json.type = JSON_STRING; -- json.string = atom->string; -+ json.string = atom->s->string; - json_to_ds(&json, 0, out); - } else { -- ds_put_cstr(out, atom->string); -+ ds_put_cstr(out, atom->s->string); - } - break; - -@@ -750,7 +755,7 @@ ovsdb_atom_to_bare(const union ovsdb_atom *atom, enum ovsdb_atomic_type type, - struct ds *out) - { - if (type == OVSDB_TYPE_STRING) { -- ds_put_cstr(out, atom->string); -+ ds_put_cstr(out, atom->s->string); - } else { - ovsdb_atom_to_string(atom, type, out); - } -@@ -799,7 +804,7 @@ ovsdb_atom_check_constraints(const union ovsdb_atom *atom, - const struct ovsdb_base_type *base) - { - if (base->enum_ -- && ovsdb_datum_find_key(base->enum_, atom, base->type) == UINT_MAX) { -+ && !ovsdb_datum_find_key(base->enum_, atom, base->type, NULL)) { - struct ovsdb_error *error; - struct ds actual = DS_EMPTY_INITIALIZER; - struct ds valid = DS_EMPTY_INITIALIZER; -@@ -877,7 +882,7 @@ ovsdb_atom_check_constraints(const union ovsdb_atom *atom, - return NULL; - - case OVSDB_TYPE_STRING: -- return check_string_constraints(atom->string, &base->string); -+ return check_string_constraints(atom->s->string, &base->string); - - case OVSDB_TYPE_UUID: - return NULL; -@@ -1691,8 +1696,8 @@ ovsdb_datum_from_smap(struct ovsdb_datum *datum, const struct smap *smap) - struct smap_node *node; - size_t i = 0; - SMAP_FOR_EACH (node, smap) { -- datum->keys[i].string = xstrdup(node->key); -- datum->values[i].string = xstrdup(node->value); -+ datum->keys[i].s = ovsdb_atom_string_create(node->key); -+ datum->values[i].s = ovsdb_atom_string_create(node->value); - i++; - } - ovs_assert(i == datum->n); -@@ -1784,14 +1789,16 @@ ovsdb_datum_compare_3way(const struct ovsdb_datum *a, - a->n)); - } - --/* If 'key' is one of the keys in 'datum', returns its index within 'datum', -- * otherwise UINT_MAX. 
'key.type' must be the type of the atoms stored in the -- * 'keys' array in 'datum'. -+/* If 'key' is one of the keys in 'datum', returns 'true' and sets '*pos' to -+ * its index within 'datum', otherwise returns 'false' and sets '*pos' to the -+ * index where 'key' should have been. 'key.type' must be the type of the -+ * atoms stored in the 'keys' array in 'datum'. - */ --unsigned int -+bool - ovsdb_datum_find_key(const struct ovsdb_datum *datum, - const union ovsdb_atom *key, -- enum ovsdb_atomic_type key_type) -+ enum ovsdb_atomic_type key_type, -+ unsigned int *pos) - { - unsigned int low = 0; - unsigned int high = datum->n; -@@ -1803,10 +1810,16 @@ ovsdb_datum_find_key(const struct ovsdb_datum *datum, - } else if (cmp > 0) { - low = idx + 1; - } else { -- return idx; -+ if (pos) { -+ *pos = idx; -+ } -+ return true; - } - } -- return UINT_MAX; -+ if (pos) { -+ *pos = low; -+ } -+ return false; - } - - /* If 'key' and 'value' is one of the key-value pairs in 'datum', returns its -@@ -1821,10 +1834,11 @@ ovsdb_datum_find_key_value(const struct ovsdb_datum *datum, - const union ovsdb_atom *value, - enum ovsdb_atomic_type value_type) - { -- unsigned int idx = ovsdb_datum_find_key(datum, key, key_type); -- if (idx != UINT_MAX -- && value_type != OVSDB_TYPE_VOID -- && !ovsdb_atom_equals(&datum->values[idx], value, value_type)) { -+ unsigned int idx; -+ -+ if (!ovsdb_datum_find_key(datum, key, key_type, &idx) -+ || (value_type != OVSDB_TYPE_VOID -+ && !ovsdb_atom_equals(&datum->values[idx], value, value_type))) { - idx = UINT_MAX; - } - return idx; -@@ -1948,38 +1962,68 @@ ovsdb_datum_add_unsafe(struct ovsdb_datum *datum, - } - } - -+/* Adds 'n' atoms starting from index 'start_idx' from 'src' to the end of -+ * 'dst'. 'dst' should have enough memory allocated to hold the additional -+ * 'n' atoms. Atoms are not cloned, i.e. 'dst' will reference the same data. -+ * Caller also should take care of the result being sorted. 
*/ -+static void -+ovsdb_datum_push_unsafe(struct ovsdb_datum *dst, -+ const struct ovsdb_datum *src, -+ unsigned int start_idx, unsigned int n, -+ const struct ovsdb_type *type) -+{ -+ memcpy(&dst->keys[dst->n], &src->keys[start_idx], n * sizeof src->keys[0]); -+ if (type->value.type != OVSDB_TYPE_VOID) { -+ memcpy(&dst->values[dst->n], &src->values[start_idx], -+ n * sizeof src->values[0]); -+ } -+ dst->n += n; -+} -+ - void - ovsdb_datum_union(struct ovsdb_datum *a, const struct ovsdb_datum *b, -- const struct ovsdb_type *type, bool replace) -+ const struct ovsdb_type *type) - { -- unsigned int n; -- size_t bi; -+ struct ovsdb_datum result; -+ unsigned int copied, pos; - -- n = a->n; -- for (bi = 0; bi < b->n; bi++) { -- unsigned int ai; -+ ovsdb_datum_init_empty(&result); - -- ai = ovsdb_datum_find_key(a, &b->keys[bi], type->key.type); -- if (ai == UINT_MAX) { -- if (n == a->n) { -- ovsdb_datum_reallocate(a, type, a->n + (b->n - bi)); -- } -- ovsdb_atom_clone(&a->keys[n], &b->keys[bi], type->key.type); -- if (type->value.type != OVSDB_TYPE_VOID) { -- ovsdb_atom_clone(&a->values[n], &b->values[bi], -- type->value.type); -- } -- n++; -- } else if (replace && type->value.type != OVSDB_TYPE_VOID) { -- ovsdb_atom_destroy(&a->values[ai], type->value.type); -- ovsdb_atom_clone(&a->values[ai], &b->values[bi], -+ copied = 0; -+ for (size_t bi = 0; bi < b->n; bi++) { -+ if (ovsdb_datum_find_key(a, &b->keys[bi], type->key.type, &pos)) { -+ /* Atom with the same key already exists. */ -+ continue; -+ } -+ if (!result.keys) { -+ ovsdb_datum_reallocate(&result, type, a->n + (b->n - bi)); -+ } -+ if (pos > copied) { -+ /* Need to copy some atoms from 'a' first. */ -+ ovsdb_datum_push_unsafe(&result, a, copied, pos - copied, type); -+ copied = pos; -+ } -+ /* Inserting new atom from 'b'. 
*/ -+ ovsdb_atom_clone(&result.keys[result.n], &b->keys[bi], type->key.type); -+ if (type->value.type != OVSDB_TYPE_VOID) { -+ ovsdb_atom_clone(&result.values[result.n], &b->values[bi], - type->value.type); - } -+ result.n++; - } -- if (n != a->n) { -- a->n = n; -- ovs_assert(!ovsdb_datum_sort(a, type->key.type)); -+ if (!result.keys) { -+ /* 'a' doesn't need to be changed. */ -+ return; -+ } -+ if (a->n > copied) { -+ /* Copying remaining atoms. */ -+ ovsdb_datum_push_unsafe(&result, a, copied, a->n - copied, type); - } -+ /* All atoms are copied now. */ -+ a->n = 0; -+ -+ ovsdb_datum_swap(&result, a); -+ ovsdb_datum_destroy(&result, type); - } - - void -@@ -1987,26 +2031,55 @@ ovsdb_datum_subtract(struct ovsdb_datum *a, const struct ovsdb_type *a_type, - const struct ovsdb_datum *b, - const struct ovsdb_type *b_type) - { -- bool changed = false; -- size_t i; -+ unsigned int *idx, ai; -+ size_t n_idx; - - ovs_assert(a_type->key.type == b_type->key.type); - ovs_assert(a_type->value.type == b_type->value.type - || b_type->value.type == OVSDB_TYPE_VOID); - -- /* XXX The big-O of this could easily be improved. */ -- for (i = 0; i < a->n; ) { -- unsigned int idx = ovsdb_datum_find(a, i, b, b_type); -- if (idx != UINT_MAX) { -- changed = true; -- ovsdb_datum_remove_unsafe(a, i, a_type); -- } else { -- i++; -+ idx = xmalloc(b->n * sizeof *idx); -+ n_idx = 0; -+ for (size_t bi = 0; bi < b->n; bi++) { -+ ai = ovsdb_datum_find(b, bi, a, b_type); -+ if (ai == UINT_MAX) { -+ /* No such atom in 'a'. */ -+ continue; - } -+ /* Not destroying right away since ovsdb_datum_find() will use them. */ -+ idx[n_idx++] = ai; - } -- if (changed) { -- ovsdb_datum_sort_assert(a, a_type->key.type); -+ if (!n_idx) { -+ free(idx); -+ return; -+ } -+ -+ struct ovsdb_datum result; -+ -+ ovsdb_datum_init_empty(&result); -+ ovsdb_datum_reallocate(&result, a_type, a->n - n_idx); -+ -+ unsigned int start_idx = 0; -+ for (size_t i = 0; i < n_idx; i++) { -+ ai = idx[i]; -+ -+ /* Destroying atom. 
*/ -+ ovsdb_atom_destroy(&a->keys[ai], a_type->key.type); -+ if (a_type->value.type != OVSDB_TYPE_VOID) { -+ ovsdb_atom_destroy(&a->values[ai], a_type->value.type); -+ } -+ -+ /* Copy non-removed atoms from 'a' to result. */ -+ ovsdb_datum_push_unsafe(&result, a, start_idx, ai - start_idx, a_type); -+ start_idx = idx[i] + 1; - } -+ /* Copying remaining atoms. */ -+ ovsdb_datum_push_unsafe(&result, a, start_idx, a->n - start_idx, a_type); -+ a->n = 0; -+ -+ ovsdb_datum_swap(&result, a); -+ ovsdb_datum_destroy(&result, a_type); -+ free(idx); - } - - struct ovsdb_symbol_table * -@@ -2067,6 +2140,64 @@ ovsdb_symbol_table_insert(struct ovsdb_symbol_table *symtab, - - /* APIs for Generating and apply diffs. */ - -+/* Find what needs to be added to and removed from 'old' to construct 'new'. -+ * -+ * The 'added' and 'removed' datums are always safe; the orders of keys are -+ * maintained since they are added in order. */ -+void -+ovsdb_datum_added_removed(struct ovsdb_datum *added, -+ struct ovsdb_datum *removed, -+ const struct ovsdb_datum *old, -+ const struct ovsdb_datum *new, -+ const struct ovsdb_type *type) -+{ -+ size_t oi, ni; -+ -+ ovsdb_datum_init_empty(added); -+ ovsdb_datum_init_empty(removed); -+ if (!ovsdb_type_is_composite(type)) { -+ ovsdb_datum_clone(removed, old, type); -+ ovsdb_datum_clone(added, new, type); -+ return; -+ } -+ -+ /* Generate the diff in O(n) time. 
*/ -+ for (oi = ni = 0; oi < old->n && ni < new->n;) { -+ int c = ovsdb_atom_compare_3way(&old->keys[oi], &new->keys[ni], -+ type->key.type); -+ if (c < 0) { -+ ovsdb_datum_add_unsafe(removed, &old->keys[oi], &old->values[oi], -+ type, NULL); -+ oi++; -+ } else if (c > 0) { -+ ovsdb_datum_add_unsafe(added, &new->keys[ni], &new->values[ni], -+ type, NULL); -+ ni++; -+ } else { -+ if (type->value.type != OVSDB_TYPE_VOID && -+ ovsdb_atom_compare_3way(&old->values[oi], &new->values[ni], -+ type->value.type)) { -+ ovsdb_datum_add_unsafe(removed, &old->keys[oi], -+ &old->values[oi], type, NULL); -+ ovsdb_datum_add_unsafe(added, &new->keys[ni], &new->values[ni], -+ type, NULL); -+ } -+ oi++; ni++; -+ } -+ } -+ -+ for (; oi < old->n; oi++) { -+ ovsdb_datum_add_unsafe(removed, &old->keys[oi], &old->values[oi], -+ type, NULL); -+ } -+ -+ for (; ni < new->n; ni++) { -+ ovsdb_datum_add_unsafe(added, &new->keys[ni], &new->values[ni], -+ type, NULL); -+ } -+} -+ -+ - /* Generate a difference ovsdb_dataum between 'old' and 'new'. - * 'new' can be regenerated by applying the difference to the 'old'. - * -@@ -2127,6 +2258,106 @@ ovsdb_datum_diff(struct ovsdb_datum *diff, - } - } - -+/* Apply 'diff' to 'a'. -+ * -+ * Return NULL if the 'a' is successfully updated, otherwise, return -+ * ovsdb_error. 
*/ -+struct ovsdb_error * -+ovsdb_datum_apply_diff_in_place(struct ovsdb_datum *a, -+ const struct ovsdb_datum *diff, -+ const struct ovsdb_type *type) -+{ -+ struct ovsdb_error *error = NULL; -+ struct ovsdb_datum result; -+ size_t i, new_size; -+ unsigned int *idx, pos; -+ enum { -+ DIFF_OP_ADD, -+ DIFF_OP_REMOVE, -+ DIFF_OP_UPDATE, -+ } *operation; -+ -+ if (!ovsdb_type_is_composite(type)) { -+ ovsdb_datum_destroy(a, type); -+ ovsdb_datum_clone(a, diff, type); -+ return NULL; -+ } -+ -+ operation = xmalloc(diff->n * sizeof *operation); -+ idx = xmalloc(diff->n * sizeof *idx); -+ new_size = a->n; -+ for (i = 0; i < diff->n; i++) { -+ if (!ovsdb_datum_find_key(a, &diff->keys[i], type->key.type, &pos)) { -+ operation[i] = DIFF_OP_ADD; -+ new_size++; -+ } else if (type->value.type != OVSDB_TYPE_VOID -+ && !ovsdb_atom_equals(&diff->values[i], &a->values[pos], -+ type->value.type)) { -+ operation[i] = DIFF_OP_UPDATE; -+ } else { -+ operation[i] = DIFF_OP_REMOVE; -+ new_size--; -+ } -+ idx[i] = pos; -+ } -+ -+ /* Make sure member size of 'new' conforms to type. */ -+ if (new_size < type->n_min || new_size > type->n_max) { -+ error = ovsdb_error(NULL, "Datum crated by diff has size error"); -+ goto exit; -+ } -+ -+ ovsdb_datum_init_empty(&result); -+ ovsdb_datum_reallocate(&result, type, new_size); -+ -+ unsigned int copied = 0; -+ for (i = 0; i < diff->n; i++) { -+ pos = idx[i]; -+ -+ if (copied < pos) { -+ /* Copying all atoms that should go before the current one. */ -+ ovsdb_datum_push_unsafe(&result, a, copied, pos - copied, type); -+ copied = pos; -+ } -+ -+ switch (operation[i]) { -+ case DIFF_OP_UPDATE: -+ case DIFF_OP_ADD: -+ /* Inserting new atom from 'diff'. 
*/ -+ ovsdb_atom_clone(&result.keys[result.n], -+ &diff->keys[i], type->key.type); -+ if (type->value.type != OVSDB_TYPE_VOID) { -+ ovsdb_atom_clone(&result.values[result.n], -+ &diff->values[i], type->value.type); -+ } -+ result.n++; -+ if (operation[i] != DIFF_OP_UPDATE) { -+ break; -+ } -+ /* fall through */ -+ -+ case DIFF_OP_REMOVE: -+ /* Destroying atom. */ -+ ovsdb_atom_destroy(&a->keys[pos], type->key.type); -+ if (type->value.type != OVSDB_TYPE_VOID) { -+ ovsdb_atom_destroy(&a->values[pos], type->value.type); -+ } -+ copied++; /* Skipping removed atom. */ -+ break; -+ } -+ } -+ /* Copying remaining atoms. */ -+ ovsdb_datum_push_unsafe(&result, a, copied, a->n - copied, type); -+ a->n = 0; -+ -+ ovsdb_datum_swap(&result, a); -+ ovsdb_datum_destroy(&result, type); -+exit: -+ free(operation); -+ free(idx); -+ return error; -+} -+ - /* Apply 'diff' to 'old' to regenerate 'new'. - * - * Return NULL if the 'new' is successfully generated, otherwise, return -diff --git a/lib/ovsdb-data.h b/lib/ovsdb-data.h -index c5a80ee39f..f66ed3472c 100644 ---- a/lib/ovsdb-data.h -+++ b/lib/ovsdb-data.h -@@ -20,6 +20,7 @@ - #include "compiler.h" - #include "ovsdb-types.h" - #include "openvswitch/shash.h" -+#include "util.h" - - #ifdef __cplusplus - extern "C" { -@@ -31,12 +32,33 @@ struct ds; - struct ovsdb_symbol_table; - struct smap; - -+struct ovsdb_atom_string { -+ char *string; -+ size_t n_refs; -+}; -+ -+static inline struct ovsdb_atom_string * -+ovsdb_atom_string_create_nocopy(char *str) -+{ -+ struct ovsdb_atom_string *s = xzalloc(sizeof *s); -+ -+ s->string = str; -+ s->n_refs = 1; -+ return s; -+} -+ -+static inline struct ovsdb_atom_string * -+ovsdb_atom_string_create(const char *str) -+{ -+ return ovsdb_atom_string_create_nocopy(xstrdup(str)); -+} -+ - /* One value of an atomic type (given by enum ovs_atomic_type). 
*/ - union ovsdb_atom { - int64_t integer; - double real; - bool boolean; -- char *string; -+ struct ovsdb_atom_string *s; - struct uuid uuid; - }; - -@@ -66,8 +88,9 @@ ovsdb_atom_needs_destruction(enum ovsdb_atomic_type type) - static inline void - ovsdb_atom_destroy(union ovsdb_atom *atom, enum ovsdb_atomic_type type) - { -- if (type == OVSDB_TYPE_STRING) { -- free(atom->string); -+ if (type == OVSDB_TYPE_STRING && !--atom->s->n_refs) { -+ free(atom->s->string); -+ free(atom->s); - } - } - -@@ -209,9 +232,10 @@ bool ovsdb_datum_equals(const struct ovsdb_datum *, - const struct ovsdb_type *); - - /* Search. */ --unsigned int ovsdb_datum_find_key(const struct ovsdb_datum *, -- const union ovsdb_atom *key, -- enum ovsdb_atomic_type key_type); -+bool ovsdb_datum_find_key(const struct ovsdb_datum *, -+ const union ovsdb_atom *key, -+ enum ovsdb_atomic_type key_type, -+ unsigned int *pos); - unsigned int ovsdb_datum_find_key_value(const struct ovsdb_datum *, - const union ovsdb_atom *key, - enum ovsdb_atomic_type key_type, -@@ -227,14 +251,19 @@ bool ovsdb_datum_excludes_all(const struct ovsdb_datum *, - const struct ovsdb_type *); - void ovsdb_datum_union(struct ovsdb_datum *, - const struct ovsdb_datum *, -- const struct ovsdb_type *, -- bool replace); -+ const struct ovsdb_type *); - void ovsdb_datum_subtract(struct ovsdb_datum *a, - const struct ovsdb_type *a_type, - const struct ovsdb_datum *b, - const struct ovsdb_type *b_type); - - /* Generate and apply diffs */ -+void ovsdb_datum_added_removed(struct ovsdb_datum *added, -+ struct ovsdb_datum *removed, -+ const struct ovsdb_datum *old, -+ const struct ovsdb_datum *new, -+ const struct ovsdb_type *type); -+ - void ovsdb_datum_diff(struct ovsdb_datum *diff, - const struct ovsdb_datum *old_datum, - const struct ovsdb_datum *new_datum, -@@ -246,6 +275,12 @@ struct ovsdb_error *ovsdb_datum_apply_diff(struct ovsdb_datum *new_datum, - const struct ovsdb_type *type) - OVS_WARN_UNUSED_RESULT; - -+struct ovsdb_error * 
ovsdb_datum_apply_diff_in_place( -+ struct ovsdb_datum *a, -+ const struct ovsdb_datum *diff, -+ const struct ovsdb_type *type) -+OVS_WARN_UNUSED_RESULT; -+ - /* Raw operations that may not maintain the invariants. */ - void ovsdb_datum_remove_unsafe(struct ovsdb_datum *, size_t idx, - const struct ovsdb_type *); -diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c -index 2198c69c60..496ec490d3 100644 ---- a/lib/ovsdb-idl.c -+++ b/lib/ovsdb-idl.c -@@ -1898,8 +1898,7 @@ ovsdb_idl_index_destroy_row(const struct ovsdb_idl_row *row_) - BITMAP_FOR_EACH_1 (i, class->n_columns, row->written) { - c = &class->columns[i]; - (c->unparse) (row); -- free(row->new_datum[i].values); -- free(row->new_datum[i].keys); -+ ovsdb_datum_destroy(&row->new_datum[i], &c->type); - } - free(row->new_datum); - free(row->written); -@@ -2787,9 +2786,8 @@ ovsdb_idl_txn_extract_mutations(struct ovsdb_idl_row *row, - struct ovsdb_datum *new_datum; - unsigned int pos; - new_datum = map_op_datum(map_op); -- pos = ovsdb_datum_find_key(old_datum, -- &new_datum->keys[0], -- key_type); -+ ovsdb_datum_find_key(old_datum, &new_datum->keys[0], -+ key_type, &pos); - if (ovsdb_atom_equals(&new_datum->values[0], - &old_datum->values[pos], - value_type)) { -@@ -2798,11 +2796,9 @@ ovsdb_idl_txn_extract_mutations(struct ovsdb_idl_row *row, - } - } else if (map_op_type(map_op) == MAP_OP_DELETE){ - /* Verify that there is a key to delete. */ -- unsigned int pos; -- pos = ovsdb_datum_find_key(old_datum, -- &map_op_datum(map_op)->keys[0], -- key_type); -- if (pos == UINT_MAX) { -+ if (!ovsdb_datum_find_key(old_datum, -+ &map_op_datum(map_op)->keys[0], -+ key_type, NULL)) { - /* No key to delete. Move on to next update. */ - VLOG_WARN("Trying to delete a key that doesn't " - "exist in the map."); -@@ -2897,11 +2893,9 @@ ovsdb_idl_txn_extract_mutations(struct ovsdb_idl_row *row, - any_ins = true; - } else { /* SETP_OP_DELETE */ - /* Verify that there is a key to delete. 
*/ -- unsigned int pos; -- pos = ovsdb_datum_find_key(old_datum, -- &set_op_datum(set_op)->keys[0], -- key_type); -- if (pos == UINT_MAX) { -+ if (!ovsdb_datum_find_key(old_datum, -+ &set_op_datum(set_op)->keys[0], -+ key_type, NULL)) { - /* No key to delete. Move on to next update. */ - VLOG_WARN("Trying to delete a key that doesn't " - "exist in the set."); -@@ -4066,7 +4060,6 @@ ovsdb_idl_txn_write_partial_map(const struct ovsdb_idl_row *row_, - struct ovsdb_idl_row *row = CONST_CAST(struct ovsdb_idl_row *, row_); - enum ovsdb_atomic_type key_type; - enum map_op_type op_type; -- unsigned int pos; - const struct ovsdb_datum *old_datum; - - if (!is_valid_partial_update(row, column, datum)) { -@@ -4078,8 +4071,11 @@ ovsdb_idl_txn_write_partial_map(const struct ovsdb_idl_row *row_, - /* Find out if this is an insert or an update. */ - key_type = column->type.key.type; - old_datum = ovsdb_idl_read(row, column); -- pos = ovsdb_datum_find_key(old_datum, &datum->keys[0], key_type); -- op_type = pos == UINT_MAX ? MAP_OP_INSERT : MAP_OP_UPDATE; -+ if (ovsdb_datum_find_key(old_datum, &datum->keys[0], key_type, NULL)) { -+ op_type = MAP_OP_UPDATE; -+ } else { -+ op_type = MAP_OP_INSERT; -+ } - - ovsdb_idl_txn_add_map_op(row, column, datum, op_type); - } -@@ -4112,6 +4108,9 @@ void - ovsdb_idl_loop_destroy(struct ovsdb_idl_loop *loop) - { - if (loop) { -+ if (loop->committing_txn) { -+ ovsdb_idl_txn_destroy(loop->committing_txn); -+ } - ovsdb_idl_destroy(loop->idl); - } - } -@@ -4121,8 +4120,8 @@ ovsdb_idl_loop_run(struct ovsdb_idl_loop *loop) - { - ovsdb_idl_run(loop->idl); - -- /* See if we can commit the loop->committing_txn. */ -- if (loop->committing_txn) { -+ /* See if the 'committing_txn' succeeded in the meantime. 
*/ -+ if (loop->committing_txn && loop->committing_txn->status == TXN_SUCCESS) { - ovsdb_idl_try_commit_loop_txn(loop, NULL); - } - -diff --git a/lib/pcap-file.c b/lib/pcap-file.c -index b30a11c24b..41835f6f4d 100644 ---- a/lib/pcap-file.c -+++ b/lib/pcap-file.c -@@ -89,6 +89,7 @@ ovs_pcap_open(const char *file_name, const char *mode) - : mode[0] == 'w' ? "writing" - : "appending"), - ovs_strerror(errno)); -+ free(p_file); - return NULL; - } - -diff --git a/lib/rculist.h b/lib/rculist.h -index 1072b87af2..c0d77acf94 100644 ---- a/lib/rculist.h -+++ b/lib/rculist.h -@@ -365,35 +365,57 @@ rculist_is_singleton_protected(const struct rculist *list) - return list_next == list->prev && list_next != list; - } - --#define RCULIST_FOR_EACH(ITER, MEMBER, RCULIST) \ -- for (INIT_CONTAINER(ITER, rculist_next(RCULIST), MEMBER); \ -- &(ITER)->MEMBER != (RCULIST); \ -- ASSIGN_CONTAINER(ITER, rculist_next(&(ITER)->MEMBER), MEMBER)) --#define RCULIST_FOR_EACH_CONTINUE(ITER, MEMBER, RCULIST) \ -- for (ASSIGN_CONTAINER(ITER, rculist_next(&(ITER)->MEMBER), MEMBER); \ -- &(ITER)->MEMBER != (RCULIST); \ -- ASSIGN_CONTAINER(ITER, rculist_next(&(ITER)->MEMBER), MEMBER)) -- --#define RCULIST_FOR_EACH_REVERSE_PROTECTED(ITER, MEMBER, RCULIST) \ -- for (INIT_CONTAINER(ITER, (RCULIST)->prev, MEMBER); \ -- &(ITER)->MEMBER != (RCULIST); \ -- ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER)) --#define RCULIST_FOR_EACH_REVERSE_PROTECTED_CONTINUE(ITER, MEMBER, RCULIST) \ -- for (ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER); \ -- &(ITER)->MEMBER != (RCULIST); \ -- ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER)) -- --#define RCULIST_FOR_EACH_PROTECTED(ITER, MEMBER, RCULIST) \ -- for (INIT_CONTAINER(ITER, rculist_next_protected(RCULIST), MEMBER); \ -- &(ITER)->MEMBER != (RCULIST); \ -- ASSIGN_CONTAINER(ITER, rculist_next_protected(&(ITER)->MEMBER), \ -- MEMBER)) -- --#define RCULIST_FOR_EACH_SAFE_PROTECTED(ITER, NEXT, MEMBER, RCULIST) \ -- for (INIT_CONTAINER(ITER, 
rculist_next_protected(RCULIST), MEMBER); \ -- (&(ITER)->MEMBER != (RCULIST) \ -- ? INIT_CONTAINER(NEXT, rculist_next_protected(&(ITER)->MEMBER), \ -- MEMBER), 1 : 0); \ -- (ITER) = (NEXT)) -+#define RCULIST_FOR_EACH(ITER, MEMBER, RCULIST) \ -+ for (INIT_MULTIVAR(ITER, MEMBER, rculist_next(RCULIST), \ -+ const struct rculist); \ -+ CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST)); \ -+ UPDATE_MULTIVAR(ITER, rculist_next(ITER_VAR(ITER)))) -+ -+#define RCULIST_FOR_EACH_CONTINUE(ITER, MEMBER, RCULIST) \ -+ for (INIT_MULTIVAR(ITER, MEMBER, rculist_next(&(ITER)->MEMBER), \ -+ const struct rculist); \ -+ CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST)); \ -+ UPDATE_MULTIVAR(ITER, rculist_next(ITER_VAR(ITER)))) -+ -+#define RCULIST_FOR_EACH_REVERSE_PROTECTED(ITER, MEMBER, RCULIST) \ -+ for (INIT_MULTIVAR(ITER, MEMBER, (RCULIST)->prev, struct rculist); \ -+ CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST)); \ -+ UPDATE_MULTIVAR(ITER, ITER_VAR(VAR).prev)) -+ -+#define RCULIST_FOR_EACH_REVERSE_PROTECTED_CONTINUE(ITER, MEMBER, RCULIST) \ -+ for (INIT_MULTIVAR(ITER, MEMBER, (ITER)->MEMBER.prev, struct rculist); \ -+ CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST)); \ -+ UPDATE_MULTIVAR(ITER, ITER_VAR(VAR).prev)) -+ -+#define RCULIST_FOR_EACH_PROTECTED(ITER, MEMBER, RCULIST) \ -+ for (INIT_MULTIVAR(ITER, MEMBER, rculist_next_protected(RCULIST), \ -+ struct rculist); \ -+ CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST)); \ -+ UPDATE_MULTIVAR(ITER, rculist_next_protected(ITER_VAR(ITER))) \ -+ -+#define RCULIST_FOR_EACH_SAFE_SHORT_PROTECTED(ITER, MEMBER, RCULIST) \ -+ for (INIT_MULTIVAR_SAFE_SHORT(ITER, MEMBER, \ -+ rculist_next_protected(RCULIST), \ -+ struct rculist); \ -+ CONDITION_MULTIVAR_SAFE_SHORT(ITER, MEMBER, \ -+ ITER_VAR(ITER) != (RCULIST), \ -+ ITER_NEXT_VAR(ITER) = rculist_next_protected(ITER_VAR(VAR))); \ -+ UPDATE_MULTIVAR_SHORT(ITER)) -+ -+#define RCULIST_FOR_EACH_SAFE_LONG_PROTECTED(ITER, NEXT, 
MEMBER, RCULIST) \ -+ for (INIT_MULTIVAR_SAFE_LONG(ITER, NEXT, MEMBER, \ -+ rculist_next_protected(RCULIST) \ -+ struct rculist); \ -+ CONDITION_MULTIVAR_SAFE_LONG(VAR, NEXT, MEMBER \ -+ ITER_VAR(ITER) != (RCULIST), \ -+ ITER_VAR(NEXT) = rculist_next_protected(ITER_VAR(VAR)), \ -+ ITER_VAR(NEXT) != (RCULIST)); \ -+ UPDATE_MULTIVAR_LONG(ITER)) -+ -+#define RCULIST_FOR_EACH_SAFE_PROTECTED(...) \ -+ OVERLOAD_SAFE_MACRO(RCULIST_FOR_EACH_SAFE_LONG_PROTECTED, \ -+ RCULIST_FOR_EACH_SAFE_SHORT_PROTECTED, \ -+ 4, __VA_ARGS__) -+ - - #endif /* rculist.h */ -diff --git a/lib/reconnect.c b/lib/reconnect.c -index a929ddfd2d..89a0bcaf95 100644 ---- a/lib/reconnect.c -+++ b/lib/reconnect.c -@@ -75,7 +75,8 @@ struct reconnect { - - static void reconnect_transition__(struct reconnect *, long long int now, - enum state state); --static long long int reconnect_deadline__(const struct reconnect *); -+static long long int reconnect_deadline__(const struct reconnect *, -+ long long int now); - static bool reconnect_may_retry(struct reconnect *); - - static const char * -@@ -539,7 +540,7 @@ reconnect_transition__(struct reconnect *fsm, long long int now, - } - - static long long int --reconnect_deadline__(const struct reconnect *fsm) -+reconnect_deadline__(const struct reconnect *fsm, long long int now) - { - ovs_assert(fsm->state_entered != LLONG_MIN); - switch (fsm->state) { -@@ -557,8 +558,18 @@ reconnect_deadline__(const struct reconnect *fsm) - if (fsm->probe_interval) { - long long int base = MAX(fsm->last_activity, fsm->state_entered); - long long int expiration = base + fsm->probe_interval; -- if (fsm->last_receive_attempt >= expiration) { -+ if (now < expiration || fsm->last_receive_attempt >= expiration) { -+ /* We still have time before the expiration or the time has -+ * already passed and there was no activity. In the first case -+ * we need to wait for the expiration, in the second - we're -+ * already past the deadline. 
*/ - return expiration; -+ } else { -+ /* Time has already passed, but we didn't attempt to receive -+ * anything. We need to wake up and try to receive even if -+ * nothing is pending, so we can update the expiration time or -+ * transition to a different state. */ -+ return now + 1; - } - } - return LLONG_MAX; -@@ -566,8 +577,10 @@ reconnect_deadline__(const struct reconnect *fsm) - case S_IDLE: - if (fsm->probe_interval) { - long long int expiration = fsm->state_entered + fsm->probe_interval; -- if (fsm->last_receive_attempt >= expiration) { -+ if (now < expiration || fsm->last_receive_attempt >= expiration) { - return expiration; -+ } else { -+ return now + 1; - } - } - return LLONG_MAX; -@@ -618,7 +631,7 @@ reconnect_deadline__(const struct reconnect *fsm) - enum reconnect_action - reconnect_run(struct reconnect *fsm, long long int now) - { -- if (now >= reconnect_deadline__(fsm)) { -+ if (now >= reconnect_deadline__(fsm, now)) { - switch (fsm->state) { - case S_VOID: - return 0; -@@ -671,7 +684,7 @@ reconnect_wait(struct reconnect *fsm, long long int now) - int - reconnect_timeout(struct reconnect *fsm, long long int now) - { -- long long int deadline = reconnect_deadline__(fsm); -+ long long int deadline = reconnect_deadline__(fsm, now); - if (deadline != LLONG_MAX) { - long long int remaining = deadline - now; - return MAX(0, MIN(INT_MAX, remaining)); -diff --git a/lib/socket-util.c b/lib/socket-util.c -index 4f1ffecf5d..38705cc51e 100644 ---- a/lib/socket-util.c -+++ b/lib/socket-util.c -@@ -62,7 +62,8 @@ static bool parse_sockaddr_components(struct sockaddr_storage *ss, - const char *port_s, - uint16_t default_port, - const char *s, -- bool resolve_host); -+ bool resolve_host, -+ bool *dns_failure); - - /* Sets 'fd' to non-blocking mode. Returns 0 if successful, otherwise a - * positive errno value. 
*/ -@@ -438,7 +439,7 @@ parse_sockaddr_components_dns(struct sockaddr_storage *ss OVS_UNUSED, - dns_resolve(host_s, &tmp_host_s); - if (tmp_host_s != NULL) { - parse_sockaddr_components(ss, tmp_host_s, port_s, -- default_port, s, false); -+ default_port, s, false, NULL); - free(tmp_host_s); - return true; - } -@@ -450,11 +451,15 @@ parse_sockaddr_components(struct sockaddr_storage *ss, - char *host_s, - const char *port_s, uint16_t default_port, - const char *s, -- bool resolve_host) -+ bool resolve_host, bool *dns_failure) - { - struct sockaddr_in *sin = sin_cast(sa_cast(ss)); - int port; - -+ if (dns_failure) { -+ *dns_failure = false; -+ } -+ - if (port_s && port_s[0]) { - if (!str_to_int(port_s, 10, &port) || port < 0 || port > 65535) { - VLOG_ERR("%s: bad port number \"%s\"", s, port_s); -@@ -501,10 +506,15 @@ parse_sockaddr_components(struct sockaddr_storage *ss, - return true; - - resolve: -- if (resolve_host && parse_sockaddr_components_dns(ss, host_s, port_s, -- default_port, s)) { -- return true; -- } else if (!resolve_host) { -+ if (resolve_host) { -+ if (parse_sockaddr_components_dns(ss, host_s, port_s, -+ default_port, s)) { -+ return true; -+ } -+ if (dns_failure) { -+ *dns_failure = true; -+ } -+ } else { - VLOG_ERR("%s: bad IP address \"%s\"", s, host_s); - } - exit: -@@ -521,10 +531,12 @@ exit: - * It resolves the host if 'resolve_host' is true. - * - * On success, returns true and stores the parsed remote address into '*ss'. -- * On failure, logs an error, stores zeros into '*ss', and returns false. */ -+ * On failure, logs an error, stores zeros into '*ss', and returns false, -+ * '*dns_failure' indicates if the host resolution failed. 
*/ - bool - inet_parse_active(const char *target_, int default_port, -- struct sockaddr_storage *ss, bool resolve_host) -+ struct sockaddr_storage *ss, -+ bool resolve_host, bool *dns_failure) - { - char *target = xstrdup(target_); - char *port, *host; -@@ -539,7 +551,7 @@ inet_parse_active(const char *target_, int default_port, - ok = false; - } else { - ok = parse_sockaddr_components(ss, host, port, default_port, -- target_, resolve_host); -+ target_, resolve_host, dns_failure); - } - if (!ok) { - memset(ss, 0, sizeof *ss); -@@ -576,7 +588,7 @@ inet_open_active(int style, const char *target, int default_port, - int error; - - /* Parse. */ -- if (!inet_parse_active(target, default_port, &ss, true)) { -+ if (!inet_parse_active(target, default_port, &ss, true, NULL)) { - error = EAFNOSUPPORT; - goto exit; - } -@@ -660,7 +672,7 @@ inet_parse_passive(const char *target_, int default_port, - ok = false; - } else { - ok = parse_sockaddr_components(ss, host, port, default_port, -- target_, true); -+ target_, true, NULL); - } - if (!ok) { - memset(ss, 0, sizeof *ss); -@@ -783,7 +795,8 @@ inet_parse_address(const char *target_, struct sockaddr_storage *ss) - { - char *target = xstrdup(target_); - char *host = unbracket(target); -- bool ok = parse_sockaddr_components(ss, host, NULL, 0, target_, false); -+ bool ok = parse_sockaddr_components(ss, host, NULL, 0, -+ target_, false, NULL); - if (!ok) { - memset(ss, 0, sizeof *ss); - } -diff --git a/lib/socket-util.h b/lib/socket-util.h -index 9ccb7d4cc4..bf66393df9 100644 ---- a/lib/socket-util.h -+++ b/lib/socket-util.h -@@ -49,7 +49,8 @@ ovs_be32 guess_netmask(ovs_be32 ip); - void inet_parse_host_port_tokens(char *s, char **hostp, char **portp); - void inet_parse_port_host_tokens(char *s, char **portp, char **hostp); - bool inet_parse_active(const char *target, int default_port, -- struct sockaddr_storage *ssp, bool resolve_host); -+ struct sockaddr_storage *ssp, -+ bool resolve_host, bool *dns_failure); - int 
inet_open_active(int style, const char *target, int default_port, - struct sockaddr_storage *ssp, int *fdp, uint8_t dscp); - -diff --git a/lib/stopwatch.c b/lib/stopwatch.c -index f5602163bc..1c71df1a12 100644 ---- a/lib/stopwatch.c -+++ b/lib/stopwatch.c -@@ -114,7 +114,6 @@ static void - calc_percentile(unsigned long long n_samples, struct percentile *pctl, - unsigned long long new_sample) - { -- - if (n_samples < P_SQUARE_MIN) { - pctl->samples[n_samples - 1] = new_sample; - } -@@ -228,13 +227,12 @@ add_sample(struct stopwatch *sw, unsigned long long new_sample) - sw->min = new_sample; - } - -- calc_percentile(sw->n_samples, &sw->pctl, new_sample); -- - if (sw->n_samples++ == 0) { - sw->short_term.average = sw->long_term.average = new_sample; - return; - } - -+ calc_percentile(sw->n_samples, &sw->pctl, new_sample); - calc_average(&sw->short_term, new_sample); - calc_average(&sw->long_term, new_sample); - } -diff --git a/lib/stp.c b/lib/stp.c -index 809b405a52..a869b5f390 100644 ---- a/lib/stp.c -+++ b/lib/stp.c -@@ -737,7 +737,7 @@ void - stp_received_bpdu(struct stp_port *p, const void *bpdu, size_t bpdu_size) - { - struct stp *stp = p->stp; -- const struct stp_bpdu_header *header; -+ struct stp_bpdu_header header; - - ovs_mutex_lock(&mutex); - if (p->state == STP_DISABLED) { -@@ -750,19 +750,19 @@ stp_received_bpdu(struct stp_port *p, const void *bpdu, size_t bpdu_size) - goto out; - } - -- header = bpdu; -- if (header->protocol_id != htons(STP_PROTOCOL_ID)) { -+ memcpy(&header, bpdu, sizeof header); -+ if (header.protocol_id != htons(STP_PROTOCOL_ID)) { - VLOG_WARN("%s: received BPDU with unexpected protocol ID %"PRIu16, -- stp->name, ntohs(header->protocol_id)); -+ stp->name, ntohs(header.protocol_id)); - p->error_count++; - goto out; - } -- if (header->protocol_version != STP_PROTOCOL_VERSION) { -+ if (header.protocol_version != STP_PROTOCOL_VERSION) { - VLOG_DBG("%s: received BPDU with unexpected protocol version %"PRIu8, -- stp->name, 
header->protocol_version); -+ stp->name, header.protocol_version); - } - -- switch (header->bpdu_type) { -+ switch (header.bpdu_type) { - case STP_TYPE_CONFIG: - if (bpdu_size < sizeof(struct stp_config_bpdu)) { - VLOG_WARN("%s: received config BPDU with invalid size %"PRIuSIZE, -@@ -785,7 +785,7 @@ stp_received_bpdu(struct stp_port *p, const void *bpdu, size_t bpdu_size) - - default: - VLOG_WARN("%s: received BPDU of unexpected type %"PRIu8, -- stp->name, header->bpdu_type); -+ stp->name, header.bpdu_type); - p->error_count++; - goto out; - } -diff --git a/lib/stream.c b/lib/stream.c -index fcaddf10ad..71039e24f1 100644 ---- a/lib/stream.c -+++ b/lib/stream.c -@@ -788,7 +788,7 @@ stream_parse_target_with_default_port(const char *target, int default_port, - struct sockaddr_storage *ss) - { - return ((!strncmp(target, "tcp:", 4) || !strncmp(target, "ssl:", 4)) -- && inet_parse_active(target + 4, default_port, ss, true)); -+ && inet_parse_active(target + 4, default_port, ss, true, NULL)); - } - - /* Attempts to guess the content type of a stream whose first few bytes were -diff --git a/lib/tc.c b/lib/tc.c -index 38a1dfc0eb..df73a43d4c 100644 ---- a/lib/tc.c -+++ b/lib/tc.c -@@ -568,16 +568,17 @@ nl_parse_flower_vlan(struct nlattr **attrs, struct tc_flower *flower) - - flower->key.encap_eth_type[0] = - nl_attr_get_be16(attrs[TCA_FLOWER_KEY_ETH_TYPE]); -+ flower->mask.encap_eth_type[0] = CONSTANT_HTONS(0xffff); - - if (attrs[TCA_FLOWER_KEY_VLAN_ID]) { - flower->key.vlan_id[0] = - nl_attr_get_u16(attrs[TCA_FLOWER_KEY_VLAN_ID]); -- flower->mask.vlan_id[0] = 0xffff; -+ flower->mask.vlan_id[0] = VLAN_VID_MASK >> VLAN_VID_SHIFT; - } - if (attrs[TCA_FLOWER_KEY_VLAN_PRIO]) { - flower->key.vlan_prio[0] = - nl_attr_get_u8(attrs[TCA_FLOWER_KEY_VLAN_PRIO]); -- flower->mask.vlan_prio[0] = 0xff; -+ flower->mask.vlan_prio[0] = VLAN_PCP_MASK >> VLAN_PCP_SHIFT; - } - - if (!attrs[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) { -@@ -590,17 +591,18 @@ nl_parse_flower_vlan(struct nlattr **attrs, struct 
tc_flower *flower) - } - - flower->key.encap_eth_type[1] = flower->key.encap_eth_type[0]; -+ flower->mask.encap_eth_type[1] = CONSTANT_HTONS(0xffff); - flower->key.encap_eth_type[0] = encap_ethtype; - - if (attrs[TCA_FLOWER_KEY_CVLAN_ID]) { - flower->key.vlan_id[1] = - nl_attr_get_u16(attrs[TCA_FLOWER_KEY_CVLAN_ID]); -- flower->mask.vlan_id[1] = 0xffff; -+ flower->mask.vlan_id[1] = VLAN_VID_MASK >> VLAN_VID_SHIFT; - } - if (attrs[TCA_FLOWER_KEY_CVLAN_PRIO]) { - flower->key.vlan_prio[1] = - nl_attr_get_u8(attrs[TCA_FLOWER_KEY_CVLAN_PRIO]); -- flower->mask.vlan_prio[1] = 0xff; -+ flower->mask.vlan_prio[1] = VLAN_PCP_MASK >> VLAN_PCP_SHIFT; - } - } - -@@ -937,24 +939,21 @@ nl_parse_flower_ip(struct nlattr **attrs, struct tc_flower *flower) { - key->icmp_code = - nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_CODE]); - mask->icmp_code = -- nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_CODE]); -+ nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_CODE_MASK]); - } - if (attrs[TCA_FLOWER_KEY_ICMPV4_TYPE_MASK]) { -- key->icmp_type = -- nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_TYPE_MASK]); -+ key->icmp_type = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_TYPE]); - mask->icmp_type = - nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_TYPE_MASK]); - } - } else if (ip_proto == IPPROTO_ICMPV6) { - if (attrs[TCA_FLOWER_KEY_ICMPV6_CODE_MASK]) { -- key->icmp_code = -- nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_CODE]); -+ key->icmp_code = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_CODE]); - mask->icmp_code = -- nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_CODE]); -+ nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_CODE_MASK]); - } - if (attrs[TCA_FLOWER_KEY_ICMPV6_TYPE_MASK]) { -- key->icmp_type = -- nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_TYPE_MASK]); -+ key->icmp_type = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_TYPE]); - mask->icmp_type = - nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_TYPE_MASK]); - } -@@ -1006,14 +1005,14 @@ static const struct nl_policy pedit_policy[] = { - static int - 
nl_parse_act_pedit(struct nlattr *options, struct tc_flower *flower) - { -- struct tc_action *action; -+ struct tc_action *action = &flower->actions[flower->action_count++]; - struct nlattr *pe_attrs[ARRAY_SIZE(pedit_policy)]; - const struct tc_pedit *pe; - const struct tc_pedit_key *keys; - const struct nlattr *nla, *keys_ex, *ex_type; - const void *keys_attr; -- char *rewrite_key = (void *) &flower->rewrite.key; -- char *rewrite_mask = (void *) &flower->rewrite.mask; -+ char *rewrite_key = (void *) &action->rewrite.key; -+ char *rewrite_mask = (void *) &action->rewrite.mask; - size_t keys_ex_size, left; - int type, i = 0, err; - -@@ -1092,7 +1091,6 @@ nl_parse_act_pedit(struct nlattr *options, struct tc_flower *flower) - i++; - } - -- action = &flower->actions[flower->action_count++]; - action->type = TC_ACT_PEDIT; - - return 0; -@@ -1487,7 +1485,9 @@ nl_parse_act_ct(struct nlattr *options, struct tc_flower *flower) - if (ipv4_max) { - ovs_be32 addr = nl_attr_get_be32(ipv4_max); - -- action->ct.range.ipv4.max = addr; -+ if (action->ct.range.ipv4.min != addr) { -+ action->ct.range.ipv4.max = addr; -+ } - } - } else if (ipv6_min) { - action->ct.range.ip_family = AF_INET6; -@@ -1496,7 +1496,9 @@ nl_parse_act_ct(struct nlattr *options, struct tc_flower *flower) - if (ipv6_max) { - struct in6_addr addr = nl_attr_get_in6_addr(ipv6_max); - -- action->ct.range.ipv6.max = addr; -+ if (!ipv6_addr_equals(&action->ct.range.ipv6.min, &addr)) { -+ action->ct.range.ipv6.max = addr; -+ } - } - } - -@@ -1504,6 +1506,10 @@ nl_parse_act_ct(struct nlattr *options, struct tc_flower *flower) - action->ct.range.port.min = nl_attr_get_be16(port_min); - if (port_max) { - action->ct.range.port.max = nl_attr_get_be16(port_max); -+ if (action->ct.range.port.min == -+ action->ct.range.port.max) { -+ action->ct.range.port.max = 0; -+ } - } - } - } -@@ -1702,6 +1708,9 @@ static const struct nl_policy stats_policy[] = { - [TCA_STATS_BASIC] = { .type = NL_A_UNSPEC, - .min_len = sizeof(struct 
gnet_stats_basic), - .optional = false, }, -+ [TCA_STATS_BASIC_HW] = { .type = NL_A_UNSPEC, -+ .min_len = sizeof(struct gnet_stats_basic), -+ .optional = true, }, - }; - - static int -@@ -1714,8 +1723,11 @@ nl_parse_single_action(struct nlattr *action, struct tc_flower *flower, - const char *act_kind; - struct nlattr *action_attrs[ARRAY_SIZE(act_policy)]; - struct nlattr *stats_attrs[ARRAY_SIZE(stats_policy)]; -- struct ovs_flow_stats *stats = &flower->stats; -- const struct gnet_stats_basic *bs; -+ struct ovs_flow_stats *stats_sw = &flower->stats_sw; -+ struct ovs_flow_stats *stats_hw = &flower->stats_hw; -+ const struct gnet_stats_basic *bs_all = NULL; -+ const struct gnet_stats_basic *bs_hw = NULL; -+ struct gnet_stats_basic bs_sw = { .packets = 0, .bytes = 0, }; - int err = 0; - - if (!nl_parse_nested(action, act_policy, action_attrs, -@@ -1771,10 +1783,26 @@ nl_parse_single_action(struct nlattr *action, struct tc_flower *flower, - return EPROTO; - } - -- bs = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC], sizeof *bs); -- if (bs->packets) { -- put_32aligned_u64(&stats->n_packets, bs->packets); -- put_32aligned_u64(&stats->n_bytes, bs->bytes); -+ bs_all = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC], sizeof *bs_all); -+ if (stats_attrs[TCA_STATS_BASIC_HW]) { -+ bs_hw = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC_HW], -+ sizeof *bs_hw); -+ -+ bs_sw.packets = bs_all->packets - bs_hw->packets; -+ bs_sw.bytes = bs_all->bytes - bs_hw->bytes; -+ } else { -+ bs_sw.packets = bs_all->packets; -+ bs_sw.bytes = bs_all->bytes; -+ } -+ -+ if (bs_sw.packets > get_32aligned_u64(&stats_sw->n_packets)) { -+ put_32aligned_u64(&stats_sw->n_packets, bs_sw.packets); -+ put_32aligned_u64(&stats_sw->n_bytes, bs_sw.bytes); -+ } -+ -+ if (bs_hw && bs_hw->packets > get_32aligned_u64(&stats_hw->n_packets)) { -+ put_32aligned_u64(&stats_hw->n_packets, bs_hw->packets); -+ put_32aligned_u64(&stats_hw->n_bytes, bs_hw->bytes); - } - - return 0; -@@ -2399,14 +2427,14 @@ 
nl_msg_put_act_flags(struct ofpbuf *request) { - * first_word_mask/last_word_mask - the mask to use for the first/last read - * (as we read entire words). */ - static void --calc_offsets(struct tc_flower *flower, struct flower_key_to_pedit *m, -+calc_offsets(struct tc_action *action, struct flower_key_to_pedit *m, - int *cur_offset, int *cnt, ovs_be32 *last_word_mask, - ovs_be32 *first_word_mask, ovs_be32 **mask, ovs_be32 **data) - { - int start_offset, max_offset, total_size; - int diff, right_zero_bits, left_zero_bits; -- char *rewrite_key = (void *) &flower->rewrite.key; -- char *rewrite_mask = (void *) &flower->rewrite.mask; -+ char *rewrite_key = (void *) &action->rewrite.key; -+ char *rewrite_mask = (void *) &action->rewrite.mask; - - max_offset = m->offset + m->size; - start_offset = ROUND_DOWN(m->offset, 4); -@@ -2473,7 +2501,8 @@ csum_update_flag(struct tc_flower *flower, - - static int - nl_msg_put_flower_rewrite_pedits(struct ofpbuf *request, -- struct tc_flower *flower) -+ struct tc_flower *flower, -+ struct tc_action *action) - { - struct { - struct tc_pedit sel; -@@ -2497,7 +2526,7 @@ nl_msg_put_flower_rewrite_pedits(struct ofpbuf *request, - continue; - } - -- calc_offsets(flower, m, &cur_offset, &cnt, &last_word_mask, -+ calc_offsets(action, m, &cur_offset, &cnt, &last_word_mask, - &first_word_mask, &mask, &data); - - for (j = 0; j < cnt; j++, mask++, data++, cur_offset += 4) { -@@ -2545,6 +2574,40 @@ nl_msg_put_flower_rewrite_pedits(struct ofpbuf *request, - return 0; - } - -+static void -+nl_msg_put_flower_acts_release(struct ofpbuf *request, uint16_t act_index) -+{ -+ size_t act_offset; -+ -+ act_offset = nl_msg_start_nested(request, act_index); -+ nl_msg_put_act_tunnel_key_release(request); -+ nl_msg_put_act_flags(request); -+ nl_msg_end_nested(request, act_offset); -+} -+ -+/* Aggregates all previous successive pedit actions csum_update_flags -+ * to flower->csum_update_flags. Only append one csum action to the -+ * last pedit action. 
*/ -+static void -+nl_msg_put_csum_act(struct ofpbuf *request, struct tc_flower *flower, -+ uint16_t *act_index) -+{ -+ size_t act_offset; -+ -+ /* No pedit actions or processed already. */ -+ if (!flower->csum_update_flags) { -+ return; -+ } -+ -+ act_offset = nl_msg_start_nested(request, (*act_index)++); -+ nl_msg_put_act_csum(request, flower->csum_update_flags); -+ nl_msg_put_act_flags(request); -+ nl_msg_end_nested(request, act_offset); -+ -+ /* Clear it. So we can have another series of pedit actions. */ -+ flower->csum_update_flags = 0; -+} -+ - static int - nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower) - { -@@ -2561,24 +2624,31 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower) - - action = flower->actions; - for (i = 0; i < flower->action_count; i++, action++) { -+ if (action->type != TC_ACT_PEDIT) { -+ nl_msg_put_csum_act(request, flower, &act_index); -+ } - switch (action->type) { - case TC_ACT_PEDIT: { - act_offset = nl_msg_start_nested(request, act_index++); -- error = nl_msg_put_flower_rewrite_pedits(request, flower); -+ error = nl_msg_put_flower_rewrite_pedits(request, flower, -+ action); - if (error) { - return error; - } - nl_msg_end_nested(request, act_offset); - -- if (flower->csum_update_flags) { -- act_offset = nl_msg_start_nested(request, act_index++); -- nl_msg_put_act_csum(request, flower->csum_update_flags); -- nl_msg_put_act_flags(request); -- nl_msg_end_nested(request, act_offset); -+ if (i == flower->action_count - 1) { -+ /* If this is the last action check csum calc again. 
*/ -+ nl_msg_put_csum_act(request, flower, &act_index); - } - } - break; - case TC_ACT_ENCAP: { -+ if (!released && flower->tunnel) { -+ nl_msg_put_flower_acts_release(request, act_index++); -+ released = true; -+ } -+ - act_offset = nl_msg_start_nested(request, act_index++); - nl_msg_put_act_tunnel_key_set(request, action->encap.id_present, - action->encap.id, -@@ -2636,10 +2706,7 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower) - break; - case TC_ACT_OUTPUT: { - if (!released && flower->tunnel) { -- act_offset = nl_msg_start_nested(request, act_index++); -- nl_msg_put_act_tunnel_key_release(request); -- nl_msg_put_act_flags(request); -- nl_msg_end_nested(request, act_offset); -+ nl_msg_put_flower_acts_release(request, act_index++); - released = true; - } - -@@ -2901,13 +2968,13 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower) - FLOWER_PUT_MASKED_VALUE(icmp_code, TCA_FLOWER_KEY_ICMPV6_CODE); - FLOWER_PUT_MASKED_VALUE(icmp_type, TCA_FLOWER_KEY_ICMPV6_TYPE); - } -- -- FLOWER_PUT_MASKED_VALUE(ct_state, TCA_FLOWER_KEY_CT_STATE); -- FLOWER_PUT_MASKED_VALUE(ct_zone, TCA_FLOWER_KEY_CT_ZONE); -- FLOWER_PUT_MASKED_VALUE(ct_mark, TCA_FLOWER_KEY_CT_MARK); -- FLOWER_PUT_MASKED_VALUE(ct_label, TCA_FLOWER_KEY_CT_LABELS); - } - -+ FLOWER_PUT_MASKED_VALUE(ct_state, TCA_FLOWER_KEY_CT_STATE); -+ FLOWER_PUT_MASKED_VALUE(ct_zone, TCA_FLOWER_KEY_CT_ZONE); -+ FLOWER_PUT_MASKED_VALUE(ct_mark, TCA_FLOWER_KEY_CT_MARK); -+ FLOWER_PUT_MASKED_VALUE(ct_label, TCA_FLOWER_KEY_CT_LABELS); -+ - if (host_eth_type == ETH_P_IP) { - FLOWER_PUT_MASKED_VALUE(ipv4.ipv4_src, TCA_FLOWER_KEY_IPV4_SRC); - FLOWER_PUT_MASKED_VALUE(ipv4.ipv4_dst, TCA_FLOWER_KEY_IPV4_DST); -@@ -2980,12 +3047,79 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower) - return 0; - } - -+static void -+log_tc_flower_match(const char *msg, -+ const struct tc_flower *a, -+ const struct tc_flower *b) -+{ -+ uint8_t key_a[sizeof(struct tc_flower_key)]; -+ 
uint8_t key_b[sizeof(struct tc_flower_key)]; -+ struct ds s = DS_EMPTY_INITIALIZER; -+ -+ for (int i = 0; i < sizeof a->key; i++) { -+ uint8_t mask_a = ((uint8_t *) &a->mask)[i]; -+ uint8_t mask_b = ((uint8_t *) &b->mask)[i]; -+ -+ key_a[i] = ((uint8_t *) &a->key)[i] & mask_a; -+ key_b[i] = ((uint8_t *) &b->key)[i] & mask_b; -+ } -+ ds_put_cstr(&s, "\nExpected Mask:\n"); -+ ds_put_hex(&s, &a->mask, sizeof a->mask); -+ ds_put_cstr(&s, "\nReceived Mask:\n"); -+ ds_put_hex(&s, &b->mask, sizeof b->mask); -+ ds_put_cstr(&s, "\nExpected Key:\n"); -+ ds_put_hex(&s, &a->key, sizeof a->key); -+ ds_put_cstr(&s, "\nReceived Key:\n"); -+ ds_put_hex(&s, &b->key, sizeof b->key); -+ ds_put_cstr(&s, "\nExpected Masked Key:\n"); -+ ds_put_hex(&s, key_a, sizeof key_a); -+ ds_put_cstr(&s, "\nReceived Masked Key:\n"); -+ ds_put_hex(&s, key_b, sizeof key_b); -+ -+ if (a->action_count != b->action_count) { -+ /* If action count is not equal, we print all actions to see which -+ * ones are missing. */ -+ const struct tc_action *action; -+ int i; -+ -+ ds_put_cstr(&s, "\nExpected Actions:\n"); -+ for (i = 0, action = a->actions; i < a->action_count; i++, action++) { -+ ds_put_cstr(&s, " - "); -+ ds_put_hex(&s, action, sizeof *action); -+ ds_put_cstr(&s, "\n"); -+ } -+ ds_put_cstr(&s, "Received Actions:\n"); -+ for (i = 0, action = b->actions; i < b->action_count; i++, action++) { -+ ds_put_cstr(&s, " - "); -+ ds_put_hex(&s, action, sizeof *action); -+ ds_put_cstr(&s, "\n"); -+ } -+ } else { -+ /* Only dump the delta in actions. 
*/ -+ const struct tc_action *action_a = a->actions; -+ const struct tc_action *action_b = b->actions; -+ -+ for (int i = 0; i < a->action_count; i++, action_a++, action_b++) { -+ if (memcmp(action_a, action_b, sizeof *action_a)) { -+ ds_put_format(&s, -+ "\nAction %d mismatch:\n - Expected Action: ", -+ i); -+ ds_put_hex(&s, action_a, sizeof *action_a); -+ ds_put_cstr(&s, "\n - Received Action: "); -+ ds_put_hex(&s, action_b, sizeof *action_b); -+ } -+ } -+ } -+ VLOG_DBG_RL(&error_rl, "%s%s", msg, ds_cstr(&s)); -+ ds_destroy(&s); -+} -+ - static bool - cmp_tc_flower_match_action(const struct tc_flower *a, - const struct tc_flower *b) - { - if (memcmp(&a->mask, &b->mask, sizeof a->mask)) { -- VLOG_DBG_RL(&error_rl, "tc flower compare failed mask compare"); -+ log_tc_flower_match("tc flower compare failed mask compare:", a, b); - return false; - } - -@@ -2998,8 +3132,8 @@ cmp_tc_flower_match_action(const struct tc_flower *a, - uint8_t key_b = ((uint8_t *)&b->key)[i] & mask; - - if (key_a != key_b) { -- VLOG_DBG_RL(&error_rl, "tc flower compare failed key compare at " -- "%d", i); -+ log_tc_flower_match("tc flower compare failed masked key compare:", -+ a, b); - return false; - } - } -@@ -3009,14 +3143,15 @@ cmp_tc_flower_match_action(const struct tc_flower *a, - const struct tc_action *action_b = b->actions; - - if (a->action_count != b->action_count) { -- VLOG_DBG_RL(&error_rl, "tc flower compare failed action length check"); -+ log_tc_flower_match("tc flower compare failed action length check", -+ a, b); - return false; - } - - for (int i = 0; i < a->action_count; i++, action_a++, action_b++) { - if (memcmp(action_a, action_b, sizeof *action_a)) { -- VLOG_DBG_RL(&error_rl, "tc flower compare failed action compare " -- "for %d", i); -+ log_tc_flower_match("tc flower compare failed action compare", -+ a, b); - return false; - } - } -diff --git a/lib/tc.h b/lib/tc.h -index a147ca461d..d6cdddd169 100644 ---- a/lib/tc.h -+++ b/lib/tc.h -@@ -256,11 +256,23 @@ struct 
tc_action { - bool force; - bool commit; - } ct; -+ -+ struct { -+ struct tc_flower_key key; -+ struct tc_flower_key mask; -+ } rewrite; - }; - - enum tc_action_type type; - }; - -+/* assert that if we overflow with a masked write of uint32_t to the last byte -+ * of action.rewrite we overflow inside struct tc_action. -+ * shouldn't happen unless someone moves rewrite to the end of action */ -+BUILD_ASSERT_DECL(offsetof(struct tc_action, rewrite) -+ + MEMBER_SIZEOF(struct tc_action, rewrite) -+ + sizeof(uint32_t) - 2 < sizeof(struct tc_action)); -+ - enum tc_offloaded_state { - TC_OFFLOADED_STATE_UNDEFINED, - TC_OFFLOADED_STATE_IN_HW, -@@ -330,15 +342,10 @@ struct tc_flower { - int action_count; - struct tc_action actions[TCA_ACT_MAX_NUM]; - -- struct ovs_flow_stats stats; -+ struct ovs_flow_stats stats_sw; -+ struct ovs_flow_stats stats_hw; - uint64_t lastused; - -- struct { -- bool rewrite; -- struct tc_flower_key key; -- struct tc_flower_key mask; -- } rewrite; -- - uint32_t csum_update_flags; - - bool tunnel; -@@ -352,13 +359,6 @@ struct tc_flower { - enum tc_offload_policy tc_policy; - }; - --/* assert that if we overflow with a masked write of uint32_t to the last byte -- * of flower.rewrite we overflow inside struct flower. 
-- * shouldn't happen unless someone moves rewrite to the end of flower */ --BUILD_ASSERT_DECL(offsetof(struct tc_flower, rewrite) -- + MEMBER_SIZEOF(struct tc_flower, rewrite) -- + sizeof(uint32_t) - 2 < sizeof(struct tc_flower)); -- - int tc_replace_flower(struct tcf_id *id, struct tc_flower *flower); - int tc_del_filter(struct tcf_id *id); - int tc_get_flower(struct tcf_id *id, struct tc_flower *flower); -diff --git a/lib/tnl-neigh-cache.c b/lib/tnl-neigh-cache.c -index 5bda4af7e0..995c88bf17 100644 ---- a/lib/tnl-neigh-cache.c -+++ b/lib/tnl-neigh-cache.c -@@ -32,6 +32,7 @@ - #include "errno.h" - #include "flow.h" - #include "netdev.h" -+#include "ovs-atomic.h" - #include "ovs-thread.h" - #include "packets.h" - #include "openvswitch/poll-loop.h" -@@ -44,14 +45,13 @@ - #include "openvswitch/vlog.h" - - --/* In seconds */ --#define NEIGH_ENTRY_DEFAULT_IDLE_TIME (15 * 60) -+#define NEIGH_ENTRY_DEFAULT_IDLE_TIME_MS (15 * 60 * 1000) - - struct tnl_neigh_entry { - struct cmap_node cmap_node; - struct in6_addr ip; - struct eth_addr mac; -- time_t expires; /* Expiration time. */ -+ atomic_llong expires; /* Expiration time in ms. 
*/ - char br_name[IFNAMSIZ]; - }; - -@@ -64,6 +64,16 @@ tnl_neigh_hash(const struct in6_addr *ip) - return hash_bytes(ip->s6_addr, 16, 0); - } - -+static bool -+tnl_neigh_expired(struct tnl_neigh_entry *neigh) -+{ -+ long long expires; -+ -+ atomic_read_explicit(&neigh->expires, &expires, memory_order_acquire); -+ -+ return expires <= time_msec(); -+} -+ - static struct tnl_neigh_entry * - tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const struct in6_addr *dst) - { -@@ -73,11 +83,13 @@ tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const struct in6_addr *dst) - hash = tnl_neigh_hash(dst); - CMAP_FOR_EACH_WITH_HASH (neigh, cmap_node, hash, &table) { - if (ipv6_addr_equals(&neigh->ip, dst) && !strcmp(neigh->br_name, br_name)) { -- if (neigh->expires <= time_now()) { -+ if (tnl_neigh_expired(neigh)) { - return NULL; - } - -- neigh->expires = time_now() + NEIGH_ENTRY_DEFAULT_IDLE_TIME; -+ atomic_store_explicit(&neigh->expires, time_msec() + -+ NEIGH_ENTRY_DEFAULT_IDLE_TIME_MS, -+ memory_order_release); - return neigh; - } - } -@@ -113,15 +125,16 @@ tnl_neigh_delete(struct tnl_neigh_entry *neigh) - ovsrcu_postpone(neigh_entry_free, neigh); - } - --static void --tnl_neigh_set__(const char name[IFNAMSIZ], const struct in6_addr *dst, -- const struct eth_addr mac) -+void -+tnl_neigh_set(const char name[IFNAMSIZ], const struct in6_addr *dst, -+ const struct eth_addr mac) - { - ovs_mutex_lock(&mutex); - struct tnl_neigh_entry *neigh = tnl_neigh_lookup__(name, dst); - if (neigh) { - if (eth_addr_equals(neigh->mac, mac)) { -- neigh->expires = time_now() + NEIGH_ENTRY_DEFAULT_IDLE_TIME; -+ atomic_store_relaxed(&neigh->expires, time_msec() + -+ NEIGH_ENTRY_DEFAULT_IDLE_TIME_MS); - ovs_mutex_unlock(&mutex); - return; - } -@@ -133,7 +146,8 @@ tnl_neigh_set__(const char name[IFNAMSIZ], const struct in6_addr *dst, - - neigh->ip = *dst; - neigh->mac = mac; -- neigh->expires = time_now() + NEIGH_ENTRY_DEFAULT_IDLE_TIME; -+ atomic_store_relaxed(&neigh->expires, time_msec() + -+ 
NEIGH_ENTRY_DEFAULT_IDLE_TIME_MS); - ovs_strlcpy(neigh->br_name, name, sizeof neigh->br_name); - cmap_insert(&table, &neigh->cmap_node, tnl_neigh_hash(&neigh->ip)); - ovs_mutex_unlock(&mutex); -@@ -144,12 +158,12 @@ tnl_arp_set(const char name[IFNAMSIZ], ovs_be32 dst, - const struct eth_addr mac) - { - struct in6_addr dst6 = in6_addr_mapped_ipv4(dst); -- tnl_neigh_set__(name, &dst6, mac); -+ tnl_neigh_set(name, &dst6, mac); - } - - static int - tnl_arp_snoop(const struct flow *flow, struct flow_wildcards *wc, -- const char name[IFNAMSIZ]) -+ const char name[IFNAMSIZ], bool allow_update) - { - /* Snoop normal ARP replies and gratuitous ARP requests/replies only */ - if (!is_arp(flow) -@@ -159,13 +173,17 @@ tnl_arp_snoop(const struct flow *flow, struct flow_wildcards *wc, - return EINVAL; - } - -- tnl_arp_set(name, FLOW_WC_GET_AND_MASK_WC(flow, wc, nw_src), flow->arp_sha); -+ memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src); -+ -+ if (allow_update) { -+ tnl_arp_set(name, flow->nw_src, flow->arp_sha); -+ } - return 0; - } - - static int - tnl_nd_snoop(const struct flow *flow, struct flow_wildcards *wc, -- const char name[IFNAMSIZ]) -+ const char name[IFNAMSIZ], bool allow_update) - { - if (!is_nd(flow, wc) || flow->tp_src != htons(ND_NEIGHBOR_ADVERT)) { - return EINVAL; -@@ -184,20 +202,22 @@ tnl_nd_snoop(const struct flow *flow, struct flow_wildcards *wc, - memset(&wc->masks.ipv6_dst, 0xff, sizeof wc->masks.ipv6_dst); - memset(&wc->masks.nd_target, 0xff, sizeof wc->masks.nd_target); - -- tnl_neigh_set__(name, &flow->nd_target, flow->arp_tha); -+ if (allow_update) { -+ tnl_neigh_set(name, &flow->nd_target, flow->arp_tha); -+ } - return 0; - } - - int - tnl_neigh_snoop(const struct flow *flow, struct flow_wildcards *wc, -- const char name[IFNAMSIZ]) -+ const char name[IFNAMSIZ], bool allow_update) - { - int res; -- res = tnl_arp_snoop(flow, wc, name); -+ res = tnl_arp_snoop(flow, wc, name, allow_update); - if (res != EINVAL) { - return res; - } -- return 
tnl_nd_snoop(flow, wc, name); -+ return tnl_nd_snoop(flow, wc, name, allow_update); - } - - void -@@ -208,7 +228,7 @@ tnl_neigh_cache_run(void) - - ovs_mutex_lock(&mutex); - CMAP_FOR_EACH(neigh, cmap_node, &table) { -- if (neigh->expires <= time_now()) { -+ if (tnl_neigh_expired(neigh)) { - tnl_neigh_delete(neigh); - changed = true; - } -@@ -294,7 +314,7 @@ tnl_neigh_cache_add(struct unixctl_conn *conn, int argc OVS_UNUSED, - return; - } - -- tnl_neigh_set__(br_name, &ip6, mac); -+ tnl_neigh_set(br_name, &ip6, mac); - unixctl_command_reply(conn, "OK"); - } - -@@ -319,7 +339,7 @@ tnl_neigh_cache_show(struct unixctl_conn *conn, int argc OVS_UNUSED, - - ds_put_format(&ds, ETH_ADDR_FMT" %s", - ETH_ADDR_ARGS(neigh->mac), neigh->br_name); -- if (neigh->expires <= time_now()) { -+ if (tnl_neigh_expired(neigh)) { - ds_put_format(&ds, " STALE"); - } - ds_put_char(&ds, '\n'); -diff --git a/lib/tnl-neigh-cache.h b/lib/tnl-neigh-cache.h -index e4b42b0594..877bca3127 100644 ---- a/lib/tnl-neigh-cache.h -+++ b/lib/tnl-neigh-cache.h -@@ -32,7 +32,9 @@ - #include "util.h" - - int tnl_neigh_snoop(const struct flow *flow, struct flow_wildcards *wc, -- const char dev_name[IFNAMSIZ]); -+ const char dev_name[IFNAMSIZ], bool allow_update); -+void tnl_neigh_set(const char name[IFNAMSIZ], const struct in6_addr *dst, -+ const struct eth_addr mac); - int tnl_neigh_lookup(const char dev_name[IFNAMSIZ], const struct in6_addr *dst, - struct eth_addr *mac); - void tnl_neigh_cache_init(void); -diff --git a/ofproto/bond.c b/ofproto/bond.c -index a4116588f4..2c0ad5ef84 100644 ---- a/ofproto/bond.c -+++ b/ofproto/bond.c -@@ -1253,7 +1253,7 @@ insert_bal(struct ovs_list *bals, struct bond_member *member) - break; - } - } -- ovs_list_insert(&pos->bal_node, &member->bal_node); -+ ovs_list_insert(pos ? 
&pos->bal_node : bals, &member->bal_node); - } - - /* Removes 'member' from its current list and then inserts it into 'bals' so -diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c -index 864c136b5d..0f4a61ac6b 100644 ---- a/ofproto/ofproto-dpif-sflow.c -+++ b/ofproto/ofproto-dpif-sflow.c -@@ -468,7 +468,8 @@ sflow_choose_agent_address(const char *agent_device, - const char *target; - SSET_FOR_EACH (target, targets) { - struct sockaddr_storage ss; -- if (inet_parse_active(target, SFL_DEFAULT_COLLECTOR_PORT, &ss, true)) { -+ if (inet_parse_active(target, SFL_DEFAULT_COLLECTOR_PORT, -+ &ss, true, NULL)) { - /* sFlow only supports target in default routing table with - * packet mark zero. - */ -diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c -index 1c9c720f04..57f94df544 100644 ---- a/ofproto/ofproto-dpif-upcall.c -+++ b/ofproto/ofproto-dpif-upcall.c -@@ -2971,11 +2971,11 @@ upcall_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED, - } - ds_put_char(&ds, '\n'); - -- for (i = 0; i < n_revalidators; i++) { -+ for (i = 0; i < udpif->n_revalidators; i++) { - struct revalidator *revalidator = &udpif->revalidators[i]; - int j, elements = 0; - -- for (j = i; j < N_UMAPS; j += n_revalidators) { -+ for (j = i; j < N_UMAPS; j += udpif->n_revalidators) { - elements += cmap_count(&udpif->ukeys[j].cmap); - } - ds_put_format(&ds, " %u: (keys %d)\n", revalidator->id, elements); -diff --git a/ofproto/ofproto-dpif-xlate-cache.c b/ofproto/ofproto-dpif-xlate-cache.c -index dcc91cb380..9224ee2e6d 100644 ---- a/ofproto/ofproto-dpif-xlate-cache.c -+++ b/ofproto/ofproto-dpif-xlate-cache.c -@@ -209,6 +209,7 @@ xlate_cache_clear_entry(struct xc_entry *entry) - { - switch (entry->type) { - case XC_TABLE: -+ ofproto_unref(&(entry->table.ofproto->up)); - break; - case XC_RULE: - ofproto_rule_unref(&entry->rule->up); -@@ -231,6 +232,7 @@ xlate_cache_clear_entry(struct xc_entry *entry) - free(entry->learn.ofm); - break; - case XC_NORMAL: -+ 
ofproto_unref(&(entry->normal.ofproto->up)); - break; - case XC_FIN_TIMEOUT: - /* 'u.fin.rule' is always already held as a XC_RULE, which -diff --git a/ofproto/ofproto-dpif-xlate-cache.h b/ofproto/ofproto-dpif-xlate-cache.h -index 114aff8ea3..0fc6d2ea60 100644 ---- a/ofproto/ofproto-dpif-xlate-cache.h -+++ b/ofproto/ofproto-dpif-xlate-cache.h -@@ -61,9 +61,8 @@ enum xc_type { - * that a flow relates to, although they may be used for other effects as well - * (for instance, refreshing hard timeouts for learned flows). - * -- * An explicit reference is taken to all pointers other than the ones for -- * struct ofproto_dpif. ofproto_dpif pointers are explicitly protected by -- * destroying all xlate caches before the ofproto is destroyed. */ -+ * An explicit reference is taken to all pointers. -+ */ - struct xc_entry { - enum xc_type type; - union { -diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c -index a426fcfeb6..b8886105df 100644 ---- a/ofproto/ofproto-dpif-xlate.c -+++ b/ofproto/ofproto-dpif-xlate.c -@@ -460,7 +460,7 @@ static void xlate_commit_actions(struct xlate_ctx *ctx); - - static void - patch_port_output(struct xlate_ctx *ctx, const struct xport *in_dev, -- struct xport *out_dev); -+ struct xport *out_dev, bool is_last_action); - - static void - ctx_trigger_freeze(struct xlate_ctx *ctx) -@@ -865,7 +865,7 @@ xlate_xbridge_init(struct xlate_cfg *xcfg, struct xbridge *xbridge) - ovs_list_init(&xbridge->xbundles); - hmap_init(&xbridge->xports); - hmap_insert(&xcfg->xbridges, &xbridge->hmap_node, -- hash_pointer(xbridge->ofproto, 0)); -+ uuid_hash(&xbridge->ofproto->uuid)); - } - - static void -@@ -1639,7 +1639,7 @@ xbridge_lookup(struct xlate_cfg *xcfg, const struct ofproto_dpif *ofproto) - - xbridges = &xcfg->xbridges; - -- HMAP_FOR_EACH_IN_BUCKET (xbridge, hmap_node, hash_pointer(ofproto, 0), -+ HMAP_FOR_EACH_IN_BUCKET (xbridge, hmap_node, uuid_hash(&ofproto->uuid), - xbridges) { - if (xbridge->ofproto == ofproto) { - return xbridge; 
-@@ -1661,6 +1661,23 @@ xbridge_lookup_by_uuid(struct xlate_cfg *xcfg, const struct uuid *uuid) - return NULL; - } - -+struct ofproto_dpif * -+xlate_ofproto_lookup(const struct uuid *uuid) -+{ -+ struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp); -+ struct xbridge *xbridge; -+ -+ if (!xcfg) { -+ return NULL; -+ } -+ -+ xbridge = xbridge_lookup_by_uuid(xcfg, uuid); -+ if (xbridge != NULL) { -+ return xbridge->ofproto; -+ } -+ return NULL; -+} -+ - static struct xbundle * - xbundle_lookup(struct xlate_cfg *xcfg, const struct ofbundle *ofbundle) - { -@@ -2125,9 +2142,14 @@ mirror_packet(struct xlate_ctx *ctx, struct xbundle *xbundle, - int snaplen; - - /* Get the details of the mirror represented by the rightmost 1-bit. */ -- ovs_assert(mirror_get(xbridge->mbridge, raw_ctz(mirrors), -- &vlans, &dup_mirrors, -- &out, &snaplen, &out_vlan)); -+ if (OVS_UNLIKELY(!mirror_get(xbridge->mbridge, raw_ctz(mirrors), -+ &vlans, &dup_mirrors, -+ &out, &snaplen, &out_vlan))) { -+ /* The mirror got reconfigured before we got to read it's -+ * configuration. */ -+ mirrors = zero_rightmost_1bit(mirrors); -+ continue; -+ } - - - /* If this mirror selects on the basis of VLAN, and it does not select -@@ -3015,7 +3037,7 @@ xlate_normal(struct xlate_ctx *ctx) - bool is_grat_arp = is_gratuitous_arp(flow, wc); - if (ctx->xin->allow_side_effects - && flow->packet_type == htonl(PT_ETH) -- && in_port->pt_mode != NETDEV_PT_LEGACY_L3 -+ && in_port && in_port->pt_mode != NETDEV_PT_LEGACY_L3 - ) { - update_learning_table(ctx, in_xbundle, flow->dl_src, vlan, - is_grat_arp); -@@ -3024,12 +3046,14 @@ xlate_normal(struct xlate_ctx *ctx) - struct xc_entry *entry; - - /* Save just enough info to update mac learning table later. 
*/ -- entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NORMAL); -- entry->normal.ofproto = ctx->xbridge->ofproto; -- entry->normal.in_port = flow->in_port.ofp_port; -- entry->normal.dl_src = flow->dl_src; -- entry->normal.vlan = vlan; -- entry->normal.is_gratuitous_arp = is_grat_arp; -+ if (ofproto_try_ref(&ctx->xbridge->ofproto->up)) { -+ entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NORMAL); -+ entry->normal.ofproto = ctx->xbridge->ofproto; -+ entry->normal.in_port = flow->in_port.ofp_port; -+ entry->normal.dl_src = flow->dl_src; -+ entry->normal.vlan = vlan; -+ entry->normal.is_gratuitous_arp = is_grat_arp; -+ } - } - - /* Determine output bundle. */ -@@ -3048,7 +3072,6 @@ xlate_normal(struct xlate_ctx *ctx) - */ - ctx->xout->slow |= SLOW_ACTION; - -- memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src); - if (mcast_snooping_is_membership(flow->tp_src) || - mcast_snooping_is_query(flow->tp_src)) { - if (ctx->xin->allow_side_effects && ctx->xin->packet) { -@@ -3272,7 +3295,9 @@ compose_ipfix_action(struct xlate_ctx *ctx, odp_port_t output_odp_port) - struct dpif_ipfix *ipfix = ctx->xbridge->ipfix; - odp_port_t tunnel_out_port = ODPP_NONE; - -- if (!ipfix || ctx->xin->flow.in_port.ofp_port == OFPP_NONE) { -+ if (!ipfix || -+ (output_odp_port == ODPP_NONE && -+ ctx->xin->flow.in_port.ofp_port == OFPP_NONE)) { - return; - } - -@@ -3521,6 +3546,9 @@ propagate_tunnel_data_to_flow__(struct flow *dst_flow, - dst_flow->dl_dst = dmac; - dst_flow->dl_src = smac; - -+ /* Clear VLAN entries which do not apply for tunnel flows. 
*/ -+ memset(dst_flow->vlans, 0, sizeof dst_flow->vlans); -+ - dst_flow->packet_type = htonl(PT_ETH); - dst_flow->nw_dst = src_flow->tunnel.ip_dst; - dst_flow->nw_src = src_flow->tunnel.ip_src; -@@ -3598,7 +3626,7 @@ propagate_tunnel_data_to_flow(struct xlate_ctx *ctx, struct eth_addr dmac, - static int - native_tunnel_output(struct xlate_ctx *ctx, const struct xport *xport, - const struct flow *flow, odp_port_t tunnel_odp_port, -- bool truncate) -+ bool truncate, bool is_last_action) - { - struct netdev_tnl_build_header_params tnl_params; - struct ovs_action_push_tnl tnl_push_data; -@@ -3728,7 +3756,7 @@ native_tunnel_output(struct xlate_ctx *ctx, const struct xport *xport, - entry->tunnel_hdr.hdr_size = tnl_push_data.header_len; - entry->tunnel_hdr.operation = ADD; - -- patch_port_output(ctx, xport, out_dev); -+ patch_port_output(ctx, xport, out_dev, is_last_action); - - /* Similar to the stats update in revalidation, the x_cache entries - * are populated by the previous translation are used to update the -@@ -3822,7 +3850,7 @@ xlate_flow_is_protected(const struct xlate_ctx *ctx, const struct flow *flow, co - */ - static void - patch_port_output(struct xlate_ctx *ctx, const struct xport *in_dev, -- struct xport *out_dev) -+ struct xport *out_dev, bool is_last_action) - { - struct flow *flow = &ctx->xin->flow; - struct flow old_flow = ctx->xin->flow; -@@ -3864,8 +3892,9 @@ patch_port_output(struct xlate_ctx *ctx, const struct xport *in_dev, - if (!process_special(ctx, out_dev) && may_receive(out_dev, ctx)) { - if (xport_stp_forward_state(out_dev) && - xport_rstp_forward_state(out_dev)) { -+ - xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true, -- false, true, clone_xlate_actions); -+ false, is_last_action, clone_xlate_actions); - if (!ctx->freezing) { - xlate_action_set(ctx); - } -@@ -3880,7 +3909,7 @@ patch_port_output(struct xlate_ctx *ctx, const struct xport *in_dev, - mirror_mask_t old_mirrors2 = ctx->mirrors; - - xlate_table_action(ctx, 
flow->in_port.ofp_port, 0, true, true, -- false, true, clone_xlate_actions); -+ false, is_last_action, clone_xlate_actions); - ctx->mirrors = old_mirrors2; - ctx->base_flow = old_base_flow; - ctx->odp_actions->size = old_size; -@@ -4097,7 +4126,21 @@ terminate_native_tunnel(struct xlate_ctx *ctx, struct flow *flow, - (flow->dl_type == htons(ETH_TYPE_ARP) || - flow->nw_proto == IPPROTO_ICMPV6) && - is_neighbor_reply_correct(ctx, flow)) { -- tnl_neigh_snoop(flow, wc, ctx->xbridge->name); -+ tnl_neigh_snoop(flow, wc, ctx->xbridge->name, -+ ctx->xin->allow_side_effects); -+ } else if (*tnl_port != ODPP_NONE && -+ ctx->xin->allow_side_effects && -+ dl_type_is_ip_any(flow->dl_type)) { -+ struct eth_addr mac = flow->dl_src; -+ struct in6_addr s_ip6; -+ -+ if (flow->dl_type == htons(ETH_TYPE_IP)) { -+ in6_addr_set_mapped_ipv4(&s_ip6, flow->nw_src); -+ } else { -+ s_ip6 = flow->ipv6_src; -+ } -+ -+ tnl_neigh_set(ctx->xbridge->name, &s_ip6, mac); - } - } - -@@ -4107,7 +4150,7 @@ terminate_native_tunnel(struct xlate_ctx *ctx, struct flow *flow, - static void - compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, - const struct xlate_bond_recirc *xr, bool check_stp, -- bool is_last_action OVS_UNUSED, bool truncate) -+ bool is_last_action, bool truncate) - { - const struct xport *xport = get_ofp_port(ctx->xbridge, ofp_port); - struct flow_wildcards *wc = ctx->wc; -@@ -4137,6 +4180,10 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, - if (xport->pt_mode == NETDEV_PT_LEGACY_L3) { - flow->packet_type = PACKET_TYPE_BE(OFPHTN_ETHERTYPE, - ntohs(flow->dl_type)); -+ if (ctx->pending_encap) { -+ /* The Ethernet header was not actually added yet. 
*/ -+ ctx->pending_encap = false; -+ } - } - } - -@@ -4144,7 +4191,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, - if (truncate) { - xlate_report_error(ctx, "Cannot truncate output to patch port"); - } -- patch_port_output(ctx, xport, xport->peer); -+ patch_port_output(ctx, xport, xport->peer, is_last_action); - return; - } - -@@ -4239,7 +4286,8 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, - xr->recirc_id); - } else if (is_native_tunnel) { - /* Output to native tunnel port. */ -- native_tunnel_output(ctx, xport, flow, odp_port, truncate); -+ native_tunnel_output(ctx, xport, flow, odp_port, truncate, -+ is_last_action); - flow->tunnel = flow_tnl; /* Restore tunnel metadata */ - - } else if (terminate_native_tunnel(ctx, flow, wc, -@@ -6177,11 +6225,32 @@ static void - compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc, - bool is_last_action) - { -- ovs_u128 old_ct_label_mask = ctx->wc->masks.ct_label; -- uint32_t old_ct_mark_mask = ctx->wc->masks.ct_mark; -- size_t ct_offset; - uint16_t zone; -+ if (ofc->zone_src.field) { -+ union mf_subvalue value; -+ memset(&value, 0xff, sizeof(value)); -+ -+ zone = mf_get_subfield(&ofc->zone_src, &ctx->xin->flow); -+ if (ctx->xin->frozen_state) { -+ /* If the upcall is a resume of a recirculation, we only need to -+ * unwildcard the fields that are not in the frozen_metadata, as -+ * when the rules update, OVS will generate a new recirc_id, -+ * which will invalidate the megaflow with old the recirc_id. 
-+ */ -+ if (!mf_is_frozen_metadata(ofc->zone_src.field)) { -+ mf_write_subfield_flow(&ofc->zone_src, &value, -+ &ctx->wc->masks); -+ } -+ } else { -+ mf_write_subfield_flow(&ofc->zone_src, &value, &ctx->wc->masks); -+ } -+ } else { -+ zone = ofc->zone_imm; -+ } - -+ size_t ct_offset; -+ ovs_u128 old_ct_label_mask = ctx->wc->masks.ct_label; -+ uint32_t old_ct_mark_mask = ctx->wc->masks.ct_mark; - /* Ensure that any prior actions are applied before composing the new - * conntrack action. */ - xlate_commit_actions(ctx); -@@ -6193,11 +6262,6 @@ compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc, - do_xlate_actions(ofc->actions, ofpact_ct_get_action_len(ofc), ctx, - is_last_action, false); - -- if (ofc->zone_src.field) { -- zone = mf_get_subfield(&ofc->zone_src, &ctx->xin->flow); -- } else { -- zone = ofc->zone_imm; -- } - - ct_offset = nl_msg_start_nested(ctx->odp_actions, OVS_ACTION_ATTR_CT); - if (ofc->flags & NX_CT_F_COMMIT) { -@@ -6333,6 +6397,7 @@ xlate_check_pkt_larger(struct xlate_ctx *ctx, - * then ctx->exit would be true. Reset to false so that we can - * do flow translation for 'IF_LESS_EQUAL' case. finish_freezing() - * would have taken care of Undoing the changes done for freeze. 
*/ -+ bool old_exit = ctx->exit; - ctx->exit = false; - - offset_attr = nl_msg_start_nested( -@@ -6357,7 +6422,7 @@ xlate_check_pkt_larger(struct xlate_ctx *ctx, - ctx->was_mpls = old_was_mpls; - ctx->conntracked = old_conntracked; - ctx->xin->flow = old_flow; -- ctx->exit = true; -+ ctx->exit = old_exit; - } - - static void -@@ -6738,13 +6803,14 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, - return; - } - -+ bool exit = false; - OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) { - struct ofpact_controller *controller; - const struct ofpact_metadata *metadata; - const struct ofpact_set_field *set_field; - const struct mf_field *mf; - bool last = is_last_action && ofpact_last(a, ofpacts, ofpacts_len) -- && ctx->action_set.size; -+ && !ctx->action_set.size; - - if (ctx->error) { - break; -@@ -6752,7 +6818,7 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, - - recirc_for_mpls(a, ctx); - -- if (ctx->exit) { -+ if (ctx->exit || exit) { - /* Check if need to store the remaining actions for later - * execution. */ - if (ctx->freezing) { -@@ -7149,17 +7215,18 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, - break; - - case OFPACT_CHECK_PKT_LARGER: { -- if (last) { -- /* If this is last action, then there is no need to -- * translate the action. */ -- break; -- } - const struct ofpact *remaining_acts = ofpact_next(a); - size_t remaining_acts_len = ofpact_remaining_len(remaining_acts, - ofpacts, - ofpacts_len); - xlate_check_pkt_larger(ctx, ofpact_get_CHECK_PKT_LARGER(a), - remaining_acts, remaining_acts_len); -+ if (ctx->xbridge->support.check_pkt_len) { -+ /* If datapath supports check_pkt_len, then -+ * xlate_check_pkt_larger() does the translation for the -+ * ofpacts following 'a'. 
*/ -+ exit = true; -+ } - break; - } - } -@@ -7623,6 +7690,12 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) - goto exit; - } - -+ if (!xin->frozen_state -+ && xin->flow.ct_state -+ && xin->flow.ct_state & CS_TRACKED) { -+ ctx.conntracked = true; -+ } -+ - /* Tunnel metadata in udpif format must be normalized before translation. */ - if (flow->tunnel.flags & FLOW_TNL_F_UDPIF) { - const struct tun_table *tun_tab = ofproto_get_tun_tab( -diff --git a/ofproto/ofproto-dpif-xlate.h b/ofproto/ofproto-dpif-xlate.h -index 851088d794..2ba90e999c 100644 ---- a/ofproto/ofproto-dpif-xlate.h -+++ b/ofproto/ofproto-dpif-xlate.h -@@ -176,6 +176,7 @@ void xlate_ofproto_set(struct ofproto_dpif *, const char *name, struct dpif *, - bool forward_bpdu, bool has_in_band, - const struct dpif_backer_support *support); - void xlate_remove_ofproto(struct ofproto_dpif *); -+struct ofproto_dpif *xlate_ofproto_lookup(const struct uuid *uuid); - - void xlate_bundle_set(struct ofproto_dpif *, struct ofbundle *, - const char *name, enum port_vlan_mode, -diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c -index cba49a99e1..ed92d3731f 100644 ---- a/ofproto/ofproto-dpif.c -+++ b/ofproto/ofproto-dpif.c -@@ -215,10 +215,6 @@ struct shash all_dpif_backers = SHASH_INITIALIZER(&all_dpif_backers); - static struct hmap all_ofproto_dpifs_by_name = - HMAP_INITIALIZER(&all_ofproto_dpifs_by_name); - --/* All existing ofproto_dpif instances, indexed by ->uuid. 
*/ --static struct hmap all_ofproto_dpifs_by_uuid = -- HMAP_INITIALIZER(&all_ofproto_dpifs_by_uuid); -- - static bool ofproto_use_tnl_push_pop = true; - static void ofproto_unixctl_init(void); - static void ct_zone_config_init(struct dpif_backer *backer); -@@ -1682,9 +1678,6 @@ construct(struct ofproto *ofproto_) - hmap_insert(&all_ofproto_dpifs_by_name, - &ofproto->all_ofproto_dpifs_by_name_node, - hash_string(ofproto->up.name, 0)); -- hmap_insert(&all_ofproto_dpifs_by_uuid, -- &ofproto->all_ofproto_dpifs_by_uuid_node, -- uuid_hash(&ofproto->uuid)); - memset(&ofproto->stats, 0, sizeof ofproto->stats); - - ofproto_init_tables(ofproto_, N_TABLES); -@@ -1782,8 +1775,6 @@ destruct(struct ofproto *ofproto_, bool del) - - hmap_remove(&all_ofproto_dpifs_by_name, - &ofproto->all_ofproto_dpifs_by_name_node); -- hmap_remove(&all_ofproto_dpifs_by_uuid, -- &ofproto->all_ofproto_dpifs_by_uuid_node); - - OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) { - CLS_FOR_EACH (rule, up.cr, &table->cls) { -@@ -1819,6 +1810,8 @@ destruct(struct ofproto *ofproto_, bool del) - - seq_destroy(ofproto->ams_seq); - -+ /* Wait for all the meter destroy work to finish. 
*/ -+ ovsrcu_barrier(); - close_dpif_backer(ofproto->backer, del); - } - -@@ -2333,6 +2326,12 @@ set_ipfix( - dpif_ipfix_unref(di); - ofproto->ipfix = NULL; - } -+ -+ /* TODO: need to consider ipfix option changes more than -+ * enable/disable */ -+ if (new_di || !ofproto->ipfix) { -+ ofproto->backer->need_revalidate = REV_RECONFIGURE; -+ } - } - - return 0; -@@ -4433,12 +4432,14 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto, - atomic_add_relaxed(&tbl->n_matched, stats->n_packets, &orig); - } - if (xcache) { -- struct xc_entry *entry; -+ if (ofproto_try_ref(&ofproto->up)) { -+ struct xc_entry *entry; - -- entry = xlate_cache_add_entry(xcache, XC_TABLE); -- entry->table.ofproto = ofproto; -- entry->table.id = *table_id; -- entry->table.match = true; -+ entry = xlate_cache_add_entry(xcache, XC_TABLE); -+ entry->table.ofproto = ofproto; -+ entry->table.id = *table_id; -+ entry->table.match = true; -+ } - } - return rule; - } -@@ -4469,12 +4470,14 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto, - stats->n_packets, &orig); - } - if (xcache) { -- struct xc_entry *entry; -+ if (ofproto_try_ref(&ofproto->up)) { -+ struct xc_entry *entry; - -- entry = xlate_cache_add_entry(xcache, XC_TABLE); -- entry->table.ofproto = ofproto; -- entry->table.id = next_id; -- entry->table.match = (rule != NULL); -+ entry = xlate_cache_add_entry(xcache, XC_TABLE); -+ entry->table.ofproto = ofproto; -+ entry->table.id = next_id; -+ entry->table.match = (rule != NULL); -+ } - } - if (rule) { - goto out; /* Match. 
*/ -@@ -5556,6 +5559,7 @@ ct_set_zone_timeout_policy(const char *datapath_type, uint16_t zone_id, - ct_timeout_policy_unref(backer, ct_zone->ct_tp); - ct_zone->ct_tp = ct_tp; - ct_tp->ref_count++; -+ backer->need_revalidate = REV_RECONFIGURE; - } - } else { - struct ct_zone *new_ct_zone = ct_zone_alloc(zone_id); -@@ -5563,6 +5567,7 @@ ct_set_zone_timeout_policy(const char *datapath_type, uint16_t zone_id, - cmap_insert(&backer->ct_zones, &new_ct_zone->node, - hash_int(zone_id, 0)); - ct_tp->ref_count++; -+ backer->need_revalidate = REV_RECONFIGURE; - } - } - -@@ -5579,6 +5584,7 @@ ct_del_zone_timeout_policy(const char *datapath_type, uint16_t zone_id) - if (ct_zone) { - ct_timeout_policy_unref(backer, ct_zone->ct_tp); - ct_zone_remove_and_destroy(backer, ct_zone); -+ backer->need_revalidate = REV_RECONFIGURE; - } - } - -@@ -5779,15 +5785,7 @@ ofproto_dpif_lookup_by_name(const char *name) - struct ofproto_dpif * - ofproto_dpif_lookup_by_uuid(const struct uuid *uuid) - { -- struct ofproto_dpif *ofproto; -- -- HMAP_FOR_EACH_WITH_HASH (ofproto, all_ofproto_dpifs_by_uuid_node, -- uuid_hash(uuid), &all_ofproto_dpifs_by_uuid) { -- if (uuid_equals(&ofproto->uuid, uuid)) { -- return ofproto; -- } -- } -- return NULL; -+ return xlate_ofproto_lookup(uuid); - } - - static void -@@ -6496,6 +6494,7 @@ ofproto_unixctl_dpif_show_dp_features(struct unixctl_conn *conn, - - dpif_show_support(&ofproto->backer->bt_support, &ds); - unixctl_command_reply(conn, ds_cstr(&ds)); -+ ds_destroy(&ds); - } - - static void -diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h -index 57c7d17cb2..47e96e62e1 100644 ---- a/ofproto/ofproto-provider.h -+++ b/ofproto/ofproto-provider.h -@@ -66,6 +66,7 @@ struct bfd_cfg; - struct meter; - struct ofoperation; - struct ofproto_packet_out; -+struct rule_collection; - struct smap; - - extern struct ovs_mutex ofproto_mutex; -@@ -115,6 +116,9 @@ struct ofproto { - /* List of expirable flows, in all flow tables. 
*/ - struct ovs_list expirable OVS_GUARDED_BY(ofproto_mutex); - -+ /* List of flows to remove from flow tables. */ -+ struct rule_collection *to_remove OVS_GUARDED_BY(ofproto_mutex); -+ - /* Meter table. */ - struct ofputil_meter_features meter_features; - struct hmap meters; /* uint32_t indexed 'struct meter *'. */ -@@ -139,6 +143,8 @@ struct ofproto { - /* Variable length mf_field mapping. Stores all configured variable length - * meta-flow fields (struct mf_field) in a switch. */ - struct vl_mff_map vl_mff_map; -+ /* refcount to this ofproto, held by rule/group/xlate_caches */ -+ struct ovs_refcount refcount; - }; - - void ofproto_init_tables(struct ofproto *, int n_tables); -@@ -1962,6 +1968,7 @@ struct ofproto_flow_mod { - bool modify_may_add_flow; - bool modify_keep_counts; - enum nx_flow_update_event event; -+ uint8_t table_id; - - /* These are only used during commit execution. - * ofproto_flow_mod_uninit() does NOT clean these up. */ -diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c -index bd6103b1c8..7e09a588a2 100644 ---- a/ofproto/ofproto.c -+++ b/ofproto/ofproto.c -@@ -213,6 +213,8 @@ static void ofproto_rule_insert__(struct ofproto *, struct rule *) - OVS_REQUIRES(ofproto_mutex); - static void ofproto_rule_remove__(struct ofproto *, struct rule *) - OVS_REQUIRES(ofproto_mutex); -+static void remove_rules_postponed(struct rule_collection *) -+ OVS_REQUIRES(ofproto_mutex); - - /* The source of an OpenFlow request. 
- * -@@ -530,6 +532,8 @@ ofproto_create(const char *datapath_name, const char *datapath_type, - hindex_init(&ofproto->cookies); - hmap_init(&ofproto->learned_cookies); - ovs_list_init(&ofproto->expirable); -+ ofproto->to_remove = xzalloc(sizeof *ofproto->to_remove); -+ rule_collection_init(ofproto->to_remove); - ofproto->connmgr = connmgr_create(ofproto, datapath_name, datapath_name); - ofproto->min_mtu = INT_MAX; - cmap_init(&ofproto->groups); -@@ -545,6 +549,7 @@ ofproto_create(const char *datapath_name, const char *datapath_type, - - ovs_mutex_init(&ofproto->vl_mff_map.mutex); - cmap_init(&ofproto->vl_mff_map.cmap); -+ ovs_refcount_init(&ofproto->refcount); - - error = ofproto->ofproto_class->construct(ofproto); - if (error) { -@@ -1631,6 +1636,7 @@ ofproto_flush__(struct ofproto *ofproto, bool del) - } - ofproto_group_delete_all__(ofproto); - meter_delete_all(ofproto); -+ remove_rules_postponed(ofproto->to_remove); - /* XXX: Concurrent handler threads may insert new learned flows based on - * learn actions of the now deleted flows right after we release - * 'ofproto_mutex'. */ -@@ -1682,12 +1688,41 @@ ofproto_destroy__(struct ofproto *ofproto) - ovs_assert(hmap_is_empty(&ofproto->learned_cookies)); - hmap_destroy(&ofproto->learned_cookies); - -+ ovs_mutex_lock(&ofproto_mutex); -+ rule_collection_destroy(ofproto->to_remove); -+ free(ofproto->to_remove); -+ ovs_mutex_unlock(&ofproto_mutex); -+ - ofproto->ofproto_class->dealloc(ofproto); - } - --/* Destroying rules is doubly deferred, must have 'ofproto' around for them. -- * - 1st we defer the removal of the rules from the classifier -- * - 2nd we defer the actual destruction of the rules. */ -+/* -+ * Rule destruction requires ofproto to remain accessible. -+ * Depending on the rule destruction call (shown in below), it can take several -+ * RCU grace periods before the ofproto reference is not needed anymore. 
-+ * The ofproto destruction callback is thus protected by a refcount, -+ * and such destruction is itself deferred. -+ * -+ * remove_rules_postponed (one grace period) -+ * -> remove_rule_rcu -+ * -> remove_rule_rcu__ -+ * -> ofproto_rule_unref -> ref count != 1 -+ * -> ... more grace periods. -+ * -> rule_destroy_cb (> 2 grace periods) -+ * -> free -+ * -+ * NOTE: The original ofproto destruction is only deferred by two grace -+ * periods to keep ofproto accessible. By using refcount together the -+ * destruction can be deferred for longer time. Now ofproto has 3 states: -+ * -+ * state 1: alive, with refcount >= 1 -+ * state 2: dying, with refcount == 0, however pointer is valid -+ * state 3: died, memory freed, pointer might be dangling. -+ * -+ * We only need to add refcount to certain objects whose destruction can -+ * take several RCU grace periods (rule, group, xlate_cache). Other -+ * references to ofproto must be cleared before the 2 RCU grace periods. -+ */ - static void - ofproto_destroy_defer__(struct ofproto *ofproto) - OVS_EXCLUDED(ofproto_mutex) -@@ -1695,6 +1730,26 @@ ofproto_destroy_defer__(struct ofproto *ofproto) - ovsrcu_postpone(ofproto_destroy__, ofproto); - } - -+void -+ofproto_ref(struct ofproto *ofproto) -+{ -+ ovs_refcount_ref(&ofproto->refcount); -+} -+ -+bool -+ofproto_try_ref(struct ofproto *ofproto) -+{ -+ return ovs_refcount_try_ref_rcu(&ofproto->refcount); -+} -+ -+void -+ofproto_unref(struct ofproto *ofproto) -+{ -+ if (ofproto && ovs_refcount_unref(&ofproto->refcount) == 1) { -+ ovsrcu_postpone(ofproto_destroy_defer__, ofproto); -+ } -+} -+ - void - ofproto_destroy(struct ofproto *p, bool del) - OVS_EXCLUDED(ofproto_mutex) -@@ -1726,8 +1781,7 @@ ofproto_destroy(struct ofproto *p, bool del) - p->connmgr = NULL; - ovs_mutex_unlock(&ofproto_mutex); - -- /* Destroying rules is deferred, must have 'ofproto' around for them. 
*/ -- ovsrcu_postpone(ofproto_destroy_defer__, p); -+ ofproto_unref(p); - } - - /* Destroys the datapath with the respective 'name' and 'type'. With the Linux -@@ -1878,6 +1932,9 @@ ofproto_run(struct ofproto *p) - - connmgr_run(p->connmgr, handle_openflow); - -+ ovs_mutex_lock(&ofproto_mutex); -+ remove_rules_postponed(p->to_remove); -+ ovs_mutex_unlock(&ofproto_mutex); - return error; - } - -@@ -2916,6 +2973,9 @@ ofproto_rule_destroy__(struct rule *rule) - cls_rule_destroy(CONST_CAST(struct cls_rule *, &rule->cr)); - rule_actions_destroy(rule_get_actions(rule)); - ovs_mutex_destroy(&rule->mutex); -+ /* ofproto_unref() must be called first. It is possible because ofproto -+ * destruction is deferred by an RCU grace period. */ -+ ofproto_unref(rule->ofproto); - rule->ofproto->ofproto_class->rule_dealloc(rule); - } - -@@ -3056,6 +3116,9 @@ group_destroy_cb(struct ofgroup *group) - &group->props)); - ofputil_bucket_list_destroy(CONST_CAST(struct ovs_list *, - &group->buckets)); -+ /* ofproto_unref() must be called first. It is possible because ofproto -+ * destruction is deferred by an RCU grace period. */ -+ ofproto_unref(group->ofproto); - group->ofproto->ofproto_class->group_dealloc(group); - } - -@@ -4437,6 +4500,20 @@ rule_criteria_destroy(struct rule_criteria *criteria) - criteria->version = OVS_VERSION_NOT_REMOVED; /* Mark as destroyed. */ - } - -+/* Adds rules to the 'to_remove' collection, so they can be destroyed -+ * later all together. Destroys 'rules'. */ -+static void -+rules_mark_for_removal(struct ofproto *ofproto, struct rule_collection *rules) -+ OVS_REQUIRES(ofproto_mutex) -+{ -+ struct rule *rule; -+ -+ RULE_COLLECTION_FOR_EACH (rule, rules) { -+ rule_collection_add(ofproto->to_remove, rule); -+ } -+ rule_collection_destroy(rules); -+} -+ - /* Schedules postponed removal of rules, destroys 'rules'. 
*/ - static void - remove_rules_postponed(struct rule_collection *rules) -@@ -5244,10 +5321,15 @@ ofproto_rule_create(struct ofproto *ofproto, struct cls_rule *cr, - struct rule *rule; - enum ofperr error; - -+ if (!ofproto_try_ref(ofproto)) { -+ return OFPERR_OFPFMFC_UNKNOWN; -+ } -+ - /* Allocate new rule. */ - rule = ofproto->ofproto_class->rule_alloc(); - if (!rule) { - cls_rule_destroy(cr); -+ ofproto_unref(ofproto); - VLOG_WARN_RL(&rl, "%s: failed to allocate a rule.", ofproto->name); - return OFPERR_OFPFMFC_UNKNOWN; - } -@@ -5833,7 +5915,7 @@ modify_flows_finish(struct ofproto *ofproto, struct ofproto_flow_mod *ofm, - } - } - learned_cookies_flush(ofproto, &dead_cookies); -- remove_rules_postponed(old_rules); -+ rules_mark_for_removal(ofproto, old_rules); - } - - return error; -@@ -5941,7 +6023,7 @@ delete_flows_finish__(struct ofproto *ofproto, - learned_cookies_dec(ofproto, rule_get_actions(rule), - &dead_cookies); - } -- remove_rules_postponed(rules); -+ rules_mark_for_removal(ofproto, rules); - - learned_cookies_flush(ofproto, &dead_cookies); - } -@@ -7312,8 +7394,13 @@ init_group(struct ofproto *ofproto, const struct ofputil_group_mod *gm, - return OFPERR_OFPGMFC_BAD_TYPE; - } - -+ if (!ofproto_try_ref(ofproto)) { -+ return OFPERR_OFPFMFC_UNKNOWN; -+ } -+ - *ofgroup = ofproto->ofproto_class->group_alloc(); - if (!*ofgroup) { -+ ofproto_unref(ofproto); - VLOG_WARN_RL(&rl, "%s: failed to allocate group", ofproto->name); - return OFPERR_OFPGMFC_OUT_OF_GROUPS; - } -@@ -7350,6 +7437,7 @@ init_group(struct ofproto *ofproto, const struct ofputil_group_mod *gm, - &(*ofgroup)->props)); - ofputil_bucket_list_destroy(CONST_CAST(struct ovs_list *, - &(*ofgroup)->buckets)); -+ ofproto_unref(ofproto); - ofproto->ofproto_class->group_dealloc(*ofgroup); - } - return error; -@@ -7967,6 +8055,7 @@ ofproto_flow_mod_init(struct ofproto *ofproto, struct ofproto_flow_mod *ofm, - ofm->criteria.version = OVS_VERSION_NOT_REMOVED; - ofm->conjs = NULL; - ofm->n_conjs = 0; -+ 
ofm->table_id = fm->table_id; - - bool check_buffer_id = false; - -@@ -8104,6 +8193,33 @@ ofproto_flow_mod_finish(struct ofproto *ofproto, struct ofproto_flow_mod *ofm, - return error; - } - -+static void -+ofproto_table_classifier_defer(struct ofproto *ofproto, -+ const struct ofproto_flow_mod *ofm) -+{ -+ if (check_table_id(ofproto, ofm->table_id)) { -+ if (ofm->table_id == OFPTT_ALL) { -+ struct oftable *table; -+ -+ OFPROTO_FOR_EACH_TABLE (table, ofproto) { -+ classifier_defer(&table->cls); -+ } -+ } else { -+ classifier_defer(&ofproto->tables[ofm->table_id].cls); -+ } -+ } -+} -+ -+static void -+ofproto_publish_classifiers(struct ofproto *ofproto) -+{ -+ struct oftable *table; -+ -+ OFPROTO_FOR_EACH_TABLE (table, ofproto) { -+ classifier_publish(&table->cls); -+ } -+} -+ - /* Commit phases (all while locking ofproto_mutex): - * - * 1. Begin: Gather resources and make changes visible in the next version. -@@ -8165,6 +8281,10 @@ do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags) - /* Store the version in which the changes should take - * effect. */ - be->ofm.version = version; -+ /* Publishing of the classifier update for every flow -+ * modification in a bundle separately is expensive in -+ * CPU time and memory. Deferring. */ -+ ofproto_table_classifier_defer(ofproto, &be->ofm); - error = ofproto_flow_mod_start(ofproto, &be->ofm); - } else if (be->type == OFPTYPE_GROUP_MOD) { - /* Store the version in which the changes should take -@@ -8173,6 +8293,9 @@ do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags) - error = ofproto_group_mod_start(ofproto, &be->ogm); - } else if (be->type == OFPTYPE_PACKET_OUT) { - be->opo.version = version; -+ /* Need to use current version of flows for packet-out, -+ * so publishing all classifiers now. 
*/ -+ ofproto_publish_classifiers(ofproto); - error = ofproto_packet_out_start(ofproto, &be->opo); - } else { - OVS_NOT_REACHED(); -@@ -8183,6 +8306,9 @@ do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags) - } - } - -+ /* Publishing all changes made to classifiers. */ -+ ofproto_publish_classifiers(ofproto); -+ - if (error) { - /* Send error referring to the original message. */ - ofconn_send_error(ofconn, be->msg, error); -@@ -8191,14 +8317,23 @@ do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags) - /* 2. Revert. Undo all the changes made above. */ - LIST_FOR_EACH_REVERSE_CONTINUE(be, node, &bundle->msg_list) { - if (be->type == OFPTYPE_FLOW_MOD) { -+ /* Publishing of the classifier update for every flow -+ * modification in a bundle separately is expensive in -+ * CPU time and memory. Deferring. */ -+ ofproto_table_classifier_defer(ofproto, &be->ofm); - ofproto_flow_mod_revert(ofproto, &be->ofm); - } else if (be->type == OFPTYPE_GROUP_MOD) { - ofproto_group_mod_revert(ofproto, &be->ogm); - } else if (be->type == OFPTYPE_PACKET_OUT) { -+ /* Need to use current version of flows for packet-out, -+ * so publishing all classifiers now. */ -+ ofproto_publish_classifiers(ofproto); - ofproto_packet_out_revert(ofproto, &be->opo); - } - /* Nothing needs to be reverted for a port mod. */ - } -+ /* Publishing all changes made to classifiers. */ -+ ofproto_publish_classifiers(ofproto); - } else { - /* 4. Finish. 
*/ - LIST_FOR_EACH (be, node, &bundle->msg_list) { -diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h -index b0262da2df..4e15167ab7 100644 ---- a/ofproto/ofproto.h -+++ b/ofproto/ofproto.h -@@ -563,6 +563,10 @@ int ofproto_port_get_cfm_status(const struct ofproto *, - enum ofputil_table_miss ofproto_table_get_miss_config(const struct ofproto *, - uint8_t table_id); - -+void ofproto_ref(struct ofproto *); -+void ofproto_unref(struct ofproto *); -+bool ofproto_try_ref(struct ofproto *); -+ - #ifdef __cplusplus - } - #endif -diff --git a/ovsdb/file.c b/ovsdb/file.c -index 59220824fa..9f44007d97 100644 ---- a/ovsdb/file.c -+++ b/ovsdb/file.c -@@ -113,19 +113,17 @@ ovsdb_file_update_row_from_json(struct ovsdb_row *row, bool converting, - if (row_contains_diff - && !ovsdb_datum_is_default(&row->fields[column->index], - &column->type)) { -- struct ovsdb_datum new_datum; -- -- error = ovsdb_datum_apply_diff(&new_datum, -+ error = ovsdb_datum_apply_diff_in_place( - &row->fields[column->index], - &datum, &column->type); - ovsdb_datum_destroy(&datum, &column->type); - if (error) { - return error; - } -- ovsdb_datum_swap(&datum, &new_datum); -+ } else { -+ ovsdb_datum_swap(&row->fields[column->index], &datum); -+ ovsdb_datum_destroy(&datum, &column->type); - } -- ovsdb_datum_swap(&row->fields[column->index], &datum); -- ovsdb_datum_destroy(&datum, &column->type); - } - - return NULL; -diff --git a/ovsdb/monitor.c b/ovsdb/monitor.c -index 532dedcb64..ab814cf20e 100644 ---- a/ovsdb/monitor.c -+++ b/ovsdb/monitor.c -@@ -1231,6 +1231,15 @@ ovsdb_monitor_get_update( - condition, - ovsdb_monitor_compose_row_update2); - if (!condition || !condition->conditional) { -+ if (json) { -+ struct json *json_serialized; -+ -+ /* Pre-serializing the object to avoid doing this -+ * for every client. 
*/ -+ json_serialized = json_serialized_object_create(json); -+ json_destroy(json); -+ json = json_serialized; -+ } - ovsdb_monitor_json_cache_insert(dbmon, version, mcs, - json); - } -diff --git a/ovsdb/mutation.c b/ovsdb/mutation.c -index 56edc5f000..03d1c3499e 100644 ---- a/ovsdb/mutation.c -+++ b/ovsdb/mutation.c -@@ -383,7 +383,7 @@ ovsdb_mutation_set_execute(struct ovsdb_row *row, - break; - - case OVSDB_M_INSERT: -- ovsdb_datum_union(dst, arg, dst_type, false); -+ ovsdb_datum_union(dst, arg, dst_type); - error = ovsdb_mutation_check_count(dst, dst_type); - break; - -diff --git a/ovsdb/ovsdb-idlc.in b/ovsdb/ovsdb-idlc.in -index 61cded16d3..a2ee10af1b 100755 ---- a/ovsdb/ovsdb-idlc.in -+++ b/ovsdb/ovsdb-idlc.in -@@ -551,20 +551,20 @@ static void - print(" smap_init(&row->%s);" % columnName) - print(" for (size_t i = 0; i < datum->n; i++) {") - print(" smap_add(&row->%s," % columnName) -- print(" datum->keys[i].string,") -- print(" datum->values[i].string);") -+ print(" datum->keys[i].s->string,") -+ print(" datum->values[i].s->string);") - print(" }") - elif (type.n_min == 1 and type.n_max == 1) or type.is_optional_pointer(): - print("") - print(" if (datum->n >= 1) {") - if not type.key.ref_table: -- print(" %s = datum->keys[0].%s;" % (keyVar, type.key.type.to_string())) -+ print(" %s = datum->keys[0].%s;" % (keyVar, type.key.type.to_rvalue_string())) - else: - print(" %s = %s%s_cast(ovsdb_idl_get_row_arc(row_, &%stable_%s, &datum->keys[0].uuid));" % (keyVar, prefix, type.key.ref_table.name.lower(), prefix, type.key.ref_table.name.lower())) - - if valueVar: - if not type.value.ref_table: -- print(" %s = datum->values[0].%s;" % (valueVar, type.value.type.to_string())) -+ print(" %s = datum->values[0].%s;" % (valueVar, type.value.type.to_rvalue_string())) - else: - print(" %s = %s%s_cast(ovsdb_idl_get_row_arc(row_, &%stable_%s, &datum->values[0].uuid));" % (valueVar, prefix, type.value.ref_table.name.lower(), prefix, type.value.ref_table.name.lower())) - 
print(" } else {") -@@ -592,7 +592,7 @@ static void - """ % (prefix, type.key.ref_table.name.lower(), prefix, type.key.ref_table.name.lower(), prefix, type.key.ref_table.name.lower())) - keySrc = "keyRow" - else: -- keySrc = "datum->keys[i].%s" % type.key.type.to_string() -+ keySrc = "datum->keys[i].%s" % type.key.type.to_rvalue_string() - if type.value and type.value.ref_table: - print("""\ - struct %s%s *valueRow = %s%s_cast(ovsdb_idl_get_row_arc(row_, &%stable_%s, &datum->values[i].uuid)); -@@ -602,7 +602,7 @@ static void - """ % (prefix, type.value.ref_table.name.lower(), prefix, type.value.ref_table.name.lower(), prefix, type.value.ref_table.name.lower())) - valueSrc = "valueRow" - elif valueVar: -- valueSrc = "datum->values[i].%s" % type.value.type.to_string() -+ valueSrc = "datum->values[i].%s" % type.value.type.to_rvalue_string() - print(" if (!row->n_%s) {" % (columnName)) - - print(" %s = xmalloc(%s * sizeof *%s);" % ( -@@ -910,45 +910,45 @@ void - 'args': ', '.join(['%(type)s%(name)s' - % m for m in members])}) - if type.n_min == 1 and type.n_max == 1: -- print(" union ovsdb_atom key;") -+ print(" union ovsdb_atom *key = xmalloc(sizeof *key);") - if type.value: -- print(" union ovsdb_atom value;") -+ print(" union ovsdb_atom *value = xmalloc(sizeof *value);") - print("") - print(" datum.n = 1;") -- print(" datum.keys = &key;") -- print(" " + type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), keyVar)) -+ print(" datum.keys = key;") -+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), keyVar)) - if type.value: -- print(" datum.values = &value;") -- print(" "+ type.value.assign_c_value_casting_away_const("value.%s" % type.value.type.to_string(), valueVar)) -+ print(" datum.values = value;") -+ print(" " + type.value.copyCValue("value->%s" % type.value.type.to_lvalue_string(), valueVar)) - else: - print(" datum.values = NULL;") -- txn_write_func = "ovsdb_idl_txn_write_clone" -+ txn_write_func = 
"ovsdb_idl_txn_write" - elif type.is_optional_pointer(): -- print(" union ovsdb_atom key;") - print("") - print(" if (%s) {" % keyVar) -+ print(" union ovsdb_atom *key = xmalloc(sizeof *key);") - print(" datum.n = 1;") -- print(" datum.keys = &key;") -- print(" " + type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), keyVar)) -+ print(" datum.keys = key;") -+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), keyVar)) - print(" } else {") - print(" datum.n = 0;") - print(" datum.keys = NULL;") - print(" }") - print(" datum.values = NULL;") -- txn_write_func = "ovsdb_idl_txn_write_clone" -+ txn_write_func = "ovsdb_idl_txn_write" - elif type.n_max == 1: -- print(" union ovsdb_atom key;") - print("") - print(" if (%s) {" % nVar) -+ print(" union ovsdb_atom *key = xmalloc(sizeof *key);") - print(" datum.n = 1;") -- print(" datum.keys = &key;") -- print(" " + type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), "*" + keyVar)) -+ print(" datum.keys = key;") -+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), "*" + keyVar)) - print(" } else {") - print(" datum.n = 0;") - print(" datum.keys = NULL;") - print(" }") - print(" datum.values = NULL;") -- txn_write_func = "ovsdb_idl_txn_write_clone" -+ txn_write_func = "ovsdb_idl_txn_write" - else: - print("") - print(" datum.n = %s;" % nVar) -@@ -958,9 +958,9 @@ void - else: - print(" datum.values = NULL;") - print(" for (size_t i = 0; i < %s; i++) {" % nVar) -- print(" " + type.key.copyCValue("datum.keys[i].%s" % type.key.type.to_string(), "%s[i]" % keyVar)) -+ print(" " + type.key.copyCValue("datum.keys[i].%s" % type.key.type.to_lvalue_string(), "%s[i]" % keyVar)) - if type.value: -- print(" " + type.value.copyCValue("datum.values[i].%s" % type.value.type.to_string(), "%s[i]" % valueVar)) -+ print(" " + type.value.copyCValue("datum.values[i].%s" % type.value.type.to_lvalue_string(), "%s[i]" % valueVar)) - print(" 
}") - if type.value: - valueType = type.value.toAtomicType() -@@ -996,9 +996,8 @@ void - ''' % {'s': structName, 'c': columnName,'coltype':column.type.key.to_const_c_type(prefix), - 'valtype':column.type.value.to_const_c_type(prefix), 'S': structName.upper(), - 'C': columnName.upper(), 't': tableName}) -- -- print(" "+ type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_string(), "new_key")) -- print(" "+ type.value.copyCValue("datum->values[0].%s" % type.value.type.to_string(), "new_value")) -+ print(" " + type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_lvalue_string(), "new_key")) -+ print(" " + type.value.copyCValue("datum->values[0].%s" % type.value.type.to_lvalue_string(), "new_value")) - print(''' - ovsdb_idl_txn_write_partial_map(&row->header_, - &%(s)s_col_%(c)s, -@@ -1022,8 +1021,7 @@ void - ''' % {'s': structName, 'c': columnName,'coltype':column.type.key.to_const_c_type(prefix), - 'valtype':column.type.value.to_const_c_type(prefix), 'S': structName.upper(), - 'C': columnName.upper(), 't': tableName}) -- -- print(" "+ type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_string(), "delete_key")) -+ print(" " + type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_lvalue_string(), "delete_key")) - print(''' - ovsdb_idl_txn_delete_partial_map(&row->header_, - &%(s)s_col_%(c)s, -@@ -1049,8 +1047,7 @@ void - datum->values = NULL; - ''' % {'s': structName, 'c': columnName, - 'valtype':column.type.key.to_const_c_type(prefix), 't': tableName}) -- -- print(" "+ type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_string(), "new_value")) -+ print(" " + type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_lvalue_string(), "new_value")) - print(''' - ovsdb_idl_txn_write_partial_set(&row->header_, - &%(s)s_col_%(c)s, -@@ -1074,8 +1071,7 @@ void - ''' % {'s': structName, 'c': columnName,'coltype':column.type.key.to_const_c_type(prefix), - 'valtype':column.type.key.to_const_c_type(prefix), 'S': structName.upper(), - 'C': 
columnName.upper(), 't': tableName}) -- -- print(" "+ type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_string(), "delete_value")) -+ print(" " + type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_lvalue_string(), "delete_value")) - print(''' - ovsdb_idl_txn_delete_partial_set(&row->header_, - &%(s)s_col_%(c)s, -@@ -1143,37 +1139,36 @@ void - print(" struct ovsdb_datum datum;") - free = [] - if type.n_min == 1 and type.n_max == 1: -- print(" union ovsdb_atom key;") -+ print(" union ovsdb_atom *key = xmalloc(sizeof *key);") - if type.value: -- print(" union ovsdb_atom value;") -+ print(" union ovsdb_atom *value = xmalloc(sizeof *value);") - print("") - print(" datum.n = 1;") -- print(" datum.keys = &key;") -- print(" " + type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), keyVar, refTable=False)) -+ print(" datum.keys = key;") -+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), keyVar, refTable=False)) - if type.value: -- print(" datum.values = &value;") -- print(" "+ type.value.assign_c_value_casting_away_const("value.%s" % type.value.type.to_string(), valueVar, refTable=False)) -+ print(" " + type.value.copyCValue("value.%s" % type.value.type.to_lvalue_string(), valueVar, refTable=False)) - else: - print(" datum.values = NULL;") - elif type.is_optional_pointer(): -- print(" union ovsdb_atom key;") - print("") - print(" if (%s) {" % keyVar) -+ print(" union ovsdb_atom *key = xmalloc(sizeof *key);") - print(" datum.n = 1;") -- print(" datum.keys = &key;") -- print(" " + type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), keyVar, refTable=False)) -+ print(" datum.keys = key;") -+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), keyVar, refTable=False)) - print(" } else {") - print(" datum.n = 0;") - print(" datum.keys = NULL;") - print(" }") - print(" datum.values = NULL;") - elif type.n_max == 1: -- print(" union ovsdb_atom key;") - 
print("") - print(" if (%s) {" % nVar) -+ print(" union ovsdb_atom *key = xmalloc(sizeof *key);") - print(" datum.n = 1;") -- print(" datum.keys = &key;") -- print(" " + type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), "*" + keyVar, refTable=False)) -+ print(" datum.keys = key;") -+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), "*" + keyVar, refTable=False)) - print(" } else {") - print(" datum.n = 0;") - print(" datum.keys = NULL;") -@@ -1182,16 +1177,14 @@ void - else: - print(" datum.n = %s;" % nVar) - print(" datum.keys = %s ? xmalloc(%s * sizeof *datum.keys) : NULL;" % (nVar, nVar)) -- free += ['datum.keys'] - if type.value: - print(" datum.values = xmalloc(%s * sizeof *datum.values);" % nVar) -- free += ['datum.values'] - else: - print(" datum.values = NULL;") - print(" for (size_t i = 0; i < %s; i++) {" % nVar) -- print(" " + type.key.assign_c_value_casting_away_const("datum.keys[i].%s" % type.key.type.to_string(), "%s[i]" % keyVar, refTable=False)) -+ print(" " + type.key.copyCValue("datum.keys[i].%s" % type.key.type.to_lvalue_string(), "%s[i]" % keyVar, refTable=False)) - if type.value: -- print(" " + type.value.assign_c_value_casting_away_const("datum.values[i].%s" % type.value.type.to_string(), "%s[i]" % valueVar, refTable=False)) -+ print(" " + type.value.copyCValue("datum.values[i].%s" % type.value.type.to_lvalue_string(), "%s[i]" % valueVar, refTable=False)) - print(" }") - if type.value: - valueType = type.value.toAtomicType() -@@ -1211,8 +1204,8 @@ void - 's': structName, - 'S': structName.upper(), - 'c': columnName}) -- for var in free: -- print(" free(%s);" % var) -+ print(" ovsdb_datum_destroy(&datum, &%(s)s_col_%(c)s.type);" \ -+ % {'s': structName, 'c': columnName}) - print("}") - - # Index table related functions -@@ -1272,7 +1265,7 @@ struct ovsdb_idl_cursor - struct ovsdb_idl_index *index, const struct %(s)s *target) - { - ovs_assert(index->table->class_ == &%(p)stable_%(tl)s); 
-- return ovsdb_idl_cursor_first_ge(index, &target->header_); -+ return ovsdb_idl_cursor_first_ge(index, target ? &target->header_ : NULL); - } - - struct %(s)s * -@@ -1309,8 +1302,8 @@ struct %(s)s * - - i = 0; - SMAP_FOR_EACH (node, %(c)s) { -- datum->keys[i].string = node->key; -- datum->values[i].string = node->value; -+ datum->keys[i].s = ovsdb_atom_string_create(node->key); -+ datum->values[i].s = ovsdb_atom_string_create(node->value); - i++; - } - ovsdb_datum_sort_unique(datum, OVSDB_TYPE_STRING, OVSDB_TYPE_STRING); -@@ -1359,10 +1352,10 @@ struct %(s)s * - print() - print(" datum.n = 1;") - print(" datum.keys = key;") -- print(" " + type.key.assign_c_value_casting_away_const("key->%s" % type.key.type.to_string(), keyVar)) -+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), keyVar)) - if type.value: - print(" datum.values = value;") -- print(" "+ type.value.assign_c_value_casting_away_const("value->%s" % type.value.type.to_string(), valueVar)) -+ print(" " + type.value.copyCValue("value->%s" % type.value.type.to_lvalue_string(), valueVar)) - else: - print(" datum.values = NULL;") - txn_write_func = "ovsdb_idl_index_write" -@@ -1373,7 +1366,7 @@ struct %(s)s * - print(" key = xmalloc(sizeof (union ovsdb_atom));") - print(" datum.n = 1;") - print(" datum.keys = key;") -- print(" " + type.key.assign_c_value_casting_away_const("key->%s" % type.key.type.to_string(), keyVar)) -+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), keyVar)) - print(" } else {") - print(" datum.n = 0;") - print(" datum.keys = NULL;") -@@ -1387,7 +1380,7 @@ struct %(s)s * - print(" key = xmalloc(sizeof(union ovsdb_atom));") - print(" datum.n = 1;") - print(" datum.keys = key;") -- print(" " + type.key.assign_c_value_casting_away_const("key->%s" % type.key.type.to_string(), "*" + keyVar)) -+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), "*" + keyVar)) - print(" } else {") - print(" datum.n = 0;") - 
print(" datum.keys = NULL;") -@@ -1404,9 +1397,9 @@ struct %(s)s * - else: - print(" datum.values = NULL;") - print(" for (i = 0; i < %s; i++) {" % nVar) -- print(" " + type.key.copyCValue("datum.keys[i].%s" % type.key.type.to_string(), "%s[i]" % keyVar)) -+ print(" " + type.key.copyCValue("datum.keys[i].%s" % type.key.type.to_lvalue_string(), "%s[i]" % keyVar)) - if type.value: -- print(" " + type.value.copyCValue("datum.values[i].%s" % type.value.type.to_string(), "%s[i]" % valueVar)) -+ print(" " + type.value.copyCValue("datum.values[i].%s" % type.value.type.to_lvalue_string(), "%s[i]" % valueVar)) - print(" }") - if type.value: - valueType = type.value.toAtomicType() -diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c -index 0b3d2bb714..5772955c92 100644 ---- a/ovsdb/ovsdb-server.c -+++ b/ovsdb/ovsdb-server.c -@@ -26,6 +26,7 @@ - #include "command-line.h" - #include "daemon.h" - #include "dirs.h" -+#include "dns-resolve.h" - #include "openvswitch/dynamic-string.h" - #include "fatal-signal.h" - #include "file.h" -@@ -329,6 +330,7 @@ main(int argc, char *argv[]) - service_start(&argc, &argv); - fatal_ignore_sigpipe(); - process_init(); -+ dns_resolve_init(true); - - bool active = false; - parse_options(argc, argv, &db_filenames, &remotes, &unixctl_path, -@@ -511,6 +513,7 @@ main(int argc, char *argv[]) - run_command, process_status_msg(status)); - } - } -+ dns_resolve_destroy(); - perf_counters_destroy(); - service_stop(); - return 0; -@@ -904,8 +907,8 @@ query_db_string(const struct shash *all_dbs, const char *name, - - datum = &row->fields[column->index]; - for (i = 0; i < datum->n; i++) { -- if (datum->keys[i].string[0]) { -- return datum->keys[i].string; -+ if (datum->keys[i].s->string[0]) { -+ return datum->keys[i].s->string; - } - } - } -@@ -1018,7 +1021,7 @@ query_db_remotes(const char *name, const struct shash *all_dbs, - - datum = &row->fields[column->index]; - for (i = 0; i < datum->n; i++) { -- add_remote(remotes, datum->keys[i].string); -+ 
add_remote(remotes, datum->keys[i].s->string); - } - } - } else if (column->type.key.type == OVSDB_TYPE_UUID -diff --git a/ovsdb/ovsdb-tool.c b/ovsdb/ovsdb-tool.c -index 05a0223e71..d4a9e34cc4 100644 ---- a/ovsdb/ovsdb-tool.c -+++ b/ovsdb/ovsdb-tool.c -@@ -919,7 +919,8 @@ print_raft_header(const struct raft_header *h, - if (!uuid_is_zero(&h->snap.eid)) { - printf(" prev_eid: %04x\n", uuid_prefix(&h->snap.eid, 4)); - } -- print_data("prev_", h->snap.data, schemap, names); -+ print_data("prev_", raft_entry_get_parsed_data(&h->snap), -+ schemap, names); - } - } - -@@ -973,11 +974,13 @@ raft_header_to_standalone_log(const struct raft_header *h, - struct ovsdb_log *db_log_data) - { - if (h->snap_index) { -- if (!h->snap.data || json_array(h->snap.data)->n != 2) { -+ const struct json *data = raft_entry_get_parsed_data(&h->snap); -+ -+ if (!data || json_array(data)->n != 2) { - ovs_fatal(0, "Incorrect raft header data array length"); - } - -- struct json_array *pa = json_array(h->snap.data); -+ struct json_array *pa = json_array(data); - struct json *schema_json = pa->elems[0]; - struct ovsdb_error *error = NULL; - -@@ -1373,7 +1376,7 @@ do_check_cluster(struct ovs_cmdl_context *ctx) - } - struct raft_entry *e = &s->entries[log_idx]; - e->term = r->term; -- e->data = r->entry.data; -+ raft_entry_set_parsed_data_nocopy(e, r->entry.data); - e->eid = r->entry.eid; - e->servers = r->entry.servers; - break; -diff --git a/ovsdb/ovsdb-util.c b/ovsdb/ovsdb-util.c -index c4075cdae3..6d7be066b6 100644 ---- a/ovsdb/ovsdb-util.c -+++ b/ovsdb/ovsdb-util.c -@@ -111,13 +111,13 @@ ovsdb_util_read_map_string_column(const struct ovsdb_row *row, - - for (i = 0; i < datum->n; i++) { - atom_key = &datum->keys[i]; -- if (!strcmp(atom_key->string, key)) { -+ if (!strcmp(atom_key->s->string, key)) { - atom_value = &datum->values[i]; - break; - } - } - -- return atom_value ? atom_value->string : NULL; -+ return atom_value ? 
atom_value->s->string : NULL; - } - - /* Read string-uuid key-values from a map. Returns the row associated with -@@ -143,7 +143,7 @@ ovsdb_util_read_map_string_uuid_column(const struct ovsdb_row *row, - const struct ovsdb_datum *datum = &row->fields[column->index]; - for (size_t i = 0; i < datum->n; i++) { - union ovsdb_atom *atom_key = &datum->keys[i]; -- if (!strcmp(atom_key->string, key)) { -+ if (!strcmp(atom_key->s->string, key)) { - const union ovsdb_atom *atom_value = &datum->values[i]; - return ovsdb_table_get_row(ref_table, &atom_value->uuid); - } -@@ -181,7 +181,7 @@ ovsdb_util_read_string_column(const struct ovsdb_row *row, - const union ovsdb_atom *atom; - - atom = ovsdb_util_read_column(row, column_name, OVSDB_TYPE_STRING); -- *stringp = atom ? atom->string : NULL; -+ *stringp = atom ? atom->s->string : NULL; - return atom != NULL; - } - -@@ -269,8 +269,10 @@ ovsdb_util_write_string_column(struct ovsdb_row *row, const char *column_name, - const char *string) - { - if (string) { -- const union ovsdb_atom atom = { .string = CONST_CAST(char *, string) }; -+ union ovsdb_atom atom = { -+ .s = ovsdb_atom_string_create(CONST_CAST(char *, string)) }; - ovsdb_util_write_singleton(row, column_name, &atom, OVSDB_TYPE_STRING); -+ ovsdb_atom_destroy(&atom, OVSDB_TYPE_STRING); - } else { - ovsdb_util_clear_column(row, column_name); - } -@@ -305,8 +307,8 @@ ovsdb_util_write_string_string_column(struct ovsdb_row *row, - datum->values = xmalloc(n * sizeof *datum->values); - - for (i = 0; i < n; ++i) { -- datum->keys[i].string = keys[i]; -- datum->values[i].string = values[i]; -+ datum->keys[i].s = ovsdb_atom_string_create_nocopy(keys[i]); -+ datum->values[i].s = ovsdb_atom_string_create_nocopy(values[i]); - } - - /* Sort and check constraints. 
*/ -diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c -index 126d16a2f5..e6d866182c 100644 ---- a/ovsdb/ovsdb.c -+++ b/ovsdb/ovsdb.c -@@ -422,6 +422,8 @@ ovsdb_create(struct ovsdb_schema *schema, struct ovsdb_storage *storage) - ovs_list_init(&db->triggers); - db->run_triggers_now = db->run_triggers = false; - -+ db->n_atoms = 0; -+ - db->is_relay = false; - ovs_list_init(&db->txn_forward_new); - hmap_init(&db->txn_forward_sent); -@@ -518,6 +520,9 @@ ovsdb_get_memory_usage(const struct ovsdb *db, struct simap *usage) - } - - simap_increase(usage, "cells", cells); -+ simap_increase(usage, "atoms", db->n_atoms); -+ simap_increase(usage, "txn-history", db->n_txn_history); -+ simap_increase(usage, "txn-history-atoms", db->n_txn_history_atoms); - - if (db->storage) { - ovsdb_storage_get_memory_usage(db->storage, usage); -diff --git a/ovsdb/ovsdb.h b/ovsdb/ovsdb.h -index 4a7bd0f0ec..ec2d235ec2 100644 ---- a/ovsdb/ovsdb.h -+++ b/ovsdb/ovsdb.h -@@ -90,8 +90,11 @@ struct ovsdb { - /* History trasanctions for incremental monitor transfer. */ - bool need_txn_history; /* Need to maintain history of transactions. */ - unsigned int n_txn_history; /* Current number of history transactions. */ -+ unsigned int n_txn_history_atoms; /* Total number of atoms in history. */ - struct ovs_list txn_history; /* Contains "struct ovsdb_txn_history_node. */ - -+ size_t n_atoms; /* Total number of ovsdb atoms in the database. */ -+ - /* Relay mode. */ - bool is_relay; /* True, if database is in relay mode. */ - /* List that holds transactions waiting to be forwarded to the server. 
*/ -diff --git a/ovsdb/raft-private.c b/ovsdb/raft-private.c -index 26d39a087f..4145c8729f 100644 ---- a/ovsdb/raft-private.c -+++ b/ovsdb/raft-private.c -@@ -18,11 +18,14 @@ - - #include "raft-private.h" - -+#include "coverage.h" - #include "openvswitch/dynamic-string.h" - #include "ovsdb-error.h" - #include "ovsdb-parser.h" - #include "socket-util.h" - #include "sset.h" -+ -+COVERAGE_DEFINE(raft_entry_serialize); - - /* Addresses of Raft servers. */ - -@@ -33,7 +36,10 @@ raft_address_validate(const char *address) - return NULL; - } else if (!strncmp(address, "ssl:", 4) || !strncmp(address, "tcp:", 4)) { - struct sockaddr_storage ss; -- if (!inet_parse_active(address + 4, -1, &ss, true)) { -+ bool dns_failure = false; -+ -+ if (!inet_parse_active(address + 4, -1, &ss, true, &dns_failure) -+ && !dns_failure) { - return ovsdb_error(NULL, "%s: syntax error in address", address); - } - return NULL; -@@ -281,7 +287,8 @@ void - raft_entry_clone(struct raft_entry *dst, const struct raft_entry *src) - { - dst->term = src->term; -- dst->data = json_nullable_clone(src->data); -+ dst->data.full_json = json_nullable_clone(src->data.full_json); -+ dst->data.serialized = json_nullable_clone(src->data.serialized); - dst->eid = src->eid; - dst->servers = json_nullable_clone(src->servers); - dst->election_timer = src->election_timer; -@@ -291,7 +298,8 @@ void - raft_entry_uninit(struct raft_entry *e) - { - if (e) { -- json_destroy(e->data); -+ json_destroy(e->data.full_json); -+ json_destroy(e->data.serialized); - json_destroy(e->servers); - } - } -@@ -301,8 +309,9 @@ raft_entry_to_json(const struct raft_entry *e) - { - struct json *json = json_object_create(); - raft_put_uint64(json, "term", e->term); -- if (e->data) { -- json_object_put(json, "data", json_clone(e->data)); -+ if (raft_entry_has_data(e)) { -+ json_object_put(json, "data", -+ json_clone(raft_entry_get_serialized_data(e))); - json_object_put_format(json, "eid", UUID_FMT, UUID_ARGS(&e->eid)); - } - if (e->servers) { 
-@@ -323,9 +332,10 @@ raft_entry_from_json(struct json *json, struct raft_entry *e) - struct ovsdb_parser p; - ovsdb_parser_init(&p, json, "raft log entry"); - e->term = raft_parse_required_uint64(&p, "term"); -- e->data = json_nullable_clone( -+ raft_entry_set_parsed_data(e, - ovsdb_parser_member(&p, "data", OP_OBJECT | OP_ARRAY | OP_OPTIONAL)); -- e->eid = e->data ? raft_parse_required_uuid(&p, "eid") : UUID_ZERO; -+ e->eid = raft_entry_has_data(e) -+ ? raft_parse_required_uuid(&p, "eid") : UUID_ZERO; - e->servers = json_nullable_clone( - ovsdb_parser_member(&p, "servers", OP_OBJECT | OP_OPTIONAL)); - if (e->servers) { -@@ -344,9 +354,72 @@ bool - raft_entry_equals(const struct raft_entry *a, const struct raft_entry *b) - { - return (a->term == b->term -- && json_equal(a->data, b->data) - && uuid_equals(&a->eid, &b->eid) -- && json_equal(a->servers, b->servers)); -+ && json_equal(a->servers, b->servers) -+ && json_equal(raft_entry_get_parsed_data(a), -+ raft_entry_get_parsed_data(b))); -+} -+ -+bool -+raft_entry_has_data(const struct raft_entry *e) -+{ -+ return e->data.full_json || e->data.serialized; -+} -+ -+static void -+raft_entry_data_serialize(struct raft_entry *e) -+{ -+ if (!raft_entry_has_data(e) || e->data.serialized) { -+ return; -+ } -+ COVERAGE_INC(raft_entry_serialize); -+ e->data.serialized = json_serialized_object_create(e->data.full_json); -+} -+ -+void -+raft_entry_set_parsed_data_nocopy(struct raft_entry *e, struct json *json) -+{ -+ ovs_assert(!json || json->type != JSON_SERIALIZED_OBJECT); -+ e->data.full_json = json; -+ e->data.serialized = NULL; -+} -+ -+void -+raft_entry_set_parsed_data(struct raft_entry *e, const struct json *json) -+{ -+ raft_entry_set_parsed_data_nocopy(e, json_nullable_clone(json)); -+} -+ -+/* Returns a pointer to the fully parsed json object of the data. -+ * Caller takes the ownership of the result. -+ * -+ * Entry will no longer contain a fully parsed json object. 
-+ * Subsequent calls for the same raft entry will return NULL. */ -+struct json * OVS_WARN_UNUSED_RESULT -+raft_entry_steal_parsed_data(struct raft_entry *e) -+{ -+ /* Ensure that serialized version exists. */ -+ raft_entry_data_serialize(e); -+ -+ struct json *json = e->data.full_json; -+ e->data.full_json = NULL; -+ -+ return json; -+} -+ -+/* Returns a pointer to the fully parsed json object of the data, if any. */ -+const struct json * -+raft_entry_get_parsed_data(const struct raft_entry *e) -+{ -+ return e->data.full_json; -+} -+ -+/* Returns a pointer to the JSON_SERIALIZED_OBJECT of the data. */ -+const struct json * -+raft_entry_get_serialized_data(const struct raft_entry *e) -+{ -+ raft_entry_data_serialize(CONST_CAST(struct raft_entry *, e)); -+ return e->data.serialized; - } - - void -@@ -402,8 +475,8 @@ raft_header_from_json__(struct raft_header *h, struct ovsdb_parser *p) - * present, all of them must be. */ - h->snap_index = raft_parse_optional_uint64(p, "prev_index"); - if (h->snap_index) { -- h->snap.data = json_nullable_clone( -- ovsdb_parser_member(p, "prev_data", OP_ANY)); -+ raft_entry_set_parsed_data( -+ &h->snap, ovsdb_parser_member(p, "prev_data", OP_ANY)); - h->snap.eid = raft_parse_required_uuid(p, "prev_eid"); - h->snap.term = raft_parse_required_uint64(p, "prev_term"); - h->snap.election_timer = raft_parse_optional_uint64( -@@ -455,8 +528,9 @@ raft_header_to_json(const struct raft_header *h) - if (h->snap_index) { - raft_put_uint64(json, "prev_index", h->snap_index); - raft_put_uint64(json, "prev_term", h->snap.term); -- if (h->snap.data) { -- json_object_put(json, "prev_data", json_clone(h->snap.data)); -+ if (raft_entry_has_data(&h->snap)) { -+ json_object_put(json, "prev_data", -+ json_clone(raft_entry_get_serialized_data(&h->snap))); - } - json_object_put_format(json, "prev_eid", - UUID_FMT, UUID_ARGS(&h->snap.eid)); -diff --git a/ovsdb/raft-private.h b/ovsdb/raft-private.h -index a69e37e5c2..48c6df511f 100644 ---- 
a/ovsdb/raft-private.h -+++ b/ovsdb/raft-private.h -@@ -118,7 +118,10 @@ void raft_servers_format(const struct hmap *servers, struct ds *ds); - * entry. */ - struct raft_entry { - uint64_t term; -- struct json *data; -+ struct { -+ struct json *full_json; /* Fully parsed JSON object. */ -+ struct json *serialized; /* JSON_SERIALIZED_OBJECT version of data. */ -+ } data; - struct uuid eid; - struct json *servers; - uint64_t election_timer; -@@ -130,6 +133,13 @@ struct json *raft_entry_to_json(const struct raft_entry *); - struct ovsdb_error *raft_entry_from_json(struct json *, struct raft_entry *) - OVS_WARN_UNUSED_RESULT; - bool raft_entry_equals(const struct raft_entry *, const struct raft_entry *); -+bool raft_entry_has_data(const struct raft_entry *); -+void raft_entry_set_parsed_data(struct raft_entry *, const struct json *); -+void raft_entry_set_parsed_data_nocopy(struct raft_entry *, struct json *); -+struct json *raft_entry_steal_parsed_data(struct raft_entry *) -+ OVS_WARN_UNUSED_RESULT; -+const struct json *raft_entry_get_parsed_data(const struct raft_entry *); -+const struct json *raft_entry_get_serialized_data(const struct raft_entry *); - - /* On disk data serialization and deserialization. 
*/ - -diff --git a/ovsdb/raft.c b/ovsdb/raft.c -index 2fb5156519..b70fbed5d4 100644 ---- a/ovsdb/raft.c -+++ b/ovsdb/raft.c -@@ -74,9 +74,12 @@ enum raft_failure_test { - FT_CRASH_BEFORE_SEND_EXEC_REQ, - FT_CRASH_AFTER_SEND_EXEC_REQ, - FT_CRASH_AFTER_RECV_APPEND_REQ_UPDATE, -+ FT_CRASH_BEFORE_SEND_SNAPSHOT_REP, - FT_DELAY_ELECTION, - FT_DONT_SEND_VOTE_REQUEST, - FT_STOP_RAFT_RPC, -+ FT_TRANSFER_LEADERSHIP, -+ FT_TRANSFER_LEADERSHIP_AFTER_SEND_APPEND_REQ, - }; - static enum raft_failure_test failure_test; - -@@ -379,12 +382,19 @@ static bool raft_handle_write_error(struct raft *, struct ovsdb_error *); - static void raft_run_reconfigure(struct raft *); - - static void raft_set_leader(struct raft *, const struct uuid *sid); -+ - static struct raft_server * - raft_find_server(const struct raft *raft, const struct uuid *sid) - { - return raft_server_find(&raft->servers, sid); - } - -+static struct raft_server * -+raft_find_new_server(struct raft *raft, const struct uuid *uuid) -+{ -+ return raft_server_find(&raft->add_servers, uuid); -+} -+ - static char * - raft_make_address_passive(const char *address_) - { -@@ -494,11 +504,11 @@ raft_create_cluster(const char *file_name, const char *name, - .snap_index = index++, - .snap = { - .term = term, -- .data = json_nullable_clone(data), - .eid = uuid_random(), - .servers = json_object_create(), - }, - }; -+ raft_entry_set_parsed_data(&h.snap, data); - shash_add_nocopy(json_object(h.snap.servers), - xasprintf(UUID_FMT, UUID_ARGS(&h.sid)), - json_string_create(local_address)); -@@ -727,10 +737,10 @@ raft_add_entry(struct raft *raft, - uint64_t index = raft->log_end++; - struct raft_entry *entry = &raft->entries[index - raft->log_start]; - entry->term = term; -- entry->data = data; - entry->eid = eid ? 
*eid : UUID_ZERO; - entry->servers = servers; - entry->election_timer = election_timer; -+ raft_entry_set_parsed_data_nocopy(entry, data); - return index; - } - -@@ -741,13 +751,16 @@ raft_write_entry(struct raft *raft, uint64_t term, struct json *data, - const struct uuid *eid, struct json *servers, - uint64_t election_timer) - { -+ uint64_t index = raft_add_entry(raft, term, data, eid, servers, -+ election_timer); -+ const struct json *entry_data = raft_entry_get_serialized_data( -+ &raft->entries[index - raft->log_start]); - struct raft_record r = { - .type = RAFT_REC_ENTRY, - .term = term, - .entry = { -- .index = raft_add_entry(raft, term, data, eid, servers, -- election_timer), -- .data = data, -+ .index = index, -+ .data = CONST_CAST(struct json *, entry_data), - .servers = servers, - .election_timer = election_timer, - .eid = eid ? *eid : UUID_ZERO, -@@ -1864,6 +1877,8 @@ raft_open_conn(struct raft *raft, const char *address, const struct uuid *sid) - static void - raft_conn_close(struct raft_conn *conn) - { -+ VLOG_DBG("closing connection to server %s (%s)", -+ conn->nickname, jsonrpc_session_get_name(conn->js)); - jsonrpc_session_close(conn->js); - ovs_list_remove(&conn->list_node); - free(conn->nickname); -@@ -1918,6 +1933,13 @@ raft_run(struct raft *raft) - return; - } - -+ if (failure_test == FT_TRANSFER_LEADERSHIP) { -+ /* Using this function as it conveniently implements all we need and -+ * snapshotting is the main test scenario for leadership transfer. */ -+ raft_notify_snapshot_recommended(raft); -+ failure_test = FT_NO_TEST; -+ } -+ - raft_waiters_run(raft); - - if (!raft->listener && time_msec() >= raft->listen_backoff) { -@@ -1954,16 +1976,30 @@ raft_run(struct raft *raft) - } - - /* Close unneeded sessions. 
*/ -+ struct raft_server *server; - struct raft_conn *next; - LIST_FOR_EACH_SAFE (conn, next, list_node, &raft->conns) { - if (!raft_conn_should_stay_open(raft, conn)) { -+ server = raft_find_new_server(raft, &conn->sid); -+ if (server) { -+ /* We only have one incoming connection from joining servers, -+ * so if it's closed, we need to destroy the record about the -+ * server. This way the process can be started over on the -+ * next join request. */ -+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); -+ VLOG_INFO_RL(&rl, "cluster "CID_FMT": server %s (%s) " -+ "disconnected while joining", -+ CID_ARGS(&raft->cid), -+ server->nickname, server->address); -+ hmap_remove(&raft->add_servers, &server->hmap_node); -+ raft_server_destroy(server); -+ } - raft->n_disconnections++; - raft_conn_close(conn); - } - } - - /* Open needed sessions. */ -- struct raft_server *server; - HMAP_FOR_EACH (server, hmap_node, &raft->servers) { - raft_open_conn(raft, server->address, &server->sid); - } -@@ -2040,7 +2076,14 @@ raft_run(struct raft *raft) - HMAP_FOR_EACH_SAFE (cmd, next_cmd, hmap_node, &raft->commands) { - if (cmd->timestamp - && now - cmd->timestamp > raft->election_timer * 2) { -- raft_command_complete(raft, cmd, RAFT_CMD_TIMEOUT); -+ if (cmd->index && raft->role != RAFT_LEADER) { -+ /* This server lost leadership and command didn't complete -+ * in time. Likely, it wasn't replicated to the majority -+ * of servers before losing the leadership. 
*/ -+ raft_command_complete(raft, cmd, RAFT_CMD_LOST_LEADERSHIP); -+ } else { -+ raft_command_complete(raft, cmd, RAFT_CMD_TIMEOUT); -+ } - } - } - raft_reset_ping_timer(raft); -@@ -2161,7 +2204,7 @@ raft_get_eid(const struct raft *raft, uint64_t index) - { - for (; index >= raft->log_start; index--) { - const struct raft_entry *e = raft_get_entry(raft, index); -- if (e->data) { -+ if (raft_entry_has_data(e)) { - return &e->eid; - } - } -@@ -2232,6 +2275,9 @@ raft_command_initiate(struct raft *raft, - if (failure_test == FT_CRASH_AFTER_SEND_APPEND_REQ) { - ovs_fatal(0, "Raft test: crash after sending append_request."); - } -+ if (failure_test == FT_TRANSFER_LEADERSHIP_AFTER_SEND_APPEND_REQ) { -+ failure_test = FT_TRANSFER_LEADERSHIP; -+ } - raft_reset_ping_timer(raft); - - return cmd; -@@ -2598,7 +2644,13 @@ raft_become_follower(struct raft *raft) - * configuration is already part of the log. Possibly the configuration - * log entry will not be committed, but until we know that we must use the - * new configuration. Our AppendEntries processing will properly update -- * the server configuration later, if necessary. */ -+ * the server configuration later, if necessary. -+ * -+ * Also we do not complete commands here, as they can still be completed -+ * if their log entries have already been replicated to other servers. -+ * If the entries were actually committed according to the new leader, our -+ * AppendEntries processing will complete the corresponding commands. 
-+ */ - struct raft_server *s; - HMAP_FOR_EACH (s, hmap_node, &raft->add_servers) { - raft_send_add_server_reply__(raft, &s->sid, s->address, false, -@@ -2612,8 +2664,6 @@ raft_become_follower(struct raft *raft) - raft_server_destroy(raft->remove_server); - raft->remove_server = NULL; - } -- -- raft_complete_all_commands(raft, RAFT_CMD_LOST_LEADERSHIP); - } - - static void -@@ -2826,8 +2876,8 @@ raft_truncate(struct raft *raft, uint64_t new_end) - return servers_changed; - } - --static const struct json * --raft_peek_next_entry(struct raft *raft, struct uuid *eid) -+static const struct raft_entry * -+raft_peek_next_entry(struct raft *raft) - { - /* Invariant: log_start - 2 <= last_applied <= commit_index < log_end. */ - ovs_assert(raft->log_start <= raft->last_applied + 2); -@@ -2839,32 +2889,20 @@ raft_peek_next_entry(struct raft *raft, struct uuid *eid) - } - - if (raft->log_start == raft->last_applied + 2) { -- *eid = raft->snap.eid; -- return raft->snap.data; -+ return &raft->snap; - } - - while (raft->last_applied < raft->commit_index) { - const struct raft_entry *e = raft_get_entry(raft, - raft->last_applied + 1); -- if (e->data) { -- *eid = e->eid; -- return e->data; -+ if (raft_entry_has_data(e)) { -+ return e; - } - raft->last_applied++; - } - return NULL; - } - --static const struct json * --raft_get_next_entry(struct raft *raft, struct uuid *eid) --{ -- const struct json *data = raft_peek_next_entry(raft, eid); -- if (data) { -- raft->last_applied++; -- } -- return data; --} -- - /* Updates commit index in raft log. If commit index is already up-to-date - * it does nothing and return false, otherwise, returns true. 
*/ - static bool -@@ -2874,61 +2912,56 @@ raft_update_commit_index(struct raft *raft, uint64_t new_commit_index) - return false; - } - -- if (raft->role == RAFT_LEADER) { -- while (raft->commit_index < new_commit_index) { -- uint64_t index = ++raft->commit_index; -- const struct raft_entry *e = raft_get_entry(raft, index); -- if (e->data) { -- struct raft_command *cmd -- = raft_find_command_by_eid(raft, &e->eid); -- if (cmd) { -- if (!cmd->index) { -- VLOG_DBG("Command completed after role change from" -- " follower to leader "UUID_FMT, -- UUID_ARGS(&e->eid)); -- cmd->index = index; -- } -- raft_command_complete(raft, cmd, RAFT_CMD_SUCCESS); -+ while (raft->commit_index < new_commit_index) { -+ uint64_t index = ++raft->commit_index; -+ const struct raft_entry *e = raft_get_entry(raft, index); -+ -+ if (raft_entry_has_data(e)) { -+ struct raft_command *cmd = raft_find_command_by_eid(raft, &e->eid); -+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); -+ -+ if (cmd) { -+ if (!cmd->index && raft->role == RAFT_LEADER) { -+ VLOG_INFO_RL(&rl, -+ "command completed after role change from " -+ "follower to leader (eid: "UUID_FMT", " -+ "commit index: %"PRIu64")", UUID_ARGS(&e->eid), index); -+ } else if (!cmd->index && raft->role != RAFT_LEADER) { -+ /* This can happen when leader fail-over before sending -+ * execute_command_reply. */ -+ VLOG_INFO_RL(&rl, -+ "command completed without reply (eid: "UUID_FMT", " -+ "commit index: %"PRIu64")", UUID_ARGS(&e->eid), index); -+ } else if (cmd->index && raft->role != RAFT_LEADER) { -+ /* This can happen if current server lost leadership after -+ * sending append requests to the majority of servers, but -+ * before receiving majority of append replies. */ -+ VLOG_INFO_RL(&rl, -+ "command completed after role change from " -+ "leader to follower (eid: "UUID_FMT", " -+ "commit index: %"PRIu64")", UUID_ARGS(&e->eid), index); -+ /* Clearing 'sid' to avoid sending cmd execution reply. 
*/ -+ cmd->sid = UUID_ZERO; -+ } else { -+ /* (cmd->index && raft->role == RAFT_LEADER) -+ * Normal command completion on a leader. */ - } -- } -- if (e->election_timer) { -- VLOG_INFO("Election timer changed from %"PRIu64" to %"PRIu64, -- raft->election_timer, e->election_timer); -- raft->election_timer = e->election_timer; -- raft->election_timer_new = 0; -- raft_update_probe_intervals(raft); -- } -- if (e->servers) { -- /* raft_run_reconfigure() can write a new Raft entry, which can -- * reallocate raft->entries, which would invalidate 'e', so -- * this case must be last, after the one for 'e->data'. */ -- raft_run_reconfigure(raft); -+ cmd->index = index; -+ raft_command_complete(raft, cmd, RAFT_CMD_SUCCESS); - } - } -- } else { -- while (raft->commit_index < new_commit_index) { -- uint64_t index = ++raft->commit_index; -- const struct raft_entry *e = raft_get_entry(raft, index); -- if (e->election_timer) { -- VLOG_INFO("Election timer changed from %"PRIu64" to %"PRIu64, -- raft->election_timer, e->election_timer); -- raft->election_timer = e->election_timer; -- raft_update_probe_intervals(raft); -- } -+ if (e->election_timer) { -+ VLOG_INFO("Election timer changed from %"PRIu64" to %"PRIu64, -+ raft->election_timer, e->election_timer); -+ raft->election_timer = e->election_timer; -+ raft->election_timer_new = 0; -+ raft_update_probe_intervals(raft); - } -- /* Check if any pending command can be completed, and complete it. -- * This can happen when leader fail-over before sending -- * execute_command_reply. 
*/ -- const struct uuid *eid = raft_get_eid(raft, new_commit_index); -- struct raft_command *cmd = raft_find_command_by_eid(raft, eid); -- if (cmd) { -- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); -- VLOG_INFO_RL(&rl, -- "Command completed without reply (eid: "UUID_FMT", " -- "commit index: %"PRIu64")", -- UUID_ARGS(eid), new_commit_index); -- cmd->index = new_commit_index; -- raft_command_complete(raft, cmd, RAFT_CMD_SUCCESS); -+ if (e->servers && raft->role == RAFT_LEADER) { -+ /* raft_run_reconfigure() can write a new Raft entry, which can -+ * reallocate raft->entries, which would invalidate 'e', so -+ * this case must be last, after the one for 'e->data'. */ -+ raft_run_reconfigure(raft); - } - } - -@@ -3059,7 +3092,9 @@ raft_handle_append_entries(struct raft *raft, - for (; i < n_entries; i++) { - const struct raft_entry *e = &entries[i]; - error = raft_write_entry(raft, e->term, -- json_nullable_clone(e->data), &e->eid, -+ json_nullable_clone( -+ raft_entry_get_parsed_data(e)), -+ &e->eid, - json_nullable_clone(e->servers), - e->election_timer); - if (error) { -@@ -3314,20 +3349,29 @@ bool - raft_has_next_entry(const struct raft *raft_) - { - struct raft *raft = CONST_CAST(struct raft *, raft_); -- struct uuid eid; -- return raft_peek_next_entry(raft, &eid) != NULL; -+ return raft_peek_next_entry(raft) != NULL; - } - - /* Returns the next log entry or snapshot from 'raft', or NULL if there are -- * none left to read. Stores the entry ID of the log entry in '*eid'. Stores -- * true in '*is_snapshot' if the returned data is a snapshot, false if it is a -- * log entry. */ --const struct json * --raft_next_entry(struct raft *raft, struct uuid *eid, bool *is_snapshot) -+ * none left to read. Stores the entry ID of the log entry in '*eid'. -+ * -+ * The caller takes ownership of the result. 
*/ -+struct json * OVS_WARN_UNUSED_RESULT -+raft_next_entry(struct raft *raft, struct uuid *eid) - { -- const struct json *data = raft_get_next_entry(raft, eid); -- *is_snapshot = data == raft->snap.data; -- return data; -+ const struct raft_entry *e = raft_peek_next_entry(raft); -+ -+ if (!e) { -+ return NULL; -+ } -+ -+ raft->last_applied++; -+ *eid = e->eid; -+ -+ /* DB will only read each entry once, so we don't need to store the fully -+ * parsed json object any longer. The serialized version is sufficient -+ * for sending to other cluster members or writing to the log. */ -+ return raft_entry_steal_parsed_data(CONST_CAST(struct raft_entry *, e)); - } - - /* Returns the log index of the last-read snapshot or log entry. */ -@@ -3352,12 +3396,6 @@ raft_find_peer(struct raft *raft, const struct uuid *uuid) - return s && !uuid_equals(&raft->sid, &s->sid) ? s : NULL; - } - --static struct raft_server * --raft_find_new_server(struct raft *raft, const struct uuid *uuid) --{ -- return raft_server_find(&raft->add_servers, uuid); --} -- - /* Figure 3.1: "If there exists an N such that N > commitIndex, a - * majority of matchIndex[i] >= N, and log[N].term == currentTerm, set - * commitIndex = N (sections 3.5 and 3.6)." 
*/ -@@ -3420,6 +3458,7 @@ raft_send_install_snapshot_request(struct raft *raft, - const struct raft_server *s, - const char *comment) - { -+ const struct json *data = raft_entry_get_serialized_data(&raft->snap); - union raft_rpc rpc = { - .install_snapshot_request = { - .common = { -@@ -3432,7 +3471,7 @@ raft_send_install_snapshot_request(struct raft *raft, - .last_term = raft->snap.term, - .last_servers = raft->snap.servers, - .last_eid = raft->snap.eid, -- .data = raft->snap.data, -+ .data = CONST_CAST(struct json *, data), - .election_timer = raft->election_timer, /* use latest value */ - } - }; -@@ -3980,6 +4019,10 @@ raft_write_snapshot(struct raft *raft, struct ovsdb_log *log, - uint64_t new_log_start, - const struct raft_entry *new_snapshot) - { -+ /* Ensure that new snapshot contains serialized data object, so it will -+ * not be allocated while serializing the on-stack raft header object. */ -+ ovs_assert(raft_entry_get_serialized_data(new_snapshot)); -+ - struct raft_header h = { - .sid = raft->sid, - .cid = raft->cid, -@@ -3998,12 +4041,13 @@ raft_write_snapshot(struct raft *raft, struct ovsdb_log *log, - /* Write log records. */ - for (uint64_t index = new_log_start; index < raft->log_end; index++) { - const struct raft_entry *e = &raft->entries[index - raft->log_start]; -+ const struct json *log_data = raft_entry_get_serialized_data(e); - struct raft_record r = { - .type = RAFT_REC_ENTRY, - .term = e->term, - .entry = { - .index = index, -- .data = e->data, -+ .data = CONST_CAST(struct json *, log_data), - .servers = e->servers, - .election_timer = e->election_timer, - .eid = e->eid, -@@ -4093,19 +4137,21 @@ raft_handle_install_snapshot_request__( - - /* Case 3: The new snapshot starts past the end of our current log, so - * discard all of our current log. 
*/ -- const struct raft_entry new_snapshot = { -+ struct raft_entry new_snapshot = { - .term = rq->last_term, -- .data = rq->data, - .eid = rq->last_eid, -- .servers = rq->last_servers, -+ .servers = json_clone(rq->last_servers), - .election_timer = rq->election_timer, - }; -+ raft_entry_set_parsed_data(&new_snapshot, rq->data); -+ - struct ovsdb_error *error = raft_save_snapshot(raft, new_log_start, - &new_snapshot); - if (error) { - char *error_s = ovsdb_error_to_string_free(error); - VLOG_WARN("could not save snapshot: %s", error_s); - free(error_s); -+ raft_entry_uninit(&new_snapshot); - return false; - } - -@@ -4120,7 +4166,7 @@ raft_handle_install_snapshot_request__( - } - - raft_entry_uninit(&raft->snap); -- raft_entry_clone(&raft->snap, &new_snapshot); -+ raft->snap = new_snapshot; - - raft_get_servers_from_log(raft, VLL_INFO); - raft_get_election_timer_from_log(raft); -@@ -4132,6 +4178,10 @@ static void - raft_handle_install_snapshot_request( - struct raft *raft, const struct raft_install_snapshot_request *rq) - { -+ if (failure_test == FT_CRASH_BEFORE_SEND_SNAPSHOT_REP) { -+ ovs_fatal(0, "Raft test: crash before sending install_snapshot_reply"); -+ } -+ - if (raft_handle_install_snapshot_request__(raft, rq)) { - union raft_rpc rpy = { - .install_snapshot_reply = { -@@ -4216,7 +4266,7 @@ raft_may_snapshot(const struct raft *raft) - && !raft->leaving - && !raft->left - && !raft->failed -- && raft->role != RAFT_LEADER -+ && (raft->role == RAFT_FOLLOWER || hmap_count(&raft->servers) == 1) - && raft->last_applied >= raft->log_start); - } - -@@ -4265,11 +4315,12 @@ raft_store_snapshot(struct raft *raft, const struct json *new_snapshot_data) - uint64_t new_log_start = raft->last_applied + 1; - struct raft_entry new_snapshot = { - .term = raft_get_term(raft, new_log_start - 1), -- .data = json_clone(new_snapshot_data), - .eid = *raft_get_eid(raft, new_log_start - 1), - .servers = json_clone(raft_servers_for_index(raft, new_log_start - 1)), - .election_timer = 
raft->election_timer, - }; -+ raft_entry_set_parsed_data(&new_snapshot, new_snapshot_data); -+ - struct ovsdb_error *error = raft_save_snapshot(raft, new_log_start, - &new_snapshot); - if (error) { -@@ -4286,6 +4337,9 @@ raft_store_snapshot(struct raft *raft, const struct json *new_snapshot_data) - memmove(&raft->entries[0], &raft->entries[new_log_start - raft->log_start], - (raft->log_end - new_log_start) * sizeof *raft->entries); - raft->log_start = new_log_start; -+ /* It's a snapshot of the current database state, ovsdb-server will not -+ * read it back. Destroying the parsed json object to not waste memory. */ -+ json_destroy(raft_entry_steal_parsed_data(&raft->snap)); - return NULL; - } - -@@ -4926,6 +4980,8 @@ raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED, - failure_test = FT_CRASH_AFTER_SEND_EXEC_REQ; - } else if (!strcmp(test, "crash-after-receiving-append-request-update")) { - failure_test = FT_CRASH_AFTER_RECV_APPEND_REQ_UPDATE; -+ } else if (!strcmp(test, "crash-before-sending-install-snapshot-reply")) { -+ failure_test = FT_CRASH_BEFORE_SEND_SNAPSHOT_REP; - } else if (!strcmp(test, "delay-election")) { - failure_test = FT_DELAY_ELECTION; - struct raft *raft; -@@ -4938,6 +4994,11 @@ raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED, - failure_test = FT_DONT_SEND_VOTE_REQUEST; - } else if (!strcmp(test, "stop-raft-rpc")) { - failure_test = FT_STOP_RAFT_RPC; -+ } else if (!strcmp(test, -+ "transfer-leadership-after-sending-append-request")) { -+ failure_test = FT_TRANSFER_LEADERSHIP_AFTER_SEND_APPEND_REQ; -+ } else if (!strcmp(test, "transfer-leadership")) { -+ failure_test = FT_TRANSFER_LEADERSHIP; - } else if (!strcmp(test, "clear")) { - failure_test = FT_NO_TEST; - unixctl_command_reply(conn, "test dismissed"); -diff --git a/ovsdb/raft.h b/ovsdb/raft.h -index 3545c41c2c..599bc0ae86 100644 ---- a/ovsdb/raft.h -+++ b/ovsdb/raft.h -@@ -132,8 +132,8 @@ bool raft_left(const struct raft *); - bool raft_failed(const struct raft 
*); - - /* Reading snapshots and log entries. */ --const struct json *raft_next_entry(struct raft *, struct uuid *eid, -- bool *is_snapshot); -+struct json *raft_next_entry(struct raft *, struct uuid *eid) -+ OVS_WARN_UNUSED_RESULT; - bool raft_has_next_entry(const struct raft *); - - uint64_t raft_get_applied_index(const struct raft *); -diff --git a/ovsdb/rbac.c b/ovsdb/rbac.c -index 2986027c90..ff411675f0 100644 ---- a/ovsdb/rbac.c -+++ b/ovsdb/rbac.c -@@ -53,8 +53,8 @@ ovsdb_find_row_by_string_key(const struct ovsdb_table *table, - HMAP_FOR_EACH (row, hmap_node, &table->rows) { - const struct ovsdb_datum *datum = &row->fields[column->index]; - for (size_t i = 0; i < datum->n; i++) { -- if (datum->keys[i].string[0] && -- !strcmp(key, datum->keys[i].string)) { -+ if (datum->keys[i].s->string[0] && -+ !strcmp(key, datum->keys[i].s->string)) { - return row; - } - } -@@ -113,7 +113,7 @@ ovsdb_rbac_authorized(const struct ovsdb_row *perms, - } - - for (i = 0; i < datum->n; i++) { -- const char *name = datum->keys[i].string; -+ const char *name = datum->keys[i].s->string; - const char *value = NULL; - bool is_map; - -@@ -271,7 +271,7 @@ rbac_column_modification_permitted(const struct ovsdb_column *column, - size_t i; - - for (i = 0; i < modifiable->n; i++) { -- char *name = modifiable->keys[i].string; -+ char *name = modifiable->keys[i].s->string; - - if (!strcmp(name, column->name)) { - return true; -diff --git a/ovsdb/row.c b/ovsdb/row.c -index 65a0546211..e83c60a218 100644 ---- a/ovsdb/row.c -+++ b/ovsdb/row.c -@@ -38,8 +38,7 @@ allocate_row(const struct ovsdb_table *table) - struct ovsdb_row *row = xmalloc(row_size); - row->table = CONST_CAST(struct ovsdb_table *, table); - row->txn_row = NULL; -- ovs_list_init(&row->src_refs); -- ovs_list_init(&row->dst_refs); -+ hmap_init(&row->dst_refs); - row->n_refs = 0; - return row; - } -@@ -61,6 +60,78 @@ ovsdb_row_create(const struct ovsdb_table *table) - return row; - } - -+static struct ovsdb_weak_ref * 
-+ovsdb_weak_ref_clone(struct ovsdb_weak_ref *src) -+{ -+ struct ovsdb_weak_ref *weak = xzalloc(sizeof *weak); -+ -+ hmap_node_nullify(&weak->dst_node); -+ ovs_list_init(&weak->src_node); -+ weak->src_table = src->src_table; -+ weak->src = src->src; -+ weak->dst_table = src->dst_table; -+ weak->dst = src->dst; -+ ovsdb_atom_clone(&weak->key, &src->key, src->type.key.type); -+ if (src->type.value.type != OVSDB_TYPE_VOID) { -+ ovsdb_atom_clone(&weak->value, &src->value, src->type.value.type); -+ } -+ ovsdb_type_clone(&weak->type, &src->type); -+ weak->column_idx = src->column_idx; -+ weak->by_key = src->by_key; -+ return weak; -+} -+ -+uint32_t -+ovsdb_weak_ref_hash(const struct ovsdb_weak_ref *weak) -+{ -+ return uuid_hash(&weak->src); -+} -+ -+static bool -+ovsdb_weak_ref_equals(const struct ovsdb_weak_ref *a, -+ const struct ovsdb_weak_ref *b) -+{ -+ if (a == b) { -+ return true; -+ } -+ return a->src_table == b->src_table -+ && a->dst_table == b->dst_table -+ && uuid_equals(&a->src, &b->src) -+ && uuid_equals(&a->dst, &b->dst) -+ && a->column_idx == b->column_idx -+ && a->by_key == b->by_key -+ && ovsdb_atom_equals(&a->key, &b->key, a->type.key.type); -+} -+ -+struct ovsdb_weak_ref * -+ovsdb_row_find_weak_ref(const struct ovsdb_row *row, -+ const struct ovsdb_weak_ref *ref) -+{ -+ struct ovsdb_weak_ref *weak; -+ HMAP_FOR_EACH_WITH_HASH (weak, dst_node, -+ ovsdb_weak_ref_hash(ref), &row->dst_refs) { -+ if (ovsdb_weak_ref_equals(weak, ref)) { -+ return weak; -+ } -+ } -+ return NULL; -+} -+ -+void -+ovsdb_weak_ref_destroy(struct ovsdb_weak_ref *weak) -+{ -+ if (!weak) { -+ return; -+ } -+ ovs_assert(ovs_list_is_empty(&weak->src_node)); -+ ovsdb_atom_destroy(&weak->key, weak->type.key.type); -+ if (weak->type.value.type != OVSDB_TYPE_VOID) { -+ ovsdb_atom_destroy(&weak->value, weak->type.value.type); -+ } -+ ovsdb_type_destroy(&weak->type); -+ free(weak); -+} -+ - struct ovsdb_row * - ovsdb_row_clone(const struct ovsdb_row *old) - { -@@ -75,6 +146,13 @@ 
ovsdb_row_clone(const struct ovsdb_row *old) - &old->fields[column->index], - &column->type); - } -+ -+ struct ovsdb_weak_ref *weak, *clone; -+ HMAP_FOR_EACH (weak, dst_node, &old->dst_refs) { -+ clone = ovsdb_weak_ref_clone(weak); -+ hmap_insert(&new->dst_refs, &clone->dst_node, -+ ovsdb_weak_ref_hash(clone)); -+ } - return new; - } - -@@ -85,20 +163,13 @@ ovsdb_row_destroy(struct ovsdb_row *row) - { - if (row) { - const struct ovsdb_table *table = row->table; -- struct ovsdb_weak_ref *weak, *next; -+ struct ovsdb_weak_ref *weak; - const struct shash_node *node; - -- LIST_FOR_EACH_SAFE (weak, next, dst_node, &row->dst_refs) { -- ovs_list_remove(&weak->src_node); -- ovs_list_remove(&weak->dst_node); -- free(weak); -- } -- -- LIST_FOR_EACH_SAFE (weak, next, src_node, &row->src_refs) { -- ovs_list_remove(&weak->src_node); -- ovs_list_remove(&weak->dst_node); -- free(weak); -+ HMAP_FOR_EACH_POP (weak, dst_node, &row->dst_refs) { -+ ovsdb_weak_ref_destroy(weak); - } -+ hmap_destroy(&row->dst_refs); - - SHASH_FOR_EACH (node, &table->schema->columns) { - const struct ovsdb_column *column = node->data; -diff --git a/ovsdb/row.h b/ovsdb/row.h -index 394ac8eb49..fe04555d0c 100644 ---- a/ovsdb/row.h -+++ b/ovsdb/row.h -@@ -36,11 +36,28 @@ struct ovsdb_column_set; - * ovsdb_weak_ref" structures are created for them. - */ - struct ovsdb_weak_ref { -- struct ovs_list src_node; /* In src->src_refs list. */ -- struct ovs_list dst_node; /* In destination row's dst_refs list. */ -- struct ovsdb_row *src; /* Source row. */ -- struct ovsdb_table *dst_table; /* Destination table. */ -+ struct hmap_node dst_node; /* In ovsdb_row's 'dst_refs' hmap. */ -+ struct ovs_list src_node; /* In txn_row's 'deleted/added_refs'. */ -+ -+ struct ovsdb_table *src_table; /* Source row table. */ -+ struct uuid src; /* Source row uuid. */ -+ -+ struct ovsdb_table *dst_table; /* Destination row table. */ - struct uuid dst; /* Destination row uuid. 
*/ -+ -+ /* Source row's key-value pair that created this reference. -+ * This information is needed in order to find and delete the reference -+ * from the source row. We need both key and value in order to avoid -+ * accidential deletion of an updated data, i.e. if value in datum got -+ * updated and the reference was created by the old value. -+ * Storing column index in order to remove references from the correct -+ * column. 'by_key' flag allows to distinguish 2 references in a corner -+ * case where key and value are the same. */ -+ union ovsdb_atom key; -+ union ovsdb_atom value; -+ struct ovsdb_type type; /* Datum type of the key-value pair. */ -+ unsigned int column_idx; /* Row column index for this pair. */ -+ bool by_key; /* 'true' if reference is a 'key'. */ - }; - - /* A row in a database table. */ -@@ -50,8 +67,7 @@ struct ovsdb_row { - struct ovsdb_txn_row *txn_row; /* Transaction that row is in, if any. */ - - /* Weak references. Updated and checked only at transaction commit. */ -- struct ovs_list src_refs; /* Weak references from this row. */ -- struct ovs_list dst_refs; /* Weak references to this row. */ -+ struct hmap dst_refs; /* Weak references to this row. */ - - /* Number of strong refs to this row from other rows, in this table or - * other tables, through 'uuid' columns that have a 'refTable' constraint -@@ -69,6 +85,12 @@ struct ovsdb_row { - * index 'i' is contained in hmap table->indexes[i]. 
*/ - }; - -+uint32_t ovsdb_weak_ref_hash(const struct ovsdb_weak_ref *); -+struct ovsdb_weak_ref * ovsdb_row_find_weak_ref(const struct ovsdb_row *, -+ const struct ovsdb_weak_ref *); -+void ovsdb_weak_ref_destroy(struct ovsdb_weak_ref *); -+ -+ - struct ovsdb_row *ovsdb_row_create(const struct ovsdb_table *); - struct ovsdb_row *ovsdb_row_clone(const struct ovsdb_row *); - void ovsdb_row_destroy(struct ovsdb_row *); -diff --git a/ovsdb/storage.c b/ovsdb/storage.c -index d727b1eacd..d4984be250 100644 ---- a/ovsdb/storage.c -+++ b/ovsdb/storage.c -@@ -268,9 +268,7 @@ ovsdb_storage_read(struct ovsdb_storage *storage, - struct json *schema_json = NULL; - struct json *txn_json = NULL; - if (storage->raft) { -- bool is_snapshot; -- json = json_nullable_clone( -- raft_next_entry(storage->raft, txnid, &is_snapshot)); -+ json = raft_next_entry(storage->raft, txnid); - if (!json) { - return NULL; - } else if (json->type != JSON_ARRAY || json->array.n != 2) { -@@ -509,7 +507,11 @@ schedule_next_snapshot(struct ovsdb_storage *storage, bool quick) - - long long int now = time_msec(); - storage->next_snapshot_min = now + base + random_range(range); -- storage->next_snapshot_max = now + 60LL * 60 * 24 * 1000; /* 1 day */ -+ if (!quick) { -+ long long int one_day = 60LL * 60 * 24 * 1000; -+ -+ storage->next_snapshot_max = now + one_day; -+ } - } else { - storage->next_snapshot_min = LLONG_MAX; - storage->next_snapshot_max = LLONG_MAX; -@@ -517,7 +519,7 @@ schedule_next_snapshot(struct ovsdb_storage *storage, bool quick) - } - - bool --ovsdb_storage_should_snapshot(const struct ovsdb_storage *storage) -+ovsdb_storage_should_snapshot(struct ovsdb_storage *storage) - { - if (storage->raft || storage->log) { - /* If we haven't reached the minimum snapshot time, don't snapshot. 
*/ -@@ -546,6 +548,15 @@ ovsdb_storage_should_snapshot(const struct ovsdb_storage *storage) - } - - if (!snapshot_recommended) { -+ if (storage->raft) { -+ /* Re-scheduling with a quick retry in order to avoid condition -+ * where all the raft servers passed the minimal time already, -+ * but the log didn't grow a lot, so they are all checking on -+ * every iteration. This will randomize the time of the next -+ * attempt, so all the servers will not start snapshotting at -+ * the same time when the log reaches a critical size. */ -+ schedule_next_snapshot(storage, true); -+ } - return false; - } - -diff --git a/ovsdb/storage.h b/ovsdb/storage.h -index e120094d7a..ff026b77fa 100644 ---- a/ovsdb/storage.h -+++ b/ovsdb/storage.h -@@ -76,7 +76,7 @@ uint64_t ovsdb_write_get_commit_index(const struct ovsdb_write *); - void ovsdb_write_wait(const struct ovsdb_write *); - void ovsdb_write_destroy(struct ovsdb_write *); - --bool ovsdb_storage_should_snapshot(const struct ovsdb_storage *); -+bool ovsdb_storage_should_snapshot(struct ovsdb_storage *); - struct ovsdb_error *ovsdb_storage_store_snapshot(struct ovsdb_storage *storage, - const struct json *schema, - const struct json *snapshot) -diff --git a/ovsdb/transaction.c b/ovsdb/transaction.c -index 8ffefcf7c9..db86d847c3 100644 ---- a/ovsdb/transaction.c -+++ b/ovsdb/transaction.c -@@ -41,6 +41,9 @@ struct ovsdb_txn { - struct ovs_list txn_tables; /* Contains "struct ovsdb_txn_table"s. */ - struct ds comment; - struct uuid txnid; /* For clustered mode only. It is the eid. */ -+ size_t n_atoms; /* Number of atoms in all transaction rows. */ -+ ssize_t n_atoms_diff; /* Difference between number of added and -+ * removed atoms. */ - }; - - /* A table modified by a transaction. */ -@@ -86,6 +89,10 @@ struct ovsdb_txn_row { - struct uuid uuid; - struct ovsdb_table *table; - -+ /* Weak refs that needs to be added/deleted to/from destination rows. 
*/ -+ struct ovs_list added_refs; -+ struct ovs_list deleted_refs; -+ - /* Used by for_each_txn_row(). */ - unsigned int serial; /* Serial number of in-progress commit. */ - -@@ -151,6 +158,23 @@ ovsdb_txn_row_abort(struct ovsdb_txn *txn OVS_UNUSED, - } else { - hmap_replace(&new->table->rows, &new->hmap_node, &old->hmap_node); - } -+ -+ struct ovsdb_weak_ref *weak, *next; -+ LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->deleted_refs) { -+ ovs_list_remove(&weak->src_node); -+ ovs_list_init(&weak->src_node); -+ if (hmap_node_is_null(&weak->dst_node)) { -+ ovsdb_weak_ref_destroy(weak); -+ } -+ } -+ LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->added_refs) { -+ ovs_list_remove(&weak->src_node); -+ ovs_list_init(&weak->src_node); -+ if (hmap_node_is_null(&weak->dst_node)) { -+ ovsdb_weak_ref_destroy(weak); -+ } -+ } -+ - ovsdb_row_destroy(new); - free(txn_row); - -@@ -266,9 +290,9 @@ ovsdb_txn_adjust_atom_refs(struct ovsdb_txn *txn, const struct ovsdb_row *r, - - static struct ovsdb_error * OVS_WARN_UNUSED_RESULT - ovsdb_txn_adjust_row_refs(struct ovsdb_txn *txn, const struct ovsdb_row *r, -- const struct ovsdb_column *column, int delta) -+ const struct ovsdb_column *column, -+ const struct ovsdb_datum *field, int delta) - { -- const struct ovsdb_datum *field = &r->fields[column->index]; - struct ovsdb_error *error; - - error = ovsdb_txn_adjust_atom_refs(txn, r, column, &column->type.key, -@@ -291,14 +315,39 @@ update_row_ref_count(struct ovsdb_txn *txn, struct ovsdb_txn_row *r) - struct ovsdb_error *error; - - if (bitmap_is_set(r->changed, column->index)) { -- if (r->old) { -- error = ovsdb_txn_adjust_row_refs(txn, r->old, column, -1); -+ if (r->old && !r->new) { -+ error = ovsdb_txn_adjust_row_refs( -+ txn, r->old, column, -+ &r->old->fields[column->index], -1); - if (error) { - return OVSDB_WRAP_BUG("error decreasing refcount", error); - } -- } -- if (r->new) { -- error = ovsdb_txn_adjust_row_refs(txn, r->new, column, 1); -+ } else if (!r->old && r->new) 
{ -+ error = ovsdb_txn_adjust_row_refs( -+ txn, r->new, column, -+ &r->new->fields[column->index], 1); -+ if (error) { -+ return error; -+ } -+ } else if (r->old && r->new) { -+ struct ovsdb_datum added, removed; -+ -+ ovsdb_datum_added_removed(&added, &removed, -+ &r->old->fields[column->index], -+ &r->new->fields[column->index], -+ &column->type); -+ -+ error = ovsdb_txn_adjust_row_refs( -+ txn, r->old, column, &removed, -1); -+ ovsdb_datum_destroy(&removed, &column->type); -+ if (error) { -+ ovsdb_datum_destroy(&added, &column->type); -+ return OVSDB_WRAP_BUG("error decreasing refcount", error); -+ } -+ -+ error = ovsdb_txn_adjust_row_refs( -+ txn, r->new, column, &added, 1); -+ ovsdb_datum_destroy(&added, &column->type); - if (error) { - return error; - } -@@ -459,93 +508,125 @@ static struct ovsdb_error * - ovsdb_txn_update_weak_refs(struct ovsdb_txn *txn OVS_UNUSED, - struct ovsdb_txn_row *txn_row) - { -- struct ovsdb_weak_ref *weak, *next; -+ struct ovsdb_weak_ref *weak, *next, *dst_weak; -+ struct ovsdb_row *dst_row; - -- /* Remove the weak references originating in the old version of the row. */ -- if (txn_row->old) { -- LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->old->src_refs) { -- ovs_list_remove(&weak->src_node); -- ovs_list_remove(&weak->dst_node); -- free(weak); -+ /* Find and clean up deleted references from destination rows. 
*/ -+ LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->deleted_refs) { -+ dst_row = CONST_CAST(struct ovsdb_row *, -+ ovsdb_table_get_row(weak->dst_table, &weak->dst)); -+ if (dst_row) { -+ dst_weak = ovsdb_row_find_weak_ref(dst_row, weak); -+ hmap_remove(&dst_row->dst_refs, &dst_weak->dst_node); -+ ovs_assert(ovs_list_is_empty(&dst_weak->src_node)); -+ ovsdb_weak_ref_destroy(dst_weak); -+ } -+ ovs_list_remove(&weak->src_node); -+ ovs_list_init(&weak->src_node); -+ if (hmap_node_is_null(&weak->dst_node)) { -+ ovsdb_weak_ref_destroy(weak); - } - } - -- /* Although the originating rows have the responsibility of updating the -- * weak references in the dst, it is possible that some source rows aren't -- * part of the transaction. In that situation this row needs to move the -- * list of incoming weak references from the old row into the new one. -- */ -- if (txn_row->old && txn_row->new) { -- /* Move the incoming weak references from old to new. */ -- ovs_list_push_back_all(&txn_row->new->dst_refs, -- &txn_row->old->dst_refs); -- } -- -- /* Insert the weak references originating in the new version of the row. */ -- struct ovsdb_row *dst_row; -- if (txn_row->new) { -- LIST_FOR_EACH (weak, src_node, &txn_row->new->src_refs) { -- /* dst_row MUST exist. */ -- dst_row = CONST_CAST(struct ovsdb_row *, -+ /* Insert the weak references added in the new version of the row. 
*/ -+ LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->added_refs) { -+ dst_row = CONST_CAST(struct ovsdb_row *, - ovsdb_table_get_row(weak->dst_table, &weak->dst)); -- ovs_list_insert(&dst_row->dst_refs, &weak->dst_node); -- } -+ -+ ovs_assert(!ovsdb_row_find_weak_ref(dst_row, weak)); -+ hmap_insert(&dst_row->dst_refs, &weak->dst_node, -+ ovsdb_weak_ref_hash(weak)); -+ ovs_list_remove(&weak->src_node); -+ ovs_list_init(&weak->src_node); - } - - return NULL; - } - - static void --add_weak_ref(const struct ovsdb_row *src_, const struct ovsdb_row *dst_) -+add_weak_ref(struct ovsdb_txn_row *txn_row, const struct ovsdb_row *dst_, -+ struct ovs_list *ref_list, -+ const union ovsdb_atom *key, const union ovsdb_atom *value, -+ bool by_key, const struct ovsdb_column *column) - { -- struct ovsdb_row *src = CONST_CAST(struct ovsdb_row *, src_); - struct ovsdb_row *dst = CONST_CAST(struct ovsdb_row *, dst_); - struct ovsdb_weak_ref *weak; - -- if (src == dst) { -+ if (txn_row->new == dst) { - return; - } - -- if (!ovs_list_is_empty(&dst->dst_refs)) { -- /* Omit duplicates. */ -- weak = CONTAINER_OF(ovs_list_back(&dst->dst_refs), -- struct ovsdb_weak_ref, dst_node); -- if (weak->src == src) { -- return; -- } -- } -- -- weak = xmalloc(sizeof *weak); -- weak->src = src; -+ weak = xzalloc(sizeof *weak); -+ weak->src_table = txn_row->new->table; -+ weak->src = *ovsdb_row_get_uuid(txn_row->new); - weak->dst_table = dst->table; - weak->dst = *ovsdb_row_get_uuid(dst); -- /* The dst_refs list is updated at commit time. 
*/ -- ovs_list_init(&weak->dst_node); -- ovs_list_push_back(&src->src_refs, &weak->src_node); -+ ovsdb_type_clone(&weak->type, &column->type); -+ ovsdb_atom_clone(&weak->key, key, column->type.key.type); -+ if (column->type.value.type != OVSDB_TYPE_VOID) { -+ ovsdb_atom_clone(&weak->value, value, column->type.value.type); -+ } -+ weak->by_key = by_key; -+ weak->column_idx = column->index; -+ hmap_node_nullify(&weak->dst_node); -+ ovs_list_push_back(ref_list, &weak->src_node); -+} -+ -+static void -+find_and_add_weak_ref(struct ovsdb_txn_row *txn_row, -+ const union ovsdb_atom *key, -+ const union ovsdb_atom *value, -+ const struct ovsdb_column *column, -+ bool by_key, struct ovs_list *ref_list, -+ struct ovsdb_datum *not_found, bool *zero) -+{ -+ const struct ovsdb_row *row = by_key -+ ? ovsdb_table_get_row(column->type.key.uuid.refTable, &key->uuid) -+ : ovsdb_table_get_row(column->type.value.uuid.refTable, &value->uuid); -+ -+ if (row) { -+ add_weak_ref(txn_row, row, ref_list, key, value, by_key, column); -+ } else if (not_found) { -+ if (uuid_is_zero(by_key ? &key->uuid : &value->uuid)) { -+ *zero = true; -+ } -+ ovsdb_datum_add_unsafe(not_found, key, value, &column->type, NULL); -+ } - } - - static struct ovsdb_error * OVS_WARN_UNUSED_RESULT - assess_weak_refs(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row) - { -+ struct ovsdb_weak_ref *weak, *next; - struct ovsdb_table *table; - struct shash_node *node; - - if (txn_row->old && !txn_row->new) { - /* Mark rows that have weak references to 'txn_row' as modified, so -- * that their weak references will get reassessed. */ -- struct ovsdb_weak_ref *weak, *next; -- -- LIST_FOR_EACH_SAFE (weak, next, dst_node, &txn_row->old->dst_refs) { -- if (!weak->src->txn_row) { -- ovsdb_txn_row_modify(txn, weak->src); -+ * that their weak references will get reassessed. Adding all weak -+ * refs to 'deleted_ref' lists of their source rows, so they will be -+ * cleaned up from datums and deleted on commit. 
*/ -+ -+ HMAP_FOR_EACH (weak, dst_node, &txn_row->old->dst_refs) { -+ struct ovsdb_txn_row *src_txn_row; -+ -+ src_txn_row = find_or_make_txn_row(txn, weak->src_table, -+ &weak->src); -+ if (!src_txn_row) { -+ /* Source row is also removed. */ -+ continue; - } -+ ovs_assert(src_txn_row); -+ ovs_assert(ovs_list_is_empty(&weak->src_node)); -+ ovs_list_insert(&src_txn_row->deleted_refs, &weak->src_node); - } - } - - if (!txn_row->new) { -- /* We don't have to do anything about references that originate at -- * 'txn_row', because ovsdb_row_destroy() will remove those weak -- * references. */ -+ /* Since all the atoms will be destroyed by the ovsdb_row_destroy(), -+ * there is no need to check them here. Source references queued -+ * into 'deleted_ref' while removing other rows will be cleaned up at -+ * commit time. */ - return NULL; - } - -@@ -553,50 +634,94 @@ assess_weak_refs(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row) - SHASH_FOR_EACH (node, &table->schema->columns) { - const struct ovsdb_column *column = node->data; - struct ovsdb_datum *datum = &txn_row->new->fields[column->index]; -+ struct ovsdb_datum added, removed, deleted_refs; - unsigned int orig_n, i; - bool zero = false; - - orig_n = datum->n; - -+ /* Collecting all key-value pairs that references deleted rows. */ -+ ovsdb_datum_init_empty(&deleted_refs); -+ LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->deleted_refs) { -+ if (column->index == weak->column_idx) { -+ ovsdb_datum_add_unsafe(&deleted_refs, &weak->key, &weak->value, -+ &column->type, NULL); -+ ovs_list_remove(&weak->src_node); -+ ovs_list_init(&weak->src_node); -+ } -+ } -+ ovsdb_datum_sort_unique(&deleted_refs, column->type.key.type, -+ column->type.value.type); -+ -+ /* Removing elements that references deleted rows. */ -+ ovsdb_datum_subtract(datum, &column->type, -+ &deleted_refs, &column->type); -+ ovsdb_datum_destroy(&deleted_refs, &column->type); -+ -+ /* Generating the difference between old and new data. 
*/ -+ if (txn_row->old) { -+ ovsdb_datum_added_removed(&added, &removed, -+ &txn_row->old->fields[column->index], -+ datum, &column->type); -+ } else { -+ ovsdb_datum_init_empty(&removed); -+ ovsdb_datum_clone(&added, datum, &column->type); -+ } -+ -+ /* Checking added data and creating new references. */ -+ ovsdb_datum_init_empty(&deleted_refs); - if (ovsdb_base_type_is_weak_ref(&column->type.key)) { -- for (i = 0; i < datum->n; ) { -- const struct ovsdb_row *row; -- -- row = ovsdb_table_get_row(column->type.key.uuid.refTable, -- &datum->keys[i].uuid); -- if (row) { -- add_weak_ref(txn_row->new, row); -- i++; -- } else { -- if (uuid_is_zero(&datum->keys[i].uuid)) { -- zero = true; -- } -- ovsdb_datum_remove_unsafe(datum, i, &column->type); -- } -+ for (i = 0; i < added.n; i++) { -+ find_and_add_weak_ref(txn_row, &added.keys[i], -+ added.values ? &added.values[i] : NULL, -+ column, true, &txn_row->added_refs, -+ &deleted_refs, &zero); - } - } - - if (ovsdb_base_type_is_weak_ref(&column->type.value)) { -- for (i = 0; i < datum->n; ) { -- const struct ovsdb_row *row; -- -- row = ovsdb_table_get_row(column->type.value.uuid.refTable, -- &datum->values[i].uuid); -- if (row) { -- add_weak_ref(txn_row->new, row); -- i++; -- } else { -- if (uuid_is_zero(&datum->values[i].uuid)) { -- zero = true; -- } -- ovsdb_datum_remove_unsafe(datum, i, &column->type); -- } -+ for (i = 0; i < added.n; i++) { -+ find_and_add_weak_ref(txn_row, &added.keys[i], -+ &added.values[i], -+ column, false, &txn_row->added_refs, -+ &deleted_refs, &zero); -+ } -+ } -+ if (deleted_refs.n) { -+ /* Removing all the references that doesn't point to valid rows. */ -+ ovsdb_datum_sort_unique(&deleted_refs, column->type.key.type, -+ column->type.value.type); -+ ovsdb_datum_subtract(datum, &column->type, -+ &deleted_refs, &column->type); -+ ovsdb_datum_destroy(&deleted_refs, &column->type); -+ } -+ ovsdb_datum_destroy(&added, &column->type); -+ -+ /* Creating refs that needs to be removed on commit. 
This includes -+ * both: the references that got directly removed from the datum and -+ * references removed due to deletion of a referenced row. */ -+ if (ovsdb_base_type_is_weak_ref(&column->type.key)) { -+ for (i = 0; i < removed.n; i++) { -+ find_and_add_weak_ref(txn_row, &removed.keys[i], -+ removed.values -+ ? &removed.values[i] : NULL, -+ column, true, &txn_row->deleted_refs, -+ NULL, NULL); - } - } - -+ if (ovsdb_base_type_is_weak_ref(&column->type.value)) { -+ for (i = 0; i < removed.n; i++) { -+ find_and_add_weak_ref(txn_row, &removed.keys[i], -+ &removed.values[i], -+ column, false, &txn_row->deleted_refs, -+ NULL, NULL); -+ } -+ } -+ ovsdb_datum_destroy(&removed, &column->type); -+ - if (datum->n != orig_n) { - bitmap_set1(txn_row->changed, column->index); -- ovsdb_datum_sort_assert(datum, column->type.key.type); - if (datum->n < column->type.n_min) { - const struct uuid *row_uuid = ovsdb_row_get_uuid(txn_row->new); - if (zero && !txn_row->old) { -@@ -817,6 +942,37 @@ check_index_uniqueness(struct ovsdb_txn *txn OVS_UNUSED, - return NULL; - } - -+static struct ovsdb_error * OVS_WARN_UNUSED_RESULT -+count_atoms(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row) -+{ -+ struct ovsdb_table *table = txn_row->table; -+ ssize_t n_atoms_old = 0, n_atoms_new = 0; -+ struct shash_node *node; -+ -+ SHASH_FOR_EACH (node, &table->schema->columns) { -+ const struct ovsdb_column *column = node->data; -+ const struct ovsdb_type *type = &column->type; -+ unsigned int idx = column->index; -+ -+ if (txn_row->old) { -+ n_atoms_old += txn_row->old->fields[idx].n; -+ if (type->value.type != OVSDB_TYPE_VOID) { -+ n_atoms_old += txn_row->old->fields[idx].n; -+ } -+ } -+ if (txn_row->new) { -+ n_atoms_new += txn_row->new->fields[idx].n; -+ if (type->value.type != OVSDB_TYPE_VOID) { -+ n_atoms_new += txn_row->new->fields[idx].n; -+ } -+ } -+ } -+ -+ txn->n_atoms += n_atoms_old + n_atoms_new; -+ txn->n_atoms_diff += n_atoms_new - n_atoms_old; -+ return NULL; -+} -+ - static 
struct ovsdb_error * OVS_WARN_UNUSED_RESULT - update_version(struct ovsdb_txn *txn OVS_UNUSED, struct ovsdb_txn_row *txn_row) - { -@@ -885,6 +1041,12 @@ ovsdb_txn_precommit(struct ovsdb_txn *txn) - return error; - } - -+ /* Count atoms. */ -+ error = for_each_txn_row(txn, count_atoms); -+ if (error) { -+ return OVSDB_WRAP_BUG("can't happen", error); -+ } -+ - /* Update _version for rows that changed. */ - error = for_each_txn_row(txn, update_version); - if (error) { -@@ -900,6 +1062,8 @@ ovsdb_txn_clone(const struct ovsdb_txn *txn) - struct ovsdb_txn *txn_cloned = xzalloc(sizeof *txn_cloned); - ovs_list_init(&txn_cloned->txn_tables); - txn_cloned->txnid = txn->txnid; -+ txn_cloned->n_atoms = txn->n_atoms; -+ txn_cloned->n_atoms_diff = txn->n_atoms_diff; - - struct ovsdb_txn_table *t; - LIST_FOR_EACH (t, node, &txn->txn_tables) { -@@ -958,6 +1122,7 @@ ovsdb_txn_add_to_history(struct ovsdb_txn *txn) - node->txn = ovsdb_txn_clone(txn); - ovs_list_push_back(&txn->db->txn_history, &node->node); - txn->db->n_txn_history++; -+ txn->db->n_txn_history_atoms += txn->n_atoms; - } - } - -@@ -968,6 +1133,7 @@ ovsdb_txn_complete(struct ovsdb_txn *txn) - if (!ovsdb_txn_is_empty(txn)) { - - txn->db->run_triggers_now = txn->db->run_triggers = true; -+ txn->db->n_atoms += txn->n_atoms_diff; - ovsdb_monitors_commit(txn->db, txn); - ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_update_weak_refs)); - ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_row_commit)); -@@ -1215,6 +1381,9 @@ ovsdb_txn_row_create(struct ovsdb_txn *txn, struct ovsdb_table *table, - txn_row->n_refs = old ? 
old->n_refs : 0; - txn_row->serial = serial - 1; - -+ ovs_list_init(&txn_row->added_refs); -+ ovs_list_init(&txn_row->deleted_refs); -+ - if (old) { - old->txn_row = txn_row; - } -@@ -1423,12 +1592,20 @@ ovsdb_txn_history_run(struct ovsdb *db) - if (!db->need_txn_history) { - return; - } -- /* Remove old histories to limit the size of the history */ -- while (db->n_txn_history > 100) { -+ /* Remove old histories to limit the size of the history. Removing until -+ * the number of ovsdb atoms in history becomes less than the number of -+ * atoms in the database, because it will be faster to just get a database -+ * snapshot than re-constructing changes from the history that big. -+ * Keeping at least one transaction to avoid sending UUID_ZERO as a last id -+ * if all entries got removed due to the size limit. */ -+ while (db->n_txn_history > 1 && -+ (db->n_txn_history > 100 || -+ db->n_txn_history_atoms > db->n_atoms)) { - struct ovsdb_txn_history_node *txn_h_node = CONTAINER_OF( - ovs_list_pop_front(&db->txn_history), - struct ovsdb_txn_history_node, node); - -+ db->n_txn_history_atoms -= txn_h_node->txn->n_atoms; - ovsdb_txn_destroy_cloned(txn_h_node->txn); - free(txn_h_node); - db->n_txn_history--; -@@ -1440,6 +1617,7 @@ ovsdb_txn_history_init(struct ovsdb *db, bool need_txn_history) - { - db->need_txn_history = need_txn_history; - db->n_txn_history = 0; -+ db->n_txn_history_atoms = 0; - ovs_list_init(&db->txn_history); - } - -@@ -1458,4 +1636,5 @@ ovsdb_txn_history_destroy(struct ovsdb *db) - free(txn_h_node); - } - db->n_txn_history = 0; -+ db->n_txn_history_atoms = 0; - } -diff --git a/python/ovs/db/data.py b/python/ovs/db/data.py -index 2a2102d6be..99bf80ed62 100644 ---- a/python/ovs/db/data.py -+++ b/python/ovs/db/data.py -@@ -204,7 +204,7 @@ class Atom(object): - else: - return '.boolean = false' - elif self.type == ovs.db.types.StringType: -- return '.string = "%s"' % escapeCString(self.value) -+ return '.s = %s' % escapeCString(self.value) - elif self.type 
== ovs.db.types.UuidType: - return '.uuid = %s' % ovs.ovsuuid.to_c_assignment(self.value) - -@@ -563,16 +563,41 @@ class Datum(object): - if n == 0: - return ["static struct ovsdb_datum %s = { .n = 0 };"] - -- s = ["static union ovsdb_atom %s_keys[%d] = {" % (name, n)] -- for key in sorted(self.values): -- s += [" { %s }," % key.cInitAtom(key)] -- s += ["};"] -+ s = [] -+ if self.type.key.type == ovs.db.types.StringType: -+ s += ["static struct ovsdb_atom_string %s_key_strings[%d] = {" -+ % (name, n)] -+ for key in sorted(self.values): -+ s += [' { .string = "%s", .n_refs = 2 },' -+ % escapeCString(key.value)] -+ s += ["};"] -+ s += ["static union ovsdb_atom %s_keys[%d] = {" % (name, n)] -+ for i in range(n): -+ s += [" { .s = &%s_key_strings[%d] }," % (name, i)] -+ s += ["};"] -+ else: -+ s = ["static union ovsdb_atom %s_keys[%d] = {" % (name, n)] -+ for key in sorted(self.values): -+ s += [" { %s }," % key.cInitAtom(key)] -+ s += ["};"] - - if self.type.value: -- s = ["static union ovsdb_atom %s_values[%d] = {" % (name, n)] -- for k, v in sorted(self.values.items()): -- s += [" { %s }," % v.cInitAtom(v)] -- s += ["};"] -+ if self.type.value.type == ovs.db.types.StringType: -+ s += ["static struct ovsdb_atom_string %s_val_strings[%d] = {" -+ % (name, n)] -+ for k, v in sorted(self.values): -+ s += [' { .string = "%s", .n_refs = 2 },' -+ % escapeCString(v.value)] -+ s += ["};"] -+ s += ["static union ovsdb_atom %s_values[%d] = {" % (name, n)] -+ for i in range(n): -+ s += [" { .s = &%s_val_strings[%d] }," % (name, i)] -+ s += ["};"] -+ else: -+ s = ["static union ovsdb_atom %s_values[%d] = {" % (name, n)] -+ for k, v in sorted(self.values.items()): -+ s += [" { %s }," % v.cInitAtom(v)] -+ s += ["};"] - - s += ["static struct ovsdb_datum %s = {" % name] - s += [" .n = %d," % n] -diff --git a/python/ovs/db/idl.py b/python/ovs/db/idl.py -index ecae5e1432..87ee06cdef 100644 ---- a/python/ovs/db/idl.py -+++ b/python/ovs/db/idl.py -@@ -1505,6 +1505,11 @@ class 
Transaction(object): - if self != self.idl.txn: - return self._status - -+ if self.idl.state != Idl.IDL_S_MONITORING: -+ self._status = Transaction.TRY_AGAIN -+ self.__disassemble() -+ return self._status -+ - # If we need a lock but don't have it, give up quickly. - if self.idl.lock_name and not self.idl.has_lock: - self._status = Transaction.NOT_LOCKED -diff --git a/python/ovs/db/types.py b/python/ovs/db/types.py -index 626ae8fc44..3318a3b6f8 100644 ---- a/python/ovs/db/types.py -+++ b/python/ovs/db/types.py -@@ -48,6 +48,16 @@ class AtomicType(object): - def to_string(self): - return self.name - -+ def to_rvalue_string(self): -+ if self == StringType: -+ return 's->' + self.name -+ return self.name -+ -+ def to_lvalue_string(self): -+ if self == StringType: -+ return 's' -+ return self.name -+ - def to_json(self): - return self.name - -@@ -373,18 +383,7 @@ class BaseType(object): - return "%(dst)s = *%(src)s;" % args - return ("%(dst)s = %(src)s->header_.uuid;") % args - elif self.type == StringType: -- return "%(dst)s = xstrdup(%(src)s);" % args -- else: -- return "%(dst)s = %(src)s;" % args -- -- def assign_c_value_casting_away_const(self, dst, src, refTable=True): -- args = {'dst': dst, 'src': src} -- if self.ref_table_name: -- if not refTable: -- return "%(dst)s = *%(src)s;" % args -- return ("%(dst)s = %(src)s->header_.uuid;") % args -- elif self.type == StringType: -- return "%(dst)s = CONST_CAST(char *, %(src)s);" % args -+ return "%(dst)s = ovsdb_atom_string_create(%(src)s);" % args - else: - return "%(dst)s = %(src)s;" % args - -diff --git a/python/ovs/poller.py b/python/ovs/poller.py -index 3624ec8655..157719c3a4 100644 ---- a/python/ovs/poller.py -+++ b/python/ovs/poller.py -@@ -26,9 +26,9 @@ if sys.platform == "win32": - import ovs.winutils as winutils - - try: -- from OpenSSL import SSL -+ import ssl - except ImportError: -- SSL = None -+ ssl = None - - try: - from eventlet import patcher as eventlet_patcher -@@ -73,7 +73,7 @@ class 
_SelectSelect(object): - def register(self, fd, events): - if isinstance(fd, socket.socket): - fd = fd.fileno() -- if SSL and isinstance(fd, SSL.Connection): -+ if ssl and isinstance(fd, ssl.SSLSocket): - fd = fd.fileno() - - if sys.platform != 'win32': -diff --git a/python/ovs/reconnect.py b/python/ovs/reconnect.py -index c4c6c87e9f..6b0d023ae3 100644 ---- a/python/ovs/reconnect.py -+++ b/python/ovs/reconnect.py -@@ -44,7 +44,7 @@ class Reconnect(object): - is_connected = False - - @staticmethod -- def deadline(fsm): -+ def deadline(fsm, now): - return None - - @staticmethod -@@ -56,7 +56,7 @@ class Reconnect(object): - is_connected = False - - @staticmethod -- def deadline(fsm): -+ def deadline(fsm, now): - return None - - @staticmethod -@@ -68,7 +68,7 @@ class Reconnect(object): - is_connected = False - - @staticmethod -- def deadline(fsm): -+ def deadline(fsm, now): - return fsm.state_entered + fsm.backoff - - @staticmethod -@@ -80,7 +80,7 @@ class Reconnect(object): - is_connected = False - - @staticmethod -- def deadline(fsm): -+ def deadline(fsm, now): - return fsm.state_entered + max(1000, fsm.backoff) - - @staticmethod -@@ -92,13 +92,24 @@ class Reconnect(object): - is_connected = True - - @staticmethod -- def deadline(fsm): -+ def deadline(fsm, now): - if fsm.probe_interval: - base = max(fsm.last_activity, fsm.state_entered) - expiration = base + fsm.probe_interval -- if (fsm.last_receive_attempt is None or -+ if (now < expiration or -+ fsm.last_receive_attempt is None or - fsm.last_receive_attempt >= expiration): -+ # We still have time before the expiration or the time has -+ # already passed and there was no activity. In the first -+ # case we need to wait for the expiration, in the second - -+ # we're already past the deadline. */ - return expiration -+ else: -+ # Time has already passed, but we didn't attempt to receive -+ # anything. 
We need to wake up and try to receive even if -+ # nothing is pending, so we can update the expiration time -+ # or transition to a different state. -+ return now + 1 - return None - - @staticmethod -@@ -114,12 +125,15 @@ class Reconnect(object): - is_connected = True - - @staticmethod -- def deadline(fsm): -+ def deadline(fsm, now): - if fsm.probe_interval: - expiration = fsm.state_entered + fsm.probe_interval -- if (fsm.last_receive_attempt is None or -+ if (now < expiration or -+ fsm.last_receive_attempt is None or - fsm.last_receive_attempt >= expiration): - return expiration -+ else: -+ return now + 1 - return None - - @staticmethod -@@ -134,7 +148,7 @@ class Reconnect(object): - is_connected = False - - @staticmethod -- def deadline(fsm): -+ def deadline(fsm, now): - return fsm.state_entered - - @staticmethod -@@ -545,7 +559,7 @@ class Reconnect(object): - returned if the "probe interval" is nonzero--see - self.set_probe_interval()).""" - -- deadline = self.state.deadline(self) -+ deadline = self.state.deadline(self, now) - if deadline is not None and now >= deadline: - return self.state.run(self, now) - else: -@@ -562,7 +576,7 @@ class Reconnect(object): - """Returns the number of milliseconds after which self.run() should be - called if nothing else notable happens in the meantime, or None if this - is currently unnecessary.""" -- deadline = self.state.deadline(self) -+ deadline = self.state.deadline(self, now) - if deadline is not None: - remaining = deadline - now - return max(0, remaining) -diff --git a/python/ovs/socket_util.py b/python/ovs/socket_util.py -index 3faa64e9d7..651012bf06 100644 ---- a/python/ovs/socket_util.py -+++ b/python/ovs/socket_util.py -@@ -222,8 +222,7 @@ def inet_parse_active(target, default_port): - return (host_name, port) - - --def inet_open_active(style, target, default_port, dscp): -- address = inet_parse_active(target, default_port) -+def inet_create_socket_active(style, address): - try: - is_addr_inet = 
is_valid_ipv4_address(address[0]) - if is_addr_inet: -@@ -235,23 +234,32 @@ def inet_open_active(style, target, default_port, dscp): - except socket.error as e: - return get_exception_errno(e), None - -+ return family, sock -+ -+ -+def inet_connect_active(sock, address, family, dscp): - try: - set_nonblocking(sock) - set_dscp(sock, family, dscp) -- try: -- sock.connect(address) -- except socket.error as e: -- error = get_exception_errno(e) -- if sys.platform == 'win32' and error == errno.WSAEWOULDBLOCK: -- # WSAEWOULDBLOCK would be the equivalent on Windows -- # for EINPROGRESS on Unix. -- error = errno.EINPROGRESS -- if error != errno.EINPROGRESS: -- raise -- return 0, sock -+ error = sock.connect_ex(address) -+ if error not in (0, errno.EINPROGRESS, errno.EWOULDBLOCK): -+ sock.close() -+ return error -+ return 0 - except socket.error as e: - sock.close() -- return get_exception_errno(e), None -+ return get_exception_errno(e) -+ -+ -+def inet_open_active(style, target, default_port, dscp): -+ address = inet_parse_active(target, default_port) -+ family, sock = inet_create_socket_active(style, address) -+ if sock is None: -+ return family, sock -+ error = inet_connect_active(sock, address, family, dscp) -+ if error: -+ return error, None -+ return 0, sock - - - def get_exception_errno(e): -diff --git a/python/ovs/stream.py b/python/ovs/stream.py -index f5a520862c..ac5b0fd0c6 100644 ---- a/python/ovs/stream.py -+++ b/python/ovs/stream.py -@@ -22,9 +22,9 @@ import ovs.socket_util - import ovs.vlog - - try: -- from OpenSSL import SSL -+ import ssl - except ImportError: -- SSL = None -+ ssl = None - - if sys.platform == 'win32': - import ovs.winutils as winutils -@@ -322,6 +322,12 @@ class Stream(object): - The recv function will not block waiting for data to arrive. 
If no - data have been received, it returns (errno.EAGAIN, "") immediately.""" - -+ try: -+ return self._recv(n) -+ except socket.error as e: -+ return (ovs.socket_util.get_exception_errno(e), "") -+ -+ def _recv(self, n): - retval = self.connect() - if retval != 0: - return (retval, "") -@@ -331,10 +337,7 @@ class Stream(object): - if sys.platform == 'win32' and self.socket is None: - return self.__recv_windows(n) - -- try: -- return (0, self.socket.recv(n)) -- except socket.error as e: -- return (ovs.socket_util.get_exception_errno(e), "") -+ return (0, self.socket.recv(n)) - - def __recv_windows(self, n): - if self._read_pending: -@@ -396,6 +399,12 @@ class Stream(object): - Will not block. If no bytes can be immediately accepted for - transmission, returns -errno.EAGAIN immediately.""" - -+ try: -+ return self._send(buf) -+ except socket.error as e: -+ return -ovs.socket_util.get_exception_errno(e) -+ -+ def _send(self, buf): - retval = self.connect() - if retval != 0: - return -retval -@@ -409,10 +418,7 @@ class Stream(object): - if sys.platform == 'win32' and self.socket is None: - return self.__send_windows(buf) - -- try: -- return self.socket.send(buf) -- except socket.error as e: -- return -ovs.socket_util.get_exception_errno(e) -+ return self.socket.send(buf) - - def __send_windows(self, buf): - if self._write_pending: -@@ -769,35 +775,42 @@ class SSLStream(Stream): - def check_connection_completion(sock): - try: - return Stream.check_connection_completion(sock) -- except SSL.SysCallError as e: -+ except ssl.SSLSyscallError as e: - return ovs.socket_util.get_exception_errno(e) - - @staticmethod - def needs_probes(): - return True - -- @staticmethod -- def verify_cb(conn, cert, errnum, depth, ok): -- return ok -- - @staticmethod - def _open(suffix, dscp): -- error, sock = TCPStream._open(suffix, dscp) -- if error: -- return error, None -+ address = ovs.socket_util.inet_parse_active(suffix, 0) -+ family, sock = ovs.socket_util.inet_create_socket_active( -+ 
socket.SOCK_STREAM, address) -+ if sock is None: -+ return family, sock - - # Create an SSL context -- ctx = SSL.Context(SSL.SSLv23_METHOD) -- ctx.set_verify(SSL.VERIFY_PEER, SSLStream.verify_cb) -- ctx.set_options(SSL.OP_NO_SSLv2 | SSL.OP_NO_SSLv3) -+ ctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23) -+ ctx.verify_mode = ssl.CERT_REQUIRED -+ ctx.options |= ssl.OP_NO_SSLv2 -+ ctx.options |= ssl.OP_NO_SSLv3 - # If the client has not set the SSL configuration files - # exception would be raised. -- ctx.use_privatekey_file(Stream._SSL_private_key_file) -- ctx.use_certificate_file(Stream._SSL_certificate_file) - ctx.load_verify_locations(Stream._SSL_ca_cert_file) -+ ctx.load_cert_chain(Stream._SSL_certificate_file, -+ Stream._SSL_private_key_file) -+ ssl_sock = ctx.wrap_socket(sock, do_handshake_on_connect=False) - -- ssl_sock = SSL.Connection(ctx, sock) -- ssl_sock.set_connect_state() -+ # Connect -+ error = ovs.socket_util.inet_connect_active(ssl_sock, address, family, -+ dscp) -+ if not error: -+ try: -+ ssl_sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) -+ except socket.error as e: -+ ssl_sock.close() -+ return ovs.socket_util.get_exception_errno(e), None - return error, ssl_sock - - def connect(self): -@@ -809,40 +822,44 @@ class SSLStream(Stream): - # TCP Connection is successful. 
Now do the SSL handshake - try: - self.socket.do_handshake() -- except SSL.WantReadError: -+ except ssl.SSLWantReadError: - return errno.EAGAIN -- except SSL.SysCallError as e: -+ except ssl.SSLSyscallError as e: - return ovs.socket_util.get_exception_errno(e) - - return 0 - - def recv(self, n): - try: -- return super(SSLStream, self).recv(n) -- except SSL.WantReadError: -+ return super(SSLStream, self)._recv(n) -+ except ssl.SSLWantReadError: - return (errno.EAGAIN, "") -- except SSL.SysCallError as e: -+ except ssl.SSLSyscallError as e: - return (ovs.socket_util.get_exception_errno(e), "") -- except SSL.ZeroReturnError: -+ except ssl.SSLZeroReturnError: - return (0, "") -+ except socket.error as e: -+ return (ovs.socket_util.get_exception_errno(e), "") - - def send(self, buf): - try: -- return super(SSLStream, self).send(buf) -- except SSL.WantWriteError: -+ return super(SSLStream, self)._send(buf) -+ except ssl.SSLWantWriteError: - return -errno.EAGAIN -- except SSL.SysCallError as e: -+ except ssl.SSLSyscallError as e: -+ return -ovs.socket_util.get_exception_errno(e) -+ except socket.error as e: - return -ovs.socket_util.get_exception_errno(e) - - def close(self): - if self.socket: - try: -- self.socket.shutdown() -- except SSL.Error: -+ self.socket.shutdown(socket.SHUT_RDWR) -+ except socket.error: - pass - return super(SSLStream, self).close() - - --if SSL: -+if ssl: - # Register SSL only if the OpenSSL module is available - Stream.register_method("ssl", SSLStream) -diff --git a/tests/alb.at b/tests/alb.at -index 903238fcb2..67eb14f473 100644 ---- a/tests/alb.at -+++ b/tests/alb.at -@@ -86,6 +86,52 @@ OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance - OVS_VSWITCHD_STOP - AT_CLEANUP - -+AT_SETUP([ALB - cross-numa]) -+OVS_VSWITCHD_START([add-port br0 p0 \ -+ -- set Interface p0 type=dummy-pmd options:n_rxq=4 \ -+ -- set Interface p0 options:numa_id=0 \ -+ -- set Open_vSwitch . 
other_config:pmd-cpu-mask=0x3 \ -+ -- set open_vswitch . other_config:pmd-rxq-assign=group \ -+ -- set open_vswitch . other_config:pmd-rxq-isolate=false \ -+ -- set open_vswitch . other_config:pmd-auto-lb="true" \ -+ -- set open_vswitch . other_config:pmd-auto-lb-load-threshold=0], -+ [], [], [--dummy-numa 1,2,1,2]) -+OVS_WAIT_UNTIL([grep "PMD auto load balance is enabled" ovs-vswitchd.log]) -+AT_CHECK([ovs-appctl vlog/set dpif_netdev:dbg]) -+ -+# no pinned rxqs - cross-numa pmd could change -+get_log_next_line_num -+ovs-appctl time/warp 600000 10000 -+OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance performing dry run."]) -+OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance detected cross-numa polling"]) -+ -+# all pinned rxqs - cross-numa pmd will not change -+AT_CHECK([ovs-vsctl set Interface p0 other_config:pmd-rxq-affinity='0:0,1:0,2:1,3:1']) -+get_log_next_line_num -+ovs-appctl time/warp 600000 10000 -+OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance performing dry run."]) -+OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "Variance improvement 0%."]) -+ -+# mix of pinned (non-isolated) and non-pinned rxqs - cross-numa pmd could change -+AT_CHECK([ovs-vsctl remove Interface p0 other_config pmd-rxq-affinity]) -+AT_CHECK([ovs-vsctl set Interface p0 other_config:pmd-rxq-affinity='0:0,1:0,2:1']) -+get_log_next_line_num -+ovs-appctl time/warp 600000 10000 -+OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance performing dry run."]) -+OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance detected cross-numa polling"]) -+ -+# mix of pinned (isolated) and non-pinned rxqs - cross-numa pmd could change -+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0xf]) -+AT_CHECK([ovs-vsctl set Interface p0 options:n_rxq=6]) -+AT_CHECK([ovs-vsctl set Open_vSwitch . 
other_config:pmd-rxq-isolate=true]) -+get_log_next_line_num -+ovs-appctl time/warp 600000 10000 -+OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance performing dry run."]) -+OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance detected cross-numa polling"]) -+ -+OVS_VSWITCHD_STOP -+AT_CLEANUP -+ - AT_SETUP([ALB - PMD/RxQ assignment type]) - OVS_VSWITCHD_START([add-port br0 p0 \ - -- set Interface p0 type=dummy-pmd options:n_rxq=3 \ -diff --git a/tests/classifier.at b/tests/classifier.at -index cdcd72c156..f652b59837 100644 ---- a/tests/classifier.at -+++ b/tests/classifier.at -@@ -129,6 +129,31 @@ Datapath actions: 3 - OVS_VSWITCHD_STOP(["/'prefixes' with incompatible field: ipv6_label/d"]) - AT_CLEANUP - -+AT_SETUP([flow classifier - ipv6 ND dependency]) -+OVS_VSWITCHD_START -+add_of_ports br0 1 2 -+AT_DATA([flows.txt], [dnl -+ table=0,priority=100,ipv6,ipv6_src=1000::/10 actions=resubmit(,1) -+ table=0,priority=0 actions=NORMAL -+ table=1,priority=110,ipv6,ipv6_dst=1000::3 actions=resubmit(,2) -+ table=1,priority=100,ipv6,ipv6_dst=1000::4 actions=resubmit(,2) -+ table=1,priority=0 actions=NORMAL -+ table=2,priority=120,icmp6,nw_ttl=255,icmp_type=135,icmp_code=0,nd_target=1000::1 actions=NORMAL -+ table=2,priority=100,tcp actions=NORMAL -+ table=2,priority=100,icmp6 actions=NORMAL -+ table=2,priority=0 actions=NORMAL -+]) -+AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) -+ -+# test ICMPv6 echo request (which should have no nd_target field) -+AT_CHECK([ovs-appctl ofproto/trace br0 "in_port=1,eth_src=f6:d2:b0:19:5e:7b,eth_dst=d2:49:19:91:78:fe,dl_type=0x86dd,ipv6_src=1000::3,ipv6_dst=1000::4,nw_proto=58,icmpv6_type=128,icmpv6_code=0"], [0], [stdout]) -+AT_CHECK([tail -2 stdout], [0], -+ [Megaflow: recirc_id=0,eth,icmp6,in_port=1,dl_src=f6:d2:b0:19:5e:7b,dl_dst=d2:49:19:91:78:fe,ipv6_src=1000::/10,ipv6_dst=1000::4,nw_ttl=0,nw_frag=no -+Datapath actions: 100,2 -+]) -+OVS_VSWITCHD_STOP -+AT_CLEANUP -+ - 
AT_BANNER([conjunctive match]) - - AT_SETUP([single conjunctive match]) -diff --git a/tests/flowgen.py b/tests/flowgen.py -index 7ef32d13cb..cb0e9df388 100755 ---- a/tests/flowgen.py -+++ b/tests/flowgen.py -@@ -135,7 +135,7 @@ def output(attrs): - 12893) # urgent pointer - if attrs['TP_PROTO'] == 'TCP+options': - tcp = (tcp[:12] -- + struct.pack('H', (6 << 12) | 0x02 | 0x10) -+ + struct.pack('>H', (6 << 12) | 0x02 | 0x10) - + tcp[14:]) - tcp += struct.pack('>BBH', 2, 4, 1975) # MSS option - tcp += b'payload' -@@ -166,15 +166,15 @@ def output(attrs): - ip = ip[:2] + struct.pack('>H', len(ip)) + ip[4:] - packet += ip - if attrs['DL_HEADER'].startswith('802.2'): -- packet_len = len(packet) -+ packet_len = len(packet) - 14 - if flow['DL_VLAN'] != 0xffff: - packet_len -= 4 - packet = (packet[:len_ofs] - + struct.pack('>H', packet_len) - + packet[len_ofs + 2:]) - -- print(' '.join(['%s=%s' for k, v in attrs.items()])) -- print(' '.join(['%s=%s' for k, v in flow.items()])) -+ print(' '.join(['%s=%s' % (k, v) for k, v in attrs.items()])) -+ print(' '.join(['%s=%s' % (k, v) for k, v in flow.items()])) - print() - - flows.write(struct.pack('>LH', -diff --git a/tests/library.at b/tests/library.at -index 1702b7556b..e27d9e8bce 100644 ---- a/tests/library.at -+++ b/tests/library.at -@@ -247,7 +247,7 @@ AT_CHECK([ovstest test-ofpbuf], [0], []) - AT_CLEANUP - - AT_SETUP([rcu]) --AT_CHECK([ovstest test-rcu-quiesce], [0], []) -+AT_CHECK([ovstest test-rcu], [0], []) - AT_CLEANUP - - AT_SETUP([stopwatch module]) -diff --git a/tests/mcast-snooping.at b/tests/mcast-snooping.at -index 757cf7186e..fe475e7b38 100644 ---- a/tests/mcast-snooping.at -+++ b/tests/mcast-snooping.at -@@ -216,3 +216,70 @@ AT_CHECK([ovs-appctl mdb/show br0], [0], [dnl - ]) - - AT_CLEANUP -+ -+ -+AT_SETUP([mcast - igmp flood for non-snoop enabled]) -+OVS_VSWITCHD_START([]) -+ -+AT_CHECK([ -+ ovs-vsctl set bridge br0 \ -+ datapath_type=dummy], [0]) -+ -+add_of_ports br0 1 2 -+ -+AT_CHECK([ovs-ofctl add-flow br0 
action=normal]) -+ -+ovs-appctl time/stop -+ -+dnl Basic scenario - needs to flood for IGMP followed by unicast ICMP -+dnl in reverse direction -+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \ -+ '0101000c29a0aa55aa550001080046c00028000040000102d3494565eb4ae0000016940400002200f9020000000104000000e00000fb000000000000']) -+AT_CHECK([ovs-appctl netdev-dummy/receive p2 \ -+ 'aa55aa5500010101000c29a008004500001c00010000400164dc0a0101010a0101020800f7ffffffffff']) -+ -+ -+AT_CHECK([ovs-appctl dpctl/dump-flows | grep -e .*ipv4 | sort | dnl -+ strip_stats | strip_used | strip_recirc | dnl -+ sed -e 's/,packet_type(ns=[[0-9]]*,id=[[0-9]]*),/,/'], -+ [0], [dnl -+recirc_id(),in_port(1),eth(src=aa:55:aa:55:00:01,dst=01:01:00:0c:29:a0),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:100,2 -+recirc_id(),in_port(2),eth(src=01:01:00:0c:29:a0,dst=aa:55:aa:55:00:01),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:1 -+]) -+ -+ovs-appctl time/warp 100000 -+ -+dnl Next we should clear the flows and install a complex case -+AT_CHECK([ovs-ofctl del-flows br0]) -+ -+AT_DATA([flows.txt], [dnl -+table=0, arp actions=NORMAL -+table=0, ip,in_port=1 actions=ct(table=1,zone=64000) -+table=0, in_port=2 actions=output:1 -+table=1, ip,ct_state=+trk+inv actions=drop -+table=1 ip,in_port=1,icmp,ct_state=+trk+new actions=output:2 -+table=1, in_port=1,ip,ct_state=+trk+new actions=controller(userdata=00.de.ad.be.ef.ca.fe.01) -+table=1, in_port=1,ip,ct_state=+trk+est actions=output:2 -+]) -+AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) -+ -+ovs-appctl time/warp 100000 -+ -+dnl Send the IGMP, followed by a unicast ICMP - ensure we won't black hole -+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \ -+ '0101000c29a0aa55aa550001080046c00028000040000102d3494565eb4ae0000016940400002200f9020000000104000000e00000fb000000000000']) -+AT_CHECK([ovs-appctl netdev-dummy/receive p1 \ -+ 
'aa55aa550001aa55aa55000208004500001c00010000400164dc0a0101010a0101020800f7ffffffffff']) -+ -+ -+AT_CHECK([ovs-appctl dpctl/dump-flows | grep -e .*ipv4 | sort | dnl -+ strip_stats | strip_used | strip_recirc | dnl -+ sed 's/pid=[[0-9]]*,// -+ s/,packet_type(ns=[[0-9]]*,id=[[0-9]]*),/,/'], -+ [0], [dnl -+ct_state(+new-inv+trk),recirc_id(),in_port(1),eth_type(0x0800),ipv4(proto=1,frag=no), packets:0, bytes:0, used:never, actions:2 -+ct_state(+new-inv+trk),recirc_id(),in_port(1),eth_type(0x0800),ipv4(proto=2,frag=no), packets:0, bytes:0, used:never, actions:userspace(controller(reason=1,dont_send=0,continuation=0,recirc_id=,rule_cookie=0,controller_id=0,max_len=65535)) -+recirc_id(),in_port(1),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:0.0s, actions:ct(zone=64000),recirc() -+]) -+ -+AT_CLEANUP -diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at -index 956a69e1fa..266ed801e6 100644 ---- a/tests/ofproto-dpif.at -+++ b/tests/ofproto-dpif.at -@@ -81,11 +81,12 @@ recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=ff: - - ovs-appctl netdev-dummy/set-admin-state p1 up - ovs-appctl time/warp 100 --OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl -+OVS_WAIT_UNTIL_EQUAL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [dnl - ---- bond0 ---- - bond_mode: active-backup - bond may use recirculation: no, - bond-hash-basis: 0 -+lb_output action: disabled, bond-id: -1 - updelay: 0 ms - downdelay: 0 ms - lacp_status: off -@@ -99,7 +100,6 @@ member p1: enabled - - member p2: enabled - may_enable: true -- - ]) - - OVS_VSWITCHD_STOP -@@ -129,11 +129,12 @@ ovs-appctl time/warp 100 - OVS_WAIT_UNTIL([test -n "`ovs-appctl bond/show | fgrep 'member p1: disabled'`"]) - ovs-appctl netdev-dummy/set-admin-state p1 up - ovs-appctl time/warp 100 --OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl -+OVS_WAIT_UNTIL_EQUAL([ovs-appctl bond/show | 
STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [dnl - ---- bond0 ---- - bond_mode: active-backup - bond may use recirculation: no, - bond-hash-basis: 0 -+lb_output action: disabled, bond-id: -1 - updelay: 0 ms - downdelay: 0 ms - lacp_status: off -@@ -150,7 +151,6 @@ member p2: enabled - - member p3: enabled - may_enable: true -- - ]) - - dnl Now delete the primary and verify that the output shows that the -@@ -171,11 +171,12 @@ ovs-vsctl \ - --id=@p1 create Interface name=p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 -- \ - set Port bond0 interfaces="$uuids, @p1]" - ovs-appctl time/warp 100 --OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl -+OVS_WAIT_UNTIL_EQUAL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [dnl - ---- bond0 ---- - bond_mode: active-backup - bond may use recirculation: no, - bond-hash-basis: 0 -+lb_output action: disabled, bond-id: -1 - updelay: 0 ms - downdelay: 0 ms - lacp_status: off -@@ -192,17 +193,17 @@ member p2: enabled - - member p3: enabled - may_enable: true -- - ]) - - dnl Switch to another primary - ovs-vsctl set port bond0 other_config:bond-primary=p2 - ovs-appctl time/warp 100 --OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl -+OVS_WAIT_UNTIL_EQUAL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [dnl - ---- bond0 ---- - bond_mode: active-backup - bond may use recirculation: no, - bond-hash-basis: 0 -+lb_output action: disabled, bond-id: -1 - updelay: 0 ms - downdelay: 0 ms - lacp_status: off -@@ -211,25 +212,25 @@ active-backup primary: p2 - - - member p1: enabled -- active member - may_enable: true - - member p2: enabled -+ active member - may_enable: true - - member p3: enabled - may_enable: true -- - ]) - - dnl Remove the "bond-primary" config directive from the bond. 
- AT_CHECK([ovs-vsctl remove Port bond0 other_config bond-primary]) - ovs-appctl time/warp 100 --OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl -+OVS_WAIT_UNTIL_EQUAL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [dnl - ---- bond0 ---- - bond_mode: active-backup - bond may use recirculation: no, - bond-hash-basis: 0 -+lb_output action: disabled, bond-id: -1 - updelay: 0 ms - downdelay: 0 ms - lacp_status: off -@@ -238,15 +239,14 @@ active-backup primary: - - - member p1: enabled -- active member - may_enable: true - - member p2: enabled -+ active member - may_enable: true - - member p3: enabled - may_enable: true -- - ]) - - OVS_VSWITCHD_STOP -@@ -4862,6 +4862,54 @@ recirc_id(0),in_port(90),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(proto=6,fr - OVS_VSWITCHD_STOP - AT_CLEANUP - -+AT_SETUP([ofproto-dpif - handling of malformed TCP packets]) -+OVS_VSWITCHD_START -+add_of_ports br0 1 90 -+ -+dnl drop packet has tcp port 0-f but allow other tcp packets -+AT_DATA([flows.txt], [dnl -+priority=75 tcp tp_dst=0/0xfff0 actions=drop -+priority=50 tcp actions=output:1 -+]) -+AT_CHECK([ovs-ofctl replace-flows br0 flows.txt]) -+ -+dnl good tcp pkt, tcp(sport=100,dpor=16) -+pkt1="be95df40fb57fa163e5ee3570800450000280001000040063e940a0a0a0a141414140064001000000000000000005002200053330000" -+ -+dnl malformed tcp pkt(tcp_hdr < 20 byte), tcp(sport=100,dport=16,dataofs=1) -+pkt2="be95df40fb57fa163e5ee3570800450000280001000040063e940a0a0a0a141414140064001000000000000000001002200093330000" -+ -+dnl malformed tcp pkt(tcp_hdr > pkt_len), tcp(sport=100,dport=16,dataofs=15) -+pkt3="be95df40fb57fa163e5ee3570800450000280001000040063e940a0a0a0a14141414006400100000000000000000f002200093330000" -+ -+AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg]) -+ -+AT_CHECK([ovs-appctl netdev-dummy/receive p90 "$pkt1"], [0], [stdout]) -+dnl for good tcp pkt, ovs can extract the tp_dst=16 -+AT_CHECK([ovs-appctl dpctl/dump-flows 
filter=in_port\(90\),tcp], [0], [dnl -+flow-dump from the main thread: -+recirc_id(0),in_port(90),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(proto=6,frag=no),tcp(dst=16/0xfff0), packets:0, bytes:0, used:never, actions:1 -+]) -+ -+AT_CHECK([ovs-appctl revalidator/purge], [0], [stdout]) -+AT_CHECK([ovs-appctl netdev-dummy/receive p90 "$pkt2"], [0], [stdout]) -+dnl for malformed tcp pkt(tcp_hdr < 20 byte), ovs uses default value tp_dst=0 -+AT_CHECK([ovs-appctl dpctl/dump-flows filter=in_port\(90\),tcp], [0], [dnl -+flow-dump from the main thread: -+recirc_id(0),in_port(90),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(proto=6,frag=no),tcp(dst=0/0xfff0), packets:0, bytes:0, used:never, actions:drop -+]) -+ -+AT_CHECK([ovs-appctl revalidator/purge], [0], [stdout]) -+AT_CHECK([ovs-appctl netdev-dummy/receive p90 "$pkt3"], [0], [stdout]) -+dnl for malformed tcp pkt(tcp_hdr > pkt_len), ovs uses default value tp_dst=0 -+AT_CHECK([ovs-appctl dpctl/dump-flows filter=in_port\(90\),tcp], [0], [dnl -+flow-dump from the main thread: -+recirc_id(0),in_port(90),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(proto=6,frag=no),tcp(dst=0/0xfff0), packets:0, bytes:0, used:never, actions:drop -+]) -+ -+OVS_VSWITCHD_STOP -+AT_CLEANUP -+ - AT_SETUP([ofproto-dpif - exit]) - OVS_VSWITCHD_START - add_of_ports br0 1 2 3 10 11 12 13 14 -@@ -5525,7 +5573,36 @@ check_flows () { - echo "n_packets=$n" - test "$n" = 1 - } --OVS_WAIT_UNTIL([check_flows], [ovs dump-flows br0]) -+OVS_WAIT_UNTIL([check_flows], [ovs-ofctl dump-flows br0]) -+ -+OVS_VSWITCHD_STOP -+AT_CLEANUP -+ -+# Checks for regression against a bug in which OVS crashed -+# with in_port=OFPP_NONE or in_port=OFPP_CONTROLLER and -+# recirculation is involved. 
-+AT_SETUP([ofproto-dpif - packet-out recirculation with OFPP_NONE and OFPP_CONTROLLER]) -+OVS_VSWITCHD_START -+add_of_ports br0 1 2 -+ -+AT_DATA([flows.txt], [dnl -+table=0 ip actions=mod_dl_dst:83:83:83:83:83:83,ct(table=1) -+table=1 ip actions=ct(commit),normal -+]) -+AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) -+ -+packet=ffffffffffff00102030405008004500001c00000000401100000a000002ffffffff0035111100080000 -+AT_CHECK([ovs-ofctl packet-out br0 "in_port=none,packet=$packet actions=table"]) -+AT_CHECK([ovs-ofctl packet-out br0 "in_port=controller,packet=$packet actions=table"]) -+ -+# Dumps out the flow table, extracts the number of packets that have gone -+# through the (single) flow in table 1, and returns success if it's exactly 2. -+check_flows () { -+ n=$(ovs-ofctl dump-flows br0 table=1 | sed -n 's/.*n_packets=\([[0-9]]\{1,\}\).*/\1/p') -+ echo "n_packets=$n" -+ test "$n" = 2 -+} -+OVS_WAIT_UNTIL([check_flows], [ovs-ofctl dump-flows br0]) - - OVS_VSWITCHD_STOP - AT_CLEANUP -@@ -7524,7 +7601,7 @@ dnl configure bridge IPFIX and ensure that sample action generation works at the - dnl datapath level. - AT_SETUP([ofproto-dpif - Bridge IPFIX sanity check]) - OVS_VSWITCHD_START --add_of_ports br0 1 2 -+add_of_ports br0 1 2 3 - - dnl Sample every packet using bridge-based sampling. - AT_CHECK([ovs-vsctl -- set bridge br0 ipfix=@fix -- \ -@@ -7540,6 +7617,28 @@ flow-dump from the main thread: - packets:2, bytes:68, used:0.001s, actions:userspace(pid=0,ipfix(output_port=4294967295)) - ]) - -+AT_CHECK([ovs-appctl revalidator/purge]) -+ -+dnl Check sample is performed even if only one of the ports is present. 
-+AT_DATA([flows.txt], [dnl -+table=0,in_port=3,tcp actions=load:0xffff->NXM_OF_IN_PORT[],ct(zone=1,table=1) -+table=1,tcp, actions=output:2 -+]) -+AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) -+ -+for i in `seq 1 3`; do -+ AT_CHECK([ovs-appctl netdev-dummy/receive p3 'in_port(3),eth(src=50:54:00:00:00:08,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=192.168.0.2,dst=192.168.0.1,proto=6,tos=0,ttl=64,frag=no)']) -+done -+ -+AT_CHECK([ovs-appctl dpctl/dump-flows | sed 's/.*\(packets:\)/\1/' | sed 's/used:[[0-9]].[[0-9]]*s/used:0.001s/'], [0], [dnl -+flow-dump from the main thread: -+packets:2, bytes:236, used:0.001s, actions:userspace(pid=0,ipfix(output_port=2)),2 -+packets:2, bytes:236, used:0.001s, actions:userspace(pid=0,ipfix(output_port=4294967295)),ct(zone=1),recirc(0x1) -+]) -+ -+AT_CHECK([ovs-ofctl del-flows br0 in_port=3]) -+AT_CHECK([ovs-ofctl del-flows br0 table=1]) -+ - AT_CHECK([ovs-appctl revalidator/purge]) - dnl - dnl Add a slowpath meter. The userspace action should be metered. 
-@@ -8591,6 +8690,34 @@ AT_CHECK([sed -n 's/=[[0-9]][[0-9]]\(\.[[0-9]][[0-9]]*\)\{0,1\}s/=?s/p' stdout], - OVS_VSWITCHD_STOP - AT_CLEANUP - -+ -+AT_SETUP([ofproto-dpif - patch ports - meter (clone)]) -+ -+OVS_VSWITCHD_START( -+ [add-port br0 p0 -- set Interface p0 type=dummy ofport_request=1 -- \ -+ add-port br0 p1 -- set Interface p1 type=patch \ -+ options:peer=p2 ofport_request=2 -- \ -+ add-br br1 -- \ -+ set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- \ -+ set bridge br1 datapath-type=dummy other-config:datapath-id=1234 \ -+ fail-mode=secure -- \ -+ add-port br1 p2 -- set Interface p2 type=patch \ -+ options:peer=p1 -- \ -+ add-port br1 p3 -- set Interface p3 type=dummy ofport_request=3]) -+ -+AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br1 'meter=1 pktps stats bands=type=drop rate=2']) -+AT_CHECK([ovs-ofctl del-flows br0]) -+AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 in_port=local,ip,actions=2,1]) -+AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br1 in_port=1,ip,actions=meter:1,3]) -+ -+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(100),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=10.1.1.22,dst=10.0.0.3,proto=6,tos=0,ttl=64,frag=no),tcp(src=53295,dst=8080)'], [0], [stdout]) -+AT_CHECK([tail -1 stdout], [0], -+ [Datapath actions: clone(meter(0),3),1 -+]) -+ -+OVS_VSWITCHD_STOP -+AT_CLEANUP -+ - dnl ---------------------------------------------------------------------- - AT_BANNER([ofproto-dpif -- megaflows]) - -@@ -9695,6 +9822,26 @@ OFPST_TABLE reply (OF1.3) (xid=0x2): - OVS_VSWITCHD_STOP - AT_CLEANUP - -+AT_SETUP([ofproto-dpif packet-out table meter drop]) -+OVS_VSWITCHD_START -+add_of_ports br0 1 2 -+ -+AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br0 'meter=1 pktps bands=type=drop rate=1']) -+AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 'in_port=1 action=meter:1,output:2']) -+ -+ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 
packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000400080000 actions=resubmit(,0)" -+ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000400080000 actions=resubmit(,0)" -+ -+# Check that vswitchd hasn't crashed by dumping the meter added above -+AT_CHECK([ovs-ofctl -O OpenFlow13 dump-meters br0 | ofctl_strip], [0], [dnl -+OFPST_METER_CONFIG reply (OF1.3): -+meter=1 pktps bands= -+type=drop rate=1 -+]) -+ -+OVS_VSWITCHD_STOP -+AT_CLEANUP -+ - AT_SETUP([ofproto-dpif - ICMPv6]) - OVS_VSWITCHD_START - add_of_ports br0 1 -@@ -11404,6 +11551,23 @@ Megaflow: recirc_id=0x3,eth,ip,in_port=1,nw_frag=no - Datapath actions: 4 - ]) - -+ovs-ofctl del-flows br0 -+ -+AT_DATA([flows.txt], [dnl -+table=0,in_port=1 actions=load:0x1->NXM_NX_REG1[[]],resubmit(,1),load:0x2->NXM_NX_REG1[[]],resubmit(,1),load:0x3->NXM_NX_REG1[[]],resubmit(,1) -+table=1,in_port=1,reg1=0x1 actions=check_pkt_larger(200)->NXM_NX_REG0[[0]],resubmit(,4) -+table=1,in_port=1,reg1=0x2 actions=output:2 -+table=1,in_port=1,reg1=0x3 actions=output:4 -+table=4,in_port=1 actions=output:3 -+]) -+ -+AT_CHECK([ovs-ofctl --protocols=OpenFlow10 add-flows br0 flows.txt]) -+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.10.10.2,dst=10.10.10.1,proto=1,tos=1,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout]) -+AT_CHECK([cat stdout | grep Datapath -B1], [0], [dnl -+Megaflow: recirc_id=0,eth,ip,in_port=1,nw_frag=no -+Datapath actions: check_pkt_len(size=200,gt(3),le(3)),2,4 -+]) -+ - OVS_VSWITCHD_STOP - AT_CLEANUP - -diff --git a/tests/ofproto-macros.at b/tests/ofproto-macros.at -index 736d9809cb..f906b5c3b5 100644 ---- a/tests/ofproto-macros.at -+++ b/tests/ofproto-macros.at -@@ -134,6 +134,21 @@ strip_ufid () { - sed 's/mega_ufid:[[-0-9a-f]]* // - s/ufid:[[-0-9a-f]]* //' - } -+ -+# Strips packets: and bytes: from output 
-+strip_stats () { -+ sed 's/packets:[[0-9]]*/packets:0/ -+ s/bytes:[[0-9]]*/bytes:0/' -+} -+ -+# Changes all 'recirc(...)' and 'recirc=...' to say 'recirc()' and -+# 'recirc=' respectively. This should make output easier to -+# compare. -+strip_recirc() { -+ sed 's/recirc_id([[x0-9]]*)/recirc_id()/ -+ s/recirc_id=[[x0-9]]*/recirc_id=/ -+ s/recirc([[x0-9]]*)/recirc()/' -+} - m4_divert_pop([PREPARE_TESTS]) - - m4_define([TESTABLE_LOG], [-vPATTERN:ANY:'%c|%p|%m']) -diff --git a/tests/ovs-macros.at b/tests/ovs-macros.at -index 66545da572..e6c5bc6e94 100644 ---- a/tests/ovs-macros.at -+++ b/tests/ovs-macros.at -@@ -259,7 +259,20 @@ dnl Executes shell COMMAND in a loop until it returns zero. If COMMAND does - dnl not return zero within a reasonable time limit, executes the commands - dnl in IF-FAILED (if provided) and fails the test. - m4_define([OVS_WAIT_UNTIL], -- [OVS_WAIT([$1], [$2], [AT_LINE], [until $1])]) -+ [AT_FAIL_IF([test "$#" -ge 3]) -+ dnl The second argument should not be a number (confused with AT_CHECK ?). -+ AT_FAIL_IF([test "$#" -eq 2 && test "$2" -eq "$2" 2>/dev/null]) -+ OVS_WAIT([$1], [$2], [AT_LINE], [until $1])]) -+ -+dnl OVS_WAIT_UNTIL_EQUAL(COMMAND, OUTPUT) -+dnl -+dnl Executes shell COMMAND in a loop until it returns zero and the output -+dnl equals OUTPUT. If COMMAND does not return zero or a desired output within -+dnl a reasonable time limit, fails the test. -+m4_define([OVS_WAIT_UNTIL_EQUAL], -+ [AT_FAIL_IF([test "$#" -ge 3]) -+ echo "$2" > wait_until_expected -+ OVS_WAIT_UNTIL([$1 | diff -u wait_until_expected - ])]) - - dnl OVS_WAIT_WHILE(COMMAND, [IF-FAILED]) - dnl -@@ -267,7 +280,10 @@ dnl Executes shell COMMAND in a loop until it returns nonzero. If COMMAND does - dnl not return nonzero within a reasonable time limit, executes the commands - dnl in IF-FAILED (if provided) and fails the test. 
- m4_define([OVS_WAIT_WHILE], -- [OVS_WAIT([if $1; then return 1; else return 0; fi], [$2], -+ [AT_FAIL_IF([test "$#" -ge 3]) -+ dnl The second argument should not be a number (confused with AT_CHECK ?). -+ AT_FAIL_IF([test "$#" -eq 2 && test "$2" -eq "$2" 2>/dev/null]) -+ OVS_WAIT([if $1; then return 1; else return 0; fi], [$2], - [AT_LINE], [while $1])]) - - dnl OVS_APP_EXIT_AND_WAIT(DAEMON) -diff --git a/tests/ovs-ofctl.at b/tests/ovs-ofctl.at -index 604f15c2d1..c93cb9f16c 100644 ---- a/tests/ovs-ofctl.at -+++ b/tests/ovs-ofctl.at -@@ -3246,3 +3246,22 @@ dnl because we need ovs-vswitchd to have the controller config before starting - dnl the controller to 'snoop' the OpenFlow messages from beginning - OVS_VSWITCHD_STOP(["/connection failed (No such file or directory)/d"]) - AT_CLEANUP -+ -+ -+AT_SETUP([ovs-ofctl show-flows - Oversized flow]) -+OVS_VSWITCHD_START -+ -+printf " priority=90,icmp,reg15=0x8005,metadata=0x1,nw_dst=11.0.0.1,icmp_type=8,icmp_code=0 actions=" > flow.txt -+for i in `seq 1 1022`; do printf "set_field:0x399->reg13,set_field:0x$i->reg15,resubmit(,39),"; done >> flow.txt -+printf "resubmit(,39)\n" >> flow.txt -+ -+AT_CHECK([ovs-ofctl -O OpenFlow15 add-flows br0 flow.txt]) -+ -+AT_CHECK([ovs-ofctl -O OpenFlow10 dump-flows br0 | ofctl_strip | sed '/NXST_FLOW/d' | sort], [0], []) -+OVS_WAIT_UNTIL([grep -q "ofp_flow|WARN|Flow exceeded the maximum flow statistics reply size and was excluded from the response set" ovs-vswitchd.log]) -+ -+cat flow.txt > expout -+AT_CHECK([ovs-ofctl -O OpenFlow15 dump-flows br0 | ofctl_strip | sed '/OFPST_FLOW/d' | sort], [0], [expout]) -+ -+OVS_VSWITCHD_STOP(["/Flow exceeded the maximum flow statistics reply size and was excluded from the response set/d"]) -+AT_CLEANUP -diff --git a/tests/ovsdb-client.at b/tests/ovsdb-client.at -index 06b671df8c..2d14f1ac26 100644 ---- a/tests/ovsdb-client.at -+++ b/tests/ovsdb-client.at -@@ -3,6 +3,7 @@ AT_BANNER([OVSDB -- ovsdb-client commands]) - AT_SETUP([ovsdb-client 
get-schema-version and get-schema-cksum]) - AT_KEYWORDS([ovsdb client positive]) - ordinal_schema > schema -+on_exit 'kill `cat *.pid`' - AT_CHECK([ovsdb-tool create db schema], [0], [ignore], [ignore]) - AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=punix:socket db], [0], [ignore], [ignore]) - AT_CHECK([ovsdb-client get-schema-version unix:socket ordinals], [0], [5.1.3 -@@ -14,6 +15,7 @@ AT_CLEANUP - - AT_SETUP([ovsdb-client needs-conversion (no conversion needed)]) - AT_KEYWORDS([ovsdb client file positive]) -+on_exit 'kill `cat *.pid`' - ordinal_schema > schema - touch .db.~lock~ - AT_CHECK([ovsdb-tool create db schema], [0], [], [ignore]) -@@ -27,6 +29,7 @@ AT_SETUP([ovsdb-client needs-conversion (conversion needed)]) - AT_KEYWORDS([ovsdb client file positive]) - ordinal_schema > schema - touch .db.~lock~ -+on_exit 'kill `cat *.pid`' - AT_CHECK([ovsdb-tool create db schema], [0], [], [ignore]) - AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=punix:socket db], [0], [ignore], [ignore]) - sed 's/5\.1\.3/5.1.4/' < schema > schema2 -diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at -index fc6253cfe9..0f7076a052 100644 ---- a/tests/ovsdb-cluster.at -+++ b/tests/ovsdb-cluster.at -@@ -400,6 +400,61 @@ done - - AT_CLEANUP - -+AT_BANNER([OVSDB - cluster failure while joining]) -+AT_SETUP([OVSDB cluster - follower crash while joining]) -+AT_KEYWORDS([ovsdb server negative unix cluster join]) -+ -+n=3 -+schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` -+ordinal_schema > schema -+AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db dnl -+ $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr]) -+cid=`ovsdb-tool db-cid s1.db` -+schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` -+for i in `seq 2 $n`; do -+ AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft]) -+done -+ -+on_exit 'kill `cat *.pid`' -+ -+dnl Starting followers first, so we can 
configure them to crash on join. -+for j in `seq $n`; do -+ i=$(($n + 1 - $j)) -+ AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off dnl -+ --detach --no-chdir --log-file=s$i.log dnl -+ --pidfile=s$i.pid --unixctl=s$i dnl -+ --remote=punix:s$i.ovsdb s$i.db]) -+ if test $i != 1; then -+ OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s$i dnl -+ cluster/failure-test crash-before-sending-install-snapshot-reply dnl -+ | grep -q "engaged"]) -+ fi -+done -+ -+dnl Make sure that followers really crashed. -+for i in `seq 2 $n`; do -+ OVS_WAIT_WHILE([test -s s$i.pid]) -+done -+ -+dnl Bring them back. -+for i in `seq 2 $n`; do -+ AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off dnl -+ --detach --no-chdir --log-file=s$i.log dnl -+ --pidfile=s$i.pid --unixctl=s$i dnl -+ --remote=punix:s$i.ovsdb s$i.db]) -+done -+ -+dnl Make sure that all servers joined the cluster. -+for i in `seq $n`; do -+ AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected]) -+done -+ -+for i in `seq $n`; do -+ OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid]) -+done -+ -+AT_CLEANUP -+ - - - OVS_START_SHELL_HELPERS -@@ -413,12 +468,12 @@ ovsdb_cluster_failure_test () { - if test "$crash_node" == "1"; then - new_leader=$5 - fi -+ log_grep=$6 - - cp $top_srcdir/vswitchd/vswitch.ovsschema schema - schema=`ovsdb-tool schema-name schema` -- AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [dnl --ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral to persistent, including 'status' column in 'Manager' table, because clusters do not support ephemeral columns --]) -+ AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [stderr]) -+ AT_CHECK([sed < stderr "/ovsdb|WARN|schema: changed .* columns in 'Open_vSwitch' database from ephemeral to persistent/d"]) - - n=3 - join_cluster() { -@@ -434,7 +489,7 @@ ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from 
ephemeral - start_server() { - local i=$1 - printf "\ns$i: starting\n" -- AT_CHECK([ovsdb-server -vjsonrpc -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) -+ AT_CHECK([ovsdb-server -vjsonrpc -vraft -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) - } - connect_server() { - local i=$1 -@@ -460,14 +515,23 @@ ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral - fi - AT_CHECK([ovs-appctl -t "`pwd`"/s$delay_election_node cluster/failure-test delay-election], [0], [ignore]) - fi -+ -+ # Initializing the database separately to avoid extra 'wait' operation -+ # in later transactions. -+ AT_CHECK([ovs-vsctl -v --db="$db" --no-leader-only --no-shuffle-remotes --no-wait init], [0], [ignore], [ignore]) -+ - AT_CHECK([ovs-appctl -t "`pwd`"/s$crash_node cluster/failure-test $crash_command], [0], [ignore]) - AT_CHECK([ovs-vsctl -v --db="$db" --no-leader-only --no-shuffle-remotes --no-wait create QoS type=x], [0], [ignore], [ignore]) - -- # Make sure that the node really crashed. -- AT_CHECK([ls s$crash_node.ovsdb], [2], [ignore], [ignore]) -- # XXX: Client will fail if remotes contains unix socket that doesn't exist (killed). -- if test "$remote_1" = "$crash_node"; then -- db=unix:s$remote_2.ovsdb -+ # Make sure that the node really crashed or has specific log message. -+ if test -z "$log_grep"; then -+ AT_CHECK([ls s$crash_node.ovsdb], [2], [ignore], [ignore]) -+ # XXX: Client will fail if remotes contains unix socket that doesn't exist (killed). 
-+ if test "$remote_1" = "$crash_node"; then -+ db=unix:s$remote_2.ovsdb -+ fi -+ else -+ OVS_WAIT_UNTIL([grep -q "$log_grep" s${crash_node}.log]) - fi - AT_CHECK([ovs-vsctl --db="$db" --no-leader-only --no-wait --columns=type --bare list QoS], [0], [x - ]) -@@ -563,6 +627,11 @@ AT_KEYWORDS([ovsdb server negative unix cluster pending-txn]) - ovsdb_cluster_failure_test 2 2 3 crash-after-receiving-append-request-update - AT_CLEANUP - -+AT_SETUP([OVSDB cluster - txn on leader, leader transfers leadership after sending appendReq]) -+AT_KEYWORDS([ovsdb server negative unix cluster pending-txn transfer]) -+ovsdb_cluster_failure_test 1 2 1 transfer-leadership-after-sending-append-request -1 "Transferring leadership" -+AT_CLEANUP -+ - - AT_SETUP([OVSDB cluster - competing candidates]) - AT_KEYWORDS([ovsdb server negative unix cluster competing-candidates]) -@@ -629,9 +698,8 @@ ovsdb_torture_test () { - local variant=$3 # 'kill' and restart or 'remove' and add - cp $top_srcdir/vswitchd/vswitch.ovsschema schema - schema=`ovsdb-tool schema-name schema` -- AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [dnl --ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral to persistent, including 'status' column in 'Manager' table, because clusters do not support ephemeral columns --]) -+ AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [stderr]) -+ AT_CHECK([sed < stderr "/ovsdb|WARN|schema: changed .* columns in 'Open_vSwitch' database from ephemeral to persistent/d"]) - - join_cluster() { - local i=$1 -diff --git a/tests/ovsdb-data.at b/tests/ovsdb-data.at -index 8cd2a26cb3..25c6acdac6 100644 ---- a/tests/ovsdb-data.at -+++ b/tests/ovsdb-data.at -@@ -846,18 +846,21 @@ OVSDB_CHECK_POSITIVE([generate and apply diff -- integer], - [[diff-data '["integer"]' '[0]' '[2]']], - [[diff: 2 - apply diff: 2 -+apply diff in place: 2 - OK]]) - - 
OVSDB_CHECK_POSITIVE([generate and apply diff -- boolean], - [[diff-data '["boolean"]' '[true]' '[false]']], - [[diff: false - apply diff: false -+apply diff in place: false - OK]]) - - OVSDB_CHECK_POSITIVE([generate and apply diff -- string], - [[diff-data '["string"]' '["AAA"]' '["BBB"]']], - [[diff: "BBB" - apply diff: "BBB" -+apply diff in place: "BBB" - OK]]) - - dnl Test set modifications. -@@ -870,15 +873,19 @@ OVSDB_CHECK_POSITIVE([generate and apply diff -- set], - ]], - [[diff: ["set",[0,2]] - apply diff: ["set",[1,2]] -+apply diff in place: ["set",[1,2]] - OK - diff: 0 - apply diff: 1 -+apply diff in place: 1 - OK - diff: ["set",[0,1]] - apply diff: ["set",[0,1]] -+apply diff in place: ["set",[0,1]] - OK - diff: ["set",[0,1]] - apply diff: ["set",[]] -+apply diff in place: ["set",[]] - OK]]) - - dnl Test set modifications causes data to violate set size constrain. -@@ -898,18 +905,23 @@ OVSDB_CHECK_POSITIVE([generate and apply diff -- map], - ]], - [[diff: ["map",[["2 gills","1 chopin"],["2 pints","1 quart"]]] - apply diff: ["map",[["2 pints","1 quart"]]] -+apply diff in place: ["map",[["2 pints","1 quart"]]] - OK - diff: ["map",[]] - apply diff: ["map",[["2 gills","1 chopin"]]] -+apply diff in place: ["map",[["2 gills","1 chopin"]]] - OK - diff: ["map",[["2 gills","1 chopin"]]] - apply diff: ["map",[]] -+apply diff in place: ["map",[]] - OK - diff: ["map",[["2 pints","1 quart"]]] - apply diff: ["map",[["2 pints","1 quart"]]] -+apply diff in place: ["map",[["2 pints","1 quart"]]] - OK - diff: ["map",[["2 gills","1 gallon"]]] - apply diff: ["map",[["2 gills","1 gallon"]]] -+apply diff in place: ["map",[["2 gills","1 gallon"]]] - OK]]) - - OVSDB_CHECK_NEGATIVE([generate and apply diff with map -- size error], -diff --git a/tests/ovsdb-idl.at b/tests/ovsdb-idl.at -index 1386f13770..91d34d0de6 100644 ---- a/tests/ovsdb-idl.at -+++ b/tests/ovsdb-idl.at -@@ -225,7 +225,7 @@ m4_define([OVSDB_CHECK_IDL_TCP6_MULTIPLE_REMOTES_PY], - 
m4_define([OVSDB_CHECK_IDL_SSL_PY], - [AT_SETUP([$1 - Python3 - SSL]) - AT_SKIP_IF([test "$HAVE_OPENSSL" = no]) -- $PYTHON3 -c "import OpenSSL.SSL" -+ $PYTHON3 -c "import ssl" - SSL_PRESENT=$? - AT_SKIP_IF([test $SSL_PRESENT != 0]) - AT_KEYWORDS([ovsdb server idl positive Python with ssl socket $5]) -@@ -2309,7 +2309,7 @@ OVSDB_CHECK_CLUSTER_IDL_C([simple idl, monitor_cond_since, cluster disconnect], - 'condition simple [["i","==",2]]' \ - 'condition simple [["i","==",1]]' \ - '+reconnect' \ -- '["idltest", -+ '?["idltest", - {"op": "update", - "table": "simple", - "where": [["i", "==", 1]], -@@ -2320,7 +2320,7 @@ OVSDB_CHECK_CLUSTER_IDL_C([simple idl, monitor_cond_since, cluster disconnect], - 003: table simple: i=2 r=1 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> - 004: change conditions - 005: reconnect --006: table simple: i=2 r=1 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> -+006: table simple - 007: {"error":null,"result":[{"count":1}]} - 008: table simple: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> - 009: done -diff --git a/tests/ovsdb-server.at b/tests/ovsdb-server.at -index ac243d6a79..e672c13b27 100644 ---- a/tests/ovsdb-server.at -+++ b/tests/ovsdb-server.at -@@ -4,7 +4,7 @@ m4_define([OVSDB_SERVER_SHUTDOWN], - [OVS_APP_EXIT_AND_WAIT_BY_TARGET([ovsdb-server], [ovsdb-server.pid])]) - - m4_define([OVSDB_SERVER_SHUTDOWN_N], -- [cp pid$1 savepid$1 -+ [cp $1.pid savepid$1 - AT_CHECK([ovs-appctl -t "`pwd`"/unixctl$1 -e exit], [0], [ignore], [ignore]) - OVS_WAIT_WHILE([kill -0 `cat savepid$1`], [kill `cat savepid$1`])]) - -@@ -30,14 +30,13 @@ m4_define([OVSDB_CHECK_EXECUTION], - AT_KEYWORDS([ovsdb server positive unix $5]) - $2 > schema - AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore]) -+ on_exit 'kill `cat *.pid`' - AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=punix:socket db], [0], [ignore], [ignore]) - m4_foreach([txn], [$3], -- [AT_CHECK([ovsdb-client transact unix:socket 'txn'], [0], 
[stdout], [ignore], -- [test ! -e pid || kill `cat pid`]) -+ [AT_CHECK([ovsdb-client transact unix:socket 'txn'], [0], [stdout], [ignore]) - cat stdout >> output - ]) -- AT_CHECK([uuidfilt output], [0], [$4], [ignore], -- [test ! -e pid || kill `cat pid`]) -+ AT_CHECK([uuidfilt output], [0], [$4], [ignore]) - OVSDB_SERVER_SHUTDOWN - AT_CLEANUP]) - -@@ -88,8 +87,7 @@ AT_CHECK([uuidfilt output], [0], - [[[{"uuid":["uuid","<0>"]}] - [{"uuid":["uuid","<1>"]}] - [{"rows":[{"_uuid":["uuid","<0>"],"_version":["uuid","<2>"],"name":"zero","number":0},{"_uuid":["uuid","<1>"],"_version":["uuid","<3>"],"name":"one","number":1}]}] --]], [], -- [test ! -e pid || kill `cat pid`]) -+]], []) - AT_CLEANUP - - AT_SETUP([truncating database log with bad transaction]) -@@ -136,8 +134,7 @@ AT_CHECK([uuidfilt output], [0], - [[[{"uuid":["uuid","<0>"]}] - [{"uuid":["uuid","<1>"]}] - [{"rows":[{"_uuid":["uuid","<0>"],"_version":["uuid","<2>"],"name":"zero","number":0},{"_uuid":["uuid","<1>"],"_version":["uuid","<3>"],"name":"one","number":1}]}] --]], [], -- [test ! -e pid || kill `cat pid`]) -+]], []) - AT_CLEANUP - - dnl CHECK_DBS([databases]) -@@ -159,6 +156,7 @@ ordinal_schema > schema1 - constraint_schema > schema2 - AT_CHECK([ovsdb-tool create db1 schema1], [0], [ignore], [ignore]) - AT_CHECK([ovsdb-tool create db2 schema2], [0], [ignore], [ignore]) -+on_exit 'kill `cat *.pid`' - AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=punix:db.sock db1 db2], [0], [ignore], [ignore]) - CHECK_DBS([constraints - ordinals -@@ -166,7 +164,7 @@ ordinals - AT_CHECK( - [[ovstest test-jsonrpc request unix:db.sock get_schema [\"nonexistent\"]]], [0], - [[{"error":{"details":"get_schema request specifies unknown database nonexistent","error":"unknown database","syntax":"[\"nonexistent\"]"},"id":0,"result":null} --]], [], [test ! 
-e pid || kill `cat pid`]) -+]], []) - OVSDB_SERVER_SHUTDOWN - AT_CLEANUP - -@@ -393,7 +391,7 @@ AT_CHECK( - "table": "Manager", - "uuid-name": "x", - "row": {"target": "punix:socket2"}}]']], [0], [ignore], [ignore]) --on_exit 'kill `cat ovsdb-server.pid`' -+on_exit 'kill `cat *.pid`' - AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=db:mydb,Root,managers --remote=db:mydb,Root,manager_options --log-file db], [0], [ignore], [ignore]) - ovs-appctl -t ovsdb-server time/warp 6000 1000 - AT_CHECK( -@@ -686,6 +684,7 @@ ovsdb_check_online_compaction() { - ovsdb-tool create-cluster db schema unix:s1.raft - fi]) - dnl Start ovsdb-server. -+ on_exit 'kill `cat *.pid`' - AT_CHECK([ovsdb-server -vvlog:off -vconsole:off --detach --no-chdir --pidfile --remote=punix:socket --log-file db], [0]) - AT_CHECK([ovsdb_client_wait unix:socket ordinals connected]) - AT_CAPTURE_FILE([ovsdb-server.log]) -@@ -832,7 +831,7 @@ _uuid name number - <0> five 5 - <1> four 4 - <2> three 3 --], [], [test ! -e pid || kill `cat pid`]) -+], []) - OVSDB_SERVER_SHUTDOWN - } - OVS_END_SHELL_HELPERS -@@ -1228,6 +1227,71 @@ AT_CHECK([test $logged_updates -lt $logged_nonblock_updates]) - AT_CHECK_UNQUOTED([ovs-vsctl get open_vswitch . system_version], [0], - [xyzzy$counter - ]) -+OVS_APP_EXIT_AND_WAIT([ovsdb-server]) -+AT_CLEANUP -+ -+AT_SETUP([ovsdb-server transaction history size]) -+on_exit 'kill `cat *.pid`' -+ -+dnl Start an ovsdb-server with the clustered vswitchd schema. -+AT_CHECK([ovsdb-tool create-cluster db dnl -+ $abs_top_srcdir/vswitchd/vswitch.ovsschema unix:s1.raft], -+ [0], [ignore], [ignore]) -+AT_CHECK([ovsdb-server --detach --no-chdir --pidfile dnl -+ --log-file --remote=punix:db.sock db], -+ [0], [ignore], [ignore]) -+AT_CHECK([ovs-vsctl --no-wait init]) -+ -+dnl Create a bridge with N ports per transaction. Increase N every 4 -+dnl iterations. And then remove the bridges. 
By increasing the size of -+dnl transactions, ensuring that they take up a significant percentage of -+dnl the total database size, so the transaction history will not be able -+dnl to hold all of them. -+dnl -+dnl The test verifies that the number of atoms in the transaction history -+dnl is always less than the number of atoms in the database, except for -+dnl a case where there is only one transaction in a history. -+get_memory_value () { -+ n=$(ovs-appctl -t ovsdb-server memory/show dnl -+ | tr ' ' '\n' | grep "^$1:" | cut -d ':' -f 2) -+ if test X"$n" == "X"; then -+ n=0 -+ fi -+ echo $n -+} -+ -+check_atoms () { -+ if test $(get_memory_value txn-history) -eq 1; then return; fi -+ n_db_atoms=$(get_memory_value atoms) -+ n_txn_history_atoms=$(get_memory_value txn-history-atoms) -+ echo "n_db_atoms: $n_db_atoms" -+ echo "n_txn_history_atoms: $n_txn_history_atoms" -+ AT_CHECK([test $n_txn_history_atoms -le $n_db_atoms]) -+} -+ -+add_ports () { -+ for j in $(seq 1 $2); do -+ printf " -- add-port br$1 p$1-%d" $j -+ done -+} -+ -+initial_db_atoms=$(get_memory_value atoms) -+ -+for i in $(seq 1 100); do -+ cmd=$(add_ports $i $(($i / 4 + 1))) -+ AT_CHECK([ovs-vsctl --no-wait add-br br$i $cmd]) -+ check_atoms -+done -+ -+for i in $(seq 1 100); do -+ AT_CHECK([ovs-vsctl --no-wait del-br br$i]) -+ check_atoms -+done -+ -+dnl After removing all the bridges, the number of atoms in the database -+dnl should return to its initial value. 
-+AT_CHECK([test $(get_memory_value atoms) -eq $initial_db_atoms]) -+ - OVS_APP_EXIT_AND_WAIT([ovsdb-server]) - AT_CLEANUP - -@@ -1254,15 +1318,14 @@ m4_define([OVSDB_CHECK_EXECUTION], - $2 > schema - PKIDIR=$abs_top_builddir/tests - AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore]) -+ on_exit 'kill `cat *.pid`' - AT_CHECK([ovsdb-server --log-file --detach --no-chdir --pidfile --private-key=$PKIDIR/testpki-privkey2.pem --certificate=$PKIDIR/testpki-cert2.pem --ca-cert=$PKIDIR/testpki-cacert.pem --remote=pssl:0:127.0.0.1 db], [0], [ignore], [ignore]) - PARSE_LISTENING_PORT([ovsdb-server.log], [SSL_PORT]) - m4_foreach([txn], [$3], -- [AT_CHECK([ovsdb-client --private-key=$PKIDIR/testpki-privkey.pem --certificate=$PKIDIR/testpki-cert.pem --ca-cert=$PKIDIR/testpki-cacert.pem transact ssl:127.0.0.1:$SSL_PORT 'txn'], [0], [stdout], [ignore], -- [test ! -e pid || kill `cat pid`]) -+ [AT_CHECK([ovsdb-client --private-key=$PKIDIR/testpki-privkey.pem --certificate=$PKIDIR/testpki-cert.pem --ca-cert=$PKIDIR/testpki-cacert.pem transact ssl:127.0.0.1:$SSL_PORT 'txn'], [0], [stdout], [ignore]) - cat stdout >> output - ]) -- AT_CHECK([uuidfilt output], [0], [$4], [ignore], -- [test ! 
-e pid || kill `cat pid`]) -+ AT_CHECK([uuidfilt output], [0], [$4], [ignore]) - OVSDB_SERVER_SHUTDOWN - AT_CLEANUP]) - -@@ -1291,16 +1354,15 @@ m4_define([OVSDB_CHECK_EXECUTION], - AT_SKIP_IF([test $HAVE_IPV6 = no]) - $2 > schema - PKIDIR=$abs_top_builddir/tests -+ on_exit 'kill `cat *.pid`' - AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore]) - AT_CHECK([ovsdb-server --log-file --detach --no-chdir --pidfile --private-key=$PKIDIR/testpki-privkey2.pem --certificate=$PKIDIR/testpki-cert2.pem --ca-cert=$PKIDIR/testpki-cacert.pem --remote=pssl:0:[[::1]] db], [0], [ignore], [ignore]) - PARSE_LISTENING_PORT([ovsdb-server.log], [SSL_PORT]) - m4_foreach([txn], [$3], -- [AT_CHECK([ovsdb-client --private-key=$PKIDIR/testpki-privkey.pem --certificate=$PKIDIR/testpki-cert.pem --ca-cert=$PKIDIR/testpki-cacert.pem transact ssl:[[::1]]:$SSL_PORT 'txn'], [0], [stdout], [ignore], -- [test ! -e pid || kill `cat pid`]) -+ [AT_CHECK([ovsdb-client --private-key=$PKIDIR/testpki-privkey.pem --certificate=$PKIDIR/testpki-cert.pem --ca-cert=$PKIDIR/testpki-cacert.pem transact ssl:[[::1]]:$SSL_PORT 'txn'], [0], [stdout], [ignore]) - cat stdout >> output - ]) -- AT_CHECK([uuidfilt output], [0], [$4], [ignore], -- [test ! -e pid || kill `cat pid`]) -+ AT_CHECK([uuidfilt output], [0], [$4], [ignore]) - OVSDB_SERVER_SHUTDOWN - AT_CLEANUP]) - -@@ -1327,16 +1389,15 @@ m4_define([OVSDB_CHECK_EXECUTION], - AT_KEYWORDS([ovsdb server positive tcp $5]) - $2 > schema - PKIDIR=$abs_top_builddir/tests -+ on_exit 'kill `cat *.pid`' - AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore]) - AT_CHECK([ovsdb-server --log-file --detach --no-chdir --pidfile --remote=ptcp:0:127.0.0.1 db], [0], [ignore], [ignore]) - PARSE_LISTENING_PORT([ovsdb-server.log], [TCP_PORT]) - m4_foreach([txn], [$3], -- [AT_CHECK([ovsdb-client transact tcp:127.0.0.1:$TCP_PORT 'txn'], [0], [stdout], [ignore], -- [test ! 
-e pid || kill `cat pid`]) -+ [AT_CHECK([ovsdb-client transact tcp:127.0.0.1:$TCP_PORT 'txn'], [0], [stdout], [ignore]) - cat stdout >> output - ]) -- AT_CHECK([uuidfilt output], [0], [$4], [ignore], -- [test ! -e pid || kill `cat pid`]) -+ AT_CHECK([uuidfilt output], [0], [$4], [ignore]) - OVSDB_SERVER_SHUTDOWN - AT_CLEANUP]) - -@@ -1364,16 +1425,15 @@ m4_define([OVSDB_CHECK_EXECUTION], - AT_SKIP_IF([test $HAVE_IPV6 = no]) - $2 > schema - PKIDIR=$abs_top_builddir/tests -+ on_exit 'kill `cat *.pid`' - AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore]) - AT_CHECK([ovsdb-server --log-file --detach --no-chdir --pidfile --remote=ptcp:0:[[::1]] db], [0], [ignore], [ignore]) - PARSE_LISTENING_PORT([ovsdb-server.log], [TCP_PORT]) - m4_foreach([txn], [$3], -- [AT_CHECK([ovsdb-client transact tcp:[[::1]]:$TCP_PORT 'txn'], [0], [stdout], [ignore], -- [test ! -e pid || kill `cat pid`]) -+ [AT_CHECK([ovsdb-client transact tcp:[[::1]]:$TCP_PORT 'txn'], [0], [stdout], [ignore]) - cat stdout >> output - ]) -- AT_CHECK([uuidfilt output], [0], [$4], [ignore], -- [test ! -e pid || kill `cat pid`]) -+ AT_CHECK([uuidfilt output], [0], [$4], [ignore]) - OVSDB_SERVER_SHUTDOWN - AT_CLEANUP]) - -@@ -1453,9 +1513,9 @@ m4_define([OVSDB_CHECK_EXECUTION], - target=4 - $2 > schema - schema_name=`ovsdb-tool schema-name schema` -+ on_exit 'kill `cat *.pid`' - AT_CHECK([ovsdb-tool create db1 schema], [0], [stdout], [ignore]) - -- on_exit 'kill `cat *.pid`' - AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log dnl - --pidfile --remote=punix:db1.sock db1 - ], [0], [ignore], [ignore]) -@@ -1511,12 +1571,11 @@ m4_define([OVSDB_CHECK_EXECUTION], - AT_CHECK([ovsdb-tool create db1 schema], [0], [stdout], [ignore]) - AT_CHECK([ovsdb-tool create db2 schema], [0], [stdout], [ignore]) - -+ on_exit 'kill `cat *.pid`' - AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock db1], [0], [ignore], [ignore]) - i -- on_exit 'test ! 
-e pid || kill `cat pid`' - -- AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=pid2 --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore]) -- on_exit 'test ! -e pid2 || kill `cat pid2`' -+ AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore]) - - m4_foreach([txn], [$3], - [AT_CHECK([ovsdb-client transact 'txn'], [0], [stdout], [ignore]) -@@ -1557,11 +1616,10 @@ m4_define([OVSDB_CHECK_REPLICATION], - AT_CHECK([ovsdb-tool create db1 schema], [0], [stdout], [ignore]) - AT_CHECK([ovsdb-tool create db2 schema], [0], [stdout], [ignore]) - -+ on_exit 'kill `cat *.pid`' - AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock db1], [0], [ignore], [ignore]) -- on_exit 'test ! -e pid || kill `cat pid`' - -- AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=pid2 --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock --sync-exclude-tables=mydb:b db2], [0], [ignore], [ignore]) -- on_exit 'test ! 
-e pid2 || kill `cat pid2`' -+ AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock --sync-exclude-tables=mydb:b db2], [0], [ignore], [ignore]) - - m4_foreach([txn], [$3], - [AT_CHECK([ ovsdb-client transact 'txn' ], [0], [stdout], [ignore]) -@@ -1629,6 +1687,7 @@ AT_CLEANUP - - #ovsdb-server/set-sync-exclude-tables command - AT_SETUP([ovsdb-server/set-sync-exclude-tables]) -+on_exit 'kill `cat *.pid`' - AT_KEYWORDS([ovsdb server replication set-exclude-tables]) - AT_SKIP_IF([test $DIFF_SUPPORTS_NORMAL_FORMAT = no]) - -@@ -1637,12 +1696,10 @@ AT_CHECK([ovsdb-tool create db1 schema], [0], [stdout], [ignore]) - AT_CHECK([ovsdb-tool create db2 schema], [0], [stdout], [ignore]) - - AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock db1], [0], [ignore], [ignore]) --on_exit 'test ! -e pid || kill `cat pid`' - --AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=pid2 --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore]) --on_exit 'test ! -e pid2 || kill `cat pid2`' -+AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore]) - --AT_CHECK([ovs-appctl -t "`pwd`"/unixctl2 ovsdb-server/set-sync-exclude-tables mydb:b], [0], [ignore], [ignore], [test ! -e pid || kill `cat pid`; test ! -e pid2 || kill `cat pid2`]) -+AT_CHECK([ovs-appctl -t "`pwd`"/unixctl2 ovsdb-server/set-sync-exclude-tables mydb:b], [0], [ignore], [ignore]) - - AT_CHECK([ovsdb-client transact unix:db.sock \ - '[["mydb", -@@ -1651,11 +1708,9 @@ AT_CHECK([ovsdb-client transact unix:db.sock \ - "row": {"number": 0, "name": "zero"}}, - {"op": "insert", - "table": "b", -- "row": {"number": 1, "name": "one"}}]]'], [0], [stdout], [ignore], -- [test ! 
-e pid || kill `cat pid`; test ! -e pid2 || kill `cat pid2`]) -+ "row": {"number": 1, "name": "one"}}]]'], [0], [stdout], [ignore]) - --AT_CHECK([ovsdb-client dump unix:db.sock], [0], [stdout], [ignore], -- [test ! -e pid || kill `cat pid`; test ! -e pid2 || kill `cat pid2`]) -+AT_CHECK([ovsdb-client dump unix:db.sock], [0], [stdout], [ignore]) - cat stdout > dump1 - OVS_WAIT_UNTIL([ ovsdb-client dump unix:db2.sock | grep zero ]) - AT_CHECK([ovsdb-client dump unix:db2.sock], [0], [stdout], [ignore]) -@@ -1679,16 +1734,15 @@ AT_CLEANUP - - #ovsdb-server/connect-active-ovsdb-server - AT_SETUP([ovsdb-server/connect-active-server]) -+on_exit 'kill `cat *.pid`' - AT_KEYWORDS([ovsdb server replication connect-active-server]) - replication_schema > schema - AT_CHECK([ovsdb-tool create db1 schema], [0], [stdout], [ignore]) - AT_CHECK([ovsdb-tool create db2 schema], [0], [stdout], [ignore]) - - AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock db1], [0], [ignore], [ignore]) --on_exit 'test ! -e pid || kill `cat pid`' - --AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=pid2 --remote=punix:db2.sock --unixctl=unixctl2 db2], [0], [ignore], [ignore]) --on_exit 'test ! -e pid2 || kill `cat pid2`' -+AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl=unixctl2 db2], [0], [ignore], [ignore]) - - dnl Try to connect without specifying the active server. 
- AT_CHECK([ovs-appctl -t "`pwd`"/unixctl2 ovsdb-server/connect-active-ovsdb-server], [0], -@@ -1718,6 +1772,7 @@ AT_CLEANUP - - #ovsdb-server/disconnect-active-server command - AT_SETUP([ovsdb-server/disconnect-active-server]) -+on_exit 'kill `cat *.pid`' - AT_KEYWORDS([ovsdb server replication disconnect-active-server]) - AT_SKIP_IF([test $DIFF_SUPPORTS_NORMAL_FORMAT = no]) - -@@ -1726,10 +1781,8 @@ AT_CHECK([ovsdb-tool create db1 schema], [0], [stdout], [ignore]) - AT_CHECK([ovsdb-tool create db2 schema], [0], [stdout], [ignore]) - - AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock db1], [0], [ignore], [ignore]) --on_exit 'test ! -e pid || kill `cat pid`' - --AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=pid2 --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore]) --on_exit 'test ! -e pid2 || kill `cat pid2`' -+AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore]) - - AT_CHECK([ovsdb-client transact unix:db.sock \ - '[["mydb", -@@ -1775,7 +1828,7 @@ AT_CHECK([uuidfilt output], [0], [7,9c7,8 - --- - > _uuid name number - > ----- ---- ------ --], [ignore], [test ! -e pid || kill `cat pid`; test ! -e pid2 || kill `cat pid2`]) -+], [ignore]) - - dnl The backup server now become active, and can accept write transactions. - AT_CHECK([ovsdb-client transact unix:db2.sock \ -@@ -1826,13 +1879,12 @@ dnl Start both 'db1' and 'db2' in backup mode. Let them backup from each - dnl other. This is not an supported operation state, but to simulate a start - dnl up condition where an HA manger can select which one to be an active - dnl server soon after. 
--AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile="`pwd`"/pid --remote=punix:db.sock --unixctl="`pwd`"/unixctl db1 --sync-from=unix:db2.sock --active ], [0], [ignore], [ignore]) --on_exit 'test ! -e pid || kill `cat pid`' -+on_exit 'kill `cat *.pid`' -+AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock --unixctl="`pwd`"/unixctl db1 --sync-from=unix:db2.sock --active ], [0], [ignore], [ignore]) - - AT_CHECK([ovs-appctl -t "`pwd`"/unixctl ovsdb-server/connect-active-ovsdb-server]) - --AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile="`pwd`"/pid2 --remote=punix:db2.sock --unixctl="`pwd`"/unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore]) --on_exit 'test ! -e pid2 || kill `cat pid2`' -+AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl="`pwd`"/unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore]) - - dnl - dnl make sure both servers reached the replication state -@@ -1900,8 +1952,8 @@ AT_CHECK([ovsdb-tool transact db \ - "row": {"number": 9, "name": "nine"}}]]'], [0], [ignore], [ignore]) - - dnl Start 'db', then try to be a back up server of itself. --AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server.log --pidfile="`pwd`"/pid --remote=punix:db.sock --unixctl="`pwd`"/unixctl db --sync-from=unix:db.sock --active ], [0], [ignore], [ignore]) --on_exit 'test ! 
-e pid || kill `cat pid`' -+on_exit 'kill `cat *.pid`' -+AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server.log --pidfile --remote=punix:db.sock --unixctl="`pwd`"/unixctl db --sync-from=unix:db.sock --active ], [0], [ignore], [ignore]) - - dnl Save the current content - AT_CHECK([ovsdb-client dump unix:db.sock], [0], [stdout]) -@@ -1919,6 +1971,7 @@ AT_CHECK([diff dump1 dump2]) - AT_CLEANUP - - AT_SETUP([ovsdb-server/read-only db:ptcp connection]) -+on_exit 'kill `cat *.pid`' - AT_KEYWORDS([ovsdb server read-only]) - AT_DATA([schema], - [[{"name": "mydb", -@@ -2007,12 +2060,10 @@ AT_CHECK([ovsdb-tool transact db2 \ - "row": {"number": 10, "name": "ten"}}]]'], [0], [ignore], [ignore]) - - dnl Start both 'db1' and 'db2'. --AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile="`pwd`"/pid --remote=punix:db.sock --unixctl="`pwd`"/unixctl db1 --active ], [0], [ignore], [ignore]) --on_exit 'test ! -e pid || kill `cat pid`' -- -+on_exit 'kill `cat *.pid`' -+AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock --unixctl="`pwd`"/unixctl db1 --active ], [0], [ignore], [ignore]) - --AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile="`pwd`"/pid2 --remote=punix:db2.sock --unixctl="`pwd`"/unixctl2 db2], [0], [ignore], [ignore]) --on_exit 'test ! -e pid2 || kill `cat pid2`' -+AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl="`pwd`"/unixctl2 db2], [0], [ignore], [ignore]) - - OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/unixctl ovsdb-server/sync-status |grep active]) - OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/unixctl2 ovsdb-server/sync-status |grep active]) -@@ -2112,7 +2163,7 @@ dnl Starting a dummy server only to reserve some tcp port. 
- AT_CHECK([cp db db.tmp]) - AT_CHECK([ovsdb-server -vfile -vvlog:off --log-file=listener.log dnl - --detach --no-chdir dnl -- --pidfile=pid2 --unixctl=unixctl2 dnl -+ --pidfile=2.pid --unixctl=unixctl2 dnl - --remote=ptcp:0:127.0.0.1 dnl - db.tmp], [0], [stdout], [stderr]) - PARSE_LISTENING_PORT([listener.log], [BAD_TCP_PORT]) -diff --git a/tests/pmd.at b/tests/pmd.at -index 225d4ee3a4..a7cbf9a81b 100644 ---- a/tests/pmd.at -+++ b/tests/pmd.at -@@ -199,7 +199,7 @@ pmd thread numa_id core_id : - OVS_VSWITCHD_STOP - AT_CLEANUP - --AT_SETUP([PMD - pmd-cpu-mask - NUMA]) -+AT_SETUP([PMD - pmd-cpu-mask - dual NUMA]) - OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy-pmd options:n_rxq=8 options:numa_id=1 -- set Open_vSwitch . other_config:pmd-cpu-mask=1], - [], [], [--dummy-numa 1,1,0,0]) - -@@ -359,6 +359,44 @@ pmd thread numa_id 1 core_id 0: - OVS_VSWITCHD_STOP - AT_CLEANUP - -+AT_SETUP([PMD - pmd-cpu-mask - multi NUMA]) -+OVS_VSWITCHD_START([add-port br0 p0 \ -+ -- set Interface p0 type=dummy-pmd options:n_rxq=4 \ -+ -- set Interface p0 options:numa_id=0 \ -+ -- set Open_vSwitch . other_config:pmd-cpu-mask=0xf \ -+ -- set open_vswitch . other_config:pmd-rxq-assign=cycles], -+ [], [], [--dummy-numa 1,2,1,2]) -+ -+TMP=$(($(cat ovs-vswitchd.log | wc -l | tr -d [[:blank:]])+1)) -+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-rxq-assign=group]) -+ -+OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "Performing pmd to rx queue assignment using group algorithm"]) -+OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "There's no available (non-isolated) pmd thread on numa node 0."]) -+ -+# check all pmds from both non-local numas are assigned an rxq -+AT_CHECK([test `ovs-appctl dpif-netdev/pmd-rxq-show | awk '/AVAIL$/ { printf("%s\t", $0); next } 1' | parse_pmd_rxq_show_group | wc -l` -eq 4]) -+ -+TMP=$(($(cat ovs-vswitchd.log | wc -l | tr -d [[:blank:]])+1)) -+AT_CHECK([ovs-vsctl set Open_vSwitch . 
other_config:pmd-rxq-assign=cycles]) -+ -+OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "Performing pmd to rx queue assignment using cycles algorithm"]) -+OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "There's no available (non-isolated) pmd thread on numa node 0."]) -+ -+# check all pmds from both non-local numas are assigned an rxq -+AT_CHECK([test `ovs-appctl dpif-netdev/pmd-rxq-show | awk '/AVAIL$/ { printf("%s\t", $0); next } 1' | parse_pmd_rxq_show_group | wc -l` -eq 4]) -+ -+TMP=$(($(cat ovs-vswitchd.log | wc -l | tr -d [[:blank:]])+1)) -+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-rxq-assign=roundrobin]) -+ -+OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "Performing pmd to rx queue assignment using roundrobin algorithm"]) -+OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "There's no available (non-isolated) pmd thread on numa node 0."]) -+ -+# check all pmds from both non-local numas are assigned an rxq -+AT_CHECK([test `ovs-appctl dpif-netdev/pmd-rxq-show | awk '/AVAIL$/ { printf("%s\t", $0); next } 1' | parse_pmd_rxq_show_group | wc -l` -eq 4]) -+ -+OVS_VSWITCHD_STOP -+AT_CLEANUP -+ - AT_SETUP([PMD - stats]) - OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 ofport_request=7 type=dummy-pmd options:n_rxq=4], - [], [], [DUMMY_NUMA]) -diff --git a/tests/reconnect.at b/tests/reconnect.at -index 0f74709f5a..5bca84351c 100644 ---- a/tests/reconnect.at -+++ b/tests/reconnect.at -@@ -39,8 +39,19 @@ run - connected - - # Try timeout without noting that we tried to receive. --# (This does nothing since we never timeout in this case.) -+# Timeout should be scheduled to the next probe interval. - timeout -+run -+ -+# Once we reached the timeout, it should not expire until the receive actually -+# attempted. However, we still need to wake up as soon as possible in order to -+# have a chance to mark the receive attempt even if nothing was received. 
-+timeout -+run -+ -+# Short time advance past the original probe interval, but not expired still. -+timeout -+run - - # Now disable the receive-attempted feature and timeout again. - receive-attempted LLONG_MAX -@@ -67,18 +78,37 @@ connected - last connected 0 ms ago, connected 0 ms total - - # Try timeout without noting that we tried to receive. --# (This does nothing since we never timeout in this case.) --timeout -- no timeout -- --# Now disable the receive-attempted feature and timeout again. --receive-attempted LLONG_MAX -+# Timeout should be scheduled to the next probe interval. - timeout - advance 5000 ms - - ### t=6000 ### - in ACTIVE for 5000 ms (0 ms backoff) - run -+ -+# Once we reached the timeout, it should not expire until the receive actually -+# attempted. However, we still need to wake up as soon as possible in order to -+# have a chance to mark the receive attempt even if nothing was received. -+timeout -+ advance 1 ms -+ -+### t=6001 ### -+ in ACTIVE for 5001 ms (0 ms backoff) -+run -+ -+# Short time advance past the original probe interval, but not expired still. -+timeout -+ advance 1 ms -+ -+### t=6002 ### -+ in ACTIVE for 5002 ms (0 ms backoff) -+run -+ -+# Now disable the receive-attempted feature and timeout again. -+receive-attempted LLONG_MAX -+timeout -+ advance 0 ms -+run - should send probe - in IDLE for 0 ms (0 ms backoff) - -@@ -86,7 +116,7 @@ run - timeout - advance 5000 ms - --### t=11000 ### -+### t=11002 ### - in IDLE for 5000 ms (0 ms backoff) - run - should disconnect -@@ -94,7 +124,7 @@ disconnected - in BACKOFF for 0 ms (1000 ms backoff) - 1 successful connections out of 1 attempts, seqno 2 - disconnected -- disconnected at 11000 ms (0 ms ago) -+ disconnected at 11002 ms (0 ms ago) - ]) - - ###################################################################### -@@ -111,8 +141,19 @@ run - connected - - # Try timeout without noting that we tried to receive. --# (This does nothing since we never timeout in this case.) 
-+# Timeout should be scheduled to the next probe interval. -+timeout -+run -+ -+# Once we reached the timeout, it should not expire until the receive actually -+# attempted. However, we still need to wake up as soon as possible in order to -+# have a chance to mark the receive attempt even if nothing was received. -+timeout -+run -+ -+# Short time advance past the original probe interval, but not expired still. - timeout -+run - - # Now disable the receive-attempted feature and timeout again. - receive-attempted LLONG_MAX -@@ -148,18 +189,37 @@ connected - last connected 0 ms ago, connected 0 ms total - - # Try timeout without noting that we tried to receive. --# (This does nothing since we never timeout in this case.) --timeout -- no timeout -- --# Now disable the receive-attempted feature and timeout again. --receive-attempted LLONG_MAX -+# Timeout should be scheduled to the next probe interval. - timeout - advance 5000 ms - - ### t=6500 ### - in ACTIVE for 5000 ms (0 ms backoff) - run -+ -+# Once we reached the timeout, it should not expire until the receive actually -+# attempted. However, we still need to wake up as soon as possible in order to -+# have a chance to mark the receive attempt even if nothing was received. -+timeout -+ advance 1 ms -+ -+### t=6501 ### -+ in ACTIVE for 5001 ms (0 ms backoff) -+run -+ -+# Short time advance past the original probe interval, but not expired still. -+timeout -+ advance 1 ms -+ -+### t=6502 ### -+ in ACTIVE for 5002 ms (0 ms backoff) -+run -+ -+# Now disable the receive-attempted feature and timeout again. 
-+receive-attempted LLONG_MAX -+timeout -+ advance 0 ms -+run - should send probe - in IDLE for 0 ms (0 ms backoff) - -@@ -167,7 +227,7 @@ run - timeout - advance 5000 ms - --### t=11500 ### -+### t=11502 ### - in IDLE for 5000 ms (0 ms backoff) - run - should disconnect -@@ -175,7 +235,7 @@ disconnected - in BACKOFF for 0 ms (1000 ms backoff) - 1 successful connections out of 1 attempts, seqno 2 - disconnected -- disconnected at 11500 ms (0 ms ago) -+ disconnected at 11502 ms (0 ms ago) - ]) - - ###################################################################### -@@ -1271,14 +1331,14 @@ activity - created 1000, last activity 3000, last connected 2000 - - # Connection times out. --timeout -- no timeout --receive-attempted LLONG_MAX - timeout - advance 5000 ms - - ### t=8000 ### - in ACTIVE for 6000 ms (1000 ms backoff) -+receive-attempted LLONG_MAX -+timeout -+ advance 0 ms - run - should send probe - in IDLE for 0 ms (1000 ms backoff) -diff --git a/tests/system-common-macros.at b/tests/system-common-macros.at -index 19a0b125b9..8b9f5c7525 100644 ---- a/tests/system-common-macros.at -+++ b/tests/system-common-macros.at -@@ -281,6 +281,14 @@ m4_define([OVS_START_L7], - # - m4_define([OFPROTO_CLEAR_DURATION_IDLE], [[sed -e 's/duration=.*s,/duration=,/g' -e 's/idle_age=[0-9]*,/idle_age=,/g']]) - -+# OVS_CHECK_TUNNEL_TSO() -+# -+# Macro to be used in general tunneling tests that could be also -+# used by system-tso. In that case, tunneling is not supported and -+# the test should be skipped. -+m4_define([OVS_CHECK_TUNNEL_TSO], -+ [m4_ifdef([CHECK_SYSTEM_TSO], [AT_SKIP_IF(:)])]) -+ - # OVS_CHECK_VXLAN() - # - # Do basic check for vxlan functionality, skip the test if it's not there. -diff --git a/tests/system-dpdk.at b/tests/system-dpdk.at -index e0e750fde5..512aa87d4c 100644 ---- a/tests/system-dpdk.at -+++ b/tests/system-dpdk.at -@@ -248,6 +248,10 @@ AT_CHECK([ovs-vsctl show], [], [stdout]) - AT_SKIP_IF([! 
ovs-appctl dpif-netdev/miniflow-parser-get | sed 1,4d | grep "True"], [], [dnl - ]) - -+AT_CHECK([ovs-appctl dpif-netdev/dpif-impl-set dpif_avx512], [0], [dnl -+DPIF implementation set to dpif_avx512. -+]) -+ - AT_CHECK([ovs-appctl dpif-netdev/miniflow-parser-set autovalidator], [0], [dnl - Miniflow extract implementation set to autovalidator. - ]) -@@ -275,6 +279,10 @@ AT_CHECK([ovs-vsctl show], [], [stdout]) - AT_SKIP_IF([! ovs-appctl dpif-netdev/miniflow-parser-get | sed 1,4d | grep "True"], [], [dnl - ]) - -+AT_CHECK([ovs-appctl dpif-netdev/dpif-impl-set dpif_avx512], [0], [dnl -+DPIF implementation set to dpif_avx512. -+]) -+ - AT_CHECK([ovs-appctl dpif-netdev/miniflow-parser-set autovalidator], [0], [dnl - Miniflow extract implementation set to autovalidator. - ]) -diff --git a/tests/system-route.at b/tests/system-route.at -index 1714273e35..270956d13f 100644 ---- a/tests/system-route.at -+++ b/tests/system-route.at -@@ -14,10 +14,9 @@ dnl Add ip address. - AT_CHECK([ip addr add 10.0.0.17/24 dev p1-route], [0], [stdout]) - - dnl Check that OVS catches route updates. --OVS_WAIT_UNTIL([ovs-appctl ovs/route/show | grep 'p1-route' | sort], [0], [dnl --Cached: 10.0.0.17/24 dev p1-route SRC 10.0.0.17 --Cached: 10.0.0.17/32 dev p1-route SRC 10.0.0.17 local --]) -+OVS_WAIT_UNTIL_EQUAL([ovs-appctl ovs/route/show | grep 'p1-route' | sort], [dnl -+Cached: 10.0.0.0/24 dev p1-route SRC 10.0.0.17 -+Cached: 10.0.0.17/32 dev p1-route SRC 10.0.0.17 local]) - - dnl Delete ip address. 
- AT_CHECK([ip addr del 10.0.0.17/24 dev p1-route], [0], [stdout]) -diff --git a/tests/system-traffic.at b/tests/system-traffic.at -index f400cfabc9..4c368eded4 100644 ---- a/tests/system-traffic.at -+++ b/tests/system-traffic.at -@@ -218,6 +218,7 @@ OVS_TRAFFIC_VSWITCHD_STOP - AT_CLEANUP - - AT_SETUP([datapath - ping over vxlan tunnel]) -+OVS_CHECK_TUNNEL_TSO() - OVS_CHECK_VXLAN() - - OVS_TRAFFIC_VSWITCHD_START() -@@ -258,7 +259,55 @@ NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PI - OVS_TRAFFIC_VSWITCHD_STOP - AT_CLEANUP - -+AT_SETUP([datapath - ping vlan over vxlan tunnel]) -+OVS_CHECK_TUNNEL_TSO() -+OVS_CHECK_VXLAN() -+ -+OVS_TRAFFIC_VSWITCHD_START() -+ADD_BR([br-underlay]) -+ -+AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"]) -+AT_CHECK([ovs-ofctl add-flow br-underlay "actions=normal"]) -+ -+ADD_NAMESPACES(at_ns0) -+ -+dnl Set up underlay link from host into the namespace using veth pair. -+ADD_VETH(p0, at_ns0, br-underlay, "172.31.2.1/24") -+AT_CHECK([ip addr add dev br-underlay "172.31.1.100/24"]) -+AT_CHECK([ip link set dev br-underlay up]) -+ -+dnl Set up tunnel endpoints on OVS outside the namespace and with a native -+dnl linux device inside the namespace. 
-+ADD_OVS_TUNNEL([vxlan], [br0], [at_vxlan0], [172.31.1.1], [10.1.1.100/24]) -+ADD_NATIVE_TUNNEL([vxlan], [at_vxlan1], [at_ns0], [172.31.1.100], [10.2.1.1/24], -+ [id 0 dstport 4789]) -+ -+AT_CHECK([ovs-vsctl set port br0 tag=100]) -+AT_CHECK([ovs-vsctl set port br-underlay tag=42]) -+ -+ADD_VLAN(at_vxlan1, at_ns0, 100, "10.1.1.1/24") -+ADD_VLAN(p0, at_ns0, 42, "172.31.1.1/24") -+ -+dnl First, check the underlay -+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 172.31.1.100 | FORMAT_PING], [0], [dnl -+3 packets transmitted, 3 received, 0% packet loss, time 0ms -+]) -+dnl Okay, now check the overlay with different packet sizes -+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl -+3 packets transmitted, 3 received, 0% packet loss, time 0ms -+]) -+NS_CHECK_EXEC([at_ns0], [ping -s 1600 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl -+3 packets transmitted, 3 received, 0% packet loss, time 0ms -+]) -+NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl -+3 packets transmitted, 3 received, 0% packet loss, time 0ms -+]) -+ -+OVS_TRAFFIC_VSWITCHD_STOP -+AT_CLEANUP -+ - AT_SETUP([datapath - ping over vxlan6 tunnel]) -+OVS_CHECK_TUNNEL_TSO() - OVS_CHECK_VXLAN_UDP6ZEROCSUM() - - OVS_TRAFFIC_VSWITCHD_START() -@@ -302,6 +351,7 @@ OVS_TRAFFIC_VSWITCHD_STOP - AT_CLEANUP - - AT_SETUP([datapath - ping over gre tunnel]) -+OVS_CHECK_TUNNEL_TSO() - OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) - OVS_CHECK_GRE() - -@@ -343,6 +393,7 @@ OVS_TRAFFIC_VSWITCHD_STOP - AT_CLEANUP - - AT_SETUP([datapath - ping over ip6gre L2 tunnel]) -+OVS_CHECK_TUNNEL_TSO() - OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) - OVS_CHECK_GRE() - OVS_CHECK_ERSPAN() -@@ -383,6 +434,7 @@ AT_CLEANUP - - - AT_SETUP([datapath - ping over erspan v1 tunnel]) -+OVS_CHECK_TUNNEL_TSO() - OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) - OVS_CHECK_GRE() - OVS_CHECK_ERSPAN() -@@ -419,6 +471,7 @@ OVS_TRAFFIC_VSWITCHD_STOP - AT_CLEANUP - - AT_SETUP([datapath - ping over 
erspan v2 tunnel]) -+OVS_CHECK_TUNNEL_TSO() - OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) - OVS_CHECK_GRE() - OVS_CHECK_ERSPAN() -@@ -455,6 +508,7 @@ OVS_TRAFFIC_VSWITCHD_STOP - AT_CLEANUP - - AT_SETUP([datapath - ping over ip6erspan v1 tunnel]) -+OVS_CHECK_TUNNEL_TSO() - OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) - OVS_CHECK_GRE() - OVS_CHECK_ERSPAN() -@@ -494,6 +548,7 @@ OVS_TRAFFIC_VSWITCHD_STOP - AT_CLEANUP - - AT_SETUP([datapath - ping over ip6erspan v2 tunnel]) -+OVS_CHECK_TUNNEL_TSO() - OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) - OVS_CHECK_GRE() - OVS_CHECK_ERSPAN() -@@ -534,6 +589,7 @@ OVS_TRAFFIC_VSWITCHD_STOP - AT_CLEANUP - - AT_SETUP([datapath - ping over geneve tunnel]) -+OVS_CHECK_TUNNEL_TSO() - OVS_CHECK_GENEVE() - - OVS_TRAFFIC_VSWITCHD_START() -@@ -575,6 +631,7 @@ OVS_TRAFFIC_VSWITCHD_STOP - AT_CLEANUP - - AT_SETUP([datapath - ping over geneve tunnel, delete flow regression]) -+OVS_CHECK_TUNNEL_TSO() - OVS_CHECK_GENEVE() - - OVS_TRAFFIC_VSWITCHD_START() -@@ -629,6 +686,7 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/|ERR|/d - AT_CLEANUP - - AT_SETUP([datapath - flow resume with geneve tun_metadata]) -+OVS_CHECK_TUNNEL_TSO() - OVS_CHECK_GENEVE() - - OVS_TRAFFIC_VSWITCHD_START() -@@ -680,6 +738,7 @@ OVS_TRAFFIC_VSWITCHD_STOP - AT_CLEANUP - - AT_SETUP([datapath - ping over geneve6 tunnel]) -+OVS_CHECK_TUNNEL_TSO() - OVS_CHECK_GENEVE_UDP6ZEROCSUM() - - OVS_TRAFFIC_VSWITCHD_START() -@@ -723,6 +782,7 @@ OVS_TRAFFIC_VSWITCHD_STOP - AT_CLEANUP - - AT_SETUP([datapath - ping over gre tunnel by simulated packets]) -+OVS_CHECK_TUNNEL_TSO() - OVS_CHECK_MIN_KERNEL(3, 10) - - OVS_TRAFFIC_VSWITCHD_START() -@@ -769,6 +829,7 @@ OVS_TRAFFIC_VSWITCHD_STOP - AT_CLEANUP - - AT_SETUP([datapath - ping over erspan v1 tunnel by simulated packets]) -+OVS_CHECK_TUNNEL_TSO() - OVS_CHECK_MIN_KERNEL(3, 10) - - OVS_TRAFFIC_VSWITCHD_START() -@@ -817,6 +878,7 @@ OVS_TRAFFIC_VSWITCHD_STOP - AT_CLEANUP - - AT_SETUP([datapath - ping over erspan v2 tunnel by simulated packets]) -+OVS_CHECK_TUNNEL_TSO() - 
OVS_CHECK_MIN_KERNEL(3, 10) - - OVS_TRAFFIC_VSWITCHD_START() -@@ -870,6 +932,7 @@ OVS_TRAFFIC_VSWITCHD_STOP - AT_CLEANUP - - AT_SETUP([datapath - ping over ip6erspan v1 tunnel by simulated packets]) -+OVS_CHECK_TUNNEL_TSO() - OVS_CHECK_MIN_KERNEL(3, 10) - - OVS_TRAFFIC_VSWITCHD_START() -@@ -925,6 +988,7 @@ OVS_TRAFFIC_VSWITCHD_STOP - AT_CLEANUP - - AT_SETUP([datapath - ping over ip6erspan v2 tunnel by simulated packets]) -+OVS_CHECK_TUNNEL_TSO() - OVS_CHECK_MIN_KERNEL(3, 10) - - OVS_TRAFFIC_VSWITCHD_START() -@@ -1981,6 +2045,111 @@ tcp,orig=(src=10.1.1.3,dst=10.1.1.4,sport=,dport=),reply=(src= - OVS_TRAFFIC_VSWITCHD_STOP - AT_CLEANUP - -+AT_SETUP([conntrack - zones from other field]) -+CHECK_CONNTRACK() -+OVS_TRAFFIC_VSWITCHD_START() -+ -+ADD_NAMESPACES(at_ns0, at_ns1) -+ -+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") -+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24") -+ -+dnl Allow any traffic from ns0->ns1. Only allow nd, return traffic from ns1->ns0. -+AT_DATA([flows.txt], [dnl -+priority=1,action=drop -+priority=10,arp,action=normal -+priority=10,icmp,action=normal -+priority=100,in_port=1,tcp,ct_state=-trk,action=ct(zone=5,table=0) -+priority=100,in_port=1,tcp,ct_state=+trk,action=ct(commit,zone=NXM_NX_CT_ZONE[]),2 -+priority=100,in_port=2,ct_state=-trk,tcp,action=ct(table=0,zone=5) -+priority=100,in_port=2,ct_state=+trk,ct_zone=5,tcp,action=1 -+]) -+ -+AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) -+ -+OVS_START_L7([at_ns1], [http]) -+ -+dnl HTTP requests from p0->p1 should work fine. 
-+NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log]) -+ -+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl -+tcp,dnl -+orig=(src=10.1.1.1,dst=10.1.1.2,sport=,dport=),dnl -+reply=(src=10.1.1.2,dst=10.1.1.1,sport=,dport=),dnl -+zone=5,protoinfo=(state=) -+]) -+ -+dnl This is to test when the zoneid is set by a field variable like -+dnl NXM_NX_CT_ZONE, the OVS xlate should generate a megaflow with a form of -+dnl "ct_zone(5), ... actions: ct(commit, zone=5)". The match "ct_zone(5)" -+dnl is needed as if we changes the zoneid into 15 in the following, the old -+dnl "ct_zone(5), ... actions: ct(commit, zone=5)" megaflow will not get hit, -+dnl and OVS will generate a new megaflow with the match "ct_zone(0xf)". -+dnl This will make sure that the new packets are committing to zoneid 15 -+dnl rather than old 5. -+AT_CHECK([ovs-appctl dpctl/dump-flows --names filter=in_port=ovs-p0 dnl -+ | grep "+trk" | grep -q "ct_zone(0x5)" ], [0], []) -+ -+AT_CHECK([ovs-ofctl mod-flows br0 dnl -+ 'priority=100,ct_state=-trk,tcp,in_port="ovs-p0" actions=ct(table=0,zone=15)']) -+ -+NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log]) -+ -+AT_CHECK([ovs-appctl dpctl/dump-flows --names filter=in_port=ovs-p0 dnl -+ | grep "+trk" | grep -q "ct_zone(0xf)" ], [0], []) -+ -+OVS_TRAFFIC_VSWITCHD_STOP -+AT_CLEANUP -+ -+AT_SETUP([conntrack - zones from other field, more tests]) -+CHECK_CONNTRACK() -+OVS_TRAFFIC_VSWITCHD_START() -+ -+ADD_NAMESPACES(at_ns0, at_ns1) -+ -+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") -+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24") -+ -+dnl Allow any traffic from ns0->ns1. Only allow nd, return traffic from ns1->ns0. 
-+AT_DATA([flows.txt], [dnl -+priority=1,action=drop -+priority=10,arp,action=normal -+priority=10,icmp,action=normal -+priority=100,in_port=1,tcp,ct_state=-trk,action=ct(zone=5,table=0,commit,exec(load:0xffff0005->NXM_NX_CT_LABEL[[0..31]])) -+priority=100,in_port=1,tcp,ct_state=+trk,action=ct(commit,zone=NXM_NX_CT_LABEL[[0..15]]),2 -+priority=100,in_port=2,ct_state=-trk,tcp,action=ct(table=0,zone=5) -+priority=100,in_port=2,ct_state=+trk,ct_zone=5,tcp,action=1 -+]) -+ -+AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) -+ -+OVS_START_L7([at_ns1], [http]) -+ -+dnl HTTP requests from p0->p1 should work fine. -+NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log]) -+ -+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl -+tcp,dnl -+orig=(src=10.1.1.1,dst=10.1.1.2,sport=,dport=),dnl -+reply=(src=10.1.1.2,dst=10.1.1.1,sport=,dport=),dnl -+zone=5,labels=0xffff0005,protoinfo=(state=) -+]) -+ -+AT_CHECK([ovs-appctl dpctl/dump-flows --names filter=in_port=ovs-p0 dnl -+ | grep "+trk" | sed 's/0xffff0005\/0xffff/0x5\/0xffff/' dnl -+ | grep -q "ct_label(0x5/0xffff)" ], [0], []) -+ -+AT_CHECK([ovs-ofctl mod-flows br0 'priority=100,ct_state=-trk,tcp,in_port="ovs-p0" actions=ct(table=0,zone=15,commit,exec(load:0xffff000f->NXM_NX_CT_LABEL[[0..31]]))']) -+ -+NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log]) -+ -+AT_CHECK([ovs-appctl dpctl/dump-flows --names filter=in_port=ovs-p0 dnl -+ | grep "+trk" | sed 's/0xffff000f\/0xffff/0xf\/0xffff/' dnl -+ | grep -q "ct_label(0xf/0xffff)" ], [0], []) -+ -+OVS_TRAFFIC_VSWITCHD_STOP -+AT_CLEANUP -+ - AT_SETUP([conntrack - multiple bridges]) - CHECK_CONNTRACK() - OVS_TRAFFIC_VSWITCHD_START( -@@ -3305,6 +3474,46 @@ NS_CHECK_EXEC([at_ns0], [ping6 -s 3200 -q -c 3 -i 0.3 -w 2 fc00::2 | FORMAT_PING - OVS_TRAFFIC_VSWITCHD_STOP - AT_CLEANUP - -+AT_SETUP([conntrack - IPv4 Fragmentation + NAT]) -+AT_SKIP_IF([test $HAVE_TCPDUMP = no]) -+CHECK_CONNTRACK() 
-+ -+OVS_TRAFFIC_VSWITCHD_START( -+ [set-fail-mode br0 secure -- ]) -+ -+ADD_NAMESPACES(at_ns0, at_ns1) -+ -+ADD_VETH(p0, at_ns0, br0, "10.2.1.1/24") -+ADD_VETH(p1, at_ns1, br0, "10.2.1.2/24") -+ -+dnl Create a dummy route for NAT -+NS_CHECK_EXEC([at_ns1], [ip addr add 10.1.1.2/32 dev lo]) -+NS_CHECK_EXEC([at_ns0], [ip route add 10.1.1.0/24 via 10.2.1.2]) -+NS_CHECK_EXEC([at_ns1], [ip route add 10.1.1.0/24 via 10.2.1.1]) -+ -+dnl Solely for debugging when things go wrong -+NS_EXEC([at_ns0], [tcpdump -l -n -xx -U -i p0 -w p0.pcap >tcpdump.out 2>/dev/null &]) -+NS_EXEC([at_ns1], [tcpdump -l -n -xx -U -i p1 -w p1.pcap >tcpdump.out 2>/dev/null &]) -+ -+AT_DATA([flows.txt], [dnl -+table=0,arp,actions=normal -+table=0,ct_state=-trk,ip,in_port=ovs-p0, actions=ct(table=1, nat) -+table=0,ct_state=-trk,ip,in_port=ovs-p1, actions=ct(table=1, nat) -+table=1,ct_state=+trk+new,ip,in_port=ovs-p0, actions=ct(commit, nat(src=10.1.1.1)),ovs-p1 -+table=1,ct_state=+trk+est,ip,in_port=ovs-p0, actions=ovs-p1 -+table=1,ct_state=+trk+est,ip,in_port=ovs-p1, actions=ovs-p0 -+]) -+ -+AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) -+ -+dnl Check connectivity -+NS_CHECK_EXEC([at_ns0], [ping -c 1 10.1.1.2 -M dont -s 4500 | FORMAT_PING], [0], [dnl -+1 packets transmitted, 1 received, 0% packet loss, time 0ms -+]) -+ -+OVS_TRAFFIC_VSWITCHD_STOP -+AT_CLEANUP -+ - AT_SETUP([conntrack - resubmit to ct multiple times]) - CHECK_CONNTRACK() - -@@ -3464,15 +3673,15 @@ action=normal - - AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) - --AT_CHECK([ovs-ofctl packet-out br0 
"packet=52540003287c525400444ab586dd6006f70605b02c4020010001000000000000000000000020200100010000000000000000000000101100000134e88deb13891389080803136161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl -+AT_CHECK([ovs-ofctl packet-out br0 "in_port=42,packet=52540003287c525400444ab586dd6006f70605b02c4020010001000000000000000000000020200100010000000000000000000000101100000134e88deb13891389080803136161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl - 
"16161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161"dnl - "61616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl - 
"1616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161, actions=ct(table=1)"]) - --AT_CHECK([ovs-ofctl packet-out br0 "packet=52540003287c525400444ab586dd6006f70602682c402001000100000000000000000000002020010001000000000000000000000010110005a834e88deb6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl -+AT_CHECK([ovs-ofctl packet-out br0 
"in_port=42,packet=52540003287c525400444ab586dd6006f70602682c402001000100000000000000000000002020010001000000000000000000000010110005a834e88deb6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl - "161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161, actions=ct(table=1)"]) - --AT_CHECK([ovs-ofctl packet-out br0 
"packet=52540003287c525400444ab586dd6006f706033d1140200100010000000000000000000000202001000100000000000000000000001013891389033d923861616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl -+AT_CHECK([ovs-ofctl packet-out br0 "in_port=42,packet=52540003287c525400444ab586dd6006f706033d1140200100010000000000000000000000202001000100000000000000000000001013891389033d923861616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl - 
"1616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161610a, actions=ct(table=1)"]) - - AT_CHECK([ovs-appctl dpctl/dump-flows | head -2 | tail -1 | grep -q -e ["]udp[(]src=5001["]]) -@@ -5817,7 +6026,7 @@ on_exit 'ovs-appctl revalidator/purge' - on_exit 'ovs-appctl dpif/dump-flows br0' - - dnl Should work with the virtual IP address through NAT --for i in 1 2 3 4 5 6 7 8 9 10 11 12; do -+for i in $(seq 1 50); do - echo Request $i - NS_CHECK_EXEC([at_ns1], [wget 10.1.1.64 -t 5 -T 1 --retry-connrefused -v -o wget$i.log]) - done -@@ -6106,6 +6315,132 @@ AT_CHECK([ovs-ofctl dump-flows br0 | grep table=2, | OFPROTO_CLEAR_DURATION_IDLE - OVS_TRAFFIC_VSWITCHD_STOP - AT_CLEANUP - -+AT_SETUP([conntrack - can match and clear ct_state from outside OVS]) -+CHECK_CONNTRACK_LOCAL_STACK() -+OVS_CHECK_TUNNEL_TSO() -+OVS_CHECK_GENEVE() -+ -+OVS_TRAFFIC_VSWITCHD_START() -+ADD_BR([br-underlay], [set bridge br-underlay other-config:hwaddr=\"f0:00:00:01:01:02\"]) -+ -+AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"]) -+AT_CHECK([ovs-ofctl add-flow br-underlay "priority=100,ct_state=+trk,actions=ct_clear,resubmit(,0)"]) 
-+AT_CHECK([ovs-ofctl add-flow br-underlay "priority=10,actions=normal"]) -+ -+ADD_NAMESPACES(at_ns0) -+ -+dnl Set up underlay link from host into the namespace using veth pair. -+ADD_VETH(p0, at_ns0, br-underlay, "172.31.1.1/24", "f0:00:00:01:01:01") -+AT_CHECK([ip addr add dev br-underlay "172.31.1.100/24"]) -+AT_CHECK([ip link set dev br-underlay up]) -+ -+dnl Set up tunnel endpoints on OVS outside the namespace and with a native -+dnl linux device inside the namespace. -+ADD_OVS_TUNNEL([geneve], [br0], [at_gnv0], [172.31.1.1], [10.1.1.100/24]) -+ADD_NATIVE_TUNNEL([geneve], [ns_gnv0], [at_ns0], [172.31.1.100], [10.1.1.1/24], -+ [vni 0]) -+ -+dnl First, check the underlay -+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 172.31.1.100 | FORMAT_PING], [0], [dnl -+3 packets transmitted, 3 received, 0% packet loss, time 0ms -+]) -+ -+dnl Okay, now check the overlay -+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl -+3 packets transmitted, 3 received, 0% packet loss, time 0ms -+]) -+ -+dnl Confirm that the ct_state and ct_clear action found its way to the dp -+AT_CHECK([ovs-appctl dpctl/dump-flows --names | grep ct_clear | sort | dnl -+ grep 'eth(src=f0:00:00:01:01:02,dst=f0:00:00:01:01:01)' | dnl -+ strip_stats | strip_used | dnl -+ sed 's/,packet_type(ns=[[0-9]]*,id=[[0-9]]*),/,/'], -+ [0], [dnl -+recirc_id(0),in_port(br-underlay),ct_state(+trk),eth(src=f0:00:00:01:01:02,dst=f0:00:00:01:01:01),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:0.0s, actions:ct_clear,ovs-p0 -+]) -+ -+OVS_TRAFFIC_VSWITCHD_STOP -+AT_CLEANUP -+ -+AT_BANNER([IGMP]) -+ -+AT_SETUP([IGMP - flood under normal action]) -+ -+OVS_TRAFFIC_VSWITCHD_START() -+ADD_NAMESPACES(at_ns0, at_ns1) -+ -+ADD_VETH(p1, at_ns0, br0, "10.1.1.1/24", "f0:00:00:01:01:01") -+ADD_VETH(p2, at_ns1, br0, "10.1.1.2/24", "f0:00:00:01:01:02") -+ -+AT_CHECK([ovs-ofctl add-flow br0 "actions=NORMAL"]) -+ -+NS_CHECK_EXEC([at_ns0], [$PYTHON3 $srcdir/sendpkt.py p1 01 00 5e 01 01 03 
dnl -+f0 00 00 01 01 01 08 00 46 c0 00 28 00 00 40 00 01 02 d3 49 45 65 eb 4a e0 dnl -+00 00 16 94 04 00 00 22 00 f9 02 00 00 00 01 04 00 00 00 e0 00 00 fb 00 00 dnl -+00 00 00 00 > /dev/null]) -+ -+AT_CHECK([ovs-appctl dpctl/dump-flows --names | grep -e .*ipv4 | sort | dnl -+ strip_stats | strip_used | strip_recirc | dnl -+ sed 's/,packet_type(ns=[[0-9]]*,id=[[0-9]]*),/,/'], -+ [0], [dnl -+recirc_id(),in_port(ovs-p1),eth(src=f0:00:00:01:01:01,dst=01:00:5e:01:01:03),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:br0,ovs-p2 -+]) -+OVS_TRAFFIC_VSWITCHD_STOP -+AT_CLEANUP -+ -+AT_SETUP([IGMP - forward with ICMP]) -+ -+OVS_TRAFFIC_VSWITCHD_START() -+ADD_NAMESPACES(at_ns0, at_ns1) -+ -+ADD_VETH(p1, at_ns0, br0, "10.1.1.1/24", "f0:00:00:01:01:01") -+ADD_VETH(p2, at_ns1, br0, "10.1.1.2/24", "f0:00:00:01:01:02") -+ -+AT_DATA([flows.txt], [dnl -+table=0, arp actions=NORMAL -+table=0, ip,in_port=1 actions=ct(table=1,zone=64000) -+table=0, in_port=2 actions=output:1 -+table=1, ip,ct_state=+trk+inv actions=drop -+table=1 ip,in_port=1,icmp,ct_state=+trk+new actions=output:2 -+table=1, in_port=1,ip,ct_state=+trk+new actions=controller(userdata=00.de.ad.be.ef.ca.fe.01) -+table=1, in_port=1,ip,ct_state=+trk+est actions=output:2 -+]) -+AT_CHECK([ovs-ofctl del-flows br0]) -+AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) -+ -+dnl Send the IGMP, followed by a unicast ICMP - ensure we won't black hole -+ -+NS_CHECK_EXEC([at_ns0], [$PYTHON3 $srcdir/sendpkt.py p1 f0 00 00 01 01 02 dnl -+f0 00 00 01 01 01 08 00 46 c0 00 28 00 00 40 00 01 02 d3 49 45 65 eb 4a e0 dnl -+00 00 16 94 04 00 00 22 00 f9 02 00 00 00 01 04 00 00 00 e0 00 00 fb 00 00 dnl -+00 00 00 00 > /dev/null]) -+ -+NS_CHECK_EXEC([at_ns0], [$PYTHON3 $srcdir/sendpkt.py p1 f0 00 00 01 01 02 dnl -+f0 00 00 01 01 01 08 00 45 00 00 1c 00 01 00 00 40 01 64 dc 0a 01 01 01 0a dnl -+01 01 02 08 00 f7 ff ff ff ff ff > /dev/null]) -+ -+sleep 1 -+ -+dnl Prefer the OpenFlow rules, because different datapaths will 
behave slightly -+dnl differently with respect to the exact dp rules. -+dnl -+dnl This is also why we clear n_bytes / n_packets - some kernels with ipv6 -+dnl enabled will bump some of these counters non-deterministically -+ -+AT_CHECK([ovs-ofctl dump-flows br0 | grep -v NXST | dnl -+ strip_duration | grep -v arp | grep -v n_packets=0 | dnl -+ grep -v 'in_port=2 actions=output:1' | dnl -+ sed 's/n_bytes=[[0-9]]*/n_bytes=0/ -+ s/idle_age=[[0-9]]*/idle_age=0/ -+ s/n_packets=[[1-9]]/n_packets=0/' | sort], [0], [dnl -+ cookie=0x0, table=0, n_packets=0, n_bytes=0, idle_age=0, ip,in_port=1 actions=ct(table=1,zone=64000) -+ cookie=0x0, table=1, n_packets=0, n_bytes=0, idle_age=0, ct_state=+new+trk,icmp,in_port=1 actions=output:2 -+ cookie=0x0, table=1, n_packets=0, n_bytes=0, idle_age=0, ct_state=+new+trk,ip,in_port=1 actions=controller(userdata=00.de.ad.be.ef.ca.fe.01) -+]) -+ -+OVS_TRAFFIC_VSWITCHD_STOP -+AT_CLEANUP -+ - AT_BANNER([802.1ad]) - - AT_SETUP([802.1ad - vlan_limit]) -diff --git a/tests/system-tso-macros.at b/tests/system-tso-macros.at -index 406334f3e0..1a80047619 100644 ---- a/tests/system-tso-macros.at -+++ b/tests/system-tso-macros.at -@@ -29,3 +29,5 @@ m4_define([CONFIGURE_VETH_OFFLOADS], - [AT_CHECK([ethtool -K $1 sg on], [0], [ignore], [ignore])] - [AT_CHECK([ethtool -K $1 tso on], [0], [ignore], [ignore])] - ) -+ -+m4_define([CHECK_SYSTEM_TSO], []) -diff --git a/tests/test-cmap.c b/tests/test-cmap.c -index 0705475606..f8cc4dd80a 100644 ---- a/tests/test-cmap.c -+++ b/tests/test-cmap.c -@@ -74,6 +74,7 @@ check_cmap(struct cmap *cmap, const int values[], size_t n, - cmap_values[i++] = e->value; - } - assert(i == n); -+ assert(e == NULL); - - /* Here we test iteration with cmap_next_position() */ - i = 0; -@@ -107,6 +108,7 @@ check_cmap(struct cmap *cmap, const int values[], size_t n, - count += e->value == values[i]; - } - assert(count == 1); -+ assert(e == NULL); - } - - /* Check that all the values are there in batched lookup. 
*/ -@@ -130,6 +132,7 @@ check_cmap(struct cmap *cmap, const int values[], size_t n, - CMAP_NODE_FOR_EACH (e, node, nodes[k]) { - count += e->value == values[i + k]; - } -+ assert(e == NULL); - } - assert(count == j); /* j elements in a batch. */ - } -diff --git a/tests/test-hindex.c b/tests/test-hindex.c -index af06be5fcc..95e49284ee 100644 ---- a/tests/test-hindex.c -+++ b/tests/test-hindex.c -@@ -265,6 +265,11 @@ test_hindex_for_each_safe(hash_func *hash) - i = 0; - n_remaining = n; - HINDEX_FOR_EACH_SAFE (e, next, node, &hindex) { -+ if (hindex_next(&hindex, &e->node) == NULL) { -+ assert(next == NULL); -+ } else { -+ assert(&next->node == hindex_next(&hindex, &e->node)); -+ } - assert(i < n); - if (pattern & (1ul << e->value)) { - size_t j; -@@ -281,6 +286,7 @@ test_hindex_for_each_safe(hash_func *hash) - i++; - } - assert(i == n); -+ assert(next == NULL); - - for (i = 0; i < n; i++) { - if (pattern & (1ul << i)) { -diff --git a/tests/test-hmap.c b/tests/test-hmap.c -index 9259b0b3fc..47b4755386 100644 ---- a/tests/test-hmap.c -+++ b/tests/test-hmap.c -@@ -62,6 +62,7 @@ check_hmap(struct hmap *hmap, const int values[], size_t n, - hmap_values[i++] = e->value; - } - assert(i == n); -+ assert(e == NULL); - - memcpy(sort_values, values, sizeof *sort_values * n); - qsort(sort_values, n, sizeof *sort_values, compare_ints); -@@ -82,6 +83,7 @@ check_hmap(struct hmap *hmap, const int values[], size_t n, - count += e->value == values[i]; - } - assert(count == 1); -+ assert(e == NULL); - } - - /* Check counters. 
*/ -@@ -243,6 +245,11 @@ test_hmap_for_each_safe(hash_func *hash) - i = 0; - n_remaining = n; - HMAP_FOR_EACH_SAFE (e, next, node, &hmap) { -+ if (hmap_next(&hmap, &e->node) == NULL) { -+ assert(next == NULL); -+ } else { -+ assert(&next->node == hmap_next(&hmap, &e->node)); -+ } - assert(i < n); - if (pattern & (1ul << e->value)) { - size_t j; -@@ -259,6 +266,8 @@ test_hmap_for_each_safe(hash_func *hash) - i++; - } - assert(i == n); -+ assert(next == NULL); -+ assert(e == NULL); - - for (i = 0; i < n; i++) { - if (pattern & (1ul << i)) { -@@ -308,6 +317,7 @@ test_hmap_for_each_pop(hash_func *hash) - i++; - } - assert(i == n); -+ assert(e == NULL); - - hmap_destroy(&hmap); - } -diff --git a/tests/test-json.c b/tests/test-json.c -index a7ee595e0b..072a537252 100644 ---- a/tests/test-json.c -+++ b/tests/test-json.c -@@ -22,6 +22,8 @@ - #include - #include - #include "ovstest.h" -+#include "random.h" -+#include "timeval.h" - #include "util.h" - - /* --pretty: If set, the JSON output is pretty-printed, instead of printed as -@@ -157,3 +159,69 @@ test_json_main(int argc, char *argv[]) - } - - OVSTEST_REGISTER("test-json", test_json_main); -+ -+static void -+json_string_benchmark_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) -+{ -+ struct { -+ int n; -+ int quote_probablility; -+ int special_probability; -+ int iter; -+ } configs[] = { -+ { 100000, 0, 0, 1000, }, -+ { 100000, 2, 1, 1000, }, -+ { 100000, 10, 1, 1000, }, -+ { 10000000, 0, 0, 100, }, -+ { 10000000, 2, 1, 100, }, -+ { 10000000, 10, 1, 100, }, -+ { 100000000, 0, 0, 10. 
}, -+ { 100000000, 2, 1, 10, }, -+ { 100000000, 10, 1, 10, }, -+ }; -+ -+ printf(" SIZE Q S TIME\n"); -+ printf("--------------------------------------\n"); -+ -+ for (int i = 0; i < ARRAY_SIZE(configs); i++) { -+ int iter = configs[i].iter; -+ int n = configs[i].n; -+ char *str = xzalloc(n); -+ -+ for (int j = 0; j < n - 1; j++) { -+ int r = random_range(100); -+ -+ if (r < configs[i].special_probability) { -+ str[j] = random_range(' ' - 1) + 1; -+ } else if (r < (configs[i].special_probability -+ + configs[i].quote_probablility)) { -+ str[j] = '"'; -+ } else { -+ str[j] = random_range(256 - ' ') + ' '; -+ } -+ } -+ -+ printf("%-11d %-2d %-2d: ", n, configs[i].quote_probablility, -+ configs[i].special_probability); -+ fflush(stdout); -+ -+ struct json *json = json_string_create_nocopy(str); -+ uint64_t start = time_msec(); -+ -+ char **res = xzalloc(iter * sizeof *res); -+ for (int j = 0; j < iter; j++) { -+ res[j] = json_to_string(json, 0); -+ } -+ -+ printf("%16.3lf ms\n", (double) (time_msec() - start) / iter); -+ json_destroy(json); -+ for (int j = 0; j < iter; j++) { -+ free(res[j]); -+ } -+ free(res); -+ } -+ -+ exit(0); -+} -+ -+OVSTEST_REGISTER("json-string-benchmark", json_string_benchmark_main); -diff --git a/tests/test-list.c b/tests/test-list.c -index 6f1fb059bc..648e02a5e2 100644 ---- a/tests/test-list.c -+++ b/tests/test-list.c -@@ -61,7 +61,7 @@ check_list(struct ovs_list *list, const int values[], size_t n) - assert(e->value == values[i]); - i++; - } -- assert(&e->node == list); -+ assert(e == NULL); - assert(i == n); - - i = 0; -@@ -70,7 +70,7 @@ check_list(struct ovs_list *list, const int values[], size_t n) - assert(e->value == values[n - i - 1]); - i++; - } -- assert(&e->node == list); -+ assert(e == NULL); - assert(i == n); - - assert(ovs_list_is_empty(list) == !n); -@@ -135,6 +135,13 @@ test_list_for_each_safe(void) - values_idx = 0; - n_remaining = n; - LIST_FOR_EACH_SAFE (e, next, node, &list) { -+ /* "next" is valid as long as it's not 
pointing to &list. */ -+ if (&e->node == list.prev) { -+ assert(next == NULL); -+ } else { -+ assert(&next->node == e->node.next); -+ } -+ - assert(i < n); - if (pattern & (1ul << i)) { - ovs_list_remove(&e->node); -@@ -148,7 +155,8 @@ test_list_for_each_safe(void) - i++; - } - assert(i == n); -- assert(&e->node == &list); -+ assert(e == NULL); -+ assert(next == NULL); - - for (i = 0; i < n; i++) { - if (pattern & (1ul << i)) { -diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c -index daa55dab7b..57572cd3ed 100644 ---- a/tests/test-ovsdb.c -+++ b/tests/test-ovsdb.c -@@ -512,6 +512,18 @@ do_diff_data(struct ovs_cmdl_context *ctx) - ovs_fatal(0, "failed to apply diff"); - } - -+ /* Apply diff to 'old' in place. */ -+ error = ovsdb_datum_apply_diff_in_place(&old, &diff, &type); -+ if (error) { -+ char *string = ovsdb_error_to_string_free(error); -+ ovs_fatal(0, "%s", string); -+ } -+ -+ /* Test to make sure 'old' equals 'new' now. */ -+ if (!ovsdb_datum_equals(&new, &old, &type)) { -+ ovs_fatal(0, "failed to apply diff in place"); -+ } -+ - /* Print diff */ - json = ovsdb_datum_to_json(&diff, &type); - printf ("diff: "); -@@ -522,6 +534,11 @@ do_diff_data(struct ovs_cmdl_context *ctx) - printf ("apply diff: "); - print_and_free_json(json); - -+ /* Print updated 'old' */ -+ json = ovsdb_datum_to_json(&old, &type); -+ printf ("apply diff in place: "); -+ print_and_free_json(json); -+ - ovsdb_datum_destroy(&new, &type); - ovsdb_datum_destroy(&old, &type); - ovsdb_datum_destroy(&diff, &type); -@@ -1862,7 +1879,8 @@ print_and_log(const char *format, ...) - } - - static char * --format_idl_row(const struct ovsdb_idl_row *row, int step, const char *contents) -+format_idl_row(const struct ovsdb_idl_row *row, int step, const char *contents, -+ bool terse) - { - const char *change_str = - !ovsdb_idl_track_is_set(row->table) -@@ -1873,9 +1891,13 @@ format_idl_row(const struct ovsdb_idl_row *row, int step, const char *contents) - ? 
"deleted row: " - : ""; - -- return xasprintf("%03d: table %s: %s%s uuid=" UUID_FMT, -- step, row->table->class_->name, change_str, contents, -- UUID_ARGS(&row->uuid)); -+ if (terse) { -+ return xasprintf("%03d: table %s", step, row->table->class_->name); -+ } else { -+ return xasprintf("%03d: table %s: %s%s uuid=" UUID_FMT, -+ step, row->table->class_->name, change_str, -+ contents, UUID_ARGS(&row->uuid)); -+ } - } - - static void -@@ -1998,7 +2020,7 @@ print_idl_row_updated_singleton(const struct idltest_singleton *sng, int step) - } - - static void --print_idl_row_simple(const struct idltest_simple *s, int step) -+print_idl_row_simple(const struct idltest_simple *s, int step, bool terse) - { - struct ds msg = DS_EMPTY_INITIALIZER; - ds_put_format(&msg, "i=%"PRId64" r=%g b=%s s=%s u="UUID_FMT" ia=[", -@@ -2025,7 +2047,7 @@ print_idl_row_simple(const struct idltest_simple *s, int step) - } - ds_put_cstr(&msg, "]"); - -- char *row_msg = format_idl_row(&s->header_, step, ds_cstr(&msg)); -+ char *row_msg = format_idl_row(&s->header_, step, ds_cstr(&msg), terse); - print_and_log("%s", row_msg); - ds_destroy(&msg); - free(row_msg); -@@ -2034,7 +2056,7 @@ print_idl_row_simple(const struct idltest_simple *s, int step) - } - - static void --print_idl_row_link1(const struct idltest_link1 *l1, int step) -+print_idl_row_link1(const struct idltest_link1 *l1, int step, bool terse) - { - struct ds msg = DS_EMPTY_INITIALIZER; - ds_put_format(&msg, "i=%"PRId64" k=", l1->i); -@@ -2053,7 +2075,7 @@ print_idl_row_link1(const struct idltest_link1 *l1, int step) - ds_put_format(&msg, "%"PRId64, l1->l2->i); - } - -- char *row_msg = format_idl_row(&l1->header_, step, ds_cstr(&msg)); -+ char *row_msg = format_idl_row(&l1->header_, step, ds_cstr(&msg), terse); - print_and_log("%s", row_msg); - ds_destroy(&msg); - free(row_msg); -@@ -2062,7 +2084,7 @@ print_idl_row_link1(const struct idltest_link1 *l1, int step) - } - - static void --print_idl_row_link2(const struct idltest_link2 *l2, int 
step) -+print_idl_row_link2(const struct idltest_link2 *l2, int step, bool terse) - { - struct ds msg = DS_EMPTY_INITIALIZER; - ds_put_format(&msg, "i=%"PRId64" l1=", l2->i); -@@ -2070,7 +2092,7 @@ print_idl_row_link2(const struct idltest_link2 *l2, int step) - ds_put_format(&msg, "%"PRId64, l2->l1->i); - } - -- char *row_msg = format_idl_row(&l2->header_, step, ds_cstr(&msg)); -+ char *row_msg = format_idl_row(&l2->header_, step, ds_cstr(&msg), terse); - print_and_log("%s", row_msg); - ds_destroy(&msg); - free(row_msg); -@@ -2079,7 +2101,7 @@ print_idl_row_link2(const struct idltest_link2 *l2, int step) - } - - static void --print_idl_row_simple3(const struct idltest_simple3 *s3, int step) -+print_idl_row_simple3(const struct idltest_simple3 *s3, int step, bool terse) - { - struct ds msg = DS_EMPTY_INITIALIZER; - size_t i; -@@ -2098,7 +2120,7 @@ print_idl_row_simple3(const struct idltest_simple3 *s3, int step) - } - ds_put_cstr(&msg, "]"); - -- char *row_msg = format_idl_row(&s3->header_, step, ds_cstr(&msg)); -+ char *row_msg = format_idl_row(&s3->header_, step, ds_cstr(&msg), terse); - print_and_log("%s", row_msg); - ds_destroy(&msg); - free(row_msg); -@@ -2107,12 +2129,12 @@ print_idl_row_simple3(const struct idltest_simple3 *s3, int step) - } - - static void --print_idl_row_simple4(const struct idltest_simple4 *s4, int step) -+print_idl_row_simple4(const struct idltest_simple4 *s4, int step, bool terse) - { - struct ds msg = DS_EMPTY_INITIALIZER; - ds_put_format(&msg, "name=%s", s4->name); - -- char *row_msg = format_idl_row(&s4->header_, step, ds_cstr(&msg)); -+ char *row_msg = format_idl_row(&s4->header_, step, ds_cstr(&msg), terse); - print_and_log("%s", row_msg); - ds_destroy(&msg); - free(row_msg); -@@ -2121,7 +2143,7 @@ print_idl_row_simple4(const struct idltest_simple4 *s4, int step) - } - - static void --print_idl_row_simple6(const struct idltest_simple6 *s6, int step) -+print_idl_row_simple6(const struct idltest_simple6 *s6, int step, bool terse) - { 
- struct ds msg = DS_EMPTY_INITIALIZER; - ds_put_format(&msg, "name=%s ", s6->name); -@@ -2132,7 +2154,7 @@ print_idl_row_simple6(const struct idltest_simple6 *s6, int step) - } - ds_put_cstr(&msg, "]"); - -- char *row_msg = format_idl_row(&s6->header_, step, ds_cstr(&msg)); -+ char *row_msg = format_idl_row(&s6->header_, step, ds_cstr(&msg), terse); - print_and_log("%s", row_msg); - ds_destroy(&msg); - free(row_msg); -@@ -2141,12 +2163,13 @@ print_idl_row_simple6(const struct idltest_simple6 *s6, int step) - } - - static void --print_idl_row_singleton(const struct idltest_singleton *sng, int step) -+print_idl_row_singleton(const struct idltest_singleton *sng, int step, -+ bool terse) - { - struct ds msg = DS_EMPTY_INITIALIZER; - ds_put_format(&msg, "name=%s", sng->name); - -- char *row_msg = format_idl_row(&sng->header_, step, ds_cstr(&msg)); -+ char *row_msg = format_idl_row(&sng->header_, step, ds_cstr(&msg), terse); - print_and_log("%s", row_msg); - ds_destroy(&msg); - free(row_msg); -@@ -2155,7 +2178,7 @@ print_idl_row_singleton(const struct idltest_singleton *sng, int step) - } - - static void --print_idl(struct ovsdb_idl *idl, int step) -+print_idl(struct ovsdb_idl *idl, int step, bool terse) - { - const struct idltest_simple3 *s3; - const struct idltest_simple4 *s4; -@@ -2167,31 +2190,31 @@ print_idl(struct ovsdb_idl *idl, int step) - int n = 0; - - IDLTEST_SIMPLE_FOR_EACH (s, idl) { -- print_idl_row_simple(s, step); -+ print_idl_row_simple(s, step, terse); - n++; - } - IDLTEST_LINK1_FOR_EACH (l1, idl) { -- print_idl_row_link1(l1, step); -+ print_idl_row_link1(l1, step, terse); - n++; - } - IDLTEST_LINK2_FOR_EACH (l2, idl) { -- print_idl_row_link2(l2, step); -+ print_idl_row_link2(l2, step, terse); - n++; - } - IDLTEST_SIMPLE3_FOR_EACH (s3, idl) { -- print_idl_row_simple3(s3, step); -+ print_idl_row_simple3(s3, step, terse); - n++; - } - IDLTEST_SIMPLE4_FOR_EACH (s4, idl) { -- print_idl_row_simple4(s4, step); -+ print_idl_row_simple4(s4, step, terse); - 
n++; - } - IDLTEST_SIMPLE6_FOR_EACH (s6, idl) { -- print_idl_row_simple6(s6, step); -+ print_idl_row_simple6(s6, step, terse); - n++; - } - IDLTEST_SINGLETON_FOR_EACH (sng, idl) { -- print_idl_row_singleton(sng, step); -+ print_idl_row_singleton(sng, step, terse); - n++; - } - if (!n) { -@@ -2200,7 +2223,7 @@ print_idl(struct ovsdb_idl *idl, int step) - } - - static void --print_idl_track(struct ovsdb_idl *idl, int step) -+print_idl_track(struct ovsdb_idl *idl, int step, bool terse) - { - const struct idltest_simple3 *s3; - const struct idltest_simple4 *s4; -@@ -2211,27 +2234,27 @@ print_idl_track(struct ovsdb_idl *idl, int step) - int n = 0; - - IDLTEST_SIMPLE_FOR_EACH_TRACKED (s, idl) { -- print_idl_row_simple(s, step); -+ print_idl_row_simple(s, step, terse); - n++; - } - IDLTEST_LINK1_FOR_EACH_TRACKED (l1, idl) { -- print_idl_row_link1(l1, step); -+ print_idl_row_link1(l1, step, terse); - n++; - } - IDLTEST_LINK2_FOR_EACH_TRACKED (l2, idl) { -- print_idl_row_link2(l2, step); -+ print_idl_row_link2(l2, step, terse); - n++; - } - IDLTEST_SIMPLE3_FOR_EACH_TRACKED (s3, idl) { -- print_idl_row_simple3(s3, step); -+ print_idl_row_simple3(s3, step, terse); - n++; - } - IDLTEST_SIMPLE4_FOR_EACH_TRACKED (s4, idl) { -- print_idl_row_simple4(s4, step); -+ print_idl_row_simple4(s4, step, terse); - n++; - } - IDLTEST_SIMPLE6_FOR_EACH_TRACKED (s6, idl) { -- print_idl_row_simple6(s6, step); -+ print_idl_row_simple6(s6, step, terse); - n++; - } - -@@ -2634,6 +2657,13 @@ do_idl(struct ovs_cmdl_context *ctx) - char *arg = ctx->argv[i]; - struct jsonrpc_msg *request, *reply; - -+ bool terse = false; -+ if (*arg == '?') { -+ /* We're only interested in terse table contents. */ -+ terse = true; -+ arg++; -+ } -+ - if (*arg == '+') { - /* The previous transaction didn't change anything. */ - arg++; -@@ -2654,10 +2684,10 @@ do_idl(struct ovs_cmdl_context *ctx) - - /* Print update. 
*/ - if (track) { -- print_idl_track(idl, step++); -+ print_idl_track(idl, step++, terse); - ovsdb_idl_track_clear(idl); - } else { -- print_idl(idl, step++); -+ print_idl(idl, step++, terse); - } - } - seqno = ovsdb_idl_get_seqno(idl); -@@ -2710,7 +2740,7 @@ do_idl(struct ovs_cmdl_context *ctx) - ovsdb_idl_wait(idl); - poll_block(); - } -- print_idl(idl, step++); -+ print_idl(idl, step++, false); - ovsdb_idl_track_clear(idl); - ovsdb_idl_destroy(idl); - print_and_log("%03d: done", step); -@@ -2727,13 +2757,15 @@ print_idl_row_simple2(const struct idltest_simple2 *s, int step) - printf("%03d: name=%s smap=[", - step, s->name); - for (i = 0; i < smap->n; i++) { -- printf("[%s : %s]%s", smap->keys[i].string, smap->values[i].string, -- i < smap->n-1? ",": ""); -+ printf("[%s : %s]%s", -+ smap->keys[i].s->string, smap->values[i].s->string, -+ i < smap->n - 1 ? "," : ""); - } - printf("] imap=["); - for (i = 0; i < imap->n; i++) { -- printf("[%"PRId64" : %s]%s", imap->keys[i].integer, imap->values[i].string, -- i < imap->n-1? ",":""); -+ printf("[%"PRId64" : %s]%s", -+ imap->keys[i].integer, imap->values[i].s->string, -+ i < imap->n - 1 ? 
"," : ""); - } - printf("]\n"); - } -@@ -2802,8 +2834,8 @@ do_idl_partial_update_map_column(struct ovs_cmdl_context *ctx) - myTxn = ovsdb_idl_txn_create(idl); - smap = idltest_simple2_get_smap(myRow, OVSDB_TYPE_STRING, - OVSDB_TYPE_STRING); -- strcpy(key_to_delete, smap->keys[0].string); -- idltest_simple2_update_smap_delkey(myRow, smap->keys[0].string); -+ ovs_strlcpy(key_to_delete, smap->keys[0].s->string, sizeof key_to_delete); -+ idltest_simple2_update_smap_delkey(myRow, smap->keys[0].s->string); - ovsdb_idl_txn_commit_block(myTxn); - ovsdb_idl_txn_destroy(myTxn); - ovsdb_idl_get_initial_snapshot(idl); -@@ -2829,7 +2861,7 @@ dump_simple3(struct ovsdb_idl *idl, - int step) - { - IDLTEST_SIMPLE3_FOR_EACH(myRow, idl) { -- print_idl_row_simple3(myRow, step); -+ print_idl_row_simple3(myRow, step, false); - } - } - -@@ -2971,7 +3003,7 @@ do_idl_compound_index_with_ref(struct ovs_cmdl_context *ctx) - idltest_simple3_index_set_uref(equal, &myRow2, 1); - printf("%03d: Query using index with reference\n", step++); - IDLTEST_SIMPLE3_FOR_EACH_EQUAL (myRow, equal, index) { -- print_idl_row_simple3(myRow, step++); -+ print_idl_row_simple3(myRow, step++, false); - } - idltest_simple3_index_destroy_row(equal); - -diff --git a/tests/test-ovsdb.py b/tests/test-ovsdb.py -index 5bc0bf6814..853264f22b 100644 ---- a/tests/test-ovsdb.py -+++ b/tests/test-ovsdb.py -@@ -232,75 +232,87 @@ def get_singleton_table_printable_row(row): - return "name=%s" % row.name - - --def print_row(table, row, step, contents): -- s = "%03d: table %s: %s " % (step, table, contents) -- s += get_simple_printable_row_string(row, ["uuid"]) -+def print_row(table, row, step, contents, terse): -+ if terse: -+ s = "%03d: table %s" % (step, table) -+ else: -+ s = "%03d: table %s: %s " % (step, table, contents) -+ s += get_simple_printable_row_string(row, ["uuid"]) - print(s) - - --def print_idl(idl, step): -+def print_idl(idl, step, terse=False): - n = 0 - if "simple" in idl.tables: - simple = 
idl.tables["simple"].rows - for row in simple.values(): - print_row("simple", row, step, -- get_simple_table_printable_row(row)) -+ get_simple_table_printable_row(row), -+ terse) - n += 1 - - if "simple2" in idl.tables: - simple2 = idl.tables["simple2"].rows - for row in simple2.values(): - print_row("simple2", row, step, -- get_simple2_table_printable_row(row)) -+ get_simple2_table_printable_row(row), -+ terse) - n += 1 - - if "simple3" in idl.tables: - simple3 = idl.tables["simple3"].rows - for row in simple3.values(): - print_row("simple3", row, step, -- get_simple3_table_printable_row(row)) -+ get_simple3_table_printable_row(row), -+ terse) - n += 1 - - if "simple4" in idl.tables: - simple4 = idl.tables["simple4"].rows - for row in simple4.values(): - print_row("simple4", row, step, -- get_simple4_table_printable_row(row)) -+ get_simple4_table_printable_row(row), -+ terse) - n += 1 - - if "simple5" in idl.tables: - simple5 = idl.tables["simple5"].rows - for row in simple5.values(): - print_row("simple5", row, step, -- get_simple5_table_printable_row(row)) -+ get_simple5_table_printable_row(row), -+ terse) - n += 1 - - if "simple6" in idl.tables: - simple6 = idl.tables["simple6"].rows - for row in simple6.values(): - print_row("simple6", row, step, -- get_simple6_table_printable_row(row)) -+ get_simple6_table_printable_row(row), -+ terse) - n += 1 - - if "link1" in idl.tables: - l1 = idl.tables["link1"].rows - for row in l1.values(): - print_row("link1", row, step, -- get_link1_table_printable_row(row)) -+ get_link1_table_printable_row(row), -+ terse) - n += 1 - - if "link2" in idl.tables: - l2 = idl.tables["link2"].rows - for row in l2.values(): - print_row("link2", row, step, -- get_link2_table_printable_row(row)) -+ get_link2_table_printable_row(row), -+ terse) - n += 1 - - if "singleton" in idl.tables: - sng = idl.tables["singleton"].rows - for row in sng.values(): - print_row("singleton", row, step, -- get_singleton_table_printable_row(row)) -+ 
get_singleton_table_printable_row(row), -+ terse) - n += 1 - - if not n: -@@ -701,6 +713,12 @@ def do_idl(schema_file, remote, *commands): - step += 1 - - for command in commands: -+ terse = False -+ if command.startswith("?"): -+ # We're only interested in terse table contents. -+ terse = True -+ command = command[1:] -+ - if command.startswith("+"): - # The previous transaction didn't change anything. - command = command[1:] -@@ -714,7 +732,7 @@ def do_idl(schema_file, remote, *commands): - rpc.wait(poller) - poller.block() - -- print_idl(idl, step) -+ print_idl(idl, step, terse) - step += 1 - - seqno = idl.change_seqno -diff --git a/tests/test-rcu.c b/tests/test-rcu.c -index 965f3c49f3..bb17092bf0 100644 ---- a/tests/test-rcu.c -+++ b/tests/test-rcu.c -@@ -35,7 +35,7 @@ quiescer_main(void *aux OVS_UNUSED) - } - - static void --test_rcu_quiesce(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) -+test_rcu_quiesce(void) - { - pthread_t quiescer; - -@@ -48,4 +48,29 @@ test_rcu_quiesce(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) - xpthread_join(quiescer, NULL); - } - --OVSTEST_REGISTER("test-rcu-quiesce", test_rcu_quiesce); -+static void -+add_count(void *_count) -+{ -+ unsigned *count = (unsigned *)_count; -+ (*count) ++; -+} -+ -+static void -+test_rcu_barrier(void) -+{ -+ unsigned count = 0; -+ for (int i = 0; i < 10; i ++) { -+ ovsrcu_postpone(add_count, &count); -+ } -+ -+ ovsrcu_barrier(); -+ ovs_assert(count == 10); -+} -+ -+static void -+test_rcu(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { -+ test_rcu_quiesce(); -+ test_rcu_barrier(); -+} -+ -+OVSTEST_REGISTER("test-rcu", test_rcu); -diff --git a/tests/tunnel-push-pop-ipv6.at b/tests/tunnel-push-pop-ipv6.at -index 59723e63b8..c7665a1aeb 100644 ---- a/tests/tunnel-push-pop-ipv6.at -+++ b/tests/tunnel-push-pop-ipv6.at -@@ -432,6 +432,42 @@ AT_CHECK([ovs-appctl dpif/dump-flows int-br | grep 'in_port(6081)'], [0], [dnl - 
tunnel(tun_id=0x7b,ipv6_src=2001:cafe::92,ipv6_dst=2001:cafe::88,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(-df-csum+key)),recirc_id(0),in_port(6081),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=3,rule_cookie=0,controller_id=0,max_len=65535)) - ]) - -+dnl Receive VXLAN with different MAC and verify that the neigh cache gets updated -+AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000f8bc1244cafe86dd60000000003a11402001cafe0000000000000000000000922001cafe000000000000000000000088c85312b5003abc700c00000300007b00ffffffffffff00000000000008004500001c0001000040117cce7f0000017f0000010035003500080172']) -+ -+ovs-appctl time/warp 1000 -+ovs-appctl time/warp 1000 -+ -+dnl Check VXLAN tunnel push -+AT_CHECK([ovs-ofctl add-flow int-br action=2]) -+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=36:b1:ee:7c:01:01,dst=36:b1:ee:7c:01:02),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout]) -+AT_CHECK([tail -1 stdout], [0], -+ [Datapath actions: clone(tnl_push(tnl_port(4789),header(size=70,type=4,eth(dst=f8:bc:12:44:ca:fe,src=aa:55:aa:55:00:00,dl_type=0x86dd),ipv6(src=2001:cafe::88,dst=2001:cafe::92,label=0,proto=17,tclass=0x0,hlimit=64),udp(src=0,dst=4789,csum=0xffff),vxlan(flags=0x8000000,vni=0x7b)),out_port(100)),1) -+]) -+ -+AT_CHECK([ovs-appctl tnl/arp/show | tail -n+3 | sort], [0], [dnl -+2001:cafe::92 f8:bc:12:44:ca:fe br0 -+2001:cafe::93 f8:bc:12:44:34:b7 br0 -+]) -+ -+dnl Restore and check the cache entries -+AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000f8bc124434b686dd60000000003a11402001cafe0000000000000000000000922001cafe000000000000000000000088c85312b5003abc700c00000300007b00ffffffffffff00000000000008004500001c0001000040117cce7f0000017f0000010035003500080172']) -+ -+ovs-appctl time/warp 1000 -+ovs-appctl time/warp 1000 -+ -+dnl 
Check VXLAN tunnel push -+AT_CHECK([ovs-ofctl add-flow int-br action=2]) -+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=36:b1:ee:7c:01:01,dst=36:b1:ee:7c:01:02),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout]) -+AT_CHECK([tail -1 stdout], [0], -+ [Datapath actions: clone(tnl_push(tnl_port(4789),header(size=70,type=4,eth(dst=f8:bc:12:44:34:b6,src=aa:55:aa:55:00:00,dl_type=0x86dd),ipv6(src=2001:cafe::88,dst=2001:cafe::92,label=0,proto=17,tclass=0x0,hlimit=64),udp(src=0,dst=4789,csum=0xffff),vxlan(flags=0x8000000,vni=0x7b)),out_port(100)),1) -+]) -+ -+AT_CHECK([ovs-appctl tnl/arp/show | tail -n+3 | sort], [0], [dnl -+2001:cafe::92 f8:bc:12:44:34:b6 br0 -+2001:cafe::93 f8:bc:12:44:34:b7 br0 -+]) -+ - ovs-appctl time/warp 10000 - - AT_CHECK([ovs-vsctl del-port int-br t3 \ -diff --git a/tests/tunnel-push-pop.at b/tests/tunnel-push-pop.at -index 48c5de9d19..a441de3ef2 100644 ---- a/tests/tunnel-push-pop.at -+++ b/tests/tunnel-push-pop.at -@@ -499,6 +499,28 @@ AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port [[37]]' | sort], [0], [dnl - port 7: rx pkts=5, bytes=434, drop=?, errs=?, frame=?, over=?, crc=? 
- ]) - -+dnl Send out packets received from L3GRE tunnel back to L3GRE tunnel -+AT_CHECK([ovs-ofctl del-flows int-br]) -+AT_CHECK([ovs-ofctl add-flow int-br "in_port=7,actions=set_field:3->in_port,7"]) -+AT_CHECK([ovs-vsctl -- set Interface br0 options:pcap=br0.pcap]) -+ -+AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637']) -+AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637']) -+AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637']) -+ -+ovs-appctl time/warp 1000 -+ -+AT_CHECK([ovs-pcap p0.pcap > p0.pcap.txt 2>&1]) -+AT_CHECK([tail -6 p0.pcap.txt], [0], [dnl -+aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637 -+001b213cab64aa55aa55000008004500007000004000402f33aa010102580101025c20000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637 
-+aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637 -+001b213cab64aa55aa55000008004500007000004000402f33aa010102580101025c20000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637 -+aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637 -+001b213cab64aa55aa55000008004500007000004000402f33aa010102580101025c20000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637 -+]) -+ -+ - dnl Check decapsulation of Geneve packet with options - AT_CAPTURE_FILE([ofctl_monitor.log]) - AT_CHECK([ovs-ofctl monitor int-br 65534 --detach --no-chdir --pidfile 2> ofctl_monitor.log]) -@@ -518,8 +540,43 @@ icmp,vlan_tci=0x0000,dl_src=be:b6:f4:e1:49:4a,dl_dst=fe:71:d8:83:72:4f,nw_src=30 - AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port 5'], [0], [dnl - port 5: rx pkts=1, bytes=98, drop=?, errs=?, frame=?, over=?, crc=? 
- ]) --AT_CHECK([ovs-appctl dpif/dump-flows int-br | grep 'in_port(6081)'], [0], [dnl --tunnel(tun_id=0x7b,src=1.1.2.92,dst=1.1.2.88,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(-df-csum+key)),recirc_id(0),in_port(6081),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=2,rule_cookie=0,controller_id=0,max_len=65535)) -+AT_CHECK([ovs-appctl dpif/dump-flows int-br | grep 'in_port(6081)' | sed -e 's/recirc_id=[[0-9]]*/recirc_id=/g'], [0], [dnl -+tunnel(tun_id=0x7b,src=1.1.2.92,dst=1.1.2.88,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(-df-csum+key)),recirc_id(0),in_port(6081),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=,rule_cookie=0,controller_id=0,max_len=65535)) -+]) -+ -+dnl Receive VXLAN with different MAC and verify that the neigh cache gets updated -+AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000f8bc1244cafe08004500004e00010000401173e90101025c01010258c85312b5003a8cd40c00000300007b00ffffffffffff00000000000008004500001c0001000040117cce7f0000017f0000010035003500080172']) -+ -+ovs-appctl time/warp 1000 -+ovs-appctl time/warp 1000 -+ -+dnl Check VXLAN tunnel push -+AT_CHECK([ovs-ofctl add-flow int-br action=2]) -+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=36:b1:ee:7c:01:01,dst=36:b1:ee:7c:01:02),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout]) -+AT_CHECK([tail -1 stdout], [0], -+ [Datapath actions: clone(tnl_push(tnl_port(4789),header(size=50,type=4,eth(dst=f8:bc:12:44:ca:fe,src=aa:55:aa:55:00:00,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=17,tos=0,ttl=64,frag=0x4000),udp(src=0,dst=4789,csum=0x0),vxlan(flags=0x8000000,vni=0x7b)),out_port(100)),1) -+]) -+ 
-+AT_CHECK([ovs-appctl tnl/neigh/show | tail -n+3 | sort], [0], [dnl -+1.1.2.92 f8:bc:12:44:ca:fe br0 -+1.1.2.93 f8:bc:12:44:34:b7 br0 -+]) -+ -+dnl Restore and check the cache entries -+AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000f8bc124434b608004500004e00010000401173e90101025c01010258c85312b5003a8cd40c00000300007b00ffffffffffff00000000000008004500001c0001000040117cce7f0000017f0000010035003500080172']) -+ -+ovs-appctl time/warp 1000 -+ovs-appctl time/warp 1000 -+ -+dnl Check VXLAN tunnel push -+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=36:b1:ee:7c:01:01,dst=36:b1:ee:7c:01:02),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout]) -+AT_CHECK([tail -1 stdout], [0], -+ [Datapath actions: clone(tnl_push(tnl_port(4789),header(size=50,type=4,eth(dst=f8:bc:12:44:34:b6,src=aa:55:aa:55:00:00,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=17,tos=0,ttl=64,frag=0x4000),udp(src=0,dst=4789,csum=0x0),vxlan(flags=0x8000000,vni=0x7b)),out_port(100)),1) -+]) -+ -+AT_CHECK([ovs-appctl tnl/neigh/show | tail -n+3 | sort], [0], [dnl -+1.1.2.92 f8:bc:12:44:34:b6 br0 -+1.1.2.93 f8:bc:12:44:34:b7 br0 - ]) - - ovs-appctl time/warp 10000 -@@ -595,6 +652,64 @@ OVS_WAIT_UNTIL([test `ovs-pcap p0.pcap | grep 50540000000a5054000000091235 | wc - OVS_VSWITCHD_STOP - AT_CLEANUP - -+AT_SETUP([tunnel_push_pop - packet_out debug_slow]) -+ -+OVS_VSWITCHD_START( -+ [add-port br0 p0 dnl -+ -- set Interface p0 type=dummy ofport_request=1 dnl -+ other-config:hwaddr=aa:55:aa:55:00:00]) -+AT_CHECK([ovs-appctl vlog/set dpif_netdev:dbg]) -+AT_CHECK([ovs-vsctl add-br int-br -- set bridge int-br datapath_type=dummy]) -+AT_CHECK([ovs-vsctl add-port int-br t2 dnl -+ -- set Interface t2 type=geneve options:remote_ip=1.1.2.92 dnl -+ options:key=123 ofport_request=2]) -+ -+dnl First setup dummy interface IP address, then add the route -+dnl so that tnl-port table can get valid IP address for the device. 
-+AT_CHECK([ovs-appctl netdev-dummy/ip4addr br0 1.1.2.88/24], [0], [OK -+]) -+AT_CHECK([ovs-appctl ovs/route/add 1.1.2.92/24 br0], [0], [OK -+]) -+AT_CHECK([ovs-ofctl add-flow br0 action=normal]) -+ -+dnl This ARP reply from p0 has two effects: -+dnl 1. The ARP cache will learn that 1.1.2.92 is at f8:bc:12:44:34:b6. -+dnl 2. The br0 mac learning will learn that f8:bc:12:44:34:b6 is on p0. -+AT_CHECK([ -+ ovs-appctl netdev-dummy/receive p0 dnl -+ 'recirc_id(0),in_port(2),dnl -+ eth(src=f8:bc:12:44:34:b6,dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),dnl -+ arp(sip=1.1.2.92,tip=1.1.2.88,op=2,sha=f8:bc:12:44:34:b6,tha=00:00:00:00:00:00)' -+]) -+ -+AT_CHECK([ovs-vsctl -- set Interface p0 options:tx_pcap=p0.pcap]) -+ -+packet=50540000000a505400000009123 -+dnl Source port is based on a packet hash, so it may differ depending on the -+dnl compiler flags and CPU type. Masked with '....'. -+encap=f8bc124434b6aa55aa5500000800450000320000400040113406010102580101025c....17c1001e00000000655800007b00 -+ -+dnl Output to tunnel from a int-br internal port. -+dnl Checking that the packet arrived and it was correctly encapsulated. -+AT_CHECK([ovs-ofctl add-flow int-br "in_port=LOCAL,actions=debug_slow,output:2"]) -+AT_CHECK([ovs-appctl netdev-dummy/receive int-br "${packet}4"]) -+OVS_WAIT_UNTIL([test `ovs-pcap p0.pcap | egrep "${encap}${packet}4" | wc -l` -ge 1]) -+dnl Sending again to exercise the non-miss upcall path. -+AT_CHECK([ovs-appctl netdev-dummy/receive int-br "${packet}4"]) -+OVS_WAIT_UNTIL([test `ovs-pcap p0.pcap | egrep "${encap}${packet}4" | wc -l` -ge 2]) -+ -+dnl Output to tunnel from the controller. -+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out int-br CONTROLLER "debug_slow,output:2" "${packet}5"]) -+OVS_WAIT_UNTIL([test `ovs-pcap p0.pcap | egrep "${encap}${packet}5" | wc -l` -ge 1]) -+ -+dnl Datapath actions should not have tunnel push action. -+AT_CHECK([ovs-appctl dpctl/dump-flows | grep -q tnl_push], [1]) -+dnl There should be slow_path action instead. 
-+AT_CHECK([ovs-appctl dpctl/dump-flows | grep -q 'slow_path(action)'], [0]) -+ -+OVS_VSWITCHD_STOP -+AT_CLEANUP -+ - AT_SETUP([tunnel_push_pop - underlay bridge match]) - - OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy ofport_request=1 other-config:hwaddr=aa:55:aa:55:00:00]) -@@ -645,3 +760,54 @@ NXST_FLOW reply: - - OVS_VSWITCHD_STOP - AT_CLEANUP -+ -+AT_SETUP([tunnel_push_pop - VXLAN access port]) -+ -+dnl Create bridge that has a MAC address. -+OVS_VSWITCHD_START([set bridge br0 datapath_type=dummy dnl -+ -- set Interface br0 other-config:hwaddr=aa:55:aa:55:00:00]) -+AT_CHECK([ovs-vsctl add-port br0 p8 dnl -+ -- set Interface p8 type=dummy ofport_request=8]) -+ -+dnl Create another bridge. -+AT_CHECK([ovs-vsctl add-br ovs-tun0 -- set bridge ovs-tun0 datapath_type=dummy]) -+ -+dnl Add VXLAN port to this bridge. -+AT_CHECK([ovs-vsctl add-port ovs-tun0 tun0 dnl -+ -- set int tun0 type=vxlan options:remote_ip=10.0.0.11 dnl -+ -- add-port ovs-tun0 p7 dnl -+ -- set interface p7 type=dummy ofport_request=7]) -+ -+dnl Set VLAN tags, so that br0 and its port p8 have the same tag, -+dnl but ovs-tun0's port p7 has a different tag. -+AT_CHECK([ovs-vsctl set port p8 tag=42 dnl -+ -- set port br0 tag=42 dnl -+ -- set port p7 tag=200]) -+ -+dnl Set IP address and route for br0. -+AT_CHECK([ovs-appctl netdev-dummy/ip4addr br0 10.0.0.2/24], [0], [OK -+]) -+AT_CHECK([ovs-appctl ovs/route/add 10.0.0.11/24 br0], [0], [OK -+]) -+ -+dnl Send an ARP reply to port b8 on br0, so that packets will be forwarded -+dnl to learned port. 
-+AT_CHECK([ovs-ofctl add-flow br0 action=normal]) -+ -+AT_CHECK([ovs-appctl netdev-dummy/receive p8 'in_port(8),dnl -+ eth(src=aa:55:aa:66:00:00,dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),dnl -+ arp(sip=10.0.0.11,tip=10.0.0.2,op=2,sha=aa:55:aa:66:00:00,tha=00:00:00:00:00:00)']) -+ -+AT_CHECK([ovs-appctl ofproto/trace ovs-tun0 in_port=p7], [0], [stdout]) -+AT_CHECK([tail -2 stdout], [0], [dnl -+Megaflow: recirc_id=0,eth,in_port=7,dl_src=00:00:00:00:00:00,dnl -+dl_dst=00:00:00:00:00:00,dl_type=0x0000 -+Datapath actions: push_vlan(vid=200,pcp=0),1,clone(tnl_push(tnl_port(4789),dnl -+header(size=50,type=4,eth(dst=aa:55:aa:66:00:00,src=aa:55:aa:55:00:00,dnl -+dl_type=0x0800),ipv4(src=10.0.0.2,dst=10.0.0.11,proto=17,tos=0,ttl=64,dnl -+frag=0x4000),udp(src=0,dst=4789,csum=0x0),vxlan(flags=0x8000000,vni=0x0)),dnl -+out_port(100)),8) -+]) -+ -+OVS_VSWITCHD_STOP -+AT_CLEANUP -diff --git a/tests/tunnel.at b/tests/tunnel.at -index b8ae7caa9b..fd482aa872 100644 ---- a/tests/tunnel.at -+++ b/tests/tunnel.at -@@ -126,7 +126,7 @@ AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl - AT_CHECK([ovs-appctl dpctl/add-flow "tunnel(dst=1.1.1.1,src=3.3.3.200/255.255.255.0,tp_dst=123,tp_src=1,ttl=64),recirc_id(0),in_port(1),eth(),eth_type(0x0800),ipv4()" "2"]) - - AT_CHECK([ovs-appctl dpctl/dump-flows | tail -1], [0], [dnl --tunnel(src=3.3.3.200/255.255.255.0,dst=1.1.1.1,ttl=64,tp_src=1,tp_dst=123),recirc_id(0),in_port(1),eth_type(0x0800), packets:0, bytes:0, used:never, actions:2 -+tunnel(src=3.3.3.200/255.255.255.0,dst=1.1.1.1,ttl=64,tp_src=1,tp_dst=123),recirc_id(0),in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:2 - ]) - - OVS_VSWITCHD_STOP -diff --git a/utilities/ovs-ctl.in b/utilities/ovs-ctl.in -index 71800795c0..e6e07f4763 100644 ---- a/utilities/ovs-ctl.in -+++ b/utilities/ovs-ctl.in -@@ -421,7 +421,9 @@ Less important options for "start", "restart" and "force-reload-kmod": - --no-force-corefiles do not force on core dumps for OVS daemons - 
--no-mlockall do not lock all of ovs-vswitchd into memory - --ovsdb-server-priority=NICE set ovsdb-server's niceness (default: $OVSDB_SERVER_PRIORITY) -+ --ovsdb-server-options=OPTIONS additional options for ovsdb-server (example: '-vconsole:dbg -vfile:dbg') - --ovs-vswitchd-priority=NICE set ovs-vswitchd's niceness (default: $OVS_VSWITCHD_PRIORITY) -+ --ovs-vswitchd-options=OPTIONS additional options for ovs-vswitchd (example: '-vconsole:dbg -vfile:dbg') - --no-full-hostname set short hostname instead of full hostname - --no-record-hostname do not attempt to determine/record system - hostname as part of start command -diff --git a/utilities/ovs-lib.in b/utilities/ovs-lib.in -index 3eda01d3c1..13477a6a9e 100644 ---- a/utilities/ovs-lib.in -+++ b/utilities/ovs-lib.in -@@ -519,13 +519,13 @@ join_cluster() { - LOCAL_ADDR="$3" - REMOTE_ADDR="$4" - -- if test ! -e "$DB_FILE"; then -- ovsdb_tool join-cluster "$DB_FILE" "$SCHEMA_NAME" "$LOCAL_ADDR" "$REMOTE_ADDR" -- elif ovsdb_tool db-is-standalone "$DB_FILE"; then -- # Backup standalone database and join cluster. -+ if test -e "$DB_FILE" && ovsdb_tool db-is-standalone "$DB_FILE"; then - backup_db || return 1 -+ rm $DB_FILE -+ fi -+ if test ! -e "$DB_FILE"; then - action "Joining $DB_FILE to cluster" \ -- ovsdb_tool join-cluster "$DB_FILE" "$SCHEMA_NAME" "$LOCAL_ADDR" -+ ovsdb_tool join-cluster "$DB_FILE" "$SCHEMA_NAME" "$LOCAL_ADDR" "$REMOTE_ADDR" - fi - } - -diff --git a/utilities/ovs-save b/utilities/ovs-save -index 27ce3a9aad..a190902f4d 100755 ---- a/utilities/ovs-save -+++ b/utilities/ovs-save -@@ -102,7 +102,7 @@ save_interfaces () { - get_highest_ofp_version() { - ovs-vsctl get bridge "$1" protocols | \ - sed 's/[][]//g' | sed 's/\ //g' | \ -- awk -F ',' '{ print (NF>1)? $(NF) : "OpenFlow14" }' -+ awk -F ',' '{ print (NF>0)? 
$(NF) : "OpenFlow14" }' - } - - save_flows () { -@@ -150,7 +150,10 @@ save_flows () { - ovs-ofctl -O $ofp_version dump-flows --no-names --no-stats "$bridge" | \ - sed -e '/NXST_FLOW/d' \ - -e '/OFPST_FLOW/d' \ -- -e 's/\(idle\|hard\)_age=[^,]*,//g' > \ -+ -e 's/\(idle\|hard\)_age=[^,]*,//g' \ -+ -e 's/igmp_type/tp_src/g' \ -+ -e 's/igmp_code/tp_dst/g' \ -+ -e 's/igmp/ip,nw_proto=2/g' > \ - "$workdir/$bridge.flows.dump" - done - echo "rm -rf \"$workdir\"" -diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c -index cb7c5cb769..c790a56adf 100644 ---- a/vswitchd/bridge.c -+++ b/vswitchd/bridge.c -@@ -4229,7 +4229,7 @@ bridge_configure_aa(struct bridge *br) - union ovsdb_atom atom; - - atom.integer = m->isid; -- if (ovsdb_datum_find_key(mc, &atom, OVSDB_TYPE_INTEGER) == UINT_MAX) { -+ if (!ovsdb_datum_find_key(mc, &atom, OVSDB_TYPE_INTEGER, NULL)) { - VLOG_INFO("Deleting isid=%"PRIu32", vlan=%"PRIu16, - m->isid, m->vlan); - bridge_aa_mapping_destroy(m); -@@ -4826,7 +4826,7 @@ queue_ids_include(const struct ovsdb_datum *queues, int64_t target) - union ovsdb_atom atom; - - atom.integer = target; -- return ovsdb_datum_find_key(queues, &atom, OVSDB_TYPE_INTEGER) != UINT_MAX; -+ return ovsdb_datum_find_key(queues, &atom, OVSDB_TYPE_INTEGER, NULL); - } - - static void -@@ -5020,7 +5020,7 @@ bridge_configure_mirrors(struct bridge *br) - union ovsdb_atom atom; - - atom.uuid = m->uuid; -- if (ovsdb_datum_find_key(mc, &atom, OVSDB_TYPE_UUID) == UINT_MAX) { -+ if (!ovsdb_datum_find_key(mc, &atom, OVSDB_TYPE_UUID, NULL)) { - mirror_destroy(m); - } - } diff --git a/SOURCES/openvswitch-2.17.0.patch b/SOURCES/openvswitch-2.17.0.patch new file mode 100644 index 0000000..c624213 --- /dev/null +++ b/SOURCES/openvswitch-2.17.0.patch @@ -0,0 +1,7288 @@ +diff --git a/.cirrus.yml b/.cirrus.yml +index a7ae793bc4..a4d2a5bbcd 100644 +--- a/.cirrus.yml ++++ b/.cirrus.yml +@@ -2,8 +2,8 @@ freebsd_build_task: + + freebsd_instance: + matrix: +- image_family: freebsd-12-2-snap +- image_family: 
freebsd-11-4-snap ++ image_family: freebsd-12-3-snap ++ image_family: freebsd-13-0-snap + cpu: 4 + memory: 4G + +diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst +index af524251ff..530d36e25a 100644 +--- a/Documentation/faq/releases.rst ++++ b/Documentation/faq/releases.rst +@@ -208,8 +208,8 @@ Q: What DPDK version does each Open vSwitch release work with? + 2.12.x 18.11.9 + 2.13.x 19.11.10 + 2.14.x 19.11.10 +- 2.15.x 20.11.1 +- 2.16.x 20.11.1 ++ 2.15.x 20.11.4 ++ 2.16.x 20.11.4 + 2.17.x 21.11.0 + ============ ======== + +diff --git a/Documentation/intro/install/general.rst b/Documentation/intro/install/general.rst +index c4300cd53e..a297aadac8 100644 +--- a/Documentation/intro/install/general.rst ++++ b/Documentation/intro/install/general.rst +@@ -169,7 +169,7 @@ other than plain text, only if you have the following: + If you are going to extensively modify Open vSwitch, consider installing the + following to obtain better warnings: + +-- "sparse" version 0.5.1 or later ++- "sparse" version 0.6.2 or later + (https://git.kernel.org/pub/scm/devel/sparse/sparse.git/). + + - GNU make. +diff --git a/NEWS b/NEWS +index c10e9bfacc..8cae5f7de7 100644 +--- a/NEWS ++++ b/NEWS +@@ -1,3 +1,21 @@ ++v2.17.2 - xx xxx xxxx ++--------------------- ++ ++v2.17.1 - 08 Apr 2022 ++--------------------- ++ - Bug fixes ++ - libopenvswitch API change: ++ * To fix the Undefined Behavior issue causing the compiler to incorrectly ++ optimize important parts of code, container iteration macros (e.g., ++ LIST_FOR_EACH) have been re-implemented in a UB-safe way. ++ * Backwards compatibility has mostly been preserved, however the ++ user-provided pointer is now set to NULL after the loop (unless it ++ exited via "break;") ++ * Users of libopenvswitch will need to double-check the use of such loop ++ macros before compiling with a new version. ++ * Since the change is limited to the definitions within the headers, the ++ ABI is not affected. 
++ + v2.17.0 - 17 Feb 2022 + --------------------- + - Userspace datapath: +diff --git a/acinclude.m4 b/acinclude.m4 +index 0c360fd1ef..61e88105f5 100644 +--- a/acinclude.m4 ++++ b/acinclude.m4 +@@ -305,6 +305,13 @@ AC_DEFUN([OVS_CHECK_LINUX_TC], [ + ])], + [AC_DEFINE([HAVE_TCA_SKBEDIT_FLAGS], [1], + [Define to 1 if TCA_SKBEDIT_FLAGS is available.])]) ++ ++ AC_COMPILE_IFELSE([ ++ AC_LANG_PROGRAM([#include ], [ ++ int x = TCA_STATS_PKT64; ++ ])], ++ [AC_DEFINE([HAVE_TCA_STATS_PKT64], [1], ++ [Define to 1 if TCA_STATS_PKT64 is available.])]) + ]) + + dnl OVS_CHECK_LINUX_SCTP_CT +@@ -1424,7 +1431,7 @@ AC_DEFUN([OVS_ENABLE_SPARSE], + : ${SPARSE=sparse} + AC_SUBST([SPARSE]) + AC_CONFIG_COMMANDS_PRE( +- [CC='$(if $(C:0=),env REAL_CC="'"$CC"'" CHECK="$(SPARSE) $(SPARSE_WERROR) -I $(top_srcdir)/include/sparse $(SPARSEFLAGS) $(SPARSE_EXTRA_INCLUDES) " cgcc $(CGCCFLAGS),'"$CC"')']) ++ [CC='$(if $(C:0=),env REAL_CC="'"$CC"'" CHECK="$(SPARSE) $(SPARSE_WERROR) -I $(top_srcdir)/include/sparse -I $(top_srcdir)/include $(SPARSEFLAGS) $(SPARSE_EXTRA_INCLUDES) " cgcc $(CGCCFLAGS),'"$CC"')']) + + AC_ARG_ENABLE( + [sparse], +diff --git a/configure.ac b/configure.ac +index 4e9bcce272..9ba141b223 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -13,7 +13,7 @@ + # limitations under the License. 
+ + AC_PREREQ(2.63) +-AC_INIT(openvswitch, 2.17.0, bugs@openvswitch.org) ++AC_INIT(openvswitch, 2.17.2, bugs@openvswitch.org) + AC_CONFIG_SRCDIR([datapath/datapath.c]) + AC_CONFIG_MACRO_DIR([m4]) + AC_CONFIG_AUX_DIR([build-aux]) +diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c +index 70ac0a0e56..218e7db814 100644 +--- a/datapath-windows/ovsext/Actions.c ++++ b/datapath-windows/ovsext/Actions.c +@@ -1712,6 +1712,15 @@ OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx, + ipHdr->ttl = ipAttr->ipv4_ttl; + key->ipKey.nwTtl = ipAttr->ipv4_ttl; + } ++ if (ipHdr->dscp != (ipAttr->ipv4_tos & 0xfc)) { ++ /* ECN + DSCP */ ++ UINT8 newTos = (ipHdr->tos & 0x3) | (ipAttr->ipv4_tos & 0xfc); ++ if (ipHdr->check != 0) { ++ ipHdr->check = ChecksumUpdate16(ipHdr->check, ipHdr->tos, newTos); ++ } ++ ipHdr->tos = newTos; ++ key->ipKey.nwTos = newTos; ++ } + + return NDIS_STATUS_SUCCESS; + } +diff --git a/debian/changelog b/debian/changelog +index 3e0d3a66e3..c3b2852c28 100644 +--- a/debian/changelog ++++ b/debian/changelog +@@ -1,3 +1,15 @@ ++openvswitch (2.17.2-1) unstable; urgency=low ++ [ Open vSwitch team ] ++ * New upstream version ++ ++ -- Open vSwitch team Fri, 08 Apr 2022 14:57:49 +0200 ++ ++openvswitch (2.17.1-1) unstable; urgency=low ++ [ Open vSwitch team ] ++ * New upstream version ++ ++ -- Open vSwitch team Fri, 08 Apr 2022 14:57:49 +0200 ++ + openvswitch (2.17.0-1) unstable; urgency=low + + * New upstream version +diff --git a/dpdk/lib/vhost/vhost_user.c b/dpdk/lib/vhost/vhost_user.c +index a781346c4d..550b0ee8b5 100644 +--- a/dpdk/lib/vhost/vhost_user.c ++++ b/dpdk/lib/vhost/vhost_user.c +@@ -1603,6 +1603,9 @@ vhost_user_get_inflight_fd(struct virtio_net **pdev, + int numa_node = SOCKET_ID_ANY; + void *addr; + ++ if (validate_msg_fds(msg, 0) != 0) ++ return RTE_VHOST_MSG_RESULT_ERR; ++ + if (msg->size != sizeof(msg->payload.inflight)) { + VHOST_LOG_CONFIG(ERR, + "invalid get_inflight_fd message size is %d\n", +@@ -1704,6 +1707,9 @@ 
vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg, + int fd, i; + int numa_node = SOCKET_ID_ANY; + ++ if (validate_msg_fds(msg, 1) != 0) ++ return RTE_VHOST_MSG_RESULT_ERR; ++ + fd = msg->fds[0]; + if (msg->size != sizeof(msg->payload.inflight) || fd < 0) { + VHOST_LOG_CONFIG(ERR, +@@ -2873,6 +2879,9 @@ vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev, + case VHOST_USER_SET_VRING_ADDR: + vring_idx = msg->payload.addr.index; + break; ++ case VHOST_USER_SET_INFLIGHT_FD: ++ vring_idx = msg->payload.inflight.num_queues - 1; ++ break; + default: + return 0; + } +diff --git a/include/linux/automake.mk b/include/linux/automake.mk +index 8f063f482e..f857c7e088 100644 +--- a/include/linux/automake.mk ++++ b/include/linux/automake.mk +@@ -2,6 +2,7 @@ noinst_HEADERS += \ + include/linux/netlink.h \ + include/linux/netfilter/nf_conntrack_sctp.h \ + include/linux/pkt_cls.h \ ++ include/linux/gen_stats.h \ + include/linux/tc_act/tc_mpls.h \ + include/linux/tc_act/tc_pedit.h \ + include/linux/tc_act/tc_skbedit.h \ +diff --git a/include/linux/gen_stats.h b/include/linux/gen_stats.h +new file mode 100644 +index 0000000000..6fae6f727c +--- /dev/null ++++ b/include/linux/gen_stats.h +@@ -0,0 +1,81 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#ifndef __LINUX_GEN_STATS_WRAPPER_H ++#define __LINUX_GEN_STATS_WRAPPER_H 1 ++ ++#if defined(__KERNEL__) || defined(HAVE_TCA_STATS_PKT64) ++#include_next <linux/gen_stats.h> ++#else ++#include <linux/types.h> ++ ++enum { ++ TCA_STATS_UNSPEC, ++ TCA_STATS_BASIC, ++ TCA_STATS_RATE_EST, ++ TCA_STATS_QUEUE, ++ TCA_STATS_APP, ++ TCA_STATS_RATE_EST64, ++ TCA_STATS_PAD, ++ TCA_STATS_BASIC_HW, ++ TCA_STATS_PKT64, ++ __TCA_STATS_MAX, ++}; ++#define TCA_STATS_MAX (__TCA_STATS_MAX - 1) ++ ++/** ++ * struct gnet_stats_basic - byte/packet throughput statistics ++ * @bytes: number of seen bytes ++ * @packets: number of seen packets ++ */ ++struct gnet_stats_basic { ++ __u64 bytes; ++ __u32 packets; ++}; ++ ++/** ++ * struct
gnet_stats_rate_est - rate estimator ++ * @bps: current byte rate ++ * @pps: current packet rate ++ */ ++struct gnet_stats_rate_est { ++ __u32 bps; ++ __u32 pps; ++}; ++ ++/** ++ * struct gnet_stats_rate_est64 - rate estimator ++ * @bps: current byte rate ++ * @pps: current packet rate ++ */ ++struct gnet_stats_rate_est64 { ++ __u64 bps; ++ __u64 pps; ++}; ++ ++/** ++ * struct gnet_stats_queue - queuing statistics ++ * @qlen: queue length ++ * @backlog: backlog size of queue ++ * @drops: number of dropped packets ++ * @requeues: number of requeues ++ * @overlimits: number of enqueues over the limit ++ */ ++struct gnet_stats_queue { ++ __u32 qlen; ++ __u32 backlog; ++ __u32 drops; ++ __u32 requeues; ++ __u32 overlimits; ++}; ++ ++/** ++ * struct gnet_estimator - rate estimator configuration ++ * @interval: sampling period ++ * @ewma_log: the log of measurement window weight ++ */ ++struct gnet_estimator { ++ signed char interval; ++ unsigned char ewma_log; ++}; ++ ++#endif /* __KERNEL__ || !HAVE_TCA_STATS_PKT64 */ ++#endif /* __LINUX_GEN_STATS_WRAPPER_H */ +diff --git a/include/openvswitch/hmap.h b/include/openvswitch/hmap.h +index 4e001cc692..beb48295b9 100644 +--- a/include/openvswitch/hmap.h ++++ b/include/openvswitch/hmap.h +@@ -134,17 +134,17 @@ struct hmap_node *hmap_random_node(const struct hmap *); + * without using 'break', NODE will be NULL. This is true for all of the + * HMAP_FOR_EACH_*() macros. 
+ */ +-#define HMAP_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, HMAP) \ +- for (INIT_CONTAINER(NODE, hmap_first_with_hash(HMAP, HASH), MEMBER); \ +- (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ +- || ((NODE = NULL), false); \ +- ASSIGN_CONTAINER(NODE, hmap_next_with_hash(&(NODE)->MEMBER), \ +- MEMBER)) +-#define HMAP_FOR_EACH_IN_BUCKET(NODE, MEMBER, HASH, HMAP) \ +- for (INIT_CONTAINER(NODE, hmap_first_in_bucket(HMAP, HASH), MEMBER); \ +- (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ +- || ((NODE = NULL), false); \ +- ASSIGN_CONTAINER(NODE, hmap_next_in_bucket(&(NODE)->MEMBER), MEMBER)) ++#define HMAP_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, HMAP) \ ++ for (INIT_MULTIVAR(NODE, MEMBER, hmap_first_with_hash(HMAP, HASH), \ ++ struct hmap_node); \ ++ CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ ++ UPDATE_MULTIVAR(NODE, hmap_next_with_hash(ITER_VAR(NODE)))) ++ ++#define HMAP_FOR_EACH_IN_BUCKET(NODE, MEMBER, HASH, HMAP) \ ++ for (INIT_MULTIVAR(NODE, MEMBER, hmap_first_in_bucket(HMAP, HASH), \ ++ struct hmap_node); \ ++ CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ ++ UPDATE_MULTIVAR(NODE, hmap_next_in_bucket(ITER_VAR(NODE)))) + + static inline struct hmap_node *hmap_first_with_hash(const struct hmap *, + size_t hash); +@@ -170,54 +170,80 @@ bool hmap_contains(const struct hmap *, const struct hmap_node *); + /* Iterates through every node in HMAP. */ + #define HMAP_FOR_EACH(NODE, MEMBER, HMAP) \ + HMAP_FOR_EACH_INIT(NODE, MEMBER, HMAP, (void) 0) +-#define HMAP_FOR_EACH_INIT(NODE, MEMBER, HMAP, ...) \ +- for (INIT_CONTAINER(NODE, hmap_first(HMAP), MEMBER), __VA_ARGS__; \ +- (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ +- || ((NODE = NULL), false); \ +- ASSIGN_CONTAINER(NODE, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER)) ++#define HMAP_FOR_EACH_INIT(NODE, MEMBER, HMAP, ...) 
\ ++ for (INIT_MULTIVAR_EXP(NODE, MEMBER, hmap_first(HMAP), struct hmap_node, \ ++ __VA_ARGS__); \ ++ CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ ++ UPDATE_MULTIVAR(NODE, hmap_next(HMAP, ITER_VAR(NODE)))) + + /* Safe when NODE may be freed (not needed when NODE may be removed from the + * hash map but its members remain accessible and intact). */ +-#define HMAP_FOR_EACH_SAFE(NODE, NEXT, MEMBER, HMAP) \ +- HMAP_FOR_EACH_SAFE_INIT(NODE, NEXT, MEMBER, HMAP, (void) 0) +-#define HMAP_FOR_EACH_SAFE_INIT(NODE, NEXT, MEMBER, HMAP, ...) \ +- for (INIT_CONTAINER(NODE, hmap_first(HMAP), MEMBER), __VA_ARGS__; \ +- ((NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ +- || ((NODE = NULL), false) \ +- ? INIT_CONTAINER(NEXT, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER), 1 \ +- : 0); \ +- (NODE) = (NEXT)) ++#define HMAP_FOR_EACH_SAFE_LONG(NODE, NEXT, MEMBER, HMAP) \ ++ HMAP_FOR_EACH_SAFE_LONG_INIT (NODE, NEXT, MEMBER, HMAP, (void) NEXT) ++ ++#define HMAP_FOR_EACH_SAFE_LONG_INIT(NODE, NEXT, MEMBER, HMAP, ...) \ ++ for (INIT_MULTIVAR_SAFE_LONG_EXP(NODE, NEXT, MEMBER, hmap_first(HMAP), \ ++ struct hmap_node, __VA_ARGS__); \ ++ CONDITION_MULTIVAR_SAFE_LONG(NODE, NEXT, MEMBER, \ ++ ITER_VAR(NODE) != NULL, \ ++ ITER_VAR(NEXT) = hmap_next(HMAP, ITER_VAR(NODE)), \ ++ ITER_VAR(NEXT) != NULL); \ ++ UPDATE_MULTIVAR_SAFE_LONG(NODE, NEXT)) ++ ++/* Short versions of HMAP_FOR_EACH_SAFE. */ ++#define HMAP_FOR_EACH_SAFE_SHORT(NODE, MEMBER, HMAP) \ ++ HMAP_FOR_EACH_SAFE_SHORT_INIT (NODE, MEMBER, HMAP, (void) 0) ++ ++#define HMAP_FOR_EACH_SAFE_SHORT_INIT(NODE, MEMBER, HMAP, ...) \ ++ for (INIT_MULTIVAR_SAFE_SHORT_EXP(NODE, MEMBER, hmap_first(HMAP), \ ++ struct hmap_node, __VA_ARGS__); \ ++ CONDITION_MULTIVAR_SAFE_SHORT(NODE, MEMBER, \ ++ ITER_VAR(NODE) != NULL, \ ++ ITER_NEXT_VAR(NODE) = hmap_next(HMAP, ITER_VAR(NODE))); \ ++ UPDATE_MULTIVAR_SAFE_SHORT(NODE)) ++ ++#define HMAP_FOR_EACH_SAFE(...) 
\ ++ OVERLOAD_SAFE_MACRO(HMAP_FOR_EACH_SAFE_LONG, \ ++ HMAP_FOR_EACH_SAFE_SHORT, \ ++ 4, __VA_ARGS__) ++ + + /* Continues an iteration from just after NODE. */ + #define HMAP_FOR_EACH_CONTINUE(NODE, MEMBER, HMAP) \ + HMAP_FOR_EACH_CONTINUE_INIT(NODE, MEMBER, HMAP, (void) 0) +-#define HMAP_FOR_EACH_CONTINUE_INIT(NODE, MEMBER, HMAP, ...) \ +- for (ASSIGN_CONTAINER(NODE, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER), \ +- __VA_ARGS__; \ +- (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ +- || ((NODE = NULL), false); \ +- ASSIGN_CONTAINER(NODE, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER)) ++#define HMAP_FOR_EACH_CONTINUE_INIT(NODE, MEMBER, HMAP, ...) \ ++ for (INIT_MULTIVAR_EXP(NODE, MEMBER, hmap_next(HMAP, &(NODE)->MEMBER), \ ++ struct hmap_node, __VA_ARGS__); \ ++ CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ ++ UPDATE_MULTIVAR(NODE, hmap_next(HMAP, ITER_VAR(NODE)))) ++ ++struct hmap_pop_helper_iter__ { ++ size_t bucket; ++ struct hmap_node *node; ++}; + +-static inline struct hmap_node * +-hmap_pop_helper__(struct hmap *hmap, size_t *bucket) { ++static inline void ++hmap_pop_helper__(struct hmap *hmap, struct hmap_pop_helper_iter__ *iter) { + +- for (; *bucket <= hmap->mask; (*bucket)++) { +- struct hmap_node *node = hmap->buckets[*bucket]; ++ for (; iter->bucket <= hmap->mask; (iter->bucket)++) { ++ struct hmap_node *node = hmap->buckets[iter->bucket]; + + if (node) { + hmap_remove(hmap, node); +- return node; ++ iter->node = node; ++ return; + } + } +- +- return NULL; ++ iter->node = NULL; + } + +-#define HMAP_FOR_EACH_POP(NODE, MEMBER, HMAP) \ +- for (size_t bucket__ = 0; \ +- INIT_CONTAINER(NODE, hmap_pop_helper__(HMAP, &bucket__), MEMBER), \ +- (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ +- || ((NODE = NULL), false);) ++#define HMAP_FOR_EACH_POP(NODE, MEMBER, HMAP) \ ++ for (struct hmap_pop_helper_iter__ ITER_VAR(NODE) = { 0, NULL }; \ ++ hmap_pop_helper__(HMAP, &ITER_VAR(NODE)), \ ++ (ITER_VAR(NODE).node != NULL) ? 
\ ++ (((NODE) = OBJECT_CONTAINING(ITER_VAR(NODE).node, \ ++ NODE, MEMBER)),1): \ ++ (((NODE) = NULL), 0);) + + static inline struct hmap_node *hmap_first(const struct hmap *); + static inline struct hmap_node *hmap_next(const struct hmap *, +diff --git a/include/openvswitch/list.h b/include/openvswitch/list.h +index 8ad5eeb327..6272d340cf 100644 +--- a/include/openvswitch/list.h ++++ b/include/openvswitch/list.h +@@ -72,37 +72,74 @@ static inline bool ovs_list_is_empty(const struct ovs_list *); + static inline bool ovs_list_is_singleton(const struct ovs_list *); + static inline bool ovs_list_is_short(const struct ovs_list *); + +-#define LIST_FOR_EACH(ITER, MEMBER, LIST) \ +- for (INIT_CONTAINER(ITER, (LIST)->next, MEMBER); \ +- &(ITER)->MEMBER != (LIST); \ +- ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.next, MEMBER)) +-#define LIST_FOR_EACH_CONTINUE(ITER, MEMBER, LIST) \ +- for (ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.next, MEMBER); \ +- &(ITER)->MEMBER != (LIST); \ +- ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.next, MEMBER)) +-#define LIST_FOR_EACH_REVERSE(ITER, MEMBER, LIST) \ +- for (INIT_CONTAINER(ITER, (LIST)->prev, MEMBER); \ +- &(ITER)->MEMBER != (LIST); \ +- ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER)) +-#define LIST_FOR_EACH_REVERSE_SAFE(ITER, PREV, MEMBER, LIST) \ +- for (INIT_CONTAINER(ITER, (LIST)->prev, MEMBER); \ +- (&(ITER)->MEMBER != (LIST) \ +- ? INIT_CONTAINER(PREV, (ITER)->MEMBER.prev, MEMBER), 1 \ +- : 0); \ +- (ITER) = (PREV)) +-#define LIST_FOR_EACH_REVERSE_CONTINUE(ITER, MEMBER, LIST) \ +- for (ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER); \ +- &(ITER)->MEMBER != (LIST); \ +- ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER)) +-#define LIST_FOR_EACH_SAFE(ITER, NEXT, MEMBER, LIST) \ +- for (INIT_CONTAINER(ITER, (LIST)->next, MEMBER); \ +- (&(ITER)->MEMBER != (LIST) \ +- ? 
INIT_CONTAINER(NEXT, (ITER)->MEMBER.next, MEMBER), 1 \ +- : 0); \ +- (ITER) = (NEXT)) +-#define LIST_FOR_EACH_POP(ITER, MEMBER, LIST) \ +- while (!ovs_list_is_empty(LIST) \ +- && (INIT_CONTAINER(ITER, ovs_list_pop_front(LIST), MEMBER), 1)) ++#define LIST_FOR_EACH(VAR, MEMBER, LIST) \ ++ for (INIT_MULTIVAR(VAR, MEMBER, (LIST)->next, struct ovs_list); \ ++ CONDITION_MULTIVAR(VAR, MEMBER, ITER_VAR(VAR) != (LIST)); \ ++ UPDATE_MULTIVAR(VAR, ITER_VAR(VAR)->next)) ++ ++#define LIST_FOR_EACH_CONTINUE(VAR, MEMBER, LIST) \ ++ for (INIT_MULTIVAR(VAR, MEMBER, VAR->MEMBER.next, struct ovs_list); \ ++ CONDITION_MULTIVAR(VAR, MEMBER, ITER_VAR(VAR) != (LIST)); \ ++ UPDATE_MULTIVAR(VAR, ITER_VAR(VAR)->next)) ++ ++#define LIST_FOR_EACH_REVERSE(VAR, MEMBER, LIST) \ ++ for (INIT_MULTIVAR(VAR, MEMBER, (LIST)->prev, struct ovs_list); \ ++ CONDITION_MULTIVAR(VAR, MEMBER, ITER_VAR(VAR) != (LIST)); \ ++ UPDATE_MULTIVAR(VAR, ITER_VAR(VAR)->prev)) ++ ++#define LIST_FOR_EACH_REVERSE_CONTINUE(VAR, MEMBER, LIST) \ ++ for (INIT_MULTIVAR(VAR, MEMBER, VAR->MEMBER.prev, struct ovs_list); \ ++ CONDITION_MULTIVAR(VAR, MEMBER, ITER_VAR(VAR) != (LIST)); \ ++ UPDATE_MULTIVAR(VAR, ITER_VAR(VAR)->prev)) ++ ++/* LONG version of SAFE iterators. */ ++#define LIST_FOR_EACH_REVERSE_SAFE_LONG(VAR, PREV, MEMBER, LIST) \ ++ for (INIT_MULTIVAR_SAFE_LONG(VAR, PREV, MEMBER, (LIST)->prev, \ ++ struct ovs_list); \ ++ CONDITION_MULTIVAR_SAFE_LONG(VAR, PREV, MEMBER, \ ++ ITER_VAR(VAR) != (LIST), \ ++ ITER_VAR(PREV) = ITER_VAR(VAR)->prev, \ ++ ITER_VAR(PREV) != (LIST)); \ ++ UPDATE_MULTIVAR_SAFE_LONG(VAR, PREV)) ++ ++#define LIST_FOR_EACH_SAFE_LONG(VAR, NEXT, MEMBER, LIST) \ ++ for (INIT_MULTIVAR_SAFE_LONG(VAR, NEXT, MEMBER, (LIST)->next, \ ++ struct ovs_list); \ ++ CONDITION_MULTIVAR_SAFE_LONG(VAR, NEXT, MEMBER, \ ++ ITER_VAR(VAR) != (LIST), \ ++ ITER_VAR(NEXT) = ITER_VAR(VAR)->next, \ ++ ITER_VAR(NEXT) != (LIST)); \ ++ UPDATE_MULTIVAR_SAFE_LONG(VAR, NEXT)) ++ ++/* SHORT version of SAFE iterators. 
*/ ++#define LIST_FOR_EACH_REVERSE_SAFE_SHORT(VAR, MEMBER, LIST) \ ++ for (INIT_MULTIVAR_SAFE_SHORT(VAR, MEMBER, (LIST)->prev, struct ovs_list);\ ++ CONDITION_MULTIVAR_SAFE_SHORT(VAR, MEMBER, \ ++ ITER_VAR(VAR) != (LIST), \ ++ ITER_NEXT_VAR(VAR) = ITER_VAR(VAR)->prev); \ ++ UPDATE_MULTIVAR_SAFE_SHORT(VAR)) ++ ++#define LIST_FOR_EACH_SAFE_SHORT(VAR, MEMBER, LIST) \ ++ for (INIT_MULTIVAR_SAFE_SHORT(VAR, MEMBER, (LIST)->next, struct ovs_list);\ ++ CONDITION_MULTIVAR_SAFE_SHORT(VAR, MEMBER, \ ++ ITER_VAR(VAR) != (LIST), \ ++ ITER_NEXT_VAR(VAR) = ITER_VAR(VAR)->next); \ ++ UPDATE_MULTIVAR_SAFE_SHORT(VAR)) ++ ++#define LIST_FOR_EACH_SAFE(...) \ ++ OVERLOAD_SAFE_MACRO(LIST_FOR_EACH_SAFE_LONG, \ ++ LIST_FOR_EACH_SAFE_SHORT, \ ++ 4, __VA_ARGS__) ++ ++#define LIST_FOR_EACH_REVERSE_SAFE(...) \ ++ OVERLOAD_SAFE_MACRO(LIST_FOR_EACH_REVERSE_SAFE_LONG, \ ++ LIST_FOR_EACH_REVERSE_SAFE_SHORT, \ ++ 4, __VA_ARGS__) ++ ++#define LIST_FOR_EACH_POP(ITER, MEMBER, LIST) \ ++ while (!ovs_list_is_empty(LIST) ? \ ++ (INIT_CONTAINER(ITER, ovs_list_pop_front(LIST), MEMBER), 1) : \ ++ (ITER = NULL, 0)) + + /* Inline implementations. 
*/ + +diff --git a/include/openvswitch/shash.h b/include/openvswitch/shash.h +index c249e13e1f..4e7badd4dc 100644 +--- a/include/openvswitch/shash.h ++++ b/include/openvswitch/shash.h +@@ -41,13 +41,24 @@ struct shash { + BUILD_ASSERT_TYPE(SHASH_NODE, struct shash_node *), \ + BUILD_ASSERT_TYPE(SHASH, struct shash *)) + +-#define SHASH_FOR_EACH_SAFE(SHASH_NODE, NEXT, SHASH) \ +- HMAP_FOR_EACH_SAFE_INIT ( \ ++#define SHASH_FOR_EACH_SAFE_SHORT(SHASH_NODE, SHASH) \ ++ HMAP_FOR_EACH_SAFE_SHORT_INIT ( \ ++ SHASH_NODE, node, &(SHASH)->map, \ ++ BUILD_ASSERT_TYPE(SHASH_NODE, struct shash_node *), \ ++ BUILD_ASSERT_TYPE(SHASH, struct shash *)) ++ ++#define SHASH_FOR_EACH_SAFE_LONG(SHASH_NODE, NEXT, SHASH) \ ++ HMAP_FOR_EACH_SAFE_LONG_INIT ( \ + SHASH_NODE, NEXT, node, &(SHASH)->map, \ + BUILD_ASSERT_TYPE(SHASH_NODE, struct shash_node *), \ + BUILD_ASSERT_TYPE(NEXT, struct shash_node *), \ + BUILD_ASSERT_TYPE(SHASH, struct shash *)) + ++#define SHASH_FOR_EACH_SAFE(...) \ ++ OVERLOAD_SAFE_MACRO(SHASH_FOR_EACH_SAFE_LONG, \ ++ SHASH_FOR_EACH_SAFE_SHORT, \ ++ 3, __VA_ARGS__) ++ + void shash_init(struct shash *); + void shash_destroy(struct shash *); + void shash_destroy_free_data(struct shash *); +diff --git a/include/openvswitch/util.h b/include/openvswitch/util.h +index 228b185c3a..96f600160b 100644 +--- a/include/openvswitch/util.h ++++ b/include/openvswitch/util.h +@@ -145,6 +145,150 @@ OVS_NO_RETURN void ovs_assert_failure(const char *, const char *, const char *); + #define INIT_CONTAINER(OBJECT, POINTER, MEMBER) \ + ((OBJECT) = NULL, ASSIGN_CONTAINER(OBJECT, POINTER, MEMBER)) + ++/* Multi-variable container iterators. ++ * ++ * The following macros facilitate safe iteration over data structures ++ * contained in objects. It does so by using an internal iterator variable of ++ * the type of the member object pointer (i.e: pointer to the data structure). ++ */ ++ ++/* Multi-variable iterator variable name. ++ * Returns the name of the internal iterator variable. 
++ */ ++#define ITER_VAR(NAME) NAME ## __iterator__ ++ ++/* Multi-variable initialization. Creates an internal iterator variable that ++ * points to the provided pointer. The type of the iterator variable is ++ * ITER_TYPE*. It must be the same type as &VAR->MEMBER. ++ * ++ * The _EXP version evaluates the extra expressions once. ++ */ ++#define INIT_MULTIVAR(VAR, MEMBER, POINTER, ITER_TYPE) \ ++ INIT_MULTIVAR_EXP(VAR, MEMBER, POINTER, ITER_TYPE, (void) 0) ++ ++#define INIT_MULTIVAR_EXP(VAR, MEMBER, POINTER, ITER_TYPE, ...) \ ++ ITER_TYPE *ITER_VAR(VAR) = ( __VA_ARGS__ , (ITER_TYPE *) POINTER) ++ ++/* Multi-variable condition. ++ * Evaluates the condition expression (that must be based on the internal ++ * iterator variable). Only if the result of expression is true, the OBJECT is ++ * set to the object containing the current value of the iterator variable. ++ * ++ * It is up to the caller to make sure it is safe to run OBJECT_CONTAINING on ++ * the pointers that verify the condition. ++ */ ++#define CONDITION_MULTIVAR(VAR, MEMBER, EXPR) \ ++ ((EXPR) ? \ ++ (((VAR) = OBJECT_CONTAINING(ITER_VAR(VAR), VAR, MEMBER)), 1) : \ ++ (((VAR) = NULL), 0)) ++ ++/* Multi-variable update. ++ * Sets the iterator value to NEXT_ITER. ++ */ ++#define UPDATE_MULTIVAR(VAR, NEXT_ITER) \ ++ (ITER_VAR(VAR) = NEXT_ITER) ++ ++/* In the safe version of the multi-variable container iteration, the next ++ * value of the iterator is precalculated on the condition expression. ++ * This allows for the iterator to be freed inside the loop. ++ * ++ * Two versions of the macros are provided: ++ * ++ * * In the _SHORT version, the user does not have to provide a variable to ++ * store the next value of the iterator. Instead, a second iterator variable ++ * is declared in the INIT_ macro and its name is determined by ++ * ITER_NEXT_VAR(OBJECT). 
++ * ++ * * In the _LONG version, the user provides another variable of the same type ++ * as the iterator object variable to store the next containing object. ++ * We still declare an iterator variable inside the loop but in this case it's ++ * name is derived from the name of the next containing variable. ++ * The value of the next containing object will only be set ++ * (via OBJECT_CONTAINING) if an additional condition is statisfied. This ++ * second condition must ensure it is safe to call OBJECT_CONTAINING on the ++ * next iterator variable. ++ * With respect to the value of the next containing object: ++ * - Inside of the loop: the variable is either NULL or safe to use. ++ * - Outside of the loop: the variable is NULL if the loop ends normally. ++ * If the loop ends with a "break;" statement, rules of Inside the loop ++ * apply. ++ */ ++#define ITER_NEXT_VAR(NAME) NAME ## __iterator__next__ ++ ++/* Safe initialization declares both iterators. */ ++#define INIT_MULTIVAR_SAFE_SHORT(VAR, MEMBER, POINTER, ITER_TYPE) \ ++ INIT_MULTIVAR_SAFE_SHORT_EXP(VAR, MEMBER, POINTER, ITER_TYPE, (void) 0) ++ ++#define INIT_MULTIVAR_SAFE_SHORT_EXP(VAR, MEMBER, POINTER, ITER_TYPE, ...) \ ++ ITER_TYPE *ITER_VAR(VAR) = ( __VA_ARGS__ , (ITER_TYPE *) POINTER), \ ++ *ITER_NEXT_VAR(VAR) = NULL ++ ++/* Evaluate the condition expression and, if satisfied, update the _next_ ++ * iterator with the NEXT_EXPR. ++ * Both EXPR and NEXT_EXPR should only use ITER_VAR(VAR) and ++ * ITER_NEXT_VAR(VAR). ++ */ ++#define CONDITION_MULTIVAR_SAFE_SHORT(VAR, MEMBER, EXPR, NEXT_EXPR) \ ++ ((EXPR) ? \ ++ (((VAR) = OBJECT_CONTAINING(ITER_VAR(VAR), VAR, MEMBER)), \ ++ (NEXT_EXPR), 1) : \ ++ (((VAR) = NULL), 0)) ++ ++#define UPDATE_MULTIVAR_SAFE_SHORT(VAR) \ ++ UPDATE_MULTIVAR(VAR, ITER_NEXT_VAR(VAR)) ++ ++/* _LONG versions of the macros. 
*/ ++ ++#define INIT_MULTIVAR_SAFE_LONG(VAR, NEXT_VAR, MEMBER, POINTER, ITER_TYPE) \ ++ INIT_MULTIVAR_SAFE_LONG_EXP(VAR, NEXT_VAR, MEMBER, POINTER, ITER_TYPE, \ ++ (void) 0) \ ++ ++#define INIT_MULTIVAR_SAFE_LONG_EXP(VAR, NEXT_VAR, MEMBER, POINTER, \ ++ ITER_TYPE, ...) \ ++ ITER_TYPE *ITER_VAR(VAR) = ( __VA_ARGS__ , (ITER_TYPE *) POINTER), \ ++ *ITER_VAR(NEXT_VAR) = NULL ++ ++/* Evaluate the condition expression and, if satisfied, update the _next_ ++ * iterator with the NEXT_EXPR. After, evaluate the NEXT_COND and, if ++ * satisfied, set the value to NEXT_VAR. NEXT_COND must use ITER_VAR(NEXT_VAR). ++ * ++ * Both EXPR and NEXT_EXPR should only use ITER_VAR(VAR) and ++ * ITER_VAR(NEXT_VAR). ++ */ ++#define CONDITION_MULTIVAR_SAFE_LONG(VAR, NEXT_VAR, MEMBER, EXPR, NEXT_EXPR, \ ++ NEXT_COND) \ ++ ((EXPR) ? \ ++ (((VAR) = OBJECT_CONTAINING(ITER_VAR(VAR), VAR, MEMBER)), \ ++ (NEXT_EXPR), ((NEXT_COND) ? \ ++ ((NEXT_VAR) = \ ++ OBJECT_CONTAINING(ITER_VAR(NEXT_VAR), NEXT_VAR, MEMBER)) : \ ++ ((NEXT_VAR) = NULL)), 1) : \ ++ (((VAR) = NULL), ((NEXT_VAR) = NULL), 0)) ++ ++#define UPDATE_MULTIVAR_SAFE_LONG(VAR, NEXT_VAR) \ ++ UPDATE_MULTIVAR(VAR, ITER_VAR(NEXT_VAR)) ++ ++/* Helpers to allow overloading the *_SAFE iterator macros and select either ++ * the LONG or the SHORT version depending on the number of arguments. ++ */ ++#define GET_SAFE_MACRO2(_1, _2, NAME, ...) NAME ++#define GET_SAFE_MACRO3(_1, _2, _3, NAME, ...) NAME ++#define GET_SAFE_MACRO4(_1, _2, _3, _4, NAME, ...) NAME ++#define GET_SAFE_MACRO5(_1, _2, _3, _4, _5, NAME, ...) NAME ++#define GET_SAFE_MACRO6(_1, _2, _3, _4, _5, _6, NAME, ...) NAME ++#define GET_SAFE_MACRO(MAX_ARGS) GET_SAFE_MACRO ## MAX_ARGS ++ ++/* MSVC treats __VA_ARGS__ as a simple token in argument lists. Introduce ++ * a level of indirection to work around that. */ ++#define EXPAND_MACRO(name, args) name args ++ ++/* Overload the LONG and the SHORT version of the macros. 
MAX_ARGS is the ++ * maximum number of arguments (i.e: the number of arguments of the LONG ++ * version). */ ++#define OVERLOAD_SAFE_MACRO(LONG, SHORT, MAX_ARGS, ...) \ ++ EXPAND_MACRO(GET_SAFE_MACRO(MAX_ARGS), \ ++ (__VA_ARGS__, LONG, SHORT))(__VA_ARGS__) ++ + /* Returns the number of elements in ARRAY. */ + #define ARRAY_SIZE(ARRAY) __ARRAY_SIZE(ARRAY) + +diff --git a/ipsec/ovs-monitor-ipsec.in b/ipsec/ovs-monitor-ipsec.in +index a8b0705d9f..631a8fca80 100755 +--- a/ipsec/ovs-monitor-ipsec.in ++++ b/ipsec/ovs-monitor-ipsec.in +@@ -337,7 +337,14 @@ conn prevent_unencrypted_vxlan + Once strongSwan vici bindings will be distributed with major + Linux distributions this function could be simplified.""" + vlog.info("Refreshing StrongSwan configuration") +- subprocess.call([self.IPSEC, "update"]) ++ proc = subprocess.Popen([self.IPSEC, "update"], ++ stdout=subprocess.PIPE, ++ stderr=subprocess.PIPE) ++ outs, errs = proc.communicate() ++ if proc.returncode != 0: ++ vlog.err("StrongSwan failed to update configuration:\n" ++ "%s \n %s" % (str(outs), str(errs))) ++ + subprocess.call([self.IPSEC, "rereadsecrets"]) + # "ipsec update" command does not remove those tunnels that were + # updated or that disappeared from the ipsec.conf file. 
So, we have +@@ -708,6 +715,11 @@ conn prevent_unencrypted_vxlan + not re.match(r".*need --listen.*", pout): + break + ++ if re.match(r".*[F|f]ailed to initiate connection.*", pout): ++ vlog.err('Failed to initiate connection through' ++ ' Interface %s.\n' % (conn.split('-')[0])) ++ vlog.err(pout) ++ + def _nss_clear_database(self): + """Remove all OVS IPsec related state from the NSS database""" + try: +diff --git a/lib/cfm.c b/lib/cfm.c +index cc43e70e31..c3742f3de2 100644 +--- a/lib/cfm.c ++++ b/lib/cfm.c +@@ -416,7 +416,7 @@ cfm_run(struct cfm *cfm) OVS_EXCLUDED(mutex) + ovs_mutex_lock(&mutex); + if (timer_expired(&cfm->fault_timer)) { + long long int interval = cfm_fault_interval(cfm); +- struct remote_mp *rmp, *rmp_next; ++ struct remote_mp *rmp; + enum cfm_fault_reason old_cfm_fault = cfm->fault; + uint64_t old_flap_count = cfm->flap_count; + int old_health = cfm->health; +@@ -475,7 +475,7 @@ cfm_run(struct cfm *cfm) OVS_EXCLUDED(mutex) + cfm->rx_packets = rx_packets; + } + +- HMAP_FOR_EACH_SAFE (rmp, rmp_next, node, &cfm->remote_mps) { ++ HMAP_FOR_EACH_SAFE (rmp, node, &cfm->remote_mps) { + if (!rmp->recv) { + VLOG_INFO("%s: Received no CCM from RMP %"PRIu64" in the last" + " %lldms", cfm->name, rmp->mpid, +diff --git a/lib/classifier.c b/lib/classifier.c +index c4790ee6ba..0a89626cc3 100644 +--- a/lib/classifier.c ++++ b/lib/classifier.c +@@ -916,9 +916,9 @@ free_conjunctive_matches(struct hmap *matches, + struct conjunctive_match *cm_stubs, size_t n_cm_stubs) + { + if (hmap_count(matches) > n_cm_stubs) { +- struct conjunctive_match *cm, *next; ++ struct conjunctive_match *cm; + +- HMAP_FOR_EACH_SAFE (cm, next, hmap_node, matches) { ++ HMAP_FOR_EACH_SAFE (cm, hmap_node, matches) { + if (!(cm >= cm_stubs && cm < &cm_stubs[n_cm_stubs])) { + free(cm); + } +diff --git a/lib/cmap.h b/lib/cmap.h +index c502d23112..72e2ec5f71 100644 +--- a/lib/cmap.h ++++ b/lib/cmap.h +@@ -108,6 +108,8 @@ size_t cmap_replace(struct cmap *, struct cmap_node *old_node, + * + * CMAP 
and HASH are evaluated only once. NODE is evaluated many times. + * ++ * After a normal exit of the loop (not through a "break;" statement) NODE is ++ * NULL. + * + * Thread-safety + * ============= +@@ -128,15 +130,15 @@ size_t cmap_replace(struct cmap *, struct cmap_node *old_node, + * CMAP_FOR_EACH_WITH_HASH_PROTECTED may only be used if CMAP is guaranteed not + * to change during iteration. It may be very slightly faster. + */ +-#define CMAP_NODE_FOR_EACH(NODE, MEMBER, CMAP_NODE) \ +- for (INIT_CONTAINER(NODE, CMAP_NODE, MEMBER); \ +- (NODE) != OBJECT_CONTAINING(NULL, NODE, MEMBER); \ +- ASSIGN_CONTAINER(NODE, cmap_node_next(&(NODE)->MEMBER), MEMBER)) +-#define CMAP_NODE_FOR_EACH_PROTECTED(NODE, MEMBER, CMAP_NODE) \ +- for (INIT_CONTAINER(NODE, CMAP_NODE, MEMBER); \ +- (NODE) != OBJECT_CONTAINING(NULL, NODE, MEMBER); \ +- ASSIGN_CONTAINER(NODE, cmap_node_next_protected(&(NODE)->MEMBER), \ +- MEMBER)) ++#define CMAP_NODE_FOR_EACH(NODE, MEMBER, CMAP_NODE) \ ++ for (INIT_MULTIVAR(NODE, MEMBER, CMAP_NODE, struct cmap_node); \ ++ CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ ++ UPDATE_MULTIVAR(NODE, cmap_node_next(ITER_VAR(NODE)))) ++#define CMAP_NODE_FOR_EACH_PROTECTED(NODE, MEMBER, CMAP_NODE) \ ++ for (INIT_MULTIVAR(NODE, MEMBER, CMAP_NODE, struct cmap_node); \ ++ CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ ++ UPDATE_MULTIVAR(NODE, cmap_node_next_protected(ITER_VAR(NODE)))) ++ + #define CMAP_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, CMAP) \ + CMAP_NODE_FOR_EACH(NODE, MEMBER, cmap_find(CMAP, HASH)) + #define CMAP_FOR_EACH_WITH_HASH_PROTECTED(NODE, MEMBER, HASH, CMAP) \ +@@ -223,7 +225,7 @@ unsigned long cmap_find_batch(const struct cmap *cmap, unsigned long map, + ? 
(INIT_CONTAINER(NODE, (CURSOR)->node, MEMBER), \ + cmap_cursor_advance(CURSOR), \ + true) \ +- : false) ++ : (NODE = NULL, false)) + + #define CMAP_CURSOR_FOR_EACH(NODE, MEMBER, CURSOR, CMAP) \ + for (*(CURSOR) = cmap_cursor_start(CMAP); \ +diff --git a/lib/conntrack.c b/lib/conntrack.c +index 33a1a92953..08da4ddf79 100644 +--- a/lib/conntrack.c ++++ b/lib/conntrack.c +@@ -1526,14 +1526,14 @@ set_label(struct dp_packet *pkt, struct conn *conn, + static long long + ct_sweep(struct conntrack *ct, long long now, size_t limit) + { +- struct conn *conn, *next; ++ struct conn *conn; + long long min_expiration = LLONG_MAX; + size_t count = 0; + + ovs_mutex_lock(&ct->ct_lock); + + for (unsigned i = 0; i < N_CT_TM; i++) { +- LIST_FOR_EACH_SAFE (conn, next, exp_node, &ct->exp_lists[i]) { ++ LIST_FOR_EACH_SAFE (conn, exp_node, &ct->exp_lists[i]) { + ovs_mutex_lock(&conn->lock); + if (now < conn->expiration || count >= limit) { + min_expiration = MIN(min_expiration, conn->expiration); +@@ -2265,8 +2265,16 @@ set_sport_range(const struct nat_action_info_t *ni, const struct conn_key *k, + if (((ni->nat_action & NAT_ACTION_SNAT_ALL) == NAT_ACTION_SRC) || + ((ni->nat_action & NAT_ACTION_DST))) { + *curr = ntohs(k->src.port); +- *min = MIN_NAT_EPHEMERAL_PORT; +- *max = MAX_NAT_EPHEMERAL_PORT; ++ if (*curr < 512) { ++ *min = 1; ++ *max = 511; ++ } else if (*curr < 1024) { ++ *min = 600; ++ *max = 1023; ++ } else { ++ *min = MIN_NAT_EPHEMERAL_PORT; ++ *max = MAX_NAT_EPHEMERAL_PORT; ++ } + } else { + *min = ni->min_port; + *max = ni->max_port; +@@ -2389,6 +2397,26 @@ next_addr_in_range_guarded(union ct_addr *curr, union ct_addr *min, + return exhausted; + } + ++static bool ++nat_get_unique_l4(struct conntrack *ct, struct conn *nat_conn, ++ ovs_be16 *port, uint16_t curr, uint16_t min, ++ uint16_t max) ++{ ++ uint16_t orig = curr; ++ ++ FOR_EACH_PORT_IN_RANGE (curr, min, max) { ++ *port = htons(curr); ++ if (!conn_lookup(ct, &nat_conn->rev_key, ++ time_msec(), NULL, NULL)) { ++ return 
true; ++ } ++ } ++ ++ *port = htons(orig); ++ ++ return false; ++} ++ + /* This function tries to get a unique tuple. + * Every iteration checks that the reverse tuple doesn't + * collide with any existing one. +@@ -2403,9 +2431,11 @@ next_addr_in_range_guarded(union ct_addr *curr, union ct_addr *min, + * + * In case of DNAT: + * - For each dst IP address in the range (if any). +- * - For each dport in range (if any). +- * - Try to find a source port in the ephemeral range +- * (after testing the port used by the sender). ++ * - For each dport in range (if any) tries to find ++ * an unique tuple. ++ * - Eventually, if the previous attempt fails, ++ * tries to find a source port in the ephemeral ++ * range (after testing the port used by the sender). + * + * If none can be found, return exhaustion to the caller. */ + static bool +@@ -2436,6 +2466,11 @@ nat_get_unique_tuple(struct conntrack *ct, const struct conn *conn, + set_dport_range(nat_info, &conn->key, hash, &curr_dport, + &min_dport, &max_dport); + ++ if (pat_proto) { ++ nat_conn->rev_key.src.port = htons(curr_dport); ++ nat_conn->rev_key.dst.port = htons(curr_sport); ++ } ++ + another_round: + store_addr_to_key(&curr_addr, &nat_conn->rev_key, + nat_info->nat_action); +@@ -2449,15 +2484,19 @@ another_round: + goto next_addr; + } + +- FOR_EACH_PORT_IN_RANGE(curr_dport, min_dport, max_dport) { +- nat_conn->rev_key.src.port = htons(curr_dport); +- FOR_EACH_PORT_IN_RANGE(curr_sport, min_sport, max_sport) { +- nat_conn->rev_key.dst.port = htons(curr_sport); +- if (!conn_lookup(ct, &nat_conn->rev_key, +- time_msec(), NULL, NULL)) { +- return true; +- } +- } ++ bool found = false; ++ if (nat_info->nat_action & NAT_ACTION_DST_PORT) { ++ found = nat_get_unique_l4(ct, nat_conn, &nat_conn->rev_key.src.port, ++ curr_dport, min_dport, max_dport); ++ } ++ ++ if (!found) { ++ found = nat_get_unique_l4(ct, nat_conn, &nat_conn->rev_key.dst.port, ++ curr_sport, min_sport, max_sport); ++ } ++ ++ if (found) { ++ return true; + } 
+ + /* Check if next IP is in range and respin. Otherwise, notify +@@ -2857,8 +2896,8 @@ expectation_clean(struct conntrack *ct, const struct conn_key *parent_key) + { + ovs_rwlock_wrlock(&ct->resources_lock); + +- struct alg_exp_node *node, *next; +- HINDEX_FOR_EACH_WITH_HASH_SAFE (node, next, node_ref, ++ struct alg_exp_node *node; ++ HINDEX_FOR_EACH_WITH_HASH_SAFE (node, node_ref, + conn_key_hash(parent_key, ct->hash_basis), + &ct->alg_expectation_refs) { + if (!conn_key_cmp(&node->parent_key, parent_key)) { +diff --git a/lib/dns-resolve.c b/lib/dns-resolve.c +index d344514343..1afcc65adb 100644 +--- a/lib/dns-resolve.c ++++ b/lib/dns-resolve.c +@@ -189,8 +189,8 @@ dns_resolve_destroy(void) + ub_ctx_delete(ub_ctx__); + ub_ctx__ = NULL; + +- struct resolve_request *req, *next; +- HMAP_FOR_EACH_SAFE (req, next, hmap_node, &all_reqs__) { ++ struct resolve_request *req; ++ HMAP_FOR_EACH_SAFE (req, hmap_node, &all_reqs__) { + ub_resolve_free(req->ub_result); + free(req->addr); + free(req->name); +@@ -265,7 +265,7 @@ resolve_callback__(void *req_, int err, struct ub_result *result) + if (err != 0 || (result->qtype == ns_t_aaaa && !result->havedata)) { + ub_resolve_free(result); + req->state = RESOLVE_ERROR; +- VLOG_ERR_RL(&rl, "%s: failed to resolve", req->name); ++ VLOG_WARN_RL(&rl, "%s: failed to resolve", req->name); + return; + } + +diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c +index b7131ba3f1..11d9a00052 100644 +--- a/lib/dpif-netdev-avx512.c ++++ b/lib/dpif-netdev-avx512.c +@@ -159,7 +159,7 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd, + mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd); + } + +- uint32_t lookup_pkts_bitmask = (1ULL << batch_size) - 1; ++ uint32_t lookup_pkts_bitmask = (UINT64_C(1) << batch_size) - 1; + uint32_t iter = lookup_pkts_bitmask; + while (iter) { + uint32_t i = raw_ctz(iter); +@@ -183,7 +183,7 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd, + * classifed by vector 
mfex else do a scalar miniflow extract + * for that packet. + */ +- bool mfex_hit = !!(mf_mask & (1 << i)); ++ bool mfex_hit = !!(mf_mask & (UINT32_C(1) << i)); + + /* Check for a partial hardware offload match. */ + if (hwol_enabled) { +@@ -204,7 +204,7 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd, + + pkt_meta[i].bytes = dp_packet_size(packet); + phwol_hits++; +- hwol_emc_smc_hitmask |= (1 << i); ++ hwol_emc_smc_hitmask |= (UINT32_C(1) << i); + continue; + } + } +@@ -227,7 +227,7 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd, + if (f) { + rules[i] = &f->cr; + emc_hits++; +- hwol_emc_smc_hitmask |= (1 << i); ++ hwol_emc_smc_hitmask |= (UINT32_C(1) << i); + continue; + } + } +@@ -237,7 +237,7 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd, + if (f) { + rules[i] = &f->cr; + smc_hits++; +- smc_hitmask |= (1 << i); ++ smc_hitmask |= (UINT32_C(1) << i); + continue; + } + } +diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c +index c1c1fefb6a..a0fedb1376 100644 +--- a/lib/dpif-netdev-extract-avx512.c ++++ b/lib/dpif-netdev-extract-avx512.c +@@ -619,7 +619,7 @@ mfex_avx512_process(struct dp_packet_batch *packets, + }; + + /* This packet has its miniflow created, add to hitmask. */ +- hitmask |= 1 << i; ++ hitmask |= UINT32_C(1) << i; + } + + return hitmask; +diff --git a/lib/dpif-netdev-private-flow.h b/lib/dpif-netdev-private-flow.h +index 66016eb099..7425dd44e7 100644 +--- a/lib/dpif-netdev-private-flow.h ++++ b/lib/dpif-netdev-private-flow.h +@@ -104,6 +104,7 @@ struct dp_netdev_flow { + bool dead; + uint32_t mark; /* Unique flow mark for netdev offloading. */ + uint64_t simple_match_mark; /* Unique flow mark for the simple match. */ ++ odp_port_t orig_in_port; + + /* Statistics. 
*/ + struct dp_netdev_flow_stats stats; +diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c +index 9f35713ef5..51c276b1d8 100644 +--- a/lib/dpif-netdev.c ++++ b/lib/dpif-netdev.c +@@ -1932,13 +1932,13 @@ static void + dp_netdev_free(struct dp_netdev *dp) + OVS_REQUIRES(dp_netdev_mutex) + { +- struct dp_netdev_port *port, *next; ++ struct dp_netdev_port *port; + struct tx_bond *bond; + + shash_find_and_delete(&dp_netdevs, dp->name); + + ovs_rwlock_wrlock(&dp->port_rwlock); +- HMAP_FOR_EACH_SAFE (port, next, node, &dp->ports) { ++ HMAP_FOR_EACH_SAFE (port, node, &dp->ports) { + do_del_port(dp, port); + } + ovs_rwlock_unlock(&dp->port_rwlock); +@@ -3006,7 +3006,7 @@ static void + queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd, + struct dp_netdev_flow *flow, struct match *match, + const struct nlattr *actions, size_t actions_len, +- odp_port_t orig_in_port, int op) ++ int op) + { + struct dp_offload_thread_item *item; + struct dp_offload_flow_item *flow_offload; +@@ -3021,7 +3021,7 @@ queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd, + flow_offload->actions = xmalloc(actions_len); + memcpy(flow_offload->actions, actions, actions_len); + flow_offload->actions_len = actions_len; +- flow_offload->orig_in_port = orig_in_port; ++ flow_offload->orig_in_port = flow->orig_in_port; + + item->timestamp = pmd->ctx.now; + dp_netdev_offload_flow_enqueue(item); +@@ -4095,6 +4095,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, + flow->dead = false; + flow->batch = NULL; + flow->mark = INVALID_FLOW_MARK; ++ flow->orig_in_port = orig_in_port; + *CONST_CAST(unsigned *, &flow->pmd_id) = pmd->core_id; + *CONST_CAST(struct flow *, &flow->flow) = match->flow; + *CONST_CAST(ovs_u128 *, &flow->ufid) = *ufid; +@@ -4129,7 +4130,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, + } + + queue_netdev_flow_put(pmd, flow, match, actions, actions_len, +- orig_in_port, DP_NETDEV_FLOW_OFFLOAD_OP_ADD); ++ DP_NETDEV_FLOW_OFFLOAD_OP_ADD); + log_netdev_flow_change(flow, 
match, NULL, actions, actions_len); + + return flow; +@@ -4171,7 +4172,7 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd, + ovsrcu_set(&netdev_flow->actions, new_actions); + + queue_netdev_flow_put(pmd, netdev_flow, match, +- put->actions, put->actions_len, ODPP_NONE, ++ put->actions, put->actions_len, + DP_NETDEV_FLOW_OFFLOAD_OP_MOD); + log_netdev_flow_change(netdev_flow, match, old_actions, + put->actions, put->actions_len); +@@ -5684,23 +5685,28 @@ sched_numa_list_put_in_place(struct sched_numa_list *numa_list) + } + } + ++/* Returns 'true' if OVS rxq scheduling algorithm assigned any unpinned rxq to ++ * a PMD thread core on a non-local numa node. */ + static bool + sched_numa_list_cross_numa_polling(struct sched_numa_list *numa_list) + { + struct sched_numa *numa; + +- /* For each numa */ + HMAP_FOR_EACH (numa, node, &numa_list->numas) { +- /* For each pmd */ + for (int i = 0; i < numa->n_pmds; i++) { + struct sched_pmd *sched_pmd; + + sched_pmd = &numa->pmds[i]; +- /* For each rxq. */ ++ if (sched_pmd->isolated) { ++ /* All rxqs on this PMD thread core are pinned. */ ++ continue; ++ } + for (unsigned k = 0; k < sched_pmd->n_rxq; k++) { + struct dp_netdev_rxq *rxq = sched_pmd->rxqs[k]; +- +- if (!sched_pmd->isolated && ++ /* Check if the rxq is not pinned to a specific PMD thread core ++ * by the user AND the PMD thread core that OVS assigned is ++ * non-local to the rxq port. */ ++ if (rxq->core_id == OVS_CORE_UNSPEC && + rxq->pmd->numa_id != + netdev_get_numa_id(rxq->port->netdev)) { + return true; +@@ -6000,10 +6006,10 @@ sched_numa_list_schedule(struct sched_numa_list *numa_list, + /* Find any numa with available PMDs. 
*/ + for (int j = 0; j < n_numa; j++) { + numa = sched_numa_list_next(numa_list, last_cross_numa); ++ last_cross_numa = numa; + if (sched_numa_noniso_pmd_count(numa)) { + break; + } +- last_cross_numa = numa; + numa = NULL; + } + } +@@ -6111,7 +6117,7 @@ sched_numa_list_variance(struct sched_numa_list *numa_list) + * pmd_rebalance_dry_run() can be avoided when it is not needed. + */ + static bool +-pmd_reblance_dry_run_needed(struct dp_netdev *dp) ++pmd_rebalance_dry_run_needed(struct dp_netdev *dp) + OVS_REQ_RDLOCK(dp->port_rwlock) + { + struct dp_netdev_pmd_thread *pmd; +@@ -6342,11 +6348,11 @@ pmd_remove_stale_ports(struct dp_netdev *dp, + OVS_EXCLUDED(pmd->port_mutex) + OVS_REQ_RDLOCK(dp->port_rwlock) + { +- struct rxq_poll *poll, *poll_next; +- struct tx_port *tx, *tx_next; ++ struct rxq_poll *poll; ++ struct tx_port *tx; + + ovs_mutex_lock(&pmd->port_mutex); +- HMAP_FOR_EACH_SAFE (poll, poll_next, node, &pmd->poll_list) { ++ HMAP_FOR_EACH_SAFE (poll, node, &pmd->poll_list) { + struct dp_netdev_port *port = poll->rxq->port; + + if (port->need_reconfigure +@@ -6354,7 +6360,7 @@ pmd_remove_stale_ports(struct dp_netdev *dp, + dp_netdev_del_rxq_from_pmd(pmd, poll); + } + } +- HMAP_FOR_EACH_SAFE (tx, tx_next, node, &pmd->tx_ports) { ++ HMAP_FOR_EACH_SAFE (tx, node, &pmd->tx_ports) { + struct dp_netdev_port *port = tx->port; + + if (port->need_reconfigure +@@ -6430,8 +6436,7 @@ reconfigure_datapath(struct dp_netdev *dp) + /* We only reconfigure the ports that we determined above, because they're + * not being used by any pmd thread at the moment. If a port fails to + * reconfigure we remove it from the datapath. 
*/ +- struct dp_netdev_port *next_port; +- HMAP_FOR_EACH_SAFE (port, next_port, node, &dp->ports) { ++ HMAP_FOR_EACH_SAFE (port, node, &dp->ports) { + int err; + + if (!port->need_reconfigure) { +@@ -6487,10 +6492,10 @@ reconfigure_datapath(struct dp_netdev *dp) + } + + CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { +- struct rxq_poll *poll, *poll_next; ++ struct rxq_poll *poll; + + ovs_mutex_lock(&pmd->port_mutex); +- HMAP_FOR_EACH_SAFE (poll, poll_next, node, &pmd->poll_list) { ++ HMAP_FOR_EACH_SAFE (poll, node, &pmd->poll_list) { + if (poll->rxq->pmd != pmd) { + dp_netdev_del_rxq_from_pmd(pmd, poll); + +@@ -6682,7 +6687,7 @@ dpif_netdev_run(struct dpif *dpif) + if (pmd_rebalance && + !dp_netdev_is_reconf_required(dp) && + !ports_require_restart(dp) && +- pmd_reblance_dry_run_needed(dp) && ++ pmd_rebalance_dry_run_needed(dp) && + pmd_rebalance_dry_run(dp)) { + VLOG_INFO("PMD auto load balance dry run. " + "Requesting datapath reconfigure."); +@@ -7364,15 +7369,15 @@ static struct dp_netdev_pmd_thread * + dp_netdev_get_pmd(struct dp_netdev *dp, unsigned core_id) + { + struct dp_netdev_pmd_thread *pmd; +- const struct cmap_node *pnode; + +- pnode = cmap_find(&dp->poll_threads, hash_int(core_id, 0)); +- if (!pnode) { +- return NULL; ++ CMAP_FOR_EACH_WITH_HASH (pmd, node, hash_int(core_id, 0), ++ &dp->poll_threads) { ++ if (pmd->core_id == core_id) { ++ return dp_netdev_pmd_try_ref(pmd) ? pmd : NULL; ++ } + } +- pmd = CONTAINER_OF(pnode, struct dp_netdev_pmd_thread, node); + +- return dp_netdev_pmd_try_ref(pmd) ? pmd : NULL; ++ return NULL; + } + + /* Sets the 'struct dp_netdev_pmd_thread' for non-pmd threads. 
*/ +diff --git a/lib/fat-rwlock.c b/lib/fat-rwlock.c +index d913b2088f..771ccc9737 100644 +--- a/lib/fat-rwlock.c ++++ b/lib/fat-rwlock.c +@@ -97,14 +97,14 @@ fat_rwlock_init(struct fat_rwlock *rwlock) + void + fat_rwlock_destroy(struct fat_rwlock *rwlock) + { +- struct fat_rwlock_slot *slot, *next; ++ struct fat_rwlock_slot *slot; + + /* Order is important here. By destroying the thread-specific data first, + * before we destroy the slots, we ensure that the thread-specific + * data destructor can't race with our loop below. */ + ovsthread_key_delete(rwlock->key); + +- LIST_FOR_EACH_SAFE (slot, next, list_node, &rwlock->threads) { ++ LIST_FOR_EACH_SAFE (slot, list_node, &rwlock->threads) { + free_slot(slot); + } + ovs_mutex_destroy(&rwlock->mutex); +diff --git a/lib/hindex.h b/lib/hindex.h +index 876c5a9e39..ea7402587e 100644 +--- a/lib/hindex.h ++++ b/lib/hindex.h +@@ -128,18 +128,38 @@ void hindex_remove(struct hindex *, struct hindex_node *); + * Evaluates HASH only once. + */ + #define HINDEX_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, HINDEX) \ +- for (INIT_CONTAINER(NODE, hindex_node_with_hash(HINDEX, HASH), MEMBER); \ +- NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER); \ +- ASSIGN_CONTAINER(NODE, (NODE)->MEMBER.s, MEMBER)) ++ for (INIT_MULTIVAR(NODE, MEMBER, hindex_node_with_hash(HINDEX, HASH), \ ++ struct hindex_node); \ ++ CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ ++ UPDATE_MULTIVAR(NODE, ITER_VAR(NODE)->s)) + + /* Safe when NODE may be freed (not needed when NODE may be removed from the + * hash map but its members remain accessible and intact). */ +-#define HINDEX_FOR_EACH_WITH_HASH_SAFE(NODE, NEXT, MEMBER, HASH, HINDEX) \ +- for (INIT_CONTAINER(NODE, hindex_node_with_hash(HINDEX, HASH), MEMBER); \ +- (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER) \ +- ? 
INIT_CONTAINER(NEXT, (NODE)->MEMBER.s, MEMBER), 1 \ +- : 0); \ +- (NODE) = (NEXT)) ++#define HINDEX_FOR_EACH_WITH_HASH_SAFE_LONG(NODE, NEXT, MEMBER, HASH, HINDEX) \ ++ for (INIT_MULTIVAR_SAFE_LONG(NODE, NEXT, MEMBER, \ ++ hindex_node_with_hash(HINDEX, HASH), \ ++ struct hindex_node); \ ++ CONDITION_MULTIVAR_SAFE_LONG(NODE, NEXT, MEMBER, \ ++ ITER_VAR(NODE) != NULL, \ ++ ITER_VAR(NEXT) = ITER_VAR(NODE)->s, \ ++ ITER_VAR(NEXT) != NULL); \ ++ UPDATE_MULTIVAR_SAFE_LONG(NODE, NEXT)) ++ ++/* Short version of HINDEX_FOR_EACH_WITH_HASH_SAFE. */ ++#define HINDEX_FOR_EACH_WITH_HASH_SAFE_SHORT(NODE, MEMBER, HASH, HINDEX) \ ++ for (INIT_MULTIVAR_SAFE_SHORT(NODE, MEMBER, \ ++ hindex_node_with_hash(HINDEX, HASH), \ ++ struct hindex_node); \ ++ CONDITION_MULTIVAR_SAFE_SHORT(NODE, MEMBER, \ ++ ITER_VAR(NODE) != NULL, \ ++ ITER_NEXT_VAR(NODE) = ITER_VAR(NODE)->s); \ ++ UPDATE_MULTIVAR_SAFE_SHORT(NODE)) ++ ++#define HINDEX_FOR_EACH_WITH_HASH_SAFE(...) \ ++ OVERLOAD_SAFE_MACRO(HINDEX_FOR_EACH_WITH_HASH_SAFE_LONG, \ ++ HINDEX_FOR_EACH_WITH_HASH_SAFE_SHORT, \ ++ 5, __VA_ARGS__) ++ + + /* Returns the head node in 'hindex' with the given 'hash', or a null pointer + * if no nodes have that hash value. */ +@@ -157,19 +177,36 @@ hindex_node_with_hash(const struct hindex *hindex, size_t hash) + /* Iteration. */ + + /* Iterates through every node in HINDEX. */ +-#define HINDEX_FOR_EACH(NODE, MEMBER, HINDEX) \ +- for (INIT_CONTAINER(NODE, hindex_first(HINDEX), MEMBER); \ +- NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER); \ +- ASSIGN_CONTAINER(NODE, hindex_next(HINDEX, &(NODE)->MEMBER), MEMBER)) ++#define HINDEX_FOR_EACH(NODE, MEMBER, HINDEX) \ ++ for (INIT_MULTIVAR(NODE, MEMBER, hindex_first(HINDEX), \ ++ struct hindex_node); \ ++ CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ ++ UPDATE_MULTIVAR(NODE, hindex_next(HINDEX, ITER_VAR(NODE)))) + + /* Safe when NODE may be freed (not needed when NODE may be removed from the + * hash index but its members remain accessible and intact). 
*/ +-#define HINDEX_FOR_EACH_SAFE(NODE, NEXT, MEMBER, HINDEX) \ +- for (INIT_CONTAINER(NODE, hindex_first(HINDEX), MEMBER); \ +- (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER) \ +- ? INIT_CONTAINER(NEXT, hindex_next(HINDEX, &(NODE)->MEMBER), MEMBER), 1 \ +- : 0); \ +- (NODE) = (NEXT)) ++#define HINDEX_FOR_EACH_SAFE_LONG(NODE, NEXT, MEMBER, HINDEX) \ ++ for (INIT_MULTIVAR_SAFE_LONG(NODE, NEXT, MEMBER, hindex_first(HINDEX), \ ++ struct hindex_node); \ ++ CONDITION_MULTIVAR_SAFE_LONG(NODE, NEXT, MEMBER, \ ++ ITER_VAR(NODE) != NULL, \ ++ ITER_VAR(NEXT) = hindex_next(HINDEX, ITER_VAR(NODE)), \ ++ ITER_VAR(NEXT) != NULL); \ ++ UPDATE_MULTIVAR_SAFE_LONG(NODE, NEXT)) ++ ++/* Short version of HINDEX_FOR_EACH_SAFE. */ ++#define HINDEX_FOR_EACH_SAFE_SHORT(NODE, MEMBER, HINDEX) \ ++ for (INIT_MULTIVAR_SAFE_SHORT(NODE, MEMBER, hindex_first(HINDEX), \ ++ struct hindex_node); \ ++ CONDITION_MULTIVAR_SAFE_SHORT(NODE, MEMBER, \ ++ ITER_VAR(NODE) != NULL, \ ++ ITER_NEXT_VAR(NODE) = hindex_next(HINDEX, ITER_VAR(NODE))); \ ++ UPDATE_MULTIVAR_SAFE_SHORT(NODE)) ++ ++#define HINDEX_FOR_EACH_SAFE(...) 
\ ++ OVERLOAD_SAFE_MACRO(HINDEX_FOR_EACH_SAFE_LONG, \ ++ HINDEX_FOR_EACH_SAFE_SHORT, \ ++ 4, __VA_ARGS__) + + struct hindex_node *hindex_first(const struct hindex *); + struct hindex_node *hindex_next(const struct hindex *, +diff --git a/lib/hmapx.c b/lib/hmapx.c +index eadfe640ac..68192fc2c5 100644 +--- a/lib/hmapx.c ++++ b/lib/hmapx.c +@@ -123,9 +123,9 @@ hmapx_add_assert(struct hmapx *map, void *data) + void + hmapx_clear(struct hmapx *map) + { +- struct hmapx_node *node, *next; ++ struct hmapx_node *node; + +- HMAPX_FOR_EACH_SAFE (node, next, map) { ++ HMAPX_FOR_EACH_SAFE (node, map) { + hmapx_delete(map, node); + } + } +diff --git a/lib/hmapx.h b/lib/hmapx.h +index 06a6bbe67d..0b03411491 100644 +--- a/lib/hmapx.h ++++ b/lib/hmapx.h +@@ -67,10 +67,20 @@ bool hmapx_equals(const struct hmapx *, const struct hmapx *); + + /* Safe when NODE may be freed (not needed when NODE may be removed from the + * hash map but its members remain accessible and intact). */ +-#define HMAPX_FOR_EACH_SAFE(NODE, NEXT, HMAPX) \ +- HMAP_FOR_EACH_SAFE_INIT(NODE, NEXT, hmap_node, &(HMAPX)->map, \ ++#define HMAPX_FOR_EACH_SAFE_SHORT(NODE, HMAPX) \ ++ HMAP_FOR_EACH_SAFE_SHORT_INIT (NODE, hmap_node, &(HMAPX)->map, \ ++ BUILD_ASSERT_TYPE(NODE, struct hmapx_node *), \ ++ BUILD_ASSERT_TYPE(HMAPX, struct hmapx *)) ++ ++#define HMAPX_FOR_EACH_SAFE_LONG(NODE, NEXT, HMAPX) \ ++ HMAP_FOR_EACH_SAFE_LONG_INIT (NODE, NEXT, hmap_node, &(HMAPX)->map, \ + BUILD_ASSERT_TYPE(NODE, struct hmapx_node *), \ + BUILD_ASSERT_TYPE(NEXT, struct hmapx_node *), \ + BUILD_ASSERT_TYPE(HMAPX, struct hmapx *)) + ++#define HMAPX_FOR_EACH_SAFE(...) 
\ ++ OVERLOAD_SAFE_MACRO(HMAPX_FOR_EACH_SAFE_LONG, \ ++ HMAPX_FOR_EACH_SAFE_SHORT, \ ++ 3, __VA_ARGS__) ++ + #endif /* hmapx.h */ +diff --git a/lib/id-fpool.c b/lib/id-fpool.c +index 15cef5d003..7108c104a4 100644 +--- a/lib/id-fpool.c ++++ b/lib/id-fpool.c +@@ -166,11 +166,10 @@ void + id_fpool_destroy(struct id_fpool *pool) + { + struct id_slab *slab; +- struct id_slab *next; + size_t i; + + id_fpool_lock(&pool->pool_lock); +- LIST_FOR_EACH_SAFE (slab, next, node, &pool->free_slabs) { ++ LIST_FOR_EACH_SAFE (slab, node, &pool->free_slabs) { + free(slab); + } + ovs_list_poison(&pool->free_slabs); +diff --git a/lib/ipf.c b/lib/ipf.c +index 507db2aea2..d452663743 100644 +--- a/lib/ipf.c ++++ b/lib/ipf.c +@@ -1058,9 +1058,9 @@ ipf_send_completed_frags(struct ipf *ipf, struct dp_packet_batch *pb, + } + + ovs_mutex_lock(&ipf->ipf_lock); +- struct ipf_list *ipf_list, *next; ++ struct ipf_list *ipf_list; + +- LIST_FOR_EACH_SAFE (ipf_list, next, list_node, &ipf->frag_complete_list) { ++ LIST_FOR_EACH_SAFE (ipf_list, list_node, &ipf->frag_complete_list) { + if (ipf_send_frags_in_list(ipf, ipf_list, pb, IPF_FRAG_COMPLETED_LIST, + v6, now)) { + ipf_completed_list_clean(&ipf->frag_lists, ipf_list); +@@ -1090,10 +1090,10 @@ ipf_send_expired_frags(struct ipf *ipf, struct dp_packet_batch *pb, + } + + ovs_mutex_lock(&ipf->ipf_lock); +- struct ipf_list *ipf_list, *next; ++ struct ipf_list *ipf_list; + size_t lists_removed = 0; + +- LIST_FOR_EACH_SAFE (ipf_list, next, list_node, &ipf->frag_exp_list) { ++ LIST_FOR_EACH_SAFE (ipf_list, list_node, &ipf->frag_exp_list) { + if (now <= ipf_list->expiration || + lists_removed >= IPF_FRAG_LIST_MAX_EXPIRED) { + break; +@@ -1121,9 +1121,9 @@ ipf_execute_reass_pkts(struct ipf *ipf, struct dp_packet_batch *pb) + } + + ovs_mutex_lock(&ipf->ipf_lock); +- struct reassembled_pkt *rp, *next; ++ struct reassembled_pkt *rp; + +- LIST_FOR_EACH_SAFE (rp, next, rp_list_node, &ipf->reassembled_pkt_list) { ++ LIST_FOR_EACH_SAFE (rp, rp_list_node, 
&ipf->reassembled_pkt_list) { + if (!rp->list->reass_execute_ctx && + ipf_dp_packet_batch_add(pb, rp->pkt, false)) { + rp->list->reass_execute_ctx = rp->pkt; +@@ -1144,9 +1144,9 @@ ipf_post_execute_reass_pkts(struct ipf *ipf, + } + + ovs_mutex_lock(&ipf->ipf_lock); +- struct reassembled_pkt *rp, *next; ++ struct reassembled_pkt *rp; + +- LIST_FOR_EACH_SAFE (rp, next, rp_list_node, &ipf->reassembled_pkt_list) { ++ LIST_FOR_EACH_SAFE (rp, rp_list_node, &ipf->reassembled_pkt_list) { + const size_t pb_cnt = dp_packet_batch_size(pb); + int pb_idx; + struct dp_packet *pkt; +@@ -1271,15 +1271,15 @@ ipf_clean_thread_main(void *f) + + ovs_mutex_lock(&ipf->ipf_lock); + +- struct ipf_list *ipf_list, *next; +- LIST_FOR_EACH_SAFE (ipf_list, next, list_node, ++ struct ipf_list *ipf_list; ++ LIST_FOR_EACH_SAFE (ipf_list, list_node, + &ipf->frag_exp_list) { + if (ipf_purge_list_check(ipf, ipf_list, now)) { + ipf_expiry_list_clean(&ipf->frag_lists, ipf_list); + } + } + +- LIST_FOR_EACH_SAFE (ipf_list, next, list_node, ++ LIST_FOR_EACH_SAFE (ipf_list, list_node, + &ipf->frag_complete_list) { + if (ipf_purge_list_check(ipf, ipf_list, now)) { + ipf_completed_list_clean(&ipf->frag_lists, ipf_list); +diff --git a/lib/json.c b/lib/json.c +index 720c73d940..042aab83b3 100644 +--- a/lib/json.c ++++ b/lib/json.c +@@ -397,9 +397,9 @@ json_destroy__(struct json *json) + static void + json_destroy_object(struct shash *object) + { +- struct shash_node *node, *next; ++ struct shash_node *node; + +- SHASH_FOR_EACH_SAFE (node, next, object) { ++ SHASH_FOR_EACH_SAFE (node, object) { + struct json *value = node->data; + + json_destroy(value); +diff --git a/lib/lacp.c b/lib/lacp.c +index 89d711225f..3252f17ebf 100644 +--- a/lib/lacp.c ++++ b/lib/lacp.c +@@ -280,10 +280,10 @@ void + lacp_unref(struct lacp *lacp) OVS_EXCLUDED(mutex) + { + if (lacp && ovs_refcount_unref_relaxed(&lacp->ref_cnt) == 1) { +- struct member *member, *next; ++ struct member *member; + + lacp_lock(); +- HMAP_FOR_EACH_SAFE 
(member, next, node, &lacp->members) { ++ HMAP_FOR_EACH_SAFE (member, node, &lacp->members) { + member_destroy(member); + } + +diff --git a/lib/lldp/lldpd-structs.c b/lib/lldp/lldpd-structs.c +index 499b441746..a8c7fad098 100644 +--- a/lib/lldp/lldpd-structs.c ++++ b/lib/lldp/lldpd-structs.c +@@ -64,11 +64,11 @@ lldpd_remote_cleanup(struct lldpd_hardware *hw, + struct lldpd_port *), + bool all) + { +- struct lldpd_port *port, *port_next; ++ struct lldpd_port *port; + time_t now = time_now(); + + VLOG_DBG("cleanup remote port on %s", hw->h_ifname); +- LIST_FOR_EACH_SAFE (port, port_next, p_entries, &hw->h_rports) { ++ LIST_FOR_EACH_SAFE (port, p_entries, &hw->h_rports) { + bool del = all; + if (!all && expire && + (now >= port->p_lastupdate + port->p_chassis->c_ttl)) { +@@ -99,11 +99,10 @@ static void + lldpd_aa_maps_cleanup(struct lldpd_port *port) + { + struct lldpd_aa_isid_vlan_maps_tlv *isid_vlan_map = NULL; +- struct lldpd_aa_isid_vlan_maps_tlv *isid_vlan_map_next = NULL; + + if (!ovs_list_is_empty(&port->p_isid_vlan_maps)) { + +- LIST_FOR_EACH_SAFE (isid_vlan_map, isid_vlan_map_next, m_entries, ++ LIST_FOR_EACH_SAFE (isid_vlan_map, m_entries, + &port->p_isid_vlan_maps) { + + ovs_list_remove(&isid_vlan_map->m_entries); +diff --git a/lib/lldp/lldpd.c b/lib/lldp/lldpd.c +index a024dc5e58..403f1f525a 100644 +--- a/lib/lldp/lldpd.c ++++ b/lib/lldp/lldpd.c +@@ -134,12 +134,12 @@ lldpd_hardware_cleanup(struct lldpd *cfg, struct lldpd_hardware *hardware) + void + lldpd_cleanup(struct lldpd *cfg) + { +- struct lldpd_hardware *hw, *hw_next; +- struct lldpd_chassis *chassis, *chassis_next; ++ struct lldpd_hardware *hw; ++ struct lldpd_chassis *chassis; + + VLOG_DBG("cleanup all ports"); + +- LIST_FOR_EACH_SAFE (hw, hw_next, h_entries, &cfg->g_hardware) { ++ LIST_FOR_EACH_SAFE (hw, h_entries, &cfg->g_hardware) { + if (!hw->h_flags) { + ovs_list_remove(&hw->h_entries); + lldpd_remote_cleanup(hw, NULL, true); +@@ -151,7 +151,7 @@ lldpd_cleanup(struct lldpd *cfg) + + 
VLOG_DBG("cleanup all chassis"); + +- LIST_FOR_EACH_SAFE (chassis, chassis_next, list, &cfg->g_chassis) { ++ LIST_FOR_EACH_SAFE (chassis, list, &cfg->g_chassis) { + if (chassis->c_refcount == 0) { + ovs_list_remove(&chassis->list); + lldpd_chassis_cleanup(chassis, 1); +diff --git a/lib/mac-learning.c b/lib/mac-learning.c +index 3fcd7d9b77..a60794fb26 100644 +--- a/lib/mac-learning.c ++++ b/lib/mac-learning.c +@@ -244,10 +244,10 @@ void + mac_learning_unref(struct mac_learning *ml) + { + if (ml && ovs_refcount_unref(&ml->ref_cnt) == 1) { +- struct mac_entry *e, *next; ++ struct mac_entry *e; + + ovs_rwlock_wrlock(&ml->rwlock); +- HMAP_FOR_EACH_SAFE (e, next, hmap_node, &ml->table) { ++ HMAP_FOR_EACH_SAFE (e, hmap_node, &ml->table) { + mac_learning_expire(ml, e); + } + hmap_destroy(&ml->table); +diff --git a/lib/mcast-snooping.c b/lib/mcast-snooping.c +index 6730301b67..029ca28558 100644 +--- a/lib/mcast-snooping.c ++++ b/lib/mcast-snooping.c +@@ -356,11 +356,11 @@ mcast_snooping_prune_expired(struct mcast_snooping *ms, + OVS_REQ_WRLOCK(ms->rwlock) + { + int expired; +- struct mcast_group_bundle *b, *next_b; ++ struct mcast_group_bundle *b; + time_t timenow = time_now(); + + expired = 0; +- LIST_FOR_EACH_SAFE (b, next_b, bundle_node, &grp->bundle_lru) { ++ LIST_FOR_EACH_SAFE (b, bundle_node, &grp->bundle_lru) { + /* This list is sorted on expiration time. 
*/ + if (b->expires > timenow) { + break; +@@ -946,15 +946,15 @@ mcast_snooping_wait(struct mcast_snooping *ms) + void + mcast_snooping_flush_bundle(struct mcast_snooping *ms, void *port) + { +- struct mcast_group *g, *next_g; +- struct mcast_mrouter_bundle *m, *next_m; ++ struct mcast_group *g; ++ struct mcast_mrouter_bundle *m; + + if (!mcast_snooping_enabled(ms)) { + return; + } + + ovs_rwlock_wrlock(&ms->rwlock); +- LIST_FOR_EACH_SAFE (g, next_g, group_node, &ms->group_lru) { ++ LIST_FOR_EACH_SAFE (g, group_node, &ms->group_lru) { + if (mcast_group_delete_bundle(ms, g, port)) { + ms->need_revalidate = true; + +@@ -964,7 +964,7 @@ mcast_snooping_flush_bundle(struct mcast_snooping *ms, void *port) + } + } + +- LIST_FOR_EACH_SAFE (m, next_m, mrouter_node, &ms->mrouter_lru) { ++ LIST_FOR_EACH_SAFE (m, mrouter_node, &ms->mrouter_lru) { + if (m->port == port) { + mcast_snooping_flush_mrouter(m); + ms->need_revalidate = true; +diff --git a/lib/namemap.c b/lib/namemap.c +index 785cda4c27..dd317ea52e 100644 +--- a/lib/namemap.c ++++ b/lib/namemap.c +@@ -90,9 +90,9 @@ void + namemap_destroy(struct namemap *map) + { + if (map) { +- struct namemap_node *node, *next; ++ struct namemap_node *node; + +- HMAP_FOR_EACH_SAFE (node, next, name_node, &map->by_name) { ++ HMAP_FOR_EACH_SAFE (node, name_node, &map->by_name) { + hmap_remove(&map->by_name, &node->name_node); + hmap_remove(&map->by_number, &node->number_node); + free(node->name); +diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c +index 482400d8d1..ca3f2431ea 100644 +--- a/lib/netdev-afxdp.c ++++ b/lib/netdev-afxdp.c +@@ -235,11 +235,11 @@ netdev_afxdp_cleanup_unused_pool(struct unused_pool *pool) + static void + netdev_afxdp_sweep_unused_pools(void *aux OVS_UNUSED) + { +- struct unused_pool *pool, *next; ++ struct unused_pool *pool; + unsigned int count; + + ovs_mutex_lock(&unused_pools_mutex); +- LIST_FOR_EACH_SAFE (pool, next, list_node, &unused_pools) { ++ LIST_FOR_EACH_SAFE (pool, list_node, &unused_pools) { + + 
count = umem_pool_count(&pool->umem_info->mpool); + ovs_assert(count + pool->lost_in_rings <= NUM_FRAMES); +diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c +index b6b29c75e3..4d32781a95 100644 +--- a/lib/netdev-dpdk.c ++++ b/lib/netdev-dpdk.c +@@ -622,9 +622,9 @@ dpdk_mp_full(const struct rte_mempool *mp) OVS_REQUIRES(dpdk_mp_mutex) + static void + dpdk_mp_sweep(void) OVS_REQUIRES(dpdk_mp_mutex) + { +- struct dpdk_mp *dmp, *next; ++ struct dpdk_mp *dmp; + +- LIST_FOR_EACH_SAFE (dmp, next, list_node, &dpdk_mp_list) { ++ LIST_FOR_EACH_SAFE (dmp, list_node, &dpdk_mp_list) { + if (!dmp->refcount && dpdk_mp_full(dmp->mp)) { + VLOG_DBG("Freeing mempool \"%s\"", dmp->mp->name); + ovs_list_remove(&dmp->list_node); +@@ -4686,11 +4686,11 @@ trtcm_policer_qos_construct(const struct smap *details, + static void + trtcm_policer_qos_destruct(struct qos_conf *conf) + { +- struct trtcm_policer_queue *queue, *next_queue; ++ struct trtcm_policer_queue *queue; + struct trtcm_policer *policer = CONTAINER_OF(conf, struct trtcm_policer, + qos_conf); + +- HMAP_FOR_EACH_SAFE (queue, next_queue, hmap_node, &policer->queues) { ++ HMAP_FOR_EACH_SAFE (queue, hmap_node, &policer->queues) { + hmap_remove(&policer->queues, &queue->hmap_node); + free(queue); + } +diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c +index 620a451dec..9d125029de 100644 +--- a/lib/netdev-linux.c ++++ b/lib/netdev-linux.c +@@ -5331,11 +5331,11 @@ static void + hfsc_tc_destroy(struct tc *tc) + { + struct hfsc *hfsc; +- struct hfsc_class *hc, *next; ++ struct hfsc_class *hc; + + hfsc = CONTAINER_OF(tc, struct hfsc, tc); + +- HMAP_FOR_EACH_SAFE (hc, next, tc_queue.hmap_node, &hfsc->tc.queues) { ++ HMAP_FOR_EACH_SAFE (hc, tc_queue.hmap_node, &hfsc->tc.queues) { + hmap_remove(&hfsc->tc.queues, &hc->tc_queue.hmap_node); + free(hc); + } +diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c +index 9845e8d3fe..a41b627585 100644 +--- a/lib/netdev-offload-tc.c ++++ b/lib/netdev-offload-tc.c +@@ -417,11 +417,11 @@ 
delete_chains_from_netdev(struct netdev *netdev, struct tcf_id *id) + static int + netdev_tc_flow_flush(struct netdev *netdev) + { +- struct ufid_tc_data *data, *next; ++ struct ufid_tc_data *data; + int err; + + ovs_mutex_lock(&ufid_lock); +- HMAP_FOR_EACH_SAFE (data, next, tc_to_ufid_node, &tc_to_ufid) { ++ HMAP_FOR_EACH_SAFE (data, tc_to_ufid_node, &tc_to_ufid) { + if (data->netdev != netdev) { + continue; + } +@@ -481,10 +481,10 @@ netdev_tc_flow_dump_destroy(struct netdev_flow_dump *dump) + + static void + parse_flower_rewrite_to_netlink_action(struct ofpbuf *buf, +- struct tc_flower *flower) ++ struct tc_action *action) + { +- char *mask = (char *) &flower->rewrite.mask; +- char *data = (char *) &flower->rewrite.key; ++ char *mask = (char *) &action->rewrite.mask; ++ char *data = (char *) &action->rewrite.key; + + for (int type = 0; type < ARRAY_SIZE(set_flower_map); type++) { + char *put = NULL; +@@ -585,8 +585,10 @@ parse_tc_flower_to_stats(struct tc_flower *flower, + } + + memset(stats, 0, sizeof *stats); +- stats->n_packets = get_32aligned_u64(&flower->stats.n_packets); +- stats->n_bytes = get_32aligned_u64(&flower->stats.n_bytes); ++ stats->n_packets = get_32aligned_u64(&flower->stats_sw.n_packets); ++ stats->n_packets += get_32aligned_u64(&flower->stats_hw.n_packets); ++ stats->n_bytes = get_32aligned_u64(&flower->stats_sw.n_bytes); ++ stats->n_bytes += get_32aligned_u64(&flower->stats_hw.n_bytes); + stats->used = flower->lastused; + } + +@@ -877,7 +879,7 @@ parse_tc_flower_to_match(struct tc_flower *flower, + } + break; + case TC_ACT_PEDIT: { +- parse_flower_rewrite_to_netlink_action(buf, flower); ++ parse_flower_rewrite_to_netlink_action(buf, action); + } + break; + case TC_ACT_ENCAP: { +@@ -1222,8 +1224,8 @@ parse_put_flow_set_masked_action(struct tc_flower *flower, + uint64_t set_stub[1024 / 8]; + struct ofpbuf set_buf = OFPBUF_STUB_INITIALIZER(set_stub); + char *set_data, *set_mask; +- char *key = (char *) &flower->rewrite.key; +- char *mask = 
(char *) &flower->rewrite.mask; ++ char *key = (char *) &action->rewrite.key; ++ char *mask = (char *) &action->rewrite.mask; + const struct nlattr *attr; + int i, j, type; + size_t size; +@@ -1265,14 +1267,6 @@ parse_put_flow_set_masked_action(struct tc_flower *flower, + } + } + +- if (!is_all_zeros(&flower->rewrite, sizeof flower->rewrite)) { +- if (flower->rewrite.rewrite == false) { +- flower->rewrite.rewrite = true; +- action->type = TC_ACT_PEDIT; +- flower->action_count++; +- } +- } +- + if (hasmask && !is_all_zeros(set_mask, size)) { + VLOG_DBG_RL(&rl, "unsupported sub attribute of set action type %d", + type); +@@ -1281,6 +1275,8 @@ parse_put_flow_set_masked_action(struct tc_flower *flower, + } + + ofpbuf_uninit(&set_buf); ++ action->type = TC_ACT_PEDIT; ++ flower->action_count++; + return 0; + } + +@@ -1541,6 +1537,12 @@ parse_match_ct_state_to_flower(struct tc_flower *flower, struct match *match) + flower->key.ct_state &= ~(TCA_FLOWER_KEY_CT_FLAGS_NEW); + flower->mask.ct_state &= ~(TCA_FLOWER_KEY_CT_FLAGS_NEW); + } ++ ++ if (flower->key.ct_state && ++ !(flower->key.ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED)) { ++ flower->key.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_TRACKED; ++ flower->mask.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_TRACKED; ++ } + } + + if (mask->ct_zone) { +@@ -1841,7 +1843,25 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, + VLOG_DBG_RL(&rl, "Can't find netdev for output port %d", port); + return ENODEV; + } ++ ++ if (!netdev_flow_api_equals(netdev, outdev)) { ++ VLOG_DBG_RL(&rl, ++ "Flow API provider mismatch between ingress (%s) " ++ "and egress (%s) ports", ++ netdev_get_name(netdev), netdev_get_name(outdev)); ++ netdev_close(outdev); ++ return EOPNOTSUPP; ++ } ++ + action->out.ifindex_out = netdev_get_ifindex(outdev); ++ if (action->out.ifindex_out < 0) { ++ VLOG_DBG_RL(&rl, ++ "Can't find ifindex for output port %s, error %d", ++ netdev_get_name(outdev), action->out.ifindex_out); ++ netdev_close(outdev); ++ return 
-action->out.ifindex_out; ++ } ++ + action->out.ingress = is_internal_port(netdev_get_type(outdev)); + action->type = TC_ACT_OUTPUT; + flower.action_count++; +@@ -2015,9 +2035,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED, + if (stats) { + memset(stats, 0, sizeof *stats); + if (!tc_get_flower(&id, &flower)) { +- stats->n_packets = get_32aligned_u64(&flower.stats.n_packets); +- stats->n_bytes = get_32aligned_u64(&flower.stats.n_bytes); +- stats->used = flower.lastused; ++ parse_tc_flower_to_stats(&flower, stats); + } + } + +diff --git a/lib/odp-util.c b/lib/odp-util.c +index 9a705cffa3..3ae850b669 100644 +--- a/lib/odp-util.c ++++ b/lib/odp-util.c +@@ -4630,6 +4630,11 @@ odp_flow_format(const struct nlattr *key, size_t key_len, + ds_put_char(ds, ','); + } + ds_put_cstr(ds, "eth()"); ++ } else if (attr_type == OVS_KEY_ATTR_PACKET_TYPE && is_wildcard) { ++ /* See the above help text, however in the case where the ++ * packet type is not shown, we still need to display the ++ * eth() header if the packets type is wildcarded. 
*/ ++ has_packet_type_key = false; + } + ofpbuf_clear(&ofp); + } +diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c +index 006837c2e1..a0b70a89d7 100644 +--- a/lib/ofp-actions.c ++++ b/lib/ofp-actions.c +@@ -853,7 +853,9 @@ decode_NXAST_RAW_CONTROLLER2(const struct ext_action_header *eah, + case NXAC2PT_REASON: { + uint8_t u8; + error = ofpprop_parse_u8(&payload, &u8); +- oc->reason = u8; ++ if (!error) { ++ oc->reason = u8; ++ } + break; + } + +diff --git a/lib/ofp-msgs.c b/lib/ofp-msgs.c +index fec54f75f8..93aa812978 100644 +--- a/lib/ofp-msgs.c ++++ b/lib/ofp-msgs.c +@@ -1123,8 +1123,8 @@ ofpmp_partial_error(struct hmap *assembler, struct ofpmp_partial *p, + void + ofpmp_assembler_clear(struct hmap *assembler) + { +- struct ofpmp_partial *p, *next; +- HMAP_FOR_EACH_SAFE (p, next, hmap_node, assembler) { ++ struct ofpmp_partial *p; ++ HMAP_FOR_EACH_SAFE (p, hmap_node, assembler) { + ofpmp_partial_destroy(assembler, p); + } + } +@@ -1290,8 +1290,8 @@ ofpmp_assembler_execute(struct hmap *assembler, struct ofpbuf *msg, + * on either side by parts with 0-byte bodies. We remove the 0-byte + * ones here to simplify processing later. 
+ */ +- struct ofpbuf *b, *next; +- LIST_FOR_EACH_SAFE (b, next, list_node, out) { ++ struct ofpbuf *b; ++ LIST_FOR_EACH_SAFE (b, list_node, out) { + if (b->size <= min_len && !ovs_list_is_short(out)) { + ovs_list_remove(&b->list_node); + ofpbuf_delete(b); +diff --git a/lib/ofp-packet.c b/lib/ofp-packet.c +index 4579548ee1..9485ddfc93 100644 +--- a/lib/ofp-packet.c ++++ b/lib/ofp-packet.c +@@ -133,7 +133,9 @@ decode_nx_packet_in2(const struct ofp_header *oh, bool loose, + case NXPINT_FULL_LEN: { + uint32_t u32; + error = ofpprop_parse_u32(&payload, &u32); +- *total_len = u32; ++ if (!error) { ++ *total_len = u32; ++ } + break; + } + +@@ -152,7 +154,9 @@ decode_nx_packet_in2(const struct ofp_header *oh, bool loose, + case NXPINT_REASON: { + uint8_t reason; + error = ofpprop_parse_u8(&payload, &reason); +- pin->reason = reason; ++ if (!error) { ++ pin->reason = reason; ++ } + break; + } + +@@ -883,7 +887,9 @@ ofputil_decode_packet_in_private(const struct ofp_header *oh, bool loose, + case NXCPT_ODP_PORT: { + uint32_t value; + error = ofpprop_parse_u32(&payload, &value); +- pin->odp_port = u32_to_odp(value); ++ if (!error) { ++ pin->odp_port = u32_to_odp(value); ++ } + break; + } + +diff --git a/lib/ovs-lldp.c b/lib/ovs-lldp.c +index 162311fa45..a9d205ec83 100644 +--- a/lib/ovs-lldp.c ++++ b/lib/ovs-lldp.c +@@ -559,9 +559,9 @@ aa_mapping_unregister_mapping(struct lldp *lldp, + struct lldpd_hardware *hw, + struct aa_mapping_internal *m) + { +- struct lldpd_aa_isid_vlan_maps_tlv *lm, *lm_next; ++ struct lldpd_aa_isid_vlan_maps_tlv *lm; + +- LIST_FOR_EACH_SAFE (lm, lm_next, m_entries, ++ LIST_FOR_EACH_SAFE (lm, m_entries, + &hw->h_lport.p_isid_vlan_maps) { + uint32_t isid = lm->isid_vlan_data.isid; + +@@ -953,8 +953,8 @@ lldp_ref(const struct lldp *lldp_) + void + lldp_destroy_dummy(struct lldp *lldp) + { +- struct lldpd_hardware *hw, *hw_next; +- struct lldpd_chassis *chassis, *chassis_next; ++ struct lldpd_hardware *hw; ++ struct lldpd_chassis *chassis; + struct lldpd 
*cfg; + + if (!lldp) { +@@ -963,13 +963,13 @@ lldp_destroy_dummy(struct lldp *lldp) + + cfg = lldp->lldpd; + +- LIST_FOR_EACH_SAFE (hw, hw_next, h_entries, &cfg->g_hardware) { ++ LIST_FOR_EACH_SAFE (hw, h_entries, &cfg->g_hardware) { + ovs_list_remove(&hw->h_entries); + free(hw->h_lport.p_lastframe); + free(hw); + } + +- LIST_FOR_EACH_SAFE (chassis, chassis_next, list, &cfg->g_chassis) { ++ LIST_FOR_EACH_SAFE (chassis, list, &cfg->g_chassis) { + ovs_list_remove(&chassis->list); + free(chassis); + } +diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h +index ecc251a7ff..83bd10cca5 100644 +--- a/lib/ovs-numa.h ++++ b/lib/ovs-numa.h +@@ -68,9 +68,9 @@ void ovs_numa_dump_destroy(struct ovs_numa_dump *); + int ovs_numa_thread_setaffinity_core(unsigned core_id); + + #define FOR_EACH_CORE_ON_DUMP(ITER, DUMP) \ +- HMAP_FOR_EACH((ITER), hmap_node, &(DUMP)->cores) ++ HMAP_FOR_EACH (ITER, hmap_node, &(DUMP)->cores) + + #define FOR_EACH_NUMA_ON_DUMP(ITER, DUMP) \ +- HMAP_FOR_EACH((ITER), hmap_node, &(DUMP)->numas) ++ HMAP_FOR_EACH (ITER, hmap_node, &(DUMP)->numas) + + #endif /* ovs-numa.h */ +diff --git a/lib/ovsdb-cs.c b/lib/ovsdb-cs.c +index dead31275d..9713c7dc7c 100644 +--- a/lib/ovsdb-cs.c ++++ b/lib/ovsdb-cs.c +@@ -900,8 +900,8 @@ ovsdb_cs_db_get_table(struct ovsdb_cs_db *db, const char *table) + static void + ovsdb_cs_db_destroy_tables(struct ovsdb_cs_db *db) + { +- struct ovsdb_cs_db_table *table, *next; +- HMAP_FOR_EACH_SAFE (table, next, hmap_node, &db->tables) { ++ struct ovsdb_cs_db_table *table; ++ HMAP_FOR_EACH_SAFE (table, hmap_node, &db->tables) { + json_destroy(table->ack_cond); + json_destroy(table->req_cond); + json_destroy(table->new_cond); +@@ -1793,8 +1793,8 @@ ovsdb_cs_update_server_row(struct server_row *row, + static void + ovsdb_cs_clear_server_rows(struct ovsdb_cs *cs) + { +- struct server_row *row, *next; +- HMAP_FOR_EACH_SAFE (row, next, hmap_node, &cs->server_rows) { ++ struct server_row *row; ++ HMAP_FOR_EACH_SAFE (row, hmap_node, &cs->server_rows) { + 
ovsdb_cs_delete_server_row(cs, row); + } + } +@@ -2128,9 +2128,9 @@ void + ovsdb_cs_free_schema(struct shash *schema) + { + if (schema) { +- struct shash_node *node, *next; ++ struct shash_node *node; + +- SHASH_FOR_EACH_SAFE (node, next, schema) { ++ SHASH_FOR_EACH_SAFE (node, schema) { + struct sset *sset = node->data; + sset_destroy(sset); + free(sset); +diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c +index c19128d55c..882ede7559 100644 +--- a/lib/ovsdb-idl.c ++++ b/lib/ovsdb-idl.c +@@ -389,25 +389,25 @@ ovsdb_idl_clear(struct ovsdb_idl *db) + */ + for (size_t i = 0; i < db->class_->n_tables; i++) { + struct ovsdb_idl_table *table = &db->tables[i]; +- struct ovsdb_idl_row *row, *next_row; ++ struct ovsdb_idl_row *row; + + if (hmap_is_empty(&table->rows)) { + continue; + } + +- HMAP_FOR_EACH_SAFE (row, next_row, hmap_node, &table->rows) { +- struct ovsdb_idl_arc *arc, *next_arc; ++ HMAP_FOR_EACH_SAFE (row, hmap_node, &table->rows) { ++ struct ovsdb_idl_arc *arc; + + if (!ovsdb_idl_row_is_orphan(row)) { + ovsdb_idl_remove_from_indexes(row); + ovsdb_idl_row_unparse(row); + } +- LIST_FOR_EACH_SAFE (arc, next_arc, src_node, &row->src_arcs) { ++ LIST_FOR_EACH_SAFE (arc, src_node, &row->src_arcs) { + ovs_list_remove(&arc->src_node); + ovs_list_remove(&arc->dst_node); + free(arc); + } +- LIST_FOR_EACH_SAFE (arc, next_arc, dst_node, &row->dst_arcs) { ++ LIST_FOR_EACH_SAFE (arc, dst_node, &row->dst_arcs) { + ovs_list_remove(&arc->src_node); + ovs_list_remove(&arc->dst_node); + free(arc); +@@ -1041,8 +1041,8 @@ ovsdb_idl_condition_destroy(struct ovsdb_idl_condition *cond) + void + ovsdb_idl_condition_clear(struct ovsdb_idl_condition *cond) + { +- struct ovsdb_idl_clause *clause, *next; +- HMAP_FOR_EACH_SAFE (clause, next, hmap_node, &cond->clauses) { ++ struct ovsdb_idl_clause *clause; ++ HMAP_FOR_EACH_SAFE (clause, hmap_node, &cond->clauses) { + hmap_remove(&cond->clauses, &clause->hmap_node); + ovsdb_idl_clause_destroy(clause); + } +@@ -1345,9 +1345,9 @@ 
ovsdb_idl_track_clear__(struct ovsdb_idl *idl, bool flush_all) + struct ovsdb_idl_table *table = &idl->tables[i]; + + if (!ovs_list_is_empty(&table->track_list)) { +- struct ovsdb_idl_row *row, *next; ++ struct ovsdb_idl_row *row; + +- LIST_FOR_EACH_SAFE(row, next, track_node, &table->track_list) { ++ LIST_FOR_EACH_SAFE (row, track_node, &table->track_list) { + if (row->updated) { + free(row->updated); + row->updated = NULL; +@@ -1480,9 +1480,9 @@ ovsdb_idl_parse_update(struct ovsdb_idl *idl, + static void + ovsdb_idl_reparse_deleted(struct ovsdb_idl *db) + { +- struct ovsdb_idl_row *row, *next; ++ struct ovsdb_idl_row *row; + +- LIST_FOR_EACH_SAFE (row, next, track_node, &db->deleted_untracked_rows) { ++ LIST_FOR_EACH_SAFE (row, track_node, &db->deleted_untracked_rows) { + ovsdb_idl_row_untrack_change(row); + add_tracked_change_for_references(row); + ovsdb_idl_row_reparse_backrefs(row); +@@ -1906,8 +1906,8 @@ ovsdb_idl_index_create2(struct ovsdb_idl *idl, + static void + ovsdb_idl_destroy_indexes(struct ovsdb_idl_table *table) + { +- struct ovsdb_idl_index *index, *next; +- LIST_FOR_EACH_SAFE (index, next, node, &table->indexes) { ++ struct ovsdb_idl_index *index; ++ LIST_FOR_EACH_SAFE (index, node, &table->indexes) { + skiplist_destroy(index->skiplist, NULL); + free(index->columns); + free(index); +@@ -2145,12 +2145,12 @@ ovsdb_idl_row_clear_new(struct ovsdb_idl_row *row) + static void + ovsdb_idl_row_clear_arcs(struct ovsdb_idl_row *row, bool destroy_dsts) + { +- struct ovsdb_idl_arc *arc, *next; ++ struct ovsdb_idl_arc *arc; + + /* Delete all forward arcs. If 'destroy_dsts', destroy any orphaned rows + * that this causes to be unreferenced. 
+ */ +- LIST_FOR_EACH_SAFE (arc, next, src_node, &row->src_arcs) { ++ LIST_FOR_EACH_SAFE (arc, src_node, &row->src_arcs) { + ovs_list_remove(&arc->dst_node); + if (destroy_dsts + && ovsdb_idl_row_is_orphan(arc->dst) +@@ -2166,7 +2166,7 @@ ovsdb_idl_row_clear_arcs(struct ovsdb_idl_row *row, bool destroy_dsts) + static void + ovsdb_idl_row_reparse_backrefs(struct ovsdb_idl_row *row) + { +- struct ovsdb_idl_arc *arc, *next; ++ struct ovsdb_idl_arc *arc; + + /* This is trickier than it looks. ovsdb_idl_row_clear_arcs() will destroy + * 'arc', so we need to use the "safe" variant of list traversal. However, +@@ -2178,7 +2178,7 @@ ovsdb_idl_row_reparse_backrefs(struct ovsdb_idl_row *row) + * (If duplicate arcs were possible then we would need to make sure that + * 'next' didn't also point into 'arc''s destination, but we forbid + * duplicate arcs.) */ +- LIST_FOR_EACH_SAFE (arc, next, dst_node, &row->dst_arcs) { ++ LIST_FOR_EACH_SAFE (arc, dst_node, &row->dst_arcs) { + struct ovsdb_idl_row *ref = arc->src; + + ovsdb_idl_row_unparse(ref); +@@ -2329,9 +2329,9 @@ ovsdb_idl_row_destroy_postprocess(struct ovsdb_idl *idl) + struct ovsdb_idl_table *table = &idl->tables[i]; + + if (!ovs_list_is_empty(&table->track_list)) { +- struct ovsdb_idl_row *row, *next; ++ struct ovsdb_idl_row *row; + +- LIST_FOR_EACH_SAFE(row, next, track_node, &table->track_list) { ++ LIST_FOR_EACH_SAFE (row, track_node, &table->track_list) { + if (!ovsdb_idl_track_is_set(row->table)) { + ovs_list_remove(&row->track_node); + ovsdb_idl_row_unparse(row); +@@ -2729,7 +2729,7 @@ ovsdb_idl_txn_increment(struct ovsdb_idl_txn *txn, + void + ovsdb_idl_txn_destroy(struct ovsdb_idl_txn *txn) + { +- struct ovsdb_idl_txn_insert *insert, *next; ++ struct ovsdb_idl_txn_insert *insert; + + if (txn->status == TXN_INCOMPLETE) { + ovsdb_cs_forget_transaction(txn->idl->cs, txn->request_id); +@@ -2739,7 +2739,7 @@ ovsdb_idl_txn_destroy(struct ovsdb_idl_txn *txn) + ovsdb_idl_txn_abort(txn); + ds_destroy(&txn->comment); + 
free(txn->error); +- HMAP_FOR_EACH_SAFE (insert, next, hmap_node, &txn->inserted_rows) { ++ HMAP_FOR_EACH_SAFE (insert, hmap_node, &txn->inserted_rows) { + free(insert); + } + hmap_destroy(&txn->inserted_rows); +@@ -2824,7 +2824,7 @@ substitute_uuids(struct json *json, const struct ovsdb_idl_txn *txn) + static void + ovsdb_idl_txn_disassemble(struct ovsdb_idl_txn *txn) + { +- struct ovsdb_idl_row *row, *next; ++ struct ovsdb_idl_row *row; + + /* This must happen early. Otherwise, ovsdb_idl_row_parse() will call an + * ovsdb_idl_column's 'parse' function, which will call +@@ -2832,7 +2832,7 @@ ovsdb_idl_txn_disassemble(struct ovsdb_idl_txn *txn) + * transaction and fail to update the graph. */ + txn->idl->txn = NULL; + +- HMAP_FOR_EACH_SAFE (row, next, txn_node, &txn->txn_rows) { ++ HMAP_FOR_EACH_SAFE (row, txn_node, &txn->txn_rows) { + enum { INSERTED, MODIFIED, DELETED } op + = (!row->new_datum ? DELETED + : !row->old_datum ? INSERTED +diff --git a/lib/ovsdb-map-op.c b/lib/ovsdb-map-op.c +index 7b90ba84f9..795066e8ef 100644 +--- a/lib/ovsdb-map-op.c ++++ b/lib/ovsdb-map-op.c +@@ -91,8 +91,8 @@ map_op_list_create(void) + void + map_op_list_destroy(struct map_op_list *list, const struct ovsdb_type *type) + { +- struct map_op *map_op, *next; +- HMAP_FOR_EACH_SAFE (map_op, next, node, &list->hmap) { ++ struct map_op *map_op; ++ HMAP_FOR_EACH_SAFE (map_op, node, &list->hmap) { + map_op_destroy(map_op, type); + } + hmap_destroy(&list->hmap); +diff --git a/lib/ovsdb-set-op.c b/lib/ovsdb-set-op.c +index 62c4621181..321043282e 100644 +--- a/lib/ovsdb-set-op.c ++++ b/lib/ovsdb-set-op.c +@@ -90,8 +90,8 @@ set_op_list_create(void) + void + set_op_list_destroy(struct set_op_list *list, const struct ovsdb_type *type) + { +- struct set_op *set_op, *next; +- HMAP_FOR_EACH_SAFE (set_op, next, node, &list->hmap) { ++ struct set_op *set_op; ++ HMAP_FOR_EACH_SAFE (set_op, node, &list->hmap) { + set_op_destroy(set_op, type); + } + hmap_destroy(&list->hmap); +diff --git 
a/lib/pcap-file.c b/lib/pcap-file.c +index 41835f6f4d..3ed7ea4880 100644 +--- a/lib/pcap-file.c ++++ b/lib/pcap-file.c +@@ -344,9 +344,9 @@ tcp_reader_open(void) + void + tcp_reader_close(struct tcp_reader *r) + { +- struct tcp_stream *stream, *next_stream; ++ struct tcp_stream *stream; + +- HMAP_FOR_EACH_SAFE (stream, next_stream, hmap_node, &r->streams) { ++ HMAP_FOR_EACH_SAFE (stream, hmap_node, &r->streams) { + tcp_stream_destroy(r, stream); + } + hmap_destroy(&r->streams); +diff --git a/lib/perf-counter.c b/lib/perf-counter.c +index e4eca58d03..6952fcb594 100644 +--- a/lib/perf-counter.c ++++ b/lib/perf-counter.c +@@ -178,14 +178,14 @@ perf_counters_clear(void) + void + perf_counters_destroy(void) + { +- struct shash_node *node, *next; ++ struct shash_node *node; + + if (fd__ != -1) { + ioctl(fd__, PERF_EVENT_IOC_DISABLE, 0); + close(fd__); + } + +- SHASH_FOR_EACH_SAFE (node, next, &perf_counters) { ++ SHASH_FOR_EACH_SAFE (node, &perf_counters) { + shash_delete(&perf_counters, node); + } + +diff --git a/lib/poll-loop.c b/lib/poll-loop.c +index 4e751ff2c7..70fabeb8a3 100644 +--- a/lib/poll-loop.c ++++ b/lib/poll-loop.c +@@ -298,9 +298,9 @@ log_wakeup(const char *where, const struct pollfd *pollfd, int timeout) + static void + free_poll_nodes(struct poll_loop *loop) + { +- struct poll_node *node, *next; ++ struct poll_node *node; + +- HMAP_FOR_EACH_SAFE (node, next, hmap_node, &loop->poll_nodes) { ++ HMAP_FOR_EACH_SAFE (node, hmap_node, &loop->poll_nodes) { + hmap_remove(&loop->poll_nodes, &node->hmap_node); + #ifdef _WIN32 + if (node->wevent && node->pollfd.fd) { +diff --git a/lib/rculist.h b/lib/rculist.h +index 1072b87af2..c0d77acf94 100644 +--- a/lib/rculist.h ++++ b/lib/rculist.h +@@ -365,35 +365,57 @@ rculist_is_singleton_protected(const struct rculist *list) + return list_next == list->prev && list_next != list; + } + +-#define RCULIST_FOR_EACH(ITER, MEMBER, RCULIST) \ +- for (INIT_CONTAINER(ITER, rculist_next(RCULIST), MEMBER); \ +- &(ITER)->MEMBER != 
(RCULIST); \ +- ASSIGN_CONTAINER(ITER, rculist_next(&(ITER)->MEMBER), MEMBER)) +-#define RCULIST_FOR_EACH_CONTINUE(ITER, MEMBER, RCULIST) \ +- for (ASSIGN_CONTAINER(ITER, rculist_next(&(ITER)->MEMBER), MEMBER); \ +- &(ITER)->MEMBER != (RCULIST); \ +- ASSIGN_CONTAINER(ITER, rculist_next(&(ITER)->MEMBER), MEMBER)) +- +-#define RCULIST_FOR_EACH_REVERSE_PROTECTED(ITER, MEMBER, RCULIST) \ +- for (INIT_CONTAINER(ITER, (RCULIST)->prev, MEMBER); \ +- &(ITER)->MEMBER != (RCULIST); \ +- ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER)) +-#define RCULIST_FOR_EACH_REVERSE_PROTECTED_CONTINUE(ITER, MEMBER, RCULIST) \ +- for (ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER); \ +- &(ITER)->MEMBER != (RCULIST); \ +- ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER)) +- +-#define RCULIST_FOR_EACH_PROTECTED(ITER, MEMBER, RCULIST) \ +- for (INIT_CONTAINER(ITER, rculist_next_protected(RCULIST), MEMBER); \ +- &(ITER)->MEMBER != (RCULIST); \ +- ASSIGN_CONTAINER(ITER, rculist_next_protected(&(ITER)->MEMBER), \ +- MEMBER)) +- +-#define RCULIST_FOR_EACH_SAFE_PROTECTED(ITER, NEXT, MEMBER, RCULIST) \ +- for (INIT_CONTAINER(ITER, rculist_next_protected(RCULIST), MEMBER); \ +- (&(ITER)->MEMBER != (RCULIST) \ +- ? 
INIT_CONTAINER(NEXT, rculist_next_protected(&(ITER)->MEMBER), \ +- MEMBER), 1 : 0); \ +- (ITER) = (NEXT)) ++#define RCULIST_FOR_EACH(ITER, MEMBER, RCULIST) \ ++ for (INIT_MULTIVAR(ITER, MEMBER, rculist_next(RCULIST), \ ++ const struct rculist); \ ++ CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST)); \ ++ UPDATE_MULTIVAR(ITER, rculist_next(ITER_VAR(ITER)))) ++ ++#define RCULIST_FOR_EACH_CONTINUE(ITER, MEMBER, RCULIST) \ ++ for (INIT_MULTIVAR(ITER, MEMBER, rculist_next(&(ITER)->MEMBER), \ ++ const struct rculist); \ ++ CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST)); \ ++ UPDATE_MULTIVAR(ITER, rculist_next(ITER_VAR(ITER)))) ++ ++#define RCULIST_FOR_EACH_REVERSE_PROTECTED(ITER, MEMBER, RCULIST) \ ++ for (INIT_MULTIVAR(ITER, MEMBER, (RCULIST)->prev, struct rculist); \ ++ CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST)); \ ++ UPDATE_MULTIVAR(ITER, ITER_VAR(ITER)->prev)) ++ ++#define RCULIST_FOR_EACH_REVERSE_PROTECTED_CONTINUE(ITER, MEMBER, RCULIST) \ ++ for (INIT_MULTIVAR(ITER, MEMBER, (ITER)->MEMBER.prev, struct rculist); \ ++ CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST)); \ ++ UPDATE_MULTIVAR(ITER, ITER_VAR(ITER)->prev)) ++ ++#define RCULIST_FOR_EACH_PROTECTED(ITER, MEMBER, RCULIST) \ ++ for (INIT_MULTIVAR(ITER, MEMBER, rculist_next_protected(RCULIST), \ ++ struct rculist); \ ++ CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST)); \ ++ UPDATE_MULTIVAR(ITER, rculist_next_protected(ITER_VAR(ITER)))) ++ ++#define RCULIST_FOR_EACH_SAFE_SHORT_PROTECTED(ITER, MEMBER, RCULIST) \ ++ for (INIT_MULTIVAR_SAFE_SHORT(ITER, MEMBER, \ ++ rculist_next_protected(RCULIST), \ ++ struct rculist); \ ++ CONDITION_MULTIVAR_SAFE_SHORT(ITER, MEMBER, \ ++ ITER_VAR(ITER) != (RCULIST), \ ++ ITER_NEXT_VAR(ITER) = rculist_next_protected(ITER_VAR(ITER))); \ ++ UPDATE_MULTIVAR_SAFE_SHORT(ITER)) ++ ++#define RCULIST_FOR_EACH_SAFE_LONG_PROTECTED(ITER, NEXT, MEMBER, RCULIST) \ ++ for (INIT_MULTIVAR_SAFE_LONG(ITER, NEXT, MEMBER, \ ++ 
rculist_next_protected(RCULIST), \ ++ struct rculist); \ ++ CONDITION_MULTIVAR_SAFE_LONG(ITER, NEXT, MEMBER, \ ++ ITER_VAR(ITER) != (RCULIST), \ ++ ITER_VAR(NEXT) = rculist_next_protected(ITER_VAR(ITER)), \ ++ ITER_VAR(NEXT) != (RCULIST)); \ ++ UPDATE_MULTIVAR_SAFE_LONG(ITER, NEXT)) ++ ++#define RCULIST_FOR_EACH_SAFE_PROTECTED(...) \ ++ OVERLOAD_SAFE_MACRO(RCULIST_FOR_EACH_SAFE_LONG_PROTECTED, \ ++ RCULIST_FOR_EACH_SAFE_SHORT_PROTECTED, \ ++ 4, __VA_ARGS__) ++ + + #endif /* rculist.h */ +diff --git a/lib/reconnect.c b/lib/reconnect.c +index a929ddfd2d..89a0bcaf95 100644 +--- a/lib/reconnect.c ++++ b/lib/reconnect.c +@@ -75,7 +75,8 @@ struct reconnect { + + static void reconnect_transition__(struct reconnect *, long long int now, + enum state state); +-static long long int reconnect_deadline__(const struct reconnect *); ++static long long int reconnect_deadline__(const struct reconnect *, ++ long long int now); + static bool reconnect_may_retry(struct reconnect *); + + static const char * +@@ -539,7 +540,7 @@ reconnect_transition__(struct reconnect *fsm, long long int now, + } + + static long long int +-reconnect_deadline__(const struct reconnect *fsm) ++reconnect_deadline__(const struct reconnect *fsm, long long int now) + { + ovs_assert(fsm->state_entered != LLONG_MIN); + switch (fsm->state) { +@@ -557,8 +558,18 @@ reconnect_deadline__(const struct reconnect *fsm) + if (fsm->probe_interval) { + long long int base = MAX(fsm->last_activity, fsm->state_entered); + long long int expiration = base + fsm->probe_interval; +- if (fsm->last_receive_attempt >= expiration) { ++ if (now < expiration || fsm->last_receive_attempt >= expiration) { ++ /* We still have time before the expiration or the time has ++ * already passed and there was no activity. In the first case ++ * we need to wait for the expiration, in the second - we're ++ * already past the deadline. */ + return expiration; ++ } else { ++ /* Time has already passed, but we didn't attempt to receive ++ * anything. 
We need to wake up and try to receive even if ++ * nothing is pending, so we can update the expiration time or ++ * transition to a different state. */ ++ return now + 1; + } + } + return LLONG_MAX; +@@ -566,8 +577,10 @@ reconnect_deadline__(const struct reconnect *fsm) + case S_IDLE: + if (fsm->probe_interval) { + long long int expiration = fsm->state_entered + fsm->probe_interval; +- if (fsm->last_receive_attempt >= expiration) { ++ if (now < expiration || fsm->last_receive_attempt >= expiration) { + return expiration; ++ } else { ++ return now + 1; + } + } + return LLONG_MAX; +@@ -618,7 +631,7 @@ reconnect_deadline__(const struct reconnect *fsm) + enum reconnect_action + reconnect_run(struct reconnect *fsm, long long int now) + { +- if (now >= reconnect_deadline__(fsm)) { ++ if (now >= reconnect_deadline__(fsm, now)) { + switch (fsm->state) { + case S_VOID: + return 0; +@@ -671,7 +684,7 @@ reconnect_wait(struct reconnect *fsm, long long int now) + int + reconnect_timeout(struct reconnect *fsm, long long int now) + { +- long long int deadline = reconnect_deadline__(fsm); ++ long long int deadline = reconnect_deadline__(fsm, now); + if (deadline != LLONG_MAX) { + long long int remaining = deadline - now; + return MAX(0, MIN(INT_MAX, remaining)); +diff --git a/lib/seq.c b/lib/seq.c +index 6581cb06ba..99e5bf8bd1 100644 +--- a/lib/seq.c ++++ b/lib/seq.c +@@ -297,9 +297,9 @@ static void + seq_thread_woke(struct seq_thread *thread) + OVS_REQUIRES(seq_mutex) + { +- struct seq_waiter *waiter, *next_waiter; ++ struct seq_waiter *waiter; + +- LIST_FOR_EACH_SAFE (waiter, next_waiter, list_node, &thread->waiters) { ++ LIST_FOR_EACH_SAFE (waiter, list_node, &thread->waiters) { + ovs_assert(waiter->thread == thread); + seq_waiter_destroy(waiter); + } +@@ -319,9 +319,9 @@ static void + seq_wake_waiters(struct seq *seq) + OVS_REQUIRES(seq_mutex) + { +- struct seq_waiter *waiter, *next_waiter; ++ struct seq_waiter *waiter; + +- HMAP_FOR_EACH_SAFE (waiter, next_waiter, hmap_node, 
&seq->waiters) { ++ HMAP_FOR_EACH_SAFE (waiter, hmap_node, &seq->waiters) { + latch_set(&waiter->thread->latch); + seq_waiter_destroy(waiter); + } +diff --git a/lib/shash.c b/lib/shash.c +index a8433629ab..a7b2c64582 100644 +--- a/lib/shash.c ++++ b/lib/shash.c +@@ -68,9 +68,9 @@ shash_moved(struct shash *sh) + void + shash_clear(struct shash *sh) + { +- struct shash_node *node, *next; ++ struct shash_node *node; + +- SHASH_FOR_EACH_SAFE (node, next, sh) { ++ SHASH_FOR_EACH_SAFE (node, sh) { + hmap_remove(&sh->map, &node->node); + free(node->name); + free(node); +@@ -81,9 +81,9 @@ shash_clear(struct shash *sh) + void + shash_clear_free_data(struct shash *sh) + { +- struct shash_node *node, *next; ++ struct shash_node *node; + +- SHASH_FOR_EACH_SAFE (node, next, sh) { ++ SHASH_FOR_EACH_SAFE (node, sh) { + hmap_remove(&sh->map, &node->node); + free(node->data); + free(node->name); +diff --git a/lib/simap.c b/lib/simap.c +index f404ece677..0ee08d74d5 100644 +--- a/lib/simap.c ++++ b/lib/simap.c +@@ -63,9 +63,9 @@ simap_moved(struct simap *simap) + void + simap_clear(struct simap *simap) + { +- struct simap_node *node, *next; ++ struct simap_node *node; + +- SIMAP_FOR_EACH_SAFE (node, next, simap) { ++ SIMAP_FOR_EACH_SAFE (node, simap) { + hmap_remove(&simap->map, &node->node); + free(node->name); + free(node); +diff --git a/lib/simap.h b/lib/simap.h +index 5e646e6607..8db7bea7c9 100644 +--- a/lib/simap.h ++++ b/lib/simap.h +@@ -41,12 +41,22 @@ struct simap_node { + BUILD_ASSERT_TYPE(SIMAP_NODE, struct simap_node *), \ + BUILD_ASSERT_TYPE(SIMAP, struct simap *)) + +-#define SIMAP_FOR_EACH_SAFE(SIMAP_NODE, NEXT, SIMAP) \ +- HMAP_FOR_EACH_SAFE_INIT (SIMAP_NODE, NEXT, node, &(SIMAP)->map, \ ++#define SIMAP_FOR_EACH_SAFE_SHORT(SIMAP_NODE, SIMAP) \ ++ HMAP_FOR_EACH_SAFE_SHORT_INIT (SIMAP_NODE, node, &(SIMAP)->map, \ + BUILD_ASSERT_TYPE(SIMAP_NODE, struct simap_node *), \ +- BUILD_ASSERT_TYPE(NEXT, struct simap_node *), \ + BUILD_ASSERT_TYPE(SIMAP, struct simap *)) + 
++#define SIMAP_FOR_EACH_SAFE_LONG(SIMAP_NODE, NEXT, SIMAP) \ ++ HMAP_FOR_EACH_SAFE_LONG_INIT (SIMAP_NODE, NEXT, node, &(SIMAP)->map, \ ++ BUILD_ASSERT_TYPE(SIMAP_NODE, struct simap_node *), \ ++ BUILD_ASSERT_TYPE(NEXT, struct simap_node *), \ ++ BUILD_ASSERT_TYPE(SIMAP, struct simap *)) ++ ++#define SIMAP_FOR_EACH_SAFE(...) \ ++ OVERLOAD_SAFE_MACRO(SIMAP_FOR_EACH_SAFE_LONG, \ ++ SIMAP_FOR_EACH_SAFE_SHORT, \ ++ 3, __VA_ARGS__) ++ + void simap_init(struct simap *); + void simap_destroy(struct simap *); + void simap_swap(struct simap *, struct simap *); +diff --git a/lib/smap.c b/lib/smap.c +index e82261497c..b23eeb52d3 100644 +--- a/lib/smap.c ++++ b/lib/smap.c +@@ -185,9 +185,9 @@ smap_steal(struct smap *smap, struct smap_node *node, + void + smap_clear(struct smap *smap) + { +- struct smap_node *node, *next; ++ struct smap_node *node; + +- SMAP_FOR_EACH_SAFE (node, next, smap) { ++ SMAP_FOR_EACH_SAFE (node, smap) { + smap_remove_node(smap, node); + } + } +diff --git a/lib/smap.h b/lib/smap.h +index a921159667..2fe6c540a7 100644 +--- a/lib/smap.h ++++ b/lib/smap.h +@@ -45,13 +45,24 @@ struct smap_node { + BUILD_ASSERT_TYPE(SMAP_NODE, struct smap_node *), \ + BUILD_ASSERT_TYPE(SMAP, struct smap *)) + +-#define SMAP_FOR_EACH_SAFE(SMAP_NODE, NEXT, SMAP) \ +- HMAP_FOR_EACH_SAFE_INIT ( \ ++#define SMAP_FOR_EACH_SAFE_SHORT(SMAP_NODE, SMAP) \ ++ HMAP_FOR_EACH_SAFE_SHORT_INIT ( \ ++ SMAP_NODE, node, &(SMAP)->map, \ ++ BUILD_ASSERT_TYPE(SMAP_NODE, struct smap_node *), \ ++ BUILD_ASSERT_TYPE(SMAP, struct smap *)) ++ ++#define SMAP_FOR_EACH_SAFE_LONG(SMAP_NODE, NEXT, SMAP) \ ++ HMAP_FOR_EACH_SAFE_LONG_INIT ( \ + SMAP_NODE, NEXT, node, &(SMAP)->map, \ + BUILD_ASSERT_TYPE(SMAP_NODE, struct smap_node *), \ + BUILD_ASSERT_TYPE(NEXT, struct smap_node *), \ + BUILD_ASSERT_TYPE(SMAP, struct smap *)) + ++#define SMAP_FOR_EACH_SAFE(...) 
\ ++ OVERLOAD_SAFE_MACRO(SMAP_FOR_EACH_SAFE_LONG, \ ++ SMAP_FOR_EACH_SAFE_SHORT, \ ++ 3, __VA_ARGS__) ++ + /* Initializer for an immutable struct smap 'SMAP' that contains one or two + * key-value pairs, e.g. + * +diff --git a/lib/socket-util.c b/lib/socket-util.c +index 4f1ffecf5d..38705cc51e 100644 +--- a/lib/socket-util.c ++++ b/lib/socket-util.c +@@ -62,7 +62,8 @@ static bool parse_sockaddr_components(struct sockaddr_storage *ss, + const char *port_s, + uint16_t default_port, + const char *s, +- bool resolve_host); ++ bool resolve_host, ++ bool *dns_failure); + + /* Sets 'fd' to non-blocking mode. Returns 0 if successful, otherwise a + * positive errno value. */ +@@ -438,7 +439,7 @@ parse_sockaddr_components_dns(struct sockaddr_storage *ss OVS_UNUSED, + dns_resolve(host_s, &tmp_host_s); + if (tmp_host_s != NULL) { + parse_sockaddr_components(ss, tmp_host_s, port_s, +- default_port, s, false); ++ default_port, s, false, NULL); + free(tmp_host_s); + return true; + } +@@ -450,11 +451,15 @@ parse_sockaddr_components(struct sockaddr_storage *ss, + char *host_s, + const char *port_s, uint16_t default_port, + const char *s, +- bool resolve_host) ++ bool resolve_host, bool *dns_failure) + { + struct sockaddr_in *sin = sin_cast(sa_cast(ss)); + int port; + ++ if (dns_failure) { ++ *dns_failure = false; ++ } ++ + if (port_s && port_s[0]) { + if (!str_to_int(port_s, 10, &port) || port < 0 || port > 65535) { + VLOG_ERR("%s: bad port number \"%s\"", s, port_s); +@@ -501,10 +506,15 @@ parse_sockaddr_components(struct sockaddr_storage *ss, + return true; + + resolve: +- if (resolve_host && parse_sockaddr_components_dns(ss, host_s, port_s, +- default_port, s)) { +- return true; +- } else if (!resolve_host) { ++ if (resolve_host) { ++ if (parse_sockaddr_components_dns(ss, host_s, port_s, ++ default_port, s)) { ++ return true; ++ } ++ if (dns_failure) { ++ *dns_failure = true; ++ } ++ } else { + VLOG_ERR("%s: bad IP address \"%s\"", s, host_s); + } + exit: +@@ -521,10 +531,12 @@ 
exit: + * It resolves the host if 'resolve_host' is true. + * + * On success, returns true and stores the parsed remote address into '*ss'. +- * On failure, logs an error, stores zeros into '*ss', and returns false. */ ++ * On failure, logs an error, stores zeros into '*ss', and returns false, ++ * '*dns_failure' indicates if the host resolution failed. */ + bool + inet_parse_active(const char *target_, int default_port, +- struct sockaddr_storage *ss, bool resolve_host) ++ struct sockaddr_storage *ss, ++ bool resolve_host, bool *dns_failure) + { + char *target = xstrdup(target_); + char *port, *host; +@@ -539,7 +551,7 @@ inet_parse_active(const char *target_, int default_port, + ok = false; + } else { + ok = parse_sockaddr_components(ss, host, port, default_port, +- target_, resolve_host); ++ target_, resolve_host, dns_failure); + } + if (!ok) { + memset(ss, 0, sizeof *ss); +@@ -576,7 +588,7 @@ inet_open_active(int style, const char *target, int default_port, + int error; + + /* Parse. 
*/ +- if (!inet_parse_active(target, default_port, &ss, true)) { ++ if (!inet_parse_active(target, default_port, &ss, true, NULL)) { + error = EAFNOSUPPORT; + goto exit; + } +@@ -660,7 +672,7 @@ inet_parse_passive(const char *target_, int default_port, + ok = false; + } else { + ok = parse_sockaddr_components(ss, host, port, default_port, +- target_, true); ++ target_, true, NULL); + } + if (!ok) { + memset(ss, 0, sizeof *ss); +@@ -783,7 +795,8 @@ inet_parse_address(const char *target_, struct sockaddr_storage *ss) + { + char *target = xstrdup(target_); + char *host = unbracket(target); +- bool ok = parse_sockaddr_components(ss, host, NULL, 0, target_, false); ++ bool ok = parse_sockaddr_components(ss, host, NULL, 0, ++ target_, false, NULL); + if (!ok) { + memset(ss, 0, sizeof *ss); + } +diff --git a/lib/socket-util.h b/lib/socket-util.h +index 9ccb7d4cc4..bf66393df9 100644 +--- a/lib/socket-util.h ++++ b/lib/socket-util.h +@@ -49,7 +49,8 @@ ovs_be32 guess_netmask(ovs_be32 ip); + void inet_parse_host_port_tokens(char *s, char **hostp, char **portp); + void inet_parse_port_host_tokens(char *s, char **portp, char **hostp); + bool inet_parse_active(const char *target, int default_port, +- struct sockaddr_storage *ssp, bool resolve_host); ++ struct sockaddr_storage *ssp, ++ bool resolve_host, bool *dns_failure); + int inet_open_active(int style, const char *target, int default_port, + struct sockaddr_storage *ssp, int *fdp, uint8_t dscp); + +diff --git a/lib/sset.c b/lib/sset.c +index b2e3f43ec9..c3197e305f 100644 +--- a/lib/sset.c ++++ b/lib/sset.c +@@ -212,9 +212,9 @@ sset_add_array(struct sset *set, char **names, size_t n) + void + sset_clear(struct sset *set) + { +- const char *name, *next; ++ const char *name; + +- SSET_FOR_EACH_SAFE (name, next, set) { ++ SSET_FOR_EACH_SAFE (name, set) { + sset_delete(set, SSET_NODE_FROM_NAME(name)); + } + } +@@ -320,9 +320,9 @@ sset_at_position(const struct sset *set, struct sset_position *pos) + void + sset_intersect(struct 
sset *a, const struct sset *b) + { +- const char *name, *next; ++ const char *name; + +- SSET_FOR_EACH_SAFE (name, next, a) { ++ SSET_FOR_EACH_SAFE (name, a) { + if (!sset_contains(b, name)) { + sset_delete(a, SSET_NODE_FROM_NAME(name)); + } +diff --git a/lib/sset.h b/lib/sset.h +index f0bb8b5344..214d6fb41c 100644 +--- a/lib/sset.h ++++ b/lib/sset.h +@@ -87,13 +87,26 @@ void sset_intersect(struct sset *, const struct sset *); + NAME != NULL; \ + (NAME) = SSET_NEXT(SSET, NAME)) + +-#define SSET_FOR_EACH_SAFE(NAME, NEXT, SSET) \ ++#define SSET_FOR_EACH_SAFE_LONG(NAME, NEXT, SSET) \ + for ((NAME) = SSET_FIRST(SSET); \ + (NAME != NULL \ + ? (NEXT) = SSET_NEXT(SSET, NAME), true \ + : false); \ + (NAME) = (NEXT)) + ++#define SSET_FOR_EACH_SAFE_SHORT(NAME, SSET) \ ++ for (const char * NAME__next = \ ++ ((NAME) = SSET_FIRST(SSET), NULL); \ ++ (NAME != NULL \ ++ ? (NAME__next = SSET_NEXT(SSET, NAME), true) \ ++ : (NAME__next = NULL, false)); \ ++ (NAME) = NAME__next) ++ ++#define SSET_FOR_EACH_SAFE(...) \ ++ OVERLOAD_SAFE_MACRO(SSET_FOR_EACH_SAFE_LONG, \ ++ SSET_FOR_EACH_SAFE_SHORT, \ ++ 3, __VA_ARGS__) ++ + const char **sset_array(const struct sset *); + const char **sset_sort(const struct sset *); + +diff --git a/lib/stopwatch.c b/lib/stopwatch.c +index 1c71df1a12..ec567603b1 100644 +--- a/lib/stopwatch.c ++++ b/lib/stopwatch.c +@@ -464,7 +464,7 @@ stopwatch_thread(void *ign OVS_UNUSED) + static void + stopwatch_exit(void) + { +- struct shash_node *node, *node_next; ++ struct shash_node *node; + struct stopwatch_packet *pkt = stopwatch_packet_create(OP_SHUTDOWN); + stopwatch_packet_write(pkt); + xpthread_join(stopwatch_thread_id, NULL); +@@ -473,7 +473,7 @@ stopwatch_exit(void) + * other competing thread. We are now the sole owners + * of all data in the file. 
+ */ +- SHASH_FOR_EACH_SAFE (node, node_next, &stopwatches) { ++ SHASH_FOR_EACH_SAFE (node, &stopwatches) { + struct stopwatch *sw = node->data; + shash_delete(&stopwatches, node); + free(sw); +diff --git a/lib/stream.c b/lib/stream.c +index fcaddf10ad..71039e24f1 100644 +--- a/lib/stream.c ++++ b/lib/stream.c +@@ -788,7 +788,7 @@ stream_parse_target_with_default_port(const char *target, int default_port, + struct sockaddr_storage *ss) + { + return ((!strncmp(target, "tcp:", 4) || !strncmp(target, "ssl:", 4)) +- && inet_parse_active(target + 4, default_port, ss, true)); ++ && inet_parse_active(target + 4, default_port, ss, true, NULL)); + } + + /* Attempts to guess the content type of a stream whose first few bytes were +diff --git a/lib/tc.c b/lib/tc.c +index adb2d3182a..df73a43d4c 100644 +--- a/lib/tc.c ++++ b/lib/tc.c +@@ -568,16 +568,17 @@ nl_parse_flower_vlan(struct nlattr **attrs, struct tc_flower *flower) + + flower->key.encap_eth_type[0] = + nl_attr_get_be16(attrs[TCA_FLOWER_KEY_ETH_TYPE]); ++ flower->mask.encap_eth_type[0] = CONSTANT_HTONS(0xffff); + + if (attrs[TCA_FLOWER_KEY_VLAN_ID]) { + flower->key.vlan_id[0] = + nl_attr_get_u16(attrs[TCA_FLOWER_KEY_VLAN_ID]); +- flower->mask.vlan_id[0] = 0xffff; ++ flower->mask.vlan_id[0] = VLAN_VID_MASK >> VLAN_VID_SHIFT; + } + if (attrs[TCA_FLOWER_KEY_VLAN_PRIO]) { + flower->key.vlan_prio[0] = + nl_attr_get_u8(attrs[TCA_FLOWER_KEY_VLAN_PRIO]); +- flower->mask.vlan_prio[0] = 0xff; ++ flower->mask.vlan_prio[0] = VLAN_PCP_MASK >> VLAN_PCP_SHIFT; + } + + if (!attrs[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) { +@@ -590,17 +591,18 @@ nl_parse_flower_vlan(struct nlattr **attrs, struct tc_flower *flower) + } + + flower->key.encap_eth_type[1] = flower->key.encap_eth_type[0]; ++ flower->mask.encap_eth_type[1] = CONSTANT_HTONS(0xffff); + flower->key.encap_eth_type[0] = encap_ethtype; + + if (attrs[TCA_FLOWER_KEY_CVLAN_ID]) { + flower->key.vlan_id[1] = + nl_attr_get_u16(attrs[TCA_FLOWER_KEY_CVLAN_ID]); +- flower->mask.vlan_id[1] = 0xffff; 
++ flower->mask.vlan_id[1] = VLAN_VID_MASK >> VLAN_VID_SHIFT; + } + if (attrs[TCA_FLOWER_KEY_CVLAN_PRIO]) { + flower->key.vlan_prio[1] = + nl_attr_get_u8(attrs[TCA_FLOWER_KEY_CVLAN_PRIO]); +- flower->mask.vlan_prio[1] = 0xff; ++ flower->mask.vlan_prio[1] = VLAN_PCP_MASK >> VLAN_PCP_SHIFT; + } + } + +@@ -937,24 +939,21 @@ nl_parse_flower_ip(struct nlattr **attrs, struct tc_flower *flower) { + key->icmp_code = + nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_CODE]); + mask->icmp_code = +- nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_CODE]); ++ nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_CODE_MASK]); + } + if (attrs[TCA_FLOWER_KEY_ICMPV4_TYPE_MASK]) { +- key->icmp_type = +- nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_TYPE_MASK]); ++ key->icmp_type = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_TYPE]); + mask->icmp_type = + nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_TYPE_MASK]); + } + } else if (ip_proto == IPPROTO_ICMPV6) { + if (attrs[TCA_FLOWER_KEY_ICMPV6_CODE_MASK]) { +- key->icmp_code = +- nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_CODE]); ++ key->icmp_code = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_CODE]); + mask->icmp_code = +- nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_CODE]); ++ nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_CODE_MASK]); + } + if (attrs[TCA_FLOWER_KEY_ICMPV6_TYPE_MASK]) { +- key->icmp_type = +- nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_TYPE_MASK]); ++ key->icmp_type = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_TYPE]); + mask->icmp_type = + nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_TYPE_MASK]); + } +@@ -1006,14 +1005,14 @@ static const struct nl_policy pedit_policy[] = { + static int + nl_parse_act_pedit(struct nlattr *options, struct tc_flower *flower) + { +- struct tc_action *action; ++ struct tc_action *action = &flower->actions[flower->action_count++]; + struct nlattr *pe_attrs[ARRAY_SIZE(pedit_policy)]; + const struct tc_pedit *pe; + const struct tc_pedit_key *keys; + const struct nlattr *nla, *keys_ex, *ex_type; + const void *keys_attr; +- char 
*rewrite_key = (void *) &flower->rewrite.key; +- char *rewrite_mask = (void *) &flower->rewrite.mask; ++ char *rewrite_key = (void *) &action->rewrite.key; ++ char *rewrite_mask = (void *) &action->rewrite.mask; + size_t keys_ex_size, left; + int type, i = 0, err; + +@@ -1092,7 +1091,6 @@ nl_parse_act_pedit(struct nlattr *options, struct tc_flower *flower) + i++; + } + +- action = &flower->actions[flower->action_count++]; + action->type = TC_ACT_PEDIT; + + return 0; +@@ -1487,7 +1485,9 @@ nl_parse_act_ct(struct nlattr *options, struct tc_flower *flower) + if (ipv4_max) { + ovs_be32 addr = nl_attr_get_be32(ipv4_max); + +- action->ct.range.ipv4.max = addr; ++ if (action->ct.range.ipv4.min != addr) { ++ action->ct.range.ipv4.max = addr; ++ } + } + } else if (ipv6_min) { + action->ct.range.ip_family = AF_INET6; +@@ -1496,7 +1496,9 @@ nl_parse_act_ct(struct nlattr *options, struct tc_flower *flower) + if (ipv6_max) { + struct in6_addr addr = nl_attr_get_in6_addr(ipv6_max); + +- action->ct.range.ipv6.max = addr; ++ if (!ipv6_addr_equals(&action->ct.range.ipv6.min, &addr)) { ++ action->ct.range.ipv6.max = addr; ++ } + } + } + +@@ -1504,6 +1506,10 @@ nl_parse_act_ct(struct nlattr *options, struct tc_flower *flower) + action->ct.range.port.min = nl_attr_get_be16(port_min); + if (port_max) { + action->ct.range.port.max = nl_attr_get_be16(port_max); ++ if (action->ct.range.port.min == ++ action->ct.range.port.max) { ++ action->ct.range.port.max = 0; ++ } + } + } + } +@@ -1702,6 +1708,9 @@ static const struct nl_policy stats_policy[] = { + [TCA_STATS_BASIC] = { .type = NL_A_UNSPEC, + .min_len = sizeof(struct gnet_stats_basic), + .optional = false, }, ++ [TCA_STATS_BASIC_HW] = { .type = NL_A_UNSPEC, ++ .min_len = sizeof(struct gnet_stats_basic), ++ .optional = true, }, + }; + + static int +@@ -1714,8 +1723,11 @@ nl_parse_single_action(struct nlattr *action, struct tc_flower *flower, + const char *act_kind; + struct nlattr *action_attrs[ARRAY_SIZE(act_policy)]; + struct nlattr 
*stats_attrs[ARRAY_SIZE(stats_policy)]; +- struct ovs_flow_stats *stats = &flower->stats; +- const struct gnet_stats_basic *bs; ++ struct ovs_flow_stats *stats_sw = &flower->stats_sw; ++ struct ovs_flow_stats *stats_hw = &flower->stats_hw; ++ const struct gnet_stats_basic *bs_all = NULL; ++ const struct gnet_stats_basic *bs_hw = NULL; ++ struct gnet_stats_basic bs_sw = { .packets = 0, .bytes = 0, }; + int err = 0; + + if (!nl_parse_nested(action, act_policy, action_attrs, +@@ -1771,10 +1783,26 @@ nl_parse_single_action(struct nlattr *action, struct tc_flower *flower, + return EPROTO; + } + +- bs = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC], sizeof *bs); +- if (bs->packets) { +- put_32aligned_u64(&stats->n_packets, bs->packets); +- put_32aligned_u64(&stats->n_bytes, bs->bytes); ++ bs_all = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC], sizeof *bs_all); ++ if (stats_attrs[TCA_STATS_BASIC_HW]) { ++ bs_hw = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC_HW], ++ sizeof *bs_hw); ++ ++ bs_sw.packets = bs_all->packets - bs_hw->packets; ++ bs_sw.bytes = bs_all->bytes - bs_hw->bytes; ++ } else { ++ bs_sw.packets = bs_all->packets; ++ bs_sw.bytes = bs_all->bytes; ++ } ++ ++ if (bs_sw.packets > get_32aligned_u64(&stats_sw->n_packets)) { ++ put_32aligned_u64(&stats_sw->n_packets, bs_sw.packets); ++ put_32aligned_u64(&stats_sw->n_bytes, bs_sw.bytes); ++ } ++ ++ if (bs_hw && bs_hw->packets > get_32aligned_u64(&stats_hw->n_packets)) { ++ put_32aligned_u64(&stats_hw->n_packets, bs_hw->packets); ++ put_32aligned_u64(&stats_hw->n_bytes, bs_hw->bytes); + } + + return 0; +@@ -2399,14 +2427,14 @@ nl_msg_put_act_flags(struct ofpbuf *request) { + * first_word_mask/last_word_mask - the mask to use for the first/last read + * (as we read entire words). 
*/ + static void +-calc_offsets(struct tc_flower *flower, struct flower_key_to_pedit *m, ++calc_offsets(struct tc_action *action, struct flower_key_to_pedit *m, + int *cur_offset, int *cnt, ovs_be32 *last_word_mask, + ovs_be32 *first_word_mask, ovs_be32 **mask, ovs_be32 **data) + { + int start_offset, max_offset, total_size; + int diff, right_zero_bits, left_zero_bits; +- char *rewrite_key = (void *) &flower->rewrite.key; +- char *rewrite_mask = (void *) &flower->rewrite.mask; ++ char *rewrite_key = (void *) &action->rewrite.key; ++ char *rewrite_mask = (void *) &action->rewrite.mask; + + max_offset = m->offset + m->size; + start_offset = ROUND_DOWN(m->offset, 4); +@@ -2473,7 +2501,8 @@ csum_update_flag(struct tc_flower *flower, + + static int + nl_msg_put_flower_rewrite_pedits(struct ofpbuf *request, +- struct tc_flower *flower) ++ struct tc_flower *flower, ++ struct tc_action *action) + { + struct { + struct tc_pedit sel; +@@ -2497,7 +2526,7 @@ nl_msg_put_flower_rewrite_pedits(struct ofpbuf *request, + continue; + } + +- calc_offsets(flower, m, &cur_offset, &cnt, &last_word_mask, ++ calc_offsets(action, m, &cur_offset, &cnt, &last_word_mask, + &first_word_mask, &mask, &data); + + for (j = 0; j < cnt; j++, mask++, data++, cur_offset += 4) { +@@ -2556,6 +2585,29 @@ nl_msg_put_flower_acts_release(struct ofpbuf *request, uint16_t act_index) + nl_msg_end_nested(request, act_offset); + } + ++/* Aggregates all previous successive pedit actions csum_update_flags ++ * to flower->csum_update_flags. Only append one csum action to the ++ * last pedit action. */ ++static void ++nl_msg_put_csum_act(struct ofpbuf *request, struct tc_flower *flower, ++ uint16_t *act_index) ++{ ++ size_t act_offset; ++ ++ /* No pedit actions or processed already. 
*/ ++ if (!flower->csum_update_flags) { ++ return; ++ } ++ ++ act_offset = nl_msg_start_nested(request, (*act_index)++); ++ nl_msg_put_act_csum(request, flower->csum_update_flags); ++ nl_msg_put_act_flags(request); ++ nl_msg_end_nested(request, act_offset); ++ ++ /* Clear it. So we can have another series of pedit actions. */ ++ flower->csum_update_flags = 0; ++} ++ + static int + nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower) + { +@@ -2572,20 +2624,22 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower) + + action = flower->actions; + for (i = 0; i < flower->action_count; i++, action++) { ++ if (action->type != TC_ACT_PEDIT) { ++ nl_msg_put_csum_act(request, flower, &act_index); ++ } + switch (action->type) { + case TC_ACT_PEDIT: { + act_offset = nl_msg_start_nested(request, act_index++); +- error = nl_msg_put_flower_rewrite_pedits(request, flower); ++ error = nl_msg_put_flower_rewrite_pedits(request, flower, ++ action); + if (error) { + return error; + } + nl_msg_end_nested(request, act_offset); + +- if (flower->csum_update_flags) { +- act_offset = nl_msg_start_nested(request, act_index++); +- nl_msg_put_act_csum(request, flower->csum_update_flags); +- nl_msg_put_act_flags(request); +- nl_msg_end_nested(request, act_offset); ++ if (i == flower->action_count - 1) { ++ /* If this is the last action check csum calc again. 
*/ ++ nl_msg_put_csum_act(request, flower, &act_index); + } + } + break; +@@ -2914,13 +2968,13 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower) + FLOWER_PUT_MASKED_VALUE(icmp_code, TCA_FLOWER_KEY_ICMPV6_CODE); + FLOWER_PUT_MASKED_VALUE(icmp_type, TCA_FLOWER_KEY_ICMPV6_TYPE); + } +- +- FLOWER_PUT_MASKED_VALUE(ct_state, TCA_FLOWER_KEY_CT_STATE); +- FLOWER_PUT_MASKED_VALUE(ct_zone, TCA_FLOWER_KEY_CT_ZONE); +- FLOWER_PUT_MASKED_VALUE(ct_mark, TCA_FLOWER_KEY_CT_MARK); +- FLOWER_PUT_MASKED_VALUE(ct_label, TCA_FLOWER_KEY_CT_LABELS); + } + ++ FLOWER_PUT_MASKED_VALUE(ct_state, TCA_FLOWER_KEY_CT_STATE); ++ FLOWER_PUT_MASKED_VALUE(ct_zone, TCA_FLOWER_KEY_CT_ZONE); ++ FLOWER_PUT_MASKED_VALUE(ct_mark, TCA_FLOWER_KEY_CT_MARK); ++ FLOWER_PUT_MASKED_VALUE(ct_label, TCA_FLOWER_KEY_CT_LABELS); ++ + if (host_eth_type == ETH_P_IP) { + FLOWER_PUT_MASKED_VALUE(ipv4.ipv4_src, TCA_FLOWER_KEY_IPV4_SRC); + FLOWER_PUT_MASKED_VALUE(ipv4.ipv4_dst, TCA_FLOWER_KEY_IPV4_DST); +@@ -2993,12 +3047,79 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower) + return 0; + } + ++static void ++log_tc_flower_match(const char *msg, ++ const struct tc_flower *a, ++ const struct tc_flower *b) ++{ ++ uint8_t key_a[sizeof(struct tc_flower_key)]; ++ uint8_t key_b[sizeof(struct tc_flower_key)]; ++ struct ds s = DS_EMPTY_INITIALIZER; ++ ++ for (int i = 0; i < sizeof a->key; i++) { ++ uint8_t mask_a = ((uint8_t *) &a->mask)[i]; ++ uint8_t mask_b = ((uint8_t *) &b->mask)[i]; ++ ++ key_a[i] = ((uint8_t *) &a->key)[i] & mask_a; ++ key_b[i] = ((uint8_t *) &b->key)[i] & mask_b; ++ } ++ ds_put_cstr(&s, "\nExpected Mask:\n"); ++ ds_put_hex(&s, &a->mask, sizeof a->mask); ++ ds_put_cstr(&s, "\nReceived Mask:\n"); ++ ds_put_hex(&s, &b->mask, sizeof b->mask); ++ ds_put_cstr(&s, "\nExpected Key:\n"); ++ ds_put_hex(&s, &a->key, sizeof a->key); ++ ds_put_cstr(&s, "\nReceived Key:\n"); ++ ds_put_hex(&s, &b->key, sizeof b->key); ++ ds_put_cstr(&s, "\nExpected Masked Key:\n"); 
++ ds_put_hex(&s, key_a, sizeof key_a); ++ ds_put_cstr(&s, "\nReceived Masked Key:\n"); ++ ds_put_hex(&s, key_b, sizeof key_b); ++ ++ if (a->action_count != b->action_count) { ++ /* If action count is not equal, we print all actions to see which ++ * ones are missing. */ ++ const struct tc_action *action; ++ int i; ++ ++ ds_put_cstr(&s, "\nExpected Actions:\n"); ++ for (i = 0, action = a->actions; i < a->action_count; i++, action++) { ++ ds_put_cstr(&s, " - "); ++ ds_put_hex(&s, action, sizeof *action); ++ ds_put_cstr(&s, "\n"); ++ } ++ ds_put_cstr(&s, "Received Actions:\n"); ++ for (i = 0, action = b->actions; i < b->action_count; i++, action++) { ++ ds_put_cstr(&s, " - "); ++ ds_put_hex(&s, action, sizeof *action); ++ ds_put_cstr(&s, "\n"); ++ } ++ } else { ++ /* Only dump the delta in actions. */ ++ const struct tc_action *action_a = a->actions; ++ const struct tc_action *action_b = b->actions; ++ ++ for (int i = 0; i < a->action_count; i++, action_a++, action_b++) { ++ if (memcmp(action_a, action_b, sizeof *action_a)) { ++ ds_put_format(&s, ++ "\nAction %d mismatch:\n - Expected Action: ", ++ i); ++ ds_put_hex(&s, action_a, sizeof *action_a); ++ ds_put_cstr(&s, "\n - Received Action: "); ++ ds_put_hex(&s, action_b, sizeof *action_b); ++ } ++ } ++ } ++ VLOG_DBG_RL(&error_rl, "%s%s", msg, ds_cstr(&s)); ++ ds_destroy(&s); ++} ++ + static bool + cmp_tc_flower_match_action(const struct tc_flower *a, + const struct tc_flower *b) + { + if (memcmp(&a->mask, &b->mask, sizeof a->mask)) { +- VLOG_DBG_RL(&error_rl, "tc flower compare failed mask compare"); ++ log_tc_flower_match("tc flower compare failed mask compare:", a, b); + return false; + } + +@@ -3011,8 +3132,8 @@ cmp_tc_flower_match_action(const struct tc_flower *a, + uint8_t key_b = ((uint8_t *)&b->key)[i] & mask; + + if (key_a != key_b) { +- VLOG_DBG_RL(&error_rl, "tc flower compare failed key compare at " +- "%d", i); ++ log_tc_flower_match("tc flower compare failed masked key compare:", ++ a, b); + return 
false; + } + } +@@ -3022,14 +3143,15 @@ cmp_tc_flower_match_action(const struct tc_flower *a, + const struct tc_action *action_b = b->actions; + + if (a->action_count != b->action_count) { +- VLOG_DBG_RL(&error_rl, "tc flower compare failed action length check"); ++ log_tc_flower_match("tc flower compare failed action length check", ++ a, b); + return false; + } + + for (int i = 0; i < a->action_count; i++, action_a++, action_b++) { + if (memcmp(action_a, action_b, sizeof *action_a)) { +- VLOG_DBG_RL(&error_rl, "tc flower compare failed action compare " +- "for %d", i); ++ log_tc_flower_match("tc flower compare failed action compare", ++ a, b); + return false; + } + } +diff --git a/lib/tc.h b/lib/tc.h +index a147ca461d..d6cdddd169 100644 +--- a/lib/tc.h ++++ b/lib/tc.h +@@ -256,11 +256,23 @@ struct tc_action { + bool force; + bool commit; + } ct; ++ ++ struct { ++ struct tc_flower_key key; ++ struct tc_flower_key mask; ++ } rewrite; + }; + + enum tc_action_type type; + }; + ++/* assert that if we overflow with a masked write of uint32_t to the last byte ++ * of action.rewrite we overflow inside struct tc_action. 
++ * shouldn't happen unless someone moves rewrite to the end of action */ ++BUILD_ASSERT_DECL(offsetof(struct tc_action, rewrite) ++ + MEMBER_SIZEOF(struct tc_action, rewrite) ++ + sizeof(uint32_t) - 2 < sizeof(struct tc_action)); ++ + enum tc_offloaded_state { + TC_OFFLOADED_STATE_UNDEFINED, + TC_OFFLOADED_STATE_IN_HW, +@@ -330,15 +342,10 @@ struct tc_flower { + int action_count; + struct tc_action actions[TCA_ACT_MAX_NUM]; + +- struct ovs_flow_stats stats; ++ struct ovs_flow_stats stats_sw; ++ struct ovs_flow_stats stats_hw; + uint64_t lastused; + +- struct { +- bool rewrite; +- struct tc_flower_key key; +- struct tc_flower_key mask; +- } rewrite; +- + uint32_t csum_update_flags; + + bool tunnel; +@@ -352,13 +359,6 @@ struct tc_flower { + enum tc_offload_policy tc_policy; + }; + +-/* assert that if we overflow with a masked write of uint32_t to the last byte +- * of flower.rewrite we overflow inside struct flower. +- * shouldn't happen unless someone moves rewrite to the end of flower */ +-BUILD_ASSERT_DECL(offsetof(struct tc_flower, rewrite) +- + MEMBER_SIZEOF(struct tc_flower, rewrite) +- + sizeof(uint32_t) - 2 < sizeof(struct tc_flower)); +- + int tc_replace_flower(struct tcf_id *id, struct tc_flower *flower); + int tc_del_filter(struct tcf_id *id); + int tc_get_flower(struct tcf_id *id, struct tc_flower *flower); +diff --git a/lib/tnl-ports.c b/lib/tnl-ports.c +index 58269d3b16..f9fee37939 100644 +--- a/lib/tnl-ports.c ++++ b/lib/tnl-ports.c +@@ -259,14 +259,14 @@ ipdev_map_delete(struct ip_device *ip_dev, ovs_be16 tp_port, uint8_t nw_proto) + void + tnl_port_map_delete(odp_port_t port, const char type[]) + { +- struct tnl_port *p, *next; ++ struct tnl_port *p; + struct ip_device *ip_dev; + uint8_t nw_proto; + + nw_proto = tnl_type_to_nw_proto(type); + + ovs_mutex_lock(&mutex); +- LIST_FOR_EACH_SAFE(p, next, node, &port_list) { ++ LIST_FOR_EACH_SAFE (p, node, &port_list) { + if (p->port == port && p->nw_proto == nw_proto && + 
ovs_refcount_unref_relaxed(&p->ref_cnt) == 1) { + ovs_list_remove(&p->node); +@@ -444,11 +444,11 @@ delete_ipdev(struct ip_device *ip_dev) + void + tnl_port_map_insert_ipdev(const char dev_name[]) + { +- struct ip_device *ip_dev, *next; ++ struct ip_device *ip_dev; + + ovs_mutex_lock(&mutex); + +- LIST_FOR_EACH_SAFE(ip_dev, next, node, &addr_list) { ++ LIST_FOR_EACH_SAFE (ip_dev, node, &addr_list) { + if (!strcmp(netdev_get_name(ip_dev->dev), dev_name)) { + if (ip_dev->change_seq == netdev_get_change_seq(ip_dev->dev)) { + goto out; +@@ -466,10 +466,10 @@ out: + void + tnl_port_map_delete_ipdev(const char dev_name[]) + { +- struct ip_device *ip_dev, *next; ++ struct ip_device *ip_dev; + + ovs_mutex_lock(&mutex); +- LIST_FOR_EACH_SAFE(ip_dev, next, node, &addr_list) { ++ LIST_FOR_EACH_SAFE (ip_dev, node, &addr_list) { + if (!strcmp(netdev_get_name(ip_dev->dev), dev_name)) { + delete_ipdev(ip_dev); + } +@@ -480,10 +480,10 @@ tnl_port_map_delete_ipdev(const char dev_name[]) + void + tnl_port_map_run(void) + { +- struct ip_device *ip_dev, *next; ++ struct ip_device *ip_dev; + + ovs_mutex_lock(&mutex); +- LIST_FOR_EACH_SAFE(ip_dev, next, node, &addr_list) { ++ LIST_FOR_EACH_SAFE (ip_dev, node, &addr_list) { + char dev_name[IFNAMSIZ]; + + if (ip_dev->change_seq == netdev_get_change_seq(ip_dev->dev)) { +diff --git a/lib/unixctl.c b/lib/unixctl.c +index 69aed6722c..103357ee91 100644 +--- a/lib/unixctl.c ++++ b/lib/unixctl.c +@@ -390,8 +390,8 @@ unixctl_server_run(struct unixctl_server *server) + } + } + +- struct unixctl_conn *conn, *next; +- LIST_FOR_EACH_SAFE (conn, next, node, &server->conns) { ++ struct unixctl_conn *conn; ++ LIST_FOR_EACH_SAFE (conn, node, &server->conns) { + int error = run_connection(conn); + if (error && error != EAGAIN) { + kill_connection(conn); +@@ -422,9 +422,9 @@ void + unixctl_server_destroy(struct unixctl_server *server) + { + if (server) { +- struct unixctl_conn *conn, *next; ++ struct unixctl_conn *conn; + +- LIST_FOR_EACH_SAFE (conn, next, 
node, &server->conns) { ++ LIST_FOR_EACH_SAFE (conn, node, &server->conns) { + kill_connection(conn); + } + +diff --git a/lib/vconn.c b/lib/vconn.c +index 7415e6291f..b556762277 100644 +--- a/lib/vconn.c ++++ b/lib/vconn.c +@@ -960,8 +960,8 @@ vconn_transact_multipart(struct vconn *vconn, + ovs_list_init(replies); + + /* Send all the requests. */ +- struct ofpbuf *b, *next; +- LIST_FOR_EACH_SAFE (b, next, list_node, requests) { ++ struct ofpbuf *b; ++ LIST_FOR_EACH_SAFE (b, list_node, requests) { + ovs_list_remove(&b->list_node); + int error = vconn_send_block(vconn, b); + if (error) { +diff --git a/ofproto/bond.c b/ofproto/bond.c +index cdfdf0b9d8..845f69e21d 100644 +--- a/ofproto/bond.c ++++ b/ofproto/bond.c +@@ -338,7 +338,7 @@ static void + update_recirc_rules__(struct bond *bond) + { + struct match match; +- struct bond_pr_rule_op *pr_op, *next_op; ++ struct bond_pr_rule_op *pr_op; + uint64_t ofpacts_stub[128 / 8]; + struct ofpbuf ofpacts; + int i; +@@ -372,7 +372,7 @@ update_recirc_rules__(struct bond *bond) + + ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub); + +- HMAP_FOR_EACH_SAFE(pr_op, next_op, hmap_node, &bond->pr_rule_ops) { ++ HMAP_FOR_EACH_SAFE (pr_op, hmap_node, &bond->pr_rule_ops) { + int error; + switch (pr_op->op) { + case ADD: +@@ -1258,7 +1258,7 @@ insert_bal(struct ovs_list *bals, struct bond_member *member) + break; + } + } +- ovs_list_insert(&pos->bal_node, &member->bal_node); ++ ovs_list_insert(pos ? 
&pos->bal_node : bals, &member->bal_node); + } + + /* Removes 'member' from its current list and then inserts it into 'bals' so +diff --git a/ofproto/connmgr.c b/ofproto/connmgr.c +index fa8f6cd0e8..172a58cfb7 100644 +--- a/ofproto/connmgr.c ++++ b/ofproto/connmgr.c +@@ -310,8 +310,8 @@ connmgr_destroy(struct connmgr *mgr) + return; + } + +- struct ofservice *ofservice, *next_ofservice; +- HMAP_FOR_EACH_SAFE (ofservice, next_ofservice, hmap_node, &mgr->services) { ++ struct ofservice *ofservice; ++ HMAP_FOR_EACH_SAFE (ofservice, hmap_node, &mgr->services) { + ofservice_destroy(ofservice); + } + hmap_destroy(&mgr->services); +@@ -351,8 +351,8 @@ connmgr_run(struct connmgr *mgr, + } + } + +- struct ofconn *ofconn, *next_ofconn; +- LIST_FOR_EACH_SAFE (ofconn, next_ofconn, connmgr_node, &mgr->conns) { ++ struct ofconn *ofconn; ++ LIST_FOR_EACH_SAFE (ofconn, connmgr_node, &mgr->conns) { + ofconn_run(ofconn, handle_openflow); + } + ofmonitor_run(mgr); +@@ -592,8 +592,8 @@ connmgr_set_controllers(struct connmgr *mgr, struct shash *controllers) + + /* Delete services that are no longer configured. + * Update configuration of all now-existing services. 
*/ +- struct ofservice *ofservice, *next_ofservice; +- HMAP_FOR_EACH_SAFE (ofservice, next_ofservice, hmap_node, &mgr->services) { ++ struct ofservice *ofservice; ++ HMAP_FOR_EACH_SAFE (ofservice, hmap_node, &mgr->services) { + const char *target = ofservice->target; + struct ofproto_controller *c = shash_find_data(controllers, target); + if (!c) { +@@ -1137,9 +1137,9 @@ ofconn_remove_bundle(struct ofconn *ofconn, struct ofp_bundle *bundle) + static void + bundle_remove_all(struct ofconn *ofconn) + { +- struct ofp_bundle *b, *next; ++ struct ofp_bundle *b; + +- HMAP_FOR_EACH_SAFE (b, next, node, &ofconn->bundles) { ++ HMAP_FOR_EACH_SAFE (b, node, &ofconn->bundles) { + ofp_bundle_remove__(ofconn, b); + } + } +@@ -1149,8 +1149,8 @@ bundle_remove_expired(struct ofconn *ofconn, long long int now) + { + long long int limit = now - bundle_idle_timeout; + +- struct ofp_bundle *b, *next; +- HMAP_FOR_EACH_SAFE (b, next, node, &ofconn->bundles) { ++ struct ofp_bundle *b; ++ HMAP_FOR_EACH_SAFE (b, node, &ofconn->bundles) { + if (b->used <= limit) { + ofconn_send_error(ofconn, b->msg, OFPERR_OFPBFC_TIMEOUT); + ofp_bundle_remove__(ofconn, b); +@@ -1247,8 +1247,8 @@ ofconn_destroy(struct ofconn *ofconn) + + free(ofconn->async_cfg); + +- struct ofmonitor *monitor, *next_monitor; +- HMAP_FOR_EACH_SAFE (monitor, next_monitor, ofconn_node, ++ struct ofmonitor *monitor; ++ HMAP_FOR_EACH_SAFE (monitor, ofconn_node, + &ofconn->monitors) { + ofmonitor_destroy(monitor); + } +@@ -1953,8 +1953,8 @@ static void + ofservice_close_all(struct ofservice *ofservice) + OVS_REQUIRES(ofproto_mutex) + { +- struct ofconn *ofconn, *next; +- LIST_FOR_EACH_SAFE (ofconn, next, ofservice_node, &ofservice->conns) { ++ struct ofconn *ofconn; ++ LIST_FOR_EACH_SAFE (ofconn, ofservice_node, &ofservice->conns) { + ofconn_destroy(ofconn); + } + } +diff --git a/ofproto/in-band.c b/ofproto/in-band.c +index 82d8dfa147..3992251f5f 100644 +--- a/ofproto/in-band.c ++++ b/ofproto/in-band.c +@@ -377,7 +377,7 @@ 
in_band_run(struct in_band *ib) + uint64_t ofpacts_stub[128 / 8]; + struct ofpbuf ofpacts; + +- struct in_band_rule *rule, *next; ++ struct in_band_rule *rule; + + ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub); + +@@ -391,7 +391,7 @@ in_band_run(struct in_band *ib) + + update_rules(ib); + +- HMAP_FOR_EACH_SAFE (rule, next, hmap_node, &ib->rules) { ++ HMAP_FOR_EACH_SAFE (rule, hmap_node, &ib->rules) { + switch (rule->op) { + case ADD: + ofproto_add_flow(ib->ofproto, &rule->match, rule->priority, +diff --git a/ofproto/netflow.c b/ofproto/netflow.c +index ed58de17de..aad9f9c77a 100644 +--- a/ofproto/netflow.c ++++ b/ofproto/netflow.c +@@ -299,7 +299,7 @@ static void + netflow_run__(struct netflow *nf) OVS_REQUIRES(mutex) + { + long long int now = time_msec(); +- struct netflow_flow *nf_flow, *next; ++ struct netflow_flow *nf_flow; + + if (nf->packet.size) { + collectors_send(nf->collectors, nf->packet.data, nf->packet.size); +@@ -312,7 +312,7 @@ netflow_run__(struct netflow *nf) OVS_REQUIRES(mutex) + + nf->next_timeout = now + 1000; + +- HMAP_FOR_EACH_SAFE (nf_flow, next, hmap_node, &nf->flows) { ++ HMAP_FOR_EACH_SAFE (nf_flow, hmap_node, &nf->flows) { + if (now > nf_flow->last_expired + nf->active_timeout) { + bool idle = nf_flow->used < nf_flow->last_expired; + netflow_expire__(nf, nf_flow); +@@ -416,8 +416,8 @@ netflow_unref(struct netflow *nf) + collectors_destroy(nf->collectors); + ofpbuf_uninit(&nf->packet); + +- struct netflow_flow *nf_flow, *next; +- HMAP_FOR_EACH_SAFE (nf_flow, next, hmap_node, &nf->flows) { ++ struct netflow_flow *nf_flow; ++ HMAP_FOR_EACH_SAFE (nf_flow, hmap_node, &nf->flows) { + hmap_remove(&nf->flows, &nf_flow->hmap_node); + free(nf_flow); + } +diff --git a/ofproto/ofproto-dpif-ipfix.c b/ofproto/ofproto-dpif-ipfix.c +index 9280e008ea..fc927fe866 100644 +--- a/ofproto/ofproto-dpif-ipfix.c ++++ b/ofproto/ofproto-dpif-ipfix.c +@@ -1078,7 +1078,7 @@ dpif_ipfix_set_options( + { + int i; + struct 
ofproto_ipfix_flow_exporter_options *options; +- struct dpif_ipfix_flow_exporter_map_node *node, *next; ++ struct dpif_ipfix_flow_exporter_map_node *node; + + ovs_mutex_lock(&mutex); + dpif_ipfix_bridge_exporter_set_options(&di->bridge_exporter, +@@ -1103,7 +1103,7 @@ dpif_ipfix_set_options( + } + + /* Remove dropped flow exporters, if any needs to be removed. */ +- HMAP_FOR_EACH_SAFE (node, next, node, &di->flow_exporter_map) { ++ HMAP_FOR_EACH_SAFE (node, node, &di->flow_exporter_map) { + /* This is slow but doesn't take any extra memory, and + * this table is not supposed to contain many rows anyway. */ + options = (struct ofproto_ipfix_flow_exporter_options *) +@@ -1215,7 +1215,7 @@ static void + dpif_ipfix_clear(struct dpif_ipfix *di) OVS_REQUIRES(mutex) + { + struct dpif_ipfix_flow_exporter_map_node *exp_node; +- struct dpif_ipfix_port *dip, *next; ++ struct dpif_ipfix_port *dip; + + dpif_ipfix_bridge_exporter_clear(&di->bridge_exporter); + +@@ -1224,7 +1224,7 @@ dpif_ipfix_clear(struct dpif_ipfix *di) OVS_REQUIRES(mutex) + free(exp_node); + } + +- HMAP_FOR_EACH_SAFE (dip, next, hmap_node, &di->ports) { ++ HMAP_FOR_EACH_SAFE (dip, hmap_node, &di->ports) { + dpif_ipfix_del_port__(di, dip); + } + } +@@ -2799,7 +2799,7 @@ dpif_ipfix_cache_expire(struct dpif_ipfix_exporter *exporter, + bool forced_end, const uint64_t export_time_usec, + const uint32_t export_time_sec) + { +- struct ipfix_flow_cache_entry *entry, *next_entry; ++ struct ipfix_flow_cache_entry *entry; + uint64_t max_flow_start_timestamp_usec; + bool template_msg_sent = false; + enum ipfix_flow_end_reason flow_end_reason; +@@ -2811,7 +2811,7 @@ dpif_ipfix_cache_expire(struct dpif_ipfix_exporter *exporter, + max_flow_start_timestamp_usec = export_time_usec - + 1000000LL * exporter->cache_active_timeout; + +- LIST_FOR_EACH_SAFE (entry, next_entry, cache_flow_start_timestamp_list_node, ++ LIST_FOR_EACH_SAFE (entry, cache_flow_start_timestamp_list_node, + &exporter->cache_flow_start_timestamp_list) { + 
if (forced_end) { + flow_end_reason = FORCED_END; +diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c +index 30e7caf54a..e8e1de920b 100644 +--- a/ofproto/ofproto-dpif-sflow.c ++++ b/ofproto/ofproto-dpif-sflow.c +@@ -468,7 +468,8 @@ sflow_choose_agent_address(const char *agent_device, + const char *target; + SSET_FOR_EACH (target, targets) { + struct sockaddr_storage ss; +- if (inet_parse_active(target, SFL_DEFAULT_COLLECTOR_PORT, &ss, true)) { ++ if (inet_parse_active(target, SFL_DEFAULT_COLLECTOR_PORT, ++ &ss, true, NULL)) { + /* sFlow only supports target in default routing table with + * packet mark zero. + */ +@@ -590,10 +591,10 @@ void + dpif_sflow_unref(struct dpif_sflow *ds) OVS_EXCLUDED(mutex) + { + if (ds && ovs_refcount_unref_relaxed(&ds->ref_cnt) == 1) { +- struct dpif_sflow_port *dsp, *next; ++ struct dpif_sflow_port *dsp; + + dpif_sflow_clear(ds); +- HMAP_FOR_EACH_SAFE (dsp, next, hmap_node, &ds->ports) { ++ HMAP_FOR_EACH_SAFE (dsp, hmap_node, &ds->ports) { + dpif_sflow_del_port__(ds, dsp); + } + hmap_destroy(&ds->ports); +diff --git a/ofproto/ofproto-dpif-trace.c b/ofproto/ofproto-dpif-trace.c +index 78a54c715d..109940ad2a 100644 +--- a/ofproto/ofproto-dpif-trace.c ++++ b/ofproto/ofproto-dpif-trace.c +@@ -65,8 +65,8 @@ static void + oftrace_node_list_destroy(struct ovs_list *nodes) + { + if (nodes) { +- struct oftrace_node *node, *next; +- LIST_FOR_EACH_SAFE (node, next, node, nodes) { ++ struct oftrace_node *node; ++ LIST_FOR_EACH_SAFE (node, node, nodes) { + ovs_list_remove(&node->node); + oftrace_node_destroy(node); + } +diff --git a/ofproto/ofproto-dpif-xlate-cache.c b/ofproto/ofproto-dpif-xlate-cache.c +index dcc91cb380..9224ee2e6d 100644 +--- a/ofproto/ofproto-dpif-xlate-cache.c ++++ b/ofproto/ofproto-dpif-xlate-cache.c +@@ -209,6 +209,7 @@ xlate_cache_clear_entry(struct xc_entry *entry) + { + switch (entry->type) { + case XC_TABLE: ++ ofproto_unref(&(entry->table.ofproto->up)); + break; + case XC_RULE: + 
ofproto_rule_unref(&entry->rule->up); +@@ -231,6 +232,7 @@ xlate_cache_clear_entry(struct xc_entry *entry) + free(entry->learn.ofm); + break; + case XC_NORMAL: ++ ofproto_unref(&(entry->normal.ofproto->up)); + break; + case XC_FIN_TIMEOUT: + /* 'u.fin.rule' is always already held as a XC_RULE, which +diff --git a/ofproto/ofproto-dpif-xlate-cache.h b/ofproto/ofproto-dpif-xlate-cache.h +index 114aff8ea3..0fc6d2ea60 100644 +--- a/ofproto/ofproto-dpif-xlate-cache.h ++++ b/ofproto/ofproto-dpif-xlate-cache.h +@@ -61,9 +61,8 @@ enum xc_type { + * that a flow relates to, although they may be used for other effects as well + * (for instance, refreshing hard timeouts for learned flows). + * +- * An explicit reference is taken to all pointers other than the ones for +- * struct ofproto_dpif. ofproto_dpif pointers are explicitly protected by +- * destroying all xlate caches before the ofproto is destroyed. */ ++ * An explicit reference is taken to all pointers. ++ */ + struct xc_entry { + enum xc_type type; + union { +diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c +index 578cbfe581..17f7e2883f 100644 +--- a/ofproto/ofproto-dpif-xlate.c ++++ b/ofproto/ofproto-dpif-xlate.c +@@ -865,7 +865,7 @@ xlate_xbridge_init(struct xlate_cfg *xcfg, struct xbridge *xbridge) + ovs_list_init(&xbridge->xbundles); + hmap_init(&xbridge->xports); + hmap_insert(&xcfg->xbridges, &xbridge->hmap_node, +- hash_pointer(xbridge->ofproto, 0)); ++ uuid_hash(&xbridge->ofproto->uuid)); + } + + static void +@@ -1222,13 +1222,13 @@ xlate_txn_start(void) + static void + xlate_xcfg_free(struct xlate_cfg *xcfg) + { +- struct xbridge *xbridge, *next_xbridge; ++ struct xbridge *xbridge; + + if (!xcfg) { + return; + } + +- HMAP_FOR_EACH_SAFE (xbridge, next_xbridge, hmap_node, &xcfg->xbridges) { ++ HMAP_FOR_EACH_SAFE (xbridge, hmap_node, &xcfg->xbridges) { + xlate_xbridge_remove(xcfg, xbridge); + } + +@@ -1282,18 +1282,18 @@ xlate_ofproto_set(struct ofproto_dpif *ofproto, const char *name, + 
static void + xlate_xbridge_remove(struct xlate_cfg *xcfg, struct xbridge *xbridge) + { +- struct xbundle *xbundle, *next_xbundle; +- struct xport *xport, *next_xport; ++ struct xbundle *xbundle; ++ struct xport *xport; + + if (!xbridge) { + return; + } + +- HMAP_FOR_EACH_SAFE (xport, next_xport, ofp_node, &xbridge->xports) { ++ HMAP_FOR_EACH_SAFE (xport, ofp_node, &xbridge->xports) { + xlate_xport_remove(xcfg, xport); + } + +- LIST_FOR_EACH_SAFE (xbundle, next_xbundle, list_node, &xbridge->xbundles) { ++ LIST_FOR_EACH_SAFE (xbundle, list_node, &xbridge->xbundles) { + xlate_xbundle_remove(xcfg, xbundle); + } + +@@ -1639,7 +1639,7 @@ xbridge_lookup(struct xlate_cfg *xcfg, const struct ofproto_dpif *ofproto) + + xbridges = &xcfg->xbridges; + +- HMAP_FOR_EACH_IN_BUCKET (xbridge, hmap_node, hash_pointer(ofproto, 0), ++ HMAP_FOR_EACH_IN_BUCKET (xbridge, hmap_node, uuid_hash(&ofproto->uuid), + xbridges) { + if (xbridge->ofproto == ofproto) { + return xbridge; +@@ -1661,6 +1661,23 @@ xbridge_lookup_by_uuid(struct xlate_cfg *xcfg, const struct uuid *uuid) + return NULL; + } + ++struct ofproto_dpif * ++xlate_ofproto_lookup(const struct uuid *uuid) ++{ ++ struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp); ++ struct xbridge *xbridge; ++ ++ if (!xcfg) { ++ return NULL; ++ } ++ ++ xbridge = xbridge_lookup_by_uuid(xcfg, uuid); ++ if (xbridge != NULL) { ++ return xbridge->ofproto; ++ } ++ return NULL; ++} ++ + static struct xbundle * + xbundle_lookup(struct xlate_cfg *xcfg, const struct ofbundle *ofbundle) + { +@@ -3015,7 +3032,7 @@ xlate_normal(struct xlate_ctx *ctx) + bool is_grat_arp = is_gratuitous_arp(flow, wc); + if (ctx->xin->allow_side_effects + && flow->packet_type == htonl(PT_ETH) +- && in_port->pt_mode != NETDEV_PT_LEGACY_L3 ++ && in_port && in_port->pt_mode != NETDEV_PT_LEGACY_L3 + ) { + update_learning_table(ctx, in_xbundle, flow->dl_src, vlan, + is_grat_arp); +@@ -3024,12 +3041,14 @@ xlate_normal(struct xlate_ctx *ctx) + struct xc_entry *entry; + + /* 
Save just enough info to update mac learning table later. */ +- entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NORMAL); +- entry->normal.ofproto = ctx->xbridge->ofproto; +- entry->normal.in_port = flow->in_port.ofp_port; +- entry->normal.dl_src = flow->dl_src; +- entry->normal.vlan = vlan; +- entry->normal.is_gratuitous_arp = is_grat_arp; ++ if (ofproto_try_ref(&ctx->xbridge->ofproto->up)) { ++ entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NORMAL); ++ entry->normal.ofproto = ctx->xbridge->ofproto; ++ entry->normal.in_port = flow->in_port.ofp_port; ++ entry->normal.dl_src = flow->dl_src; ++ entry->normal.vlan = vlan; ++ entry->normal.is_gratuitous_arp = is_grat_arp; ++ } + } + + /* Determine output bundle. */ +@@ -3523,6 +3542,9 @@ propagate_tunnel_data_to_flow__(struct flow *dst_flow, + dst_flow->dl_dst = dmac; + dst_flow->dl_src = smac; + ++ /* Clear VLAN entries which do not apply for tunnel flows. */ ++ memset(dst_flow->vlans, 0, sizeof dst_flow->vlans); ++ + dst_flow->packet_type = htonl(PT_ETH); + dst_flow->nw_dst = src_flow->tunnel.ip_dst; + dst_flow->nw_src = src_flow->tunnel.ip_src; +@@ -4176,6 +4198,10 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, + if (xport->pt_mode == NETDEV_PT_LEGACY_L3) { + flow->packet_type = PACKET_TYPE_BE(OFPHTN_ETHERTYPE, + ntohs(flow->dl_type)); ++ if (ctx->pending_encap) { ++ /* The Ethernet header was not actually added yet. 
*/ ++ ctx->pending_encap = false; ++ } + } + } + +diff --git a/ofproto/ofproto-dpif-xlate.h b/ofproto/ofproto-dpif-xlate.h +index 851088d794..2ba90e999c 100644 +--- a/ofproto/ofproto-dpif-xlate.h ++++ b/ofproto/ofproto-dpif-xlate.h +@@ -176,6 +176,7 @@ void xlate_ofproto_set(struct ofproto_dpif *, const char *name, struct dpif *, + bool forward_bpdu, bool has_in_band, + const struct dpif_backer_support *support); + void xlate_remove_ofproto(struct ofproto_dpif *); ++struct ofproto_dpif *xlate_ofproto_lookup(const struct uuid *uuid); + + void xlate_bundle_set(struct ofproto_dpif *, struct ofbundle *, + const char *name, enum port_vlan_mode, +diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c +index 8143dd965f..6601f23464 100644 +--- a/ofproto/ofproto-dpif.c ++++ b/ofproto/ofproto-dpif.c +@@ -215,10 +215,6 @@ struct shash all_dpif_backers = SHASH_INITIALIZER(&all_dpif_backers); + static struct hmap all_ofproto_dpifs_by_name = + HMAP_INITIALIZER(&all_ofproto_dpifs_by_name); + +-/* All existing ofproto_dpif instances, indexed by ->uuid. */ +-static struct hmap all_ofproto_dpifs_by_uuid = +- HMAP_INITIALIZER(&all_ofproto_dpifs_by_uuid); +- + static bool ofproto_use_tnl_push_pop = true; + static void ofproto_unixctl_init(void); + static void ct_zone_config_init(struct dpif_backer *backer); +@@ -1663,7 +1659,7 @@ static int + construct(struct ofproto *ofproto_) + { + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); +- struct shash_node *node, *next; ++ struct shash_node *node; + int error; + + /* Tunnel module can get used right after the udpif threads are running. 
*/ +@@ -1701,7 +1697,7 @@ construct(struct ofproto *ofproto_) + ofproto->ams_seqno = seq_read(ofproto->ams_seq); + + +- SHASH_FOR_EACH_SAFE (node, next, &init_ofp_ports) { ++ SHASH_FOR_EACH_SAFE (node, &init_ofp_ports) { + struct iface_hint *iface_hint = node->data; + + if (!strcmp(iface_hint->br_name, ofproto->up.name)) { +@@ -1720,9 +1716,6 @@ construct(struct ofproto *ofproto_) + hmap_insert(&all_ofproto_dpifs_by_name, + &ofproto->all_ofproto_dpifs_by_name_node, + hash_string(ofproto->up.name, 0)); +- hmap_insert(&all_ofproto_dpifs_by_uuid, +- &ofproto->all_ofproto_dpifs_by_uuid_node, +- uuid_hash(&ofproto->uuid)); + memset(&ofproto->stats, 0, sizeof ofproto->stats); + + ofproto_init_tables(ofproto_, N_TABLES); +@@ -1820,8 +1813,6 @@ destruct(struct ofproto *ofproto_, bool del) + + hmap_remove(&all_ofproto_dpifs_by_name, + &ofproto->all_ofproto_dpifs_by_name_node); +- hmap_remove(&all_ofproto_dpifs_by_uuid, +- &ofproto->all_ofproto_dpifs_by_uuid_node); + + OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) { + CLS_FOR_EACH (rule, up.cr, &table->cls) { +@@ -1945,7 +1936,7 @@ run(struct ofproto *ofproto_) + + new_dump_seq = seq_read(udpif_dump_seq(ofproto->backer->udpif)); + if (ofproto->dump_seq != new_dump_seq) { +- struct rule *rule, *next_rule; ++ struct rule *rule; + long long now = time_msec(); + + /* We know stats are relatively fresh, so now is a good time to do some +@@ -1955,7 +1946,7 @@ run(struct ofproto *ofproto_) + /* Expire OpenFlow flows whose idle_timeout or hard_timeout + * has passed. 
*/ + ovs_mutex_lock(&ofproto_mutex); +- LIST_FOR_EACH_SAFE (rule, next_rule, expirable, ++ LIST_FOR_EACH_SAFE (rule, expirable, + &ofproto->up.expirable) { + rule_expire(rule_dpif_cast(rule), now); + } +@@ -2371,6 +2362,12 @@ set_ipfix( + dpif_ipfix_unref(di); + ofproto->ipfix = NULL; + } ++ ++ /* TODO: need to consider ipfix option changes more than ++ * enable/disable */ ++ if (new_di || !ofproto->ipfix) { ++ ofproto->backer->need_revalidate = REV_RECONFIGURE; ++ } + } + + return 0; +@@ -3106,11 +3103,11 @@ bundle_flush_macs(struct ofbundle *bundle, bool all_ofprotos) + { + struct ofproto_dpif *ofproto = bundle->ofproto; + struct mac_learning *ml = ofproto->ml; +- struct mac_entry *mac, *next_mac; ++ struct mac_entry *mac; + + ofproto->backer->need_revalidate = REV_RECONFIGURE; + ovs_rwlock_wrlock(&ml->rwlock); +- LIST_FOR_EACH_SAFE (mac, next_mac, lru_node, &ml->lrus) { ++ LIST_FOR_EACH_SAFE (mac, lru_node, &ml->lrus) { + if (mac_entry_get_port(ml, mac) == bundle) { + if (all_ofprotos) { + struct ofproto_dpif *o; +@@ -3141,13 +3138,13 @@ bundle_move(struct ofbundle *old, struct ofbundle *new) + { + struct ofproto_dpif *ofproto = old->ofproto; + struct mac_learning *ml = ofproto->ml; +- struct mac_entry *mac, *next_mac; ++ struct mac_entry *mac; + + ovs_assert(new->ofproto == old->ofproto); + + ofproto->backer->need_revalidate = REV_RECONFIGURE; + ovs_rwlock_wrlock(&ml->rwlock); +- LIST_FOR_EACH_SAFE (mac, next_mac, lru_node, &ml->lrus) { ++ LIST_FOR_EACH_SAFE (mac, lru_node, &ml->lrus) { + if (mac_entry_get_port(ml, mac) == old) { + mac_entry_set_port(ml, mac, new); + } +@@ -3244,7 +3241,7 @@ static void + bundle_destroy(struct ofbundle *bundle) + { + struct ofproto_dpif *ofproto; +- struct ofport_dpif *port, *next_port; ++ struct ofport_dpif *port; + + if (!bundle) { + return; +@@ -3257,7 +3254,7 @@ bundle_destroy(struct ofbundle *bundle) + xlate_bundle_remove(bundle); + xlate_txn_commit(); + +- LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) 
{ ++ LIST_FOR_EACH_SAFE (port, bundle_node, &bundle->ports) { + bundle_del_port(port); + } + +@@ -3347,9 +3344,7 @@ bundle_set(struct ofproto *ofproto_, void *aux, + } + } + if (!ok || ovs_list_size(&bundle->ports) != s->n_members) { +- struct ofport_dpif *next_port; +- +- LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) { ++ LIST_FOR_EACH_SAFE (port, bundle_node, &bundle->ports) { + for (i = 0; i < s->n_members; i++) { + if (s->members[i] == port->up.ofp_port) { + goto found; +@@ -3963,6 +3958,10 @@ port_add(struct ofproto *ofproto_, struct netdev *netdev) + simap_put(&ofproto->backer->tnl_backers, + dp_port_name, odp_to_u32(port_no)); + } ++ } else { ++ struct dpif *dpif = ofproto->backer->dpif; ++ const char *dpif_type_str = dpif_normalize_type(dpif_type(dpif)); ++ netdev_set_dpif_type(netdev, dpif_type_str); + } + + if (netdev_get_tunnel_config(netdev)) { +@@ -4471,12 +4470,14 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto, + atomic_add_relaxed(&tbl->n_matched, stats->n_packets, &orig); + } + if (xcache) { +- struct xc_entry *entry; ++ if (ofproto_try_ref(&ofproto->up)) { ++ struct xc_entry *entry; + +- entry = xlate_cache_add_entry(xcache, XC_TABLE); +- entry->table.ofproto = ofproto; +- entry->table.id = *table_id; +- entry->table.match = true; ++ entry = xlate_cache_add_entry(xcache, XC_TABLE); ++ entry->table.ofproto = ofproto; ++ entry->table.id = *table_id; ++ entry->table.match = true; ++ } + } + return rule; + } +@@ -4507,12 +4508,14 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto, + stats->n_packets, &orig); + } + if (xcache) { +- struct xc_entry *entry; ++ if (ofproto_try_ref(&ofproto->up)) { ++ struct xc_entry *entry; + +- entry = xlate_cache_add_entry(xcache, XC_TABLE); +- entry->table.ofproto = ofproto; +- entry->table.id = next_id; +- entry->table.match = (rule != NULL); ++ entry = xlate_cache_add_entry(xcache, XC_TABLE); ++ entry->table.ofproto = ofproto; ++ entry->table.id = next_id; ++ 
entry->table.match = (rule != NULL); ++ } + } + if (rule) { + goto out; /* Match. */ +@@ -5550,9 +5553,9 @@ ct_zone_timeout_policy_sweep(struct dpif_backer *backer) + { + if (!ovs_list_is_empty(&backer->ct_tp_kill_list) + && time_msec() >= timeout_policy_cleanup_timer) { +- struct ct_timeout_policy *ct_tp, *next; ++ struct ct_timeout_policy *ct_tp; + +- LIST_FOR_EACH_SAFE (ct_tp, next, list_node, &backer->ct_tp_kill_list) { ++ LIST_FOR_EACH_SAFE (ct_tp, list_node, &backer->ct_tp_kill_list) { + if (!ct_dpif_del_timeout_policy(backer->dpif, ct_tp->tp_id)) { + ovs_list_remove(&ct_tp->list_node); + ct_timeout_policy_destroy(ct_tp, backer->tp_ids); +@@ -5818,15 +5821,7 @@ ofproto_dpif_lookup_by_name(const char *name) + struct ofproto_dpif * + ofproto_dpif_lookup_by_uuid(const struct uuid *uuid) + { +- struct ofproto_dpif *ofproto; +- +- HMAP_FOR_EACH_WITH_HASH (ofproto, all_ofproto_dpifs_by_uuid_node, +- uuid_hash(uuid), &all_ofproto_dpifs_by_uuid) { +- if (uuid_equals(&ofproto->uuid, uuid)) { +- return ofproto; +- } +- } +- return NULL; ++ return xlate_ofproto_lookup(uuid); + } + + static void +diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h +index 14b909973d..47e96e62e1 100644 +--- a/ofproto/ofproto-provider.h ++++ b/ofproto/ofproto-provider.h +@@ -143,6 +143,8 @@ struct ofproto { + /* Variable length mf_field mapping. Stores all configured variable length + * meta-flow fields (struct mf_field) in a switch. 
*/ + struct vl_mff_map vl_mff_map; ++ /* refcount to this ofproto, held by rule/group/xlate_caches */ ++ struct ovs_refcount refcount; + }; + + void ofproto_init_tables(struct ofproto *, int n_tables); +diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c +index 56aeac7209..2ed1078007 100644 +--- a/ofproto/ofproto.c ++++ b/ofproto/ofproto.c +@@ -549,6 +549,7 @@ ofproto_create(const char *datapath_name, const char *datapath_type, + + ovs_mutex_init(&ofproto->vl_mff_map.mutex); + cmap_init(&ofproto->vl_mff_map.cmap); ++ ovs_refcount_init(&ofproto->refcount); + + error = ofproto->ofproto_class->construct(ofproto); + if (error) { +@@ -1695,9 +1696,33 @@ ofproto_destroy__(struct ofproto *ofproto) + ofproto->ofproto_class->dealloc(ofproto); + } + +-/* Destroying rules is doubly deferred, must have 'ofproto' around for them. +- * - 1st we defer the removal of the rules from the classifier +- * - 2nd we defer the actual destruction of the rules. */ ++/* ++ * Rule destruction requires ofproto to remain accessible. ++ * Depending on the rule destruction call (shown in below), it can take several ++ * RCU grace periods before the ofproto reference is not needed anymore. ++ * The ofproto destruction callback is thus protected by a refcount, ++ * and such destruction is itself deferred. ++ * ++ * remove_rules_postponed (one grace period) ++ * -> remove_rule_rcu ++ * -> remove_rule_rcu__ ++ * -> ofproto_rule_unref -> ref count != 1 ++ * -> ... more grace periods. ++ * -> rule_destroy_cb (> 2 grace periods) ++ * -> free ++ * ++ * NOTE: The original ofproto destruction is only deferred by two grace ++ * periods to keep ofproto accessible. By using refcount together the ++ * destruction can be deferred for longer time. Now ofproto has 3 states: ++ * ++ * state 1: alive, with refcount >= 1 ++ * state 2: dying, with refcount == 0, however pointer is valid ++ * state 3: died, memory freed, pointer might be dangling. 
++ * ++ * We only need to add refcount to certain objects whose destruction can ++ * take several RCU grace periods (rule, group, xlate_cache). Other ++ * references to ofproto must be cleared before the 2 RCU grace periods. ++ */ + static void + ofproto_destroy_defer__(struct ofproto *ofproto) + OVS_EXCLUDED(ofproto_mutex) +@@ -1705,11 +1730,31 @@ ofproto_destroy_defer__(struct ofproto *ofproto) + ovsrcu_postpone(ofproto_destroy__, ofproto); + } + ++void ++ofproto_ref(struct ofproto *ofproto) ++{ ++ ovs_refcount_ref(&ofproto->refcount); ++} ++ ++bool ++ofproto_try_ref(struct ofproto *ofproto) ++{ ++ return ovs_refcount_try_ref_rcu(&ofproto->refcount); ++} ++ ++void ++ofproto_unref(struct ofproto *ofproto) ++{ ++ if (ofproto && ovs_refcount_unref(&ofproto->refcount) == 1) { ++ ovsrcu_postpone(ofproto_destroy_defer__, ofproto); ++ } ++} ++ + void + ofproto_destroy(struct ofproto *p, bool del) + OVS_EXCLUDED(ofproto_mutex) + { +- struct ofport *ofport, *next_ofport; ++ struct ofport *ofport; + struct ofport_usage *usage; + + if (!p) { +@@ -1717,7 +1762,7 @@ ofproto_destroy(struct ofproto *p, bool del) + } + + ofproto_flush__(p, del); +- HMAP_FOR_EACH_SAFE (ofport, next_ofport, hmap_node, &p->ports) { ++ HMAP_FOR_EACH_SAFE (ofport, hmap_node, &p->ports) { + ofport_destroy(ofport, del); + } + +@@ -1736,8 +1781,7 @@ ofproto_destroy(struct ofproto *p, bool del) + p->connmgr = NULL; + ovs_mutex_unlock(&ofproto_mutex); + +- /* Destroying rules is deferred, must have 'ofproto' around for them. */ +- ovsrcu_postpone(ofproto_destroy_defer__, p); ++ ofproto_unref(p); + } + + /* Destroys the datapath with the respective 'name' and 'type'. 
With the Linux +@@ -2782,7 +2826,7 @@ init_ports(struct ofproto *p) + { + struct ofproto_port_dump dump; + struct ofproto_port ofproto_port; +- struct shash_node *node, *next; ++ struct shash_node *node; + + OFPROTO_PORT_FOR_EACH (&ofproto_port, &dump, p) { + const char *name = ofproto_port.name; +@@ -2813,7 +2857,7 @@ init_ports(struct ofproto *p) + } + } + +- SHASH_FOR_EACH_SAFE(node, next, &init_ofp_ports) { ++ SHASH_FOR_EACH_SAFE (node, &init_ofp_ports) { + struct iface_hint *iface_hint = node->data; + + if (!strcmp(iface_hint->br_name, p->name)) { +@@ -2929,6 +2973,9 @@ ofproto_rule_destroy__(struct rule *rule) + cls_rule_destroy(CONST_CAST(struct cls_rule *, &rule->cr)); + rule_actions_destroy(rule_get_actions(rule)); + ovs_mutex_destroy(&rule->mutex); ++ /* ofproto_unref() must be called first. It is possible because ofproto ++ * destruction is deferred by an RCU grace period. */ ++ ofproto_unref(rule->ofproto); + rule->ofproto->ofproto_class->rule_dealloc(rule); + } + +@@ -3069,6 +3116,9 @@ group_destroy_cb(struct ofgroup *group) + &group->props)); + ofputil_bucket_list_destroy(CONST_CAST(struct ovs_list *, + &group->buckets)); ++ /* ofproto_unref() must be called first. It is possible because ofproto ++ * destruction is deferred by an RCU grace period. */ ++ ofproto_unref(group->ofproto); + group->ofproto->ofproto_class->group_dealloc(group); + } + +@@ -5271,10 +5321,15 @@ ofproto_rule_create(struct ofproto *ofproto, struct cls_rule *cr, + struct rule *rule; + enum ofperr error; + ++ if (!ofproto_try_ref(ofproto)) { ++ return OFPERR_OFPFMFC_UNKNOWN; ++ } ++ + /* Allocate new rule. 
*/ + rule = ofproto->ofproto_class->rule_alloc(); + if (!rule) { + cls_rule_destroy(cr); ++ ofproto_unref(ofproto); + VLOG_WARN_RL(&rl, "%s: failed to allocate a rule.", ofproto->name); + return OFPERR_OFPFMFC_UNKNOWN; + } +@@ -6797,9 +6852,9 @@ static void + meter_delete_all(struct ofproto *ofproto) + OVS_REQUIRES(ofproto_mutex) + { +- struct meter *meter, *next; ++ struct meter *meter; + +- HMAP_FOR_EACH_SAFE (meter, next, node, &ofproto->meters) { ++ HMAP_FOR_EACH_SAFE (meter, node, &ofproto->meters) { + hmap_remove(&ofproto->meters, &meter->node); + meter_destroy(ofproto, meter); + } +@@ -7339,8 +7394,13 @@ init_group(struct ofproto *ofproto, const struct ofputil_group_mod *gm, + return OFPERR_OFPGMFC_BAD_TYPE; + } + ++ if (!ofproto_try_ref(ofproto)) { ++ return OFPERR_OFPFMFC_UNKNOWN; ++ } ++ + *ofgroup = ofproto->ofproto_class->group_alloc(); + if (!*ofgroup) { ++ ofproto_unref(ofproto); + VLOG_WARN_RL(&rl, "%s: failed to allocate group", ofproto->name); + return OFPERR_OFPGMFC_OUT_OF_GROUPS; + } +@@ -7377,6 +7437,7 @@ init_group(struct ofproto *ofproto, const struct ofputil_group_mod *gm, + &(*ofgroup)->props)); + ofputil_bucket_list_destroy(CONST_CAST(struct ovs_list *, + &(*ofgroup)->buckets)); ++ ofproto_unref(ofproto); + ofproto->ofproto_class->group_dealloc(*ofgroup); + } + return error; +@@ -9138,8 +9199,8 @@ oftable_configure_eviction(struct oftable *table, unsigned int eviction, + + /* Destroy existing eviction groups, then destroy and recreate data + * structures to recover memory. 
*/ +- struct eviction_group *evg, *next; +- HMAP_FOR_EACH_SAFE (evg, next, id_node, &table->eviction_groups_by_id) { ++ struct eviction_group *evg; ++ HMAP_FOR_EACH_SAFE (evg, id_node, &table->eviction_groups_by_id) { + eviction_group_destroy(table, evg); + } + hmap_destroy(&table->eviction_groups_by_id); +diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h +index b0262da2df..4e15167ab7 100644 +--- a/ofproto/ofproto.h ++++ b/ofproto/ofproto.h +@@ -563,6 +563,10 @@ int ofproto_port_get_cfm_status(const struct ofproto *, + enum ofputil_table_miss ofproto_table_get_miss_config(const struct ofproto *, + uint8_t table_id); + ++void ofproto_ref(struct ofproto *); ++void ofproto_unref(struct ofproto *); ++bool ofproto_try_ref(struct ofproto *); ++ + #ifdef __cplusplus + } + #endif +diff --git a/ovsdb/condition.c b/ovsdb/condition.c +index 388dd54a16..9aa3788dbb 100644 +--- a/ovsdb/condition.c ++++ b/ovsdb/condition.c +@@ -220,13 +220,13 @@ ovsdb_condition_optimize(struct ovsdb_condition *cnd) + static void + ovsdb_condition_optimize_destroy(struct ovsdb_condition *cnd) + { +- struct shash_node *node, *next; ++ struct shash_node *node; + +- SHASH_FOR_EACH_SAFE (node, next, &cnd->o_columns) { ++ SHASH_FOR_EACH_SAFE (node, &cnd->o_columns) { + struct ovsdb_o_column *o_column = node->data; +- struct ovsdb_o_clause *c, *c_next; ++ struct ovsdb_o_clause *c; + +- HMAP_FOR_EACH_SAFE(c, c_next, hmap_node, &o_column->o_clauses) { ++ HMAP_FOR_EACH_SAFE (c, hmap_node, &o_column->o_clauses) { + hmap_remove(&o_column->o_clauses, &c->hmap_node); + free(c); + } +diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c +index 351c39d8aa..916a1f414e 100644 +--- a/ovsdb/jsonrpc-server.c ++++ b/ovsdb/jsonrpc-server.c +@@ -197,9 +197,9 @@ ovsdb_jsonrpc_server_remove_db(struct ovsdb_jsonrpc_server *svr, + void + ovsdb_jsonrpc_server_destroy(struct ovsdb_jsonrpc_server *svr) + { +- struct shash_node *node, *next; ++ struct shash_node *node; + +- SHASH_FOR_EACH_SAFE (node, next, 
&svr->remotes) { ++ SHASH_FOR_EACH_SAFE (node, &svr->remotes) { + ovsdb_jsonrpc_server_del_remote(node); + } + shash_destroy(&svr->remotes); +@@ -227,9 +227,9 @@ void + ovsdb_jsonrpc_server_set_remotes(struct ovsdb_jsonrpc_server *svr, + const struct shash *new_remotes) + { +- struct shash_node *node, *next; ++ struct shash_node *node; + +- SHASH_FOR_EACH_SAFE (node, next, &svr->remotes) { ++ SHASH_FOR_EACH_SAFE (node, &svr->remotes) { + struct ovsdb_jsonrpc_remote *remote = node->data; + struct ovsdb_jsonrpc_options *options + = shash_find_data(new_remotes, node->name); +@@ -585,9 +585,9 @@ ovsdb_jsonrpc_session_set_options(struct ovsdb_jsonrpc_session *session, + static void + ovsdb_jsonrpc_session_run_all(struct ovsdb_jsonrpc_remote *remote) + { +- struct ovsdb_jsonrpc_session *s, *next; ++ struct ovsdb_jsonrpc_session *s; + +- LIST_FOR_EACH_SAFE (s, next, node, &remote->sessions) { ++ LIST_FOR_EACH_SAFE (s, node, &remote->sessions) { + int error = ovsdb_jsonrpc_session_run(s); + if (error) { + ovsdb_jsonrpc_session_close(s); +@@ -642,9 +642,9 @@ ovsdb_jsonrpc_session_get_memory_usage_all( + static void + ovsdb_jsonrpc_session_close_all(struct ovsdb_jsonrpc_remote *remote) + { +- struct ovsdb_jsonrpc_session *s, *next; ++ struct ovsdb_jsonrpc_session *s; + +- LIST_FOR_EACH_SAFE (s, next, node, &remote->sessions) { ++ LIST_FOR_EACH_SAFE (s, node, &remote->sessions) { + ovsdb_jsonrpc_session_close(s); + } + } +@@ -660,9 +660,9 @@ static void + ovsdb_jsonrpc_session_reconnect_all(struct ovsdb_jsonrpc_remote *remote, + bool force, const char *comment) + { +- struct ovsdb_jsonrpc_session *s, *next; ++ struct ovsdb_jsonrpc_session *s; + +- LIST_FOR_EACH_SAFE (s, next, node, &remote->sessions) { ++ LIST_FOR_EACH_SAFE (s, node, &remote->sessions) { + if (force || !s->db_change_aware) { + jsonrpc_session_force_reconnect(s->js); + if (comment && jsonrpc_session_is_connected(s->js)) { +@@ -909,9 +909,9 @@ error: + static void + ovsdb_jsonrpc_session_unlock_all(struct 
ovsdb_jsonrpc_session *s) + { +- struct ovsdb_lock_waiter *waiter, *next; ++ struct ovsdb_lock_waiter *waiter; + +- HMAP_FOR_EACH_SAFE (waiter, next, session_node, &s->up.waiters) { ++ HMAP_FOR_EACH_SAFE (waiter, session_node, &s->up.waiters) { + ovsdb_jsonrpc_session_unlock__(waiter); + } + } +@@ -1198,8 +1198,8 @@ static void + ovsdb_jsonrpc_trigger_remove__(struct ovsdb_jsonrpc_session *s, + struct ovsdb *db) + { +- struct ovsdb_jsonrpc_trigger *t, *next; +- HMAP_FOR_EACH_SAFE (t, next, hmap_node, &s->triggers) { ++ struct ovsdb_jsonrpc_trigger *t; ++ HMAP_FOR_EACH_SAFE (t, hmap_node, &s->triggers) { + if (!db || t->trigger.db == db) { + ovsdb_jsonrpc_trigger_complete(t); + } +@@ -1226,8 +1226,8 @@ ovsdb_jsonrpc_trigger_complete_all(struct ovsdb_jsonrpc_session *s) + static void + ovsdb_jsonrpc_trigger_complete_done(struct ovsdb_jsonrpc_session *s) + { +- struct ovsdb_jsonrpc_trigger *trigger, *next; +- LIST_FOR_EACH_SAFE (trigger, next, trigger.node, &s->up.completions) { ++ struct ovsdb_jsonrpc_trigger *trigger; ++ LIST_FOR_EACH_SAFE (trigger, trigger.node, &s->up.completions) { + ovsdb_jsonrpc_trigger_complete(trigger); + } + } +@@ -1688,8 +1688,8 @@ ovsdb_jsonrpc_monitor_preremove_db(struct ovsdb_jsonrpc_session *s, + { + ovs_assert(db); + +- struct ovsdb_jsonrpc_monitor *m, *next; +- HMAP_FOR_EACH_SAFE (m, next, node, &s->monitors) { ++ struct ovsdb_jsonrpc_monitor *m; ++ HMAP_FOR_EACH_SAFE (m, node, &s->monitors) { + if (m->db == db) { + ovsdb_jsonrpc_monitor_destroy(m, true); + } +@@ -1700,9 +1700,9 @@ ovsdb_jsonrpc_monitor_preremove_db(struct ovsdb_jsonrpc_session *s, + static void + ovsdb_jsonrpc_monitor_remove_all(struct ovsdb_jsonrpc_session *s) + { +- struct ovsdb_jsonrpc_monitor *m, *next; ++ struct ovsdb_jsonrpc_monitor *m; + +- HMAP_FOR_EACH_SAFE (m, next, node, &s->monitors) { ++ HMAP_FOR_EACH_SAFE (m, node, &s->monitors) { + ovsdb_jsonrpc_monitor_destroy(m, false); + } + } +diff --git a/ovsdb/monitor.c b/ovsdb/monitor.c +index 
0f222cc992..952fa902e4 100644 +--- a/ovsdb/monitor.c ++++ b/ovsdb/monitor.c +@@ -638,14 +638,14 @@ ovsdb_monitor_change_set_destroy(struct ovsdb_monitor_change_set *mcs) + { + ovs_list_remove(&mcs->list_node); + +- struct ovsdb_monitor_change_set_for_table *mcst, *next_mcst; +- LIST_FOR_EACH_SAFE (mcst, next_mcst, list_in_change_set, ++ struct ovsdb_monitor_change_set_for_table *mcst; ++ LIST_FOR_EACH_SAFE (mcst, list_in_change_set, + &mcs->change_set_for_tables) { + ovs_list_remove(&mcst->list_in_change_set); + ovs_list_remove(&mcst->list_in_mt); + +- struct ovsdb_monitor_row *row, *next; +- HMAP_FOR_EACH_SAFE (row, next, hmap_node, &mcst->rows) { ++ struct ovsdb_monitor_row *row; ++ HMAP_FOR_EACH_SAFE (row, hmap_node, &mcst->rows) { + hmap_remove(&mcst->rows, &row->hmap_node); + ovsdb_monitor_row_destroy(mcst->mt, row, mcst->n_columns); + } +@@ -700,13 +700,13 @@ void + ovsdb_monitor_session_condition_destroy( + struct ovsdb_monitor_session_condition *condition) + { +- struct shash_node *node, *next; ++ struct shash_node *node; + + if (!condition) { + return; + } + +- SHASH_FOR_EACH_SAFE (node, next, &condition->tables) { ++ SHASH_FOR_EACH_SAFE (node, &condition->tables) { + struct ovsdb_monitor_table_condition *mtc = node->data; + + ovsdb_condition_destroy(&mtc->new_condition); +@@ -1122,11 +1122,11 @@ ovsdb_monitor_compose_update( + json = NULL; + struct ovsdb_monitor_change_set_for_table *mcst; + LIST_FOR_EACH (mcst, list_in_change_set, &mcs->change_set_for_tables) { +- struct ovsdb_monitor_row *row, *next; ++ struct ovsdb_monitor_row *row; + struct json *table_json = NULL; + struct ovsdb_monitor_table *mt = mcst->mt; + +- HMAP_FOR_EACH_SAFE (row, next, hmap_node, &mcst->rows) { ++ HMAP_FOR_EACH_SAFE (row, hmap_node, &mcst->rows) { + struct json *row_json; + row_json = (*row_update)(mt, condition, OVSDB_MONITOR_ROW, row, + initial, changed, mcst->n_columns); +@@ -1711,8 +1711,8 @@ ovsdb_monitor_destroy(struct ovsdb_monitor *dbmon) + 
ovsdb_monitor_json_cache_flush(dbmon); + hmap_destroy(&dbmon->json_cache); + +- struct ovsdb_monitor_change_set *cs, *cs_next; +- LIST_FOR_EACH_SAFE (cs, cs_next, list_node, &dbmon->change_sets) { ++ struct ovsdb_monitor_change_set *cs; ++ LIST_FOR_EACH_SAFE (cs, list_node, &dbmon->change_sets) { + ovsdb_monitor_change_set_destroy(cs); + } + +@@ -1760,14 +1760,14 @@ ovsdb_monitors_commit(struct ovsdb *db, const struct ovsdb_txn *txn) + void + ovsdb_monitors_remove(struct ovsdb *db) + { +- struct ovsdb_monitor *m, *next_m; ++ struct ovsdb_monitor *m; + +- LIST_FOR_EACH_SAFE (m, next_m, list_node, &db->monitors) { +- struct jsonrpc_monitor_node *jm, *next_jm; ++ LIST_FOR_EACH_SAFE (m, list_node, &db->monitors) { ++ struct jsonrpc_monitor_node *jm; + + /* Delete all front-end monitors. Removing the last front-end monitor + * will also destroy the corresponding ovsdb_monitor. */ +- LIST_FOR_EACH_SAFE (jm, next_jm, node, &m->jsonrpc_monitors) { ++ LIST_FOR_EACH_SAFE (jm, node, &m->jsonrpc_monitors) { + ovsdb_jsonrpc_monitor_destroy(jm->jsonrpc_monitor, false); + } + } +@@ -1789,14 +1789,14 @@ ovsdb_monitor_get_memory_usage(struct simap *usage) + void + ovsdb_monitor_prereplace_db(struct ovsdb *db) + { +- struct ovsdb_monitor *m, *next_m; ++ struct ovsdb_monitor *m; + +- LIST_FOR_EACH_SAFE (m, next_m, list_node, &db->monitors) { +- struct jsonrpc_monitor_node *jm, *next_jm; ++ LIST_FOR_EACH_SAFE (m, list_node, &db->monitors) { ++ struct jsonrpc_monitor_node *jm; + + /* Delete all front-end monitors. Removing the last front-end monitor + * will also destroy the corresponding ovsdb_monitor. 
*/ +- LIST_FOR_EACH_SAFE (jm, next_jm, node, &m->jsonrpc_monitors) { ++ LIST_FOR_EACH_SAFE (jm, node, &m->jsonrpc_monitors) { + ovsdb_jsonrpc_monitor_destroy(jm->jsonrpc_monitor, true); + } + } +diff --git a/ovsdb/ovsdb-idlc.in b/ovsdb/ovsdb-idlc.in +index 10a70ae26f..13c5359395 100755 +--- a/ovsdb/ovsdb-idlc.in ++++ b/ovsdb/ovsdb-idlc.in +@@ -251,10 +251,18 @@ const struct %(s)s *%(s)s_table_first(const struct %(s)s_table *); + for ((ROW) = %(s)s_table_first(TABLE); \\ + (ROW); \\ + (ROW) = %(s)s_next(ROW)) +-#define %(S)s_TABLE_FOR_EACH_SAFE(ROW, NEXT, TABLE) \\ ++#define %(S)s_TABLE_FOR_EACH_SAFE_LONG(ROW, NEXT, TABLE) \\ + for ((ROW) = %(s)s_table_first(TABLE); \\ + (ROW) ? ((NEXT) = %(s)s_next(ROW), 1) : 0; \\ + (ROW) = (NEXT)) ++#define %(S)s_TABLE_FOR_EACH_SAFE_SHORT(ROW, TABLE) \\ ++ for (const struct %(s)s * ROW__next = ((ROW) = %(s)s_table_first(TABLE), NULL); \\ ++ (ROW) ? (ROW__next = %(s)s_next(ROW), 1) : (ROW__next = NULL, 0); \\ ++ (ROW) = ROW__next) ++#define %(S)s_TABLE_FOR_EACH_SAFE(...) \\ ++ OVERLOAD_SAFE_MACRO(%(S)s_TABLE_FOR_EACH_SAFE_LONG, \\ ++ %(S)s_TABLE_FOR_EACH_SAFE_SHORT, 3, __VA_ARGS__) ++ + + const struct %(s)s *%(s)s_get_for_uuid(const struct ovsdb_idl *, const struct uuid *); + const struct %(s)s *%(s)s_table_get_for_uuid(const struct %(s)s_table *, const struct uuid *); +@@ -264,10 +272,17 @@ const struct %(s)s *%(s)s_next(const struct %(s)s *); + for ((ROW) = %(s)s_first(IDL); \\ + (ROW); \\ + (ROW) = %(s)s_next(ROW)) +-#define %(S)s_FOR_EACH_SAFE(ROW, NEXT, IDL) \\ ++#define %(S)s_FOR_EACH_SAFE_LONG(ROW, NEXT, IDL) \\ + for ((ROW) = %(s)s_first(IDL); \\ + (ROW) ? ((NEXT) = %(s)s_next(ROW), 1) : 0; \\ + (ROW) = (NEXT)) ++#define %(S)s_FOR_EACH_SAFE_SHORT(ROW, IDL) \\ ++ for (const struct %(s)s * ROW__next = ((ROW) = %(s)s_first(IDL), NULL); \\ ++ (ROW) ? (ROW__next = %(s)s_next(ROW), 1) : (ROW__next = NULL, 0); \\ ++ (ROW) = ROW__next) ++#define %(S)s_FOR_EACH_SAFE(...) 
\\ ++ OVERLOAD_SAFE_MACRO(%(S)s_FOR_EACH_SAFE_LONG, \\ ++ %(S)s_FOR_EACH_SAFE_SHORT, 3, __VA_ARGS__) + + unsigned int %(s)s_get_seqno(const struct ovsdb_idl *); + unsigned int %(s)s_row_get_seqno(const struct %(s)s *row, enum ovsdb_idl_change change); +diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c +index 9fe90592ea..774416fc7a 100644 +--- a/ovsdb/ovsdb-server.c ++++ b/ovsdb/ovsdb-server.c +@@ -26,6 +26,7 @@ + #include "command-line.h" + #include "daemon.h" + #include "dirs.h" ++#include "dns-resolve.h" + #include "openvswitch/dynamic-string.h" + #include "fatal-signal.h" + #include "file.h" +@@ -228,8 +229,7 @@ main_loop(struct server_config *config, + + ovsdb_relay_run(); + +- struct shash_node *next; +- SHASH_FOR_EACH_SAFE (node, next, all_dbs) { ++ SHASH_FOR_EACH_SAFE (node, all_dbs) { + struct db *db = node->data; + ovsdb_txn_history_run(db->db); + ovsdb_storage_run(db->db->storage); +@@ -321,7 +321,7 @@ main(int argc, char *argv[]) + FILE *config_tmpfile; + struct server_config server_config; + struct shash all_dbs; +- struct shash_node *node, *next; ++ struct shash_node *node; + int replication_probe_interval = REPLICATION_DEFAULT_PROBE_INTERVAL; + + ovs_cmdl_proctitle_init(argc, argv); +@@ -329,6 +329,7 @@ main(int argc, char *argv[]) + service_start(&argc, &argv); + fatal_ignore_sigpipe(); + process_init(); ++ dns_resolve_init(true); + + bool active = false; + parse_options(argc, argv, &db_filenames, &remotes, &unixctl_path, +@@ -490,7 +491,7 @@ main(int argc, char *argv[]) + main_loop(&server_config, jsonrpc, &all_dbs, unixctl, &remotes, + run_process, &exiting, &is_backup); + +- SHASH_FOR_EACH_SAFE(node, next, &all_dbs) { ++ SHASH_FOR_EACH_SAFE (node, &all_dbs) { + struct db *db = node->data; + close_db(&server_config, db, NULL); + shash_delete(&all_dbs, node); +@@ -511,6 +512,7 @@ main(int argc, char *argv[]) + run_command, process_status_msg(status)); + } + } ++ dns_resolve_destroy(); + perf_counters_destroy(); + service_stop(); + return 0; 
+@@ -1240,8 +1242,8 @@ update_server_status(struct shash *all_dbs) + + /* Update rows for databases that still exist. + * Delete rows for databases that no longer exist. */ +- const struct ovsdb_row *row, *next_row; +- HMAP_FOR_EACH_SAFE (row, next_row, hmap_node, &database_table->rows) { ++ const struct ovsdb_row *row; ++ HMAP_FOR_EACH_SAFE (row, hmap_node, &database_table->rows) { + const char *name; + ovsdb_util_read_string_column(row, "name", &name); + struct db *db = shash_find_data(all_dbs, name); +diff --git a/ovsdb/ovsdb-tool.c b/ovsdb/ovsdb-tool.c +index d4a9e34cc4..df2e373c3c 100644 +--- a/ovsdb/ovsdb-tool.c ++++ b/ovsdb/ovsdb-tool.c +@@ -1579,15 +1579,14 @@ do_check_cluster(struct ovs_cmdl_context *ctx) + } + free(c.servers); + +- struct commit *next_commit; +- HMAP_FOR_EACH_SAFE (commit, next_commit, hmap_node, &c.commits) { ++ HMAP_FOR_EACH_SAFE (commit, hmap_node, &c.commits) { + hmap_remove(&c.commits, &commit->hmap_node); + free(commit); + } + hmap_destroy(&c.commits); + +- struct leader *leader, *next_leader; +- HMAP_FOR_EACH_SAFE (leader, next_leader, hmap_node, &c.leaders) { ++ struct leader *leader; ++ HMAP_FOR_EACH_SAFE (leader, hmap_node, &c.leaders) { + hmap_remove(&c.leaders, &leader->hmap_node); + free(leader); + } +diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c +index e6d866182c..91b4a01af8 100644 +--- a/ovsdb/ovsdb.c ++++ b/ovsdb/ovsdb.c +@@ -571,8 +571,8 @@ ovsdb_replace(struct ovsdb *dst, struct ovsdb *src) + ovsdb_monitor_prereplace_db(dst); + + /* Cancel triggers. 
*/ +- struct ovsdb_trigger *trigger, *next; +- LIST_FOR_EACH_SAFE (trigger, next, node, &dst->triggers) { ++ struct ovsdb_trigger *trigger; ++ LIST_FOR_EACH_SAFE (trigger, node, &dst->triggers) { + ovsdb_trigger_prereplace_db(trigger); + } + +diff --git a/ovsdb/query.c b/ovsdb/query.c +index de74519989..eebe564127 100644 +--- a/ovsdb/query.c ++++ b/ovsdb/query.c +@@ -40,9 +40,9 @@ ovsdb_query(struct ovsdb_table *table, const struct ovsdb_condition *cnd, + } + } else { + /* Linear scan. */ +- const struct ovsdb_row *row, *next; ++ const struct ovsdb_row *row; + +- HMAP_FOR_EACH_SAFE (row, next, hmap_node, &table->rows) { ++ HMAP_FOR_EACH_SAFE (row, hmap_node, &table->rows) { + if (ovsdb_condition_match_every_clause(row, cnd) && + !output_row(row, aux)) { + break; +diff --git a/ovsdb/raft-private.c b/ovsdb/raft-private.c +index 30760233ee..e685c8103b 100644 +--- a/ovsdb/raft-private.c ++++ b/ovsdb/raft-private.c +@@ -36,7 +36,10 @@ raft_address_validate(const char *address) + return NULL; + } else if (!strncmp(address, "ssl:", 4) || !strncmp(address, "tcp:", 4)) { + struct sockaddr_storage ss; +- if (!inet_parse_active(address + 4, -1, &ss, true)) { ++ bool dns_failure = false; ++ ++ if (!inet_parse_active(address + 4, -1, &ss, true, &dns_failure) ++ && !dns_failure) { + return ovsdb_error(NULL, "%s: syntax error in address", address); + } + return NULL; +@@ -147,8 +150,8 @@ raft_server_destroy(struct raft_server *s) + void + raft_servers_destroy(struct hmap *servers) + { +- struct raft_server *s, *next; +- HMAP_FOR_EACH_SAFE (s, next, hmap_node, servers) { ++ struct raft_server *s; ++ HMAP_FOR_EACH_SAFE (s, hmap_node, servers) { + hmap_remove(servers, &s->hmap_node); + raft_server_destroy(s); + } +diff --git a/ovsdb/raft.c b/ovsdb/raft.c +index 1a3447a8dd..530c5e5a3d 100644 +--- a/ovsdb/raft.c ++++ b/ovsdb/raft.c +@@ -74,6 +74,7 @@ enum raft_failure_test { + FT_CRASH_BEFORE_SEND_EXEC_REQ, + FT_CRASH_AFTER_SEND_EXEC_REQ, + FT_CRASH_AFTER_RECV_APPEND_REQ_UPDATE, ++ 
FT_CRASH_BEFORE_SEND_SNAPSHOT_REP, + FT_DELAY_ELECTION, + FT_DONT_SEND_VOTE_REQUEST, + FT_STOP_RAFT_RPC, +@@ -379,12 +380,19 @@ static bool raft_handle_write_error(struct raft *, struct ovsdb_error *); + static void raft_run_reconfigure(struct raft *); + + static void raft_set_leader(struct raft *, const struct uuid *sid); ++ + static struct raft_server * + raft_find_server(const struct raft *raft, const struct uuid *sid) + { + return raft_server_find(&raft->servers, sid); + } + ++static struct raft_server * ++raft_find_new_server(struct raft *raft, const struct uuid *uuid) ++{ ++ return raft_server_find(&raft->add_servers, uuid); ++} ++ + static char * + raft_make_address_passive(const char *address_) + { +@@ -692,8 +700,8 @@ static void + raft_set_servers(struct raft *raft, const struct hmap *new_servers, + enum vlog_level level) + { +- struct raft_server *s, *next; +- HMAP_FOR_EACH_SAFE (s, next, hmap_node, &raft->servers) { ++ struct raft_server *s; ++ HMAP_FOR_EACH_SAFE (s, hmap_node, &raft->servers) { + if (!raft_server_find(new_servers, &s->sid)) { + ovs_assert(s != raft->remove_server); + +@@ -703,7 +711,7 @@ raft_set_servers(struct raft *raft, const struct hmap *new_servers, + } + } + +- HMAP_FOR_EACH_SAFE (s, next, hmap_node, new_servers) { ++ HMAP_FOR_EACH_SAFE (s, hmap_node, new_servers) { + if (!raft_find_server(raft, &s->sid)) { + VLOG(level, "server %s added to configuration", s->nickname); + +@@ -1376,8 +1384,8 @@ raft_close__(struct raft *raft) + raft->remove_server = NULL; + } + +- struct raft_conn *conn, *next; +- LIST_FOR_EACH_SAFE (conn, next, list_node, &raft->conns) { ++ struct raft_conn *conn; ++ LIST_FOR_EACH_SAFE (conn, list_node, &raft->conns) { + raft_conn_close(conn); + } + } +@@ -1713,8 +1721,8 @@ raft_waiters_run(struct raft *raft) + } + + uint64_t cur = ovsdb_log_commit_progress(raft->log); +- struct raft_waiter *w, *next; +- LIST_FOR_EACH_SAFE (w, next, list_node, &raft->waiters) { ++ struct raft_waiter *w; ++ LIST_FOR_EACH_SAFE (w, 
list_node, &raft->waiters) { + if (cur < w->commit_ticket) { + break; + } +@@ -1736,8 +1744,8 @@ raft_waiters_wait(struct raft *raft) + static void + raft_waiters_destroy(struct raft *raft) + { +- struct raft_waiter *w, *next; +- LIST_FOR_EACH_SAFE (w, next, list_node, &raft->waiters) { ++ struct raft_waiter *w; ++ LIST_FOR_EACH_SAFE (w, list_node, &raft->waiters) { + raft_waiter_destroy(w); + } + } +@@ -1867,6 +1875,8 @@ raft_open_conn(struct raft *raft, const char *address, const struct uuid *sid) + static void + raft_conn_close(struct raft_conn *conn) + { ++ VLOG_DBG("closing connection to server %s (%s)", ++ conn->nickname, jsonrpc_session_get_name(conn->js)); + jsonrpc_session_close(conn->js); + ovs_list_remove(&conn->list_node); + free(conn->nickname); +@@ -1957,16 +1967,29 @@ raft_run(struct raft *raft) + } + + /* Close unneeded sessions. */ +- struct raft_conn *next; +- LIST_FOR_EACH_SAFE (conn, next, list_node, &raft->conns) { ++ struct raft_server *server; ++ LIST_FOR_EACH_SAFE (conn, list_node, &raft->conns) { + if (!raft_conn_should_stay_open(raft, conn)) { ++ server = raft_find_new_server(raft, &conn->sid); ++ if (server) { ++ /* We only have one incoming connection from joining servers, ++ * so if it's closed, we need to destroy the record about the ++ * server. This way the process can be started over on the ++ * next join request. */ ++ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); ++ VLOG_INFO_RL(&rl, "cluster "CID_FMT": server %s (%s) " ++ "disconnected while joining", ++ CID_ARGS(&raft->cid), ++ server->nickname, server->address); ++ hmap_remove(&raft->add_servers, &server->hmap_node); ++ raft_server_destroy(server); ++ } + raft->n_disconnections++; + raft_conn_close(conn); + } + } + + /* Open needed sessions. 
*/ +- struct raft_server *server; + HMAP_FOR_EACH (server, hmap_node, &raft->servers) { + raft_open_conn(raft, server->address, &server->sid); + } +@@ -2039,8 +2062,8 @@ raft_run(struct raft *raft) + * commands becomes new leader: the pending commands can still complete + * if the crashed leader has replicated the transactions to majority of + * followers before it crashed. */ +- struct raft_command *cmd, *next_cmd; +- HMAP_FOR_EACH_SAFE (cmd, next_cmd, hmap_node, &raft->commands) { ++ struct raft_command *cmd; ++ HMAP_FOR_EACH_SAFE (cmd, hmap_node, &raft->commands) { + if (cmd->timestamp + && now - cmd->timestamp > raft->election_timer * 2) { + raft_command_complete(raft, cmd, RAFT_CMD_TIMEOUT); +@@ -2243,8 +2266,8 @@ raft_command_initiate(struct raft *raft, + static void + log_all_commands(struct raft *raft) + { +- struct raft_command *cmd, *next; +- HMAP_FOR_EACH_SAFE (cmd, next, hmap_node, &raft->commands) { ++ struct raft_command *cmd; ++ HMAP_FOR_EACH_SAFE (cmd, hmap_node, &raft->commands) { + VLOG_DBG("raft command eid: "UUID_FMT, UUID_ARGS(&cmd->eid)); + } + } +@@ -2398,8 +2421,8 @@ raft_command_complete(struct raft *raft, + static void + raft_complete_all_commands(struct raft *raft, enum raft_command_status status) + { +- struct raft_command *cmd, *next; +- HMAP_FOR_EACH_SAFE (cmd, next, hmap_node, &raft->commands) { ++ struct raft_command *cmd; ++ HMAP_FOR_EACH_SAFE (cmd, hmap_node, &raft->commands) { + raft_command_complete(raft, cmd, status); + } + } +@@ -3354,12 +3377,6 @@ raft_find_peer(struct raft *raft, const struct uuid *uuid) + return s && !uuid_equals(&raft->sid, &s->sid) ? s : NULL; + } + +-static struct raft_server * +-raft_find_new_server(struct raft *raft, const struct uuid *uuid) +-{ +- return raft_server_find(&raft->add_servers, uuid); +-} +- + /* Figure 3.1: "If there exists an N such that N > commitIndex, a + * majority of matchIndex[i] >= N, and log[N].term == currentTerm, set + * commitIndex = N (sections 3.5 and 3.6)." 
*/ +@@ -4142,6 +4159,10 @@ static void + raft_handle_install_snapshot_request( + struct raft *raft, const struct raft_install_snapshot_request *rq) + { ++ if (failure_test == FT_CRASH_BEFORE_SEND_SNAPSHOT_REP) { ++ ovs_fatal(0, "Raft test: crash before sending install_snapshot_reply"); ++ } ++ + if (raft_handle_install_snapshot_request__(raft, rq)) { + union raft_rpc rpy = { + .install_snapshot_reply = { +@@ -4940,6 +4961,8 @@ raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED, + failure_test = FT_CRASH_AFTER_SEND_EXEC_REQ; + } else if (!strcmp(test, "crash-after-receiving-append-request-update")) { + failure_test = FT_CRASH_AFTER_RECV_APPEND_REQ_UPDATE; ++ } else if (!strcmp(test, "crash-before-sending-install-snapshot-reply")) { ++ failure_test = FT_CRASH_BEFORE_SEND_SNAPSHOT_REP; + } else if (!strcmp(test, "delay-election")) { + failure_test = FT_DELAY_ELECTION; + struct raft *raft; +diff --git a/ovsdb/relay.c b/ovsdb/relay.c +index ef0e44d340..122ee8c52f 100644 +--- a/ovsdb/relay.c ++++ b/ovsdb/relay.c +@@ -269,9 +269,9 @@ ovsdb_relay_clear(struct ovsdb *db) + + SHASH_FOR_EACH (table_node, &db->tables) { + struct ovsdb_table *table = table_node->data; +- struct ovsdb_row *row, *next; ++ struct ovsdb_row *row; + +- HMAP_FOR_EACH_SAFE (row, next, hmap_node, &table->rows) { ++ HMAP_FOR_EACH_SAFE (row, hmap_node, &table->rows) { + ovsdb_txn_row_delete(txn, row); + } + } +diff --git a/ovsdb/replication.c b/ovsdb/replication.c +index d8b56d8131..477c69d701 100644 +--- a/ovsdb/replication.c ++++ b/ovsdb/replication.c +@@ -549,8 +549,8 @@ reset_database(struct ovsdb *db) + /* Delete all rows if the table is not excluded. 
*/ + if (!excluded_tables_find(db->schema->name, table_node->name)) { + struct ovsdb_table *table = table_node->data; +- struct ovsdb_row *row, *next; +- HMAP_FOR_EACH_SAFE (row, next, hmap_node, &table->rows) { ++ struct ovsdb_row *row; ++ HMAP_FOR_EACH_SAFE (row, hmap_node, &table->rows) { + ovsdb_txn_row_delete(txn, row); + } + } +@@ -769,9 +769,9 @@ replication_dbs_destroy(void) + return; + } + +- struct shash_node *node, *next; ++ struct shash_node *node; + +- SHASH_FOR_EACH_SAFE (node, next, replication_dbs) { ++ SHASH_FOR_EACH_SAFE (node, replication_dbs) { + hmap_remove(&replication_dbs->map, &node->node); + struct replication_db *rdb = node->data; + if (rdb->active_db_schema) { +diff --git a/ovsdb/table.c b/ovsdb/table.c +index 455a3663fe..2184701ec1 100644 +--- a/ovsdb/table.c ++++ b/ovsdb/table.c +@@ -309,10 +309,10 @@ void + ovsdb_table_destroy(struct ovsdb_table *table) + { + if (table) { +- struct ovsdb_row *row, *next; ++ struct ovsdb_row *row; + size_t i; + +- HMAP_FOR_EACH_SAFE (row, next, hmap_node, &table->rows) { ++ HMAP_FOR_EACH_SAFE (row, hmap_node, &table->rows) { + ovsdb_row_destroy(row); + } + hmap_destroy(&table->rows); +diff --git a/ovsdb/transaction-forward.c b/ovsdb/transaction-forward.c +index d15f2f1d6d..963e937957 100644 +--- a/ovsdb/transaction-forward.c ++++ b/ovsdb/transaction-forward.c +@@ -126,10 +126,10 @@ ovsdb_txn_forward_steal_reply(struct ovsdb_txn_forward *txn_fwd) + void + ovsdb_txn_forward_run(struct ovsdb *db, struct ovsdb_cs *cs) + { +- struct ovsdb_txn_forward *t, *next; ++ struct ovsdb_txn_forward *t; + + /* Send all transactions that needs to be forwarded. 
*/ +- LIST_FOR_EACH_SAFE (t, next, new_node, &db->txn_forward_new) { ++ LIST_FOR_EACH_SAFE (t, new_node, &db->txn_forward_new) { + if (!ovsdb_cs_may_send_transaction(cs)) { + break; + } +@@ -167,9 +167,9 @@ ovsdb_txn_forward_cancel(struct ovsdb *db, struct ovsdb_txn_forward *txn_fwd) + void + ovsdb_txn_forward_cancel_all(struct ovsdb *db, bool sent_only) + { +- struct ovsdb_txn_forward *t, *next; ++ struct ovsdb_txn_forward *t; + +- HMAP_FOR_EACH_SAFE (t, next, sent_node, &db->txn_forward_sent) { ++ HMAP_FOR_EACH_SAFE (t, sent_node, &db->txn_forward_sent) { + ovsdb_txn_forward_cancel(db, t); + } + +@@ -177,7 +177,7 @@ ovsdb_txn_forward_cancel_all(struct ovsdb *db, bool sent_only) + return; + } + +- LIST_FOR_EACH_SAFE (t, next, new_node, &db->txn_forward_new) { ++ LIST_FOR_EACH_SAFE (t, new_node, &db->txn_forward_new) { + ovsdb_txn_forward_cancel(db, t); + } + } +diff --git a/ovsdb/transaction.c b/ovsdb/transaction.c +index db86d847c3..3a6ddfa1df 100644 +--- a/ovsdb/transaction.c ++++ b/ovsdb/transaction.c +@@ -159,15 +159,15 @@ ovsdb_txn_row_abort(struct ovsdb_txn *txn OVS_UNUSED, + hmap_replace(&new->table->rows, &new->hmap_node, &old->hmap_node); + } + +- struct ovsdb_weak_ref *weak, *next; +- LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->deleted_refs) { ++ struct ovsdb_weak_ref *weak; ++ LIST_FOR_EACH_SAFE (weak, src_node, &txn_row->deleted_refs) { + ovs_list_remove(&weak->src_node); + ovs_list_init(&weak->src_node); + if (hmap_node_is_null(&weak->dst_node)) { + ovsdb_weak_ref_destroy(weak); + } + } +- LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->added_refs) { ++ LIST_FOR_EACH_SAFE (weak, src_node, &txn_row->added_refs) { + ovs_list_remove(&weak->src_node); + ovs_list_init(&weak->src_node); + if (hmap_node_is_null(&weak->dst_node)) { +@@ -508,11 +508,11 @@ static struct ovsdb_error * + ovsdb_txn_update_weak_refs(struct ovsdb_txn *txn OVS_UNUSED, + struct ovsdb_txn_row *txn_row) + { +- struct ovsdb_weak_ref *weak, *next, *dst_weak; ++ struct 
ovsdb_weak_ref *weak, *dst_weak; + struct ovsdb_row *dst_row; + + /* Find and clean up deleted references from destination rows. */ +- LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->deleted_refs) { ++ LIST_FOR_EACH_SAFE (weak, src_node, &txn_row->deleted_refs) { + dst_row = CONST_CAST(struct ovsdb_row *, + ovsdb_table_get_row(weak->dst_table, &weak->dst)); + if (dst_row) { +@@ -529,7 +529,7 @@ ovsdb_txn_update_weak_refs(struct ovsdb_txn *txn OVS_UNUSED, + } + + /* Insert the weak references added in the new version of the row. */ +- LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->added_refs) { ++ LIST_FOR_EACH_SAFE (weak, src_node, &txn_row->added_refs) { + dst_row = CONST_CAST(struct ovsdb_row *, + ovsdb_table_get_row(weak->dst_table, &weak->dst)); + +@@ -597,7 +597,7 @@ find_and_add_weak_ref(struct ovsdb_txn_row *txn_row, + static struct ovsdb_error * OVS_WARN_UNUSED_RESULT + assess_weak_refs(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row) + { +- struct ovsdb_weak_ref *weak, *next; ++ struct ovsdb_weak_ref *weak; + struct ovsdb_table *table; + struct shash_node *node; + +@@ -642,7 +642,7 @@ assess_weak_refs(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row) + + /* Collecting all key-value pairs that references deleted rows. 
*/ + ovsdb_datum_init_empty(&deleted_refs); +- LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->deleted_refs) { ++ LIST_FOR_EACH_SAFE (weak, src_node, &txn_row->deleted_refs) { + if (column->index == weak->column_idx) { + ovsdb_datum_add_unsafe(&deleted_refs, &weak->key, &weak->value, + &column->type, NULL); +@@ -1094,10 +1094,10 @@ static void + ovsdb_txn_destroy_cloned(struct ovsdb_txn *txn) + { + ovs_assert(!txn->db); +- struct ovsdb_txn_table *t, *next_txn_table; +- LIST_FOR_EACH_SAFE (t, next_txn_table, node, &txn->txn_tables) { +- struct ovsdb_txn_row *r, *next_txn_row; +- HMAP_FOR_EACH_SAFE (r, next_txn_row, hmap_node, &t->txn_rows) { ++ struct ovsdb_txn_table *t; ++ LIST_FOR_EACH_SAFE (t, node, &txn->txn_tables) { ++ struct ovsdb_txn_row *r; ++ HMAP_FOR_EACH_SAFE (r, hmap_node, &t->txn_rows) { + if (r->old) { + ovsdb_row_destroy(r->old); + } +@@ -1549,19 +1549,19 @@ for_each_txn_row(struct ovsdb_txn *txn, + serial++; + + do { +- struct ovsdb_txn_table *t, *next_txn_table; ++ struct ovsdb_txn_table *t; + + any_work = false; +- LIST_FOR_EACH_SAFE (t, next_txn_table, node, &txn->txn_tables) { ++ LIST_FOR_EACH_SAFE (t, node, &txn->txn_tables) { + if (t->serial != serial) { + t->serial = serial; + t->n_processed = 0; + } + + while (t->n_processed < hmap_count(&t->txn_rows)) { +- struct ovsdb_txn_row *r, *next_txn_row; ++ struct ovsdb_txn_row *r; + +- HMAP_FOR_EACH_SAFE (r, next_txn_row, hmap_node, &t->txn_rows) { ++ HMAP_FOR_EACH_SAFE (r, hmap_node, &t->txn_rows) { + if (r->serial != serial) { + struct ovsdb_error *error; + +@@ -1629,8 +1629,8 @@ ovsdb_txn_history_destroy(struct ovsdb *db) + return; + } + +- struct ovsdb_txn_history_node *txn_h_node, *next; +- LIST_FOR_EACH_SAFE (txn_h_node, next, node, &db->txn_history) { ++ struct ovsdb_txn_history_node *txn_h_node; ++ LIST_FOR_EACH_SAFE (txn_h_node, node, &db->txn_history) { + ovs_list_remove(&txn_h_node->node); + ovsdb_txn_destroy_cloned(txn_h_node->txn); + free(txn_h_node); +diff --git a/ovsdb/trigger.c 
b/ovsdb/trigger.c +index 726c138bf0..7d3003bca3 100644 +--- a/ovsdb/trigger.c ++++ b/ovsdb/trigger.c +@@ -146,14 +146,14 @@ ovsdb_trigger_prereplace_db(struct ovsdb_trigger *trigger) + bool + ovsdb_trigger_run(struct ovsdb *db, long long int now) + { +- struct ovsdb_trigger *t, *next; ++ struct ovsdb_trigger *t; + + bool run_triggers = db->run_triggers; + db->run_triggers_now = db->run_triggers = false; + + bool disconnect_all = false; + +- LIST_FOR_EACH_SAFE (t, next, node, &db->triggers) { ++ LIST_FOR_EACH_SAFE (t, node, &db->triggers) { + if (run_triggers + || now - t->created >= t->timeout_msec + || t->progress || t->txn_forward) { +diff --git a/python/ovs/db/idl.py b/python/ovs/db/idl.py +index 4ecdcaa197..b87099ff52 100644 +--- a/python/ovs/db/idl.py ++++ b/python/ovs/db/idl.py +@@ -140,6 +140,47 @@ class ConditionState(object): + return False + + ++class IdlTable(object): ++ def __init__(self, idl, table): ++ assert(isinstance(table, ovs.db.schema.TableSchema)) ++ self._table = table ++ self.need_table = False ++ self.rows = custom_index.IndexedRows(self) ++ self.idl = idl ++ self._condition_state = ConditionState() ++ self.columns = {k: IdlColumn(v) for k, v in table.columns.items()} ++ ++ def __getattr__(self, attr): ++ return getattr(self._table, attr) ++ ++ @property ++ def condition_state(self): ++ # read-only, no setter ++ return self._condition_state ++ ++ @property ++ def condition(self): ++ return self.condition_state.latest ++ ++ @condition.setter ++ def condition(self, condition): ++ assert(isinstance(condition, list)) ++ self.idl.cond_change(self.name, condition) ++ ++ @classmethod ++ def schema_tables(cls, idl, schema): ++ return {k: cls(idl, v) for k, v in schema.tables.items()} ++ ++ ++class IdlColumn(object): ++ def __init__(self, column): ++ self._column = column ++ self.alert = True ++ ++ def __getattr__(self, attr): ++ return getattr(self._column, attr) ++ ++ + class Idl(object): + """Open vSwitch Database Interface Definition Language 
(OVSDB IDL). + +@@ -241,7 +282,7 @@ class Idl(object): + assert isinstance(schema_helper, SchemaHelper) + schema = schema_helper.get_idl_schema() + +- self.tables = schema.tables ++ self.tables = IdlTable.schema_tables(self, schema) + self.readonly = schema.readonly + self._db = schema + remotes = self._parse_remotes(remote) +@@ -282,15 +323,6 @@ class Idl(object): + self.cond_changed = False + self.cond_seqno = 0 + +- for table in schema.tables.values(): +- for column in table.columns.values(): +- if not hasattr(column, 'alert'): +- column.alert = True +- table.need_table = False +- table.rows = custom_index.IndexedRows(table) +- table.idl = self +- table.condition = ConditionState() +- + def _parse_remotes(self, remote): + # If remote is - + # "tcp:10.0.0.1:6641,unix:/tmp/db.sock,t,s,tcp:10.0.0.2:6642" +@@ -330,7 +362,7 @@ class Idl(object): + def ack_conditions(self): + """Mark all requested table conditions as acked""" + for table in self.tables.values(): +- table.condition.ack() ++ table.condition_state.ack() + + def sync_conditions(self): + """Synchronize condition state when the FSM is restarted +@@ -356,14 +388,17 @@ class Idl(object): + flushing the local cached DB contents. 
+ """ + ack_all = self.last_id == str(uuid.UUID(int=0)) ++ if ack_all: ++ self.cond_changed = False ++ + for table in self.tables.values(): + if ack_all: +- table.condition.request() +- table.condition.ack() ++ table.condition_state.request() ++ table.condition_state.ack() + else: +- if table.condition.reset(): ++ if table.condition_state.reset(): + self.last_id = str(uuid.UUID(int=0)) +- self.cond_changed = True ++ self.cond_changed = True + + def restart_fsm(self): + # Resync data DB table conditions to avoid missing updated due to +@@ -482,7 +517,7 @@ class Idl(object): + sh.register_table(self._server_db_table) + schema = sh.get_idl_schema() + self._server_db = schema +- self.server_tables = schema.tables ++ self.server_tables = IdlTable.schema_tables(self, schema) + self.__send_server_monitor_request() + except error.Error as e: + vlog.err("%s: error receiving server schema: %s" +@@ -588,10 +623,10 @@ class Idl(object): + for table in self.tables.values(): + # Always use the most recent conditions set by the IDL client when + # requesting monitor_cond_change +- if table.condition.new is not None: ++ if table.condition_state.new is not None: + change_requests[table.name] = [ +- {"where": table.condition.new}] +- table.condition.request() ++ {"where": table.condition_state.new}] ++ table.condition_state.request() + + if not change_requests: + return +@@ -627,19 +662,20 @@ class Idl(object): + cond = [False] + + # Compare the new condition to the last known condition +- if table.condition.latest != cond: +- table.condition.init(cond) ++ if table.condition_state.latest != cond: ++ table.condition_state.init(cond) + self.cond_changed = True + + # New condition will be sent out after all already requested ones + # are acked. 
+- if table.condition.new: +- any_reqs = any(t.condition.request for t in self.tables.values()) ++ if table.condition_state.new: ++ any_reqs = any(t.condition_state.request ++ for t in self.tables.values()) + return self.cond_seqno + int(any_reqs) + 1 + + # Already requested conditions should be up to date at + # self.cond_seqno + 1 while acked conditions are already up to date +- return self.cond_seqno + int(bool(table.condition.requested)) ++ return self.cond_seqno + int(bool(table.condition_state.requested)) + + def wait(self, poller): + """Arranges for poller.block() to wake up when self.run() has something +@@ -811,8 +847,8 @@ class Idl(object): + columns.append(column) + monitor_request = {"columns": columns} + if method in ("monitor_cond", "monitor_cond_since") and ( +- not ConditionState.is_true(table.condition.acked)): +- monitor_request["where"] = table.condition.acked ++ not ConditionState.is_true(table.condition_state.acked)): ++ monitor_request["where"] = table.condition_state.acked + monitor_requests[table.name] = [monitor_request] + + args = [self._db.name, str(self.uuid), monitor_requests] +@@ -1148,13 +1184,6 @@ class Idl(object): + return True + + +-def _uuid_to_row(atom, base): +- if base.ref_table: +- return base.ref_table.rows.get(atom) +- else: +- return atom +- +- + def _row_to_uuid(value): + if isinstance(value, Row): + return value.uuid +@@ -1268,6 +1297,17 @@ class Row(object): + data=", ".join("{col}={val}".format(col=c, val=getattr(self, c)) + for c in sorted(self._table.columns))) + ++ def _uuid_to_row(self, atom, base): ++ if base.ref_table: ++ try: ++ table = self._idl.tables[base.ref_table.name] ++ except KeyError as e: ++ msg = "Table {} is not registered".format(base.ref_table.name) ++ raise AttributeError(msg) from e ++ return table.rows.get(atom) ++ else: ++ return atom ++ + def __getattr__(self, column_name): + assert self._changes is not None + assert self._mutations is not None +@@ -1309,7 +1349,7 @@ class Row(object): + datum 
= data.Datum.from_python(column.type, dlist, + _row_to_uuid) + elif column.type.is_map(): +- dmap = datum.to_python(_uuid_to_row) ++ dmap = datum.to_python(self._uuid_to_row) + if inserts is not None: + dmap.update(inserts) + if removes is not None: +@@ -1326,7 +1366,7 @@ class Row(object): + else: + datum = inserts + +- return datum.to_python(_uuid_to_row) ++ return datum.to_python(self._uuid_to_row) + + def __setattr__(self, column_name, value): + assert self._changes is not None +@@ -1410,7 +1450,7 @@ class Row(object): + if value: + try: + old_value = data.Datum.to_python(self._data[column_name], +- _uuid_to_row) ++ self._uuid_to_row) + except error.Error: + return + if key not in old_value: +diff --git a/python/ovs/reconnect.py b/python/ovs/reconnect.py +index c4c6c87e9f..6b0d023ae3 100644 +--- a/python/ovs/reconnect.py ++++ b/python/ovs/reconnect.py +@@ -44,7 +44,7 @@ class Reconnect(object): + is_connected = False + + @staticmethod +- def deadline(fsm): ++ def deadline(fsm, now): + return None + + @staticmethod +@@ -56,7 +56,7 @@ class Reconnect(object): + is_connected = False + + @staticmethod +- def deadline(fsm): ++ def deadline(fsm, now): + return None + + @staticmethod +@@ -68,7 +68,7 @@ class Reconnect(object): + is_connected = False + + @staticmethod +- def deadline(fsm): ++ def deadline(fsm, now): + return fsm.state_entered + fsm.backoff + + @staticmethod +@@ -80,7 +80,7 @@ class Reconnect(object): + is_connected = False + + @staticmethod +- def deadline(fsm): ++ def deadline(fsm, now): + return fsm.state_entered + max(1000, fsm.backoff) + + @staticmethod +@@ -92,13 +92,24 @@ class Reconnect(object): + is_connected = True + + @staticmethod +- def deadline(fsm): ++ def deadline(fsm, now): + if fsm.probe_interval: + base = max(fsm.last_activity, fsm.state_entered) + expiration = base + fsm.probe_interval +- if (fsm.last_receive_attempt is None or ++ if (now < expiration or ++ fsm.last_receive_attempt is None or + fsm.last_receive_attempt >= 
expiration): ++ # We still have time before the expiration or the time has ++ # already passed and there was no activity. In the first ++ # case we need to wait for the expiration, in the second - ++ # we're already past the deadline. */ + return expiration ++ else: ++ # Time has already passed, but we didn't attempt to receive ++ # anything. We need to wake up and try to receive even if ++ # nothing is pending, so we can update the expiration time ++ # or transition to a different state. ++ return now + 1 + return None + + @staticmethod +@@ -114,12 +125,15 @@ class Reconnect(object): + is_connected = True + + @staticmethod +- def deadline(fsm): ++ def deadline(fsm, now): + if fsm.probe_interval: + expiration = fsm.state_entered + fsm.probe_interval +- if (fsm.last_receive_attempt is None or ++ if (now < expiration or ++ fsm.last_receive_attempt is None or + fsm.last_receive_attempt >= expiration): + return expiration ++ else: ++ return now + 1 + return None + + @staticmethod +@@ -134,7 +148,7 @@ class Reconnect(object): + is_connected = False + + @staticmethod +- def deadline(fsm): ++ def deadline(fsm, now): + return fsm.state_entered + + @staticmethod +@@ -545,7 +559,7 @@ class Reconnect(object): + returned if the "probe interval" is nonzero--see + self.set_probe_interval()).""" + +- deadline = self.state.deadline(self) ++ deadline = self.state.deadline(self, now) + if deadline is not None and now >= deadline: + return self.state.run(self, now) + else: +@@ -562,7 +576,7 @@ class Reconnect(object): + """Returns the number of milliseconds after which self.run() should be + called if nothing else notable happens in the meantime, or None if this + is currently unnecessary.""" +- deadline = self.state.deadline(self) ++ deadline = self.state.deadline(self, now) + if deadline is not None: + remaining = deadline - now + return max(0, remaining) +diff --git a/tests/alb.at b/tests/alb.at +index 2bef06f39c..0036bd1f29 100644 +--- a/tests/alb.at ++++ b/tests/alb.at +@@ -96,6 
+96,52 @@ OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance + OVS_VSWITCHD_STOP + AT_CLEANUP + ++AT_SETUP([ALB - cross-numa]) ++OVS_VSWITCHD_START([add-port br0 p0 \ ++ -- set Interface p0 type=dummy-pmd options:n_rxq=4 \ ++ -- set Interface p0 options:numa_id=0 \ ++ -- set Open_vSwitch . other_config:pmd-cpu-mask=0x3 \ ++ -- set open_vswitch . other_config:pmd-rxq-assign=group \ ++ -- set open_vswitch . other_config:pmd-rxq-isolate=false \ ++ -- set open_vswitch . other_config:pmd-auto-lb="true" \ ++ -- set open_vswitch . other_config:pmd-auto-lb-load-threshold=0], ++ [], [], [--dummy-numa 1,2,1,2]) ++OVS_WAIT_UNTIL([grep "PMD auto load balance is enabled" ovs-vswitchd.log]) ++AT_CHECK([ovs-appctl vlog/set dpif_netdev:dbg]) ++ ++# no pinned rxqs - cross-numa pmd could change ++get_log_next_line_num ++ovs-appctl time/warp 600000 10000 ++OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance performing dry run."]) ++OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance detected cross-numa polling"]) ++ ++# all pinned rxqs - cross-numa pmd will not change ++AT_CHECK([ovs-vsctl set Interface p0 other_config:pmd-rxq-affinity='0:0,1:0,2:1,3:1']) ++get_log_next_line_num ++ovs-appctl time/warp 600000 10000 ++OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance performing dry run."]) ++OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "Variance improvement 0%."]) ++ ++# mix of pinned (non-isolated) and non-pinned rxqs - cross-numa pmd could change ++AT_CHECK([ovs-vsctl remove Interface p0 other_config pmd-rxq-affinity]) ++AT_CHECK([ovs-vsctl set Interface p0 other_config:pmd-rxq-affinity='0:0,1:0,2:1']) ++get_log_next_line_num ++ovs-appctl time/warp 600000 10000 ++OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance performing dry run."]) ++OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance 
detected cross-numa polling"]) ++ ++# mix of pinned (isolated) and non-pinned rxqs - cross-numa pmd could change ++AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0xf]) ++AT_CHECK([ovs-vsctl set Interface p0 options:n_rxq=6]) ++AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-rxq-isolate=true]) ++get_log_next_line_num ++ovs-appctl time/warp 600000 10000 ++OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance performing dry run."]) ++OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance detected cross-numa polling"]) ++ ++OVS_VSWITCHD_STOP ++AT_CLEANUP ++ + AT_SETUP([ALB - PMD/RxQ assignment type]) + OVS_VSWITCHD_START([add-port br0 p0 \ + -- set Interface p0 type=dummy-pmd options:n_rxq=3 \ +diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at +index 7c2edeb9d4..ffb7208c7f 100644 +--- a/tests/ofproto-dpif.at ++++ b/tests/ofproto-dpif.at +@@ -81,11 +81,12 @@ recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=ff: + + ovs-appctl netdev-dummy/set-admin-state p1 up + ovs-appctl time/warp 100 +-OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl ++OVS_WAIT_UNTIL_EQUAL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [dnl + ---- bond0 ---- + bond_mode: active-backup + bond may use recirculation: no, + bond-hash-basis: 0 ++lb_output action: disabled, bond-id: -1 + updelay: 0 ms + downdelay: 0 ms + lacp_status: off +@@ -99,7 +100,6 @@ member p1: enabled + + member p2: enabled + may_enable: true +- + ]) + + OVS_VSWITCHD_STOP +@@ -129,11 +129,12 @@ ovs-appctl time/warp 100 + OVS_WAIT_UNTIL([test -n "`ovs-appctl bond/show | fgrep 'member p1: disabled'`"]) + ovs-appctl netdev-dummy/set-admin-state p1 up + ovs-appctl time/warp 100 +-OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl ++OVS_WAIT_UNTIL_EQUAL([ovs-appctl bond/show | STRIP_RECIRC_ID | 
STRIP_ACTIVE_MEMBER_MAC], [dnl + ---- bond0 ---- + bond_mode: active-backup + bond may use recirculation: no, + bond-hash-basis: 0 ++lb_output action: disabled, bond-id: -1 + updelay: 0 ms + downdelay: 0 ms + lacp_status: off +@@ -150,7 +151,6 @@ member p2: enabled + + member p3: enabled + may_enable: true +- + ]) + + dnl Now delete the primary and verify that the output shows that the +@@ -171,11 +171,12 @@ ovs-vsctl \ + --id=@p1 create Interface name=p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 -- \ + set Port bond0 interfaces="$uuids, @p1]" + ovs-appctl time/warp 100 +-OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl ++OVS_WAIT_UNTIL_EQUAL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [dnl + ---- bond0 ---- + bond_mode: active-backup + bond may use recirculation: no, + bond-hash-basis: 0 ++lb_output action: disabled, bond-id: -1 + updelay: 0 ms + downdelay: 0 ms + lacp_status: off +@@ -192,17 +193,17 @@ member p2: enabled + + member p3: enabled + may_enable: true +- + ]) + + dnl Switch to another primary + ovs-vsctl set port bond0 other_config:bond-primary=p2 + ovs-appctl time/warp 100 +-OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl ++OVS_WAIT_UNTIL_EQUAL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [dnl + ---- bond0 ---- + bond_mode: active-backup + bond may use recirculation: no, + bond-hash-basis: 0 ++lb_output action: disabled, bond-id: -1 + updelay: 0 ms + downdelay: 0 ms + lacp_status: off +@@ -211,25 +212,25 @@ active-backup primary: p2 + + + member p1: enabled +- active member + may_enable: true + + member p2: enabled ++ active member + may_enable: true + + member p3: enabled + may_enable: true +- + ]) + + dnl Remove the "bond-primary" config directive from the bond. 
+ AT_CHECK([ovs-vsctl remove Port bond0 other_config bond-primary]) + ovs-appctl time/warp 100 +-OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl ++OVS_WAIT_UNTIL_EQUAL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [dnl + ---- bond0 ---- + bond_mode: active-backup + bond may use recirculation: no, + bond-hash-basis: 0 ++lb_output action: disabled, bond-id: -1 + updelay: 0 ms + downdelay: 0 ms + lacp_status: off +@@ -238,15 +239,14 @@ active-backup primary: + + + member p1: enabled +- active member + may_enable: true + + member p2: enabled ++ active member + may_enable: true + + member p3: enabled + may_enable: true +- + ]) + + OVS_VSWITCHD_STOP +@@ -5573,7 +5573,36 @@ check_flows () { + echo "n_packets=$n" + test "$n" = 1 + } +-OVS_WAIT_UNTIL([check_flows], [ovs dump-flows br0]) ++OVS_WAIT_UNTIL([check_flows], [ovs-ofctl dump-flows br0]) ++ ++OVS_VSWITCHD_STOP ++AT_CLEANUP ++ ++# Checks for regression against a bug in which OVS crashed ++# with in_port=OFPP_NONE or in_port=OFPP_CONTROLLER and ++# recirculation is involved. ++AT_SETUP([ofproto-dpif - packet-out recirculation with OFPP_NONE and OFPP_CONTROLLER]) ++OVS_VSWITCHD_START ++add_of_ports br0 1 2 ++ ++AT_DATA([flows.txt], [dnl ++table=0 ip actions=mod_dl_dst:83:83:83:83:83:83,ct(table=1) ++table=1 ip actions=ct(commit),normal ++]) ++AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) ++ ++packet=ffffffffffff00102030405008004500001c00000000401100000a000002ffffffff0035111100080000 ++AT_CHECK([ovs-ofctl packet-out br0 "in_port=none,packet=$packet actions=table"]) ++AT_CHECK([ovs-ofctl packet-out br0 "in_port=controller,packet=$packet actions=table"]) ++ ++# Dumps out the flow table, extracts the number of packets that have gone ++# through the (single) flow in table 1, and returns success if it's exactly 2. 
++check_flows () { ++ n=$(ovs-ofctl dump-flows br0 table=1 | sed -n 's/.*n_packets=\([[0-9]]\{1,\}\).*/\1/p') ++ echo "n_packets=$n" ++ test "$n" = 2 ++} ++OVS_WAIT_UNTIL([check_flows], [ovs-ofctl dump-flows br0]) + + OVS_VSWITCHD_STOP + AT_CLEANUP +diff --git a/tests/ofproto-macros.at b/tests/ofproto-macros.at +index 736d9809cb..7051d95396 100644 +--- a/tests/ofproto-macros.at ++++ b/tests/ofproto-macros.at +@@ -175,6 +175,7 @@ m4_define([_OVS_VSWITCHD_START], + /dpdk|INFO|DPDK Disabled - Use other_config:dpdk-init to enable/d + /netlink_socket|INFO|netlink: could not enable listening to all nsid/d + /probe tc:/d ++/setting extended ack support failed/d + /tc: Using policy/d']]) + ]) + +@@ -239,6 +240,7 @@ check_logs () { + /timeval.*context switches: [[0-9]]* voluntary, [[0-9]]* involuntary/d + /ovs_rcu.*blocked [[0-9]]* ms waiting for .* to quiesce/d + /Dropped [[0-9]]* log messages/d ++/setting extended ack support failed/d + /|WARN|/p + /|ERR|/p + /|EMER|/p" ${logs} +diff --git a/tests/ovs-macros.at b/tests/ovs-macros.at +index 66545da572..e6c5bc6e94 100644 +--- a/tests/ovs-macros.at ++++ b/tests/ovs-macros.at +@@ -259,7 +259,20 @@ dnl Executes shell COMMAND in a loop until it returns zero. If COMMAND does + dnl not return zero within a reasonable time limit, executes the commands + dnl in IF-FAILED (if provided) and fails the test. + m4_define([OVS_WAIT_UNTIL], +- [OVS_WAIT([$1], [$2], [AT_LINE], [until $1])]) ++ [AT_FAIL_IF([test "$#" -ge 3]) ++ dnl The second argument should not be a number (confused with AT_CHECK ?). ++ AT_FAIL_IF([test "$#" -eq 2 && test "$2" -eq "$2" 2>/dev/null]) ++ OVS_WAIT([$1], [$2], [AT_LINE], [until $1])]) ++ ++dnl OVS_WAIT_UNTIL_EQUAL(COMMAND, OUTPUT) ++dnl ++dnl Executes shell COMMAND in a loop until it returns zero and the output ++dnl equals OUTPUT. If COMMAND does not return zero or a desired output within ++dnl a reasonable time limit, fails the test. 
++m4_define([OVS_WAIT_UNTIL_EQUAL], ++ [AT_FAIL_IF([test "$#" -ge 3]) ++ echo "$2" > wait_until_expected ++ OVS_WAIT_UNTIL([$1 | diff -u wait_until_expected - ])]) + + dnl OVS_WAIT_WHILE(COMMAND, [IF-FAILED]) + dnl +@@ -267,7 +280,10 @@ dnl Executes shell COMMAND in a loop until it returns nonzero. If COMMAND does + dnl not return nonzero within a reasonable time limit, executes the commands + dnl in IF-FAILED (if provided) and fails the test. + m4_define([OVS_WAIT_WHILE], +- [OVS_WAIT([if $1; then return 1; else return 0; fi], [$2], ++ [AT_FAIL_IF([test "$#" -ge 3]) ++ dnl The second argument should not be a number (confused with AT_CHECK ?). ++ AT_FAIL_IF([test "$#" -eq 2 && test "$2" -eq "$2" 2>/dev/null]) ++ OVS_WAIT([if $1; then return 1; else return 0; fi], [$2], + [AT_LINE], [while $1])]) + + dnl OVS_APP_EXIT_AND_WAIT(DAEMON) +diff --git a/tests/ovs-vswitchd.at b/tests/ovs-vswitchd.at +index bba4fea2bc..977b2eba1f 100644 +--- a/tests/ovs-vswitchd.at ++++ b/tests/ovs-vswitchd.at +@@ -121,6 +121,7 @@ OVS_APP_EXIT_AND_WAIT_BY_TARGET(["`pwd`"/unixctl2], [ovs-vswitchd-2.pid]) + # the process. + AT_CHECK([sed -n " + /|ERR|another ovs-vswitchd process is running/d ++/setting extended ack support failed/d + /|WARN|/p + /|ERR|/p + /|EMER|/p" fakelog +@@ -148,6 +149,7 @@ AT_CHECK([grep "wakeup due to" ovs-vswitchd.log], [ignore]) + + # check the log, should not see any WARN/ERR/EMER log. 
+ AT_CHECK([sed -n " ++/setting extended ack support failed/d + /|WARN|/p + /|ERR|/p + /|EMER|/p" ovs-vswitchd.log +diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at +index fc6253cfe9..ee9c7b9379 100644 +--- a/tests/ovsdb-cluster.at ++++ b/tests/ovsdb-cluster.at +@@ -400,6 +400,61 @@ done + + AT_CLEANUP + ++AT_BANNER([OVSDB - cluster failure while joining]) ++AT_SETUP([OVSDB cluster - follower crash while joining]) ++AT_KEYWORDS([ovsdb server negative unix cluster join]) ++ ++n=3 ++schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` ++ordinal_schema > schema ++AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db dnl ++ $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr]) ++cid=`ovsdb-tool db-cid s1.db` ++schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` ++for i in `seq 2 $n`; do ++ AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft]) ++done ++ ++on_exit 'kill `cat *.pid`' ++ ++dnl Starting followers first, so we can configure them to crash on join. ++for j in `seq $n`; do ++ i=$(($n + 1 - $j)) ++ AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off dnl ++ --detach --no-chdir --log-file=s$i.log dnl ++ --pidfile=s$i.pid --unixctl=s$i dnl ++ --remote=punix:s$i.ovsdb s$i.db]) ++ if test $i != 1; then ++ OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s$i dnl ++ cluster/failure-test crash-before-sending-install-snapshot-reply dnl ++ | grep -q "engaged"]) ++ fi ++done ++ ++dnl Make sure that followers really crashed. ++for i in `seq 2 $n`; do ++ OVS_WAIT_WHILE([test -s s$i.pid]) ++done ++ ++dnl Bring them back. ++for i in `seq 2 $n`; do ++ AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off dnl ++ --detach --no-chdir --log-file=s$i.log dnl ++ --pidfile=s$i.pid --unixctl=s$i dnl ++ --remote=punix:s$i.ovsdb s$i.db]) ++done ++ ++dnl Make sure that all servers joined the cluster. 
++for i in `seq $n`; do ++ AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected]) ++done ++ ++for i in `seq $n`; do ++ OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid]) ++done ++ ++AT_CLEANUP ++ + + + OVS_START_SHELL_HELPERS +@@ -416,9 +471,8 @@ ovsdb_cluster_failure_test () { + + cp $top_srcdir/vswitchd/vswitch.ovsschema schema + schema=`ovsdb-tool schema-name schema` +- AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [dnl +-ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral to persistent, including 'status' column in 'Manager' table, because clusters do not support ephemeral columns +-]) ++ AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [stderr]) ++ AT_CHECK([sed < stderr "/ovsdb|WARN|schema: changed .* columns in 'Open_vSwitch' database from ephemeral to persistent/d"]) + + n=3 + join_cluster() { +@@ -629,9 +683,8 @@ ovsdb_torture_test () { + local variant=$3 # 'kill' and restart or 'remove' and add + cp $top_srcdir/vswitchd/vswitch.ovsschema schema + schema=`ovsdb-tool schema-name schema` +- AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [dnl +-ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral to persistent, including 'status' column in 'Manager' table, because clusters do not support ephemeral columns +-]) ++ AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [stderr]) ++ AT_CHECK([sed < stderr "/ovsdb|WARN|schema: changed .* columns in 'Open_vSwitch' database from ephemeral to persistent/d"]) + + join_cluster() { + local i=$1 +diff --git a/tests/pmd.at b/tests/pmd.at +index a2f9d34a2a..0a451f33c6 100644 +--- a/tests/pmd.at ++++ b/tests/pmd.at +@@ -199,7 +199,7 @@ pmd thread numa_id core_id : + OVS_VSWITCHD_STOP + AT_CLEANUP + +-AT_SETUP([PMD - pmd-cpu-mask - NUMA]) 
++AT_SETUP([PMD - pmd-cpu-mask - dual NUMA]) + OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy-pmd options:n_rxq=8 options:numa_id=1 -- set Open_vSwitch . other_config:pmd-cpu-mask=1], + [], [], [--dummy-numa 1,1,0,0]) + +@@ -359,6 +359,44 @@ pmd thread numa_id 1 core_id 0: + OVS_VSWITCHD_STOP + AT_CLEANUP + ++AT_SETUP([PMD - pmd-cpu-mask - multi NUMA]) ++OVS_VSWITCHD_START([add-port br0 p0 \ ++ -- set Interface p0 type=dummy-pmd options:n_rxq=4 \ ++ -- set Interface p0 options:numa_id=0 \ ++ -- set Open_vSwitch . other_config:pmd-cpu-mask=0xf \ ++ -- set open_vswitch . other_config:pmd-rxq-assign=cycles], ++ [], [], [--dummy-numa 1,2,1,2]) ++ ++TMP=$(($(cat ovs-vswitchd.log | wc -l | tr -d [[:blank:]])+1)) ++AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-rxq-assign=group]) ++ ++OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "Performing pmd to rx queue assignment using group algorithm"]) ++OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "There's no available (non-isolated) pmd thread on numa node 0."]) ++ ++# check all pmds from both non-local numas are assigned an rxq ++AT_CHECK([test `ovs-appctl dpif-netdev/pmd-rxq-show | awk '/AVAIL$/ { printf("%s\t", $0); next } 1' | parse_pmd_rxq_show_group | wc -l` -eq 4]) ++ ++TMP=$(($(cat ovs-vswitchd.log | wc -l | tr -d [[:blank:]])+1)) ++AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-rxq-assign=cycles]) ++ ++OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "Performing pmd to rx queue assignment using cycles algorithm"]) ++OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "There's no available (non-isolated) pmd thread on numa node 0."]) ++ ++# check all pmds from both non-local numas are assigned an rxq ++AT_CHECK([test `ovs-appctl dpif-netdev/pmd-rxq-show | awk '/AVAIL$/ { printf("%s\t", $0); next } 1' | parse_pmd_rxq_show_group | wc -l` -eq 4]) ++ ++TMP=$(($(cat ovs-vswitchd.log | wc -l | tr -d [[:blank:]])+1)) ++AT_CHECK([ovs-vsctl set Open_vSwitch . 
other_config:pmd-rxq-assign=roundrobin]) ++ ++OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "Performing pmd to rx queue assignment using roundrobin algorithm"]) ++OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "There's no available (non-isolated) pmd thread on numa node 0."]) ++ ++# check all pmds from both non-local numas are assigned an rxq ++AT_CHECK([test `ovs-appctl dpif-netdev/pmd-rxq-show | awk '/AVAIL$/ { printf("%s\t", $0); next } 1' | parse_pmd_rxq_show_group | wc -l` -eq 4]) ++ ++OVS_VSWITCHD_STOP ++AT_CLEANUP ++ + AT_SETUP([PMD - stats]) + OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 ofport_request=7 type=dummy-pmd options:n_rxq=4], + [], [], [DUMMY_NUMA]) +diff --git a/tests/reconnect.at b/tests/reconnect.at +index 0f74709f5a..5bca84351c 100644 +--- a/tests/reconnect.at ++++ b/tests/reconnect.at +@@ -39,8 +39,19 @@ run + connected + + # Try timeout without noting that we tried to receive. +-# (This does nothing since we never timeout in this case.) ++# Timeout should be scheduled to the next probe interval. + timeout ++run ++ ++# Once we reached the timeout, it should not expire until the receive actually ++# attempted. However, we still need to wake up as soon as possible in order to ++# have a chance to mark the receive attempt even if nothing was received. ++timeout ++run ++ ++# Short time advance past the original probe interval, but not expired still. ++timeout ++run + + # Now disable the receive-attempted feature and timeout again. + receive-attempted LLONG_MAX +@@ -67,18 +78,37 @@ connected + last connected 0 ms ago, connected 0 ms total + + # Try timeout without noting that we tried to receive. +-# (This does nothing since we never timeout in this case.) +-timeout +- no timeout +- +-# Now disable the receive-attempted feature and timeout again. +-receive-attempted LLONG_MAX ++# Timeout should be scheduled to the next probe interval. 
+ timeout + advance 5000 ms + + ### t=6000 ### + in ACTIVE for 5000 ms (0 ms backoff) + run ++ ++# Once we reached the timeout, it should not expire until the receive actually ++# attempted. However, we still need to wake up as soon as possible in order to ++# have a chance to mark the receive attempt even if nothing was received. ++timeout ++ advance 1 ms ++ ++### t=6001 ### ++ in ACTIVE for 5001 ms (0 ms backoff) ++run ++ ++# Short time advance past the original probe interval, but not expired still. ++timeout ++ advance 1 ms ++ ++### t=6002 ### ++ in ACTIVE for 5002 ms (0 ms backoff) ++run ++ ++# Now disable the receive-attempted feature and timeout again. ++receive-attempted LLONG_MAX ++timeout ++ advance 0 ms ++run + should send probe + in IDLE for 0 ms (0 ms backoff) + +@@ -86,7 +116,7 @@ run + timeout + advance 5000 ms + +-### t=11000 ### ++### t=11002 ### + in IDLE for 5000 ms (0 ms backoff) + run + should disconnect +@@ -94,7 +124,7 @@ disconnected + in BACKOFF for 0 ms (1000 ms backoff) + 1 successful connections out of 1 attempts, seqno 2 + disconnected +- disconnected at 11000 ms (0 ms ago) ++ disconnected at 11002 ms (0 ms ago) + ]) + + ###################################################################### +@@ -111,8 +141,19 @@ run + connected + + # Try timeout without noting that we tried to receive. +-# (This does nothing since we never timeout in this case.) ++# Timeout should be scheduled to the next probe interval. ++timeout ++run ++ ++# Once we reached the timeout, it should not expire until the receive actually ++# attempted. However, we still need to wake up as soon as possible in order to ++# have a chance to mark the receive attempt even if nothing was received. ++timeout ++run ++ ++# Short time advance past the original probe interval, but not expired still. + timeout ++run + + # Now disable the receive-attempted feature and timeout again. 
+ receive-attempted LLONG_MAX +@@ -148,18 +189,37 @@ connected + last connected 0 ms ago, connected 0 ms total + + # Try timeout without noting that we tried to receive. +-# (This does nothing since we never timeout in this case.) +-timeout +- no timeout +- +-# Now disable the receive-attempted feature and timeout again. +-receive-attempted LLONG_MAX ++# Timeout should be scheduled to the next probe interval. + timeout + advance 5000 ms + + ### t=6500 ### + in ACTIVE for 5000 ms (0 ms backoff) + run ++ ++# Once we reached the timeout, it should not expire until the receive actually ++# attempted. However, we still need to wake up as soon as possible in order to ++# have a chance to mark the receive attempt even if nothing was received. ++timeout ++ advance 1 ms ++ ++### t=6501 ### ++ in ACTIVE for 5001 ms (0 ms backoff) ++run ++ ++# Short time advance past the original probe interval, but not expired still. ++timeout ++ advance 1 ms ++ ++### t=6502 ### ++ in ACTIVE for 5002 ms (0 ms backoff) ++run ++ ++# Now disable the receive-attempted feature and timeout again. ++receive-attempted LLONG_MAX ++timeout ++ advance 0 ms ++run + should send probe + in IDLE for 0 ms (0 ms backoff) + +@@ -167,7 +227,7 @@ run + timeout + advance 5000 ms + +-### t=11500 ### ++### t=11502 ### + in IDLE for 5000 ms (0 ms backoff) + run + should disconnect +@@ -175,7 +235,7 @@ disconnected + in BACKOFF for 0 ms (1000 ms backoff) + 1 successful connections out of 1 attempts, seqno 2 + disconnected +- disconnected at 11500 ms (0 ms ago) ++ disconnected at 11502 ms (0 ms ago) + ]) + + ###################################################################### +@@ -1271,14 +1331,14 @@ activity + created 1000, last activity 3000, last connected 2000 + + # Connection times out. 
+-timeout +- no timeout +-receive-attempted LLONG_MAX + timeout + advance 5000 ms + + ### t=8000 ### + in ACTIVE for 6000 ms (1000 ms backoff) ++receive-attempted LLONG_MAX ++timeout ++ advance 0 ms + run + should send probe + in IDLE for 0 ms (1000 ms backoff) +diff --git a/tests/system-common-macros.at b/tests/system-common-macros.at +index 19a0b125b9..8b9f5c7525 100644 +--- a/tests/system-common-macros.at ++++ b/tests/system-common-macros.at +@@ -281,6 +281,14 @@ m4_define([OVS_START_L7], + # + m4_define([OFPROTO_CLEAR_DURATION_IDLE], [[sed -e 's/duration=.*s,/duration=,/g' -e 's/idle_age=[0-9]*,/idle_age=,/g']]) + ++# OVS_CHECK_TUNNEL_TSO() ++# ++# Macro to be used in general tunneling tests that could be also ++# used by system-tso. In that case, tunneling is not supported and ++# the test should be skipped. ++m4_define([OVS_CHECK_TUNNEL_TSO], ++ [m4_ifdef([CHECK_SYSTEM_TSO], [AT_SKIP_IF(:)])]) ++ + # OVS_CHECK_VXLAN() + # + # Do basic check for vxlan functionality, skip the test if it's not there. +diff --git a/tests/system-dpdk.at b/tests/system-dpdk.at +index c3ee6990ca..7d2715c4a7 100644 +--- a/tests/system-dpdk.at ++++ b/tests/system-dpdk.at +@@ -237,6 +237,10 @@ AT_CHECK([ovs-vsctl show], [], [stdout]) + AT_SKIP_IF([! ovs-appctl dpif-netdev/miniflow-parser-get | sed 1,4d | grep "True"], [], [dnl + ]) + ++AT_CHECK([ovs-appctl dpif-netdev/dpif-impl-set dpif_avx512], [0], [dnl ++DPIF implementation set to dpif_avx512. ++]) ++ + AT_CHECK([ovs-appctl dpif-netdev/miniflow-parser-set autovalidator], [0], [dnl + Miniflow extract implementation set to autovalidator. + ]) +@@ -265,6 +269,10 @@ AT_CHECK([ovs-vsctl show], [], [stdout]) + AT_SKIP_IF([! ovs-appctl dpif-netdev/miniflow-parser-get | sed 1,4d | grep "True"], [], [dnl + ]) + ++AT_CHECK([ovs-appctl dpif-netdev/dpif-impl-set dpif_avx512], [0], [dnl ++DPIF implementation set to dpif_avx512. 
++]) ++ + AT_CHECK([ovs-appctl dpif-netdev/miniflow-parser-set autovalidator], [0], [dnl + Miniflow extract implementation set to autovalidator. + ]) +diff --git a/tests/system-route.at b/tests/system-route.at +index 1714273e35..270956d13f 100644 +--- a/tests/system-route.at ++++ b/tests/system-route.at +@@ -14,10 +14,9 @@ dnl Add ip address. + AT_CHECK([ip addr add 10.0.0.17/24 dev p1-route], [0], [stdout]) + + dnl Check that OVS catches route updates. +-OVS_WAIT_UNTIL([ovs-appctl ovs/route/show | grep 'p1-route' | sort], [0], [dnl +-Cached: 10.0.0.17/24 dev p1-route SRC 10.0.0.17 +-Cached: 10.0.0.17/32 dev p1-route SRC 10.0.0.17 local +-]) ++OVS_WAIT_UNTIL_EQUAL([ovs-appctl ovs/route/show | grep 'p1-route' | sort], [dnl ++Cached: 10.0.0.0/24 dev p1-route SRC 10.0.0.17 ++Cached: 10.0.0.17/32 dev p1-route SRC 10.0.0.17 local]) + + dnl Delete ip address. + AT_CHECK([ip addr del 10.0.0.17/24 dev p1-route], [0], [stdout]) +diff --git a/tests/system-traffic.at b/tests/system-traffic.at +index f22d86e466..1d20366280 100644 +--- a/tests/system-traffic.at ++++ b/tests/system-traffic.at +@@ -218,6 +218,7 @@ OVS_TRAFFIC_VSWITCHD_STOP + AT_CLEANUP + + AT_SETUP([datapath - ping over vxlan tunnel]) ++OVS_CHECK_TUNNEL_TSO() + OVS_CHECK_VXLAN() + + OVS_TRAFFIC_VSWITCHD_START() +@@ -258,7 +259,55 @@ NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PI + OVS_TRAFFIC_VSWITCHD_STOP + AT_CLEANUP + ++AT_SETUP([datapath - ping vlan over vxlan tunnel]) ++OVS_CHECK_TUNNEL_TSO() ++OVS_CHECK_VXLAN() ++ ++OVS_TRAFFIC_VSWITCHD_START() ++ADD_BR([br-underlay]) ++ ++AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"]) ++AT_CHECK([ovs-ofctl add-flow br-underlay "actions=normal"]) ++ ++ADD_NAMESPACES(at_ns0) ++ ++dnl Set up underlay link from host into the namespace using veth pair. 
++ADD_VETH(p0, at_ns0, br-underlay, "172.31.2.1/24") ++AT_CHECK([ip addr add dev br-underlay "172.31.1.100/24"]) ++AT_CHECK([ip link set dev br-underlay up]) ++ ++dnl Set up tunnel endpoints on OVS outside the namespace and with a native ++dnl linux device inside the namespace. ++ADD_OVS_TUNNEL([vxlan], [br0], [at_vxlan0], [172.31.1.1], [10.1.1.100/24]) ++ADD_NATIVE_TUNNEL([vxlan], [at_vxlan1], [at_ns0], [172.31.1.100], [10.2.1.1/24], ++ [id 0 dstport 4789]) ++ ++AT_CHECK([ovs-vsctl set port br0 tag=100]) ++AT_CHECK([ovs-vsctl set port br-underlay tag=42]) ++ ++ADD_VLAN(at_vxlan1, at_ns0, 100, "10.1.1.1/24") ++ADD_VLAN(p0, at_ns0, 42, "172.31.1.1/24") ++ ++dnl First, check the underlay ++NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 172.31.1.100 | FORMAT_PING], [0], [dnl ++3 packets transmitted, 3 received, 0% packet loss, time 0ms ++]) ++dnl Okay, now check the overlay with different packet sizes ++NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl ++3 packets transmitted, 3 received, 0% packet loss, time 0ms ++]) ++NS_CHECK_EXEC([at_ns0], [ping -s 1600 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl ++3 packets transmitted, 3 received, 0% packet loss, time 0ms ++]) ++NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl ++3 packets transmitted, 3 received, 0% packet loss, time 0ms ++]) ++ ++OVS_TRAFFIC_VSWITCHD_STOP ++AT_CLEANUP ++ + AT_SETUP([datapath - ping over vxlan6 tunnel]) ++OVS_CHECK_TUNNEL_TSO() + OVS_CHECK_VXLAN_UDP6ZEROCSUM() + + OVS_TRAFFIC_VSWITCHD_START() +@@ -302,6 +351,7 @@ OVS_TRAFFIC_VSWITCHD_STOP + AT_CLEANUP + + AT_SETUP([datapath - ping over gre tunnel]) ++OVS_CHECK_TUNNEL_TSO() + OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) + OVS_CHECK_GRE() + +@@ -343,6 +393,7 @@ OVS_TRAFFIC_VSWITCHD_STOP + AT_CLEANUP + + AT_SETUP([datapath - ping over ip6gre L2 tunnel]) ++OVS_CHECK_TUNNEL_TSO() + OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) + OVS_CHECK_GRE() + OVS_CHECK_ERSPAN() +@@ 
-383,6 +434,7 @@ AT_CLEANUP + + + AT_SETUP([datapath - ping over erspan v1 tunnel]) ++OVS_CHECK_TUNNEL_TSO() + OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) + OVS_CHECK_GRE() + OVS_CHECK_ERSPAN() +@@ -419,6 +471,7 @@ OVS_TRAFFIC_VSWITCHD_STOP + AT_CLEANUP + + AT_SETUP([datapath - ping over erspan v2 tunnel]) ++OVS_CHECK_TUNNEL_TSO() + OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) + OVS_CHECK_GRE() + OVS_CHECK_ERSPAN() +@@ -455,6 +508,7 @@ OVS_TRAFFIC_VSWITCHD_STOP + AT_CLEANUP + + AT_SETUP([datapath - ping over ip6erspan v1 tunnel]) ++OVS_CHECK_TUNNEL_TSO() + OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) + OVS_CHECK_GRE() + OVS_CHECK_ERSPAN() +@@ -494,6 +548,7 @@ OVS_TRAFFIC_VSWITCHD_STOP + AT_CLEANUP + + AT_SETUP([datapath - ping over ip6erspan v2 tunnel]) ++OVS_CHECK_TUNNEL_TSO() + OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) + OVS_CHECK_GRE() + OVS_CHECK_ERSPAN() +@@ -534,6 +589,7 @@ OVS_TRAFFIC_VSWITCHD_STOP + AT_CLEANUP + + AT_SETUP([datapath - ping over geneve tunnel]) ++OVS_CHECK_TUNNEL_TSO() + OVS_CHECK_GENEVE() + + OVS_TRAFFIC_VSWITCHD_START() +@@ -575,6 +631,7 @@ OVS_TRAFFIC_VSWITCHD_STOP + AT_CLEANUP + + AT_SETUP([datapath - ping over geneve tunnel, delete flow regression]) ++OVS_CHECK_TUNNEL_TSO() + OVS_CHECK_GENEVE() + + OVS_TRAFFIC_VSWITCHD_START() +@@ -629,6 +686,7 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/|ERR|/d + AT_CLEANUP + + AT_SETUP([datapath - flow resume with geneve tun_metadata]) ++OVS_CHECK_TUNNEL_TSO() + OVS_CHECK_GENEVE() + + OVS_TRAFFIC_VSWITCHD_START() +@@ -680,6 +738,7 @@ OVS_TRAFFIC_VSWITCHD_STOP + AT_CLEANUP + + AT_SETUP([datapath - ping over geneve6 tunnel]) ++OVS_CHECK_TUNNEL_TSO() + OVS_CHECK_GENEVE_UDP6ZEROCSUM() + + OVS_TRAFFIC_VSWITCHD_START() +@@ -723,6 +782,7 @@ OVS_TRAFFIC_VSWITCHD_STOP + AT_CLEANUP + + AT_SETUP([datapath - ping over gre tunnel by simulated packets]) ++OVS_CHECK_TUNNEL_TSO() + OVS_CHECK_MIN_KERNEL(3, 10) + + OVS_TRAFFIC_VSWITCHD_START() +@@ -769,6 +829,7 @@ OVS_TRAFFIC_VSWITCHD_STOP + AT_CLEANUP + + AT_SETUP([datapath - ping over erspan v1 tunnel by 
simulated packets]) ++OVS_CHECK_TUNNEL_TSO() + OVS_CHECK_MIN_KERNEL(3, 10) + + OVS_TRAFFIC_VSWITCHD_START() +@@ -817,6 +878,7 @@ OVS_TRAFFIC_VSWITCHD_STOP + AT_CLEANUP + + AT_SETUP([datapath - ping over erspan v2 tunnel by simulated packets]) ++OVS_CHECK_TUNNEL_TSO() + OVS_CHECK_MIN_KERNEL(3, 10) + + OVS_TRAFFIC_VSWITCHD_START() +@@ -870,6 +932,7 @@ OVS_TRAFFIC_VSWITCHD_STOP + AT_CLEANUP + + AT_SETUP([datapath - ping over ip6erspan v1 tunnel by simulated packets]) ++OVS_CHECK_TUNNEL_TSO() + OVS_CHECK_MIN_KERNEL(3, 10) + + OVS_TRAFFIC_VSWITCHD_START() +@@ -925,6 +988,7 @@ OVS_TRAFFIC_VSWITCHD_STOP + AT_CLEANUP + + AT_SETUP([datapath - ping over ip6erspan v2 tunnel by simulated packets]) ++OVS_CHECK_TUNNEL_TSO() + OVS_CHECK_MIN_KERNEL(3, 10) + + OVS_TRAFFIC_VSWITCHD_START() +@@ -4100,15 +4164,15 @@ action=normal + + AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) + +-AT_CHECK([ovs-ofctl packet-out br0 "packet=52540003287c525400444ab586dd6006f70605b02c4020010001000000000000000000000020200100010000000000000000000000101100000134e88deb13891389080803136161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl ++AT_CHECK([ovs-ofctl packet-out br0 
"in_port=42,packet=52540003287c525400444ab586dd6006f70605b02c4020010001000000000000000000000020200100010000000000000000000000101100000134e88deb13891389080803136161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl + "16161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161"dnl + 
"61616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl + "1616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161, actions=ct(table=1)"]) + +-AT_CHECK([ovs-ofctl packet-out br0 
"packet=52540003287c525400444ab586dd6006f70602682c402001000100000000000000000000002020010001000000000000000000000010110005a834e88deb6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl ++AT_CHECK([ovs-ofctl packet-out br0 "in_port=42,packet=52540003287c525400444ab586dd6006f70602682c402001000100000000000000000000002020010001000000000000000000000010110005a834e88deb6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl + 
"161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161, actions=ct(table=1)"]) + +-AT_CHECK([ovs-ofctl packet-out br0 "packet=52540003287c525400444ab586dd6006f706033d1140200100010000000000000000000000202001000100000000000000000000001013891389033d923861616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl ++AT_CHECK([ovs-ofctl packet-out br0 
"in_port=42,packet=52540003287c525400444ab586dd6006f706033d1140200100010000000000000000000000202001000100000000000000000000001013891389033d923861616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl + "1616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161610a, actions=ct(table=1)"]) + + AT_CHECK([ovs-appctl dpctl/dump-flows | head -2 | tail -1 | grep -q -e ["]udp[(]src=5001["]]) +@@ -6454,7 +6518,7 @@ on_exit 'ovs-appctl revalidator/purge' + on_exit 'ovs-appctl 
dpif/dump-flows br0' + + dnl Should work with the virtual IP address through NAT +-for i in 1 2 3 4 5 6 7 8 9 10 11 12; do ++for i in $(seq 1 50); do + echo Request $i + NS_CHECK_EXEC([at_ns1], [wget 10.1.1.64 -t 5 -T 1 --retry-connrefused -v -o wget$i.log]) + done +diff --git a/tests/system-tso-macros.at b/tests/system-tso-macros.at +index 406334f3e0..1a80047619 100644 +--- a/tests/system-tso-macros.at ++++ b/tests/system-tso-macros.at +@@ -29,3 +29,5 @@ m4_define([CONFIGURE_VETH_OFFLOADS], + [AT_CHECK([ethtool -K $1 sg on], [0], [ignore], [ignore])] + [AT_CHECK([ethtool -K $1 tso on], [0], [ignore], [ignore])] + ) ++ ++m4_define([CHECK_SYSTEM_TSO], []) +diff --git a/tests/test-cmap.c b/tests/test-cmap.c +index 0705475606..588a5dea63 100644 +--- a/tests/test-cmap.c ++++ b/tests/test-cmap.c +@@ -74,6 +74,7 @@ check_cmap(struct cmap *cmap, const int values[], size_t n, + cmap_values[i++] = e->value; + } + assert(i == n); ++ assert(e == NULL); + + /* Here we test iteration with cmap_next_position() */ + i = 0; +@@ -107,6 +108,7 @@ check_cmap(struct cmap *cmap, const int values[], size_t n, + count += e->value == values[i]; + } + assert(count == 1); ++ assert(e == NULL); + } + + /* Check that all the values are there in batched lookup. */ +@@ -130,6 +132,7 @@ check_cmap(struct cmap *cmap, const int values[], size_t n, + CMAP_NODE_FOR_EACH (e, node, nodes[k]) { + count += e->value == values[i + k]; + } ++ assert(e == NULL); + } + assert(count == j); /* j elements in a batch. */ + } +@@ -584,7 +587,7 @@ benchmark_hmap(void) + { + struct helement *elements; + struct hmap hmap; +- struct helement *e, *next; ++ struct helement *e; + struct timeval start; + pthread_t *threads; + struct hmap_aux aux; +@@ -622,7 +625,7 @@ benchmark_hmap(void) + + /* Destruction. 
*/ + xgettimeofday(&start); +- HMAP_FOR_EACH_SAFE (e, next, node, &hmap) { ++ HMAP_FOR_EACH_SAFE (e, node, &hmap) { + hmap_remove(&hmap, &e->node); + } + hmap_destroy(&hmap); +diff --git a/tests/test-hindex.c b/tests/test-hindex.c +index af06be5fcc..cc2b1b8bd9 100644 +--- a/tests/test-hindex.c ++++ b/tests/test-hindex.c +@@ -265,6 +265,43 @@ test_hindex_for_each_safe(hash_func *hash) + i = 0; + n_remaining = n; + HINDEX_FOR_EACH_SAFE (e, next, node, &hindex) { ++ if (hindex_next(&hindex, &e->node) == NULL) { ++ assert(next == NULL); ++ } else { ++ assert(&next->node == hindex_next(&hindex, &e->node)); ++ } ++ assert(i < n); ++ if (pattern & (1ul << e->value)) { ++ size_t j; ++ hindex_remove(&hindex, &e->node); ++ for (j = 0; ; j++) { ++ assert(j < n_remaining); ++ if (values[j] == e->value) { ++ values[j] = values[--n_remaining]; ++ break; ++ } ++ } ++ } ++ check_hindex(&hindex, values, n_remaining, hash); ++ i++; ++ } ++ assert(i == n); ++ assert(next == NULL); ++ ++ for (i = 0; i < n; i++) { ++ if (pattern & (1ul << i)) { ++ n_remaining++; ++ } ++ } ++ assert(n == n_remaining); ++ hindex_destroy(&hindex); ++ ++ /* Test short version (without the next variable). */ ++ make_hindex(&hindex, elements, values, n, hash); ++ ++ i = 0; ++ n_remaining = n; ++ HINDEX_FOR_EACH_SAFE (e, node, &hindex) { + assert(i < n); + if (pattern & (1ul << e->value)) { + size_t j; +diff --git a/tests/test-hmap.c b/tests/test-hmap.c +index 9259b0b3fc..e50c7c3807 100644 +--- a/tests/test-hmap.c ++++ b/tests/test-hmap.c +@@ -62,6 +62,7 @@ check_hmap(struct hmap *hmap, const int values[], size_t n, + hmap_values[i++] = e->value; + } + assert(i == n); ++ assert(e == NULL); + + memcpy(sort_values, values, sizeof *sort_values * n); + qsort(sort_values, n, sizeof *sort_values, compare_ints); +@@ -82,6 +83,7 @@ check_hmap(struct hmap *hmap, const int values[], size_t n, + count += e->value == values[i]; + } + assert(count == 1); ++ assert(e == NULL); + } + + /* Check counters. 
*/ +@@ -243,6 +245,44 @@ test_hmap_for_each_safe(hash_func *hash) + i = 0; + n_remaining = n; + HMAP_FOR_EACH_SAFE (e, next, node, &hmap) { ++ if (hmap_next(&hmap, &e->node) == NULL) { ++ assert(next == NULL); ++ } else { ++ assert(&next->node == hmap_next(&hmap, &e->node)); ++ } ++ assert(i < n); ++ if (pattern & (1ul << e->value)) { ++ size_t j; ++ hmap_remove(&hmap, &e->node); ++ for (j = 0; ; j++) { ++ assert(j < n_remaining); ++ if (values[j] == e->value) { ++ values[j] = values[--n_remaining]; ++ break; ++ } ++ } ++ } ++ check_hmap(&hmap, values, n_remaining, hash); ++ i++; ++ } ++ assert(i == n); ++ assert(next == NULL); ++ assert(e == NULL); ++ ++ for (i = 0; i < n; i++) { ++ if (pattern & (1ul << i)) { ++ n_remaining++; ++ } ++ } ++ assert(n == n_remaining); ++ hmap_destroy(&hmap); ++ ++ /* Test short version (without next variable). */ ++ make_hmap(&hmap, elements, values, n, hash); ++ ++ i = 0; ++ n_remaining = n; ++ HMAP_FOR_EACH_SAFE (e, node, &hmap) { + assert(i < n); + if (pattern & (1ul << e->value)) { + size_t j; +@@ -259,6 +299,7 @@ test_hmap_for_each_safe(hash_func *hash) + i++; + } + assert(i == n); ++ assert(e == NULL); + + for (i = 0; i < n; i++) { + if (pattern & (1ul << i)) { +@@ -308,6 +349,7 @@ test_hmap_for_each_pop(hash_func *hash) + i++; + } + assert(i == n); ++ assert(e == NULL); + + hmap_destroy(&hmap); + } +diff --git a/tests/test-list.c b/tests/test-list.c +index 6f1fb059bc..2c6c444488 100644 +--- a/tests/test-list.c ++++ b/tests/test-list.c +@@ -61,7 +61,7 @@ check_list(struct ovs_list *list, const int values[], size_t n) + assert(e->value == values[i]); + i++; + } +- assert(&e->node == list); ++ assert(e == NULL); + assert(i == n); + + i = 0; +@@ -70,7 +70,7 @@ check_list(struct ovs_list *list, const int values[], size_t n) + assert(e->value == values[n - i - 1]); + i++; + } +- assert(&e->node == list); ++ assert(e == NULL); + assert(i == n); + + assert(ovs_list_is_empty(list) == !n); +@@ -135,6 +135,13 @@ 
test_list_for_each_safe(void) + values_idx = 0; + n_remaining = n; + LIST_FOR_EACH_SAFE (e, next, node, &list) { ++ /* "next" is valid as long as it's not pointing to &list. */ ++ if (&e->node == list.prev) { ++ assert(next == NULL); ++ } else { ++ assert(&next->node == e->node.next); ++ } ++ + assert(i < n); + if (pattern & (1ul << i)) { + ovs_list_remove(&e->node); +@@ -148,7 +155,8 @@ test_list_for_each_safe(void) + i++; + } + assert(i == n); +- assert(&e->node == &list); ++ assert(e == NULL); ++ assert(next == NULL); + + for (i = 0; i < n; i++) { + if (pattern & (1ul << i)) { +@@ -156,6 +164,35 @@ test_list_for_each_safe(void) + } + } + assert(n == n_remaining); ++ ++ /* Test short version (without next variable). */ ++ make_list(&list, elements, values, n); ++ ++ i = 0; ++ values_idx = 0; ++ n_remaining = n; ++ LIST_FOR_EACH_SAFE (e, node, &list) { ++ assert(i < n); ++ if (pattern & (1ul << i)) { ++ ovs_list_remove(&e->node); ++ n_remaining--; ++ memmove(&values[values_idx], &values[values_idx + 1], ++ sizeof *values * (n_remaining - values_idx)); ++ } else { ++ values_idx++; ++ } ++ ++ check_list(&list, values, n_remaining); ++ i++; ++ } ++ assert(i == n); ++ assert(e == NULL); ++ ++ for (i = 0; i < n; i++) { ++ if (pattern & (1ul << i)) { ++ n_remaining++; ++ } ++ } + } + } + } +diff --git a/tests/tunnel-push-pop.at b/tests/tunnel-push-pop.at +index 57589758f4..c63344196b 100644 +--- a/tests/tunnel-push-pop.at ++++ b/tests/tunnel-push-pop.at +@@ -546,6 +546,28 @@ AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port [[37]]' | sort], [0], [dnl + port 7: rx pkts=5, bytes=434, drop=?, errs=?, frame=?, over=?, crc=? 
+ ]) + ++dnl Send out packets received from L3GRE tunnel back to L3GRE tunnel ++AT_CHECK([ovs-ofctl del-flows int-br]) ++AT_CHECK([ovs-ofctl add-flow int-br "in_port=7,actions=set_field:3->in_port,7"]) ++AT_CHECK([ovs-vsctl -- set Interface br0 options:pcap=br0.pcap]) ++ ++AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637']) ++AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637']) ++AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637']) ++ ++ovs-appctl time/warp 1000 ++ ++AT_CHECK([ovs-pcap p0.pcap > p0.pcap.txt 2>&1]) ++AT_CHECK([tail -6 p0.pcap.txt], [0], [dnl ++aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637 ++001b213cab64aa55aa55000008004500007000004000402f33aa010102580101025c20000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637 
++aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637 ++001b213cab64aa55aa55000008004500007000004000402f33aa010102580101025c20000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637 ++aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637 ++001b213cab64aa55aa55000008004500007000004000402f33aa010102580101025c20000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637 ++]) ++ ++ + dnl Check decapsulation of Geneve packet with options + AT_CAPTURE_FILE([ofctl_monitor.log]) + AT_CHECK([ovs-ofctl monitor int-br 65534 --detach --no-chdir --pidfile 2> ofctl_monitor.log]) +@@ -565,8 +587,8 @@ icmp,vlan_tci=0x0000,dl_src=be:b6:f4:e1:49:4a,dl_dst=fe:71:d8:83:72:4f,nw_src=30 + AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port 5'], [0], [dnl + port 5: rx pkts=1, bytes=98, drop=?, errs=?, frame=?, over=?, crc=? 
+ ]) +-AT_CHECK([ovs-appctl dpif/dump-flows int-br | grep 'in_port(6081)'], [0], [dnl +-tunnel(tun_id=0x7b,src=1.1.2.92,dst=1.1.2.88,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(-df-csum+key)),recirc_id(0),in_port(6081),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=2,rule_cookie=0,controller_id=0,max_len=65535)) ++AT_CHECK([ovs-appctl dpif/dump-flows int-br | grep 'in_port(6081)' | sed -e 's/recirc_id=[[0-9]]*/recirc_id=/g'], [0], [dnl ++tunnel(tun_id=0x7b,src=1.1.2.92,dst=1.1.2.88,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(-df-csum+key)),recirc_id(0),in_port(6081),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=,rule_cookie=0,controller_id=0,max_len=65535)) + ]) + + dnl Receive VXLAN with different MAC and verify that the neigh cache gets updated +@@ -842,3 +864,54 @@ Datapath actions: 7 + + OVS_VSWITCHD_STOP + AT_CLEANUP ++ ++AT_SETUP([tunnel_push_pop - VXLAN access port]) ++ ++dnl Create bridge that has a MAC address. ++OVS_VSWITCHD_START([set bridge br0 datapath_type=dummy dnl ++ -- set Interface br0 other-config:hwaddr=aa:55:aa:55:00:00]) ++AT_CHECK([ovs-vsctl add-port br0 p8 dnl ++ -- set Interface p8 type=dummy ofport_request=8]) ++ ++dnl Create another bridge. ++AT_CHECK([ovs-vsctl add-br ovs-tun0 -- set bridge ovs-tun0 datapath_type=dummy]) ++ ++dnl Add VXLAN port to this bridge. ++AT_CHECK([ovs-vsctl add-port ovs-tun0 tun0 dnl ++ -- set int tun0 type=vxlan options:remote_ip=10.0.0.11 dnl ++ -- add-port ovs-tun0 p7 dnl ++ -- set interface p7 type=dummy ofport_request=7]) ++ ++dnl Set VLAN tags, so that br0 and its port p8 have the same tag, ++dnl but ovs-tun0's port p7 has a different tag. 
++AT_CHECK([ovs-vsctl set port p8 tag=42 dnl ++ -- set port br0 tag=42 dnl ++ -- set port p7 tag=200]) ++ ++dnl Set IP address and route for br0. ++AT_CHECK([ovs-appctl netdev-dummy/ip4addr br0 10.0.0.2/24], [0], [OK ++]) ++AT_CHECK([ovs-appctl ovs/route/add 10.0.0.11/24 br0], [0], [OK ++]) ++ ++dnl Send an ARP reply to port b8 on br0, so that packets will be forwarded ++dnl to learned port. ++AT_CHECK([ovs-ofctl add-flow br0 action=normal]) ++ ++AT_CHECK([ovs-appctl netdev-dummy/receive p8 'in_port(8),dnl ++ eth(src=aa:55:aa:66:00:00,dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),dnl ++ arp(sip=10.0.0.11,tip=10.0.0.2,op=2,sha=aa:55:aa:66:00:00,tha=00:00:00:00:00:00)']) ++ ++AT_CHECK([ovs-appctl ofproto/trace ovs-tun0 in_port=p7], [0], [stdout]) ++AT_CHECK([tail -2 stdout], [0], [dnl ++Megaflow: recirc_id=0,eth,in_port=7,dl_src=00:00:00:00:00:00,dnl ++dl_dst=00:00:00:00:00:00,dl_type=0x0000 ++Datapath actions: push_vlan(vid=200,pcp=0),1,clone(tnl_push(tnl_port(4789),dnl ++header(size=50,type=4,eth(dst=aa:55:aa:66:00:00,src=aa:55:aa:55:00:00,dnl ++dl_type=0x0800),ipv4(src=10.0.0.2,dst=10.0.0.11,proto=17,tos=0,ttl=64,dnl ++frag=0x4000),udp(src=0,dst=4789,csum=0x0),vxlan(flags=0x8000000,vni=0x0)),dnl ++out_port(100)),8) ++]) ++ ++OVS_VSWITCHD_STOP ++AT_CLEANUP +diff --git a/tests/tunnel.at b/tests/tunnel.at +index b8ae7caa9b..fd482aa872 100644 +--- a/tests/tunnel.at ++++ b/tests/tunnel.at +@@ -126,7 +126,7 @@ AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl + AT_CHECK([ovs-appctl dpctl/add-flow "tunnel(dst=1.1.1.1,src=3.3.3.200/255.255.255.0,tp_dst=123,tp_src=1,ttl=64),recirc_id(0),in_port(1),eth(),eth_type(0x0800),ipv4()" "2"]) + + AT_CHECK([ovs-appctl dpctl/dump-flows | tail -1], [0], [dnl +-tunnel(src=3.3.3.200/255.255.255.0,dst=1.1.1.1,ttl=64,tp_src=1,tp_dst=123),recirc_id(0),in_port(1),eth_type(0x0800), packets:0, bytes:0, used:never, actions:2 
++tunnel(src=3.3.3.200/255.255.255.0,dst=1.1.1.1,ttl=64,tp_src=1,tp_dst=123),recirc_id(0),in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:2 + ]) + + OVS_VSWITCHD_STOP +diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c +index ede7f1e61a..6771973ae9 100644 +--- a/utilities/ovs-ofctl.c ++++ b/utilities/ovs-ofctl.c +@@ -730,12 +730,12 @@ static void + bundle_print_errors(struct ovs_list *errors, struct ovs_list *requests, + const char *vconn_name) + { +- struct ofpbuf *error, *next; ++ struct ofpbuf *error; + struct ofpbuf *bmsg; + + INIT_CONTAINER(bmsg, requests, list_node); + +- LIST_FOR_EACH_SAFE (error, next, list_node, errors) { ++ LIST_FOR_EACH_SAFE (error, list_node, errors) { + const struct ofp_header *error_oh = error->data; + ovs_be32 error_xid = error_oh->xid; + enum ofperr ofperr; +diff --git a/utilities/ovs-vsctl.c b/utilities/ovs-vsctl.c +index 37cc72d401..1032089fc2 100644 +--- a/utilities/ovs-vsctl.c ++++ b/utilities/ovs-vsctl.c +@@ -1100,14 +1100,14 @@ cmd_emer_reset(struct ctl_context *ctx) + const struct ovsrec_bridge *br; + const struct ovsrec_port *port; + const struct ovsrec_interface *iface; +- const struct ovsrec_mirror *mirror, *next_mirror; +- const struct ovsrec_controller *ctrl, *next_ctrl; +- const struct ovsrec_manager *mgr, *next_mgr; +- const struct ovsrec_netflow *nf, *next_nf; +- const struct ovsrec_ssl *ssl, *next_ssl; +- const struct ovsrec_sflow *sflow, *next_sflow; +- const struct ovsrec_ipfix *ipfix, *next_ipfix; +- const struct ovsrec_flow_sample_collector_set *fscset, *next_fscset; ++ const struct ovsrec_mirror *mirror; ++ const struct ovsrec_controller *ctrl; ++ const struct ovsrec_manager *mgr; ++ const struct ovsrec_netflow *nf; ++ const struct ovsrec_ssl *ssl; ++ const struct ovsrec_sflow *sflow; ++ const struct ovsrec_ipfix *ipfix; ++ const struct ovsrec_flow_sample_collector_set *fscset; + + /* Reset the Open_vSwitch table. 
*/ + ovsrec_open_vswitch_set_manager_options(vsctl_ctx->ovs, NULL, 0); +@@ -1145,35 +1145,35 @@ cmd_emer_reset(struct ctl_context *ctx) + ovsrec_interface_set_ingress_policing_burst(iface, 0); + } + +- OVSREC_MIRROR_FOR_EACH_SAFE (mirror, next_mirror, idl) { ++ OVSREC_MIRROR_FOR_EACH_SAFE (mirror, idl) { + ovsrec_mirror_delete(mirror); + } + +- OVSREC_CONTROLLER_FOR_EACH_SAFE (ctrl, next_ctrl, idl) { ++ OVSREC_CONTROLLER_FOR_EACH_SAFE (ctrl, idl) { + ovsrec_controller_delete(ctrl); + } + +- OVSREC_MANAGER_FOR_EACH_SAFE (mgr, next_mgr, idl) { ++ OVSREC_MANAGER_FOR_EACH_SAFE (mgr, idl) { + ovsrec_manager_delete(mgr); + } + +- OVSREC_NETFLOW_FOR_EACH_SAFE (nf, next_nf, idl) { ++ OVSREC_NETFLOW_FOR_EACH_SAFE (nf, idl) { + ovsrec_netflow_delete(nf); + } + +- OVSREC_SSL_FOR_EACH_SAFE (ssl, next_ssl, idl) { ++ OVSREC_SSL_FOR_EACH_SAFE (ssl, idl) { + ovsrec_ssl_delete(ssl); + } + +- OVSREC_SFLOW_FOR_EACH_SAFE (sflow, next_sflow, idl) { ++ OVSREC_SFLOW_FOR_EACH_SAFE (sflow, idl) { + ovsrec_sflow_delete(sflow); + } + +- OVSREC_IPFIX_FOR_EACH_SAFE (ipfix, next_ipfix, idl) { ++ OVSREC_IPFIX_FOR_EACH_SAFE (ipfix, idl) { + ovsrec_ipfix_delete(ipfix); + } + +- OVSREC_FLOW_SAMPLE_COLLECTOR_SET_FOR_EACH_SAFE (fscset, next_fscset, idl) { ++ OVSREC_FLOW_SAMPLE_COLLECTOR_SET_FOR_EACH_SAFE (fscset, idl) { + ovsrec_flow_sample_collector_set_delete(fscset); + } + +@@ -1510,13 +1510,13 @@ cmd_add_br(struct ctl_context *ctx) + static void + del_port(struct vsctl_context *vsctl_ctx, struct vsctl_port *port) + { +- struct vsctl_iface *iface, *next_iface; ++ struct vsctl_iface *iface; + + bridge_delete_port((port->bridge->parent + ? 
port->bridge->parent->br_cfg + : port->bridge->br_cfg), port->port_cfg); + +- LIST_FOR_EACH_SAFE (iface, next_iface, ifaces_node, &port->ifaces) { ++ LIST_FOR_EACH_SAFE (iface, ifaces_node, &port->ifaces) { + del_cached_iface(vsctl_ctx, iface); + } + del_cached_port(vsctl_ctx, port); +@@ -1525,19 +1525,19 @@ del_port(struct vsctl_context *vsctl_ctx, struct vsctl_port *port) + static void + del_bridge(struct vsctl_context *vsctl_ctx, struct vsctl_bridge *br) + { +- struct vsctl_bridge *child, *next_child; +- struct vsctl_port *port, *next_port; +- const struct ovsrec_flow_sample_collector_set *fscset, *next_fscset; ++ struct vsctl_bridge *child; ++ struct vsctl_port *port; ++ const struct ovsrec_flow_sample_collector_set *fscset; + +- HMAP_FOR_EACH_SAFE (child, next_child, children_node, &br->children) { ++ HMAP_FOR_EACH_SAFE (child, children_node, &br->children) { + del_bridge(vsctl_ctx, child); + } + +- LIST_FOR_EACH_SAFE (port, next_port, ports_node, &br->ports) { ++ LIST_FOR_EACH_SAFE (port, ports_node, &br->ports) { + del_port(vsctl_ctx, port); + } + +- OVSREC_FLOW_SAMPLE_COLLECTOR_SET_FOR_EACH_SAFE (fscset, next_fscset, ++ OVSREC_FLOW_SAMPLE_COLLECTOR_SET_FOR_EACH_SAFE (fscset, + vsctl_ctx->base.idl) { + if (fscset->bridge == br->br_cfg) { + ovsrec_flow_sample_collector_set_delete(fscset); +diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c +index 5223aa8970..e328d8ead1 100644 +--- a/vswitchd/bridge.c ++++ b/vswitchd/bridge.c +@@ -543,13 +543,13 @@ bridge_exit(bool delete_datapath) + if_notifier_destroy(ifnotifier); + seq_destroy(ifaces_changed); + +- struct datapath *dp, *next; +- HMAP_FOR_EACH_SAFE (dp, next, node, &all_datapaths) { ++ struct datapath *dp; ++ HMAP_FOR_EACH_SAFE (dp, node, &all_datapaths) { + datapath_destroy(dp); + } + +- struct bridge *br, *next_br; +- HMAP_FOR_EACH_SAFE (br, next_br, node, &all_bridges) { ++ struct bridge *br; ++ HMAP_FOR_EACH_SAFE (br, node, &all_bridges) { + bridge_destroy(br, delete_datapath); + } + +@@ -716,8 +716,8 @@ 
static void + datapath_destroy(struct datapath *dp) + { + if (dp) { +- struct ct_zone *ct_zone, *next; +- HMAP_FOR_EACH_SAFE (ct_zone, next, node, &dp->ct_zones) { ++ struct ct_zone *ct_zone; ++ HMAP_FOR_EACH_SAFE (ct_zone, node, &dp->ct_zones) { + ofproto_ct_del_zone_timeout_policy(dp->type, ct_zone->zone_id); + ct_zone_remove_and_destroy(dp, ct_zone); + } +@@ -733,7 +733,7 @@ datapath_destroy(struct datapath *dp) + static void + ct_zones_reconfigure(struct datapath *dp, struct ovsrec_datapath *dp_cfg) + { +- struct ct_zone *ct_zone, *next; ++ struct ct_zone *ct_zone; + + /* Add new 'ct_zone's or update existing 'ct_zone's based on the database + * state. */ +@@ -760,7 +760,7 @@ ct_zones_reconfigure(struct datapath *dp, struct ovsrec_datapath *dp_cfg) + } + + /* Purge 'ct_zone's no longer found in the database. */ +- HMAP_FOR_EACH_SAFE (ct_zone, next, node, &dp->ct_zones) { ++ HMAP_FOR_EACH_SAFE (ct_zone, node, &dp->ct_zones) { + if (ct_zone->last_used != idl_seqno) { + ofproto_ct_del_zone_timeout_policy(dp->type, ct_zone->zone_id); + ct_zone_remove_and_destroy(dp, ct_zone); +@@ -788,7 +788,7 @@ dp_capability_reconfigure(struct datapath *dp, + static void + datapath_reconfigure(const struct ovsrec_open_vswitch *cfg) + { +- struct datapath *dp, *next; ++ struct datapath *dp; + + /* Add new 'datapath's or update existing ones. */ + for (size_t i = 0; i < cfg->n_datapaths; i++) { +@@ -805,7 +805,7 @@ datapath_reconfigure(const struct ovsrec_open_vswitch *cfg) + } + + /* Purge deleted 'datapath's. 
*/ +- HMAP_FOR_EACH_SAFE (dp, next, node, &all_datapaths) { ++ HMAP_FOR_EACH_SAFE (dp, node, &all_datapaths) { + if (dp->last_used != idl_seqno) { + datapath_destroy(dp); + } +@@ -816,7 +816,7 @@ static void + bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) + { + struct sockaddr_in *managers; +- struct bridge *br, *next; ++ struct bridge *br; + int sflow_bridge_number; + size_t n_managers; + +@@ -875,7 +875,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) + * - Create ofprotos that are missing. + * + * - Add ports that are missing. */ +- HMAP_FOR_EACH_SAFE (br, next, node, &all_bridges) { ++ HMAP_FOR_EACH_SAFE (br, node, &all_bridges) { + if (!br->ofproto) { + int error; + +@@ -1020,7 +1020,7 @@ bridge_delete_or_reconfigure_ports(struct bridge *br) + struct ofproto_port_dump dump; + + struct sset ofproto_ports; +- struct port *port, *port_next; ++ struct port *port; + + /* List of "ofp_port"s to delete. We make a list instead of deleting them + * right away because ofproto implementations aren't necessarily able to +@@ -1132,10 +1132,10 @@ bridge_delete_or_reconfigure_ports(struct bridge *br) + * device destroyed via "tunctl -d", a physical Ethernet device + * whose module was just unloaded via "rmmod", or a virtual NIC for a + * VM whose VM was just terminated. 
*/ +- HMAP_FOR_EACH_SAFE (port, port_next, hmap_node, &br->ports) { +- struct iface *iface, *iface_next; ++ HMAP_FOR_EACH_SAFE (port, hmap_node, &br->ports) { ++ struct iface *iface; + +- LIST_FOR_EACH_SAFE (iface, iface_next, port_elem, &port->ifaces) { ++ LIST_FOR_EACH_SAFE (iface, port_elem, &port->ifaces) { + if (!sset_contains(&ofproto_ports, iface->name)) { + iface_destroy__(iface); + } +@@ -1967,7 +1967,7 @@ port_is_bond_fake_iface(const struct port *port) + static void + add_del_bridges(const struct ovsrec_open_vswitch *cfg) + { +- struct bridge *br, *next; ++ struct bridge *br; + struct shash_node *node; + struct shash new_br; + size_t i; +@@ -1993,7 +1993,7 @@ add_del_bridges(const struct ovsrec_open_vswitch *cfg) + + /* Get rid of deleted bridges or those whose types have changed. + * Update 'cfg' of bridges that still exist. */ +- HMAP_FOR_EACH_SAFE (br, next, node, &all_bridges) { ++ HMAP_FOR_EACH_SAFE (br, node, &all_bridges) { + br->cfg = shash_find_data(&new_br, br->name); + if (!br->cfg || strcmp(br->type, ofproto_normalize_type( + br->cfg->datapath_type))) { +@@ -3266,13 +3266,13 @@ bridge_run(void) + + if (ovsdb_idl_is_lock_contended(idl)) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); +- struct bridge *br, *next_br; ++ struct bridge *br; + + VLOG_ERR_RL(&rl, "another ovs-vswitchd process is running, " + "disabling this process (pid %ld) until it goes away", + (long int) getpid()); + +- HMAP_FOR_EACH_SAFE (br, next_br, node, &all_bridges) { ++ HMAP_FOR_EACH_SAFE (br, node, &all_bridges) { + bridge_destroy(br, false); + } + /* Since we will not be running system_stats_run() in this process +@@ -3594,13 +3594,13 @@ static void + bridge_destroy(struct bridge *br, bool del) + { + if (br) { +- struct mirror *mirror, *next_mirror; +- struct port *port, *next_port; ++ struct mirror *mirror; ++ struct port *port; + +- HMAP_FOR_EACH_SAFE (port, next_port, hmap_node, &br->ports) { ++ HMAP_FOR_EACH_SAFE (port, hmap_node, &br->ports) { + 
port_destroy(port); + } +- HMAP_FOR_EACH_SAFE (mirror, next_mirror, hmap_node, &br->mirrors) { ++ HMAP_FOR_EACH_SAFE (mirror, hmap_node, &br->mirrors) { + mirror_destroy(mirror); + } + +@@ -3746,11 +3746,11 @@ static void + bridge_del_ports(struct bridge *br, const struct shash *wanted_ports) + { + struct shash_node *port_node; +- struct port *port, *next; ++ struct port *port; + + /* Get rid of deleted ports. + * Get rid of deleted interfaces on ports that still exist. */ +- HMAP_FOR_EACH_SAFE (port, next, hmap_node, &br->ports) { ++ HMAP_FOR_EACH_SAFE (port, hmap_node, &br->ports) { + port->cfg = shash_find_data(wanted_ports, port->name); + if (!port->cfg) { + port_destroy(port); +@@ -4211,7 +4211,7 @@ bridge_configure_aa(struct bridge *br) + const struct ovsdb_datum *mc; + struct ovsrec_autoattach *auto_attach = br->cfg->auto_attach; + struct aa_settings aa_s; +- struct aa_mapping *m, *next; ++ struct aa_mapping *m; + size_t i; + + if (!auto_attach) { +@@ -4227,7 +4227,7 @@ bridge_configure_aa(struct bridge *br) + mc = ovsrec_autoattach_get_mappings(auto_attach, + OVSDB_TYPE_INTEGER, + OVSDB_TYPE_INTEGER); +- HMAP_FOR_EACH_SAFE (m, next, hmap_node, &br->mappings) { ++ HMAP_FOR_EACH_SAFE (m, hmap_node, &br->mappings) { + union ovsdb_atom atom; + + atom.integer = m->isid; +@@ -4341,12 +4341,12 @@ static void + bridge_aa_refresh_queued(struct bridge *br) + { + struct ovs_list *list = xmalloc(sizeof *list); +- struct bridge_aa_vlan *node, *next; ++ struct bridge_aa_vlan *node; + + ovs_list_init(list); + ofproto_aa_vlan_get_queued(br->ofproto, list); + +- LIST_FOR_EACH_SAFE (node, next, list_node, list) { ++ LIST_FOR_EACH_SAFE (node, list_node, list) { + struct port *port; + + VLOG_INFO("ifname=%s, vlan=%u, oper=%u", node->port_name, node->vlan, +@@ -4387,7 +4387,7 @@ port_create(struct bridge *br, const struct ovsrec_port *cfg) + static void + port_del_ifaces(struct port *port) + { +- struct iface *iface, *next; ++ struct iface *iface; + struct sset new_ifaces; + 
size_t i; + +@@ -4398,7 +4398,7 @@ port_del_ifaces(struct port *port) + } + + /* Get rid of deleted interfaces. */ +- LIST_FOR_EACH_SAFE (iface, next, port_elem, &port->ifaces) { ++ LIST_FOR_EACH_SAFE (iface, port_elem, &port->ifaces) { + if (!sset_contains(&new_ifaces, iface->name)) { + iface_destroy(iface); + } +@@ -4412,13 +4412,13 @@ port_destroy(struct port *port) + { + if (port) { + struct bridge *br = port->bridge; +- struct iface *iface, *next; ++ struct iface *iface; + + if (br->ofproto) { + ofproto_bundle_unregister(br->ofproto, port); + } + +- LIST_FOR_EACH_SAFE (iface, next, port_elem, &port->ifaces) { ++ LIST_FOR_EACH_SAFE (iface, port_elem, &port->ifaces) { + iface_destroy__(iface); + } + +@@ -5013,12 +5013,12 @@ bridge_configure_mirrors(struct bridge *br) + { + const struct ovsdb_datum *mc; + unsigned long *flood_vlans; +- struct mirror *m, *next; ++ struct mirror *m; + size_t i; + + /* Get rid of deleted mirrors. */ + mc = ovsrec_bridge_get_mirrors(br->cfg, OVSDB_TYPE_UUID); +- HMAP_FOR_EACH_SAFE (m, next, hmap_node, &br->mirrors) { ++ HMAP_FOR_EACH_SAFE (m, hmap_node, &br->mirrors) { + union ovsdb_atom atom; + + atom.uuid = m->uuid; +diff --git a/vtep/vtep-ctl.c b/vtep/vtep-ctl.c +index ab552457d9..99c4adcd53 100644 +--- a/vtep/vtep-ctl.c ++++ b/vtep/vtep-ctl.c +@@ -801,16 +801,16 @@ vtep_ctl_context_invalidate_cache(struct ctl_context *ctx) + + SHASH_FOR_EACH (node, &vtepctl_ctx->lswitches) { + struct vtep_ctl_lswitch *ls = node->data; +- struct shash_node *node2, *next_node2; ++ struct shash_node *node2; + + shash_destroy(&ls->ucast_local); + shash_destroy(&ls->ucast_remote); + +- SHASH_FOR_EACH_SAFE (node2, next_node2, &ls->mcast_local) { ++ SHASH_FOR_EACH_SAFE (node2, &ls->mcast_local) { + struct vtep_ctl_mcast_mac *mcast_mac = node2->data; +- struct vtep_ctl_ploc *ploc, *next_ploc; ++ struct vtep_ctl_ploc *ploc; + +- LIST_FOR_EACH_SAFE (ploc, next_ploc, locators_node, ++ LIST_FOR_EACH_SAFE (ploc, locators_node, + &mcast_mac->locators) { + 
free(ploc); + } +@@ -818,11 +818,11 @@ vtep_ctl_context_invalidate_cache(struct ctl_context *ctx) + } + shash_destroy(&ls->mcast_local); + +- SHASH_FOR_EACH_SAFE (node2, next_node2, &ls->mcast_remote) { ++ SHASH_FOR_EACH_SAFE (node2, &ls->mcast_remote) { + struct vtep_ctl_mcast_mac *mcast_mac = node2->data; +- struct vtep_ctl_ploc *ploc, *next_ploc; ++ struct vtep_ctl_ploc *ploc; + +- LIST_FOR_EACH_SAFE (ploc, next_ploc, locators_node, ++ LIST_FOR_EACH_SAFE (ploc, locators_node, + &mcast_mac->locators) { + free(ploc); + } +@@ -1229,9 +1229,9 @@ del_port(struct vtep_ctl_context *vtepctl_ctx, struct vtep_ctl_port *port) + static void + del_pswitch(struct vtep_ctl_context *vtepctl_ctx, struct vtep_ctl_pswitch *ps) + { +- struct vtep_ctl_port *port, *next_port; ++ struct vtep_ctl_port *port; + +- LIST_FOR_EACH_SAFE (port, next_port, ports_node, &ps->ports) { ++ LIST_FOR_EACH_SAFE (port, ports_node, &ps->ports) { + del_port(vtepctl_ctx, port); + } + diff --git a/SPECS/openvswitch2.16.spec b/SPECS/openvswitch2.16.spec deleted file mode 100644 index beb2e3b..0000000 --- a/SPECS/openvswitch2.16.spec +++ /dev/null @@ -1,1910 +0,0 @@ -# Copyright (C) 2009, 2010, 2013, 2014 Nicira Networks, Inc. -# -# Copying and distribution of this file, with or without modification, -# are permitted in any medium without royalty provided the copyright -# notice and this notice are preserved. This file is offered as-is, -# without warranty of any kind. -# -# If tests have to be skipped while building, specify the '--without check' -# option. For example: -# rpmbuild -bb --without check rhel/openvswitch-fedora.spec - -# This defines the base package name's version. 
- -%define pkgname openvswitch2.16 - - -%if 0%{?commit:1} -%global shortcommit %(c=%{commit}; echo ${c:0:7}) -%endif - -# Enable PIE, bz#955181 -%global _hardened_build 1 - -# RHEL-7 doesn't define _rundir macro yet -# Fedora 15 onwards uses /run as _rundir -%if 0%{!?_rundir:1} -%define _rundir /run -%endif - -# FIXME Test "STP - flush the fdb and mdb when topology changed" fails on s390x -# FIXME 2 tests fails on ppc64le. They will be hopefully fixed before official 2.11 -%ifarch %{ix86} x86_64 aarch64 -%bcond_without check -%else -%bcond_with check -%endif -# option to run kernel datapath tests, requires building as root! -%bcond_with check_datapath_kernel -# option to build with libcap-ng, needed for running OVS as regular user -%bcond_without libcapng -# option to build with ipsec support -%bcond_without ipsec - -# Build python2 (that provides python) and python3 subpackages on Fedora -# Build only python3 (that provides python) subpackage on RHEL8 -# Build only python subpackage on RHEL7 -%if 0%{?rhel} > 7 || 0%{?fedora} -# On RHEL8 Sphinx is included in buildroot -%global external_sphinx 1 -%else -# Don't use external sphinx (RHV doesn't have optional repositories enabled) -%global external_sphinx 0 -%endif - -Name: %{pkgname} -Summary: Open vSwitch -Group: System Environment/Daemons daemon/database/utilities -URL: http://www.openvswitch.org/ -Version: 2.16.0 -Release: 81%{?dist} - -# Nearly all of openvswitch is ASL 2.0. The bugtool is LGPLv2+, and the -# lib/sflow*.[ch] files are SISSL -# datapath/ is GPLv2 (although not built into any of the binary packages) -License: ASL 2.0 and LGPLv2+ and SISSL - -%define dpdkver 20.11.1 -%define dpdkdir dpdk -%define dpdksver %(echo %{dpdkver} | cut -d. 
-f-2) -# NOTE: DPDK does not currently build for s390x -# DPDK on aarch64 is not stable enough to be enabled in FDP -%if 0%{?rhel} > 7 || 0%{?fedora} -%define dpdkarches x86_64 ppc64le -%else -%define dpdkarches -%endif - -%if 0%{?commit:1} -Source: https://github.com/openvswitch/ovs/archive/%{commit}.tar.gz#/openvswitch-%{commit}.tar.gz -%else -Source: https://github.com/openvswitch/ovs/archive/v%{version}.tar.gz#/openvswitch-%{version}.tar.gz -%endif -Source10: https://fast.dpdk.org/rel/dpdk-%{dpdkver}.tar.xz - -%define docutilsver 0.12 -%define pygmentsver 1.4 -%define sphinxver 1.2.3 -Source100: https://pypi.io/packages/source/d/docutils/docutils-%{docutilsver}.tar.gz -Source101: https://pypi.io/packages/source/P/Pygments/Pygments-%{pygmentsver}.tar.gz -Source102: https://pypi.io/packages/source/S/Sphinx/Sphinx-%{sphinxver}.tar.gz - -Patch: openvswitch-%{version}.patch - -# The DPDK is designed to optimize througput of network traffic using, among -# other techniques, carefully crafted assembly instructions. As such it -# needs extensive work to port it to other architectures. -ExclusiveArch: x86_64 aarch64 ppc64le s390x - -# Do not enable this otherwise YUM will break on any upgrade. 
-# Provides: openvswitch -Conflicts: openvswitch < 2.16 -Conflicts: openvswitch-dpdk < 2.16 -Conflicts: openvswitch2.10 -Conflicts: openvswitch2.11 -Conflicts: openvswitch2.12 -Conflicts: openvswitch2.13 -Conflicts: openvswitch2.14 -Conflicts: openvswitch2.15 - -# FIXME Sphinx is used to generate some manpages, unfortunately, on RHEL, it's -# in the -optional repository and so we can't require it directly since RHV -# doesn't have the -optional repository enabled and so TPS fails -%if %{external_sphinx} -BuildRequires: python3-sphinx -%else -# Sphinx dependencies -BuildRequires: python-devel -BuildRequires: python-setuptools -#BuildRequires: python2-docutils -BuildRequires: python-jinja2 -BuildRequires: python-nose -#BuildRequires: python2-pygments -# docutils dependencies -BuildRequires: python-imaging -# pygments dependencies -BuildRequires: python-nose -%endif - -BuildRequires: gcc gcc-c++ make -BuildRequires: autoconf automake libtool -BuildRequires: systemd-units openssl openssl-devel -BuildRequires: python3-devel python3-setuptools -BuildRequires: desktop-file-utils -BuildRequires: groff-base graphviz -BuildRequires: unbound-devel -# make check dependencies -BuildRequires: procps-ng -%if 0%{?rhel} > 7 || 0%{?fedora} -BuildRequires: python3-pyOpenSSL -%endif -%if %{with check_datapath_kernel} -BuildRequires: nmap-ncat -# would be useful but not available in RHEL or EPEL -#BuildRequires: pyftpdlib -%endif - -%if %{with libcapng} -BuildRequires: libcap-ng libcap-ng-devel -%endif - -%ifarch %{dpdkarches} -BuildRequires: meson -# DPDK driver dependencies -BuildRequires: zlib-devel numactl-devel -%ifarch x86_64 -BuildRequires: rdma-core-devel >= 15 libmnl-devel -%endif - -# Required by packaging policy for the bundled DPDK -Provides: bundled(dpdk) = %{dpdkver} -%endif - -Requires: openssl iproute module-init-tools -#Upstream kernel commit 4f647e0a3c37b8d5086214128614a136064110c3 -#Requires: kernel >= 3.15.0-0 -Requires: openvswitch-selinux-extra-policy - 
-Requires(pre): shadow-utils -Requires(post): /bin/sed -Requires(post): /usr/sbin/usermod -Requires(post): /usr/sbin/groupadd -Requires(post): systemd-units -Requires(preun): systemd-units -Requires(postun): systemd-units -Obsoletes: openvswitch-controller <= 0:2.1.0-1 - -%description -Open vSwitch provides standard network bridging functions and -support for the OpenFlow protocol for remote per-flow control of -traffic. - -%package -n python3-%{pkgname} -Summary: Open vSwitch python3 bindings -License: ASL 2.0 -Requires: %{pkgname} = %{?epoch:%{epoch}:}%{version}-%{release} -Provides: python-%{pkgname} = %{?epoch:%{epoch}:}%{version}-%{release} - -%description -n python3-%{pkgname} -Python bindings for the Open vSwitch database - -%package test -Summary: Open vSwitch testing utilities -License: ASL 2.0 -BuildArch: noarch -Requires: python3-%{pkgname} = %{?epoch:%{epoch}:}%{version}-%{release} -Requires: tcpdump - -%description test -Utilities that are useful to diagnose performance and connectivity -issues in Open vSwitch setup. - -%package devel -Summary: Open vSwitch OpenFlow development package (library, headers) -License: ASL 2.0 -Requires: %{pkgname} = %{?epoch:%{epoch}:}%{version}-%{release} - -%description devel -This provides shared library, libopenswitch.so and the openvswitch header -files needed to build an external application. - -%if 0%{?rhel} > 7 || 0%{?fedora} > 28 -%package -n network-scripts-%{name} -Summary: Open vSwitch legacy network service support -License: ASL 2.0 -Requires: network-scripts -Supplements: (%{name} and network-scripts) - -%description -n network-scripts-%{name} -This provides the ifup and ifdown scripts for use with the legacy network -service. 
-%endif - -%if %{with ipsec} -%package ipsec -Summary: Open vSwitch IPsec tunneling support -License: ASL 2.0 -Requires: python3-%{pkgname} = %{?epoch:%{epoch}:}%{version}-%{release} -Requires: libreswan - -%description ipsec -This package provides IPsec tunneling support for OVS tunnels. -%endif - -%prep -%if 0%{?commit:1} -%setup -q -n ovs-%{commit} -a 10 -%else -%setup -q -n ovs-%{version} -a 10 -%endif -%if ! %{external_sphinx} -%if 0%{?commit:1} -%setup -n ovs-%{commit} -q -D -T -a 100 -a 101 -a 102 -%else -%setup -n ovs-%{version} -q -D -T -a 100 -a 101 -a 102 -%endif -%endif - -mv dpdk-*/ %{dpdkdir}/ - -# FIXME should we propose a way to do that upstream? -sed -ri "/^subdir\('(usertools|app)'\)/d" %{dpdkdir}/meson.build - -%patch -p1 - -%build -# Build Sphinx on RHEL -%if ! %{external_sphinx} -export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}%{_builddir}/pytmp/lib/python" -for x in docutils-%{docutilsver} Pygments-%{pygmentsver} Sphinx-%{sphinxver}; do - pushd "$x" - python2 setup.py install --home %{_builddir}/pytmp - popd -done - -export PATH="$PATH:%{_builddir}/pytmp/bin" -%endif - -./boot.sh - -%ifarch %{dpdkarches} # build dpdk -# Lets build DPDK first -cd %{dpdkdir} - -ENABLED_DRIVERS=( - bus/pci - bus/vdev - mempool/ring - net/failsafe - net/i40e - net/ring - net/vhost - net/virtio - net/tap -) - -%ifarch x86_64 -ENABLED_DRIVERS+=( - bus/vmbus - common/iavf - common/mlx5 - net/bnxt - net/enic - net/iavf - net/ice - net/mlx5 - net/netvsc - net/nfp - net/qede - net/vdev_netvsc -) -%endif - -%ifarch aarch64 x86_64 -ENABLED_DRIVERS+=( - net/e1000 - net/ixgbe -) -%endif - -# Since upstream doesn't have a way -for driver in drivers/*/*/; do - driver=${driver#drivers/} - driver=${driver%/} - [[ " ${ENABLED_DRIVERS[@]} " == *" $driver "* ]] || \ - disable_drivers="${disable_drivers:+$disable_drivers,}"$driver -done - -#CFLAGS="$(echo %{optflags} | sed -e 's:-Wall::g' -e 's:-march=[[:alnum:]]* ::g') -Wformat -fPIC %{_hardening_ldflags}" \ -%set_build_flags 
-%__meson --prefix=%{_builddir}/dpdk-build \ - --buildtype=plain \ - -Ddisable_drivers="$disable_drivers" \ - -Dmachine=default \ - -Dmax_ethports=1024 \ - -Dmax_numa_nodes=8 \ - -Dtests=false \ - %{_vpath_builddir} -%meson_build -%__meson install -C %{_vpath_builddir} --no-rebuild - -# FIXME currently with LTO enabled OVS tries to link with both static and shared libraries -rm -v %{_builddir}/dpdk-build/%{_lib}/*.so* - -# Generate a list of supported drivers, its hard to tell otherwise. -cat << EOF > README.DPDK-PMDS -DPDK drivers included in this package: - -EOF - -for f in %{_builddir}/dpdk-build/%{_lib}/librte_net_*.a; do - basename ${f} | cut -c12- | cut -d. -f1 | tr [:lower:] [:upper:] -done >> README.DPDK-PMDS - -cat << EOF >> README.DPDK-PMDS - -For further information about the drivers, see -http://dpdk.org/doc/guides-%{dpdksver}/nics/index.html -EOF - -cd - -%endif # build dpdk - -# And now for OVS... -mkdir build-shared build-static -pushd build-shared -ln -s ../configure -%configure \ -%if %{with libcapng} - --enable-libcapng \ -%else - --disable-libcapng \ -%endif - --disable-static \ - --enable-shared \ - --enable-ssl \ - --with-pkidir=%{_sharedstatedir}/openvswitch/pki -make %{?_smp_mflags} -popd -pushd build-static -ln -s ../configure -%ifarch %{dpdkarches} -PKG_CONFIG_PATH=%{_builddir}/dpdk-build/%{_lib}/pkgconfig \ -%endif -%configure \ -%if %{with libcapng} - --enable-libcapng \ -%else - --disable-libcapng \ -%endif - --enable-ssl \ -%ifarch %{dpdkarches} - --with-dpdk=static \ -%endif - --with-pkidir=%{_sharedstatedir}/openvswitch/pki -make %{?_smp_mflags} -popd - -/usr/bin/python3 build-aux/dpdkstrip.py \ - --dpdk \ - < rhel/usr_lib_systemd_system_ovs-vswitchd.service.in \ - > rhel/usr_lib_systemd_system_ovs-vswitchd.service - -%install -rm -rf $RPM_BUILD_ROOT -make -C build-shared install-libLTLIBRARIES DESTDIR=$RPM_BUILD_ROOT -make -C build-static install DESTDIR=$RPM_BUILD_ROOT - -install -d -m 0755 $RPM_BUILD_ROOT%{_rundir}/openvswitch 
-install -d -m 0750 $RPM_BUILD_ROOT%{_localstatedir}/log/openvswitch -install -d -m 0755 $RPM_BUILD_ROOT%{_sysconfdir}/openvswitch - -install -p -D -m 0644 rhel/usr_lib_udev_rules.d_91-vfio.rules \ - $RPM_BUILD_ROOT%{_udevrulesdir}/91-vfio.rules - -install -p -D -m 0644 \ - rhel/usr_share_openvswitch_scripts_systemd_sysconfig.template \ - $RPM_BUILD_ROOT/%{_sysconfdir}/sysconfig/openvswitch - -for service in openvswitch ovsdb-server ovs-vswitchd \ - ovs-delete-transient-ports; do - install -p -D -m 0644 \ - rhel/usr_lib_systemd_system_${service}.service \ - $RPM_BUILD_ROOT%{_unitdir}/${service}.service -done - -%if %{with ipsec} -install -p -D -m 0644 rhel/usr_lib_systemd_system_openvswitch-ipsec.service \ - $RPM_BUILD_ROOT%{_unitdir}/openvswitch-ipsec.service -%endif - -install -m 0755 rhel/etc_init.d_openvswitch \ - $RPM_BUILD_ROOT%{_datadir}/openvswitch/scripts/openvswitch.init - -install -p -D -m 0644 rhel/etc_openvswitch_default.conf \ - $RPM_BUILD_ROOT/%{_sysconfdir}/openvswitch/default.conf - -install -p -D -m 0644 rhel/etc_logrotate.d_openvswitch \ - $RPM_BUILD_ROOT/%{_sysconfdir}/logrotate.d/openvswitch - -install -m 0644 vswitchd/vswitch.ovsschema \ - $RPM_BUILD_ROOT/%{_datadir}/openvswitch/vswitch.ovsschema - -install -d -m 0755 $RPM_BUILD_ROOT/%{_sysconfdir}/sysconfig/network-scripts/ -install -p -m 0755 rhel/etc_sysconfig_network-scripts_ifdown-ovs \ - $RPM_BUILD_ROOT/%{_sysconfdir}/sysconfig/network-scripts/ifdown-ovs -install -p -m 0755 rhel/etc_sysconfig_network-scripts_ifup-ovs \ - $RPM_BUILD_ROOT/%{_sysconfdir}/sysconfig/network-scripts/ifup-ovs - -install -d -m 0755 $RPM_BUILD_ROOT%{python3_sitelib} -cp -a $RPM_BUILD_ROOT/%{_datadir}/openvswitch/python/ovstest \ - $RPM_BUILD_ROOT%{python3_sitelib} - -# Build the JSON C extension for the Python lib (#1417738) -pushd python -( -export CPPFLAGS="-I ../include -I ../build-shared/include" -export LDFLAGS="%{__global_ldflags} -L $RPM_BUILD_ROOT%{_libdir}" -%py3_build -%py3_install -[ -f 
"$RPM_BUILD_ROOT/%{python3_sitearch}/ovs/_json$(python3-config --extension-suffix)" ] -) -popd - -rm -rf $RPM_BUILD_ROOT/%{_datadir}/openvswitch/python/ - -install -d -m 0755 $RPM_BUILD_ROOT/%{_sharedstatedir}/openvswitch - -install -d -m 0755 $RPM_BUILD_ROOT%{_prefix}/lib/firewalld/services/ - -install -p -D -m 0755 \ - rhel/usr_share_openvswitch_scripts_ovs-systemd-reload \ - $RPM_BUILD_ROOT%{_datadir}/openvswitch/scripts/ovs-systemd-reload - -touch $RPM_BUILD_ROOT%{_sysconfdir}/openvswitch/conf.db -# The db needs special permission as IPsec Pre-shared keys are stored in it. -chmod 0640 $RPM_BUILD_ROOT%{_sysconfdir}/openvswitch/conf.db - -touch $RPM_BUILD_ROOT%{_sysconfdir}/openvswitch/system-id.conf - -# remove unpackaged files -rm -f $RPM_BUILD_ROOT/%{_bindir}/ovs-benchmark \ - $RPM_BUILD_ROOT/%{_bindir}/ovs-docker \ - $RPM_BUILD_ROOT/%{_bindir}/ovs-parse-backtrace \ - $RPM_BUILD_ROOT/%{_bindir}/ovs-testcontroller \ - $RPM_BUILD_ROOT/%{_sbindir}/ovs-vlan-bug-workaround \ - $RPM_BUILD_ROOT/%{_mandir}/man1/ovs-benchmark.1* \ - $RPM_BUILD_ROOT/%{_mandir}/man8/ovs-testcontroller.* \ - $RPM_BUILD_ROOT/%{_mandir}/man8/ovs-vlan-bug-workaround.8* - -%if ! 
%{with ipsec} -rm -f $RPM_BUILD_ROOT/%{_datadir}/openvswitch/scripts/ovs-monitor-ipsec -%endif - -# remove ovn unpackages files -rm -f $RPM_BUILD_ROOT%{_bindir}/ovn* -rm -f $RPM_BUILD_ROOT%{_mandir}/man1/ovn* -rm -f $RPM_BUILD_ROOT%{_mandir}/man5/ovn* -rm -f $RPM_BUILD_ROOT%{_mandir}/man7/ovn* -rm -f $RPM_BUILD_ROOT%{_mandir}/man8/ovn* -rm -f $RPM_BUILD_ROOT%{_datadir}/openvswitch/ovn* -rm -f $RPM_BUILD_ROOT%{_datadir}/openvswitch/scripts/ovn* -rm -f $RPM_BUILD_ROOT%{_includedir}/ovn/* - -%check -%if %{with check} - pushd build-static - touch resolv.conf - export OVS_RESOLV_CONF=$(pwd)/resolv.conf - if make check TESTSUITEFLAGS='%{_smp_mflags}' || - make check TESTSUITEFLAGS='--recheck'; then :; - else - cat tests/testsuite.log - exit 1 - fi - popd -%endif -%if %{with check_datapath_kernel} - pushd build-static - if make check-kernel RECHECK=yes; then :; - else - cat tests/system-kmod-testsuite.log - exit 1 - fi - popd -%endif - -%clean -rm -rf $RPM_BUILD_ROOT - -%preun -%if 0%{?systemd_preun:1} - %systemd_preun openvswitch.service -%else - if [ $1 -eq 0 ] ; then - # Package removal, not upgrade - /bin/systemctl --no-reload disable openvswitch.service >/dev/null 2>&1 || : - /bin/systemctl stop openvswitch.service >/dev/null 2>&1 || : - fi -%endif - -%pre -getent group openvswitch >/dev/null || groupadd -r openvswitch -getent passwd openvswitch >/dev/null || \ - useradd -r -g openvswitch -d / -s /sbin/nologin \ - -c "Open vSwitch Daemons" openvswitch - -%ifarch %{dpdkarches} - getent group hugetlbfs >/dev/null || groupadd hugetlbfs - usermod -a -G hugetlbfs openvswitch -%endif -exit 0 - -%post -if [ $1 -eq 1 ]; then - sed -i 's:^#OVS_USER_ID=:OVS_USER_ID=:' /etc/sysconfig/openvswitch - -%ifarch %{dpdkarches} - sed -i \ - 's@OVS_USER_ID="openvswitch:openvswitch"@OVS_USER_ID="openvswitch:hugetlbfs"@'\ - /etc/sysconfig/openvswitch -%endif -fi -chown -R openvswitch:openvswitch /etc/openvswitch - -%if 0%{?systemd_post:1} - %systemd_post openvswitch.service -%else - # 
Package install, not upgrade - if [ $1 -eq 1 ]; then - /bin/systemctl daemon-reload >dev/null || : - fi -%endif - -%postun -%if 0%{?systemd_postun:1} - %systemd_postun openvswitch.service -%else - /bin/systemctl daemon-reload >/dev/null 2>&1 || : -%endif - -%triggerun -- openvswitch < 2.5.0-22.git20160727%{?dist} -# old rpm versions restart the service in postun, but -# due to systemd some preparation is needed. -if systemctl is-active openvswitch >/dev/null 2>&1 ; then - /usr/share/openvswitch/scripts/ovs-ctl stop >/dev/null 2>&1 || : - systemctl daemon-reload >/dev/null 2>&1 || : - systemctl stop openvswitch ovsdb-server ovs-vswitchd >/dev/null 2>&1 || : - systemctl start openvswitch >/dev/null 2>&1 || : -fi -exit 0 - -%files -n python3-%{pkgname} -%{python3_sitearch}/ovs -%{python3_sitearch}/ovs-*.egg-info -%doc LICENSE - -%files test -%{_bindir}/ovs-pcap -%{_bindir}/ovs-tcpdump -%{_bindir}/ovs-tcpundump -%{_mandir}/man1/ovs-pcap.1* -%{_mandir}/man8/ovs-tcpdump.8* -%{_mandir}/man1/ovs-tcpundump.1* -%{_bindir}/ovs-test -%{_bindir}/ovs-vlan-test -%{_bindir}/ovs-l3ping -%{_mandir}/man8/ovs-test.8* -%{_mandir}/man8/ovs-vlan-test.8* -%{_mandir}/man8/ovs-l3ping.8* -%{python3_sitelib}/ovstest - -%files devel -%{_libdir}/*.so -%{_libdir}/pkgconfig/*.pc -%{_includedir}/openvswitch/* -%{_includedir}/openflow/* -%exclude %{_libdir}/*.a -%exclude %{_libdir}/*.la - -%if 0%{?rhel} > 7 || 0%{?fedora} > 28 -%files -n network-scripts-%{name} -%{_sysconfdir}/sysconfig/network-scripts/ifup-ovs -%{_sysconfdir}/sysconfig/network-scripts/ifdown-ovs -%endif - -%files -%defattr(-,openvswitch,openvswitch) -%dir %{_sysconfdir}/openvswitch -%{_sysconfdir}/openvswitch/default.conf -%config %ghost %verify(not owner group md5 size mtime) %{_sysconfdir}/openvswitch/conf.db -%ghost %attr(0600,-,-) %verify(not owner group md5 size mtime) %{_sysconfdir}/openvswitch/.conf.db.~lock~ -%config %ghost %{_sysconfdir}/openvswitch/system-id.conf -%defattr(-,root,root) -%config(noreplace) %verify(not md5 
size mtime) %{_sysconfdir}/sysconfig/openvswitch -%{_sysconfdir}/bash_completion.d/ovs-appctl-bashcomp.bash -%{_sysconfdir}/bash_completion.d/ovs-vsctl-bashcomp.bash -%config(noreplace) %{_sysconfdir}/logrotate.d/openvswitch -%{_unitdir}/openvswitch.service -%{_unitdir}/ovsdb-server.service -%{_unitdir}/ovs-vswitchd.service -%{_unitdir}/ovs-delete-transient-ports.service -%{_datadir}/openvswitch/scripts/openvswitch.init -%{_datadir}/openvswitch/scripts/ovs-check-dead-ifs -%{_datadir}/openvswitch/scripts/ovs-lib -%{_datadir}/openvswitch/scripts/ovs-save -%{_datadir}/openvswitch/scripts/ovs-vtep -%{_datadir}/openvswitch/scripts/ovs-ctl -%{_datadir}/openvswitch/scripts/ovs-kmod-ctl -%{_datadir}/openvswitch/scripts/ovs-systemd-reload -%config %{_datadir}/openvswitch/vswitch.ovsschema -%config %{_datadir}/openvswitch/vtep.ovsschema -%{_bindir}/ovs-appctl -%{_bindir}/ovs-dpctl -%{_bindir}/ovs-ofctl -%{_bindir}/ovs-vsctl -%{_bindir}/ovsdb-client -%{_bindir}/ovsdb-tool -%{_bindir}/ovs-pki -%{_bindir}/vtep-ctl -%{_libdir}/*.so.* -%{_sbindir}/ovs-vswitchd -%{_sbindir}/ovsdb-server -%{_mandir}/man1/ovsdb-client.1* -%{_mandir}/man1/ovsdb-server.1* -%{_mandir}/man1/ovsdb-tool.1* -%{_mandir}/man5/ovsdb.5* -%{_mandir}/man5/ovsdb-server.5.* -%{_mandir}/man5/ovs-vswitchd.conf.db.5* -%{_mandir}/man5/vtep.5* -%{_mandir}/man7/ovsdb-server.7* -%{_mandir}/man7/ovsdb.7* -%{_mandir}/man7/ovs-actions.7* -%{_mandir}/man7/ovs-fields.7* -%{_mandir}/man8/vtep-ctl.8* -%{_mandir}/man8/ovs-appctl.8* -%{_mandir}/man8/ovs-ctl.8* -%{_mandir}/man8/ovs-dpctl.8* -%{_mandir}/man8/ovs-kmod-ctl.8.* -%{_mandir}/man8/ovs-ofctl.8* -%{_mandir}/man8/ovs-pki.8* -%{_mandir}/man8/ovs-vsctl.8* -%{_mandir}/man8/ovs-vswitchd.8* -%{_mandir}/man8/ovs-parse-backtrace.8* -%{_udevrulesdir}/91-vfio.rules -%doc LICENSE NOTICE README.rst NEWS rhel/README.RHEL.rst -%ifarch %{dpdkarches} -%doc %{dpdkdir}/README.DPDK-PMDS -%attr(750,openvswitch,hugetlbfs) %verify(not owner group) /var/log/openvswitch -%else 
-%attr(750,openvswitch,openvswitch) %verify(not owner group) /var/log/openvswitch -%endif -/var/lib/openvswitch -%ghost %attr(755,root,root) %verify(not owner group) %{_rundir}/openvswitch -%{_datadir}/openvswitch/bugtool-plugins/ -%{_datadir}/openvswitch/scripts/ovs-bugtool-* -%{_bindir}/ovs-dpctl-top -%{_sbindir}/ovs-bugtool -%{_mandir}/man8/ovs-dpctl-top.8* -%{_mandir}/man8/ovs-bugtool.8* -%if (0%{?rhel} && 0%{?rhel} <= 7) || (0%{?fedora} && 0%{?fedora} < 29) -%{_sysconfdir}/sysconfig/network-scripts/ifup-ovs -%{_sysconfdir}/sysconfig/network-scripts/ifdown-ovs -%endif - -%if %{with ipsec} -%files ipsec -%{_datadir}/openvswitch/scripts/ovs-monitor-ipsec -%{_unitdir}/openvswitch-ipsec.service -%endif - -%changelog -* Wed Jun 15 2022 Open vSwitch CI - 2.16.0-81 -- Merging upstream branch-2.16 [RH git: 4e6f367244] - Commit list: - 64f289dc97 Prepare for 2.16.5. - b614d27ebe Set release date for 2.16.4. - - -* Tue Jun 07 2022 Open vSwitch CI - 2.16.0-80 -- Merging upstream branch-2.16 [RH git: 45dcf738b0] - Commit list: - 87922569f3 ofproto-dpif-xlate: Fix internal CT state for non-recirc traffic. - 51aa8dd106 classifier: Adjust segment boundary to execute prerequisite processing. (#2081773) - - -* Tue May 31 2022 Open vSwitch CI - 2.16.0-79 -- Merging upstream branch-2.16 [RH git: c224775aed] - Commit list: - 840c3fcb12 ofproto-dpif: Fix meter use-after-free. - 77c89b0d25 ovs-rcu: Add ovsrcu_barrier. - - -* Thu May 26 2022 Ilya Maximets - 2.16.0-78 -- Merging upstream branch-2.16 [RH git: d7d5f09849] - Commit list: - c8c78a76e5 ovsdb: raft: Fix transaction double commit due to lost leadership. (#2046340) - 2809af022a Revert "odp-util: Always report ODP_FIT_TOO_LITTLE for IGMP." - 90e31552be ofproto-dpif: Trigger revalidation if ct tp changes. 
- - -* Wed May 25 2022 Open vSwitch CI - 2.16.0-77 -- Merging upstream branch-2.16 [RH git: 3e3d3725d3] - Commit list: - 72bad27674 Carefully release NBL in Windows - - -* Wed May 18 2022 Open vSwitch CI - 2.16.0-76 -- Merging upstream branch-2.16 [RH git: 72426100fe] - Commit list: - 6a304c7866 tests: Properly kill ovsdb test processes. - 44dfae2991 ovs-save: Get highest ofp version error. - 63754ac391 netdev-linux: Properly access 32-bit aligned rtnl_link_stats64 structs. - - -* Wed May 04 2022 Open vSwitch CI - 2.16.0-75 -- Merging upstream branch-2.16 [RH git: 0c22edcd05] - Commit list: - df77b74438 ofproto-dpif-xlate: Remove mirror assert. - c81571d602 netdev-dpdk: Fix tx drops statistic for a down netdev. - - -* Thu Apr 28 2022 Timothy Redaelli - 2.16.0-74 -- vhost: fix queue number check when setting inflight FD [RH git: d084ce15a7] - [ upstream commit 6442c329b9d2ded0f44b27d2016aaba8ba5844c5 ] - - In function vhost_user_set_inflight_fd, queue number in inflight - message is used to access virtqueue. However, queue number could - be larger than VHOST_MAX_VRING and cause write OOB as this number - will be used to write inflight info in virtqueue structure. This - patch checks the queue number to avoid the issue and also make - sure virtqueues are allocated before setting inflight information. - - Fixes: ad0a4ae491fe ("vhost: checkout resubmit inflight information") - - Reported-by: Wenxiang Qian - Signed-off-by: Chenbo Xia - Reviewed-by: Maxime Coquelin - - -* Thu Apr 28 2022 Timothy Redaelli - 2.16.0-73 -- vhost: fix FD leak with inflight messages [RH git: fafbd8f642] - [ upstream commit af74f7db384ed149fe42b21dbd7975f8a54ef227 ] - - Even if unlikely, a buggy vhost-user master might attach fds to inflight - messages. Add checks like for other types of vhost-user messages. 
- - Fixes: d87f1a1cb7b6 ("vhost: support inflight info sharing") - - Signed-off-by: David Marchand - Reviewed-by: Maxime Coquelin - - -* Wed Apr 27 2022 Open vSwitch CI - 2.16.0-72 -- Merging upstream branch-2.16 [RH git: 1c2e3ff275] - Commit list: - a51dd4685d ofproto-dpif-xlate: Clear out vlan flow fields while processing native tunnel. (#393566 - 2060552) - - -* Tue Apr 26 2022 Open vSwitch CI - 2.16.0-71 -- Merging upstream branch-2.16 [RH git: a0490a292c] - Commit list: - 271bea0ee0 ofproto-xlate: Fix crash when forwarding packet between legacy_l3 tunnels. - 9f9d59aeae system-traffic: Fix fragment reassembly with L3 L4 protocol information. - - -* Thu Apr 21 2022 Timothy Redaelli - 2.16.0-70 -- Really set RTE_ETH_MAXPORTS to 1024 [RH git: 104da44ad6] - Fixes: 81ff7c5a60f0 ("Change RTE_ETH_MAXPORTS to 1024") - - -* Mon Apr 18 2022 Open vSwitch CI - 2.16.0-69 -- Merging upstream branch-2.16 [RH git: c9969bac2f] - Commit list: - 2afa9d2285 cirrus: Update FreeBSD versions. - - -* Fri Apr 08 2022 Open vSwitch CI - 2.16.0-68 -- Merging upstream branch-2.16 [RH git: 2ee98fa0ff] - Commit list: - be8b35fddf Prepare for 2.16.4. - d8639f81c1 Set release date for 2.16.3. - 71a5a38c83 NEWS: Highlight libopenvswitch API change caused by UB fixes. - - -* Wed Apr 06 2022 Open vSwitch CI - 2.16.0-67 -- Merging upstream branch-2.16 [RH git: 4936a7194b] - Commit list: - 2c666b9791 netdev-offload-tc: Check for ct_state flag combinations that are not offloadable. - - -* Mon Apr 04 2022 Open vSwitch CI - 2.16.0-66 -- Merging upstream branch-2.16 [RH git: 1418edaf18] - Commit list: - 26189fd264 dpif-netdev: Fix dp_netdev_get_pmd() function getting correct core_id. - a5af081bc6 alb.at: Add tests for cross-numa polling. - 78c8f8a7f6 dpif-netdev: Fix PMD auto load balance with pmd-rxq-isolate. - 6731e581c4 pmd.at: Add tests for multi non-local numa pmds. - 60652bb3eb dpif-netdev: Fix non-local numa selection for more than two numas. 
- c113039503 ofproto-dpif-xlate: Fix NULL pointer dereference in xlate_normal(). - - -* Wed Mar 30 2022 Open vSwitch CI - 2.16.0-65 -- Merging upstream branch-2.16 [RH git: b4c45acc47] - Commit list: - 7644c924e8 sparse: bump recommended version and include headers. - 20b87feba9 rculist: use multi-variable helpers for loop macros. - 05a440fafb hindex: use multi-variable iterators. - 04dca15004 cmap: use multi-variable iterators. - 80e64f712d hmap: implement UB-safe hmap pop iterator. - 3b4b0af690 hmap: use multi-variable helpers for hmap loops. - 05e899ea8f list: use multi-variable helpers for list loops. - d2406399ae util: add helpers to overload SAFE macro. - f22f9d947a util: add safe multi-variable iterators. - 72c3e8627c util: add multi-variable loop iterator macros. - - -* Wed Mar 30 2022 Open vSwitch CI - 2.16.0-64 -- Merging upstream branch-2.16 [RH git: 32008eb008] - Commit list: - 1570924c3f ovsdb: raft: Fix inability to read the database with DNS host names. (#2055097) - - -* Mon Mar 28 2022 Open vSwitch CI - 2.16.0-63 -- Merging upstream branch-2.16 [RH git: a3c48a5aeb] - Commit list: - c50a0f080d system-traffic.at: Fix flaky DNAT load balancing test. - 9928344ea7 dpif-netdev: Keep orig_in_port as a field of the flow. - aee2e66287 tests: Fix incorrect usage of OVS_WAIT_UNTIL. - 5881545bd0 odp-util: Fix output for tc to be equal to kernel. - 4a80c322f9 netdev-offload-tc: Fix IP and port ranges in flower returns. - 49e0bb72bc netdev-offload-tc: Fix use of ICMP values instead of masks defines. - 0fb545c7d9 netdev-offload-tc: Always include conntrack information to tc. - 13a3f57976 netdev-offload-tc: Check for valid netdev ifindex in flow_put. - 6e72fd96d3 netdev-offload-tc: Set the correct VLAN_VID and VLAN_PCP masks. - e43157f303 netdev-offload-tc: Add debug logs on tc rule verify failures. - 37297e7ee6 tc: Keep header rewrite actions order. 
- 823be413ec dpdk: Use DPDK 20.11.4 release - - -* Fri Mar 11 2022 Open vSwitch CI - 2.16.0-62 -- Merging upstream branch-2.16 [RH git: 561b178a3d] - Commit list: - 47b5374280 system-dpdk: Fix mfex autovalidator tests. - 98a74bd487 ofp-prop: Silence the 'may be uninitialized' warning. - ab4f30e02b ovsdb-cluster.at: Avoid test failures due to different hashing. - - -* Mon Mar 07 2022 Open vSwitch CI - 2.16.0-61 -- Merging upstream branch-2.16 [RH git: 0e0cf86cf5] - Commit list: - d5d2bd3c09 ofproto: Use xlate map for uuid lookups. - d158b29fb6 ofproto: Add refcount to ofproto to fix ofproto use-after-free. - - -* Sat Mar 05 2022 Open vSwitch CI - 2.16.0-60 -- Merging upstream branch-2.16 [RH git: 67312d8bee] - Commit list: - 43882d8372 ofproto-dpif: Trigger revalidation when ipfix config set. - 218bb05fb2 system-tso: Skip encap tests when userspace TSO is enabled. - - -* Fri Mar 04 2022 Open vSwitch CI - 2.16.0-59 -- Merging upstream branch-2.16 [RH git: 832e52bea7] - Commit list: - 1515e085b9 tc: Fix stats byte count on fragmented packets. - 7a3b46d517 compat: Add gen_stats include to define tc hw stats. - - -* Tue Mar 01 2022 Timothy Redaelli - 2.16.0-58 -- Change RTE_ETH_MAXPORTS to 1024 [RH git: 81ff7c5a60] (#2059758) - Resolves: #2059758 - - -* Fri Feb 25 2022 Open vSwitch CI - 2.16.0-57 -- Merging upstream branch-2.16 [RH git: 897937f6d3] - Commit list: - 9598f0529c ovsdb: raft: Fix inability to join the cluster after interrupted attempt. (#2033514) - - -* Fri Feb 25 2022 Open vSwitch CI - 2.16.0-56 -- Merging upstream branch-2.16 [RH git: e4d6d108a3] - Commit list: - fb4767b472 dpif-netdev: Fix a race condition in deletion of offloaded flows. - 3e72eae031 dpif-netdev: Move port flush after datapath reconfiguration. - - -* Thu Feb 24 2022 Open vSwitch CI - 2.16.0-55 -- Merging upstream branch-2.16 [RH git: 970214133d] - Commit list: - 0168e7989d reconnect: Fix broken inactivity probe if there is no other reason to wake up. 
- - -* Thu Feb 24 2022 Open vSwitch CI - 2.16.0-54 -- Merging upstream branch-2.16 [RH git: ac5da61d03] - Commit list: - dee52795e6 datapath-windows: Fix NXM_OF_IP_TOS issue - - -* Wed Feb 16 2022 Open vSwitch CI - 2.16.0-53 -- Merging upstream branch-2.16 [RH git: b2df459e49] - Commit list: - dcde9771c5 ovsdb-idl: Fix use-after-free when destroying an IDL loop. - - -* Wed Feb 16 2022 Open vSwitch CI - 2.16.0-52 -- Merging upstream branch-2.16 [RH git: bba08b5363] - Commit list: - 8e23c06f24 dpif-netdev-dpcls: Make subtable reprobe thread-safe. - ac0e3dd3ba ci: Fix typo in variable name. - fc25e0397a dp-packet: Ensure packet base is always non-NULL. - dbae56e702 bfd: lldp: stp: Fix misaligned packet field access. - ee17b06cf9 ovsdb-idlc: Avoid accessing member within NULL idl index cursors. - 1d799a5d17 stopwatch: Fix buffer underflow when computing percentiles. - - -* Wed Feb 09 2022 Open vSwitch CI - 2.16.0-51 -- Merging upstream branch-2.16 [RH git: 7b6570c65f] - Commit list: - 0954c2911d ofproto: Fix ipfix not always sampling on egress. (#2016346) - - -* Wed Feb 09 2022 Open vSwitch CI - 2.16.0-50 -- Merging upstream branch-2.16 [RH git: c5ad7f71c5] - Commit list: - 867e586b45 tc: Fix incorrect TC rule for decap+encap datapath flow. - - -* Tue Feb 08 2022 Open vSwitch CI - 2.16.0-49 -- Merging upstream branch-2.16 [RH git: 4541c91b99] - Commit list: - 418e6a0b8e dpif-netdev: fix vlan and ipv4 parsing in avx512 - - -* Mon Feb 07 2022 Michael Santana - 2.16.0-48 -- Merging upstream branch-2.16 [RH git: 9d51785142] - Commit list: - 1ec567a752 ci: Install wheel before installing any other python packages. - 031a99cef0 odp-util: Fix tunnel key attr for GTP-U. - 558699c73c ovsdb-idl: Only process successful txn in ovsdb_idl_loop_run. - - -* Wed Feb 02 2022 Open vSwitch CI - 2.16.0-47 -- Merging upstream branch-2.16 [RH git: 6e6f66ffd0] - Commit list: - 0276bdb30a ofproto-dpif-upcall: Fix n_revalidators on upcall show. 
- - -* Wed Feb 02 2022 Open vSwitch CI - 2.16.0-46 -- Merging upstream branch-2.16 [RH git: 513117cbb0] - Commit list: - 16575362dc acinclude: Detect avx512 vpopcntdq compiler support. - - -* Tue Feb 01 2022 Ilya Maximets - 2.16.0-45 -- ovsdb: transaction: Keep one entry in the transaction history. [RH git: 7665f42d12] (#2044621) - commit 6e13565dd32fb2cf5517f51ca06956e2052c4bba - Author: Ilya Maximets - Date: Sun Dec 19 15:09:38 2021 +0100 - - ovsdb: transaction: Keep one entry in the transaction history. - - If a single transaction exceeds the size of the whole database (e.g., - a lot of rows got removed and new ones added), transaction history will - be drained. This leads to sending UUID_ZERO to the clients as the last - transaction id in the next monitor update, because monitor doesn't - know what was the actual last transaction id. In case of a re-connect - that will cause re-downloading of the whole database, since the - client's last_id will be out of sync. - - One solution would be to store the last transaction ID separately - from the actual transactions, but that will require a careful - management in cases where database gets reset and the history needs - to be cleared. Keeping the one last transaction instead to avoid - the problem. That should not be a big concern in terms of memory - consumption, because this last transaction will be removed from the - history once the next transaction appeared. This is also not a concern - for a fast re-sync, because this last transaction will not be used - for the monitor reply; it's either client already has it, so no need - to send, or it's a history miss. - - The test updated to not check the number of atoms if there is only - one transaction in the history. 
- - Fixes: 317b1bfd7dd3 ("ovsdb: Don't let transaction history grow larger than the database.") - Acked-by: Mike Pattrick - Acked-by: Han Zhou - Signed-off-by: Ilya Maximets - - Reported-at: https://bugzilla.redhat.com/2044621 - Signed-off-by: Ilya Maximets - - -* Mon Jan 31 2022 Open vSwitch CI - 2.16.0-44 -- Merging upstream branch-2.16 [RH git: d202cd6da1] - Commit list: - 34c830c540 ovsdb-idl: ovsdb_idl_loop_destroy must also destroy the committing txn. - 13009736b2 ovsdb-cs: Clear last_id on reconnect if condition changes in-flight. - 017e2ae50e ofp-flow: Skip flow reply if it exceeds the maximum message size. - e0c6f92a95 ovsdb-cs: Fix ignoring of the last id from the initial monitor reply. (#2044624) - - -* Fri Jan 28 2022 Ilya Maximets - 2.16.0-43 -- ovsdb: storage: Randomize should_snapshot checks when the minimum time passed. [RH git: abe61535ca] (#2044614) - commit 339f97044e3c2312fbb65b932fa14a181acf40d5 - Author: Ilya Maximets - Date: Mon Dec 13 16:43:33 2021 +0100 - - ovsdb: storage: Randomize should_snapshot checks when the minimum time passed. - - Snapshots are scheduled for every 10-20 minutes. It's a random value - in this interval for each server. Once the time is up, but the maximum - time (24 hours) not reached yet, ovsdb will start checking if the log - grew a lot on every iteration. Once the growth is detected, compaction - is triggered. - - OTOH, it's very common for an OVSDB cluster to not have the log growing - very fast. If the log didn't grow 2x in 20 minutes, the randomness of - the initial scheduled time is gone and all the servers are checking if - they need to create snapshot on every iteration. And since all of them - are part of the same cluster, their logs are growing with the same - speed. Once the critical mass is reached, all the servers will start - creating snapshots at the same time. If the database is big enough, - that might leave the cluster unresponsive for an extended period of - time (e.g. 
10-15 seconds for OVN_Southbound database in a larger scale - OVN deployment) until the compaction completed. - - Fix that by re-scheduling a quick retry if the minimal time already - passed. Effectively, this will work as a randomized 1-2 min delay - between checks, so the servers will not synchronize. - - Scheduling function updated to not change the upper limit on quick - reschedules to avoid delaying the snapshot creation indefinitely. - Currently quick re-schedules are only used for the error cases, and - there is always a 'slow' re-schedule after the successful compaction. - So, the change of a scheduling function doesn't change the current - behavior much. - - Signed-off-by: Ilya Maximets - Acked-by: Han Zhou - Acked-by: Dumitru Ceara - - Reported-at: https://bugzilla.redhat.com/2044614 - Signed-off-by: Ilya Maximets - - -* Fri Jan 28 2022 Ilya Maximets - 2.16.0-42 -- raft: Only allow followers to snapshot. [RH git: 915efc8c00] (#2044614) - commit bf07cc9cdb2f37fede8c0363937f1eb9f4cfd730 - Author: Dumitru Ceara - Date: Mon Dec 13 20:46:03 2021 +0100 - - raft: Only allow followers to snapshot. - - Commit 3c2d6274bcee ("raft: Transfer leadership before creating - snapshots.") made it such that raft leaders transfer leadership before - snapshotting. However, there's still the case when the next leader to - be is in the process of snapshotting. To avoid delays in that case too, - we now explicitly allow snapshots only on followers. Cluster members - will have to wait until the current election is settled before - snapshotting. - - Given the following logs taken from an OVN_Southbound 3-server cluster - during a scale test: - - S1 (old leader): - 19:07:51.226Z|raft|INFO|Transferring leadership to write a snapshot. 
- 19:08:03.830Z|ovsdb|INFO|OVN_Southbound: Database compaction took 12601ms - 19:08:03.940Z|raft|INFO|server 8b8d is leader for term 43 - - S2 (follower): - 19:08:00.870Z|raft|INFO|server 8b8d is leader for term 43 - - S3 (new leader): - 19:07:51.242Z|raft|INFO|received leadership transfer from f5c9 in term 42 - 19:07:51.244Z|raft|INFO|term 43: starting election - 19:08:00.805Z|ovsdb|INFO|OVN_Southbound: Database compaction took 9559ms - 19:08:00.869Z|raft|INFO|term 43: elected leader by 2+ of 3 servers - - We see that the leader to be (S3) receives the leadership transfer, - initiates the election and immediately after starts a snapshot that - takes ~9.5 seconds. During this time, S2 votes for S3 electing it - as cluster leader but S3 doesn't effectively become leader until it - finishes snapshotting, essentially keeping the cluster without a - leader for up to ~9.5 seconds. - - With the current change, S3 will delay compaction and snapshotting until - the election is finished. - - The only exception is the case of single-node clusters for which we - allow the node to snapshot regardless of role. - - Acked-by: Han Zhou - Signed-off-by: Dumitru Ceara - Signed-off-by: Ilya Maximets - - Reported-at: https://bugzilla.redhat.com/2044614 - Signed-off-by: Ilya Maximets - - -* Wed Jan 26 2022 Open vSwitch CI - 2.16.0-41 -- Merging upstream branch-2.16 [RH git: f1ca7b8ac3] - Commit list: - 2571b1a464 ofproto-dpif: Fix issue with non-reversible actions on a patch ports. - - -* Fri Jan 21 2022 Open vSwitch CI - 2.16.0-40 -- Merging upstream branch-2.16 [RH git: 60b19f443c] - Commit list: - 07a115f7d9 ovs-monitor-ipsec: Fix generated strongSwan ipsec.conf for IPv6. 
- - -* Thu Jan 20 2022 Open vSwitch CI - 2.16.0-39 -- Merging upstream branch-2.16 [RH git: 349d687673] - Commit list: - f2ee013f73 datapath-windows: Pickup Ct tuple as CT lookup key in function OvsCtSetupLookupCtx - - -* Tue Jan 18 2022 Open vSwitch CI - 2.16.0-38 -- Merging upstream branch-2.16 [RH git: e370e283cf] - Commit list: - bd8ebcd10c Documentation: Fix Rx/Tx queue configuration section. - - -* Mon Jan 17 2022 Open vSwitch CI - 2.16.0-37 -- Merging upstream branch-2.16 [RH git: c9297f5ef7] - Commit list: - 29936a853f ofproto-dpif: Fix memory leak in dpif/show-dp-features appctl. - - -* Thu Jan 13 2022 Open vSwitch CI - 2.16.0-36 -- Merging upstream branch-2.16 [RH git: edae801e00] - Commit list: - ba7fffb832 dpif-netdev: Improve loading of packet data for undersized packets. - - -* Sat Dec 18 2021 Open vSwitch CI - 2.16.0-35 -- Merging upstream branch-2.16 [RH git: 6ad0375ff5] - Commit list: - 2595b7b3d1 Prepare for 2.16.3. - 6caaae525c Set release date for 2.16.2. - 443e3657d7 ofproto-dpif-xlate: Snoop ingress packets and update neigh cache if needed. - 75d2ef9a60 tnl-neigh-cache: Do not refresh the entry while revalidating. - 5d88836566 tnl-neigh-cache: Read/write expires atomically. - fb42c99c15 dpif-netdev: Improve handling of IP/TCP in avx512 mfex. - - -* Thu Dec 09 2021 Open vSwitch CI - 2.16.0-34 -- Merging upstream branch-2.16 [RH git: 07b9bf085a] - Commit list: - f42c484445 compat: handle NF_REPEAT error on nf_conntrack_in. - - -* Mon Dec 06 2021 Open vSwitch CI - 2.16.0-33 -- Merging upstream branch-2.16 [RH git: 8708b55152] - Commit list: - 3e527f21cf flow: Consider dataofs when parsing TCP packets. - b537e049ad tests/flowgen: Fix packet data endianness. - 35244b4980 ofproto: Fix resource usage explosion due to removal of large number of flows. - a201297639 ofproto: Fix resource usage explosion while processing bundled FLOW_MOD. - cd0133402c tests/flowgen: Fix length field of 802.2 data link header. 
- 2d65b8ffd2 ovs-lib: Backup and remove existing DB when joining cluster. - ab01177637 docs/dpdk: Fix install doc. - 38a2129524 ovs-save: Save igmp flows in ofp_parse syntax. - dc77857ce2 faq: Update OVS/DPDK version table for OVS 2.13/2.14. - - -* Thu Nov 18 2021 Open vSwitch CI - 2.16.0-32 -- Merging upstream branch-2.16 [RH git: e90e06a818] - Commit list: - 1d8e0f861f ofproto-dpif-xlate: Fix check_pkt_larger incomplete translation. - - -* Mon Nov 15 2021 Open vSwitch CI - 2.16.0-31 -- Merging upstream branch-2.16 [RH git: 77a249d38b] - Commit list: - f8f2f7c9cb datapath-windows: Reset flow key after Ipv4 fragments are reassembled - - -* Wed Nov 10 2021 Timothy Redaelli - 2.16.0-30 -- python: Replace pyOpenSSL with ssl. [RH git: 0cd5867531] (#1988429) - Currently, pyOpenSSL is half-deprecated upstream and so it's removed on - some distributions (for example on CentOS Stream 9, - https://issues.redhat.com/browse/CS-336), but since OVS only - supports Python 3 it's possible to replace pyOpenSSL with "import ssl" - included in base Python 3. - - Stream recv and send had to be splitted as _recv and _send, since SSLError - is a subclass of socket.error and so it was not possible to except for - SSLWantReadError and SSLWantWriteError in recv and send of SSLStream. - - TCPstream._open cannot be used in SSLStream, since Python ssl module - requires the SSL socket to be created before connecting it, so - SSLStream._open needs to create the socket, create SSL socket and then - connect the SSL socket. - - Reported-by: Timothy Redaelli - Reported-at: https://bugzilla.redhat.com/1988429 - Signed-off-by: Timothy Redaelli - Acked-by: Terry Wilson - Tested-by: Terry Wilson - Signed-off-by: Ilya Maximets - Signed-off-by: Timothy Redaelli - - -* Wed Nov 10 2021 Timothy Redaelli - 2.16.0-29 -- python: socket-util: Split inet_open_active function and use connect_ex. 
[RH git: 2e704b371c] - In an upcoming patch, PyOpenSSL will be replaced with Python ssl module, - but in order to do an async connection with Python ssl module the ssl - socket must be created when the socket is created, but before the - socket is connected. - - So, inet_open_active function is splitted in 3 parts: - - inet_create_socket_active: creates the socket and returns the family and - the socket, or (error, None) if some error needs to be returned. - - inet_connect_active: connect the socket and returns the errno (it - returns 0 if errno is EINPROGRESS or EWOULDBLOCK). - - connect is replaced by connect_ex, since Python suggest to use it for - asynchronous connects and it's also cleaner since inet_connect_active - returns errno that connect_ex already returns, moreover due to a Python - limitation connect cannot not be used with ssl module. - - inet_open_active function is changed in order to use the new functions - inet_create_socket_active and inet_connect_active. - - Signed-off-by: Timothy Redaelli - Acked-by: Terry Wilson - Tested-by: Terry Wilson - Signed-off-by: Ilya Maximets - Signed-off-by: Timothy Redaelli - - -* Wed Nov 10 2021 Timothy Redaelli - 2.16.0-28 -- redhat: remove mlx4 support [RH git: 4c846afd24] (#1998122) - Resolves: #1998122 - - -* Tue Nov 09 2021 Ilya Maximets - 2.16.0-27 -- ovsdb: Don't let transaction history grow larger than the database. [RH git: 93d1fa0bdf] (#2012949) - commit 317b1bfd7dd315e241c158e6d4095002ff391ee3 - Author: Ilya Maximets - Date: Tue Sep 28 13:17:21 2021 +0200 - - ovsdb: Don't let transaction history grow larger than the database. - - If user frequently changes a lot of rows in a database, transaction - history could grow way larger than the database itself. This wastes - a lot of memory and also makes monitor_cond_since slower than - usual monotor_cond if the transaction id is old enough, because - re-construction of the changes from a history is slower than just - creation of initial database snapshot. 
This is also the case if - user deleted a lot of data, so transaction history still holds all of - it while the database itself doesn't. - - In case of current lb-per-service model in ovn-kubernetes, each - load-balancer is added to every logical switch/router. Such a - transaction touches more than a half of a OVN_Northbound database. - And each of these transactions is added to the transaction history. - Since transaction history depth is 100, in worst case scenario, - it will hold 100 copies of a database increasing memory consumption - dramatically. In tests with 3000 LBs and 120 LSs, memory goes up - to 3 GB, while holding at 30 MB if transaction history disabled in - the code. - - Fixing that by keeping count of the number of ovsdb_atom's in the - database and not allowing the total number of atoms in transaction - history to grow larger than this value. Counting atoms is fairly - cheap because we don't need to iterate over them, so it doesn't have - significant performance impact. It would be ideal to measure the - size of individual atoms, but that will hit the performance. - Counting cells instead of atoms is not sufficient, because OVN - users are adding hundreds or thousands of atoms to a single cell, - so they are largely different in size. - - Signed-off-by: Ilya Maximets - Acked-by: Han Zhou - Acked-by: Dumitru Ceara - - Reported-at: https://bugzilla.redhat.com/2012949 - Signed-off-by: Ilya Maximets - - -* Tue Nov 09 2021 Ilya Maximets - 2.16.0-26 -- ovsdb: transaction: Incremental reassessment of weak refs. [RH git: e8a363db49] (#2005958) - commit 4dbff9f0a68579241ac1a040726be3906afb8fe9 - Author: Ilya Maximets - Date: Sat Oct 16 03:20:23 2021 +0200 - - ovsdb: transaction: Incremental reassessment of weak refs. - - The main idea is to not store list of weak references in the source - row, so they all don't need to be re-checked/updated on every - modification of that source row. 
The point is that source row already - knows UUIDs of all destination rows stored in the data, so there is no - much profit in storing this information somewhere else. If needed, - destination row can be looked up and reference can be looked up in the - destination row. For the fast lookup, destination row now stores - references in a hash map. - - Weak reference structure now contains the table and uuid of a source - row instead of a direct pointer. This allows to replace/update the - source row without breaking any weak references stored in destination - rows. - - Structure also now contains the key-value pair of atoms that triggered - creation of this reference. These atoms can be used to quickly - subtract removed references from a source row. During reassessment, - ovsdb now only needs to care about new added or removed atoms, and - atoms that got removed due to removal of the destination rows, but - these are marked for reassessment by the destination row. - - ovsdb_datum_subtract() is used to remove atoms that points to removed - or incorrect rows, so there is no need to re-sort datum in the end. - - Results of an OVN load-balancer benchmark that adds 3K load-balancers - to each of 120 logical switches and 120 logical routers in the OVN - sandbox with clustered Northbound database and then removes them: - - Before: - - %CPU CPU Time CMD - 86.8 00:16:05 ovsdb-server nb1.db - 44.1 00:08:11 ovsdb-server nb2.db - 43.2 00:08:00 ovsdb-server nb3.db - - After: - - %CPU CPU Time CMD - 54.9 00:02:58 ovsdb-server nb1.db - 33.3 00:01:48 ovsdb-server nb2.db - 32.2 00:01:44 ovsdb-server nb3.db - - So, on a cluster leader the processing time dropped by 5.4x, on - followers - by 4.5x. More load-balancers - larger the performance - difference. There is a slight increase of memory usage, because new - reference structure is larger, but the difference is not significant. 
- - Signed-off-by: Ilya Maximets - Acked-by: Dumitru Ceara - - Reported-at: https://bugzilla.redhat.com/2005958 - Signed-off-by: Ilya Maximets - - -* Thu Oct 28 2021 Open vSwitch CI - 2.16.0-25 -- Merging upstream branch-2.16 [RH git: f5366890c5] - Commit list: - c221c8e613 datapath-windows:Reset PseudoChecksum value only for TX direction offload case - - -* Wed Oct 27 2021 Open vSwitch CI - 2.16.0-24 -- Merging upstream branch-2.16 [RH git: 4682b76694] - Commit list: - b79f0369f2 ci: Make linux-prepare trust system installs. - - -* Mon Oct 25 2021 Open vSwitch CI - 2.16.0-23 -- Merging upstream branch-2.16 [RH git: cce913794e] - Commit list: - 2a4c87f300 Prepare for 2.16.2. - aaa1439b8e Set release date for 2.16.1. - - -* Thu Oct 21 2021 Open vSwitch CI - 2.16.0-22 -- Merging upstream branch-2.16 [RH git: 29f01c4fdb] - Commit list: - 108176ab5a github: Stick to python 3.9. - - -* Tue Oct 19 2021 Open vSwitch CI - 2.16.0-21 -- Merging upstream branch-2.16 [RH git: 2546fa9646] - Commit list: - 5c5e34603b datapath-windows: add layers when adding the deferred actions - - -* Thu Oct 14 2021 Open vSwitch CI - 2.16.0-20 -- Merging upstream branch-2.16 [RH git: d572c95f69] - Commit list: - 458a4f75f3 ofproto-dpif-xlate: Fix zone set from non-frozen-metadata fields. - - -* Wed Oct 13 2021 Open vSwitch CI - 2.16.0-19 -- Merging upstream branch-2.16 [RH git: 557ca689f7] - Commit list: - 6d8190584a dpif-netdev: Fix use-after-free on PACKET_OUT of IP fragments. - 44a66cc1d0 tunnel-push-pop.at: Mask source port in tunnel header. - - -* Tue Oct 12 2021 Open vSwitch CI - 2.16.0-18 -- Merging upstream branch-2.16 [RH git: a6c4770398] - Commit list: - 27a5848a33 ovs-ctl: Add missing description for --ovs-vswitchd-options and --ovsdb-server-options to usage(). - 0300d0c0c2 dpdk-stub: Change the ERR log to DBG. - cdd6dd821d dpif-netlink: Fix feature negotiation for older kernels. - c2682c42cb dpif-netdev: Fix pmd thread comments to include SMC. 
- 9377f4a465 python: idl: Avoid sending transactions when the DB is not synced up. - - -* Tue Oct 12 2021 Open vSwitch CI - 2.16.0-17 -- Merging upstream branch-2.16 [RH git: c1145b5236] - Commit list: - 0fd17fbb09 ipf: release unhandled packets from the batch - - -* Thu Sep 30 2021 Open vSwitch CI - 2.16.0-16 -- Merging upstream branch-2.16 [RH git: 5c05133179] - Commit list: - 3f692fba98 datapath-windows:adjust Offset when processing packet in POP_VLAN action - - -* Wed Sep 29 2021 Dumitru Ceara - 2.16.0-15 -- ovsdb-data: Deduplicate string atoms. [RH git: 24e7d1140e] (#2006839) - commit 429b114c5aadee24ccfb16ad7d824f45cdcea75a - Author: Ilya Maximets - Date: Wed Sep 22 09:28:50 2021 +0200 - - ovsdb-server spends a lot of time cloning atoms for various reasons, - e.g. to create a diff of two rows or to clone a row to the transaction. - All atoms, except for strings, contains a simple value that could be - copied in efficient way, but duplicating strings every time has a - significant performance impact. - - Introducing a new reference-counted structure 'ovsdb_atom_string' - that allows to not copy strings every time, but just increase a - reference counter. - - This change allows to increase transaction throughput in benchmarks - up to 2x for standalone databases and 3x for clustered databases, i.e. - number of transactions that ovsdb-server can handle per second. - It also noticeably reduces memory consumption of ovsdb-server. - - Next step will be to consolidate this structure with json strings, - so we will not need to duplicate strings while converting database - objects to json and back. - - Signed-off-by: Ilya Maximets - Acked-by: Dumitru Ceara - Acked-by: Mark D. Gray - - Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2006839 - Signed-off-by: Dumitru Ceara - - -* Wed Sep 29 2021 Dumitru Ceara - 2.16.0-14 -- ovsdb-data: Add function to apply diff in-place. 
[RH git: df0e4bda98] (#2006851) - commit 32b51326ef9c307b4acd0bacafb0218dd1372f3d - Author: Ilya Maximets - Date: Thu Sep 23 01:47:24 2021 +0200 - - ovsdb_datum_apply_diff() is heavily used in ovsdb transactions, but - it's linear in terms of number of comparisons. And it also clones - all the atoms along the way. In most cases size of a diff is much - smaller than the size of the original datum, this allows to perform - the same operation in-place with only O(diff->n * log2(old->n)) - comparisons and O(old->n + diff->n) memory copies with memcpy. - Using this function while applying diffs read from the storage gives - a significant performance boost and allows to execute much more - transactions per second. - - Signed-off-by: Ilya Maximets - Acked-by: Mark D. Gray - - Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2006851 - Signed-off-by: Dumitru Ceara - - -* Wed Sep 29 2021 Dumitru Ceara - 2.16.0-13 -- ovsdb-data: Optimize subtraction of sets. [RH git: 5bace82405] (#2005483) - commit bb12b63176389e516ddfefce20dfa165f24430fb - Author: Ilya Maximets - Date: Thu Sep 23 01:47:23 2021 +0200 - - Current algorithm for ovsdb_datum_subtract looks like this: - - for-each atom in a: - if atom in b: - swap(atom, ) - destroy(atom) - quicksort(a) - - Complexity: - - Na * log2(Nb) + (Na - Nb) * log2(Na - Nb) - Search Comparisons for quicksort - - It's not optimal, especially because Nb << Na in a vast majority of - cases. - - Reversing the search phase to look up atoms from 'b' in 'a', and - closing gaps from deleted elements in 'a' by plain memory copy to - avoid quicksort. - - Resulted complexity: - - Nb * log2(Na) + (Na - Nb) - Search Memory copies - - Subtraction is heavily used while executing database transactions. - For example, to remove one port from a logical switch in OVN. 
- Complexity of such operation if original logical switch had 100 ports - goes down from - - 100 * log2(1) = 100 comparisons for search and - 99 * log2(99) = 656 comparisons for quicksort - ------------------------------ - 756 comparisons in total - to only - - 1 * log2(100) = 7 comparisons for search - + memory copy of 99 * sizeof (union ovsdb_atom) bytes. - - We could use memmove to close the gaps after removing atoms, but - it will lead to 2 memory copies inside the call, while we can perform - only one to the temporary 'result' and swap pointers. - - Performance in cases, where sizes of 'a' and 'b' are comparable, - should not change. Cases with Nb >> Na should not happen in practice. - - All in all, this change allows ovsdb-server to perform several times - more transactions, that removes elements from sets, per second. - - Signed-off-by: Ilya Maximets - Acked-by: Han Zhou - Acked-by: Mark D. Gray - - Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2005483 - Signed-off-by: Dumitru Ceara - - -* Wed Sep 29 2021 Dumitru Ceara - 2.16.0-12 -- ovsdb-data: Optimize union of sets. [RH git: e2a4c7d794] (#2005483) - commit 51946d22274cd591dc061358fb507056fbd91420 - Author: Ilya Maximets - Date: Thu Sep 23 01:47:22 2021 +0200 - - Current algorithm of ovsdb_datum_union looks like this: - - for-each atom in b: - if not bin_search(a, atom): - push(a, clone(atom)) - quicksort(a) - - So, the complexity looks like this: - - Nb * log2(Na) + Nb + (Na + Nb) * log2(Na + Nb) - Comparisons clones Comparisons for quicksort - for search - - ovsdb_datum_union() is heavily used in database transactions while - new element is added to a set. For example, if new logical switch - port is added to a logical switch in OVN. This is a very common - use case where CMS adds one new port to an existing switch that - already has, let's say, 100 ports. 
For this case ovsdb-server will - have to perform: - - 1 * log2(100) + 1 clone + 101 * log2(101) - Comparisons Comparisons for - for search quicksort. - ~7 1 ~707 - Roughly 714 comparisons of atoms and 1 clone. - - Since binary search can give us position, where new atom should go - (it's the 'low' index after the search completion) for free, the - logic can be re-worked like this: - - copied = 0 - for-each atom in b: - desired_position = bin_search(a, atom) - push(result, a[ copied : desired_position - 1 ]) - copied = desired_position - push(result, clone(atom)) - push(result, a[ copied : Na ]) - swap(a, result) - - Complexity of this schema: - - Nb * log2(Na) + Nb + Na - Comparisons clones memory copy on push - for search - - 'swap' is just a swap of a few pointers. 'push' is not a 'clone', - but a simple memory copy of 'union ovsdb_atom'. - - In general, this schema substitutes complexity of a quicksort - with complexity of a memory copy of Na atom structures, where we're - not even copying strings that these atoms are pointing to. - - Complexity in the example above goes down from 714 comparisons - to 7 comparisons and memcpy of 100 * sizeof (union ovsdb_atom) bytes. - - General complexity of a memory copy should always be lower than - complexity of a quicksort, especially because these copies usually - performed in bulk, so this new schema should work faster for any input. - - All in all, this change allows to execute several times more - transactions per second for transactions that adds new entries to sets. - - Alternatively, union can be implemented as a linear merge of two - sorted arrays, but this will result in O(Na) comparisons, which - is more than Nb * log2(Na) in common case, since Na is usually - far bigger than Nb. Linear merge will also mean per-atom memory - copies instead of copying in bulk. - - 'replace' functionality of ovsdb_datum_union() had no users, so it - just removed. But it can easily be added back if needed in the future. 
- - Signed-off-by: Ilya Maximets - Acked-by: Han Zhou - Acked-by: Mark D. Gray - - Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2005483 - Signed-off-by: Dumitru Ceara - - -* Wed Sep 29 2021 Dumitru Ceara - 2.16.0-11 -- ovsdb: transaction: Use diffs for strong reference counting. [RH git: 85da133eaa] (#2003203) - commit b2712d026eae2d9a5150c2805310eaf506e1f162 - Author: Ilya Maximets - Date: Tue Sep 14 00:19:57 2021 +0200 - - Currently, even if one reference added to the set of strong references - or removed from it, ovsdb-server will walk through the whole set and - re-count references to other rows. These referenced rows will also be - added to the transaction in order to re-count their references. - - For example, every time Logical Switch Port added to a Logical Switch, - OVN Northbound database server will walk through all ports of this - Logical Switch, clone their rows, and re-count references. This is - not very efficient. Instead, it can only increase reference counters - for added references and reduce for removed ones. In many cases this - will be only one row affected in the Logical_Switch_Port table. - - Introducing new function that generates a diff of two datum objects, - but stores added and removed atoms separately, so they can be used - to increase or decrease row reference counters accordingly. - - This change allows to perform several times more transactions that - adds or removes strong references to/from sets per second, because - ovsdb-server no longer clones and re-counts rows that are irrelevant - to current transaction. - - Acked-by: Dumitru Ceara - Signed-off-by: Ilya Maximets - - Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2003203 - Signed-off-by: Dumitru Ceara - - -* Mon Sep 27 2021 Open vSwitch CI - 2.16.0-10 -- Merging upstream branch-2.16 [RH git: 2114714012] - Commit list: - 547371ecdb cirrus: Reduce memory requirements for FreeBSD VMs. 
- - -* Thu Sep 23 2021 Timothy Redaelli - 2.16.0-9 -- redhat: use hugetlbfs group for /var/log/openvswitch when dpdk is enabled [RH git: 4e5928b671] (#2004543) - Resolves: #2004543 - - -* Thu Sep 16 2021 Open vSwitch CI - 2.16.0-8 -- Merging upstream branch-2.16 [RH git: 7332b410fc] - Commit list: - facaf5bc71 netdev-linux: Fix a null pointer dereference in netdev_linux_notify_sock(). - 6e203d4873 pcap-file: Fix memory leak in ovs_pcap_open(). - f50da0b267 odp-util: Fix a null pointer dereference in odp_flow_format(). - 7da752e43f odp-util: Fix a null pointer dereference in odp_nsh_key_from_attr__(). - bc22b01459 netdev-dpdk: Fix RSS configuration for virtio. - 81706c5d43 ipf: Fix only nat the first fragment in the reass process. - - -* Wed Sep 08 2021 Open vSwitch CI - 2.16.0-7 -- Merging upstream branch-2.16 [RH git: e71f31dfd6] - Commit list: - 242c280f0e dpif-netdev: Fix crash when PACKET_OUT is metered. - - -* Tue Aug 31 2021 Ilya Maximets - 2.16.0-6 -- ovsdb: monitor: Store serialized json in a json cache. [RH git: bc20330c85] (#1996152) - commit 43e66fc27659af2a5c976bdd27fe747b442b5554 - Author: Ilya Maximets - Date: Tue Aug 24 21:00:39 2021 +0200 - - Same json from a json cache is typically sent to all the clients, - e.g., in case of OVN deployment with ovn-monitor-all=true. - - There could be hundreds or thousands connected clients and ovsdb - will serialize the same json object for each of them before sending. - - Serializing it once before storing into json cache to speed up - processing. - - This change allows to save a lot of CPU cycles and a bit of memory - since we need to store in memory only a string and not the full json - object. - - Testing with ovn-heater on 120 nodes using density-heavy scenario - shows reduction of the total CPU time used by Southbound DB processes - from 256 minutes to 147. 
Duration of unreasonably long poll intervals - also reduced dramatically from 7 to 2 seconds: - - Count Min Max Median Mean 95 percentile - ------------------------------------------------------------- - Before 1934 1012 7480 4302.5 4875.3 7034.3 - After 1909 1004 2730 1453.0 1532.5 2053.6 - - Acked-by: Dumitru Ceara - Acked-by: Han Zhou - Signed-off-by: Ilya Maximets - - Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1996152 - Signed-off-by: Ilya Maximets - - -* Tue Aug 31 2021 Ilya Maximets - 2.16.0-5 -- raft: Don't keep full json objects in memory if no longer needed. [RH git: 4606423e8b] (#1990058) - commit 0de882954032aa37dc943bafd72c33324aa0c95a - Author: Ilya Maximets - Date: Tue Aug 24 21:00:38 2021 +0200 - - raft: Don't keep full json objects in memory if no longer needed. - - Raft log entries (and raft database snapshot) contains json objects - of the data. Follower receives append requests with data that gets - parsed and added to the raft log. Leader receives execution requests, - parses data out of them and adds to the log. In both cases, later - ovsdb-server reads the log with ovsdb_storage_read(), constructs - transaction and updates the database. On followers these json objects - in common case are never used again. Leader may use them to send - append requests or snapshot installation requests to followers. - However, all these operations (except for ovsdb_storage_read()) are - just serializing the json in order to send it over the network. - - Json objects are significantly larger than their serialized string - representation. For example, the snapshot of the database from one of - the ovn-heater scale tests takes 270 MB as a string, but 1.6 GB as - a json object from the total 3.8 GB consumed by ovsdb-server process. 
- - ovsdb_storage_read() for a given raft entry happens only once in a - lifetime, so after this call, we can serialize the json object, store - the string representation and free the actual json object that ovsdb - will never need again. This can save a lot of memory and can also - save serialization time, because each raft entry for append requests - and snapshot installation requests serialized only once instead of - doing that every time such request needs to be sent. - - JSON_SERIALIZED_OBJECT can be used in order to seamlessly integrate - pre-serialized data into raft_header and similar json objects. - - One major special case is creation of a database snapshot. - Snapshot installation request received over the network will be parsed - and read by ovsdb-server just like any other raft log entry. However, - snapshots created locally with raft_store_snapshot() will never be - read back, because they reflect the current state of the database, - hence already applied. For this case we can free the json object - right after writing snapshot on disk. - - Tests performed with ovn-heater on 60 node density-light scenario, - where on-disk database goes up to 97 MB, shows average memory - consumption of ovsdb-server Southbound DB processes decreased by 58% - (from 602 MB to 256 MB per process) and peak memory consumption - decreased by 40% (from 1288 MB to 771 MB). - - Test with 120 nodes on density-heavy scenario with 270 MB on-disk - database shows 1.5 GB memory consumption decrease as expected. - Also, total CPU time consumed by the Southbound DB process reduced - from 296 to 256 minutes. Number of unreasonably long poll intervals - reduced from 2896 down to 1934. - - Deserialization is also implemented just in case. I didn't see this - function being invoked in practice. 
- - Acked-by: Dumitru Ceara - Acked-by: Han Zhou - Signed-off-by: Ilya Maximets - - Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1990058 - Signed-off-by: Ilya Maximets - - -* Tue Aug 31 2021 Ilya Maximets - 2.16.0-4 -- json: Add support for partially serialized json objects. [RH git: 885e5ce1b5] (#1990058) - commit b0bca6f27aae845c3ca8b48d66a7dbd3d978162a - Author: Ilya Maximets - Date: Tue Aug 24 21:00:37 2021 +0200 - - json: Add support for partially serialized json objects. - - Introducing a new json type JSON_SERIALIZED_OBJECT. It's not an - actual type that can be seen in a json message on a wire, but - internal type that is intended to hold a serialized version of - some other json object. For this reason it's defined after the - JSON_N_TYPES to not confuse parsers and other parts of the code - that relies on compliance with RFC 4627. - - With this JSON type internal users may construct large JSON objects, - parts of which are already serialized. This way, while serializing - the larger object, data from JSON_SERIALIZED_OBJECT can be added - directly to the result, without additional processing. - - This will be used by next commits to add pre-serialized JSON data - to the raft_header structure, that can be converted to a JSON - before writing the file transaction on disk or sending to other - servers. Same technique can also be used to pre-serialize json_cache - for ovsdb monitors, this should allow to not perform serialization - for every client and will save some more memory. - - Since serialized JSON is just a string, reusing the 'json->string' - pointer for it. - - Acked-by: Dumitru Ceara - Acked-by: Han Zhou - Signed-off-by: Ilya Maximets - - Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1990058 - Signed-off-by: Ilya Maximets - - -* Tue Aug 31 2021 Ilya Maximets - 2.16.0-3 -- json: Optimize string serialization. 
[RH git: bb1654da63] (#1990069) - commit 748010ff304b7cd2c43f4eb98a554433f0df07f9 - Author: Ilya Maximets - Date: Tue Aug 24 23:07:22 2021 +0200 - - json: Optimize string serialization. - - Current string serialization code puts all characters one by one. - This is slow because dynamic string needs to perform length checks - on every ds_put_char() and it's also doesn't allow compiler to use - better memory copy operations, i.e. doesn't allow copying few bytes - at once. - - Special symbols are rare in a typical database. Quotes are frequent, - but not too frequent. In databases created by ovn-kubernetes, for - example, usually there are at least 10 to 50 chars between quotes. - So, it's better to count characters that doesn't require escaping - and use fast data copy for the whole sequential block. - - Testing with a synthetic benchmark (included) on my laptop shows - following performance improvement: - - Size Q S Before After Diff - ----------------------------------------------------- - 100000 0 0 : 0.227 ms 0.142 ms -37.4 % - 100000 2 1 : 0.277 ms 0.186 ms -32.8 % - 100000 10 1 : 0.361 ms 0.309 ms -14.4 % - 10000000 0 0 : 22.720 ms 12.160 ms -46.4 % - 10000000 2 1 : 27.470 ms 19.300 ms -29.7 % - 10000000 10 1 : 37.950 ms 31.250 ms -17.6 % - 100000000 0 0 : 239.600 ms 126.700 ms -47.1 % - 100000000 2 1 : 292.400 ms 188.600 ms -35.4 % - 100000000 10 1 : 387.700 ms 321.200 ms -17.1 % - - Here Q - probability (%) for a character to be a '\"' and - S - probability (%) to be a special character ( < 32). - - Testing with a closer to real world scenario shows overall decrease - of the time needed for database compaction by ~5-10 %. And this - change also decreases CPU consumption in general, because string - serialization is used in many different places including ovsdb - monitors and raft. 
- - Signed-off-by: Ilya Maximets - Acked-by: Numan Siddique - Acked-by: Dumitru Ceara - - Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1990069 - Signed-off-by: Ilya Maximets - - -* Fri Aug 20 2021 Open vSwitch CI - 2.16.0-2 -- Merging upstream branch-2.16 [RH git: 7d7567e339] - Commit list: - 0991ea8d19 Prepare for 2.16.1. - - -* Wed Aug 18 2021 Flavio Leitner - 2.16.0-1 -- redhat: First 2.16.0 release. [RH git: 0a1c4276cc] - - diff --git a/SPECS/openvswitch2.17.spec b/SPECS/openvswitch2.17.spec new file mode 100644 index 0000000..edfb655 --- /dev/null +++ b/SPECS/openvswitch2.17.spec @@ -0,0 +1,908 @@ +# Copyright (C) 2009, 2010, 2013, 2014 Nicira Networks, Inc. +# +# Copying and distribution of this file, with or without modification, +# are permitted in any medium without royalty provided the copyright +# notice and this notice are preserved. This file is offered as-is, +# without warranty of any kind. +# +# If tests have to be skipped while building, specify the '--without check' +# option. For example: +# rpmbuild -bb --without check rhel/openvswitch-fedora.spec + +# This defines the base package name's version. + +%define pkgname openvswitch2.17 + + +%if 0%{?commit:1} +%global shortcommit %(c=%{commit}; echo ${c:0:7}) +%endif + +# Enable PIE, bz#955181 +%global _hardened_build 1 + +# RHEL-7 doesn't define _rundir macro yet +# Fedora 15 onwards uses /run as _rundir +%if 0%{!?_rundir:1} +%define _rundir /run +%endif + +# FIXME Test "STP - flush the fdb and mdb when topology changed" fails on s390x +# FIXME 2 tests fails on ppc64le. They will be hopefully fixed before official 2.11 +%ifarch %{ix86} x86_64 aarch64 +%bcond_without check +%else +%bcond_with check +%endif +# option to run kernel datapath tests, requires building as root! 
+%bcond_with check_datapath_kernel +# option to build with libcap-ng, needed for running OVS as regular user +%bcond_without libcapng +# option to build with ipsec support +%bcond_without ipsec + +# Build python2 (that provides python) and python3 subpackages on Fedora +# Build only python3 (that provides python) subpackage on RHEL8 +# Build only python subpackage on RHEL7 +%if 0%{?rhel} > 7 || 0%{?fedora} +# On RHEL8 Sphinx is included in buildroot +%global external_sphinx 1 +%else +# Don't use external sphinx (RHV doesn't have optional repositories enabled) +%global external_sphinx 0 +%endif + +Name: %{pkgname} +Summary: Open vSwitch +Group: System Environment/Daemons daemon/database/utilities +URL: http://www.openvswitch.org/ +Version: 2.17.0 +Release: 15%{?dist} + +# Nearly all of openvswitch is ASL 2.0. The bugtool is LGPLv2+, and the +# lib/sflow*.[ch] files are SISSL +# datapath/ is GPLv2 (although not built into any of the binary packages) +License: ASL 2.0 and LGPLv2+ and SISSL + +%define dpdkver 21.11 +%define dpdkdir dpdk +%define dpdksver %(echo %{dpdkver} | cut -d. 
-f-2) +# NOTE: DPDK does not currently build for s390x +# DPDK on aarch64 is not stable enough to be enabled in FDP +%if 0%{?rhel} > 7 || 0%{?fedora} +%define dpdkarches x86_64 ppc64le +%else +%define dpdkarches +%endif + +%if 0%{?commit:1} +Source: https://github.com/openvswitch/ovs/archive/%{commit}.tar.gz#/openvswitch-%{commit}.tar.gz +%else +Source: https://github.com/openvswitch/ovs/archive/v%{version}.tar.gz#/openvswitch-%{version}.tar.gz +%endif +Source10: https://fast.dpdk.org/rel/dpdk-%{dpdkver}.tar.xz + +%define docutilsver 0.12 +%define pygmentsver 1.4 +%define sphinxver 1.2.3 +%define pyelftoolsver 0.27 +Source100: https://pypi.io/packages/source/d/docutils/docutils-%{docutilsver}.tar.gz +Source101: https://pypi.io/packages/source/P/Pygments/Pygments-%{pygmentsver}.tar.gz +Source102: https://pypi.io/packages/source/S/Sphinx/Sphinx-%{sphinxver}.tar.gz +Source103: https://pypi.io/packages/source/p/pyelftools/pyelftools-%{pyelftoolsver}.tar.gz + +Patch: openvswitch-%{version}.patch + +# The DPDK is designed to optimize throughput of network traffic using, among +# other techniques, carefully crafted assembly instructions. As such it +# needs extensive work to port it to other architectures. +ExclusiveArch: x86_64 aarch64 ppc64le s390x + +# Do not enable this otherwise YUM will break on any upgrade.
+# Provides: openvswitch +Conflicts: openvswitch < 2.17 +Conflicts: openvswitch-dpdk < 2.17 +Conflicts: openvswitch2.10 +Conflicts: openvswitch2.11 +Conflicts: openvswitch2.12 +Conflicts: openvswitch2.13 +Conflicts: openvswitch2.14 +Conflicts: openvswitch2.15 +Conflicts: openvswitch2.16 + +# FIXME Sphinx is used to generate some manpages, unfortunately, on RHEL, it's +# in the -optional repository and so we can't require it directly since RHV +# doesn't have the -optional repository enabled and so TPS fails +%if %{external_sphinx} +BuildRequires: python3-sphinx +%else +# Sphinx dependencies +BuildRequires: python-devel +BuildRequires: python-setuptools +#BuildRequires: python2-docutils +BuildRequires: python-jinja2 +BuildRequires: python-nose +#BuildRequires: python2-pygments +# docutils dependencies +BuildRequires: python-imaging +# pygments dependencies +BuildRequires: python-nose +%endif + +BuildRequires: gcc gcc-c++ make +BuildRequires: autoconf automake libtool +BuildRequires: systemd-units openssl openssl-devel +BuildRequires: python3-devel python3-setuptools +BuildRequires: desktop-file-utils +BuildRequires: groff-base graphviz +BuildRequires: unbound-devel +BuildRequires: systemtap-sdt-devel +# make check dependencies +BuildRequires: procps-ng +%if %{with check_datapath_kernel} +BuildRequires: nmap-ncat +# would be useful but not available in RHEL or EPEL +#BuildRequires: pyftpdlib +%endif + +%if %{with libcapng} +BuildRequires: libcap-ng libcap-ng-devel +%endif + +%ifarch %{dpdkarches} +BuildRequires: meson +%if 0%{?rhel} > 8 || 0%{?fedora} +BuildRequires: python3-pyelftools +%endif +# DPDK driver dependencies +BuildRequires: zlib-devel numactl-devel +%ifarch x86_64 +BuildRequires: rdma-core-devel >= 15 libmnl-devel +%endif + +# Required by packaging policy for the bundled DPDK +Provides: bundled(dpdk) = %{dpdkver} +%endif + +Requires: openssl iproute module-init-tools +#Upstream kernel commit 4f647e0a3c37b8d5086214128614a136064110c3 +#Requires: kernel >= 
3.15.0-0 +Requires: openvswitch-selinux-extra-policy + +Requires(pre): shadow-utils +Requires(post): /bin/sed +Requires(post): /usr/sbin/usermod +Requires(post): /usr/sbin/groupadd +Requires(post): systemd-units +Requires(preun): systemd-units +Requires(postun): systemd-units +Obsoletes: openvswitch-controller <= 0:2.1.0-1 + +%if 0%{?rhel} +# sortedcontainers are not packaged on RHEL yet, but ovs includes it +%global __requires_exclude ^python%{python3_version}dist\\(sortedcontainers\\)$ +%endif + +%description +Open vSwitch provides standard network bridging functions and +support for the OpenFlow protocol for remote per-flow control of +traffic. + +%package -n python3-%{pkgname} +Summary: Open vSwitch python3 bindings +License: ASL 2.0 +Requires: %{pkgname} = %{?epoch:%{epoch}:}%{version}-%{release} +Provides: python-%{pkgname} = %{?epoch:%{epoch}:}%{version}-%{release} + +%description -n python3-%{pkgname} +Python bindings for the Open vSwitch database + +%package test +Summary: Open vSwitch testing utilities +License: ASL 2.0 +BuildArch: noarch +Requires: python3-%{pkgname} = %{?epoch:%{epoch}:}%{version}-%{release} +Requires: tcpdump + +%description test +Utilities that are useful to diagnose performance and connectivity +issues in Open vSwitch setup. + +%package devel +Summary: Open vSwitch OpenFlow development package (library, headers) +License: ASL 2.0 +Requires: %{pkgname} = %{?epoch:%{epoch}:}%{version}-%{release} + +%description devel +This provides shared library, libopenvswitch.so and the openvswitch header +files needed to build an external application. + +%if 0%{?rhel} == 8 || 0%{?fedora} > 28 +%package -n network-scripts-%{name} +Summary: Open vSwitch legacy network service support +License: ASL 2.0 +Requires: network-scripts +Supplements: (%{name} and network-scripts) + +%description -n network-scripts-%{name} +This provides the ifup and ifdown scripts for use with the legacy network +service.
+%endif + +%if %{with ipsec} +%package ipsec +Summary: Open vSwitch IPsec tunneling support +License: ASL 2.0 +Requires: python3-%{pkgname} = %{?epoch:%{epoch}:}%{version}-%{release} +Requires: libreswan + +%description ipsec +This package provides IPsec tunneling support for OVS tunnels. +%endif + +%prep +%if 0%{?commit:1} +%setup -q -n ovs-%{commit} -a 10 +%else +%setup -q -n ovs-%{version} -a 10 +%endif +%if ! %{external_sphinx} +%if 0%{?commit:1} +%setup -n ovs-%{commit} -q -D -T -a 100 -a 101 -a 102 +%else +%setup -n ovs-%{version} -q -D -T -a 100 -a 101 -a 102 +%endif +%endif +%if 0%{?rhel} && 0%{?rhel} < 9 +%if 0%{?commit:1} +%setup -n ovs-%{commit} -q -D -T -a 103 +%else +%setup -n ovs-%{version} -q -D -T -a 103 +%endif +%endif + +mv dpdk-*/ %{dpdkdir}/ + +# FIXME should we propose a way to do that upstream? +sed -ri "/^subdir\('(usertools|app)'\)/d" %{dpdkdir}/meson.build + +%patch -p1 + +%build +%if 0%{?rhel} && 0%{?rhel} < 9 +export PYTHONPATH="${PWD}/pyelftools-%{pyelftoolsver}" +%endif +# Build Sphinx on RHEL +%if ! 
%{external_sphinx} +export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}%{_builddir}/pytmp/lib/python" +for x in docutils-%{docutilsver} Pygments-%{pygmentsver} Sphinx-%{sphinxver}; do + pushd "$x" + python2 setup.py install --home %{_builddir}/pytmp + popd +done + +export PATH="$PATH:%{_builddir}/pytmp/bin" +%endif + +./boot.sh + +%ifarch %{dpdkarches} # build dpdk +# Lets build DPDK first +cd %{dpdkdir} + +ENABLED_DRIVERS=( + bus/pci + bus/vdev + mempool/ring + net/failsafe + net/i40e + net/ring + net/vhost + net/virtio + net/tap +) + +%ifarch x86_64 +ENABLED_DRIVERS+=( + bus/auxiliary + bus/vmbus + common/iavf + common/mlx5 + net/bnxt + net/enic + net/iavf + net/ice + net/mlx5 + net/netvsc + net/nfp + net/qede + net/vdev_netvsc +) +%endif + +%ifarch aarch64 x86_64 +ENABLED_DRIVERS+=( + net/e1000 + net/ixgbe +) +%endif + +for driver in "${ENABLED_DRIVERS[@]}"; do + enable_drivers="${enable_drivers:+$enable_drivers,}"$driver +done + +# As of 21.11-rc3, following libraries can be disabled: +# optional_libs = [ +# 'bitratestats', +# 'gpudev', +# 'gro', +# 'gso', +# 'kni', +# 'jobstats', +# 'latencystats', +# 'metrics', +# 'pdump', +# 'power', +# 'vhost', +# ] +# If doing any updates, this must be aligned with: +# https://access.redhat.com/articles/3538141 +DISABLED_LIBS=( + gpudev + kni + jobstats + power +) + +for lib in "${DISABLED_LIBS[@]}"; do + disable_libs="${disable_libs:+$disable_libs,}"$lib +done + +%set_build_flags +%__meson --prefix=%{_builddir}/dpdk-build \ + --buildtype=plain \ + -Ddisable_libs="$disable_libs" \ + -Denable_drivers="$enable_drivers" \ + -Dplatform=generic \ + -Dmax_ethports=1024 \ + -Dmax_numa_nodes=8 \ + -Dtests=false \ + %{_vpath_builddir} +%meson_build +%__meson install -C %{_vpath_builddir} --no-rebuild + +# FIXME currently with LTO enabled OVS tries to link with both static and shared libraries +rm -v %{_builddir}/dpdk-build/%{_lib}/*.so* + +# Generate a list of supported drivers, its hard to tell otherwise. 
+cat << EOF > README.DPDK-PMDS +DPDK drivers included in this package: + +EOF + +for f in %{_builddir}/dpdk-build/%{_lib}/librte_net_*.a; do + basename ${f} | cut -c12- | cut -d. -f1 | tr [:lower:] [:upper:] +done >> README.DPDK-PMDS + +cat << EOF >> README.DPDK-PMDS + +For further information about the drivers, see +http://dpdk.org/doc/guides-%{dpdksver}/nics/index.html +EOF + +cd - +%endif # build dpdk + +# And now for OVS... +mkdir build-shared build-static +pushd build-shared +ln -s ../configure +%configure \ +%if %{with libcapng} + --enable-libcapng \ +%else + --disable-libcapng \ +%endif + --disable-static \ + --enable-shared \ + --enable-ssl \ + --with-pkidir=%{_sharedstatedir}/openvswitch/pki \ + --enable-usdt-probes +make %{?_smp_mflags} +popd +pushd build-static +ln -s ../configure +%ifarch %{dpdkarches} +PKG_CONFIG_PATH=%{_builddir}/dpdk-build/%{_lib}/pkgconfig \ +%endif +%configure \ +%if %{with libcapng} + --enable-libcapng \ +%else + --disable-libcapng \ +%endif + --enable-ssl \ +%ifarch %{dpdkarches} + --with-dpdk=static \ +%endif + --with-pkidir=%{_sharedstatedir}/openvswitch/pki \ + --enable-usdt-probes +make %{?_smp_mflags} +popd + +/usr/bin/python3 build-aux/dpdkstrip.py \ + --dpdk \ + < rhel/usr_lib_systemd_system_ovs-vswitchd.service.in \ + > rhel/usr_lib_systemd_system_ovs-vswitchd.service + +%install +rm -rf $RPM_BUILD_ROOT +make -C build-shared install-libLTLIBRARIES DESTDIR=$RPM_BUILD_ROOT +make -C build-static install DESTDIR=$RPM_BUILD_ROOT + +install -d -m 0755 $RPM_BUILD_ROOT%{_rundir}/openvswitch +install -d -m 0750 $RPM_BUILD_ROOT%{_localstatedir}/log/openvswitch +install -d -m 0755 $RPM_BUILD_ROOT%{_sysconfdir}/openvswitch + +install -p -D -m 0644 rhel/usr_lib_udev_rules.d_91-vfio.rules \ + $RPM_BUILD_ROOT%{_udevrulesdir}/91-vfio.rules + +install -p -D -m 0644 \ + rhel/usr_share_openvswitch_scripts_systemd_sysconfig.template \ + $RPM_BUILD_ROOT/%{_sysconfdir}/sysconfig/openvswitch + +for service in openvswitch ovsdb-server 
ovs-vswitchd \ + ovs-delete-transient-ports; do + install -p -D -m 0644 \ + rhel/usr_lib_systemd_system_${service}.service \ + $RPM_BUILD_ROOT%{_unitdir}/${service}.service +done + +%if %{with ipsec} +install -p -D -m 0644 rhel/usr_lib_systemd_system_openvswitch-ipsec.service \ + $RPM_BUILD_ROOT%{_unitdir}/openvswitch-ipsec.service +%endif + +install -m 0755 rhel/etc_init.d_openvswitch \ + $RPM_BUILD_ROOT%{_datadir}/openvswitch/scripts/openvswitch.init + +install -p -D -m 0644 rhel/etc_openvswitch_default.conf \ + $RPM_BUILD_ROOT/%{_sysconfdir}/openvswitch/default.conf + +install -p -D -m 0644 rhel/etc_logrotate.d_openvswitch \ + $RPM_BUILD_ROOT/%{_sysconfdir}/logrotate.d/openvswitch + +install -m 0644 vswitchd/vswitch.ovsschema \ + $RPM_BUILD_ROOT/%{_datadir}/openvswitch/vswitch.ovsschema + +%if 0%{?rhel} < 9 +install -d -m 0755 $RPM_BUILD_ROOT/%{_sysconfdir}/sysconfig/network-scripts/ +install -p -m 0755 rhel/etc_sysconfig_network-scripts_ifdown-ovs \ + $RPM_BUILD_ROOT/%{_sysconfdir}/sysconfig/network-scripts/ifdown-ovs +install -p -m 0755 rhel/etc_sysconfig_network-scripts_ifup-ovs \ + $RPM_BUILD_ROOT/%{_sysconfdir}/sysconfig/network-scripts/ifup-ovs +%endif + +install -d -m 0755 $RPM_BUILD_ROOT%{python3_sitelib} +cp -a $RPM_BUILD_ROOT/%{_datadir}/openvswitch/python/ovstest \ + $RPM_BUILD_ROOT%{python3_sitelib} + +# Build the JSON C extension for the Python lib (#1417738) +pushd python +( +export CPPFLAGS="-I ../include -I ../build-shared/include" +export LDFLAGS="%{__global_ldflags} -L $RPM_BUILD_ROOT%{_libdir}" +%py3_build +%py3_install +[ -f "$RPM_BUILD_ROOT/%{python3_sitearch}/ovs/_json$(python3-config --extension-suffix)" ] +) +popd + +rm -rf $RPM_BUILD_ROOT/%{_datadir}/openvswitch/python/ + +install -d -m 0755 $RPM_BUILD_ROOT/%{_sharedstatedir}/openvswitch + +install -d -m 0755 $RPM_BUILD_ROOT%{_prefix}/lib/firewalld/services/ + +install -p -D -m 0755 \ + rhel/usr_share_openvswitch_scripts_ovs-systemd-reload \ + 
$RPM_BUILD_ROOT%{_datadir}/openvswitch/scripts/ovs-systemd-reload + +touch $RPM_BUILD_ROOT%{_sysconfdir}/openvswitch/conf.db +# The db needs special permission as IPsec Pre-shared keys are stored in it. +chmod 0640 $RPM_BUILD_ROOT%{_sysconfdir}/openvswitch/conf.db + +touch $RPM_BUILD_ROOT%{_sysconfdir}/openvswitch/system-id.conf + +# remove unpackaged files +rm -f $RPM_BUILD_ROOT/%{_bindir}/ovs-benchmark \ + $RPM_BUILD_ROOT/%{_bindir}/ovs-docker \ + $RPM_BUILD_ROOT/%{_bindir}/ovs-parse-backtrace \ + $RPM_BUILD_ROOT/%{_bindir}/ovs-testcontroller \ + $RPM_BUILD_ROOT/%{_sbindir}/ovs-vlan-bug-workaround \ + $RPM_BUILD_ROOT/%{_mandir}/man1/ovs-benchmark.1* \ + $RPM_BUILD_ROOT/%{_mandir}/man8/ovs-testcontroller.* \ + $RPM_BUILD_ROOT/%{_mandir}/man8/ovs-vlan-bug-workaround.8* + +%if ! %{with ipsec} +rm -f $RPM_BUILD_ROOT/%{_datadir}/openvswitch/scripts/ovs-monitor-ipsec +%endif + +# remove unpackaged ovn files +rm -f $RPM_BUILD_ROOT%{_bindir}/ovn* +rm -f $RPM_BUILD_ROOT%{_mandir}/man1/ovn* +rm -f $RPM_BUILD_ROOT%{_mandir}/man5/ovn* +rm -f $RPM_BUILD_ROOT%{_mandir}/man7/ovn* +rm -f $RPM_BUILD_ROOT%{_mandir}/man8/ovn* +rm -f $RPM_BUILD_ROOT%{_datadir}/openvswitch/ovn* +rm -f $RPM_BUILD_ROOT%{_datadir}/openvswitch/scripts/ovn* +rm -f $RPM_BUILD_ROOT%{_includedir}/ovn/* + +%check +%if %{with check} + pushd build-static + touch resolv.conf + export OVS_RESOLV_CONF=$(pwd)/resolv.conf + if make check TESTSUITEFLAGS='%{_smp_mflags}' || + make check TESTSUITEFLAGS='--recheck'; then :; + else + cat tests/testsuite.log + exit 1 + fi + popd +%endif +%if %{with check_datapath_kernel} + pushd build-static + if make check-kernel RECHECK=yes; then :; + else + cat tests/system-kmod-testsuite.log + exit 1 + fi + popd +%endif + +%clean +rm -rf $RPM_BUILD_ROOT + +%preun +%if 0%{?systemd_preun:1} + %systemd_preun openvswitch.service +%else + if [ $1 -eq 0 ] ; then + # Package removal, not upgrade + /bin/systemctl --no-reload disable openvswitch.service >/dev/null 2>&1 || : + /bin/systemctl
stop openvswitch.service >/dev/null 2>&1 || : + fi +%endif + +%pre +getent group openvswitch >/dev/null || groupadd -r openvswitch +getent passwd openvswitch >/dev/null || \ + useradd -r -g openvswitch -d / -s /sbin/nologin \ + -c "Open vSwitch Daemons" openvswitch + +%ifarch %{dpdkarches} + getent group hugetlbfs >/dev/null || groupadd hugetlbfs + usermod -a -G hugetlbfs openvswitch +%endif +exit 0 + +%post +if [ $1 -eq 1 ]; then + sed -i 's:^#OVS_USER_ID=:OVS_USER_ID=:' /etc/sysconfig/openvswitch + +%ifarch %{dpdkarches} + sed -i \ + 's@OVS_USER_ID="openvswitch:openvswitch"@OVS_USER_ID="openvswitch:hugetlbfs"@'\ + /etc/sysconfig/openvswitch +%endif +fi +chown -R openvswitch:openvswitch /etc/openvswitch + +%if 0%{?systemd_post:1} + %systemd_post openvswitch.service +%else + # Package install, not upgrade + if [ $1 -eq 1 ]; then + /bin/systemctl daemon-reload >/dev/null 2>&1 || : + fi +%endif + +%postun +%if 0%{?systemd_postun:1} + %systemd_postun openvswitch.service +%else + /bin/systemctl daemon-reload >/dev/null 2>&1 || : +%endif + +%triggerun -- openvswitch < 2.5.0-22.git20160727%{?dist} +# old rpm versions restart the service in postun, but +# due to systemd some preparation is needed.
+if systemctl is-active openvswitch >/dev/null 2>&1 ; then + /usr/share/openvswitch/scripts/ovs-ctl stop >/dev/null 2>&1 || : + systemctl daemon-reload >/dev/null 2>&1 || : + systemctl stop openvswitch ovsdb-server ovs-vswitchd >/dev/null 2>&1 || : + systemctl start openvswitch >/dev/null 2>&1 || : +fi +exit 0 + +%files -n python3-%{pkgname} +%{python3_sitearch}/ovs +%{python3_sitearch}/ovs-*.egg-info +%doc LICENSE + +%files test +%{_bindir}/ovs-pcap +%{_bindir}/ovs-tcpdump +%{_bindir}/ovs-tcpundump +%{_mandir}/man1/ovs-pcap.1* +%{_mandir}/man8/ovs-tcpdump.8* +%{_mandir}/man1/ovs-tcpundump.1* +%{_bindir}/ovs-test +%{_bindir}/ovs-vlan-test +%{_bindir}/ovs-l3ping +%{_mandir}/man8/ovs-test.8* +%{_mandir}/man8/ovs-vlan-test.8* +%{_mandir}/man8/ovs-l3ping.8* +%{python3_sitelib}/ovstest + +%files devel +%{_libdir}/*.so +%{_libdir}/pkgconfig/*.pc +%{_includedir}/openvswitch/* +%{_includedir}/openflow/* +%exclude %{_libdir}/*.a +%exclude %{_libdir}/*.la + +%if 0%{?rhel} == 8 || 0%{?fedora} > 28 +%files -n network-scripts-%{name} +%{_sysconfdir}/sysconfig/network-scripts/ifup-ovs +%{_sysconfdir}/sysconfig/network-scripts/ifdown-ovs +%endif + +%files +%defattr(-,openvswitch,openvswitch) +%dir %{_sysconfdir}/openvswitch +%{_sysconfdir}/openvswitch/default.conf +%config %ghost %verify(not owner group md5 size mtime) %{_sysconfdir}/openvswitch/conf.db +%ghost %attr(0600,-,-) %verify(not owner group md5 size mtime) %{_sysconfdir}/openvswitch/.conf.db.~lock~ +%config %ghost %{_sysconfdir}/openvswitch/system-id.conf +%defattr(-,root,root) +%config(noreplace) %verify(not md5 size mtime) %{_sysconfdir}/sysconfig/openvswitch +%{_sysconfdir}/bash_completion.d/ovs-appctl-bashcomp.bash +%{_sysconfdir}/bash_completion.d/ovs-vsctl-bashcomp.bash +%config(noreplace) %{_sysconfdir}/logrotate.d/openvswitch +%{_unitdir}/openvswitch.service +%{_unitdir}/ovsdb-server.service +%{_unitdir}/ovs-vswitchd.service +%{_unitdir}/ovs-delete-transient-ports.service 
+%{_datadir}/openvswitch/scripts/openvswitch.init +%{_datadir}/openvswitch/scripts/ovs-check-dead-ifs +%{_datadir}/openvswitch/scripts/ovs-lib +%{_datadir}/openvswitch/scripts/ovs-save +%{_datadir}/openvswitch/scripts/ovs-vtep +%{_datadir}/openvswitch/scripts/ovs-ctl +%{_datadir}/openvswitch/scripts/ovs-kmod-ctl +%{_datadir}/openvswitch/scripts/ovs-systemd-reload +%config %{_datadir}/openvswitch/vswitch.ovsschema +%config %{_datadir}/openvswitch/vtep.ovsschema +%{_bindir}/ovs-appctl +%{_bindir}/ovs-dpctl +%{_bindir}/ovs-ofctl +%{_bindir}/ovs-vsctl +%{_bindir}/ovsdb-client +%{_bindir}/ovsdb-tool +%{_bindir}/ovs-pki +%{_bindir}/vtep-ctl +%{_libdir}/*.so.* +%{_sbindir}/ovs-vswitchd +%{_sbindir}/ovsdb-server +%{_mandir}/man1/ovsdb-client.1* +%{_mandir}/man1/ovsdb-server.1* +%{_mandir}/man1/ovsdb-tool.1* +%{_mandir}/man5/ovsdb.5* +%{_mandir}/man5/ovsdb-server.5.* +%{_mandir}/man5/ovs-vswitchd.conf.db.5* +%{_mandir}/man5/vtep.5* +%{_mandir}/man7/ovsdb-server.7* +%{_mandir}/man7/ovsdb.7* +%{_mandir}/man7/ovs-actions.7* +%{_mandir}/man7/ovs-fields.7* +%{_mandir}/man8/vtep-ctl.8* +%{_mandir}/man8/ovs-appctl.8* +%{_mandir}/man8/ovs-ctl.8* +%{_mandir}/man8/ovs-dpctl.8* +%{_mandir}/man8/ovs-kmod-ctl.8.* +%{_mandir}/man8/ovs-ofctl.8* +%{_mandir}/man8/ovs-pki.8* +%{_mandir}/man8/ovs-vsctl.8* +%{_mandir}/man8/ovs-vswitchd.8* +%{_mandir}/man8/ovs-parse-backtrace.8* +%{_udevrulesdir}/91-vfio.rules +%doc LICENSE NOTICE README.rst NEWS rhel/README.RHEL.rst +%ifarch %{dpdkarches} +%doc %{dpdkdir}/README.DPDK-PMDS +%attr(750,openvswitch,hugetlbfs) %verify(not owner group) /var/log/openvswitch +%else +%attr(750,openvswitch,openvswitch) %verify(not owner group) /var/log/openvswitch +%endif +/var/lib/openvswitch +%ghost %attr(755,root,root) %verify(not owner group) %{_rundir}/openvswitch +%{_datadir}/openvswitch/bugtool-plugins/ +%{_datadir}/openvswitch/scripts/ovs-bugtool-* +%{_bindir}/ovs-dpctl-top +%{_sbindir}/ovs-bugtool +%{_mandir}/man8/ovs-dpctl-top.8* 
+%{_mandir}/man8/ovs-bugtool.8* +%if (0%{?rhel} && 0%{?rhel} <= 7) || (0%{?fedora} && 0%{?fedora} < 29) +%{_sysconfdir}/sysconfig/network-scripts/ifup-ovs +%{_sysconfdir}/sysconfig/network-scripts/ifdown-ovs +%endif + +%if %{with ipsec} +%files ipsec +%{_datadir}/openvswitch/scripts/ovs-monitor-ipsec +%{_unitdir}/openvswitch-ipsec.service +%endif + +%changelog +* Mon May 02 2022 Open vSwitch CI - 2.17.0-15 +- Merging upstream branch-2.17 [RH git: e706ea8148] + Commit list: + 522c46884d python: idl: Raise AttributeError from uuid_to_row. + cb24c524e4 ofproto-dpif-xlate: Clear out vlan flow fields while processing native tunnel. (#2060552) + a665b75dec dpif-netdev-avx512: Fix overflow of UINT32_C(1). + + +* Thu Apr 28 2022 Timothy Redaelli - 2.17.0-14 +- vhost: fix queue number check when setting inflight FD [RH git: 2ac21853a2] + [ upstream commit 6442c329b9d2ded0f44b27d2016aaba8ba5844c5 ] + + In function vhost_user_set_inflight_fd, queue number in inflight + message is used to access virtqueue. However, queue number could + be larger than VHOST_MAX_VRING and cause write OOB as this number + will be used to write inflight info in virtqueue structure. This + patch checks the queue number to avoid the issue and also make + sure virtqueues are allocated before setting inflight information. + + Fixes: ad0a4ae491fe ("vhost: checkout resubmit inflight information") + + Reported-by: Wenxiang Qian + Signed-off-by: Chenbo Xia + Reviewed-by: Maxime Coquelin + + +* Thu Apr 28 2022 Timothy Redaelli - 2.17.0-13 +- vhost: fix FD leak with inflight messages [RH git: bff69b098f] + [ upstream commit af74f7db384ed149fe42b21dbd7975f8a54ef227 ] + + Even if unlikely, a buggy vhost-user master might attach fds to inflight + messages. Add checks like for other types of vhost-user messages. 
+ + Fixes: d87f1a1cb7b6 ("vhost: support inflight info sharing") + + Signed-off-by: David Marchand + Reviewed-by: Maxime Coquelin + + +* Wed Apr 27 2022 Open vSwitch CI - 2.17.0-12 +- Merging upstream branch-2.17 [RH git: 7a9f21a896] + Commit list: + 60e7badd6e dpif-netdev-avx512: Fix ubsan shift error in bitmasks. + 9cc329ec5b python: Politely handle misuse of table.condition. + 0631be2b5a ofproto-xlate: Fix crash when forwarding packet between legacy_l3 tunnels. + df97903099 system-traffic: Fix fragment reassembly with L3 L4 protocol information. + ba159ee0f9 cirrus: Update FreeBSD versions. + + +* Thu Apr 21 2022 Timothy Redaelli - 2.17.0-11 +- Set RTE_ETH_MAXPORTS to 1024 [RH git: c02e6bcdc4] (#2077451) + Resolves: #2077451 + + +* Wed Apr 13 2022 Timothy Redaelli - 2.17.0-10 +- redhat: network-scripts are gone in RHEL9 [RH git: 613e0e5190] + + +* Fri Apr 08 2022 Open vSwitch CI - 2.17.0-9 +- Merging upstream branch-2.17 [RH git: 4b4333522a] + Commit list: + bd1a3b6b49 Prepare for 2.17.2. + 41bb202fb3 Set release date for 2.17.1. + 8f42d4f597 NEWS: Highlight libopenvswitch API change caused by UB fixes. + + +* Fri Apr 08 2022 Open vSwitch CI - 2.17.0-8 +- Merging upstream branch-2.17 [RH git: 11b19654f7] + Commit list: + 14301b3a3c netdev-offload-tc: Check for ct_state flag combinations that are not offloadable. + + +* Mon Apr 04 2022 Open vSwitch CI - 2.17.0-7 +- Merging upstream branch-2.17 [RH git: 6cd8201ead] + Commit list: + 3a2eef7927 python: idl: Set cond_changed to true if condition change requested. + d05ccf288d dpif-netdev: Fix dp_netdev_get_pmd() function getting correct core_id. + 79e291f983 alb.at: Add tests for cross-numa polling. + 9c3b74fb24 dpif-netdev: Fix PMD auto load balance with pmd-rxq-isolate. + 8580ff9ddd pmd.at: Add tests for multi non-local numa pmds. + 6bf4eeddbb dpif-netdev: Fix non-local numa selection for more than two numas. + c41434b3b7 dpif-netdev: Fix typo in function name. 
+ e0aa5e1329 python: idl: Set cond_changed to false if last id is zero. + 8da40d31c7 ofproto-dpif-xlate: Fix NULL pointer dereference in xlate_normal(). + 63a903ab42 ofproto/ofproto-dpif: Fix dpif_type for userspace tunnels. + + +* Mon Apr 04 2022 Timothy Redaelli - 2.17.0-6 +- downstream: Enable usdt probes in build [RH git: b366bbed8f] + Fixes: BZ1840877 + + +* Thu Mar 31 2022 Michael Santana - 2.17.0-5 +- redhat/makefile: fix support for more rhel versions [RH git: 7c8bb3babb] + Signed-off-by: Michael Santana + + +* Thu Mar 31 2022 Open vSwitch CI - 2.17.0-4 +- Merging upstream branch-2.17 [RH git: 95e1c005d2] + Commit list: + 14d54eb8b6 sset: add SHORT version of SAFE loop macros. + f0e63b115f sparse: bump recommended version and include headers. + 70b87cf722 idlc: support short version of SAFE macros. + 3777ed90c9 rculist: use multi-variable helpers for loop macros. + bb52e9bebf hindex: remove the next variable in safe loops. + 7d6cbfa24b hindex: use multi-variable iterators. + 97ad96b63a cmap: use multi-variable iterators. + cd62fda22d hmap: use short version of safe loops if possible. + d56bfd7521 hmap: implement UB-safe hmap pop iterator. + e2c8354861 hmap: use multi-variable helpers for hmap loops. + 897d6647e6 list: use short version of safe loops if possible. + 979a9eb5b5 list: use multi-variable helpers for list loops. + 6bacf802c6 util: add helpers to overload SAFE macro. + f127123d66 util: add safe multi-variable iterators. + 38e73f0b68 util: add multi-variable loop iterator macros. + e91edf4568 ovsdb: raft: Fix inability to read the database with DNS host names. (#2055097) + 2404d45367 system-traffic.at: Fix flaky DNAT load balancing test. + 6b8adfdd8d dpif-netdev: Keep orig_in_port as a field of the flow. + 6098b7f250 tests: Fix incorrect usage of OVS_WAIT_UNTIL. + cf9018d373 odp-util: Fix output for tc to be equal to kernel. + 992de24063 netdev-offload-tc: Fix IP and port ranges in flower returns. 
+ 7e26796c03 netdev-offload-tc: Fix use of ICMP values instead of masks defines. + e319e27064 netdev-offload-tc: Always include conntrack information to tc. + 51ef81ad78 netdev-offload-tc: Check for valid netdev ifindex in flow_put. + 974253dc2e netdev-offload-tc: Set the correct VLAN_VID and VLAN_PCP masks. + c43c159aea netdev-offload-tc: Add debug logs on tc rule verify failures. + d34622a03b tc: Keep header rewrite actions order. + 5255713d1f faq: Update OVS/DPDK version table for OVS 2.15/2.16 + 05cf36a620 system-dpdk: Fix mfex autovalidator tests. + 87540e3b9f ofp-prop: Silence the 'may be uninitialized' warning. + 812164adef tests: Ignore log about failing to set NETLINK_EXT_ACK. + ae51ccc12c ovsdb-cluster.at: Avoid test failures due to different hashing. + f33cde23c7 ofproto: Use xlate map for uuid lookups. + 6ac255496c ofproto: Add refcount to ofproto to fix ofproto use-after-free. + db0cc8be38 ofproto-dpif: Trigger revalidation when ipfix config set. + 31b86e5c98 conntrack: Prefer dst port range during unique tuple search. + b761b532c3 conntrack: Select correct sport range for well-known origin sport. + 78bd058c36 ipsec: StrongSwan report connection update failures to ovs logs. + aa05596dfb ipsec: Libreswan report connection failures to ovs logs. + 427776ceae system-tso: Skip encap tests when userspace TSO is enabled. + 66d16e2883 tc: Fix stats byte count on fragmented packets. + b63c41f31f compat: Add gen_stats include to define tc hw stats. + c531b3828f ovsdb: raft: Fix inability to join the cluster after interrupted attempt. (#2033514) + 498cedc483 reconnect: Fix broken inactivity probe if there is no other reason to wake up. + 5dc1423d80 datapath-windows: Fix NXM_OF_IP_TOS issue + 91c0f0068d Prepare for 2.17.1. 
+ + +* Wed Mar 30 2022 Timothy Redaelli - 2.17.0-3 +- redhat: fix setup on RHEL8 [RH git: 769c7d89ac] + + +* Wed Mar 30 2022 Timothy Redaelli - 2.17.0-2 +- rhel: avoid including sortedcontainers [RH git: 3c5b820d14] + + +* Mon Feb 21 2022 Timothy Redaelli - 2.17.0-1 +- redhat: Imported Red Hat build files. [RH git: 00b5f7b51b] + +