diff --git a/.ci/linux-build.sh b/.ci/linux-build.sh index 863f023888..c06e88c577 100755 --- a/.ci/linux-build.sh +++ b/.ci/linux-build.sh @@ -216,7 +216,7 @@ fi if [ "$DPDK" ] || [ "$DPDK_SHARED" ]; then if [ -z "$DPDK_VER" ]; then - DPDK_VER="20.11.1" + DPDK_VER="20.11.4" fi install_dpdk $DPDK_VER if [ "$CC" = "clang" ]; then @@ -246,8 +246,8 @@ if [ "$ASAN" ]; then export ASAN_OPTIONS='detect_leaks=1' # -O2 generates few false-positive memory leak reports in test-ovsdb # application, so lowering optimizations to -O1 here. - CLFAGS_ASAN="-O1 -fno-omit-frame-pointer -fno-common -fsanitize=address" - CFLAGS_FOR_OVS="${CFLAGS_FOR_OVS} ${CLFAGS_ASAN}" + CFLAGS_ASAN="-O1 -fno-omit-frame-pointer -fno-common -fsanitize=address" + CFLAGS_FOR_OVS="${CFLAGS_FOR_OVS} ${CFLAGS_ASAN}" fi save_OPTS="${OPTS} $*" diff --git a/.ci/linux-prepare.sh b/.ci/linux-prepare.sh index c55125cf78..c0b7473eda 100755 --- a/.ci/linux-prepare.sh +++ b/.ci/linux-prepare.sh @@ -20,9 +20,13 @@ cd sparse make -j4 HAVE_LLVM= HAVE_SQLITE= install cd .. 
+# Installing wheel separately because it may be needed to build some +# of the packages during dependency backtracking and pip >= 22.0 will +# abort backtracking on build failures: +# https://github.com/pypa/pip/issues/10655 +pip3 install --disable-pip-version-check --user wheel pip3 install --disable-pip-version-check --user \ - flake8 hacking sphinx pyOpenSSL wheel setuptools -pip3 install --user --upgrade docutils + flake8 'hacking>=3.0' sphinx setuptools pip3 install --user 'meson==0.47.1' if [ "$M32" ]; then diff --git a/.cirrus.yml b/.cirrus.yml index 358f2ba256..a4d2a5bbcd 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -2,14 +2,14 @@ freebsd_build_task: freebsd_instance: matrix: - image_family: freebsd-12-2-snap - image_family: freebsd-11-4-snap + image_family: freebsd-12-3-snap + image_family: freebsd-13-0-snap cpu: 4 - memory: 8G + memory: 4G env: DEPENDENCIES: automake libtool gmake gcc wget openssl python3 - PY_DEPS: sphinx|openssl + PY_DEPS: sphinx matrix: COMPILER: gcc COMPILER: clang diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index e2350c6d9d..7434ad18ec 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -127,7 +127,7 @@ jobs: - name: set up python uses: actions/setup-python@v2 with: - python-version: '3.x' + python-version: '3.9' - name: create ci signature file for the dpdk cache key if: matrix.dpdk != '' || matrix.dpdk_shared != '' @@ -215,7 +215,7 @@ jobs: - name: set up python uses: actions/setup-python@v2 with: - python-version: '3.x' + python-version: '3.9' - name: install dependencies run: brew install automake libtool - name: prepare diff --git a/.travis.yml b/.travis.yml index 51d0511080..c7aeede06e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,7 +17,6 @@ addons: - libjemalloc-dev - libnuma-dev - libpcap-dev - - python3-openssl - python3-pip - python3-sphinx - libelf-dev diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst 
index 68c9867b19..d62d575eba 100644 --- a/Documentation/faq/releases.rst +++ b/Documentation/faq/releases.rst @@ -205,10 +205,10 @@ Q: What DPDK version does each Open vSwitch release work with? 2.10.x 17.11.10 2.11.x 18.11.9 2.12.x 18.11.9 - 2.13.x 19.11.8 - 2.14.x 19.11.8 - 2.15.x 20.11.1 - 2.16.x 20.11.1 + 2.13.x 19.11.10 + 2.14.x 19.11.10 + 2.15.x 20.11.4 + 2.16.x 20.11.4 ============ ======== Q: Are all the DPDK releases that OVS versions work with maintained? diff --git a/Documentation/intro/install/dpdk.rst b/Documentation/intro/install/dpdk.rst index d8fa931fab..9ce5285c58 100644 --- a/Documentation/intro/install/dpdk.rst +++ b/Documentation/intro/install/dpdk.rst @@ -42,7 +42,7 @@ Build requirements In addition to the requirements described in :doc:`general`, building Open vSwitch with DPDK will require the following: -- DPDK 20.11.1 +- DPDK 20.11.4 - A `DPDK supported NIC`_ @@ -73,9 +73,9 @@ Install DPDK #. Download the `DPDK sources`_, extract the file and set ``DPDK_DIR``:: $ cd /usr/src/ - $ wget https://fast.dpdk.org/rel/dpdk-20.11.1.tar.xz - $ tar xf dpdk-20.11.1.tar.xz - $ export DPDK_DIR=/usr/src/dpdk-stable-20.11.1 + $ wget https://fast.dpdk.org/rel/dpdk-20.11.4.tar.xz + $ tar xf dpdk-20.11.4.tar.xz + $ export DPDK_DIR=/usr/src/dpdk-stable-20.11.4 $ cd $DPDK_DIR #. Configure and install DPDK using Meson @@ -219,7 +219,7 @@ To verify hugepage configuration:: Mount the hugepages, if not already mounted by default:: - $ mount -t hugetlbfs none /dev/hugepages`` + $ mount -t hugetlbfs none /dev/hugepages .. 
note:: diff --git a/Documentation/intro/install/general.rst b/Documentation/intro/install/general.rst index c4300cd53e..a297aadac8 100644 --- a/Documentation/intro/install/general.rst +++ b/Documentation/intro/install/general.rst @@ -169,7 +169,7 @@ other than plain text, only if you have the following: If you are going to extensively modify Open vSwitch, consider installing the following to obtain better warnings: -- "sparse" version 0.5.1 or later +- "sparse" version 0.6.2 or later (https://git.kernel.org/pub/scm/devel/sparse/sparse.git/). - GNU make. diff --git a/Documentation/topics/dpdk/pmd.rst b/Documentation/topics/dpdk/pmd.rst index 95fa7af128..c1a35eb13a 100644 --- a/Documentation/topics/dpdk/pmd.rst +++ b/Documentation/topics/dpdk/pmd.rst @@ -31,17 +31,19 @@ input ports for packets, classifying packets once received, and executing actions on the packets once they are classified. PMD threads utilize Receive (Rx) and Transmit (Tx) queues, commonly known as -*rxq*\s and *txq*\s. While Tx queue configuration happens automatically, Rx -queues can be configured by the user. This can happen in one of two ways: +*rxq*\s and *txq*\s to receive and send packets from/to an interface. -- For physical interfaces, configuration is done using the - :program:`ovs-appctl` utility. +- For physical interfaces, the number of Tx Queues is automatically configured + based on the number of PMD thread cores. The number of Rx queues can be + configured with:: -- For virtual interfaces, configuration is done using the :program:`ovs-appctl` - utility, but this configuration must be reflected in the guest configuration - (e.g. QEMU command line arguments). + $ ovs-vsctl set Interface <interface_name> options:n_rxq=N -The :program:`ovs-appctl` utility also provides a number of commands for +- For virtual interfaces, the number of Tx and Rx queues are configured by + libvirt/QEMU and enabled/disabled in the guest. Refer to :doc:`vhost-user` + for more information. 
+ +The :program:`ovs-appctl` utility provides a number of commands for querying PMD threads and their respective queues. This, and all of the above, is discussed here. diff --git a/NEWS b/NEWS index 559a51ba3f..c3c5c16ae6 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,39 @@ +v2.16.5 - xx xxx xxxx +--------------------- + +v2.16.4 - 15 Jun 2022 +--------------------- + - Bug fixes + +v2.16.3 - 08 Apr 2022 +--------------------- + - Bug fixes + - libopenvswitch API change: + * To fix the Undefined Behavior issue causing the compiler to incorrectly + optimize important parts of code, container iteration macros (e.g., + LIST_FOR_EACH) have been re-implemented in a UB-safe way. + * Backwards compatibility has mostly been preserved, however the + user-provided pointer is now set to NULL after the loop (unless it + exited via "break;") + * Users of libopenvswitch will need to double-check the use of such loop + macros before compiling with a new version. + * Since the change is limited to the definitions within the headers, the + ABI is not affected. + - DPDK: + * OVS validated with DPDK 20.11.4. It is recommended to use this version + until further releases. + - Python: + * For SSL support, the use of the pyOpenSSL library has been replaced + with the native 'ssl' module. + +v2.16.2 - 17 Dec 2021 +--------------------- + - Bug fixes + +v2.16.1 - 21 Oct 2021 +--------------------- + - Bug fixes + v2.16.0 - 16 Aug 2021 --------------------- - Removed support for 1024-bit Diffie-Hellman key exchange, which is now diff --git a/acinclude.m4 b/acinclude.m4 index dba365ea1a..1b957c3dcd 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -77,7 +77,7 @@ dnl Checks if compiler and binutils supports AVX512. 
AC_DEFUN([OVS_CHECK_AVX512], [ OVS_CHECK_BINUTILS_AVX512 OVS_CHECK_CC_OPTION( - [-mavx512f], [ovs_have_cc_mavx512f=yes], [ovs_have_cc_mavx512f=no]) + [-mavx512f -mavx512vpopcntdq], [ovs_have_cc_mavx512f=yes], [ovs_have_cc_mavx512f=no]) AM_CONDITIONAL([HAVE_AVX512F], [test $ovs_have_cc_mavx512f = yes]) if test "$ovs_have_cc_mavx512f" = yes; then AC_DEFINE([HAVE_AVX512F], [1], @@ -305,6 +305,13 @@ AC_DEFUN([OVS_CHECK_LINUX_TC], [ ])], [AC_DEFINE([HAVE_TCA_SKBEDIT_FLAGS], [1], [Define to 1 if TCA_SKBEDIT_FLAGS is available.])]) + + AC_COMPILE_IFELSE([ + AC_LANG_PROGRAM([#include <linux/gen_stats.h>], [ + int x = TCA_STATS_PKT64; + ])], + [AC_DEFINE([HAVE_TCA_STATS_PKT64], [1], + [Define to 1 if TCA_STATS_PKT64 is available.])]) ]) dnl OVS_CHECK_LINUX_SCTP_CT @@ -1417,7 +1424,7 @@ AC_DEFUN([OVS_ENABLE_SPARSE], : ${SPARSE=sparse} AC_SUBST([SPARSE]) AC_CONFIG_COMMANDS_PRE( - [CC='$(if $(C:0=),env REAL_CC="'"$CC"'" CHECK="$(SPARSE) $(SPARSE_WERROR) -I $(top_srcdir)/include/sparse $(SPARSEFLAGS) $(SPARSE_EXTRA_INCLUDES) " cgcc $(CGCCFLAGS),'"$CC"')']) + [CC='$(if $(C:0=),env REAL_CC="'"$CC"'" CHECK="$(SPARSE) $(SPARSE_WERROR) -I $(top_srcdir)/include/sparse -I $(top_srcdir)/include $(SPARSEFLAGS) $(SPARSE_EXTRA_INCLUDES) " cgcc $(CGCCFLAGS),'"$CC"')']) AC_ARG_ENABLE( [sparse], diff --git a/configure.ac b/configure.ac index 16b32be965..406df116ee 100644 --- a/configure.ac +++ b/configure.ac @@ -13,7 +13,7 @@ # limitations under the License. AC_PREREQ(2.63) -AC_INIT(openvswitch, 2.16.0, bugs@openvswitch.org) +AC_INIT(openvswitch, 2.16.5, bugs@openvswitch.org) AC_CONFIG_SRCDIR([datapath/datapath.c]) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_AUX_DIR([build-aux]) diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c index e130c2f966..218e7db814 100644 --- a/datapath-windows/ovsext/Actions.c +++ b/datapath-windows/ovsext/Actions.c @@ -1112,9 +1112,9 @@ OvsPopFieldInPacketBuf(OvsForwardingContext *ovsFwdCtx, * should split the function and refactor. 
*/ if (!bufferData) { EthHdr *ethHdr = (EthHdr *)bufferStart; - /* If the frame is not VLAN make it a no op */ if (ethHdr->Type != ETH_TYPE_802_1PQ_NBO) { - return NDIS_STATUS_SUCCESS; + OVS_LOG_ERROR("Invalid ethHdr type %u, nbl %p", ethHdr->Type, ovsFwdCtx->curNbl); + return NDIS_STATUS_INVALID_PACKET; } } RtlMoveMemory(bufferStart + shiftLength, bufferStart, shiftOffset); @@ -1137,6 +1137,9 @@ OvsPopFieldInPacketBuf(OvsForwardingContext *ovsFwdCtx, static __inline NDIS_STATUS OvsPopVlanInPktBuf(OvsForwardingContext *ovsFwdCtx) { + NDIS_STATUS status; + OVS_PACKET_HDR_INFO* layers = &ovsFwdCtx->layers; + /* * Declare a dummy vlanTag structure since we need to compute the size * of shiftLength. The NDIS one is a unionized structure. @@ -1145,7 +1148,15 @@ OvsPopVlanInPktBuf(OvsForwardingContext *ovsFwdCtx) UINT32 shiftLength = sizeof(vlanTag.TagHeader); UINT32 shiftOffset = sizeof(DL_EUI48) + sizeof(DL_EUI48); - return OvsPopFieldInPacketBuf(ovsFwdCtx, shiftOffset, shiftLength, NULL); + status = OvsPopFieldInPacketBuf(ovsFwdCtx, shiftOffset, shiftLength, + NULL); + + if (status == NDIS_STATUS_SUCCESS) { + layers->l3Offset -= (UINT16) shiftLength; + layers->l4Offset -= (UINT16) shiftLength; + } + + return status; } @@ -1516,6 +1527,7 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx, csumInfo.Value = NET_BUFFER_LIST_INFO(ovsFwdCtx->curNbl, TcpIpChecksumNetBufferListInfo); + /* * Adjust the IP header inline as dictated by the action, and also update * the IP and the TCP checksum for the data modified. @@ -1524,6 +1536,7 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx, * ChecksumUpdate32(). Ignoring this for now, since for the most common * case, we only update the TTL. 
*/ + /*Only tx direction the checksum value will be reset to be PseudoChecksum*/ if (isSource) { addrField = &ipHdr->saddr; @@ -1540,7 +1553,7 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx, ((BOOLEAN)csumInfo.Receive.UdpChecksumSucceeded || (BOOLEAN)csumInfo.Receive.UdpChecksumFailed); } - if (l4Offload) { + if (isTx && l4Offload) { *checkField = IPPseudoChecksum(&newAddr, &ipHdr->daddr, tcpHdr ? IPPROTO_TCP : IPPROTO_UDP, ntohs(ipHdr->tot_len) - ipHdr->ihl * 4); @@ -1561,7 +1574,7 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx, (BOOLEAN)csumInfo.Receive.UdpChecksumFailed); } - if (l4Offload) { + if (isTx && l4Offload) { *checkField = IPPseudoChecksum(&ipHdr->saddr, &newAddr, tcpHdr ? IPPROTO_TCP : IPPROTO_UDP, ntohs(ipHdr->tot_len) - ipHdr->ihl * 4); @@ -1570,7 +1583,7 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx, if (*addrField != newAddr) { UINT32 oldAddr = *addrField; - if (checkField && *checkField != 0 && !l4Offload) { + if ((checkField && *checkField != 0) && (!l4Offload || !isTx)) { /* Recompute total checksum. */ *checkField = ChecksumUpdate32(*checkField, oldAddr, newAddr); @@ -1579,11 +1592,12 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx, ipHdr->check = ChecksumUpdate32(ipHdr->check, oldAddr, newAddr); } + *addrField = newAddr; } if (portField && *portField != newPort) { - if (checkField && !l4Offload) { + if ((checkField) && (!l4Offload || !isTx)) { /* Recompute total checksum. 
*/ *checkField = ChecksumUpdate16(*checkField, *portField, newPort); @@ -1698,6 +1712,15 @@ OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx, ipHdr->ttl = ipAttr->ipv4_ttl; key->ipKey.nwTtl = ipAttr->ipv4_ttl; } + if (ipHdr->dscp != (ipAttr->ipv4_tos & 0xfc)) { + /* ECN + DSCP */ + UINT8 newTos = (ipHdr->tos & 0x3) | (ipAttr->ipv4_tos & 0xfc); + if (ipHdr->check != 0) { + ipHdr->check = ChecksumUpdate16(ipHdr->check, ipHdr->tos, newTos); + } + ipHdr->tos = newTos; + key->ipKey.nwTos = newTos; + } return NDIS_STATUS_SUCCESS; } @@ -1792,9 +1815,11 @@ OvsExecuteRecirc(OvsForwardingContext *ovsFwdCtx, } if (newNbl) { - deferredAction = OvsAddDeferredActions(newNbl, key, NULL); + deferredAction = OvsAddDeferredActions(newNbl, key, &(ovsFwdCtx->layers), + NULL); } else { - deferredAction = OvsAddDeferredActions(ovsFwdCtx->curNbl, key, NULL); + deferredAction = OvsAddDeferredActions(ovsFwdCtx->curNbl, key, + &(ovsFwdCtx->layers), NULL); } if (deferredAction) { @@ -1964,7 +1989,7 @@ OvsExecuteSampleAction(OvsForwardingContext *ovsFwdCtx, return STATUS_SUCCESS; } - if (!OvsAddDeferredActions(newNbl, key, a)) { + if (!OvsAddDeferredActions(newNbl, key, &(ovsFwdCtx->layers), a)) { OVS_LOG_INFO( "Deferred actions limit reached, dropping sample action."); OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE); @@ -2100,6 +2125,7 @@ OvsDoExecuteActions(POVS_SWITCH_CONTEXT switchContext, */ status = OvsPopVlanInPktBuf(&ovsFwdCtx); if (status != NDIS_STATUS_SUCCESS) { + OVS_LOG_ERROR("OVS-pop vlan action failed status = %lu", status); dropReason = L"OVS-pop vlan action failed"; goto dropit; } @@ -2349,7 +2375,7 @@ OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext, if (status == STATUS_SUCCESS) { status = OvsProcessDeferredActions(switchContext, completionList, - portNo, sendFlags, layers); + portNo, sendFlags); } return status; diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 2610d626a0..7f1d2fb412 100644 --- 
a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -493,15 +493,32 @@ static __inline NDIS_STATUS OvsDetectCtPacket(OvsForwardingContext *fwdCtx, OvsFlowKey *key) { + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + OvsFlowKey newFlowKey = { 0 }; + switch (ntohs(key->l2.dlType)) { case ETH_TYPE_IPV4: if (key->ipKey.nwFrag != OVS_FRAG_TYPE_NONE) { - return OvsProcessIpv4Fragment(fwdCtx->switchContext, + status = OvsProcessIpv4Fragment(fwdCtx->switchContext, &fwdCtx->curNbl, fwdCtx->completionList, fwdCtx->fwdDetail->SourcePortId, &fwdCtx->layers, key->tunKey.tunnelId); + if (status == NDIS_STATUS_SUCCESS) { + /* After the Ipv4 Fragment is reassembled, update flow key as + L3 and L4 headers are not correct */ + status = + OvsExtractFlow(fwdCtx->curNbl, fwdCtx->srcVportNo, + &newFlowKey, &fwdCtx->layers, + fwdCtx->tunKey.dst != 0 ? &fwdCtx->tunKey : NULL); + if (status != NDIS_STATUS_SUCCESS) { + OVS_LOG_ERROR("Extract flow failed Nbl %p", fwdCtx->curNbl); + return status; + } + *key = newFlowKey; + } + return status; } if (key->ipKey.nwProto == IPPROTO_TCP || key->ipKey.nwProto == IPPROTO_UDP @@ -609,6 +626,31 @@ OvsReverseIcmpType(UINT8 type) } } +static __inline void +OvsPickupCtTupleAsLookupKey(POVS_CT_KEY ctKey, UINT16 zone, OvsFlowKey *flowKey) +{ + UINT32 ipAddrSrc = 0, ipAddrDst = 0; + + if (!flowKey || !ctKey) return; + + if (flowKey->l2.dlType == htons(ETH_TYPE_IPV4)) { + ipAddrSrc = flowKey->ct.tuple_ipv4.ipv4_src; + ipAddrDst = flowKey->ct.tuple_ipv4.ipv4_dst; + + if ((ipAddrSrc > 0 && ipAddrDst > 0) && + (zone == flowKey->ct.zone)) { + /* if the ct tuple_ipv4 in flowKey is not null and ct.zone is same with + * zone parameter pickup the tuple_ipv4 value as the lookup key + */ + ctKey->src.addr.ipv4 = flowKey->ct.tuple_ipv4.ipv4_src; + ctKey->dst.addr.ipv4 = flowKey->ct.tuple_ipv4.ipv4_dst; + ctKey->nw_proto = flowKey->ct.tuple_ipv4.ipv4_proto; + ctKey->src.port = flowKey->ct.tuple_ipv4.src_port; + ctKey->dst.port = 
flowKey->ct.tuple_ipv4.dst_port; + } + } +} + static __inline NDIS_STATUS OvsCtSetupLookupCtx(OvsFlowKey *flowKey, UINT16 zone, @@ -629,6 +671,7 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey, ctx->key.src.port = flowKey->ipKey.l4.tpSrc; ctx->key.dst.port = flowKey->ipKey.l4.tpDst; + if (flowKey->ipKey.nwProto == IPPROTO_ICMP) { ICMPHdr icmpStorage; const ICMPHdr *icmp; @@ -683,6 +726,10 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey, /* Translate address first for reverse NAT */ ctx->key = natEntry->ctEntry->key; OvsCtKeyReverse(&ctx->key); + } else { + if (flowKey->l2.dlType == htons(ETH_TYPE_IPV4)) { + OvsPickupCtTupleAsLookupKey(&(ctx->key), zone, flowKey); + } } ctx->hash = OvsCtHashKey(&ctx->key); diff --git a/datapath-windows/ovsext/PacketIO.c b/datapath-windows/ovsext/PacketIO.c index cc0840704a..2a206305ec 100644 --- a/datapath-windows/ovsext/PacketIO.c +++ b/datapath-windows/ovsext/PacketIO.c @@ -45,7 +45,9 @@ extern NDIS_STRING ovsExtFriendlyNameUC; static VOID OvsFinalizeCompletionList(OvsCompletionList *completionList); static VOID OvsCompleteNBLIngress(POVS_SWITCH_CONTEXT switchContext, - PNET_BUFFER_LIST netBufferLists, ULONG sendCompleteFlags); + PNET_BUFFER_LIST netBufferLists, + ULONG sendCompleteFlags, + BOOLEAN isSendComplete); VOID OvsInitCompletionList(OvsCompletionList *completionList, @@ -155,7 +157,7 @@ OvsSendNBLIngress(POVS_SWITCH_CONTEXT switchContext, OvsReportNBLIngressError(switchContext, netBufferLists, &filterReason, NDIS_STATUS_PAUSED); OvsCompleteNBLIngress(switchContext, netBufferLists, - sendCompleteFlags); + sendCompleteFlags, FALSE); return; } @@ -175,6 +177,79 @@ OvsSendNBLIngress(POVS_SWITCH_CONTEXT switchContext, NDIS_DEFAULT_PORT_NUMBER, sendFlags); } +static __inline BOOLEAN +OvsCheckNBLSingleSource(PNET_BUFFER_LIST netBufferLists) +{ + UINT32 sourcePortId = 0; + BOOLEAN singleSource = TRUE; + PNET_BUFFER_LIST curNbl = netBufferLists; + PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info; + + while (curNbl != NULL) { + info 
= NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl); + if (info == NULL) { + /* We are not able to determine the source port ID */ + singleSource = FALSE; + OVS_LOG_INFO("nbl %p has no source port", curNbl); + break; + } + if (curNbl == netBufferLists) { + sourcePortId = info->SourcePortId; + } else if (info->SourcePortId != sourcePortId) { + singleSource = FALSE; + OVS_LOG_INFO("Source port in nbl %p is %u, not from %u", + curNbl, info->SourcePortId, sourcePortId); + break; + } + curNbl = NET_BUFFER_LIST_NEXT_NBL(curNbl); + } + + return singleSource; +} + +/* + * SendNetBufferListsCompleteHandler releases the NetBufferLists with flag + * NDIS_SEND_COMPLETE_FLAGS_SWITCH_SINGLE_SOURCE if all the NBLs have the same + * source port, for cloned NBLs, source port might be changed, although the + * cloned NBLs have the same source port, their parent NBLs may have different + * source ports, so we should have a check before passing the flag to + * NdisFSendNetBufferListsComplete. + */ +static __inline VOID +OvsCompleteUpperLayerNBL(NDIS_HANDLE ndisHandle, + PNET_BUFFER_LIST netBufferLists, + ULONG sendCompleteFlags, + BOOLEAN isSendComplete) +{ + BOOLEAN singleSource = TRUE; + PNET_BUFFER_LIST curNbl, nextNbl; + + /* To check whether the NBLs are from the same source port */ + if (isSendComplete && + (sendCompleteFlags & NDIS_SEND_COMPLETE_FLAGS_SWITCH_SINGLE_SOURCE)) { + singleSource = OvsCheckNBLSingleSource(netBufferLists); + } + + if (singleSource) { + NdisFSendNetBufferListsComplete(ndisHandle, + netBufferLists, + sendCompleteFlags); + } else { + /* + * Not from a single source port, releasing the NBLs without flag + * NDIS_SEND_COMPLETE_FLAGS_SWITCH_SINGLE_SOURCE doesn't help, so + * let's release them one by one. 
+ */ + for (curNbl = netBufferLists; curNbl != NULL; curNbl = nextNbl) { + nextNbl = NET_BUFFER_LIST_NEXT_NBL(curNbl); + NET_BUFFER_LIST_NEXT_NBL(curNbl) = NULL; + NdisFSendNetBufferListsComplete(ndisHandle, + curNbl, + sendCompleteFlags); + } + } +} + static __inline VOID OvsStartNBLIngressError(POVS_SWITCH_CONTEXT switchContext, PNET_BUFFER_LIST nblList, @@ -184,8 +259,8 @@ OvsStartNBLIngressError(POVS_SWITCH_CONTEXT switchContext, { ASSERT(error); OvsReportNBLIngressError(switchContext, nblList, filterReason, error); - NdisFSendNetBufferListsComplete(switchContext->NdisFilterHandle, nblList, - sendCompleteFlags); + OvsCompleteUpperLayerNBL(switchContext->NdisFilterHandle, nblList, + sendCompleteFlags, FALSE); } static VOID @@ -427,7 +502,8 @@ OvsExtSendNBL(NDIS_HANDLE filterModuleContext, static VOID OvsCompleteNBLIngress(POVS_SWITCH_CONTEXT switchContext, PNET_BUFFER_LIST netBufferLists, - ULONG sendCompleteFlags) + ULONG sendCompleteFlags, + BOOLEAN isSendComplete) { PNET_BUFFER_LIST curNbl = NULL, nextNbl = NULL; OvsCompletionList newList; @@ -449,8 +525,10 @@ OvsCompleteNBLIngress(POVS_SWITCH_CONTEXT switchContext, /* Complete the NBL's that were sent by the upper layer. 
*/ if (newList.dropNbl != NULL) { - NdisFSendNetBufferListsComplete(switchContext->NdisFilterHandle, newList.dropNbl, - sendCompleteFlags); + OvsCompleteUpperLayerNBL(switchContext->NdisFilterHandle, + newList.dropNbl, + sendCompleteFlags, + isSendComplete); } } @@ -466,7 +544,7 @@ OvsExtSendNBLComplete(NDIS_HANDLE filterModuleContext, ULONG sendCompleteFlags) { OvsCompleteNBLIngress((POVS_SWITCH_CONTEXT)filterModuleContext, - netBufferLists, sendCompleteFlags); + netBufferLists, sendCompleteFlags, TRUE); } @@ -476,7 +554,8 @@ OvsFinalizeCompletionList(OvsCompletionList *completionList) if (completionList->dropNbl != NULL) { OvsCompleteNBLIngress(completionList->switchContext, completionList->dropNbl, - completionList->sendCompleteFlags); + completionList->sendCompleteFlags, + FALSE); completionList->dropNbl = NULL; completionList->dropNblNext = &completionList->dropNbl; diff --git a/datapath-windows/ovsext/Recirc.c b/datapath-windows/ovsext/Recirc.c index 2febf060dd..7a688c8742 100644 --- a/datapath-windows/ovsext/Recirc.c +++ b/datapath-windows/ovsext/Recirc.c @@ -277,16 +277,23 @@ OvsDeferredActionsQueuePush(POVS_DEFERRED_ACTION_QUEUE queue) POVS_DEFERRED_ACTION OvsAddDeferredActions(PNET_BUFFER_LIST nbl, OvsFlowKey *key, + POVS_PACKET_HDR_INFO layers, const PNL_ATTR actions) { POVS_DEFERRED_ACTION_QUEUE queue = OvsDeferredActionsQueueGet(); POVS_DEFERRED_ACTION deferredAction = NULL; + OVS_PACKET_HDR_INFO layersInit = { 0 }; deferredAction = OvsDeferredActionsQueuePush(queue); if (deferredAction) { deferredAction->nbl = nbl; deferredAction->actions = actions; deferredAction->key = *key; + if (layers) { + deferredAction->layers = *layers; + } else { + deferredAction->layers = layersInit; + } } return deferredAction; @@ -303,15 +310,17 @@ NDIS_STATUS OvsProcessDeferredActions(POVS_SWITCH_CONTEXT switchContext, OvsCompletionList *completionList, UINT32 portNo, - ULONG sendFlags, - OVS_PACKET_HDR_INFO *layers) + ULONG sendFlags) { NDIS_STATUS status = 
NDIS_STATUS_SUCCESS; POVS_DEFERRED_ACTION_QUEUE queue = OvsDeferredActionsQueueGet(); POVS_DEFERRED_ACTION deferredAction = NULL; + POVS_PACKET_HDR_INFO layersDeferred = NULL; /* Process all deferred actions. */ while ((deferredAction = OvsDeferredActionsQueuePop(queue)) != NULL) { + layersDeferred = &(deferredAction->layers); + if (deferredAction->actions) { status = OvsDoExecuteActions(switchContext, completionList, @@ -319,7 +328,7 @@ OvsProcessDeferredActions(POVS_SWITCH_CONTEXT switchContext, portNo, sendFlags, &deferredAction->key, NULL, - layers, deferredAction->actions, + layersDeferred, deferredAction->actions, NlAttrGetSize(deferredAction->actions)); } else { status = OvsDoRecirc(switchContext, @@ -327,7 +336,7 @@ OvsProcessDeferredActions(POVS_SWITCH_CONTEXT switchContext, deferredAction->nbl, &deferredAction->key, portNo, - layers); + layersDeferred); } } diff --git a/datapath-windows/ovsext/Recirc.h b/datapath-windows/ovsext/Recirc.h index 2b314ce274..b2d02a65c2 100644 --- a/datapath-windows/ovsext/Recirc.h +++ b/datapath-windows/ovsext/Recirc.h @@ -18,6 +18,7 @@ #define __RECIRC_H_ 1 #include "Actions.h" +#include "NetProto.h" #define DEFERRED_ACTION_QUEUE_SIZE 10 #define DEFERRED_ACTION_EXEC_LEVEL 4 @@ -26,6 +27,7 @@ typedef struct _OVS_DEFERRED_ACTION { PNET_BUFFER_LIST nbl; PNL_ATTR actions; OvsFlowKey key; + OVS_PACKET_HDR_INFO layers; } OVS_DEFERRED_ACTION, *POVS_DEFERRED_ACTION; /* @@ -39,8 +41,7 @@ NDIS_STATUS OvsProcessDeferredActions(POVS_SWITCH_CONTEXT switchContext, OvsCompletionList *completionList, UINT32 portNo, - ULONG sendFlags, - OVS_PACKET_HDR_INFO *layers); + ULONG sendFlags); /* * -------------------------------------------------------------------------- @@ -52,6 +53,7 @@ OvsProcessDeferredActions(POVS_SWITCH_CONTEXT switchContext, POVS_DEFERRED_ACTION OvsAddDeferredActions(PNET_BUFFER_LIST packet, OvsFlowKey *key, + POVS_PACKET_HDR_INFO layers, const PNL_ATTR actions); /* diff --git 
a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h b/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h index 4cce92f66c..bc18c56b81 100644 --- a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h +++ b/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h @@ -108,7 +108,14 @@ static inline bool rpl_nf_ct_delete(struct nf_conn *ct, u32 portid, int report) static inline unsigned int rpl_nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state) { - return nf_conntrack_in(state->net, state->pf, state->hook, skb); + int err; + + /* Repeat if requested, see nf_iterate(). */ + do { + err = nf_conntrack_in(state->net, state->pf, state->hook, skb); + } while (err == NF_REPEAT); + + return err; } #define nf_conntrack_in rpl_nf_conntrack_in #endif /* HAVE_NF_CONNTRACK_IN_TAKES_NF_HOOK_STATE */ diff --git a/debian/changelog b/debian/changelog index 239d210b96..522e10b0e5 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,33 @@ +openvswitch (2.16.5-1) unstable; urgency=low + [ Open vSwitch team ] + * New upstream version + + -- Open vSwitch team <dev@openvswitch.org>  Wed, 15 Jun 2022 12:03:55 +0200 + +openvswitch (2.16.4-1) unstable; urgency=low + [ Open vSwitch team ] + * New upstream version + + -- Open vSwitch team <dev@openvswitch.org>  Wed, 15 Jun 2022 12:03:55 +0200 + +openvswitch (2.16.3-1) unstable; urgency=low + [ Open vSwitch team ] + * New upstream version + + -- Open vSwitch team <dev@openvswitch.org>  Fri, 08 Apr 2022 14:57:43 +0200 + +openvswitch (2.16.2-1) unstable; urgency=low + [ Open vSwitch team ] + * New upstream version + + -- Open vSwitch team <dev@openvswitch.org>  Fri, 17 Dec 2021 22:14:03 +0100 + +openvswitch (2.16.1-1) unstable; urgency=low + [ Open vSwitch team ] + * New upstream version + + -- Open vSwitch team <dev@openvswitch.org>  Thu, 21 Oct 2021 23:58:12 +0200 + openvswitch (2.16.0-1) unstable; urgency=low * New upstream version diff --git a/dpdk/lib/librte_vhost/vhost_user.c b/dpdk/lib/librte_vhost/vhost_user.c index 45c8ac09da..70d206dcf8 100644 --- 
a/dpdk/lib/librte_vhost/vhost_user.c +++ b/dpdk/lib/librte_vhost/vhost_user.c @@ -1416,6 +1416,9 @@ vhost_user_get_inflight_fd(struct virtio_net **pdev, int fd, i, j; void *addr; + if (validate_msg_fds(msg, 0) != 0) + return RTE_VHOST_MSG_RESULT_ERR; + if (msg->size != sizeof(msg->payload.inflight)) { VHOST_LOG_CONFIG(ERR, "invalid get_inflight_fd message size is %d\n", @@ -1509,6 +1512,9 @@ vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg, void *addr; int fd, i; + if (validate_msg_fds(msg, 1) != 0) + return RTE_VHOST_MSG_RESULT_ERR; + fd = msg->fds[0]; if (msg->size != sizeof(msg->payload.inflight) || fd < 0) { VHOST_LOG_CONFIG(ERR, @@ -2652,6 +2658,9 @@ vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev, case VHOST_USER_SET_VRING_ADDR: vring_idx = msg->payload.addr.index; break; + case VHOST_USER_SET_INFLIGHT_FD: + vring_idx = msg->payload.inflight.num_queues - 1; + break; default: return 0; } diff --git a/include/linux/automake.mk b/include/linux/automake.mk index 8f063f482e..f857c7e088 100644 --- a/include/linux/automake.mk +++ b/include/linux/automake.mk @@ -2,6 +2,7 @@ noinst_HEADERS += \ include/linux/netlink.h \ include/linux/netfilter/nf_conntrack_sctp.h \ include/linux/pkt_cls.h \ + include/linux/gen_stats.h \ include/linux/tc_act/tc_mpls.h \ include/linux/tc_act/tc_pedit.h \ include/linux/tc_act/tc_skbedit.h \ diff --git a/include/linux/gen_stats.h b/include/linux/gen_stats.h new file mode 100644 index 0000000000..6fae6f727c --- /dev/null +++ b/include/linux/gen_stats.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LINUX_GEN_STATS_WRAPPER_H +#define __LINUX_GEN_STATS_WRAPPER_H 1 + +#if defined(__KERNEL__) || defined(HAVE_TCA_STATS_PKT64) +#include_next <linux/gen_stats.h> +#else +#include <linux/types.h> + +enum { + TCA_STATS_UNSPEC, + TCA_STATS_BASIC, + TCA_STATS_RATE_EST, + TCA_STATS_QUEUE, + TCA_STATS_APP, + TCA_STATS_RATE_EST64, + TCA_STATS_PAD, + TCA_STATS_BASIC_HW, + TCA_STATS_PKT64, + __TCA_STATS_MAX, +}; 
+#define TCA_STATS_MAX (__TCA_STATS_MAX - 1) + +/** + * struct gnet_stats_basic - byte/packet throughput statistics + * @bytes: number of seen bytes + * @packets: number of seen packets + */ +struct gnet_stats_basic { + __u64 bytes; + __u32 packets; +}; + +/** + * struct gnet_stats_rate_est - rate estimator + * @bps: current byte rate + * @pps: current packet rate + */ +struct gnet_stats_rate_est { + __u32 bps; + __u32 pps; +}; + +/** + * struct gnet_stats_rate_est64 - rate estimator + * @bps: current byte rate + * @pps: current packet rate + */ +struct gnet_stats_rate_est64 { + __u64 bps; + __u64 pps; +}; + +/** + * struct gnet_stats_queue - queuing statistics + * @qlen: queue length + * @backlog: backlog size of queue + * @drops: number of dropped packets + * @requeues: number of requeues + * @overlimits: number of enqueues over the limit + */ +struct gnet_stats_queue { + __u32 qlen; + __u32 backlog; + __u32 drops; + __u32 requeues; + __u32 overlimits; +}; + +/** + * struct gnet_estimator - rate estimator configuration + * @interval: sampling period + * @ewma_log: the log of measurement window weight + */ +struct gnet_estimator { + signed char interval; + unsigned char ewma_log; +}; + +#endif /* __KERNEL__ || !HAVE_TCA_STATS_PKT64 */ +#endif /* __LINUX_GEN_STATS_WRAPPER_H */ diff --git a/include/openvswitch/flow.h b/include/openvswitch/flow.h index 3054015d93..df10cf579e 100644 --- a/include/openvswitch/flow.h +++ b/include/openvswitch/flow.h @@ -141,15 +141,14 @@ struct flow { uint8_t nw_tos; /* IP ToS (including DSCP and ECN). */ uint8_t nw_ttl; /* IP TTL/Hop Limit. */ uint8_t nw_proto; /* IP protocol or low 8 bits of ARP opcode. */ + /* L4 (64-bit aligned) */ struct in6_addr nd_target; /* IPv6 neighbor discovery (ND) target. */ struct eth_addr arp_sha; /* ARP/ND source hardware address. */ struct eth_addr arp_tha; /* ARP/ND target hardware address. */ - ovs_be16 tcp_flags; /* TCP flags/ICMPv6 ND options type. - * With L3 to avoid matching L4. 
*/ + ovs_be16 tcp_flags; /* TCP flags/ICMPv6 ND options type. */ ovs_be16 pad2; /* Pad to 64 bits. */ struct ovs_key_nsh nsh; /* Network Service Header keys */ - /* L4 (64-bit aligned) */ ovs_be16 tp_src; /* TCP/UDP/SCTP source port/ICMP type. */ ovs_be16 tp_dst; /* TCP/UDP/SCTP destination port/ICMP code. */ ovs_be16 ct_tp_src; /* CT original tuple source port/ICMP type. */ @@ -179,7 +178,7 @@ BUILD_ASSERT_DECL(offsetof(struct flow, igmp_group_ip4) + sizeof(uint32_t) enum { FLOW_SEGMENT_1_ENDS_AT = offsetof(struct flow, dl_dst), FLOW_SEGMENT_2_ENDS_AT = offsetof(struct flow, nw_src), - FLOW_SEGMENT_3_ENDS_AT = offsetof(struct flow, tp_src), + FLOW_SEGMENT_3_ENDS_AT = offsetof(struct flow, nd_target), }; BUILD_ASSERT_DECL(FLOW_SEGMENT_1_ENDS_AT % sizeof(uint64_t) == 0); BUILD_ASSERT_DECL(FLOW_SEGMENT_2_ENDS_AT % sizeof(uint64_t) == 0); diff --git a/include/openvswitch/hmap.h b/include/openvswitch/hmap.h index 4e001cc692..68c284cf14 100644 --- a/include/openvswitch/hmap.h +++ b/include/openvswitch/hmap.h @@ -134,17 +134,17 @@ struct hmap_node *hmap_random_node(const struct hmap *); * without using 'break', NODE will be NULL. This is true for all of the * HMAP_FOR_EACH_*() macros. 
*/ -#define HMAP_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, HMAP) \ - for (INIT_CONTAINER(NODE, hmap_first_with_hash(HMAP, HASH), MEMBER); \ - (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ - || ((NODE = NULL), false); \ - ASSIGN_CONTAINER(NODE, hmap_next_with_hash(&(NODE)->MEMBER), \ - MEMBER)) -#define HMAP_FOR_EACH_IN_BUCKET(NODE, MEMBER, HASH, HMAP) \ - for (INIT_CONTAINER(NODE, hmap_first_in_bucket(HMAP, HASH), MEMBER); \ - (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ - || ((NODE = NULL), false); \ - ASSIGN_CONTAINER(NODE, hmap_next_in_bucket(&(NODE)->MEMBER), MEMBER)) +#define HMAP_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, HMAP) \ + for (INIT_MULTIVAR(NODE, MEMBER, hmap_first_with_hash(HMAP, HASH), \ + struct hmap_node); \ + CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ + UPDATE_MULTIVAR(NODE, hmap_next_with_hash(ITER_VAR(NODE)))) + +#define HMAP_FOR_EACH_IN_BUCKET(NODE, MEMBER, HASH, HMAP) \ + for (INIT_MULTIVAR(NODE, MEMBER, hmap_first_in_bucket(HMAP, HASH), \ + struct hmap_node); \ + CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ + UPDATE_MULTIVAR(NODE, hmap_next_in_bucket(ITER_VAR(NODE)))) static inline struct hmap_node *hmap_first_with_hash(const struct hmap *, size_t hash); @@ -170,54 +170,62 @@ bool hmap_contains(const struct hmap *, const struct hmap_node *); /* Iterates through every node in HMAP. */ #define HMAP_FOR_EACH(NODE, MEMBER, HMAP) \ HMAP_FOR_EACH_INIT(NODE, MEMBER, HMAP, (void) 0) -#define HMAP_FOR_EACH_INIT(NODE, MEMBER, HMAP, ...) \ - for (INIT_CONTAINER(NODE, hmap_first(HMAP), MEMBER), __VA_ARGS__; \ - (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ - || ((NODE = NULL), false); \ - ASSIGN_CONTAINER(NODE, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER)) +#define HMAP_FOR_EACH_INIT(NODE, MEMBER, HMAP, ...) 
\ + for (INIT_MULTIVAR_EXP(NODE, MEMBER, hmap_first(HMAP), struct hmap_node, \ + __VA_ARGS__); \ + CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ + UPDATE_MULTIVAR(NODE, hmap_next(HMAP, ITER_VAR(NODE)))) /* Safe when NODE may be freed (not needed when NODE may be removed from the * hash map but its members remain accessible and intact). */ #define HMAP_FOR_EACH_SAFE(NODE, NEXT, MEMBER, HMAP) \ - HMAP_FOR_EACH_SAFE_INIT(NODE, NEXT, MEMBER, HMAP, (void) 0) -#define HMAP_FOR_EACH_SAFE_INIT(NODE, NEXT, MEMBER, HMAP, ...) \ - for (INIT_CONTAINER(NODE, hmap_first(HMAP), MEMBER), __VA_ARGS__; \ - ((NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ - || ((NODE = NULL), false) \ - ? INIT_CONTAINER(NEXT, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER), 1 \ - : 0); \ - (NODE) = (NEXT)) + HMAP_FOR_EACH_SAFE_INIT (NODE, NEXT, MEMBER, HMAP, (void) NEXT) + +#define HMAP_FOR_EACH_SAFE_INIT(NODE, NEXT, MEMBER, HMAP, ...) \ + for (INIT_MULTIVAR_SAFE_LONG_EXP(NODE, NEXT, MEMBER, hmap_first(HMAP), \ + struct hmap_node, __VA_ARGS__); \ + CONDITION_MULTIVAR_SAFE_LONG(NODE, NEXT, MEMBER, \ + ITER_VAR(NODE) != NULL, \ + ITER_VAR(NEXT) = hmap_next(HMAP, ITER_VAR(NODE)), \ + ITER_VAR(NEXT) != NULL); \ + UPDATE_MULTIVAR_SAFE_LONG(NODE, NEXT)) /* Continues an iteration from just after NODE. */ #define HMAP_FOR_EACH_CONTINUE(NODE, MEMBER, HMAP) \ HMAP_FOR_EACH_CONTINUE_INIT(NODE, MEMBER, HMAP, (void) 0) -#define HMAP_FOR_EACH_CONTINUE_INIT(NODE, MEMBER, HMAP, ...) \ - for (ASSIGN_CONTAINER(NODE, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER), \ - __VA_ARGS__; \ - (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ - || ((NODE = NULL), false); \ - ASSIGN_CONTAINER(NODE, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER)) +#define HMAP_FOR_EACH_CONTINUE_INIT(NODE, MEMBER, HMAP, ...) 
\ + for (INIT_MULTIVAR_EXP(NODE, MEMBER, hmap_next(HMAP, &(NODE)->MEMBER), \ + struct hmap_node, __VA_ARGS__); \ + CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ + UPDATE_MULTIVAR(NODE, hmap_next(HMAP, ITER_VAR(NODE)))) + +struct hmap_pop_helper_iter__ { + size_t bucket; + struct hmap_node *node; +}; -static inline struct hmap_node * -hmap_pop_helper__(struct hmap *hmap, size_t *bucket) { +static inline void +hmap_pop_helper__(struct hmap *hmap, struct hmap_pop_helper_iter__ *iter) { - for (; *bucket <= hmap->mask; (*bucket)++) { - struct hmap_node *node = hmap->buckets[*bucket]; + for (; iter->bucket <= hmap->mask; (iter->bucket)++) { + struct hmap_node *node = hmap->buckets[iter->bucket]; if (node) { hmap_remove(hmap, node); - return node; + iter->node = node; + return; } } - - return NULL; + iter->node = NULL; } -#define HMAP_FOR_EACH_POP(NODE, MEMBER, HMAP) \ - for (size_t bucket__ = 0; \ - INIT_CONTAINER(NODE, hmap_pop_helper__(HMAP, &bucket__), MEMBER), \ - (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ - || ((NODE = NULL), false);) +#define HMAP_FOR_EACH_POP(NODE, MEMBER, HMAP) \ + for (struct hmap_pop_helper_iter__ ITER_VAR(NODE) = { 0, NULL }; \ + hmap_pop_helper__(HMAP, &ITER_VAR(NODE)), \ + (ITER_VAR(NODE).node != NULL) ? \ + (((NODE) = OBJECT_CONTAINING(ITER_VAR(NODE).node, \ + NODE, MEMBER)),1): \ + (((NODE) = NULL), 0);) static inline struct hmap_node *hmap_first(const struct hmap *); static inline struct hmap_node *hmap_next(const struct hmap *, diff --git a/include/openvswitch/json.h b/include/openvswitch/json.h index 73b562e03d..0831a9cee1 100644 --- a/include/openvswitch/json.h +++ b/include/openvswitch/json.h @@ -50,7 +50,9 @@ enum json_type { JSON_INTEGER, /* 123. */ JSON_REAL, /* 123.456. */ JSON_STRING, /* "..." */ - JSON_N_TYPES + JSON_N_TYPES, + JSON_SERIALIZED_OBJECT, /* Internal type to hold serialized version of + * data of other types. 
*/ }; const char *json_type_to_string(enum json_type); @@ -70,7 +72,7 @@ struct json { struct json_array array; long long int integer; double real; - char *string; + char *string; /* JSON_STRING or JSON_SERIALIZED_OBJECT. */ }; }; @@ -78,6 +80,7 @@ struct json *json_null_create(void); struct json *json_boolean_create(bool); struct json *json_string_create(const char *); struct json *json_string_create_nocopy(char *); +struct json *json_serialized_object_create(const struct json *); struct json *json_integer_create(long long int); struct json *json_real_create(double); @@ -99,6 +102,7 @@ void json_object_put_format(struct json *, OVS_PRINTF_FORMAT(3, 4); const char *json_string(const struct json *); +const char *json_serialized_object(const struct json *); struct json_array *json_array(const struct json *); struct shash *json_object(const struct json *); bool json_boolean(const struct json *); @@ -125,6 +129,7 @@ struct json *json_parser_finish(struct json_parser *); void json_parser_abort(struct json_parser *); struct json *json_from_string(const char *string); +struct json *json_from_serialized_object(const struct json *); struct json *json_from_file(const char *file_name); struct json *json_from_stream(FILE *stream); diff --git a/include/openvswitch/list.h b/include/openvswitch/list.h index 8ad5eeb327..bbd2edbd0c 100644 --- a/include/openvswitch/list.h +++ b/include/openvswitch/list.h @@ -72,37 +72,48 @@ static inline bool ovs_list_is_empty(const struct ovs_list *); static inline bool ovs_list_is_singleton(const struct ovs_list *); static inline bool ovs_list_is_short(const struct ovs_list *); -#define LIST_FOR_EACH(ITER, MEMBER, LIST) \ - for (INIT_CONTAINER(ITER, (LIST)->next, MEMBER); \ - &(ITER)->MEMBER != (LIST); \ - ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.next, MEMBER)) -#define LIST_FOR_EACH_CONTINUE(ITER, MEMBER, LIST) \ - for (ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.next, MEMBER); \ - &(ITER)->MEMBER != (LIST); \ - ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.next, 
MEMBER)) -#define LIST_FOR_EACH_REVERSE(ITER, MEMBER, LIST) \ - for (INIT_CONTAINER(ITER, (LIST)->prev, MEMBER); \ - &(ITER)->MEMBER != (LIST); \ - ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER)) -#define LIST_FOR_EACH_REVERSE_SAFE(ITER, PREV, MEMBER, LIST) \ - for (INIT_CONTAINER(ITER, (LIST)->prev, MEMBER); \ - (&(ITER)->MEMBER != (LIST) \ - ? INIT_CONTAINER(PREV, (ITER)->MEMBER.prev, MEMBER), 1 \ - : 0); \ - (ITER) = (PREV)) -#define LIST_FOR_EACH_REVERSE_CONTINUE(ITER, MEMBER, LIST) \ - for (ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER); \ - &(ITER)->MEMBER != (LIST); \ - ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER)) -#define LIST_FOR_EACH_SAFE(ITER, NEXT, MEMBER, LIST) \ - for (INIT_CONTAINER(ITER, (LIST)->next, MEMBER); \ - (&(ITER)->MEMBER != (LIST) \ - ? INIT_CONTAINER(NEXT, (ITER)->MEMBER.next, MEMBER), 1 \ - : 0); \ - (ITER) = (NEXT)) -#define LIST_FOR_EACH_POP(ITER, MEMBER, LIST) \ - while (!ovs_list_is_empty(LIST) \ - && (INIT_CONTAINER(ITER, ovs_list_pop_front(LIST), MEMBER), 1)) +#define LIST_FOR_EACH(VAR, MEMBER, LIST) \ + for (INIT_MULTIVAR(VAR, MEMBER, (LIST)->next, struct ovs_list); \ + CONDITION_MULTIVAR(VAR, MEMBER, ITER_VAR(VAR) != (LIST)); \ + UPDATE_MULTIVAR(VAR, ITER_VAR(VAR)->next)) + +#define LIST_FOR_EACH_CONTINUE(VAR, MEMBER, LIST) \ + for (INIT_MULTIVAR(VAR, MEMBER, VAR->MEMBER.next, struct ovs_list); \ + CONDITION_MULTIVAR(VAR, MEMBER, ITER_VAR(VAR) != (LIST)); \ + UPDATE_MULTIVAR(VAR, ITER_VAR(VAR)->next)) + +#define LIST_FOR_EACH_REVERSE(VAR, MEMBER, LIST) \ + for (INIT_MULTIVAR(VAR, MEMBER, (LIST)->prev, struct ovs_list); \ + CONDITION_MULTIVAR(VAR, MEMBER, ITER_VAR(VAR) != (LIST)); \ + UPDATE_MULTIVAR(VAR, ITER_VAR(VAR)->prev)) + +#define LIST_FOR_EACH_REVERSE_CONTINUE(VAR, MEMBER, LIST) \ + for (INIT_MULTIVAR(VAR, MEMBER, VAR->MEMBER.prev, struct ovs_list); \ + CONDITION_MULTIVAR(VAR, MEMBER, ITER_VAR(VAR) != (LIST)); \ + UPDATE_MULTIVAR(VAR, ITER_VAR(VAR)->prev)) + +#define LIST_FOR_EACH_REVERSE_SAFE(VAR, PREV, 
MEMBER, LIST) \ + for (INIT_MULTIVAR_SAFE_LONG(VAR, PREV, MEMBER, (LIST)->prev, \ + struct ovs_list); \ + CONDITION_MULTIVAR_SAFE_LONG(VAR, PREV, MEMBER, \ + ITER_VAR(VAR) != (LIST), \ + ITER_VAR(PREV) = ITER_VAR(VAR)->prev, \ + ITER_VAR(PREV) != (LIST)); \ + UPDATE_MULTIVAR_SAFE_LONG(VAR, PREV)) + +#define LIST_FOR_EACH_SAFE(VAR, NEXT, MEMBER, LIST) \ + for (INIT_MULTIVAR_SAFE_LONG(VAR, NEXT, MEMBER, (LIST)->next, \ + struct ovs_list); \ + CONDITION_MULTIVAR_SAFE_LONG(VAR, NEXT, MEMBER, \ + ITER_VAR(VAR) != (LIST), \ + ITER_VAR(NEXT) = ITER_VAR(VAR)->next, \ + ITER_VAR(NEXT) != (LIST)); \ + UPDATE_MULTIVAR_SAFE_LONG(VAR, NEXT)) + +#define LIST_FOR_EACH_POP(ITER, MEMBER, LIST) \ + while (!ovs_list_is_empty(LIST) ? \ + (INIT_CONTAINER(ITER, ovs_list_pop_front(LIST), MEMBER), 1) : \ + (ITER = NULL, 0)) /* Inline implementations. */ diff --git a/include/openvswitch/meta-flow.h b/include/openvswitch/meta-flow.h index 95e52e3587..045dce8f5f 100644 --- a/include/openvswitch/meta-flow.h +++ b/include/openvswitch/meta-flow.h @@ -2305,6 +2305,7 @@ void mf_set_flow_value_masked(const struct mf_field *, const union mf_value *mask, struct flow *); bool mf_is_tun_metadata(const struct mf_field *); +bool mf_is_frozen_metadata(const struct mf_field *); bool mf_is_pipeline_field(const struct mf_field *); bool mf_is_set(const struct mf_field *, const struct flow *); void mf_mask_field(const struct mf_field *, struct flow_wildcards *); diff --git a/include/openvswitch/util.h b/include/openvswitch/util.h index 228b185c3a..8e6c46a85f 100644 --- a/include/openvswitch/util.h +++ b/include/openvswitch/util.h @@ -145,6 +145,150 @@ OVS_NO_RETURN void ovs_assert_failure(const char *, const char *, const char *); #define INIT_CONTAINER(OBJECT, POINTER, MEMBER) \ ((OBJECT) = NULL, ASSIGN_CONTAINER(OBJECT, POINTER, MEMBER)) +/* Multi-variable container iterators. + * + * The following macros facilitate safe iteration over data structures + * contained in objects. 
It does so by using an internal iterator variable of + * the type of the member object pointer (i.e: pointer to the data structure). + */ + +/* Multi-variable iterator variable name. + * Returns the name of the internal iterator variable. + */ +#define ITER_VAR(NAME) NAME ## __iterator__ + +/* Multi-variable initialization. Creates an internal iterator variable that + * points to the provided pointer. The type of the iterator variable is + * ITER_TYPE*. It must be the same type as &VAR->MEMBER. + * + * The _EXP version evaluates the extra expressions once. + */ +#define INIT_MULTIVAR(VAR, MEMBER, POINTER, ITER_TYPE) \ + INIT_MULTIVAR_EXP(VAR, MEMBER, POINTER, ITER_TYPE, (void) 0) + +#define INIT_MULTIVAR_EXP(VAR, MEMBER, POINTER, ITER_TYPE, ...) \ + ITER_TYPE *ITER_VAR(VAR) = ( __VA_ARGS__ , (ITER_TYPE *) POINTER) + +/* Multi-variable condition. + * Evaluates the condition expression (that must be based on the internal + * iterator variable). Only if the result of expression is true, the OBJECT is + * set to the object containing the current value of the iterator variable. + * + * It is up to the caller to make sure it is safe to run OBJECT_CONTAINING on + * the pointers that verify the condition. + */ +#define CONDITION_MULTIVAR(VAR, MEMBER, EXPR) \ + ((EXPR) ? \ + (((VAR) = OBJECT_CONTAINING(ITER_VAR(VAR), VAR, MEMBER)), 1) : \ + (((VAR) = NULL), 0)) + +/* Multi-variable update. + * Sets the iterator value to NEXT_ITER. + */ +#define UPDATE_MULTIVAR(VAR, NEXT_ITER) \ + (ITER_VAR(VAR) = NEXT_ITER) + +/* In the safe version of the multi-variable container iteration, the next + * value of the iterator is precalculated on the condition expression. + * This allows for the iterator to be freed inside the loop. + * + * Two versions of the macros are provided: + * + * * In the _SHORT version, the user does not have to provide a variable to + * store the next value of the iterator. 
Instead, a second iterator variable + * is declared in the INIT_ macro and its name is determined by + * ITER_NEXT_VAR(OBJECT). + * + * * In the _LONG version, the user provides another variable of the same type + * as the iterator object variable to store the next containing object. + * We still declare an iterator variable inside the loop but in this case its + * name is derived from the name of the next containing variable. + * The value of the next containing object will only be set + * (via OBJECT_CONTAINING) if an additional condition is satisfied. This + * second condition must ensure it is safe to call OBJECT_CONTAINING on the + * next iterator variable. + * With respect to the value of the next containing object: + * - Inside of the loop: the variable is either NULL or safe to use. + * - Outside of the loop: the variable is NULL if the loop ends normally. + * If the loop ends with a "break;" statement, rules of Inside the loop + * apply. + */ +#define ITER_NEXT_VAR(NAME) NAME ## __iterator__next__ + +/* Safe initialization declares both iterators. */ +#define INIT_MULTIVAR_SAFE_SHORT(VAR, MEMBER, POINTER, ITER_TYPE) \ + INIT_MULTIVAR_SAFE_SHORT_EXP(VAR, MEMBER, POINTER, ITER_TYPE, (void) 0) + +#define INIT_MULTIVAR_SAFE_SHORT_EXP(VAR, MEMBER, POINTER, ITER_TYPE, ...) \ + ITER_TYPE *ITER_VAR(VAR) = ( __VA_ARGS__ , (ITER_TYPE *) POINTER), \ + *ITER_NEXT_VAR(VAR) = NULL + +/* Evaluate the condition expression and, if satisfied, update the _next_ + * iterator with the NEXT_EXPR. + * Both EXPR and NEXT_EXPR should only use ITER_VAR(VAR) and + * ITER_NEXT_VAR(VAR). + */ +#define CONDITION_MULTIVAR_SAFE_SHORT(VAR, MEMBER, EXPR, NEXT_EXPR) \ + ((EXPR) ? \ + (((VAR) = OBJECT_CONTAINING(ITER_VAR(VAR), VAR, MEMBER)), \ + (NEXT_EXPR), 1) : \ + (((VAR) = NULL), 0)) + +#define UPDATE_MULTIVAR_SAFE_SHORT(VAR) \ + UPDATE_MULTIVAR(VAR, ITER_NEXT_VAR(VAR)) + +/* _LONG versions of the macros.
*/ + +#define INIT_MULTIVAR_SAFE_LONG(VAR, NEXT_VAR, MEMBER, POINTER, ITER_TYPE) \ + INIT_MULTIVAR_SAFE_LONG_EXP(VAR, NEXT_VAR, MEMBER, POINTER, ITER_TYPE, \ + (void) 0) \ + +#define INIT_MULTIVAR_SAFE_LONG_EXP(VAR, NEXT_VAR, MEMBER, POINTER, \ + ITER_TYPE, ...) \ + ITER_TYPE *ITER_VAR(VAR) = ( __VA_ARGS__ , (ITER_TYPE *) POINTER), \ + *ITER_VAR(NEXT_VAR) = NULL + +/* Evaluate the condition expression and, if satisfied, update the _next_ + * iterator with the NEXT_EXPR. After, evaluate the NEXT_COND and, if + * satisfied, set the value to NEXT_VAR. NEXT_COND must use ITER_VAR(NEXT_VAR). + * + * Both EXPR and NEXT_EXPR should only use ITER_VAR(VAR) and + * ITER_VAR(NEXT_VAR). + */ +#define CONDITION_MULTIVAR_SAFE_LONG(VAR, NEXT_VAR, MEMBER, EXPR, NEXT_EXPR, \ + NEXT_COND) \ + ((EXPR) ? \ + (((VAR) = OBJECT_CONTAINING(ITER_VAR(VAR), VAR, MEMBER)), \ + (NEXT_EXPR), ((NEXT_COND) ? \ + ((NEXT_VAR) = \ + OBJECT_CONTAINING(ITER_VAR(NEXT_VAR), NEXT_VAR, MEMBER)) : \ + ((NEXT_VAR) = NULL)), 1) : \ + (((VAR) = NULL), ((NEXT_VAR) = NULL), 0)) + +#define UPDATE_MULTIVAR_SAFE_LONG(VAR, NEXT_VAR) \ + UPDATE_MULTIVAR(VAR, ITER_VAR(NEXT_VAR)) + +/* Helpers to allow overloading the *_SAFE iterator macros and select either + * the LONG or the SHORT version depending on the number of arguments. + */ +#define GET_SAFE_MACRO2(_1, _2, NAME, ...) NAME +#define GET_SAFE_MACRO3(_1, _2, _3, NAME, ...) NAME +#define GET_SAFE_MACRO4(_1, _2, _3, _4, NAME, ...) NAME +#define GET_SAFE_MACRO5(_1, _2, _3, _4, _5, NAME, ...) NAME +#define GET_SAFE_MACRO6(_1, _2, _3, _4, _5, _6, NAME, ...) NAME +#define GET_SAFE_MACRO(MAX_ARGS) GET_SAFE_MACRO ## MAX_ARGS + +/* MSVC treats __VA_ARGS__ as a simple token in argument lists. Introduce + * a level of indirection to work around that. */ +#define EXPAND_MACRO(name, args) name args + +/* Overload the LONG and the SHORT version of the macros. MAX_ARGS is the + * maximum number of arguments (i.e: the number of arguments of the LONG + * version). 
*/ +#define OVERLOAD_SAFE_MACRO(LONG, SHORT, MAX_ARGS, ...) \ + EXPAND_MACRO(GET_SAFE_MACRO(MAX_ARGS), \ + (__VA_ARGS__, LONG, SHORT))(__VA_ARGS__) + /* Returns the number of elements in ARRAY. */ #define ARRAY_SIZE(ARRAY) __ARRAY_SIZE(ARRAY) @@ -285,6 +429,9 @@ is_pow2(uintmax_t x) * segfault, so it is important to be aware of correct alignment. */ #define ALIGNED_CAST(TYPE, ATTR) ((TYPE) (void *) (ATTR)) +#define IS_PTR_ALIGNED(OBJ) \ + (!(OBJ) || (uintptr_t) (OBJ) % __alignof__(OVS_TYPEOF(OBJ)) == 0) + #ifdef __cplusplus } #endif diff --git a/ipsec/ovs-monitor-ipsec.in b/ipsec/ovs-monitor-ipsec.in index 89a36fe17b..a8b0705d9f 100755 --- a/ipsec/ovs-monitor-ipsec.in +++ b/ipsec/ovs-monitor-ipsec.in @@ -202,18 +202,18 @@ conn prevent_unencrypted_vxlan """ auth_tmpl = {"psk": Template("""\ - left=0.0.0.0 + left=%any right=$remote_ip authby=psk"""), "pki_remote": Template("""\ - left=0.0.0.0 + left=%any right=$remote_ip leftid=$local_name rightid=$remote_name leftcert=$certificate rightcert=$remote_cert"""), "pki_ca": Template("""\ - left=0.0.0.0 + left=%any right=$remote_ip leftid=$local_name rightid=$remote_name @@ -299,11 +299,11 @@ conn prevent_unencrypted_vxlan def config_tunnel(self, tunnel): if tunnel.conf["psk"]: - self.secrets_file.write('0.0.0.0 %s : PSK "%s"\n' % + self.secrets_file.write('%%any %s : PSK "%s"\n' % (tunnel.conf["remote_ip"], tunnel.conf["psk"])) auth_section = self.auth_tmpl["psk"].substitute(tunnel.conf) else: - self.secrets_file.write("0.0.0.0 %s : RSA %s\n" % + self.secrets_file.write("%%any %s : RSA %s\n" % (tunnel.conf["remote_ip"], tunnel.conf["private_key"])) if tunnel.conf["remote_cert"]: diff --git a/lib/bfd.c b/lib/bfd.c index 3c965699ac..9698576d07 100644 --- a/lib/bfd.c +++ b/lib/bfd.c @@ -131,16 +131,17 @@ enum diag { * | Required Min Echo RX Interval | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ struct msg { - uint8_t vers_diag; /* Version and diagnostic. 
*/ - uint8_t flags; /* 2bit State field followed by flags. */ - uint8_t mult; /* Fault detection multiplier. */ - uint8_t length; /* Length of this BFD message. */ - ovs_be32 my_disc; /* My discriminator. */ - ovs_be32 your_disc; /* Your discriminator. */ - ovs_be32 min_tx; /* Desired minimum tx interval. */ - ovs_be32 min_rx; /* Required minimum rx interval. */ - ovs_be32 min_rx_echo; /* Required minimum echo rx interval. */ + uint8_t vers_diag; /* Version and diagnostic. */ + uint8_t flags; /* 2bit State field followed by flags. */ + uint8_t mult; /* Fault detection multiplier. */ + uint8_t length; /* Length of this BFD message. */ + ovs_16aligned_be32 my_disc; /* My discriminator. */ + ovs_16aligned_be32 your_disc; /* Your discriminator. */ + ovs_16aligned_be32 min_tx; /* Desired minimum tx interval. */ + ovs_16aligned_be32 min_rx; /* Required minimum rx interval. */ + ovs_16aligned_be32 min_rx_echo; /* Required minimum echo rx interval. */ }; + BUILD_ASSERT_DECL(BFD_PACKET_LEN == sizeof(struct msg)); #define DIAG_MASK 0x1f @@ -634,9 +635,9 @@ bfd_put_packet(struct bfd *bfd, struct dp_packet *p, msg->mult = bfd->mult; msg->length = BFD_PACKET_LEN; - msg->my_disc = htonl(bfd->disc); - msg->your_disc = htonl(bfd->rmt_disc); - msg->min_rx_echo = htonl(0); + put_16aligned_be32(&msg->my_disc, htonl(bfd->disc)); + put_16aligned_be32(&msg->your_disc, htonl(bfd->rmt_disc)); + put_16aligned_be32(&msg->min_rx_echo, htonl(0)); if (bfd_in_poll(bfd)) { min_tx = bfd->poll_min_tx; @@ -646,8 +647,8 @@ bfd_put_packet(struct bfd *bfd, struct dp_packet *p, min_rx = bfd->min_rx; } - msg->min_tx = htonl(min_tx * 1000); - msg->min_rx = htonl(min_rx * 1000); + put_16aligned_be32(&msg->min_tx, htonl(min_tx * 1000)); + put_16aligned_be32(&msg->min_rx, htonl(min_rx * 1000)); bfd->flags &= ~FLAG_FINAL; *oam = bfd->oam; @@ -781,12 +782,12 @@ bfd_process_packet(struct bfd *bfd, const struct flow *flow, goto out; } - if (!msg->my_disc) { + if (!get_16aligned_be32(&msg->my_disc)) { 
log_msg(VLL_WARN, msg, "NULL my_disc", bfd); goto out; } - pkt_your_disc = ntohl(msg->your_disc); + pkt_your_disc = ntohl(get_16aligned_be32(&msg->your_disc)); if (pkt_your_disc) { /* Technically, we should use the your discriminator field to figure * out which 'struct bfd' this packet is destined towards. That way a @@ -806,7 +807,7 @@ bfd_process_packet(struct bfd *bfd, const struct flow *flow, bfd_status_changed(bfd); } - bfd->rmt_disc = ntohl(msg->my_disc); + bfd->rmt_disc = ntohl(get_16aligned_be32(&msg->my_disc)); bfd->rmt_state = rmt_state; bfd->rmt_flags = flags; bfd->rmt_diag = msg->vers_diag & DIAG_MASK; @@ -834,7 +835,7 @@ bfd_process_packet(struct bfd *bfd, const struct flow *flow, bfd->rmt_mult = msg->mult; } - rmt_min_rx = MAX(ntohl(msg->min_rx) / 1000, 1); + rmt_min_rx = MAX(ntohl(get_16aligned_be32(&msg->min_rx)) / 1000, 1); if (bfd->rmt_min_rx != rmt_min_rx) { bfd->rmt_min_rx = rmt_min_rx; if (bfd->next_tx) { @@ -843,7 +844,7 @@ bfd_process_packet(struct bfd *bfd, const struct flow *flow, log_msg(VLL_INFO, msg, "New remote min_rx", bfd); } - bfd->rmt_min_tx = MAX(ntohl(msg->min_tx) / 1000, 1); + bfd->rmt_min_tx = MAX(ntohl(get_16aligned_be32(&msg->min_tx)) / 1000, 1); bfd->detect_time = bfd_rx_interval(bfd) * bfd->rmt_mult + time_msec(); if (bfd->state == STATE_ADMIN_DOWN) { @@ -1105,10 +1106,14 @@ log_msg(enum vlog_level level, const struct msg *p, const char *message, bfd_diag_str(p->vers_diag & DIAG_MASK), bfd_state_str(p->flags & STATE_MASK), p->mult, p->length, bfd_flag_str(p->flags & FLAGS_MASK), - ntohl(p->my_disc), ntohl(p->your_disc), - ntohl(p->min_tx), ntohl(p->min_tx) / 1000, - ntohl(p->min_rx), ntohl(p->min_rx) / 1000, - ntohl(p->min_rx_echo), ntohl(p->min_rx_echo) / 1000); + ntohl(get_16aligned_be32(&p->my_disc)), + ntohl(get_16aligned_be32(&p->your_disc)), + ntohl(get_16aligned_be32(&p->min_tx)), + ntohl(get_16aligned_be32(&p->min_tx)) / 1000, + ntohl(get_16aligned_be32(&p->min_rx)), + ntohl(get_16aligned_be32(&p->min_rx)) / 1000, + 
ntohl(get_16aligned_be32(&p->min_rx_echo)), + ntohl(get_16aligned_be32(&p->min_rx_echo)) / 1000); bfd_put_details(&ds, bfd); VLOG(level, "%s", ds_cstr(&ds)); ds_destroy(&ds); diff --git a/lib/cmap.h b/lib/cmap.h index c502d23112..72e2ec5f71 100644 --- a/lib/cmap.h +++ b/lib/cmap.h @@ -108,6 +108,8 @@ size_t cmap_replace(struct cmap *, struct cmap_node *old_node, * * CMAP and HASH are evaluated only once. NODE is evaluated many times. * + * After a normal exit of the loop (not through a "break;" statement) NODE is + * NULL. * * Thread-safety * ============= @@ -128,15 +130,15 @@ size_t cmap_replace(struct cmap *, struct cmap_node *old_node, * CMAP_FOR_EACH_WITH_HASH_PROTECTED may only be used if CMAP is guaranteed not * to change during iteration. It may be very slightly faster. */ -#define CMAP_NODE_FOR_EACH(NODE, MEMBER, CMAP_NODE) \ - for (INIT_CONTAINER(NODE, CMAP_NODE, MEMBER); \ - (NODE) != OBJECT_CONTAINING(NULL, NODE, MEMBER); \ - ASSIGN_CONTAINER(NODE, cmap_node_next(&(NODE)->MEMBER), MEMBER)) -#define CMAP_NODE_FOR_EACH_PROTECTED(NODE, MEMBER, CMAP_NODE) \ - for (INIT_CONTAINER(NODE, CMAP_NODE, MEMBER); \ - (NODE) != OBJECT_CONTAINING(NULL, NODE, MEMBER); \ - ASSIGN_CONTAINER(NODE, cmap_node_next_protected(&(NODE)->MEMBER), \ - MEMBER)) +#define CMAP_NODE_FOR_EACH(NODE, MEMBER, CMAP_NODE) \ + for (INIT_MULTIVAR(NODE, MEMBER, CMAP_NODE, struct cmap_node); \ + CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ + UPDATE_MULTIVAR(NODE, cmap_node_next(ITER_VAR(NODE)))) +#define CMAP_NODE_FOR_EACH_PROTECTED(NODE, MEMBER, CMAP_NODE) \ + for (INIT_MULTIVAR(NODE, MEMBER, CMAP_NODE, struct cmap_node); \ + CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ + UPDATE_MULTIVAR(NODE, cmap_node_next_protected(ITER_VAR(NODE)))) + #define CMAP_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, CMAP) \ CMAP_NODE_FOR_EACH(NODE, MEMBER, cmap_find(CMAP, HASH)) #define CMAP_FOR_EACH_WITH_HASH_PROTECTED(NODE, MEMBER, HASH, CMAP) \ @@ -223,7 +225,7 @@ unsigned long 
cmap_find_batch(const struct cmap *cmap, unsigned long map, ? (INIT_CONTAINER(NODE, (CURSOR)->node, MEMBER), \ cmap_cursor_advance(CURSOR), \ true) \ - : false) + : (NODE = NULL, false)) #define CMAP_CURSOR_FOR_EACH(NODE, MEMBER, CURSOR, CMAP) \ for (*(CURSOR) = cmap_cursor_start(CMAP); \ diff --git a/lib/conntrack.c b/lib/conntrack.c index 551c2061a7..b8183faa2d 100644 --- a/lib/conntrack.c +++ b/lib/conntrack.c @@ -2238,7 +2238,7 @@ nat_range_hash(const struct conn *conn, uint32_t basis) hash = ct_addr_hash_add(hash, &conn->nat_info->min_addr); hash = ct_addr_hash_add(hash, &conn->nat_info->max_addr); hash = hash_add(hash, - (conn->nat_info->max_port << 16) + ((uint32_t) conn->nat_info->max_port << 16) | conn->nat_info->min_port); hash = ct_endpoint_hash_add(hash, &conn->key.src); hash = ct_endpoint_hash_add(hash, &conn->key.dst); diff --git a/lib/db-ctl-base.c b/lib/db-ctl-base.c index 77cc76a9f6..7074561588 100644 --- a/lib/db-ctl-base.c +++ b/lib/db-ctl-base.c @@ -247,15 +247,15 @@ record_id_equals(const union ovsdb_atom *name, enum ovsdb_atomic_type type, const char *record_id) { if (type == OVSDB_TYPE_STRING) { - if (!strcmp(name->string, record_id)) { + if (!strcmp(name->s->string, record_id)) { return true; } struct uuid uuid; size_t len = strlen(record_id); if (len >= 4 - && uuid_from_string(&uuid, name->string) - && !strncmp(name->string, record_id, len)) { + && uuid_from_string(&uuid, name->s->string) + && !strncmp(name->s->string, record_id, len)) { return true; } @@ -314,15 +314,19 @@ get_row_by_id(struct ctl_context *ctx, row, id->name_column, key, value); /* Extract the name from the column. */ - const union ovsdb_atom *name; + const union ovsdb_atom *name = NULL; if (!id->key) { name = datum->n == 1 ? &datum->keys[0] : NULL; } else { - const union ovsdb_atom key_atom - = { .string = CONST_CAST(char *, id->key) }; - unsigned int i = ovsdb_datum_find_key(datum, &key_atom, - OVSDB_TYPE_STRING); - name = i == UINT_MAX ? 
NULL : &datum->values[i]; + union ovsdb_atom key_atom = { + .s = ovsdb_atom_string_create(CONST_CAST(char *, id->key)) }; + unsigned int i; + + if (ovsdb_datum_find_key(datum, &key_atom, + OVSDB_TYPE_STRING, &i)) { + name = &datum->values[i]; + } + ovsdb_atom_destroy(&key_atom, OVSDB_TYPE_STRING); } if (!name) { continue; @@ -819,14 +823,14 @@ check_condition(const struct ovsdb_idl_table_class *table, goto out; } - idx = ovsdb_datum_find_key(have_datum, - &want_key, column->type.key.type); - if (idx == UINT_MAX && !is_set_operator(operator)) { + bool found = ovsdb_datum_find_key(have_datum, &want_key, + column->type.key.type, &idx); + if (!found && !is_set_operator(operator)) { retval = false; } else { struct ovsdb_datum a; - if (idx != UINT_MAX) { + if (found) { a.n = 1; a.keys = &have_datum->values[idx]; a.values = NULL; @@ -992,9 +996,8 @@ cmd_get(struct ctl_context *ctx) return; } - idx = ovsdb_datum_find_key(datum, &key, - column->type.key.type); - if (idx == UINT_MAX) { + if (!ovsdb_datum_find_key(datum, &key, + column->type.key.type, &idx)) { if (must_exist) { ctl_error( ctx, "no key \"%s\" in %s record \"%s\" column %s", @@ -1375,7 +1378,7 @@ set_column(const struct ovsdb_idl_table_class *table, ovsdb_atom_destroy(&value, column->type.value.type); ovsdb_datum_union(&datum, ovsdb_idl_read(row, column), - &column->type, false); + &column->type); ovsdb_idl_txn_verify(row, column); ovsdb_idl_txn_write(row, column, &datum); } else { @@ -1514,7 +1517,7 @@ cmd_add(struct ctl_context *ctx) ovsdb_datum_destroy(&old, &column->type); return; } - ovsdb_datum_union(&old, &add, type, false); + ovsdb_datum_union(&old, &add, type); ovsdb_datum_destroy(&add, type); } if (old.n > type->n_max) { diff --git a/lib/dns-resolve.c b/lib/dns-resolve.c index d344514343..8bcecb90ce 100644 --- a/lib/dns-resolve.c +++ b/lib/dns-resolve.c @@ -265,7 +265,7 @@ resolve_callback__(void *req_, int err, struct ub_result *result) if (err != 0 || (result->qtype == ns_t_aaaa && 
!result->havedata)) { ub_resolve_free(result); req->state = RESOLVE_ERROR; - VLOG_ERR_RL(&rl, "%s: failed to resolve", req->name); + VLOG_WARN_RL(&rl, "%s: failed to resolve", req->name); return; } diff --git a/lib/dp-packet.c b/lib/dp-packet.c index 72f6d09ac7..35c72542a2 100644 --- a/lib/dp-packet.c +++ b/lib/dp-packet.c @@ -294,7 +294,7 @@ dp_packet_resize(struct dp_packet *b, size_t new_headroom, size_t new_tailroom) void dp_packet_prealloc_tailroom(struct dp_packet *b, size_t size) { - if (size > dp_packet_tailroom(b)) { + if ((size && !dp_packet_base(b)) || (size > dp_packet_tailroom(b))) { dp_packet_resize(b, dp_packet_headroom(b), MAX(size, 64)); } } diff --git a/lib/dp-packet.h b/lib/dp-packet.h index 08d93c2779..3dc582fbfd 100644 --- a/lib/dp-packet.h +++ b/lib/dp-packet.h @@ -199,6 +199,7 @@ struct dp_packet *dp_packet_clone_data_with_headroom(const void *, size_t, void dp_packet_resize(struct dp_packet *b, size_t new_headroom, size_t new_tailroom); static inline void dp_packet_delete(struct dp_packet *); +static inline void dp_packet_swap(struct dp_packet *, struct dp_packet *); static inline void *dp_packet_at(const struct dp_packet *, size_t offset, size_t size); @@ -256,6 +257,18 @@ dp_packet_delete(struct dp_packet *b) } } +/* Swaps content of two packets. */ +static inline void +dp_packet_swap(struct dp_packet *a, struct dp_packet *b) +{ + ovs_assert(a->source == DPBUF_MALLOC || a->source == DPBUF_STUB); + ovs_assert(b->source == DPBUF_MALLOC || b->source == DPBUF_STUB); + struct dp_packet c = *a; + + *a = *b; + *b = c; +} + /* If 'b' contains at least 'offset + size' bytes of data, returns a pointer to * byte 'offset'. Otherwise, returns a null pointer. 
*/ static inline void * diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c index b7d577870d..fe24f9abdf 100644 --- a/lib/dpdk-stub.c +++ b/lib/dpdk-stub.c @@ -83,7 +83,7 @@ bool dpdk_get_cpu_has_isa(const char *arch OVS_UNUSED, const char *feature OVS_UNUSED) { - VLOG_ERR_ONCE("DPDK not supported in this version of Open vSwitch, " + VLOG_DBG_ONCE("DPDK not supported in this version of Open vSwitch, " "cannot use CPU flag based optimizations"); return false; } diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c index 544d36903e..01011f679a 100644 --- a/lib/dpif-netdev-avx512.c +++ b/lib/dpif-netdev-avx512.c @@ -58,19 +58,6 @@ struct dpif_userdata { struct pkt_flow_meta pkt_meta[NETDEV_MAX_BURST]; }; -int32_t -dp_netdev_input_outer_avx512_probe(void) -{ - bool avx512f_available = dpdk_get_cpu_has_isa("x86_64", "avx512f"); - bool bmi2_available = dpdk_get_cpu_has_isa("x86_64", "bmi2"); - - if (!avx512f_available || !bmi2_available) { - return -ENOTSUP; - } - - return 0; -} - int32_t dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd, struct dp_packet_batch *packets, diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c index ec64419e38..993d07e401 100644 --- a/lib/dpif-netdev-extract-avx512.c +++ b/lib/dpif-netdev-extract-avx512.c @@ -43,7 +43,6 @@ #include #include "flow.h" -#include "dpdk.h" #include "dpif-netdev-private-dpcls.h" #include "dpif-netdev-private-extract.h" @@ -157,10 +156,19 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, __m512i idx, __m512i a) 0, 0, 0, 0, /* Src IP */ \ 0, 0, 0, 0, /* Dst IP */ -#define PATTERN_IPV4_MASK PATTERN_IPV4_GEN(0xFF, 0xFE, 0xFF, 0xFF) +#define PATTERN_IPV4_MASK PATTERN_IPV4_GEN(0xFF, 0xBF, 0xFF, 0xFF) #define PATTERN_IPV4_UDP PATTERN_IPV4_GEN(0x45, 0, 0, 0x11) #define PATTERN_IPV4_TCP PATTERN_IPV4_GEN(0x45, 0, 0, 0x06) +#define PATTERN_TCP_GEN(data_offset) \ + 0, 0, 0, 0, /* sport, dport */ \ + 0, 0, 0, 0, /* sequence number */ \ + 0, 0, 0, 0, /* ack number */ \ + 
data_offset, /* data offset: used to verify = 5, options not supported */ + +#define PATTERN_TCP_MASK PATTERN_TCP_GEN(0xF0) +#define PATTERN_TCP PATTERN_TCP_GEN(0x50) + #define NU 0 #define PATTERN_IPV4_UDP_SHUFFLE \ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, NU, NU, /* Ether */ \ @@ -217,6 +225,25 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, __m512i idx, __m512i a) #define PATTERN_DT1Q_IPV4_TCP_KMASK \ (KMASK_ETHER | (KMASK_DT1Q << 16) | (KMASK_IPV4 << 24) | (KMASK_TCP << 40)) +/* Miniflow Strip post-processing masks. + * This allows unsetting specific bits from the resulting miniflow. It is used + * for e.g. IPv4 where the "DF" bit is never pushed to the miniflow itself. + * The NC define is for "No Change", allowing the bits to pass through. + */ +#define NC 0xFF + +#define PATTERN_STRIP_IPV4_MASK \ + NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, \ + NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, 0xBF, NC, NC, NC, \ + NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, \ + NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC + +#define PATTERN_STRIP_DOT1Q_IPV4_MASK \ + NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, \ + NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, \ + NC, NC, NC, NC, 0xBF, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, \ + NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC + /* This union allows initializing static data as u8, but easily loading it * into AVX512 registers too. The union ensures proper alignment for the zmm. */ @@ -241,8 +268,9 @@ struct mfex_profile { union mfex_data probe_mask; union mfex_data probe_data; - /* Required for reshaping packet into miniflow. */ + /* Required for reshaping packet into miniflow and post-processing it. */ union mfex_data store_shuf; + union mfex_data strip_mask; __mmask64 store_kmsk; /* Constant data to set in mf.bits and dp_packet data on hit. 
*/ @@ -310,6 +338,7 @@ static const struct mfex_profile mfex_profiles[PROFILE_COUNT] = .probe_data.u8_data = { PATTERN_ETHERTYPE_IPV4 PATTERN_IPV4_UDP}, .store_shuf.u8_data = { PATTERN_IPV4_UDP_SHUFFLE }, + .strip_mask.u8_data = { PATTERN_STRIP_IPV4_MASK }, .store_kmsk = PATTERN_IPV4_UDP_KMASK, .mf_bits = { 0x18a0000000000000, 0x0000000000040401}, @@ -320,10 +349,19 @@ static const struct mfex_profile mfex_profiles[PROFILE_COUNT] = }, [PROFILE_ETH_IPV4_TCP] = { - .probe_mask.u8_data = { PATTERN_ETHERTYPE_MASK PATTERN_IPV4_MASK }, - .probe_data.u8_data = { PATTERN_ETHERTYPE_IPV4 PATTERN_IPV4_TCP}, + .probe_mask.u8_data = { + PATTERN_ETHERTYPE_MASK + PATTERN_IPV4_MASK + PATTERN_TCP_MASK + }, + .probe_data.u8_data = { + PATTERN_ETHERTYPE_IPV4 + PATTERN_IPV4_TCP + PATTERN_TCP + }, .store_shuf.u8_data = { PATTERN_IPV4_TCP_SHUFFLE }, + .strip_mask.u8_data = { PATTERN_STRIP_IPV4_MASK }, .store_kmsk = PATTERN_IPV4_TCP_KMASK, .mf_bits = { 0x18a0000000000000, 0x0000000000044401}, @@ -342,6 +380,7 @@ static const struct mfex_profile mfex_profiles[PROFILE_COUNT] = }, .store_shuf.u8_data = { PATTERN_DT1Q_IPV4_UDP_SHUFFLE }, + .strip_mask.u8_data = { PATTERN_STRIP_DOT1Q_IPV4_MASK }, .store_kmsk = PATTERN_DT1Q_IPV4_UDP_KMASK, .mf_bits = { 0x38a0000000000000, 0x0000000000040401}, @@ -353,20 +392,27 @@ static const struct mfex_profile mfex_profiles[PROFILE_COUNT] = [PROFILE_ETH_VLAN_IPV4_TCP] = { .probe_mask.u8_data = { - PATTERN_ETHERTYPE_MASK PATTERN_DT1Q_MASK PATTERN_IPV4_MASK + PATTERN_ETHERTYPE_MASK + PATTERN_DT1Q_MASK + PATTERN_IPV4_MASK + PATTERN_TCP_MASK }, .probe_data.u8_data = { - PATTERN_ETHERTYPE_DT1Q PATTERN_DT1Q_IPV4 PATTERN_IPV4_TCP + PATTERN_ETHERTYPE_DT1Q + PATTERN_DT1Q_IPV4 + PATTERN_IPV4_TCP + PATTERN_TCP }, .store_shuf.u8_data = { PATTERN_DT1Q_IPV4_TCP_SHUFFLE }, + .strip_mask.u8_data = { PATTERN_STRIP_DOT1Q_IPV4_MASK }, .store_kmsk = PATTERN_DT1Q_IPV4_TCP_KMASK, .mf_bits = { 0x38a0000000000000, 0x0000000000044401}, .dp_pkt_offs = { 14, UINT16_MAX, 18, 38, }, - 
.dp_pkt_min_size = 46, + .dp_pkt_min_size = 58, }, }; @@ -374,16 +420,31 @@ static const struct mfex_profile mfex_profiles[PROFILE_COUNT] = /* Protocol specific helper functions, for calculating offsets/lenghts. */ static int32_t mfex_ipv4_set_l2_pad_size(struct dp_packet *pkt, struct ip_header *nh, - uint32_t len_from_ipv4) + uint32_t len_from_ipv4, uint32_t next_proto_len) { - /* Handle dynamic l2_pad_size. */ - uint16_t tot_len = ntohs(nh->ip_tot_len); - if (OVS_UNLIKELY(tot_len > len_from_ipv4 || - (len_from_ipv4 - tot_len) > UINT16_MAX)) { - return -1; - } - dp_packet_set_l2_pad_size(pkt, len_from_ipv4 - tot_len); - return 0; + /* Handle dynamic l2_pad_size; note that avx512 has already validated + * the IP->ihl field to be 5, so 20 bytes of IP header (no options). + */ + uint16_t ip_tot_len = ntohs(nh->ip_tot_len); + + /* Error if IP total length is greater than remaining packet size. */ + bool err_ip_tot_len_too_high = ip_tot_len > len_from_ipv4; + + /* Error if IP total length is less than the size of the IP header + * itself, and the size of the next-protocol this profile matches on. + */ + bool err_ip_tot_len_too_low = + (IP_HEADER_LEN + next_proto_len) > ip_tot_len; + + /* Ensure the l2 pad size will not overflow. 
*/ + bool err_len_u16_overflow = (len_from_ipv4 - ip_tot_len) > UINT16_MAX; + + if (OVS_UNLIKELY(err_ip_tot_len_too_high || err_ip_tot_len_too_low || + err_len_u16_overflow)) { + return -1; + } + dp_packet_set_l2_pad_size(pkt, len_from_ipv4 - ip_tot_len); + return 0; } /* Fixup the VLAN CFI and PCP, reading the PCP from the input to this function, @@ -433,6 +494,7 @@ mfex_avx512_process(struct dp_packet_batch *packets, __m512i v_vals = _mm512_loadu_si512(&profile->probe_data); __m512i v_mask = _mm512_loadu_si512(&profile->probe_mask); __m512i v_shuf = _mm512_loadu_si512(&profile->store_shuf); + __m512i v_strp = _mm512_loadu_si512(&profile->strip_mask); __mmask64 k_shuf = profile->store_kmsk; __m128i v_bits = _mm_loadu_si128((void *) &profile->mf_bits); @@ -450,10 +512,17 @@ mfex_avx512_process(struct dp_packet_batch *packets, /* Load packet data and probe with AVX512 mask & compare. */ const uint8_t *pkt = dp_packet_data(packet); - __m512i v_pkt0 = _mm512_loadu_si512(pkt); + __m512i v_pkt0; + if (size >= 64) { + v_pkt0 = _mm512_loadu_si512(pkt); + } else { + uint64_t load_kmask = (1ULL << size) - 1; + v_pkt0 = _mm512_maskz_loadu_epi8(load_kmask, pkt); + } + __m512i v_pkt0_masked = _mm512_and_si512(v_pkt0, v_mask); __mmask64 k_cmp = _mm512_cmpeq_epi8_mask(v_pkt0_masked, v_vals); - if (k_cmp != UINT64_MAX) { + if (OVS_UNLIKELY(k_cmp != UINT64_MAX)) { continue; } @@ -474,15 +543,20 @@ mfex_avx512_process(struct dp_packet_batch *packets, */ __m512i v512_zeros = _mm512_setzero_si512(); __m512i v_blk0; +#if __GNUC__ >= 4 if (__builtin_constant_p(use_vbmi) && use_vbmi) { +#else + if (use_vbmi) { +#endif v_blk0 = _mm512_maskz_permutexvar_epi8_wrap(k_shuf, v_shuf, v_pkt0); } else { v_blk0 = _mm512_maskz_permutex2var_epi8_skx(k_shuf, v_pkt0, v_shuf, v512_zeros); } - _mm512_storeu_si512(&blocks[2], v_blk0); + __m512i v_blk0_strip = _mm512_and_si512(v_blk0, v_strp); + _mm512_storeu_si512(&blocks[2], v_blk0_strip); /* Perform "post-processing" per profile, handling details not 
easily * handled in the above generic AVX512 code. Examples include TCP flag @@ -498,7 +572,8 @@ mfex_avx512_process(struct dp_packet_batch *packets, uint32_t size_from_ipv4 = size - VLAN_ETH_HEADER_LEN; struct ip_header *nh = (void *)&pkt[VLAN_ETH_HEADER_LEN]; - if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) { + if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4, + TCP_HEADER_LEN)) { continue; } @@ -512,7 +587,8 @@ mfex_avx512_process(struct dp_packet_batch *packets, uint32_t size_from_ipv4 = size - VLAN_ETH_HEADER_LEN; struct ip_header *nh = (void *)&pkt[VLAN_ETH_HEADER_LEN]; - if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) { + if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4, + UDP_HEADER_LEN)) { continue; } } break; @@ -525,7 +601,8 @@ mfex_avx512_process(struct dp_packet_batch *packets, /* Handle dynamic l2_pad_size. */ uint32_t size_from_ipv4 = size - sizeof(struct eth_header); struct ip_header *nh = (void *)&pkt[sizeof(struct eth_header)]; - if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) { + if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4, + TCP_HEADER_LEN)) { continue; } } break; @@ -534,7 +611,8 @@ mfex_avx512_process(struct dp_packet_batch *packets, /* Handle dynamic l2_pad_size. 
*/ uint32_t size_from_ipv4 = size - sizeof(struct eth_header); struct ip_header *nh = (void *)&pkt[sizeof(struct eth_header)]; - if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) { + if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4, + UDP_HEADER_LEN)) { continue; } @@ -584,47 +662,5 @@ DECLARE_MFEX_FUNC(ip_udp, PROFILE_ETH_IPV4_UDP) DECLARE_MFEX_FUNC(ip_tcp, PROFILE_ETH_IPV4_TCP) DECLARE_MFEX_FUNC(dot1q_ip_udp, PROFILE_ETH_VLAN_IPV4_UDP) DECLARE_MFEX_FUNC(dot1q_ip_tcp, PROFILE_ETH_VLAN_IPV4_TCP) - - -static int32_t -avx512_isa_probe(uint32_t needs_vbmi) -{ - static const char *isa_required[] = { - "avx512f", - "avx512bw", - "bmi2", - }; - - int32_t ret = 0; - for (uint32_t i = 0; i < ARRAY_SIZE(isa_required); i++) { - if (!dpdk_get_cpu_has_isa("x86_64", isa_required[i])) { - ret = -ENOTSUP; - } - } - - if (needs_vbmi) { - if (!dpdk_get_cpu_has_isa("x86_64", "avx512vbmi")) { - ret = -ENOTSUP; - } - } - - return ret; -} - -/* Probe functions to check ISA requirements. */ -int32_t -mfex_avx512_probe(void) -{ - const uint32_t needs_vbmi = 0; - return avx512_isa_probe(needs_vbmi); -} - -int32_t -mfex_avx512_vbmi_probe(void) -{ - const uint32_t needs_vbmi = 1; - return avx512_isa_probe(needs_vbmi); -} - #endif /* __CHECKER__ */ #endif /* __x86_64__ */ diff --git a/lib/dpif-netdev-lookup-avx512-gather.c b/lib/dpif-netdev-lookup-avx512-gather.c index 072831e96a..154f9318d4 100644 --- a/lib/dpif-netdev-lookup-avx512-gather.c +++ b/lib/dpif-netdev-lookup-avx512-gather.c @@ -394,18 +394,11 @@ dpcls_avx512_gather_mf_any(struct dpcls_subtable *subtable, uint32_t keys_map, } dpcls_subtable_lookup_func -dpcls_subtable_avx512_gather_probe(uint32_t u0_bits, uint32_t u1_bits) +dpcls_subtable_avx512_gather_probe__(uint32_t u0_bits, uint32_t u1_bits, + bool use_vpop) { dpcls_subtable_lookup_func f = NULL; - int avx512f_available = dpdk_get_cpu_has_isa("x86_64", "avx512f"); - int bmi2_available = dpdk_get_cpu_has_isa("x86_64", "bmi2"); - if (!avx512f_available || 
!bmi2_available) { - return NULL; - } - - int use_vpop = dpdk_get_cpu_has_isa("x86_64", "avx512vpopcntdq"); - CHECK_LOOKUP_FUNCTION(9, 4, use_vpop); CHECK_LOOKUP_FUNCTION(9, 1, use_vpop); CHECK_LOOKUP_FUNCTION(5, 3, use_vpop); diff --git a/lib/dpif-netdev-lookup.c b/lib/dpif-netdev-lookup.c index bd0a99abe7..5cb52386f2 100644 --- a/lib/dpif-netdev-lookup.c +++ b/lib/dpif-netdev-lookup.c @@ -22,6 +22,20 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev_lookup); +#if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__) +static dpcls_subtable_lookup_func +dpcls_subtable_avx512_gather_probe(uint32_t u0_bits, uint32_t u1_bits) +{ + if (!dpdk_get_cpu_has_isa("x86_64", "avx512f") + || !dpdk_get_cpu_has_isa("x86_64", "bmi2")) { + return NULL; + } + + return dpcls_subtable_avx512_gather_probe__(u0_bits, u1_bits, + dpdk_get_cpu_has_isa("x86_64", "avx512vpopcntdq")); +} +#endif + /* Actual list of implementations goes here */ static struct dpcls_subtable_lookup_info_t subtable_lookups[] = { /* The autovalidator implementation will not be used by default, it must diff --git a/lib/dpif-netdev-lookup.h b/lib/dpif-netdev-lookup.h index 59f51faa0e..5d2d845945 100644 --- a/lib/dpif-netdev-lookup.h +++ b/lib/dpif-netdev-lookup.h @@ -44,7 +44,8 @@ dpcls_subtable_generic_probe(uint32_t u0_bit_count, uint32_t u1_bit_count); /* Probe function for AVX-512 gather implementation */ dpcls_subtable_lookup_func -dpcls_subtable_avx512_gather_probe(uint32_t u0_bit_cnt, uint32_t u1_bit_cnt); +dpcls_subtable_avx512_gather_probe__(uint32_t u0_bit_cnt, uint32_t u1_bit_cnt, + bool use_vpop); /* Subtable registration and iteration helpers */ diff --git a/lib/dpif-netdev-private-dfc.h b/lib/dpif-netdev-private-dfc.h index 92092ebec9..3dfc91f0fe 100644 --- a/lib/dpif-netdev-private-dfc.h +++ b/lib/dpif-netdev-private-dfc.h @@ -59,7 +59,8 @@ extern "C" { * Thread-safety * ============= * - * Each pmd_thread has its own private exact match cache. 
+ * Each pmd_thread has its own private exact match cache and signature match + * cache. * If dp_netdev_input is not called from a pmd thread, a mutex is used. */ diff --git a/lib/dpif-netdev-private-dpcls.h b/lib/dpif-netdev-private-dpcls.h index 7c4a840cb1..0d5da73c7a 100644 --- a/lib/dpif-netdev-private-dpcls.h +++ b/lib/dpif-netdev-private-dpcls.h @@ -83,8 +83,10 @@ struct dpcls_subtable { /* The lookup function to use for this subtable. If there is a known * property of the subtable (eg: only 3 bits of miniflow metadata is * used for the lookup) then this can point at an optimized version of - * the lookup function for this particular subtable. */ - dpcls_subtable_lookup_func lookup_func; + * the lookup function for this particular subtable. The lookup function + * can be used at any time by a PMD thread, so it's declared as an atomic + * here to prevent garbage from being read. */ + ATOMIC(dpcls_subtable_lookup_func) lookup_func; /* Caches the masks to match a packet to, reducing runtime calculations. */ uint64_t *mf_masks; diff --git a/lib/dpif-netdev-private-dpif.c b/lib/dpif-netdev-private-dpif.c index 84d4ec156e..ac40757281 100644 --- a/lib/dpif-netdev-private-dpif.c +++ b/lib/dpif-netdev-private-dpif.c @@ -33,6 +33,19 @@ enum dpif_netdev_impl_info_idx { DPIF_NETDEV_IMPL_AVX512 }; +#if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__) +static int32_t +dp_netdev_input_outer_avx512_probe(void) +{ + if (!dpdk_get_cpu_has_isa("x86_64", "avx512f") + || !dpdk_get_cpu_has_isa("x86_64", "bmi2")) { + return -ENOTSUP; + } + + return 0; +} +#endif + /* Actual list of implementations goes here. */ static struct dpif_netdev_impl_info_t dpif_impls[] = { /* The default scalar C code implementation. 
*/ diff --git a/lib/dpif-netdev-private-dpif.h b/lib/dpif-netdev-private-dpif.h index 0da639c55a..3e38630f53 100644 --- a/lib/dpif-netdev-private-dpif.h +++ b/lib/dpif-netdev-private-dpif.h @@ -67,10 +67,7 @@ dp_netdev_input(struct dp_netdev_pmd_thread *pmd, struct dp_packet_batch *packets, odp_port_t in_port); -/* AVX512 enabled DPIF implementation and probe functions. */ -int32_t -dp_netdev_input_outer_avx512_probe(void); - +/* AVX512 enabled DPIF implementation function. */ int32_t dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd, struct dp_packet_batch *packets, diff --git a/lib/dpif-netdev-private-extract.c b/lib/dpif-netdev-private-extract.c index 7a06dbf6fd..245d1c0cad 100644 --- a/lib/dpif-netdev-private-extract.c +++ b/lib/dpif-netdev-private-extract.c @@ -33,6 +33,43 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev_extract); /* Variable to hold the default MFEX implementation. */ static ATOMIC(miniflow_extract_func) default_mfex_func; +#if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__) +static int32_t +avx512_isa_probe(bool needs_vbmi) +{ + static const char *isa_required[] = { + "avx512f", + "avx512bw", + "bmi2", + }; + + for (uint32_t i = 0; i < ARRAY_SIZE(isa_required); i++) { + if (!dpdk_get_cpu_has_isa("x86_64", isa_required[i])) { + return -ENOTSUP; + } + } + + if (needs_vbmi && !dpdk_get_cpu_has_isa("x86_64", "avx512vbmi")) { + return -ENOTSUP; + } + + return 0; +} + +/* Probe functions to check ISA requirements. */ +static int32_t +mfex_avx512_probe(void) +{ + return avx512_isa_probe(false); +} + +static int32_t +mfex_avx512_vbmi_probe(void) +{ + return avx512_isa_probe(true); +} +#endif + /* Implementations of available extract options and * the implementations are always in order of preference. 
*/ diff --git a/lib/dpif-netdev-private-extract.h b/lib/dpif-netdev-private-extract.h index f9a757ba41..3e06148c5a 100644 --- a/lib/dpif-netdev-private-extract.h +++ b/lib/dpif-netdev-private-extract.h @@ -176,10 +176,8 @@ mfex_study_traffic(struct dp_packet_batch *packets, int mfex_set_study_pkt_cnt(uint32_t pkt_cmp_count, const char *name); -/* AVX512 MFEX Probe and Implementations functions. */ +/* AVX512 MFEX Implementation functions. */ #ifdef __x86_64__ -int32_t mfex_avx512_probe(void); -int32_t mfex_avx512_vbmi_probe(void); #define DECLARE_AVX512_MFEX_PROTOTYPE(name) \ uint32_t \ diff --git a/lib/dpif-netdev-private-flow.h b/lib/dpif-netdev-private-flow.h index 3030660675..32ad020d90 100644 --- a/lib/dpif-netdev-private-flow.h +++ b/lib/dpif-netdev-private-flow.h @@ -101,6 +101,7 @@ struct dp_netdev_flow { bool dead; uint32_t mark; /* Unique flow mark assigned to a flow */ + odp_port_t orig_in_port; /* Statistics. */ struct dp_netdev_flow_stats stats; diff --git a/lib/dpif-netdev-private-thread.h b/lib/dpif-netdev-private-thread.h index a782d9678a..ac4885538c 100644 --- a/lib/dpif-netdev-private-thread.h +++ b/lib/dpif-netdev-private-thread.h @@ -78,10 +78,10 @@ struct dp_netdev_pmd_thread { struct ovs_refcount ref_cnt; /* Every reference must be refcount'ed. */ struct cmap_node node; /* In 'dp->poll_threads'. */ - /* Per thread exact-match cache. Note, the instance for cpu core - * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly - * need to be protected by 'non_pmd_mutex'. Every other instance - * will only be accessed by its own pmd thread. */ + /* Per thread exact match cache and signature match cache. Note, the + * instance for cpu core NON_PMD_CORE_ID can be accessed by multiple + * threads, and thusly need to be protected by 'non_pmd_mutex'. Every + * other instance will only be accessed by its own pmd thread. 
*/ OVS_ALIGNED_VAR(CACHE_LINE_SIZE) struct dfc_cache flow_cache; /* Flow-Table and classifiers diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index bddce75b63..5f90dd4ceb 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -984,7 +984,9 @@ dpif_netdev_subtable_lookup_set(struct unixctl_conn *conn, int argc OVS_UNUSED, if (!cls) { continue; } + ovs_mutex_lock(&pmd->flow_mutex); uint32_t subtbl_changes = dpcls_subtable_lookup_reprobe(cls); + ovs_mutex_unlock(&pmd->flow_mutex); if (subtbl_changes) { lookup_dpcls_changed++; lookup_subtable_changed += subtbl_changes; @@ -2221,13 +2223,24 @@ static void do_del_port(struct dp_netdev *dp, struct dp_netdev_port *port) OVS_REQUIRES(dp->port_mutex) { - netdev_flow_flush(port->netdev); - netdev_uninit_flow_api(port->netdev); hmap_remove(&dp->ports, &port->node); seq_change(dp->port_seq); reconfigure_datapath(dp); + /* Flush and disable offloads only after 'port' has been made + * inaccessible through datapath reconfiguration. + * This prevents having PMDs enqueuing offload requests after + * the flush. However, the flush doesn't provide any synchronization + * with the offload thread, so some requests could still be in the + * queue. + * When only this port is deleted instead of the whole datapath, + * revalidator threads are still active and can still enqueue + * offload modification or deletion. Managing those stray requests + * is done in the offload threads. 
*/ + netdev_flow_flush(port->netdev); + netdev_uninit_flow_api(port->netdev); + port_destroy(port); } @@ -2711,6 +2724,10 @@ queue_netdev_flow_del(struct dp_netdev_pmd_thread *pmd, ovsthread_once_done(&offload_thread_once); } + if (!netdev_is_flow_api_enabled()) { + return; + } + offload = dp_netdev_alloc_flow_offload(pmd, flow, DP_NETDEV_FLOW_OFFLOAD_OP_DEL); dp_netdev_append_flow_offload(offload); @@ -2720,7 +2737,7 @@ static void queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd, struct dp_netdev_flow *flow, struct match *match, const struct nlattr *actions, size_t actions_len, - odp_port_t orig_in_port, int op) + int op) { struct dp_flow_offload_item *offload; @@ -2740,7 +2757,7 @@ queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd, offload->actions = xmalloc(actions_len); memcpy(offload->actions, actions, actions_len); offload->actions_len = actions_len; - offload->orig_in_port = orig_in_port; + offload->orig_in_port = flow->orig_in_port; dp_netdev_append_flow_offload(offload); } @@ -2758,9 +2775,7 @@ dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd, ovs_assert(cls != NULL); dpcls_remove(cls, &flow->cr); cmap_remove(&pmd->flow_table, node, dp_netdev_flow_hash(&flow->ufid)); - if (flow->mark != INVALID_FLOW_MARK) { - queue_netdev_flow_del(pmd, flow); - } + queue_netdev_flow_del(pmd, flow); flow->dead = true; dp_netdev_flow_unref(flow); @@ -3555,6 +3570,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, flow->dead = false; flow->batch = NULL; flow->mark = INVALID_FLOW_MARK; + flow->orig_in_port = orig_in_port; *CONST_CAST(unsigned *, &flow->pmd_id) = pmd->core_id; *CONST_CAST(struct flow *, &flow->flow) = match->flow; *CONST_CAST(ovs_u128 *, &flow->ufid) = *ufid; @@ -3584,7 +3600,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, dp_netdev_flow_hash(&flow->ufid)); queue_netdev_flow_put(pmd, flow, match, actions, actions_len, - orig_in_port, DP_NETDEV_FLOW_OFFLOAD_OP_ADD); + DP_NETDEV_FLOW_OFFLOAD_OP_ADD); if 
(OVS_UNLIKELY(!VLOG_DROP_DBG((&upcall_rl)))) { struct ds ds = DS_EMPTY_INITIALIZER; @@ -3671,7 +3687,7 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd, ovsrcu_set(&netdev_flow->actions, new_actions); queue_netdev_flow_put(pmd, netdev_flow, match, - put->actions, put->actions_len, ODPP_NONE, + put->actions, put->actions_len, DP_NETDEV_FLOW_OFFLOAD_OP_MOD); if (stats) { @@ -4061,7 +4077,10 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute) flow_hash_5tuple(execute->flow, 0)); } - dp_packet_batch_init_packet(&pp, execute->packet); + /* Making a copy because the packet might be stolen during the execution + * and caller might still need it. */ + struct dp_packet *packet_clone = dp_packet_clone(execute->packet); + dp_packet_batch_init_packet(&pp, packet_clone); dp_netdev_execute_actions(pmd, &pp, false, execute->flow, execute->actions, execute->actions_len); dp_netdev_pmd_flush_output_packets(pmd, true); @@ -4071,6 +4090,24 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute) dp_netdev_pmd_unref(pmd); } + if (dp_packet_batch_size(&pp) == 1) { + /* Packet wasn't dropped during the execution. Swapping content with + * the original packet, because the caller might expect actions to + * modify it. Using the packet from a batch instead of 'packet_clone' + * because it may be stolen and replaced by another packet, e.g. by + * the fragmentation engine. */ + dp_packet_swap(execute->packet, pp.packets[0]); + dp_packet_delete_batch(&pp, true); + } else if (dp_packet_batch_size(&pp)) { + /* FIXME: We have more packets than expected. Likely, we got IP + * fragments of the reassembled packet. Dropping them here as we have + * no way to get them to the caller. It might be that all the required + * actions with them are already executed, but it also might not be the + * case, e.g. if dpif_netdev_execute() is called to execute a single + * tunnel push.
*/ + dp_packet_delete_batch(&pp, true); + } + return 0; } @@ -5033,23 +5070,28 @@ sched_numa_list_put_in_place(struct sched_numa_list *numa_list) } } +/* Returns 'true' if OVS rxq scheduling algorithm assigned any unpinned rxq to + * a PMD thread core on a non-local numa node. */ static bool sched_numa_list_cross_numa_polling(struct sched_numa_list *numa_list) { struct sched_numa *numa; - /* For each numa */ HMAP_FOR_EACH (numa, node, &numa_list->numas) { - /* For each pmd */ for (int i = 0; i < numa->n_pmds; i++) { struct sched_pmd *sched_pmd; sched_pmd = &numa->pmds[i]; - /* For each rxq. */ + if (sched_pmd->isolated) { + /* All rxqs on this PMD thread core are pinned. */ + continue; + } for (unsigned k = 0; k < sched_pmd->n_rxq; k++) { struct dp_netdev_rxq *rxq = sched_pmd->rxqs[k]; - - if (!sched_pmd->isolated && + /* Check if the rxq is not pinned to a specific PMD thread core + * by the user AND the PMD thread core that OVS assigned is + * non-local to the rxq port. */ + if (rxq->core_id == OVS_CORE_UNSPEC && rxq->pmd->numa_id != netdev_get_numa_id(rxq->port->netdev)) { return true; @@ -5349,10 +5391,10 @@ sched_numa_list_schedule(struct sched_numa_list *numa_list, /* Find any numa with available PMDs. */ for (int j = 0; j < n_numa; j++) { numa = sched_numa_list_next(numa_list, last_cross_numa); + last_cross_numa = numa; if (sched_numa_noniso_pmd_count(numa)) { break; } - last_cross_numa = numa; numa = NULL; } } @@ -6616,15 +6658,15 @@ static struct dp_netdev_pmd_thread * dp_netdev_get_pmd(struct dp_netdev *dp, unsigned core_id) { struct dp_netdev_pmd_thread *pmd; - const struct cmap_node *pnode; - pnode = cmap_find(&dp->poll_threads, hash_int(core_id, 0)); - if (!pnode) { - return NULL; + CMAP_FOR_EACH_WITH_HASH (pmd, node, hash_int(core_id, 0), + &dp->poll_threads) { + if (pmd->core_id == core_id) { + return dp_netdev_pmd_try_ref(pmd) ? pmd : NULL; + } } - pmd = CONTAINER_OF(pnode, struct dp_netdev_pmd_thread, node); - return dp_netdev_pmd_try_ref(pmd) ? 
pmd : NULL; + return NULL; } /* Sets the 'struct dp_netdev_pmd_thread' for non-pmd threads. */ @@ -6751,6 +6793,7 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd) seq_destroy(pmd->reload_seq); ovs_mutex_destroy(&pmd->port_mutex); ovs_mutex_destroy(&pmd->bond_mutex); + free(pmd->netdev_input_func_userdata); free(pmd); } @@ -8942,9 +8985,12 @@ dpcls_create_subtable(struct dpcls *cls, const struct netdev_flow_key *mask) /* Get the preferred subtable search function for this (u0,u1) subtable. * The function is guaranteed to always return a valid implementation, and - * possibly an ISA optimized, and/or specialized implementation. + * possibly an ISA optimized, and/or specialized implementation. Initialize + * the subtable search function atomically to avoid garbage data being read + * by the PMD thread. */ - subtable->lookup_func = dpcls_subtable_get_best_impl(unit0, unit1); + atomic_init(&subtable->lookup_func, + dpcls_subtable_get_best_impl(unit0, unit1)); cmap_insert(&cls->subtables_map, &subtable->cmap_node, mask->hash); /* Add the new subtable at the end of the pvector (with no hits yet) */ @@ -8973,6 +9019,10 @@ dpcls_find_subtable(struct dpcls *cls, const struct netdev_flow_key *mask) /* Checks for the best available implementation for each subtable lookup * function, and assigns it as the lookup function pointer for each subtable. * Returns the number of subtables that have changed lookup implementation. + * This function requires holding a flow_mutex when called. This is to make + * sure modifications done by this function are not overwritten. This could + * happen if dpcls_sort_subtable_vector() is called at the same time as this + * function. 
*/ static uint32_t dpcls_subtable_lookup_reprobe(struct dpcls *cls) @@ -8985,10 +9035,13 @@ dpcls_subtable_lookup_reprobe(struct dpcls *cls) uint32_t u0_bits = subtable->mf_bits_set_unit0; uint32_t u1_bits = subtable->mf_bits_set_unit1; void *old_func = subtable->lookup_func; - subtable->lookup_func = dpcls_subtable_get_best_impl(u0_bits, u1_bits); + + /* Set the subtable lookup function atomically to avoid garbage data + * being read by the PMD thread. */ + atomic_store_relaxed(&subtable->lookup_func, + dpcls_subtable_get_best_impl(u0_bits, u1_bits)); subtables_changed += (old_func != subtable->lookup_func); } - pvector_publish(pvec); return subtables_changed; } diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 34fc042373..5f4b60c5a6 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -84,6 +84,8 @@ enum { MAX_PORTS = USHRT_MAX }; #define EPOLLEXCLUSIVE (1u << 28) #endif +#define OVS_DP_F_UNSUPPORTED (1u << 31) + /* This PID is not used by the kernel datapath when using dispatch per CPU, * but it is required to be set (not zero). */ #define DPIF_NETLINK_PER_CPU_PID UINT32_MAX @@ -382,36 +384,62 @@ dpif_netlink_open(const struct dpif_class *class OVS_UNUSED, const char *name, dp_request.cmd = OVS_DP_CMD_SET; } - /* The Open vSwitch kernel module has two modes for dispatching upcalls: - * per-vport and per-cpu. - * - * When dispatching upcalls per-vport, the kernel will - * send the upcall via a Netlink socket that has been selected based on the - * vport that received the packet that is causing the upcall. - * - * When dispatching upcall per-cpu, the kernel will send the upcall via - * a Netlink socket that has been selected based on the cpu that received - * the packet that is causing the upcall. - * - * First we test to see if the kernel module supports per-cpu dispatching - * (the preferred method). If it does not support per-cpu dispatching, we - * fall back to the per-vport dispatch mode.
+ /* Some older kernels will not reject unknown features. This will cause + * 'ovs-vswitchd' to incorrectly assume a feature is supported. In order to + * test for that, we attempt to set a feature that we know is not supported + * by any kernel. If this feature is not rejected, we can assume we are + * running on one of these older kernels. */ dp_request.user_features |= OVS_DP_F_UNALIGNED; - dp_request.user_features &= ~OVS_DP_F_VPORT_PIDS; - dp_request.user_features |= OVS_DP_F_DISPATCH_UPCALL_PER_CPU; + dp_request.user_features |= OVS_DP_F_VPORT_PIDS; + dp_request.user_features |= OVS_DP_F_UNSUPPORTED; error = dpif_netlink_dp_transact(&dp_request, &dp, &buf); if (error) { - dp_request.user_features &= ~OVS_DP_F_DISPATCH_UPCALL_PER_CPU; + /* The Open vSwitch kernel module has two modes for dispatching + * upcalls: per-vport and per-cpu. + * + * When dispatching upcalls per-vport, the kernel will + * send the upcall via a Netlink socket that has been selected based on + * the vport that received the packet that is causing the upcall. + * + * When dispatching upcall per-cpu, the kernel will send the upcall via + * a Netlink socket that has been selected based on the cpu that + * received the packet that is causing the upcall. + * + * First we test to see if the kernel module supports per-cpu + * dispatching (the preferred method). If it does not support per-cpu + * dispatching, we fall back to the per-vport dispatch mode. 
+ */ + dp_request.user_features &= ~OVS_DP_F_UNSUPPORTED; + dp_request.user_features |= OVS_DP_F_UNALIGNED; + dp_request.user_features &= ~OVS_DP_F_VPORT_PIDS; + dp_request.user_features |= OVS_DP_F_DISPATCH_UPCALL_PER_CPU; + error = dpif_netlink_dp_transact(&dp_request, &dp, &buf); + if (error) { + dp_request.user_features &= ~OVS_DP_F_DISPATCH_UPCALL_PER_CPU; + dp_request.user_features |= OVS_DP_F_VPORT_PIDS; + error = dpif_netlink_dp_transact(&dp_request, &dp, &buf); + } + if (error) { + return error; + } + + error = open_dpif(&dp, dpifp); + dpif_netlink_set_features(*dpifp, OVS_DP_F_TC_RECIRC_SHARING); + } else { + VLOG_INFO("Kernel does not correctly support feature negotiation. " + "Using standard features."); + dp_request.cmd = OVS_DP_CMD_SET; + dp_request.user_features = 0; + dp_request.user_features |= OVS_DP_F_UNALIGNED; dp_request.user_features |= OVS_DP_F_VPORT_PIDS; error = dpif_netlink_dp_transact(&dp_request, &dp, &buf); - } - if (error) { - return error; + if (error) { + return error; + } + error = open_dpif(&dp, dpifp); } - error = open_dpif(&dp, dpifp); - dpif_netlink_set_features(*dpifp, OVS_DP_F_TC_RECIRC_SHARING); ofpbuf_delete(buf); if (create) { diff --git a/lib/flow.c b/lib/flow.c index 89837de95d..a021bc0eba 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -1006,14 +1006,18 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst) if (OVS_LIKELY(nw_proto == IPPROTO_TCP)) { if (OVS_LIKELY(size >= TCP_HEADER_LEN)) { const struct tcp_header *tcp = data; - - miniflow_push_be32(mf, arp_tha.ea[2], 0); - miniflow_push_be32(mf, tcp_flags, - TCP_FLAGS_BE32(tcp->tcp_ctl)); - miniflow_push_be16(mf, tp_src, tcp->tcp_src); - miniflow_push_be16(mf, tp_dst, tcp->tcp_dst); - miniflow_push_be16(mf, ct_tp_src, ct_tp_src); - miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst); + size_t tcp_hdr_len = TCP_OFFSET(tcp->tcp_ctl) * 4; + + if (OVS_LIKELY(tcp_hdr_len >= TCP_HEADER_LEN) + && OVS_LIKELY(size >= tcp_hdr_len)) { + miniflow_push_be32(mf, arp_tha.ea[2], 0); 
+ miniflow_push_be32(mf, tcp_flags, + TCP_FLAGS_BE32(tcp->tcp_ctl)); + miniflow_push_be16(mf, tp_src, tcp->tcp_src); + miniflow_push_be16(mf, tp_dst, tcp->tcp_dst); + miniflow_push_be16(mf, ct_tp_src, ct_tp_src); + miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst); + } } } else if (OVS_LIKELY(nw_proto == IPPROTO_UDP)) { if (OVS_LIKELY(size >= UDP_HEADER_LEN)) { diff --git a/lib/hindex.h b/lib/hindex.h index 876c5a9e39..f7a30d511a 100644 --- a/lib/hindex.h +++ b/lib/hindex.h @@ -128,18 +128,22 @@ void hindex_remove(struct hindex *, struct hindex_node *); * Evaluates HASH only once. */ #define HINDEX_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, HINDEX) \ - for (INIT_CONTAINER(NODE, hindex_node_with_hash(HINDEX, HASH), MEMBER); \ - NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER); \ - ASSIGN_CONTAINER(NODE, (NODE)->MEMBER.s, MEMBER)) + for (INIT_MULTIVAR(NODE, MEMBER, hindex_node_with_hash(HINDEX, HASH), \ + struct hindex_node); \ + CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ + UPDATE_MULTIVAR(NODE, ITER_VAR(NODE)->s)) /* Safe when NODE may be freed (not needed when NODE may be removed from the * hash map but its members remain accessible and intact). */ -#define HINDEX_FOR_EACH_WITH_HASH_SAFE(NODE, NEXT, MEMBER, HASH, HINDEX) \ - for (INIT_CONTAINER(NODE, hindex_node_with_hash(HINDEX, HASH), MEMBER); \ - (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER) \ - ? INIT_CONTAINER(NEXT, (NODE)->MEMBER.s, MEMBER), 1 \ - : 0); \ - (NODE) = (NEXT)) +#define HINDEX_FOR_EACH_WITH_HASH_SAFE(NODE, NEXT, MEMBER, HASH, HINDEX) \ + for (INIT_MULTIVAR_SAFE_LONG(NODE, NEXT, MEMBER, \ + hindex_node_with_hash(HINDEX, HASH), \ + struct hindex_node); \ + CONDITION_MULTIVAR_SAFE_LONG(NODE, NEXT, MEMBER, \ + ITER_VAR(NODE) != NULL, \ + ITER_VAR(NEXT) = ITER_VAR(NODE)->s, \ + ITER_VAR(NEXT) != NULL); \ + UPDATE_MULTIVAR_SAFE_LONG(NODE, NEXT)) /* Returns the head node in 'hindex' with the given 'hash', or a null pointer * if no nodes have that hash value. 
*/ @@ -157,19 +161,22 @@ hindex_node_with_hash(const struct hindex *hindex, size_t hash) /* Iteration. */ /* Iterates through every node in HINDEX. */ -#define HINDEX_FOR_EACH(NODE, MEMBER, HINDEX) \ - for (INIT_CONTAINER(NODE, hindex_first(HINDEX), MEMBER); \ - NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER); \ - ASSIGN_CONTAINER(NODE, hindex_next(HINDEX, &(NODE)->MEMBER), MEMBER)) +#define HINDEX_FOR_EACH(NODE, MEMBER, HINDEX) \ + for (INIT_MULTIVAR(NODE, MEMBER, hindex_first(HINDEX), \ + struct hindex_node); \ + CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ + UPDATE_MULTIVAR(NODE, hindex_next(HINDEX, ITER_VAR(NODE)))) /* Safe when NODE may be freed (not needed when NODE may be removed from the * hash index but its members remain accessible and intact). */ -#define HINDEX_FOR_EACH_SAFE(NODE, NEXT, MEMBER, HINDEX) \ - for (INIT_CONTAINER(NODE, hindex_first(HINDEX), MEMBER); \ - (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER) \ - ? INIT_CONTAINER(NEXT, hindex_next(HINDEX, &(NODE)->MEMBER), MEMBER), 1 \ - : 0); \ - (NODE) = (NEXT)) +#define HINDEX_FOR_EACH_SAFE(NODE, NEXT, MEMBER, HINDEX) \ + for (INIT_MULTIVAR_SAFE_LONG(NODE, NEXT, MEMBER, hindex_first(HINDEX), \ + struct hindex_node); \ + CONDITION_MULTIVAR_SAFE_LONG(NODE, NEXT, MEMBER, \ + ITER_VAR(NODE) != NULL, \ + ITER_VAR(NEXT) = hindex_next(HINDEX, ITER_VAR(NODE)), \ + ITER_VAR(NEXT) != NULL); \ + UPDATE_MULTIVAR_SAFE_LONG(NODE, NEXT)) struct hindex_node *hindex_first(const struct hindex *); struct hindex_node *hindex_next(const struct hindex *, diff --git a/lib/ipf.c b/lib/ipf.c index d9f781147a..507db2aea2 100644 --- a/lib/ipf.c +++ b/lib/ipf.c @@ -943,6 +943,8 @@ ipf_extract_frags_from_batch(struct ipf *ipf, struct dp_packet_batch *pb, ovs_mutex_lock(&ipf->ipf_lock); if (!ipf_handle_frag(ipf, pkt, dl_type, zone, now, hash_basis)) { dp_packet_batch_refill(pb, pkt, pb_idx); + } else { + dp_packet_delete(pkt); } ovs_mutex_unlock(&ipf->ipf_lock); } else { @@ -1152,52 +1154,56 @@ 
ipf_post_execute_reass_pkts(struct ipf *ipf, * NETDEV_MAX_BURST. */ DP_PACKET_BATCH_REFILL_FOR_EACH (pb_idx, pb_cnt, pkt, pb) { if (rp && pkt == rp->list->reass_execute_ctx) { + const struct ipf_frag *frag_0 = &rp->list->frag_list[0]; + void *l4_frag = dp_packet_l4(frag_0->pkt); + void *l4_reass = dp_packet_l4(pkt); + memcpy(l4_frag, l4_reass, dp_packet_l4_size(frag_0->pkt)); + for (int i = 0; i <= rp->list->last_inuse_idx; i++) { - rp->list->frag_list[i].pkt->md.ct_label = pkt->md.ct_label; - rp->list->frag_list[i].pkt->md.ct_mark = pkt->md.ct_mark; - rp->list->frag_list[i].pkt->md.ct_state = pkt->md.ct_state; - rp->list->frag_list[i].pkt->md.ct_zone = pkt->md.ct_zone; - rp->list->frag_list[i].pkt->md.ct_orig_tuple_ipv6 = + const struct ipf_frag *frag_i = &rp->list->frag_list[i]; + + frag_i->pkt->md.ct_label = pkt->md.ct_label; + frag_i->pkt->md.ct_mark = pkt->md.ct_mark; + frag_i->pkt->md.ct_state = pkt->md.ct_state; + frag_i->pkt->md.ct_zone = pkt->md.ct_zone; + frag_i->pkt->md.ct_orig_tuple_ipv6 = pkt->md.ct_orig_tuple_ipv6; if (pkt->md.ct_orig_tuple_ipv6) { - rp->list->frag_list[i].pkt->md.ct_orig_tuple.ipv6 = + frag_i->pkt->md.ct_orig_tuple.ipv6 = pkt->md.ct_orig_tuple.ipv6; } else { - rp->list->frag_list[i].pkt->md.ct_orig_tuple.ipv4 = + frag_i->pkt->md.ct_orig_tuple.ipv4 = pkt->md.ct_orig_tuple.ipv4; } - } - - const struct ipf_frag *frag_0 = &rp->list->frag_list[0]; - void *l4_frag = dp_packet_l4(frag_0->pkt); - void *l4_reass = dp_packet_l4(pkt); - memcpy(l4_frag, l4_reass, dp_packet_l4_size(frag_0->pkt)); - - if (v6) { - struct ovs_16aligned_ip6_hdr *l3_frag - = dp_packet_l3(frag_0->pkt); - struct ovs_16aligned_ip6_hdr *l3_reass = dp_packet_l3(pkt); - l3_frag->ip6_src = l3_reass->ip6_src; - l3_frag->ip6_dst = l3_reass->ip6_dst; - } else { - struct ip_header *l3_frag = dp_packet_l3(frag_0->pkt); - struct ip_header *l3_reass = dp_packet_l3(pkt); - if (!dp_packet_hwol_is_ipv4(frag_0->pkt)) { - ovs_be32 reass_ip = - get_16aligned_be32(&l3_reass->ip_src); - 
ovs_be32 frag_ip = - get_16aligned_be32(&l3_frag->ip_src); - - l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum, - frag_ip, reass_ip); - reass_ip = get_16aligned_be32(&l3_reass->ip_dst); - frag_ip = get_16aligned_be32(&l3_frag->ip_dst); - l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum, - frag_ip, reass_ip); + if (v6) { + struct ovs_16aligned_ip6_hdr *l3_frag + = dp_packet_l3(frag_i->pkt); + struct ovs_16aligned_ip6_hdr *l3_reass + = dp_packet_l3(pkt); + l3_frag->ip6_src = l3_reass->ip6_src; + l3_frag->ip6_dst = l3_reass->ip6_dst; + } else { + struct ip_header *l3_frag = dp_packet_l3(frag_i->pkt); + struct ip_header *l3_reass = dp_packet_l3(pkt); + if (!dp_packet_hwol_is_ipv4(frag_i->pkt)) { + ovs_be32 reass_ip = + get_16aligned_be32(&l3_reass->ip_src); + ovs_be32 frag_ip = + get_16aligned_be32(&l3_frag->ip_src); + + l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum, + frag_ip, + reass_ip); + reass_ip = get_16aligned_be32(&l3_reass->ip_dst); + frag_ip = get_16aligned_be32(&l3_frag->ip_dst); + l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum, + frag_ip, + reass_ip); + } + + l3_frag->ip_src = l3_reass->ip_src; + l3_frag->ip_dst = l3_reass->ip_dst; } - - l3_frag->ip_src = l3_reass->ip_src; - l3_frag->ip_dst = l3_reass->ip_dst; } ipf_completed_list_add(&ipf->frag_complete_list, rp->list); diff --git a/lib/json.c b/lib/json.c index 32d25003b8..0baf7c622c 100644 --- a/lib/json.c +++ b/lib/json.c @@ -146,6 +146,7 @@ json_type_to_string(enum json_type type) case JSON_STRING: return "string"; + case JSON_SERIALIZED_OBJECT: case JSON_N_TYPES: default: return ""; @@ -180,6 +181,14 @@ json_string_create(const char *s) return json_string_create_nocopy(xstrdup(s)); } +struct json * +json_serialized_object_create(const struct json *src) +{ + struct json *json = json_create(JSON_SERIALIZED_OBJECT); + json->string = json_to_string(src, JSSF_SORT); + return json; +} + struct json * json_array_create_empty(void) { @@ -309,6 +318,13 @@ json_string(const struct json *json) return 
json->string; } +const char * +json_serialized_object(const struct json *json) +{ + ovs_assert(json->type == JSON_SERIALIZED_OBJECT); + return json->string; +} + struct json_array * json_array(const struct json *json) { @@ -362,6 +378,7 @@ json_destroy(struct json *json) break; case JSON_STRING: + case JSON_SERIALIZED_OBJECT: free(json->string); break; @@ -422,6 +439,9 @@ json_deep_clone(const struct json *json) case JSON_STRING: return json_string_create(json->string); + case JSON_SERIALIZED_OBJECT: + return json_serialized_object_create(json); + case JSON_NULL: case JSON_FALSE: case JSON_TRUE: @@ -521,6 +541,7 @@ json_hash(const struct json *json, size_t basis) return json_hash_array(&json->array, basis); case JSON_STRING: + case JSON_SERIALIZED_OBJECT: return hash_string(json->string, basis); case JSON_NULL: @@ -596,6 +617,7 @@ json_equal(const struct json *a, const struct json *b) return json_equal_array(&a->array, &b->array); case JSON_STRING: + case JSON_SERIALIZED_OBJECT: return !strcmp(a->string, b->string); case JSON_NULL: @@ -1072,6 +1094,14 @@ json_from_string(const char *string) return json_parser_finish(p); } +/* Parses data of JSON_SERIALIZED_OBJECT to the real JSON. */ +struct json * +json_from_serialized_object(const struct json *json) +{ + ovs_assert(json->type == JSON_SERIALIZED_OBJECT); + return json_from_string(json->string); +} + /* Reads the file named 'file_name', parses its contents as a JSON object or * array, and returns a newly allocated 'struct json'. The caller must free * the returned structure with json_destroy() when it is no longer needed. 
@@ -1563,6 +1593,10 @@ json_serialize(const struct json *json, struct json_serializer *s) json_serialize_string(json->string, ds); break; + case JSON_SERIALIZED_OBJECT: + ds_put_cstr(ds, json->string); + break; + case JSON_N_TYPES: default: OVS_NOT_REACHED(); @@ -1696,14 +1730,30 @@ json_serialize_string(const char *string, struct ds *ds) { uint8_t c; uint8_t c2; + size_t count; const char *escape; + const char *start; ds_put_char(ds, '"'); + count = 0; + start = string; while ((c = *string++) != '\0') { - escape = chars_escaping[c]; - while ((c2 = *escape++) != '\0') { - ds_put_char(ds, c2); + if (c >= ' ' && c != '"' && c != '\\') { + count++; + } else { + if (count) { + ds_put_buffer(ds, start, count); + count = 0; + } + start = string; + escape = chars_escaping[c]; + while ((c2 = *escape++) != '\0') { + ds_put_char(ds, c2); + } } } + if (count) { + ds_put_buffer(ds, start, count); + } ds_put_char(ds, '"'); } diff --git a/lib/lldp/lldp.c b/lib/lldp/lldp.c index 18afbab9a7..dfeb2a8002 100644 --- a/lib/lldp/lldp.c +++ b/lib/lldp/lldp.c @@ -146,7 +146,9 @@ static void lldp_tlv_end(struct dp_packet *p, unsigned int start) { ovs_be16 *tlv = dp_packet_at_assert(p, start, 2); - *tlv |= htons((dp_packet_size(p) - (start + 2)) & 0x1ff); + put_unaligned_be16(tlv, + get_unaligned_be16(tlv) + | htons((dp_packet_size(p) - (start + 2)) & 0x1ff)); } int diff --git a/lib/lldp/lldpd.c b/lib/lldp/lldpd.c index a024dc5e58..ee1051dde7 100644 --- a/lib/lldp/lldpd.c +++ b/lib/lldp/lldpd.c @@ -140,13 +140,9 @@ lldpd_cleanup(struct lldpd *cfg) VLOG_DBG("cleanup all ports"); LIST_FOR_EACH_SAFE (hw, hw_next, h_entries, &cfg->g_hardware) { - if (!hw->h_flags) { - ovs_list_remove(&hw->h_entries); - lldpd_remote_cleanup(hw, NULL, true); - lldpd_hardware_cleanup(cfg, hw); - } else { - lldpd_remote_cleanup(hw, NULL, false); - } + ovs_list_remove(&hw->h_entries); + lldpd_remote_cleanup(hw, NULL, true); + lldpd_hardware_cleanup(cfg, hw); } VLOG_DBG("cleanup all chassis"); diff --git 
a/lib/meta-flow.c b/lib/meta-flow.c index c808d205d5..e03cd8d0c5 100644 --- a/lib/meta-flow.c +++ b/lib/meta-flow.c @@ -1788,6 +1788,19 @@ mf_is_tun_metadata(const struct mf_field *mf) mf->id < MFF_TUN_METADATA0 + TUN_METADATA_NUM_OPTS; } +bool +mf_is_frozen_metadata(const struct mf_field *mf) +{ + if (mf->id >= MFF_TUN_ID && mf->id <= MFF_IN_PORT_OXM) { + return true; + } + + if (mf->id >= MFF_REG0 && mf->id < MFF_ETH_SRC) { + return true; + } + return false; +} + bool mf_is_pipeline_field(const struct mf_field *mf) { diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 45a96b9be2..738fb44b3c 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -961,14 +961,6 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) rte_eth_dev_info_get(dev->port_id, &info); - /* As of DPDK 19.11, it is not allowed to set a mq_mode for - * virtio PMD driver. */ - if (!strcmp(info.driver_name, "net_virtio")) { - conf.rxmode.mq_mode = ETH_MQ_RX_NONE; - } else { - conf.rxmode.mq_mode = ETH_MQ_RX_RSS; - } - /* As of DPDK 17.11.1 a few PMDs require to explicitly enable * scatter to support jumbo RX. * Setting scatter for the device is done after checking for @@ -1000,6 +992,11 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) /* Limit configured rss hash functions to only those supported * by the eth device. 
*/ conf.rx_adv_conf.rss_conf.rss_hf &= info.flow_type_rss_offloads; + if (conf.rx_adv_conf.rss_conf.rss_hf == 0) { + conf.rxmode.mq_mode = ETH_MQ_RX_NONE; + } else { + conf.rxmode.mq_mode = ETH_MQ_RX_RSS; + } /* A device may report more queues than it makes available (this has * been observed for Intel xl710, which reserves some of them for @@ -2867,6 +2864,9 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid, bool concurrent_txq) { if (OVS_UNLIKELY(!(dev->flags & NETDEV_UP))) { + rte_spinlock_lock(&dev->stats_lock); + dev->stats.tx_dropped += dp_packet_batch_size(batch); + rte_spinlock_unlock(&dev->stats_lock); dp_packet_delete_batch(batch, true); return; } diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 60dd138914..94c9737110 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -627,6 +627,7 @@ netdev_linux_notify_sock(void) if (!error) { size_t i; + nl_sock_listen_all_nsid(sock, true); for (i = 0; i < ARRAY_SIZE(mcgroups); i++) { error = nl_sock_join_mcgroup(sock, mcgroups[i]); if (error) { @@ -636,7 +637,6 @@ netdev_linux_notify_sock(void) } } } - nl_sock_listen_all_nsid(sock, true); ovsthread_once_done(&once); } @@ -6285,7 +6285,14 @@ get_stats_via_netlink(const struct netdev *netdev_, struct netdev_stats *stats) if (ofpbuf_try_pull(reply, NLMSG_HDRLEN + sizeof(struct ifinfomsg))) { const struct nlattr *a = nl_attr_find(reply, 0, IFLA_STATS64); if (a && nl_attr_get_size(a) >= sizeof(struct rtnl_link_stats64)) { - netdev_stats_from_rtnl_link_stats64(stats, nl_attr_get(a)); + const struct rtnl_link_stats64 *lstats = nl_attr_get(a); + struct rtnl_link_stats64 aligned_lstats; + + if (!IS_PTR_ALIGNED(lstats)) { + memcpy(&aligned_lstats, lstats, sizeof aligned_lstats); + lstats = &aligned_lstats; + } + netdev_stats_from_rtnl_link_stats64(stats, lstats); error = 0; } else { a = nl_attr_find(reply, 0, IFLA_STATS); diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index 9845e8d3fe..e1568e38a0 100644 --- a/lib/netdev-offload-tc.c +++ 
b/lib/netdev-offload-tc.c @@ -481,10 +481,10 @@ netdev_tc_flow_dump_destroy(struct netdev_flow_dump *dump) static void parse_flower_rewrite_to_netlink_action(struct ofpbuf *buf, - struct tc_flower *flower) + struct tc_action *action) { - char *mask = (char *) &flower->rewrite.mask; - char *data = (char *) &flower->rewrite.key; + char *mask = (char *) &action->rewrite.mask; + char *data = (char *) &action->rewrite.key; for (int type = 0; type < ARRAY_SIZE(set_flower_map); type++) { char *put = NULL; @@ -585,8 +585,10 @@ parse_tc_flower_to_stats(struct tc_flower *flower, } memset(stats, 0, sizeof *stats); - stats->n_packets = get_32aligned_u64(&flower->stats.n_packets); - stats->n_bytes = get_32aligned_u64(&flower->stats.n_bytes); + stats->n_packets = get_32aligned_u64(&flower->stats_sw.n_packets); + stats->n_packets += get_32aligned_u64(&flower->stats_hw.n_packets); + stats->n_bytes = get_32aligned_u64(&flower->stats_sw.n_bytes); + stats->n_bytes += get_32aligned_u64(&flower->stats_hw.n_bytes); stats->used = flower->lastused; } @@ -877,7 +879,7 @@ parse_tc_flower_to_match(struct tc_flower *flower, } break; case TC_ACT_PEDIT: { - parse_flower_rewrite_to_netlink_action(buf, flower); + parse_flower_rewrite_to_netlink_action(buf, action); } break; case TC_ACT_ENCAP: { @@ -1222,8 +1224,8 @@ parse_put_flow_set_masked_action(struct tc_flower *flower, uint64_t set_stub[1024 / 8]; struct ofpbuf set_buf = OFPBUF_STUB_INITIALIZER(set_stub); char *set_data, *set_mask; - char *key = (char *) &flower->rewrite.key; - char *mask = (char *) &flower->rewrite.mask; + char *key = (char *) &action->rewrite.key; + char *mask = (char *) &action->rewrite.mask; const struct nlattr *attr; int i, j, type; size_t size; @@ -1265,14 +1267,6 @@ parse_put_flow_set_masked_action(struct tc_flower *flower, } } - if (!is_all_zeros(&flower->rewrite, sizeof flower->rewrite)) { - if (flower->rewrite.rewrite == false) { - flower->rewrite.rewrite = true; - action->type = TC_ACT_PEDIT; - 
flower->action_count++; - } - } - if (hasmask && !is_all_zeros(set_mask, size)) { VLOG_DBG_RL(&rl, "unsupported sub attribute of set action type %d", type); @@ -1281,6 +1275,8 @@ parse_put_flow_set_masked_action(struct tc_flower *flower, } ofpbuf_uninit(&set_buf); + action->type = TC_ACT_PEDIT; + flower->action_count++; return 0; } @@ -1541,6 +1537,12 @@ parse_match_ct_state_to_flower(struct tc_flower *flower, struct match *match) flower->key.ct_state &= ~(TCA_FLOWER_KEY_CT_FLAGS_NEW); flower->mask.ct_state &= ~(TCA_FLOWER_KEY_CT_FLAGS_NEW); } + + if (flower->key.ct_state && + !(flower->key.ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED)) { + flower->key.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_TRACKED; + flower->mask.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_TRACKED; + } } if (mask->ct_zone) { @@ -1841,7 +1843,25 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, VLOG_DBG_RL(&rl, "Can't find netdev for output port %d", port); return ENODEV; } + + if (!netdev_flow_api_equals(netdev, outdev)) { + VLOG_DBG_RL(&rl, + "Flow API provider mismatch between ingress (%s) " + "and egress (%s) ports", + netdev_get_name(netdev), netdev_get_name(outdev)); + netdev_close(outdev); + return EOPNOTSUPP; + } + action->out.ifindex_out = netdev_get_ifindex(outdev); + if (action->out.ifindex_out < 0) { + VLOG_DBG_RL(&rl, + "Can't find ifindex for output port %s, error %d", + netdev_get_name(outdev), action->out.ifindex_out); + netdev_close(outdev); + return -action->out.ifindex_out; + } + action->out.ingress = is_internal_port(netdev_get_type(outdev)); action->type = TC_ACT_OUTPUT; flower.action_count++; @@ -2015,9 +2035,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED, if (stats) { memset(stats, 0, sizeof *stats); if (!tc_get_flower(&id, &flower)) { - stats->n_packets = get_32aligned_u64(&flower.stats.n_packets); - stats->n_bytes = get_32aligned_u64(&flower.stats.n_bytes); - stats->used = flower.lastused; + parse_tc_flower_to_stats(&flower, stats); } } diff --git 
a/lib/odp-util.c b/lib/odp-util.c index 7729a90608..ce3b853e0f 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -2941,7 +2941,7 @@ odp_nsh_key_from_attr__(const struct nlattr *attr, bool is_mask, const struct ovs_nsh_key_md1 *md1 = nl_attr_get(a); has_md1 = true; memcpy(nsh->context, md1->context, sizeof md1->context); - if (len == 2 * sizeof(*md1)) { + if (nsh_mask && (len == 2 * sizeof *md1)) { const struct ovs_nsh_key_md1 *md1_mask = md1 + 1; memcpy(nsh_mask->context, md1_mask->context, sizeof(*md1_mask)); @@ -3212,7 +3212,7 @@ tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key, opts.flags = tun_key->gtpu_flags; opts.msgtype = tun_key->gtpu_msgtype; - nl_msg_put_unspec(a, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, + nl_msg_put_unspec(a, OVS_TUNNEL_KEY_ATTR_GTPU_OPTS, &opts, sizeof(opts)); } nl_msg_end_nested(a, tun_key_ofs); @@ -3400,16 +3400,16 @@ format_eth(struct ds *ds, const char *name, const struct eth_addr key, static void format_be64(struct ds *ds, const char *name, ovs_be64 key, - const ovs_be64 *mask, bool verbose) + const ovs_32aligned_be64 *mask_, bool verbose) { - bool mask_empty = mask && !*mask; + ovs_be64 mask = mask_ ? get_32aligned_be64(mask_) : htonll(0); - if (verbose || !mask_empty) { - bool mask_full = !mask || *mask == OVS_BE64_MAX; + if (verbose || mask) { + bool mask_full = !mask_ || mask == OVS_BE64_MAX; ds_put_format(ds, "%s=0x%"PRIx64, name, ntohll(key)); if (!mask_full) { /* Partially masked. */ - ds_put_format(ds, "/%#"PRIx64, ntohll(*mask)); + ds_put_format(ds, "/%#"PRIx64, ntohll(mask)); } ds_put_char(ds, ','); } @@ -4601,6 +4601,11 @@ odp_flow_format(const struct nlattr *key, size_t key_len, ds_put_char(ds, ','); } ds_put_cstr(ds, "eth()"); + } else if (attr_type == OVS_KEY_ATTR_PACKET_TYPE && is_wildcard) { + /* See the above help text, however in the case where the + * packet type is not shown, we still need to display the + * eth() header if the packets type is wildcarded. 
*/ + has_packet_type_key = false; } ofpbuf_clear(&ofp); } @@ -4618,7 +4623,7 @@ odp_flow_format(const struct nlattr *key, size_t key_len, } ds_put_char(ds, ')'); } - if (!has_ethtype_key) { + if (!has_ethtype_key && mask) { const struct nlattr *ma = nl_attr_find__(mask, mask_len, OVS_KEY_ATTR_ETHERTYPE); if (ma) { @@ -7132,11 +7137,6 @@ parse_l2_5_onward(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1], } } } - } else if (src_flow->nw_proto == IPPROTO_IGMP - && src_flow->dl_type == htons(ETH_TYPE_IP)) { - /* OVS userspace parses the IGMP type, code, and group, but its - * datapaths do not, so there is always missing information. */ - return ODP_FIT_TOO_LITTLE; } if (is_mask && expected_bit != OVS_KEY_ATTR_UNSPEC) { if ((flow->tp_src || flow->tp_dst) && flow->nw_proto != 0xff) { @@ -7230,6 +7230,14 @@ parse_8021q_onward(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1], } expected_attrs = 0; + /* For OVS to be backward compatible with newer datapath + * implementations, we should ignore out of range attributes. */ + if (out_of_range_attr) { + VLOG_DBG("Flow key decode found unknown OVS_KEY_ATTR, %d", + out_of_range_attr); + out_of_range_attr = 0; + } + if (!parse_ethertype(attrs, present_attrs, &expected_attrs, flow, src_flow, errorp)) { return ODP_FIT_ERROR; @@ -7279,6 +7287,14 @@ odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len, } expected_attrs = 0; + /* For OVS to be backward compatible with newer datapath implementations, + * we should ignore out of range attributes. */ + if (out_of_range_attr) { + VLOG_DBG("Flow key decode found unknown OVS_KEY_ATTR, %d", + out_of_range_attr); + out_of_range_attr = 0; + } + /* Metadata. 
*/ if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_RECIRC_ID)) { flow->recirc_id = nl_attr_get_u32(attrs[OVS_KEY_ATTR_RECIRC_ID]); @@ -7513,10 +7529,12 @@ parse_key_and_mask_to_match(const struct nlattr *key, size_t key_len, fitness = odp_flow_key_to_flow(key, key_len, &match->flow, NULL); if (fitness) { - /* This should not happen: it indicates that - * odp_flow_key_from_flow() and odp_flow_key_to_flow() disagree on - * the acceptable form of a flow. Log the problem as an error, - * with enough details to enable debugging. */ + /* This will happen when the odp_flow_key_to_flow() function can't + * parse the netlink message to a match structure. It will return + * ODP_FIT_TOO_LITTLE if there is not enough information to parse the + * content successfully, ODP_FIT_TOO_MUCH if there is too much netlink + * data and we do not know how to safely ignore it, and ODP_FIT_ERROR + * in any other case. */ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); if (!VLOG_DROP_ERR(&rl)) { @@ -7524,7 +7542,8 @@ parse_key_and_mask_to_match(const struct nlattr *key, size_t key_len, ds_init(&s); odp_flow_format(key, key_len, NULL, 0, NULL, &s, true); - VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s)); + VLOG_ERR("internal error parsing flow key %s (%s)", + ds_cstr(&s), odp_key_fitness_to_string(fitness)); ds_destroy(&s); } @@ -7534,10 +7553,7 @@ parse_key_and_mask_to_match(const struct nlattr *key, size_t key_len, fitness = odp_flow_key_to_mask(mask, mask_len, &match->wc, &match->flow, NULL); if (fitness) { - /* This should not happen: it indicates that - * odp_flow_key_from_mask() and odp_flow_key_to_mask() - * disagree on the acceptable form of a mask. Log the problem - * as an error, with enough details to enable debugging. */ + /* This should not happen, see comment above. 
*/ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); if (!VLOG_DROP_ERR(&rl)) { diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c index ecf914eac1..7ea4b6ed56 100644 --- a/lib/ofp-actions.c +++ b/lib/ofp-actions.c @@ -853,7 +853,9 @@ decode_NXAST_RAW_CONTROLLER2(const struct ext_action_header *eah, case NXAC2PT_REASON: { uint8_t u8; error = ofpprop_parse_u8(&payload, &u8); - oc->reason = u8; + if (!error) { + oc->reason = u8; + } break; } diff --git a/lib/ofp-flow.c b/lib/ofp-flow.c index ff0396845a..3bc744f78f 100644 --- a/lib/ofp-flow.c +++ b/lib/ofp-flow.c @@ -1254,7 +1254,16 @@ ofputil_append_flow_stats_reply(const struct ofputil_flow_stats *fs, OVS_NOT_REACHED(); } - ofpmp_postappend(replies, start_ofs); + if ((reply->size - start_ofs) > (UINT16_MAX - ofpbuf_headersize(reply))) { + /* When this happens, the reply will not fit in a single OFP message, + * and we should not append it to the queue. We will log a warning + * and continue with the next flow stat entry. */ + reply->size = start_ofs; + VLOG_WARN_RL(&rl, "Flow exceeded the maximum flow statistics reply " + "size and was excluded from the response set"); + } else { + ofpmp_postappend(replies, start_ofs); + } fs_->match.flow.tunnel.metadata.tab = orig_tun_table; } diff --git a/lib/ofp-packet.c b/lib/ofp-packet.c index 4579548ee1..9485ddfc93 100644 --- a/lib/ofp-packet.c +++ b/lib/ofp-packet.c @@ -133,7 +133,9 @@ decode_nx_packet_in2(const struct ofp_header *oh, bool loose, case NXPINT_FULL_LEN: { uint32_t u32; error = ofpprop_parse_u32(&payload, &u32); - *total_len = u32; + if (!error) { + *total_len = u32; + } break; } @@ -152,7 +154,9 @@ decode_nx_packet_in2(const struct ofp_header *oh, bool loose, case NXPINT_REASON: { uint8_t reason; error = ofpprop_parse_u8(&payload, &reason); - pin->reason = reason; + if (!error) { + pin->reason = reason; + } break; } @@ -883,7 +887,9 @@ ofputil_decode_packet_in_private(const struct ofp_header *oh, bool loose, case NXCPT_ODP_PORT: { uint32_t value; 
error = ofpprop_parse_u32(&payload, &value); - pin->odp_port = u32_to_odp(value); + if (!error) { + pin->odp_port = u32_to_odp(value); + } break; } diff --git a/lib/ofpbuf.c b/lib/ofpbuf.c index 4edb3c114a..05c0b5711d 100644 --- a/lib/ofpbuf.c +++ b/lib/ofpbuf.c @@ -422,6 +422,10 @@ void ofpbuf_reserve(struct ofpbuf *b, size_t size) { ovs_assert(!b->size); + + if (!size) { + return; + } ofpbuf_prealloc_tailroom(b, size); b->data = (char*)b->data + size; } diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h index ecc251a7ff..83bd10cca5 100644 --- a/lib/ovs-numa.h +++ b/lib/ovs-numa.h @@ -68,9 +68,9 @@ void ovs_numa_dump_destroy(struct ovs_numa_dump *); int ovs_numa_thread_setaffinity_core(unsigned core_id); #define FOR_EACH_CORE_ON_DUMP(ITER, DUMP) \ - HMAP_FOR_EACH((ITER), hmap_node, &(DUMP)->cores) + HMAP_FOR_EACH (ITER, hmap_node, &(DUMP)->cores) #define FOR_EACH_NUMA_ON_DUMP(ITER, DUMP) \ - HMAP_FOR_EACH((ITER), hmap_node, &(DUMP)->numas) + HMAP_FOR_EACH (ITER, hmap_node, &(DUMP)->numas) #endif /* ovs-numa.h */ diff --git a/lib/ovs-rcu.c b/lib/ovs-rcu.c index 1866bd3088..946aa04d18 100644 --- a/lib/ovs-rcu.c +++ b/lib/ovs-rcu.c @@ -444,3 +444,40 @@ ovsrcu_init_module(void) ovsthread_once_done(&once); } } + +static void +ovsrcu_barrier_func(void *seq_) +{ + struct seq *seq = (struct seq *) seq_; + seq_change(seq); +} + +/* Similar to the kernel rcu_barrier, ovsrcu_barrier waits for all outstanding + * RCU callbacks to complete. However, unlike the kernel rcu_barrier, which + * might return immediately if there are no outstanding RCU callbacks, + * this API will at least wait for a grace period. + * + * Another issue the caller might need to know is that the barrier is just + * for "one-shot", i.e. if inside some RCU callbacks, another RCU callback is + * registered, this API only guarantees the first round of RCU callbacks have + * been executed after it returns. 
+ */ +void +ovsrcu_barrier(void) +{ + struct seq *seq = seq_create(); + /* First let all threads flush their cbsets. */ + ovsrcu_synchronize(); + + /* Then register a new cbset, ensure this cbset + * is at the tail of the global list. */ + uint64_t seqno = seq_read(seq); + ovsrcu_postpone__(ovsrcu_barrier_func, (void *) seq); + + do { + seq_wait(seq, seqno); + poll_block(); + } while (seqno == seq_read(seq)); + + seq_destroy(seq); +} diff --git a/lib/ovs-rcu.h b/lib/ovs-rcu.h index ecc4c92010..8b397b7fb0 100644 --- a/lib/ovs-rcu.h +++ b/lib/ovs-rcu.h @@ -155,6 +155,19 @@ * port_delete(id); * } * + * Use ovsrcu_barrier() to wait for all the outstanding RCU callbacks to + * finish. This is useful when you have to destroy some resources however + * these resources are referenced in the outstanding RCU callbacks. + * + * void rcu_cb(void *A) { + * do_something(A); + * } + * + * void destroy_A() { + * ovsrcu_postpone(rcu_cb, A); // will use A later + * ovsrcu_barrier(); // wait for rcu_cb done + * do_destroy_A(); // free A + * } */ #include "compiler.h" @@ -310,4 +323,6 @@ void ovsrcu_synchronize(void); void ovsrcu_exit(void); +void ovsrcu_barrier(void); + #endif /* ovs-rcu.h */ diff --git a/lib/ovsdb-cs.c b/lib/ovsdb-cs.c index 659d49dbf7..dead31275d 100644 --- a/lib/ovsdb-cs.c +++ b/lib/ovsdb-cs.c @@ -1109,6 +1109,23 @@ ovsdb_cs_db_sync_condition(struct ovsdb_cs_db *db) } table->req_cond = NULL; db->cond_changed = true; + + /* There are two cases: + * a. either the server already processed the requested monitor + * condition change but the FSM was restarted before the + * client was notified. In this case the client should + * clear its local cache because it's out of sync with the + * monitor view on the server side. + * + * b. OR the server hasn't processed the requested monitor + * condition change yet. 
+ * + * As there's no easy way to differentiate between the two, + * and given that this condition should be rare, reset the + * 'last_id', essentially flushing the local cached DB + * contents. + */ + db->last_id = UUID_ZERO; } } } @@ -1539,12 +1556,11 @@ ovsdb_cs_db_parse_monitor_reply(struct ovsdb_cs_db *db, const struct json *table_updates; bool clear; if (version == 3) { - struct uuid last_id; if (result->type != JSON_ARRAY || result->array.n != 3 || (result->array.elems[0]->type != JSON_TRUE && result->array.elems[0]->type != JSON_FALSE) || result->array.elems[1]->type != JSON_STRING - || !uuid_from_string(&last_id, + || !uuid_from_string(&db->last_id, json_string(result->array.elems[1]))) { struct ovsdb_error *error = ovsdb_syntax_error( result, NULL, "bad monitor_cond_since reply format"); @@ -1833,7 +1849,7 @@ server_column_get_string(const struct server_row *row, { ovs_assert(server_columns[index].type.key.type == OVSDB_TYPE_STRING); const struct ovsdb_datum *d = &row->data[index]; - return d->n == 1 ? d->keys[0].string : default_value; + return d->n == 1 ? 
d->keys[0].s->string : default_value; } static bool diff --git a/lib/ovsdb-data.c b/lib/ovsdb-data.c index c145f5ad97..6654ed6deb 100644 --- a/lib/ovsdb-data.c +++ b/lib/ovsdb-data.c @@ -74,7 +74,7 @@ ovsdb_atom_init_default(union ovsdb_atom *atom, enum ovsdb_atomic_type type) break; case OVSDB_TYPE_STRING: - atom->string = xmemdup("", 1); + atom->s = ovsdb_atom_string_create_nocopy(xmemdup("", 1)); break; case OVSDB_TYPE_UUID: @@ -136,7 +136,7 @@ ovsdb_atom_is_default(const union ovsdb_atom *atom, return atom->boolean == false; case OVSDB_TYPE_STRING: - return atom->string[0] == '\0'; + return atom->s->string[0] == '\0'; case OVSDB_TYPE_UUID: return uuid_is_zero(&atom->uuid); @@ -172,7 +172,8 @@ ovsdb_atom_clone(union ovsdb_atom *new, const union ovsdb_atom *old, break; case OVSDB_TYPE_STRING: - new->string = xstrdup(old->string); + new->s = old->s; + new->s->n_refs++; break; case OVSDB_TYPE_UUID: @@ -214,7 +215,7 @@ ovsdb_atom_hash(const union ovsdb_atom *atom, enum ovsdb_atomic_type type, return hash_boolean(atom->boolean, basis); case OVSDB_TYPE_STRING: - return hash_string(atom->string, basis); + return hash_string(atom->s->string, basis); case OVSDB_TYPE_UUID: return hash_int(uuid_hash(&atom->uuid), basis); @@ -246,7 +247,7 @@ ovsdb_atom_compare_3way(const union ovsdb_atom *a, return a->boolean - b->boolean; case OVSDB_TYPE_STRING: - return strcmp(a->string, b->string); + return a->s == b->s ? 
0 : strcmp(a->s->string, b->s->string); case OVSDB_TYPE_UUID: return uuid_compare_3way(&a->uuid, &b->uuid); @@ -404,7 +405,7 @@ ovsdb_atom_from_json__(union ovsdb_atom *atom, case OVSDB_TYPE_STRING: if (json->type == JSON_STRING) { - atom->string = xstrdup(json->string); + atom->s = ovsdb_atom_string_create(json->string); return NULL; } break; @@ -473,7 +474,7 @@ ovsdb_atom_to_json(const union ovsdb_atom *atom, enum ovsdb_atomic_type type) return json_boolean_create(atom->boolean); case OVSDB_TYPE_STRING: - return json_string_create(atom->string); + return json_string_create(atom->s->string); case OVSDB_TYPE_UUID: return wrap_json("uuid", json_string_create_nocopy( @@ -551,14 +552,18 @@ ovsdb_atom_from_string__(union ovsdb_atom *atom, if (s_len < 2 || s[s_len - 1] != '"') { return xasprintf("%s: missing quote at end of " "quoted string", s); - } else if (!json_string_unescape(s + 1, s_len - 2, - &atom->string)) { - char *error = xasprintf("%s: %s", s, atom->string); - free(atom->string); - return error; + } else { + char *res; + if (json_string_unescape(s + 1, s_len - 2, &res)) { + atom->s = ovsdb_atom_string_create_nocopy(res); + } else { + char *error = xasprintf("%s: %s", s, res); + free(res); + return error; + } } } else { - atom->string = xstrdup(s); + atom->s = ovsdb_atom_string_create(s); } break; @@ -721,14 +726,14 @@ ovsdb_atom_to_string(const union ovsdb_atom *atom, enum ovsdb_atomic_type type, break; case OVSDB_TYPE_STRING: - if (string_needs_quotes(atom->string)) { + if (string_needs_quotes(atom->s->string)) { struct json json; json.type = JSON_STRING; - json.string = atom->string; + json.string = atom->s->string; json_to_ds(&json, 0, out); } else { - ds_put_cstr(out, atom->string); + ds_put_cstr(out, atom->s->string); } break; @@ -750,7 +755,7 @@ ovsdb_atom_to_bare(const union ovsdb_atom *atom, enum ovsdb_atomic_type type, struct ds *out) { if (type == OVSDB_TYPE_STRING) { - ds_put_cstr(out, atom->string); + ds_put_cstr(out, atom->s->string); } else { 
ovsdb_atom_to_string(atom, type, out); } @@ -799,7 +804,7 @@ ovsdb_atom_check_constraints(const union ovsdb_atom *atom, const struct ovsdb_base_type *base) { if (base->enum_ - && ovsdb_datum_find_key(base->enum_, atom, base->type) == UINT_MAX) { + && !ovsdb_datum_find_key(base->enum_, atom, base->type, NULL)) { struct ovsdb_error *error; struct ds actual = DS_EMPTY_INITIALIZER; struct ds valid = DS_EMPTY_INITIALIZER; @@ -877,7 +882,7 @@ ovsdb_atom_check_constraints(const union ovsdb_atom *atom, return NULL; case OVSDB_TYPE_STRING: - return check_string_constraints(atom->string, &base->string); + return check_string_constraints(atom->s->string, &base->string); case OVSDB_TYPE_UUID: return NULL; @@ -1691,8 +1696,8 @@ ovsdb_datum_from_smap(struct ovsdb_datum *datum, const struct smap *smap) struct smap_node *node; size_t i = 0; SMAP_FOR_EACH (node, smap) { - datum->keys[i].string = xstrdup(node->key); - datum->values[i].string = xstrdup(node->value); + datum->keys[i].s = ovsdb_atom_string_create(node->key); + datum->values[i].s = ovsdb_atom_string_create(node->value); i++; } ovs_assert(i == datum->n); @@ -1784,14 +1789,16 @@ ovsdb_datum_compare_3way(const struct ovsdb_datum *a, a->n)); } -/* If 'key' is one of the keys in 'datum', returns its index within 'datum', - * otherwise UINT_MAX. 'key.type' must be the type of the atoms stored in the - * 'keys' array in 'datum'. +/* If 'key' is one of the keys in 'datum', returns 'true' and sets '*pos' to + * its index within 'datum', otherwise returns 'false' and sets '*pos' to the + * index where 'key' should have been. 'key.type' must be the type of the + * atoms stored in the 'keys' array in 'datum'. 
*/ -unsigned int +bool ovsdb_datum_find_key(const struct ovsdb_datum *datum, const union ovsdb_atom *key, - enum ovsdb_atomic_type key_type) + enum ovsdb_atomic_type key_type, + unsigned int *pos) { unsigned int low = 0; unsigned int high = datum->n; @@ -1803,10 +1810,16 @@ ovsdb_datum_find_key(const struct ovsdb_datum *datum, } else if (cmp > 0) { low = idx + 1; } else { - return idx; + if (pos) { + *pos = idx; + } + return true; } } - return UINT_MAX; + if (pos) { + *pos = low; + } + return false; } /* If 'key' and 'value' is one of the key-value pairs in 'datum', returns its @@ -1821,10 +1834,11 @@ ovsdb_datum_find_key_value(const struct ovsdb_datum *datum, const union ovsdb_atom *value, enum ovsdb_atomic_type value_type) { - unsigned int idx = ovsdb_datum_find_key(datum, key, key_type); - if (idx != UINT_MAX - && value_type != OVSDB_TYPE_VOID - && !ovsdb_atom_equals(&datum->values[idx], value, value_type)) { + unsigned int idx; + + if (!ovsdb_datum_find_key(datum, key, key_type, &idx) + || (value_type != OVSDB_TYPE_VOID + && !ovsdb_atom_equals(&datum->values[idx], value, value_type))) { idx = UINT_MAX; } return idx; @@ -1948,38 +1962,68 @@ ovsdb_datum_add_unsafe(struct ovsdb_datum *datum, } } +/* Adds 'n' atoms starting from index 'start_idx' from 'src' to the end of + * 'dst'. 'dst' should have enough memory allocated to hold the additional + * 'n' atoms. Atoms are not cloned, i.e. 'dst' will reference the same data. + * Caller also should take care of the result being sorted. 
*/ +static void +ovsdb_datum_push_unsafe(struct ovsdb_datum *dst, + const struct ovsdb_datum *src, + unsigned int start_idx, unsigned int n, + const struct ovsdb_type *type) +{ + memcpy(&dst->keys[dst->n], &src->keys[start_idx], n * sizeof src->keys[0]); + if (type->value.type != OVSDB_TYPE_VOID) { + memcpy(&dst->values[dst->n], &src->values[start_idx], + n * sizeof src->values[0]); + } + dst->n += n; +} + void ovsdb_datum_union(struct ovsdb_datum *a, const struct ovsdb_datum *b, - const struct ovsdb_type *type, bool replace) + const struct ovsdb_type *type) { - unsigned int n; - size_t bi; + struct ovsdb_datum result; + unsigned int copied, pos; - n = a->n; - for (bi = 0; bi < b->n; bi++) { - unsigned int ai; + ovsdb_datum_init_empty(&result); - ai = ovsdb_datum_find_key(a, &b->keys[bi], type->key.type); - if (ai == UINT_MAX) { - if (n == a->n) { - ovsdb_datum_reallocate(a, type, a->n + (b->n - bi)); - } - ovsdb_atom_clone(&a->keys[n], &b->keys[bi], type->key.type); - if (type->value.type != OVSDB_TYPE_VOID) { - ovsdb_atom_clone(&a->values[n], &b->values[bi], - type->value.type); - } - n++; - } else if (replace && type->value.type != OVSDB_TYPE_VOID) { - ovsdb_atom_destroy(&a->values[ai], type->value.type); - ovsdb_atom_clone(&a->values[ai], &b->values[bi], + copied = 0; + for (size_t bi = 0; bi < b->n; bi++) { + if (ovsdb_datum_find_key(a, &b->keys[bi], type->key.type, &pos)) { + /* Atom with the same key already exists. */ + continue; + } + if (!result.keys) { + ovsdb_datum_reallocate(&result, type, a->n + (b->n - bi)); + } + if (pos > copied) { + /* Need to copy some atoms from 'a' first. */ + ovsdb_datum_push_unsafe(&result, a, copied, pos - copied, type); + copied = pos; + } + /* Inserting new atom from 'b'. 
*/ + ovsdb_atom_clone(&result.keys[result.n], &b->keys[bi], type->key.type); + if (type->value.type != OVSDB_TYPE_VOID) { + ovsdb_atom_clone(&result.values[result.n], &b->values[bi], type->value.type); } + result.n++; } - if (n != a->n) { - a->n = n; - ovs_assert(!ovsdb_datum_sort(a, type->key.type)); + if (!result.keys) { + /* 'a' doesn't need to be changed. */ + return; + } + if (a->n > copied) { + /* Copying remaining atoms. */ + ovsdb_datum_push_unsafe(&result, a, copied, a->n - copied, type); } + /* All atoms are copied now. */ + a->n = 0; + + ovsdb_datum_swap(&result, a); + ovsdb_datum_destroy(&result, type); } void @@ -1987,26 +2031,55 @@ ovsdb_datum_subtract(struct ovsdb_datum *a, const struct ovsdb_type *a_type, const struct ovsdb_datum *b, const struct ovsdb_type *b_type) { - bool changed = false; - size_t i; + unsigned int *idx, ai; + size_t n_idx; ovs_assert(a_type->key.type == b_type->key.type); ovs_assert(a_type->value.type == b_type->value.type || b_type->value.type == OVSDB_TYPE_VOID); - /* XXX The big-O of this could easily be improved. */ - for (i = 0; i < a->n; ) { - unsigned int idx = ovsdb_datum_find(a, i, b, b_type); - if (idx != UINT_MAX) { - changed = true; - ovsdb_datum_remove_unsafe(a, i, a_type); - } else { - i++; + idx = xmalloc(b->n * sizeof *idx); + n_idx = 0; + for (size_t bi = 0; bi < b->n; bi++) { + ai = ovsdb_datum_find(b, bi, a, b_type); + if (ai == UINT_MAX) { + /* No such atom in 'a'. */ + continue; } + /* Not destroying right away since ovsdb_datum_find() will use them. */ + idx[n_idx++] = ai; } - if (changed) { - ovsdb_datum_sort_assert(a, a_type->key.type); + if (!n_idx) { + free(idx); + return; + } + + struct ovsdb_datum result; + + ovsdb_datum_init_empty(&result); + ovsdb_datum_reallocate(&result, a_type, a->n - n_idx); + + unsigned int start_idx = 0; + for (size_t i = 0; i < n_idx; i++) { + ai = idx[i]; + + /* Destroying atom. 
*/ + ovsdb_atom_destroy(&a->keys[ai], a_type->key.type); + if (a_type->value.type != OVSDB_TYPE_VOID) { + ovsdb_atom_destroy(&a->values[ai], a_type->value.type); + } + + /* Copy non-removed atoms from 'a' to result. */ + ovsdb_datum_push_unsafe(&result, a, start_idx, ai - start_idx, a_type); + start_idx = idx[i] + 1; } + /* Copying remaining atoms. */ + ovsdb_datum_push_unsafe(&result, a, start_idx, a->n - start_idx, a_type); + a->n = 0; + + ovsdb_datum_swap(&result, a); + ovsdb_datum_destroy(&result, a_type); + free(idx); } struct ovsdb_symbol_table * @@ -2067,6 +2140,64 @@ ovsdb_symbol_table_insert(struct ovsdb_symbol_table *symtab, /* APIs for Generating and apply diffs. */ +/* Find what needs to be added to and removed from 'old' to construct 'new'. + * + * The 'added' and 'removed' datums are always safe; the orders of keys are + * maintained since they are added in order. */ +void +ovsdb_datum_added_removed(struct ovsdb_datum *added, + struct ovsdb_datum *removed, + const struct ovsdb_datum *old, + const struct ovsdb_datum *new, + const struct ovsdb_type *type) +{ + size_t oi, ni; + + ovsdb_datum_init_empty(added); + ovsdb_datum_init_empty(removed); + if (!ovsdb_type_is_composite(type)) { + ovsdb_datum_clone(removed, old, type); + ovsdb_datum_clone(added, new, type); + return; + } + + /* Generate the diff in O(n) time. 
*/ + for (oi = ni = 0; oi < old->n && ni < new->n;) { + int c = ovsdb_atom_compare_3way(&old->keys[oi], &new->keys[ni], + type->key.type); + if (c < 0) { + ovsdb_datum_add_unsafe(removed, &old->keys[oi], &old->values[oi], + type, NULL); + oi++; + } else if (c > 0) { + ovsdb_datum_add_unsafe(added, &new->keys[ni], &new->values[ni], + type, NULL); + ni++; + } else { + if (type->value.type != OVSDB_TYPE_VOID && + ovsdb_atom_compare_3way(&old->values[oi], &new->values[ni], + type->value.type)) { + ovsdb_datum_add_unsafe(removed, &old->keys[oi], + &old->values[oi], type, NULL); + ovsdb_datum_add_unsafe(added, &new->keys[ni], &new->values[ni], + type, NULL); + } + oi++; ni++; + } + } + + for (; oi < old->n; oi++) { + ovsdb_datum_add_unsafe(removed, &old->keys[oi], &old->values[oi], + type, NULL); + } + + for (; ni < new->n; ni++) { + ovsdb_datum_add_unsafe(added, &new->keys[ni], &new->values[ni], + type, NULL); + } +} + + /* Generate a difference ovsdb_dataum between 'old' and 'new'. * 'new' can be regenerated by applying the difference to the 'old'. * @@ -2127,6 +2258,106 @@ ovsdb_datum_diff(struct ovsdb_datum *diff, } } +/* Apply 'diff' to 'a'. + * + * Return NULL if the 'a' is successfully updated, otherwise, return + * ovsdb_error. 
*/ +struct ovsdb_error * +ovsdb_datum_apply_diff_in_place(struct ovsdb_datum *a, + const struct ovsdb_datum *diff, + const struct ovsdb_type *type) +{ + struct ovsdb_error *error = NULL; + struct ovsdb_datum result; + size_t i, new_size; + unsigned int *idx, pos; + enum { + DIFF_OP_ADD, + DIFF_OP_REMOVE, + DIFF_OP_UPDATE, + } *operation; + + if (!ovsdb_type_is_composite(type)) { + ovsdb_datum_destroy(a, type); + ovsdb_datum_clone(a, diff, type); + return NULL; + } + + operation = xmalloc(diff->n * sizeof *operation); + idx = xmalloc(diff->n * sizeof *idx); + new_size = a->n; + for (i = 0; i < diff->n; i++) { + if (!ovsdb_datum_find_key(a, &diff->keys[i], type->key.type, &pos)) { + operation[i] = DIFF_OP_ADD; + new_size++; + } else if (type->value.type != OVSDB_TYPE_VOID + && !ovsdb_atom_equals(&diff->values[i], &a->values[pos], + type->value.type)) { + operation[i] = DIFF_OP_UPDATE; + } else { + operation[i] = DIFF_OP_REMOVE; + new_size--; + } + idx[i] = pos; + } + + /* Make sure member size of 'new' conforms to type. */ + if (new_size < type->n_min || new_size > type->n_max) { + error = ovsdb_error(NULL, "Datum crated by diff has size error"); + goto exit; + } + + ovsdb_datum_init_empty(&result); + ovsdb_datum_reallocate(&result, type, new_size); + + unsigned int copied = 0; + for (i = 0; i < diff->n; i++) { + pos = idx[i]; + + if (copied < pos) { + /* Copying all atoms that should go before the current one. */ + ovsdb_datum_push_unsafe(&result, a, copied, pos - copied, type); + copied = pos; + } + + switch (operation[i]) { + case DIFF_OP_UPDATE: + case DIFF_OP_ADD: + /* Inserting new atom from 'diff'. */ + ovsdb_atom_clone(&result.keys[result.n], + &diff->keys[i], type->key.type); + if (type->value.type != OVSDB_TYPE_VOID) { + ovsdb_atom_clone(&result.values[result.n], + &diff->values[i], type->value.type); + } + result.n++; + if (operation[i] != DIFF_OP_UPDATE) { + break; + } + /* fall through */ + + case DIFF_OP_REMOVE: + /* Destroying atom. 
*/ + ovsdb_atom_destroy(&a->keys[pos], type->key.type); + if (type->value.type != OVSDB_TYPE_VOID) { + ovsdb_atom_destroy(&a->values[pos], type->value.type); + } + copied++; /* Skipping removed atom. */ + break; + } + } + /* Copying remaining atoms. */ + ovsdb_datum_push_unsafe(&result, a, copied, a->n - copied, type); + a->n = 0; + + ovsdb_datum_swap(&result, a); + ovsdb_datum_destroy(&result, type); +exit: + free(operation); + free(idx); + return error; +} + /* Apply 'diff' to 'old' to regenerate 'new'. * * Return NULL if the 'new' is successfully generated, otherwise, return diff --git a/lib/ovsdb-data.h b/lib/ovsdb-data.h index c5a80ee39f..f66ed3472c 100644 --- a/lib/ovsdb-data.h +++ b/lib/ovsdb-data.h @@ -20,6 +20,7 @@ #include "compiler.h" #include "ovsdb-types.h" #include "openvswitch/shash.h" +#include "util.h" #ifdef __cplusplus extern "C" { @@ -31,12 +32,33 @@ struct ds; struct ovsdb_symbol_table; struct smap; +struct ovsdb_atom_string { + char *string; + size_t n_refs; +}; + +static inline struct ovsdb_atom_string * +ovsdb_atom_string_create_nocopy(char *str) +{ + struct ovsdb_atom_string *s = xzalloc(sizeof *s); + + s->string = str; + s->n_refs = 1; + return s; +} + +static inline struct ovsdb_atom_string * +ovsdb_atom_string_create(const char *str) +{ + return ovsdb_atom_string_create_nocopy(xstrdup(str)); +} + /* One value of an atomic type (given by enum ovs_atomic_type). */ union ovsdb_atom { int64_t integer; double real; bool boolean; - char *string; + struct ovsdb_atom_string *s; struct uuid uuid; }; @@ -66,8 +88,9 @@ ovsdb_atom_needs_destruction(enum ovsdb_atomic_type type) static inline void ovsdb_atom_destroy(union ovsdb_atom *atom, enum ovsdb_atomic_type type) { - if (type == OVSDB_TYPE_STRING) { - free(atom->string); + if (type == OVSDB_TYPE_STRING && !--atom->s->n_refs) { + free(atom->s->string); + free(atom->s); } } @@ -209,9 +232,10 @@ bool ovsdb_datum_equals(const struct ovsdb_datum *, const struct ovsdb_type *); /* Search. 
*/ -unsigned int ovsdb_datum_find_key(const struct ovsdb_datum *, - const union ovsdb_atom *key, - enum ovsdb_atomic_type key_type); +bool ovsdb_datum_find_key(const struct ovsdb_datum *, + const union ovsdb_atom *key, + enum ovsdb_atomic_type key_type, + unsigned int *pos); unsigned int ovsdb_datum_find_key_value(const struct ovsdb_datum *, const union ovsdb_atom *key, enum ovsdb_atomic_type key_type, @@ -227,14 +251,19 @@ bool ovsdb_datum_excludes_all(const struct ovsdb_datum *, const struct ovsdb_type *); void ovsdb_datum_union(struct ovsdb_datum *, const struct ovsdb_datum *, - const struct ovsdb_type *, - bool replace); + const struct ovsdb_type *); void ovsdb_datum_subtract(struct ovsdb_datum *a, const struct ovsdb_type *a_type, const struct ovsdb_datum *b, const struct ovsdb_type *b_type); /* Generate and apply diffs */ +void ovsdb_datum_added_removed(struct ovsdb_datum *added, + struct ovsdb_datum *removed, + const struct ovsdb_datum *old, + const struct ovsdb_datum *new, + const struct ovsdb_type *type); + void ovsdb_datum_diff(struct ovsdb_datum *diff, const struct ovsdb_datum *old_datum, const struct ovsdb_datum *new_datum, @@ -246,6 +275,12 @@ struct ovsdb_error *ovsdb_datum_apply_diff(struct ovsdb_datum *new_datum, const struct ovsdb_type *type) OVS_WARN_UNUSED_RESULT; +struct ovsdb_error * ovsdb_datum_apply_diff_in_place( + struct ovsdb_datum *a, + const struct ovsdb_datum *diff, + const struct ovsdb_type *type) +OVS_WARN_UNUSED_RESULT; + /* Raw operations that may not maintain the invariants. 
*/ void ovsdb_datum_remove_unsafe(struct ovsdb_datum *, size_t idx, const struct ovsdb_type *); diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c index 2198c69c60..496ec490d3 100644 --- a/lib/ovsdb-idl.c +++ b/lib/ovsdb-idl.c @@ -1898,8 +1898,7 @@ ovsdb_idl_index_destroy_row(const struct ovsdb_idl_row *row_) BITMAP_FOR_EACH_1 (i, class->n_columns, row->written) { c = &class->columns[i]; (c->unparse) (row); - free(row->new_datum[i].values); - free(row->new_datum[i].keys); + ovsdb_datum_destroy(&row->new_datum[i], &c->type); } free(row->new_datum); free(row->written); @@ -2787,9 +2786,8 @@ ovsdb_idl_txn_extract_mutations(struct ovsdb_idl_row *row, struct ovsdb_datum *new_datum; unsigned int pos; new_datum = map_op_datum(map_op); - pos = ovsdb_datum_find_key(old_datum, - &new_datum->keys[0], - key_type); + ovsdb_datum_find_key(old_datum, &new_datum->keys[0], + key_type, &pos); if (ovsdb_atom_equals(&new_datum->values[0], &old_datum->values[pos], value_type)) { @@ -2798,11 +2796,9 @@ ovsdb_idl_txn_extract_mutations(struct ovsdb_idl_row *row, } } else if (map_op_type(map_op) == MAP_OP_DELETE){ /* Verify that there is a key to delete. */ - unsigned int pos; - pos = ovsdb_datum_find_key(old_datum, - &map_op_datum(map_op)->keys[0], - key_type); - if (pos == UINT_MAX) { + if (!ovsdb_datum_find_key(old_datum, + &map_op_datum(map_op)->keys[0], + key_type, NULL)) { /* No key to delete. Move on to next update. */ VLOG_WARN("Trying to delete a key that doesn't " "exist in the map."); @@ -2897,11 +2893,9 @@ ovsdb_idl_txn_extract_mutations(struct ovsdb_idl_row *row, any_ins = true; } else { /* SETP_OP_DELETE */ /* Verify that there is a key to delete. */ - unsigned int pos; - pos = ovsdb_datum_find_key(old_datum, - &set_op_datum(set_op)->keys[0], - key_type); - if (pos == UINT_MAX) { + if (!ovsdb_datum_find_key(old_datum, + &set_op_datum(set_op)->keys[0], + key_type, NULL)) { /* No key to delete. Move on to next update. 
*/ VLOG_WARN("Trying to delete a key that doesn't " "exist in the set."); @@ -4066,7 +4060,6 @@ ovsdb_idl_txn_write_partial_map(const struct ovsdb_idl_row *row_, struct ovsdb_idl_row *row = CONST_CAST(struct ovsdb_idl_row *, row_); enum ovsdb_atomic_type key_type; enum map_op_type op_type; - unsigned int pos; const struct ovsdb_datum *old_datum; if (!is_valid_partial_update(row, column, datum)) { @@ -4078,8 +4071,11 @@ ovsdb_idl_txn_write_partial_map(const struct ovsdb_idl_row *row_, /* Find out if this is an insert or an update. */ key_type = column->type.key.type; old_datum = ovsdb_idl_read(row, column); - pos = ovsdb_datum_find_key(old_datum, &datum->keys[0], key_type); - op_type = pos == UINT_MAX ? MAP_OP_INSERT : MAP_OP_UPDATE; + if (ovsdb_datum_find_key(old_datum, &datum->keys[0], key_type, NULL)) { + op_type = MAP_OP_UPDATE; + } else { + op_type = MAP_OP_INSERT; + } ovsdb_idl_txn_add_map_op(row, column, datum, op_type); } @@ -4112,6 +4108,9 @@ void ovsdb_idl_loop_destroy(struct ovsdb_idl_loop *loop) { if (loop) { + if (loop->committing_txn) { + ovsdb_idl_txn_destroy(loop->committing_txn); + } ovsdb_idl_destroy(loop->idl); } } @@ -4121,8 +4120,8 @@ ovsdb_idl_loop_run(struct ovsdb_idl_loop *loop) { ovsdb_idl_run(loop->idl); - /* See if we can commit the loop->committing_txn. */ - if (loop->committing_txn) { + /* See if the 'committing_txn' succeeded in the meantime. */ + if (loop->committing_txn && loop->committing_txn->status == TXN_SUCCESS) { ovsdb_idl_try_commit_loop_txn(loop, NULL); } diff --git a/lib/pcap-file.c b/lib/pcap-file.c index b30a11c24b..41835f6f4d 100644 --- a/lib/pcap-file.c +++ b/lib/pcap-file.c @@ -89,6 +89,7 @@ ovs_pcap_open(const char *file_name, const char *mode) : mode[0] == 'w' ? 
"writing" : "appending"), ovs_strerror(errno)); + free(p_file); return NULL; } diff --git a/lib/rculist.h b/lib/rculist.h index 1072b87af2..c0d77acf94 100644 --- a/lib/rculist.h +++ b/lib/rculist.h @@ -365,35 +365,57 @@ rculist_is_singleton_protected(const struct rculist *list) return list_next == list->prev && list_next != list; } -#define RCULIST_FOR_EACH(ITER, MEMBER, RCULIST) \ - for (INIT_CONTAINER(ITER, rculist_next(RCULIST), MEMBER); \ - &(ITER)->MEMBER != (RCULIST); \ - ASSIGN_CONTAINER(ITER, rculist_next(&(ITER)->MEMBER), MEMBER)) -#define RCULIST_FOR_EACH_CONTINUE(ITER, MEMBER, RCULIST) \ - for (ASSIGN_CONTAINER(ITER, rculist_next(&(ITER)->MEMBER), MEMBER); \ - &(ITER)->MEMBER != (RCULIST); \ - ASSIGN_CONTAINER(ITER, rculist_next(&(ITER)->MEMBER), MEMBER)) - -#define RCULIST_FOR_EACH_REVERSE_PROTECTED(ITER, MEMBER, RCULIST) \ - for (INIT_CONTAINER(ITER, (RCULIST)->prev, MEMBER); \ - &(ITER)->MEMBER != (RCULIST); \ - ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER)) -#define RCULIST_FOR_EACH_REVERSE_PROTECTED_CONTINUE(ITER, MEMBER, RCULIST) \ - for (ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER); \ - &(ITER)->MEMBER != (RCULIST); \ - ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER)) - -#define RCULIST_FOR_EACH_PROTECTED(ITER, MEMBER, RCULIST) \ - for (INIT_CONTAINER(ITER, rculist_next_protected(RCULIST), MEMBER); \ - &(ITER)->MEMBER != (RCULIST); \ - ASSIGN_CONTAINER(ITER, rculist_next_protected(&(ITER)->MEMBER), \ - MEMBER)) - -#define RCULIST_FOR_EACH_SAFE_PROTECTED(ITER, NEXT, MEMBER, RCULIST) \ - for (INIT_CONTAINER(ITER, rculist_next_protected(RCULIST), MEMBER); \ - (&(ITER)->MEMBER != (RCULIST) \ - ? 
INIT_CONTAINER(NEXT, rculist_next_protected(&(ITER)->MEMBER), \ - MEMBER), 1 : 0); \ - (ITER) = (NEXT)) +#define RCULIST_FOR_EACH(ITER, MEMBER, RCULIST) \ + for (INIT_MULTIVAR(ITER, MEMBER, rculist_next(RCULIST), \ + const struct rculist); \ + CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST)); \ + UPDATE_MULTIVAR(ITER, rculist_next(ITER_VAR(ITER)))) + +#define RCULIST_FOR_EACH_CONTINUE(ITER, MEMBER, RCULIST) \ + for (INIT_MULTIVAR(ITER, MEMBER, rculist_next(&(ITER)->MEMBER), \ + const struct rculist); \ + CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST)); \ + UPDATE_MULTIVAR(ITER, rculist_next(ITER_VAR(ITER)))) + +#define RCULIST_FOR_EACH_REVERSE_PROTECTED(ITER, MEMBER, RCULIST) \ + for (INIT_MULTIVAR(ITER, MEMBER, (RCULIST)->prev, struct rculist); \ + CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST)); \ + UPDATE_MULTIVAR(ITER, ITER_VAR(VAR).prev)) + +#define RCULIST_FOR_EACH_REVERSE_PROTECTED_CONTINUE(ITER, MEMBER, RCULIST) \ + for (INIT_MULTIVAR(ITER, MEMBER, (ITER)->MEMBER.prev, struct rculist); \ + CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST)); \ + UPDATE_MULTIVAR(ITER, ITER_VAR(VAR).prev)) + +#define RCULIST_FOR_EACH_PROTECTED(ITER, MEMBER, RCULIST) \ + for (INIT_MULTIVAR(ITER, MEMBER, rculist_next_protected(RCULIST), \ + struct rculist); \ + CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST)); \ + UPDATE_MULTIVAR(ITER, rculist_next_protected(ITER_VAR(ITER))) \ + +#define RCULIST_FOR_EACH_SAFE_SHORT_PROTECTED(ITER, MEMBER, RCULIST) \ + for (INIT_MULTIVAR_SAFE_SHORT(ITER, MEMBER, \ + rculist_next_protected(RCULIST), \ + struct rculist); \ + CONDITION_MULTIVAR_SAFE_SHORT(ITER, MEMBER, \ + ITER_VAR(ITER) != (RCULIST), \ + ITER_NEXT_VAR(ITER) = rculist_next_protected(ITER_VAR(VAR))); \ + UPDATE_MULTIVAR_SHORT(ITER)) + +#define RCULIST_FOR_EACH_SAFE_LONG_PROTECTED(ITER, NEXT, MEMBER, RCULIST) \ + for (INIT_MULTIVAR_SAFE_LONG(ITER, NEXT, MEMBER, \ + rculist_next_protected(RCULIST) \ + struct rculist); \ + 
CONDITION_MULTIVAR_SAFE_LONG(VAR, NEXT, MEMBER \ + ITER_VAR(ITER) != (RCULIST), \ + ITER_VAR(NEXT) = rculist_next_protected(ITER_VAR(VAR)), \ + ITER_VAR(NEXT) != (RCULIST)); \ + UPDATE_MULTIVAR_LONG(ITER)) + +#define RCULIST_FOR_EACH_SAFE_PROTECTED(...) \ + OVERLOAD_SAFE_MACRO(RCULIST_FOR_EACH_SAFE_LONG_PROTECTED, \ + RCULIST_FOR_EACH_SAFE_SHORT_PROTECTED, \ + 4, __VA_ARGS__) + #endif /* rculist.h */ diff --git a/lib/reconnect.c b/lib/reconnect.c index a929ddfd2d..89a0bcaf95 100644 --- a/lib/reconnect.c +++ b/lib/reconnect.c @@ -75,7 +75,8 @@ struct reconnect { static void reconnect_transition__(struct reconnect *, long long int now, enum state state); -static long long int reconnect_deadline__(const struct reconnect *); +static long long int reconnect_deadline__(const struct reconnect *, + long long int now); static bool reconnect_may_retry(struct reconnect *); static const char * @@ -539,7 +540,7 @@ reconnect_transition__(struct reconnect *fsm, long long int now, } static long long int -reconnect_deadline__(const struct reconnect *fsm) +reconnect_deadline__(const struct reconnect *fsm, long long int now) { ovs_assert(fsm->state_entered != LLONG_MIN); switch (fsm->state) { @@ -557,8 +558,18 @@ reconnect_deadline__(const struct reconnect *fsm) if (fsm->probe_interval) { long long int base = MAX(fsm->last_activity, fsm->state_entered); long long int expiration = base + fsm->probe_interval; - if (fsm->last_receive_attempt >= expiration) { + if (now < expiration || fsm->last_receive_attempt >= expiration) { + /* We still have time before the expiration or the time has + * already passed and there was no activity. In the first case + * we need to wait for the expiration, in the second - we're + * already past the deadline. */ return expiration; + } else { + /* Time has already passed, but we didn't attempt to receive + * anything. 
We need to wake up and try to receive even if + * nothing is pending, so we can update the expiration time or + * transition to a different state. */ + return now + 1; } } return LLONG_MAX; @@ -566,8 +577,10 @@ reconnect_deadline__(const struct reconnect *fsm) case S_IDLE: if (fsm->probe_interval) { long long int expiration = fsm->state_entered + fsm->probe_interval; - if (fsm->last_receive_attempt >= expiration) { + if (now < expiration || fsm->last_receive_attempt >= expiration) { return expiration; + } else { + return now + 1; } } return LLONG_MAX; @@ -618,7 +631,7 @@ reconnect_deadline__(const struct reconnect *fsm) enum reconnect_action reconnect_run(struct reconnect *fsm, long long int now) { - if (now >= reconnect_deadline__(fsm)) { + if (now >= reconnect_deadline__(fsm, now)) { switch (fsm->state) { case S_VOID: return 0; @@ -671,7 +684,7 @@ reconnect_wait(struct reconnect *fsm, long long int now) int reconnect_timeout(struct reconnect *fsm, long long int now) { - long long int deadline = reconnect_deadline__(fsm); + long long int deadline = reconnect_deadline__(fsm, now); if (deadline != LLONG_MAX) { long long int remaining = deadline - now; return MAX(0, MIN(INT_MAX, remaining)); diff --git a/lib/socket-util.c b/lib/socket-util.c index 4f1ffecf5d..38705cc51e 100644 --- a/lib/socket-util.c +++ b/lib/socket-util.c @@ -62,7 +62,8 @@ static bool parse_sockaddr_components(struct sockaddr_storage *ss, const char *port_s, uint16_t default_port, const char *s, - bool resolve_host); + bool resolve_host, + bool *dns_failure); /* Sets 'fd' to non-blocking mode. Returns 0 if successful, otherwise a * positive errno value. 
*/ @@ -438,7 +439,7 @@ parse_sockaddr_components_dns(struct sockaddr_storage *ss OVS_UNUSED, dns_resolve(host_s, &tmp_host_s); if (tmp_host_s != NULL) { parse_sockaddr_components(ss, tmp_host_s, port_s, - default_port, s, false); + default_port, s, false, NULL); free(tmp_host_s); return true; } @@ -450,11 +451,15 @@ parse_sockaddr_components(struct sockaddr_storage *ss, char *host_s, const char *port_s, uint16_t default_port, const char *s, - bool resolve_host) + bool resolve_host, bool *dns_failure) { struct sockaddr_in *sin = sin_cast(sa_cast(ss)); int port; + if (dns_failure) { + *dns_failure = false; + } + if (port_s && port_s[0]) { if (!str_to_int(port_s, 10, &port) || port < 0 || port > 65535) { VLOG_ERR("%s: bad port number \"%s\"", s, port_s); @@ -501,10 +506,15 @@ parse_sockaddr_components(struct sockaddr_storage *ss, return true; resolve: - if (resolve_host && parse_sockaddr_components_dns(ss, host_s, port_s, - default_port, s)) { - return true; - } else if (!resolve_host) { + if (resolve_host) { + if (parse_sockaddr_components_dns(ss, host_s, port_s, + default_port, s)) { + return true; + } + if (dns_failure) { + *dns_failure = true; + } + } else { VLOG_ERR("%s: bad IP address \"%s\"", s, host_s); } exit: @@ -521,10 +531,12 @@ exit: * It resolves the host if 'resolve_host' is true. * * On success, returns true and stores the parsed remote address into '*ss'. - * On failure, logs an error, stores zeros into '*ss', and returns false. */ + * On failure, logs an error, stores zeros into '*ss', and returns false, + * '*dns_failure' indicates if the host resolution failed. 
*/ bool inet_parse_active(const char *target_, int default_port, - struct sockaddr_storage *ss, bool resolve_host) + struct sockaddr_storage *ss, + bool resolve_host, bool *dns_failure) { char *target = xstrdup(target_); char *port, *host; @@ -539,7 +551,7 @@ inet_parse_active(const char *target_, int default_port, ok = false; } else { ok = parse_sockaddr_components(ss, host, port, default_port, - target_, resolve_host); + target_, resolve_host, dns_failure); } if (!ok) { memset(ss, 0, sizeof *ss); @@ -576,7 +588,7 @@ inet_open_active(int style, const char *target, int default_port, int error; /* Parse. */ - if (!inet_parse_active(target, default_port, &ss, true)) { + if (!inet_parse_active(target, default_port, &ss, true, NULL)) { error = EAFNOSUPPORT; goto exit; } @@ -660,7 +672,7 @@ inet_parse_passive(const char *target_, int default_port, ok = false; } else { ok = parse_sockaddr_components(ss, host, port, default_port, - target_, true); + target_, true, NULL); } if (!ok) { memset(ss, 0, sizeof *ss); @@ -783,7 +795,8 @@ inet_parse_address(const char *target_, struct sockaddr_storage *ss) { char *target = xstrdup(target_); char *host = unbracket(target); - bool ok = parse_sockaddr_components(ss, host, NULL, 0, target_, false); + bool ok = parse_sockaddr_components(ss, host, NULL, 0, + target_, false, NULL); if (!ok) { memset(ss, 0, sizeof *ss); } diff --git a/lib/socket-util.h b/lib/socket-util.h index 9ccb7d4cc4..bf66393df9 100644 --- a/lib/socket-util.h +++ b/lib/socket-util.h @@ -49,7 +49,8 @@ ovs_be32 guess_netmask(ovs_be32 ip); void inet_parse_host_port_tokens(char *s, char **hostp, char **portp); void inet_parse_port_host_tokens(char *s, char **portp, char **hostp); bool inet_parse_active(const char *target, int default_port, - struct sockaddr_storage *ssp, bool resolve_host); + struct sockaddr_storage *ssp, + bool resolve_host, bool *dns_failure); int inet_open_active(int style, const char *target, int default_port, struct sockaddr_storage *ssp, int *fdp, 
uint8_t dscp); diff --git a/lib/stopwatch.c b/lib/stopwatch.c index f5602163bc..1c71df1a12 100644 --- a/lib/stopwatch.c +++ b/lib/stopwatch.c @@ -114,7 +114,6 @@ static void calc_percentile(unsigned long long n_samples, struct percentile *pctl, unsigned long long new_sample) { - if (n_samples < P_SQUARE_MIN) { pctl->samples[n_samples - 1] = new_sample; } @@ -228,13 +227,12 @@ add_sample(struct stopwatch *sw, unsigned long long new_sample) sw->min = new_sample; } - calc_percentile(sw->n_samples, &sw->pctl, new_sample); - if (sw->n_samples++ == 0) { sw->short_term.average = sw->long_term.average = new_sample; return; } + calc_percentile(sw->n_samples, &sw->pctl, new_sample); calc_average(&sw->short_term, new_sample); calc_average(&sw->long_term, new_sample); } diff --git a/lib/stp.c b/lib/stp.c index 809b405a52..a869b5f390 100644 --- a/lib/stp.c +++ b/lib/stp.c @@ -737,7 +737,7 @@ void stp_received_bpdu(struct stp_port *p, const void *bpdu, size_t bpdu_size) { struct stp *stp = p->stp; - const struct stp_bpdu_header *header; + struct stp_bpdu_header header; ovs_mutex_lock(&mutex); if (p->state == STP_DISABLED) { @@ -750,19 +750,19 @@ stp_received_bpdu(struct stp_port *p, const void *bpdu, size_t bpdu_size) goto out; } - header = bpdu; - if (header->protocol_id != htons(STP_PROTOCOL_ID)) { + memcpy(&header, bpdu, sizeof header); + if (header.protocol_id != htons(STP_PROTOCOL_ID)) { VLOG_WARN("%s: received BPDU with unexpected protocol ID %"PRIu16, - stp->name, ntohs(header->protocol_id)); + stp->name, ntohs(header.protocol_id)); p->error_count++; goto out; } - if (header->protocol_version != STP_PROTOCOL_VERSION) { + if (header.protocol_version != STP_PROTOCOL_VERSION) { VLOG_DBG("%s: received BPDU with unexpected protocol version %"PRIu8, - stp->name, header->protocol_version); + stp->name, header.protocol_version); } - switch (header->bpdu_type) { + switch (header.bpdu_type) { case STP_TYPE_CONFIG: if (bpdu_size < sizeof(struct stp_config_bpdu)) { VLOG_WARN("%s: 
received config BPDU with invalid size %"PRIuSIZE, @@ -785,7 +785,7 @@ stp_received_bpdu(struct stp_port *p, const void *bpdu, size_t bpdu_size) default: VLOG_WARN("%s: received BPDU of unexpected type %"PRIu8, - stp->name, header->bpdu_type); + stp->name, header.bpdu_type); p->error_count++; goto out; } diff --git a/lib/stream.c b/lib/stream.c index fcaddf10ad..71039e24f1 100644 --- a/lib/stream.c +++ b/lib/stream.c @@ -788,7 +788,7 @@ stream_parse_target_with_default_port(const char *target, int default_port, struct sockaddr_storage *ss) { return ((!strncmp(target, "tcp:", 4) || !strncmp(target, "ssl:", 4)) - && inet_parse_active(target + 4, default_port, ss, true)); + && inet_parse_active(target + 4, default_port, ss, true, NULL)); } /* Attempts to guess the content type of a stream whose first few bytes were diff --git a/lib/tc.c b/lib/tc.c index 38a1dfc0eb..df73a43d4c 100644 --- a/lib/tc.c +++ b/lib/tc.c @@ -568,16 +568,17 @@ nl_parse_flower_vlan(struct nlattr **attrs, struct tc_flower *flower) flower->key.encap_eth_type[0] = nl_attr_get_be16(attrs[TCA_FLOWER_KEY_ETH_TYPE]); + flower->mask.encap_eth_type[0] = CONSTANT_HTONS(0xffff); if (attrs[TCA_FLOWER_KEY_VLAN_ID]) { flower->key.vlan_id[0] = nl_attr_get_u16(attrs[TCA_FLOWER_KEY_VLAN_ID]); - flower->mask.vlan_id[0] = 0xffff; + flower->mask.vlan_id[0] = VLAN_VID_MASK >> VLAN_VID_SHIFT; } if (attrs[TCA_FLOWER_KEY_VLAN_PRIO]) { flower->key.vlan_prio[0] = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_VLAN_PRIO]); - flower->mask.vlan_prio[0] = 0xff; + flower->mask.vlan_prio[0] = VLAN_PCP_MASK >> VLAN_PCP_SHIFT; } if (!attrs[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) { @@ -590,17 +591,18 @@ nl_parse_flower_vlan(struct nlattr **attrs, struct tc_flower *flower) } flower->key.encap_eth_type[1] = flower->key.encap_eth_type[0]; + flower->mask.encap_eth_type[1] = CONSTANT_HTONS(0xffff); flower->key.encap_eth_type[0] = encap_ethtype; if (attrs[TCA_FLOWER_KEY_CVLAN_ID]) { flower->key.vlan_id[1] = nl_attr_get_u16(attrs[TCA_FLOWER_KEY_CVLAN_ID]); 
- flower->mask.vlan_id[1] = 0xffff; + flower->mask.vlan_id[1] = VLAN_VID_MASK >> VLAN_VID_SHIFT; } if (attrs[TCA_FLOWER_KEY_CVLAN_PRIO]) { flower->key.vlan_prio[1] = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_CVLAN_PRIO]); - flower->mask.vlan_prio[1] = 0xff; + flower->mask.vlan_prio[1] = VLAN_PCP_MASK >> VLAN_PCP_SHIFT; } } @@ -937,24 +939,21 @@ nl_parse_flower_ip(struct nlattr **attrs, struct tc_flower *flower) { key->icmp_code = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_CODE]); mask->icmp_code = - nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_CODE]); + nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_CODE_MASK]); } if (attrs[TCA_FLOWER_KEY_ICMPV4_TYPE_MASK]) { - key->icmp_type = - nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_TYPE_MASK]); + key->icmp_type = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_TYPE]); mask->icmp_type = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_TYPE_MASK]); } } else if (ip_proto == IPPROTO_ICMPV6) { if (attrs[TCA_FLOWER_KEY_ICMPV6_CODE_MASK]) { - key->icmp_code = - nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_CODE]); + key->icmp_code = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_CODE]); mask->icmp_code = - nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_CODE]); + nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_CODE_MASK]); } if (attrs[TCA_FLOWER_KEY_ICMPV6_TYPE_MASK]) { - key->icmp_type = - nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_TYPE_MASK]); + key->icmp_type = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_TYPE]); mask->icmp_type = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_TYPE_MASK]); } @@ -1006,14 +1005,14 @@ static const struct nl_policy pedit_policy[] = { static int nl_parse_act_pedit(struct nlattr *options, struct tc_flower *flower) { - struct tc_action *action; + struct tc_action *action = &flower->actions[flower->action_count++]; struct nlattr *pe_attrs[ARRAY_SIZE(pedit_policy)]; const struct tc_pedit *pe; const struct tc_pedit_key *keys; const struct nlattr *nla, *keys_ex, *ex_type; const void *keys_attr; - char *rewrite_key = (void *) &flower->rewrite.key; - 
char *rewrite_mask = (void *) &flower->rewrite.mask; + char *rewrite_key = (void *) &action->rewrite.key; + char *rewrite_mask = (void *) &action->rewrite.mask; size_t keys_ex_size, left; int type, i = 0, err; @@ -1092,7 +1091,6 @@ nl_parse_act_pedit(struct nlattr *options, struct tc_flower *flower) i++; } - action = &flower->actions[flower->action_count++]; action->type = TC_ACT_PEDIT; return 0; @@ -1487,7 +1485,9 @@ nl_parse_act_ct(struct nlattr *options, struct tc_flower *flower) if (ipv4_max) { ovs_be32 addr = nl_attr_get_be32(ipv4_max); - action->ct.range.ipv4.max = addr; + if (action->ct.range.ipv4.min != addr) { + action->ct.range.ipv4.max = addr; + } } } else if (ipv6_min) { action->ct.range.ip_family = AF_INET6; @@ -1496,7 +1496,9 @@ nl_parse_act_ct(struct nlattr *options, struct tc_flower *flower) if (ipv6_max) { struct in6_addr addr = nl_attr_get_in6_addr(ipv6_max); - action->ct.range.ipv6.max = addr; + if (!ipv6_addr_equals(&action->ct.range.ipv6.min, &addr)) { + action->ct.range.ipv6.max = addr; + } } } @@ -1504,6 +1506,10 @@ nl_parse_act_ct(struct nlattr *options, struct tc_flower *flower) action->ct.range.port.min = nl_attr_get_be16(port_min); if (port_max) { action->ct.range.port.max = nl_attr_get_be16(port_max); + if (action->ct.range.port.min == + action->ct.range.port.max) { + action->ct.range.port.max = 0; + } } } } @@ -1702,6 +1708,9 @@ static const struct nl_policy stats_policy[] = { [TCA_STATS_BASIC] = { .type = NL_A_UNSPEC, .min_len = sizeof(struct gnet_stats_basic), .optional = false, }, + [TCA_STATS_BASIC_HW] = { .type = NL_A_UNSPEC, + .min_len = sizeof(struct gnet_stats_basic), + .optional = true, }, }; static int @@ -1714,8 +1723,11 @@ nl_parse_single_action(struct nlattr *action, struct tc_flower *flower, const char *act_kind; struct nlattr *action_attrs[ARRAY_SIZE(act_policy)]; struct nlattr *stats_attrs[ARRAY_SIZE(stats_policy)]; - struct ovs_flow_stats *stats = &flower->stats; - const struct gnet_stats_basic *bs; + struct 
ovs_flow_stats *stats_sw = &flower->stats_sw; + struct ovs_flow_stats *stats_hw = &flower->stats_hw; + const struct gnet_stats_basic *bs_all = NULL; + const struct gnet_stats_basic *bs_hw = NULL; + struct gnet_stats_basic bs_sw = { .packets = 0, .bytes = 0, }; int err = 0; if (!nl_parse_nested(action, act_policy, action_attrs, @@ -1771,10 +1783,26 @@ nl_parse_single_action(struct nlattr *action, struct tc_flower *flower, return EPROTO; } - bs = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC], sizeof *bs); - if (bs->packets) { - put_32aligned_u64(&stats->n_packets, bs->packets); - put_32aligned_u64(&stats->n_bytes, bs->bytes); + bs_all = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC], sizeof *bs_all); + if (stats_attrs[TCA_STATS_BASIC_HW]) { + bs_hw = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC_HW], + sizeof *bs_hw); + + bs_sw.packets = bs_all->packets - bs_hw->packets; + bs_sw.bytes = bs_all->bytes - bs_hw->bytes; + } else { + bs_sw.packets = bs_all->packets; + bs_sw.bytes = bs_all->bytes; + } + + if (bs_sw.packets > get_32aligned_u64(&stats_sw->n_packets)) { + put_32aligned_u64(&stats_sw->n_packets, bs_sw.packets); + put_32aligned_u64(&stats_sw->n_bytes, bs_sw.bytes); + } + + if (bs_hw && bs_hw->packets > get_32aligned_u64(&stats_hw->n_packets)) { + put_32aligned_u64(&stats_hw->n_packets, bs_hw->packets); + put_32aligned_u64(&stats_hw->n_bytes, bs_hw->bytes); } return 0; @@ -2399,14 +2427,14 @@ nl_msg_put_act_flags(struct ofpbuf *request) { * first_word_mask/last_word_mask - the mask to use for the first/last read * (as we read entire words). 
*/ static void -calc_offsets(struct tc_flower *flower, struct flower_key_to_pedit *m, +calc_offsets(struct tc_action *action, struct flower_key_to_pedit *m, int *cur_offset, int *cnt, ovs_be32 *last_word_mask, ovs_be32 *first_word_mask, ovs_be32 **mask, ovs_be32 **data) { int start_offset, max_offset, total_size; int diff, right_zero_bits, left_zero_bits; - char *rewrite_key = (void *) &flower->rewrite.key; - char *rewrite_mask = (void *) &flower->rewrite.mask; + char *rewrite_key = (void *) &action->rewrite.key; + char *rewrite_mask = (void *) &action->rewrite.mask; max_offset = m->offset + m->size; start_offset = ROUND_DOWN(m->offset, 4); @@ -2473,7 +2501,8 @@ csum_update_flag(struct tc_flower *flower, static int nl_msg_put_flower_rewrite_pedits(struct ofpbuf *request, - struct tc_flower *flower) + struct tc_flower *flower, + struct tc_action *action) { struct { struct tc_pedit sel; @@ -2497,7 +2526,7 @@ nl_msg_put_flower_rewrite_pedits(struct ofpbuf *request, continue; } - calc_offsets(flower, m, &cur_offset, &cnt, &last_word_mask, + calc_offsets(action, m, &cur_offset, &cnt, &last_word_mask, &first_word_mask, &mask, &data); for (j = 0; j < cnt; j++, mask++, data++, cur_offset += 4) { @@ -2545,6 +2574,40 @@ nl_msg_put_flower_rewrite_pedits(struct ofpbuf *request, return 0; } +static void +nl_msg_put_flower_acts_release(struct ofpbuf *request, uint16_t act_index) +{ + size_t act_offset; + + act_offset = nl_msg_start_nested(request, act_index); + nl_msg_put_act_tunnel_key_release(request); + nl_msg_put_act_flags(request); + nl_msg_end_nested(request, act_offset); +} + +/* Aggregates all previous successive pedit actions csum_update_flags + * to flower->csum_update_flags. Only append one csum action to the + * last pedit action. */ +static void +nl_msg_put_csum_act(struct ofpbuf *request, struct tc_flower *flower, + uint16_t *act_index) +{ + size_t act_offset; + + /* No pedit actions or processed already. 
*/ + if (!flower->csum_update_flags) { + return; + } + + act_offset = nl_msg_start_nested(request, (*act_index)++); + nl_msg_put_act_csum(request, flower->csum_update_flags); + nl_msg_put_act_flags(request); + nl_msg_end_nested(request, act_offset); + + /* Clear it. So we can have another series of pedit actions. */ + flower->csum_update_flags = 0; +} + static int nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower) { @@ -2561,24 +2624,31 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower) action = flower->actions; for (i = 0; i < flower->action_count; i++, action++) { + if (action->type != TC_ACT_PEDIT) { + nl_msg_put_csum_act(request, flower, &act_index); + } switch (action->type) { case TC_ACT_PEDIT: { act_offset = nl_msg_start_nested(request, act_index++); - error = nl_msg_put_flower_rewrite_pedits(request, flower); + error = nl_msg_put_flower_rewrite_pedits(request, flower, + action); if (error) { return error; } nl_msg_end_nested(request, act_offset); - if (flower->csum_update_flags) { - act_offset = nl_msg_start_nested(request, act_index++); - nl_msg_put_act_csum(request, flower->csum_update_flags); - nl_msg_put_act_flags(request); - nl_msg_end_nested(request, act_offset); + if (i == flower->action_count - 1) { + /* If this is the last action check csum calc again. 
*/ + nl_msg_put_csum_act(request, flower, &act_index); } } break; case TC_ACT_ENCAP: { + if (!released && flower->tunnel) { + nl_msg_put_flower_acts_release(request, act_index++); + released = true; + } + act_offset = nl_msg_start_nested(request, act_index++); nl_msg_put_act_tunnel_key_set(request, action->encap.id_present, action->encap.id, @@ -2636,10 +2706,7 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower) break; case TC_ACT_OUTPUT: { if (!released && flower->tunnel) { - act_offset = nl_msg_start_nested(request, act_index++); - nl_msg_put_act_tunnel_key_release(request); - nl_msg_put_act_flags(request); - nl_msg_end_nested(request, act_offset); + nl_msg_put_flower_acts_release(request, act_index++); released = true; } @@ -2901,13 +2968,13 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower) FLOWER_PUT_MASKED_VALUE(icmp_code, TCA_FLOWER_KEY_ICMPV6_CODE); FLOWER_PUT_MASKED_VALUE(icmp_type, TCA_FLOWER_KEY_ICMPV6_TYPE); } - - FLOWER_PUT_MASKED_VALUE(ct_state, TCA_FLOWER_KEY_CT_STATE); - FLOWER_PUT_MASKED_VALUE(ct_zone, TCA_FLOWER_KEY_CT_ZONE); - FLOWER_PUT_MASKED_VALUE(ct_mark, TCA_FLOWER_KEY_CT_MARK); - FLOWER_PUT_MASKED_VALUE(ct_label, TCA_FLOWER_KEY_CT_LABELS); } + FLOWER_PUT_MASKED_VALUE(ct_state, TCA_FLOWER_KEY_CT_STATE); + FLOWER_PUT_MASKED_VALUE(ct_zone, TCA_FLOWER_KEY_CT_ZONE); + FLOWER_PUT_MASKED_VALUE(ct_mark, TCA_FLOWER_KEY_CT_MARK); + FLOWER_PUT_MASKED_VALUE(ct_label, TCA_FLOWER_KEY_CT_LABELS); + if (host_eth_type == ETH_P_IP) { FLOWER_PUT_MASKED_VALUE(ipv4.ipv4_src, TCA_FLOWER_KEY_IPV4_SRC); FLOWER_PUT_MASKED_VALUE(ipv4.ipv4_dst, TCA_FLOWER_KEY_IPV4_DST); @@ -2980,12 +3047,79 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower) return 0; } +static void +log_tc_flower_match(const char *msg, + const struct tc_flower *a, + const struct tc_flower *b) +{ + uint8_t key_a[sizeof(struct tc_flower_key)]; + uint8_t key_b[sizeof(struct tc_flower_key)]; + struct ds s = 
DS_EMPTY_INITIALIZER; + + for (int i = 0; i < sizeof a->key; i++) { + uint8_t mask_a = ((uint8_t *) &a->mask)[i]; + uint8_t mask_b = ((uint8_t *) &b->mask)[i]; + + key_a[i] = ((uint8_t *) &a->key)[i] & mask_a; + key_b[i] = ((uint8_t *) &b->key)[i] & mask_b; + } + ds_put_cstr(&s, "\nExpected Mask:\n"); + ds_put_hex(&s, &a->mask, sizeof a->mask); + ds_put_cstr(&s, "\nReceived Mask:\n"); + ds_put_hex(&s, &b->mask, sizeof b->mask); + ds_put_cstr(&s, "\nExpected Key:\n"); + ds_put_hex(&s, &a->key, sizeof a->key); + ds_put_cstr(&s, "\nReceived Key:\n"); + ds_put_hex(&s, &b->key, sizeof b->key); + ds_put_cstr(&s, "\nExpected Masked Key:\n"); + ds_put_hex(&s, key_a, sizeof key_a); + ds_put_cstr(&s, "\nReceived Masked Key:\n"); + ds_put_hex(&s, key_b, sizeof key_b); + + if (a->action_count != b->action_count) { + /* If action count is not equal, we print all actions to see which + * ones are missing. */ + const struct tc_action *action; + int i; + + ds_put_cstr(&s, "\nExpected Actions:\n"); + for (i = 0, action = a->actions; i < a->action_count; i++, action++) { + ds_put_cstr(&s, " - "); + ds_put_hex(&s, action, sizeof *action); + ds_put_cstr(&s, "\n"); + } + ds_put_cstr(&s, "Received Actions:\n"); + for (i = 0, action = b->actions; i < b->action_count; i++, action++) { + ds_put_cstr(&s, " - "); + ds_put_hex(&s, action, sizeof *action); + ds_put_cstr(&s, "\n"); + } + } else { + /* Only dump the delta in actions. 
*/ + const struct tc_action *action_a = a->actions; + const struct tc_action *action_b = b->actions; + + for (int i = 0; i < a->action_count; i++, action_a++, action_b++) { + if (memcmp(action_a, action_b, sizeof *action_a)) { + ds_put_format(&s, + "\nAction %d mismatch:\n - Expected Action: ", + i); + ds_put_hex(&s, action_a, sizeof *action_a); + ds_put_cstr(&s, "\n - Received Action: "); + ds_put_hex(&s, action_b, sizeof *action_b); + } + } + } + VLOG_DBG_RL(&error_rl, "%s%s", msg, ds_cstr(&s)); + ds_destroy(&s); +} + static bool cmp_tc_flower_match_action(const struct tc_flower *a, const struct tc_flower *b) { if (memcmp(&a->mask, &b->mask, sizeof a->mask)) { - VLOG_DBG_RL(&error_rl, "tc flower compare failed mask compare"); + log_tc_flower_match("tc flower compare failed mask compare:", a, b); return false; } @@ -2998,8 +3132,8 @@ cmp_tc_flower_match_action(const struct tc_flower *a, uint8_t key_b = ((uint8_t *)&b->key)[i] & mask; if (key_a != key_b) { - VLOG_DBG_RL(&error_rl, "tc flower compare failed key compare at " - "%d", i); + log_tc_flower_match("tc flower compare failed masked key compare:", + a, b); return false; } } @@ -3009,14 +3143,15 @@ cmp_tc_flower_match_action(const struct tc_flower *a, const struct tc_action *action_b = b->actions; if (a->action_count != b->action_count) { - VLOG_DBG_RL(&error_rl, "tc flower compare failed action length check"); + log_tc_flower_match("tc flower compare failed action length check", + a, b); return false; } for (int i = 0; i < a->action_count; i++, action_a++, action_b++) { if (memcmp(action_a, action_b, sizeof *action_a)) { - VLOG_DBG_RL(&error_rl, "tc flower compare failed action compare " - "for %d", i); + log_tc_flower_match("tc flower compare failed action compare", + a, b); return false; } } diff --git a/lib/tc.h b/lib/tc.h index a147ca461d..d6cdddd169 100644 --- a/lib/tc.h +++ b/lib/tc.h @@ -256,11 +256,23 @@ struct tc_action { bool force; bool commit; } ct; + + struct { + struct tc_flower_key key; + 
struct tc_flower_key mask; + } rewrite; }; enum tc_action_type type; }; +/* assert that if we overflow with a masked write of uint32_t to the last byte + * of action.rewrite we overflow inside struct tc_action. + * shouldn't happen unless someone moves rewrite to the end of action */ +BUILD_ASSERT_DECL(offsetof(struct tc_action, rewrite) + + MEMBER_SIZEOF(struct tc_action, rewrite) + + sizeof(uint32_t) - 2 < sizeof(struct tc_action)); + enum tc_offloaded_state { TC_OFFLOADED_STATE_UNDEFINED, TC_OFFLOADED_STATE_IN_HW, @@ -330,15 +342,10 @@ struct tc_flower { int action_count; struct tc_action actions[TCA_ACT_MAX_NUM]; - struct ovs_flow_stats stats; + struct ovs_flow_stats stats_sw; + struct ovs_flow_stats stats_hw; uint64_t lastused; - struct { - bool rewrite; - struct tc_flower_key key; - struct tc_flower_key mask; - } rewrite; - uint32_t csum_update_flags; bool tunnel; @@ -352,13 +359,6 @@ struct tc_flower { enum tc_offload_policy tc_policy; }; -/* assert that if we overflow with a masked write of uint32_t to the last byte - * of flower.rewrite we overflow inside struct flower. 
- * shouldn't happen unless someone moves rewrite to the end of flower */ -BUILD_ASSERT_DECL(offsetof(struct tc_flower, rewrite) - + MEMBER_SIZEOF(struct tc_flower, rewrite) - + sizeof(uint32_t) - 2 < sizeof(struct tc_flower)); - int tc_replace_flower(struct tcf_id *id, struct tc_flower *flower); int tc_del_filter(struct tcf_id *id); int tc_get_flower(struct tcf_id *id, struct tc_flower *flower); diff --git a/lib/tnl-neigh-cache.c b/lib/tnl-neigh-cache.c index 5bda4af7e0..995c88bf17 100644 --- a/lib/tnl-neigh-cache.c +++ b/lib/tnl-neigh-cache.c @@ -32,6 +32,7 @@ #include "errno.h" #include "flow.h" #include "netdev.h" +#include "ovs-atomic.h" #include "ovs-thread.h" #include "packets.h" #include "openvswitch/poll-loop.h" @@ -44,14 +45,13 @@ #include "openvswitch/vlog.h" -/* In seconds */ -#define NEIGH_ENTRY_DEFAULT_IDLE_TIME (15 * 60) +#define NEIGH_ENTRY_DEFAULT_IDLE_TIME_MS (15 * 60 * 1000) struct tnl_neigh_entry { struct cmap_node cmap_node; struct in6_addr ip; struct eth_addr mac; - time_t expires; /* Expiration time. */ + atomic_llong expires; /* Expiration time in ms. 
*/ char br_name[IFNAMSIZ]; }; @@ -64,6 +64,16 @@ tnl_neigh_hash(const struct in6_addr *ip) return hash_bytes(ip->s6_addr, 16, 0); } +static bool +tnl_neigh_expired(struct tnl_neigh_entry *neigh) +{ + long long expires; + + atomic_read_explicit(&neigh->expires, &expires, memory_order_acquire); + + return expires <= time_msec(); +} + static struct tnl_neigh_entry * tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const struct in6_addr *dst) { @@ -73,11 +83,13 @@ tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const struct in6_addr *dst) hash = tnl_neigh_hash(dst); CMAP_FOR_EACH_WITH_HASH (neigh, cmap_node, hash, &table) { if (ipv6_addr_equals(&neigh->ip, dst) && !strcmp(neigh->br_name, br_name)) { - if (neigh->expires <= time_now()) { + if (tnl_neigh_expired(neigh)) { return NULL; } - neigh->expires = time_now() + NEIGH_ENTRY_DEFAULT_IDLE_TIME; + atomic_store_explicit(&neigh->expires, time_msec() + + NEIGH_ENTRY_DEFAULT_IDLE_TIME_MS, + memory_order_release); return neigh; } } @@ -113,15 +125,16 @@ tnl_neigh_delete(struct tnl_neigh_entry *neigh) ovsrcu_postpone(neigh_entry_free, neigh); } -static void -tnl_neigh_set__(const char name[IFNAMSIZ], const struct in6_addr *dst, - const struct eth_addr mac) +void +tnl_neigh_set(const char name[IFNAMSIZ], const struct in6_addr *dst, + const struct eth_addr mac) { ovs_mutex_lock(&mutex); struct tnl_neigh_entry *neigh = tnl_neigh_lookup__(name, dst); if (neigh) { if (eth_addr_equals(neigh->mac, mac)) { - neigh->expires = time_now() + NEIGH_ENTRY_DEFAULT_IDLE_TIME; + atomic_store_relaxed(&neigh->expires, time_msec() + + NEIGH_ENTRY_DEFAULT_IDLE_TIME_MS); ovs_mutex_unlock(&mutex); return; } @@ -133,7 +146,8 @@ tnl_neigh_set__(const char name[IFNAMSIZ], const struct in6_addr *dst, neigh->ip = *dst; neigh->mac = mac; - neigh->expires = time_now() + NEIGH_ENTRY_DEFAULT_IDLE_TIME; + atomic_store_relaxed(&neigh->expires, time_msec() + + NEIGH_ENTRY_DEFAULT_IDLE_TIME_MS); ovs_strlcpy(neigh->br_name, name, sizeof neigh->br_name); 
cmap_insert(&table, &neigh->cmap_node, tnl_neigh_hash(&neigh->ip)); ovs_mutex_unlock(&mutex); @@ -144,12 +158,12 @@ tnl_arp_set(const char name[IFNAMSIZ], ovs_be32 dst, const struct eth_addr mac) { struct in6_addr dst6 = in6_addr_mapped_ipv4(dst); - tnl_neigh_set__(name, &dst6, mac); + tnl_neigh_set(name, &dst6, mac); } static int tnl_arp_snoop(const struct flow *flow, struct flow_wildcards *wc, - const char name[IFNAMSIZ]) + const char name[IFNAMSIZ], bool allow_update) { /* Snoop normal ARP replies and gratuitous ARP requests/replies only */ if (!is_arp(flow) @@ -159,13 +173,17 @@ tnl_arp_snoop(const struct flow *flow, struct flow_wildcards *wc, return EINVAL; } - tnl_arp_set(name, FLOW_WC_GET_AND_MASK_WC(flow, wc, nw_src), flow->arp_sha); + memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src); + + if (allow_update) { + tnl_arp_set(name, flow->nw_src, flow->arp_sha); + } return 0; } static int tnl_nd_snoop(const struct flow *flow, struct flow_wildcards *wc, - const char name[IFNAMSIZ]) + const char name[IFNAMSIZ], bool allow_update) { if (!is_nd(flow, wc) || flow->tp_src != htons(ND_NEIGHBOR_ADVERT)) { return EINVAL; @@ -184,20 +202,22 @@ tnl_nd_snoop(const struct flow *flow, struct flow_wildcards *wc, memset(&wc->masks.ipv6_dst, 0xff, sizeof wc->masks.ipv6_dst); memset(&wc->masks.nd_target, 0xff, sizeof wc->masks.nd_target); - tnl_neigh_set__(name, &flow->nd_target, flow->arp_tha); + if (allow_update) { + tnl_neigh_set(name, &flow->nd_target, flow->arp_tha); + } return 0; } int tnl_neigh_snoop(const struct flow *flow, struct flow_wildcards *wc, - const char name[IFNAMSIZ]) + const char name[IFNAMSIZ], bool allow_update) { int res; - res = tnl_arp_snoop(flow, wc, name); + res = tnl_arp_snoop(flow, wc, name, allow_update); if (res != EINVAL) { return res; } - return tnl_nd_snoop(flow, wc, name); + return tnl_nd_snoop(flow, wc, name, allow_update); } void @@ -208,7 +228,7 @@ tnl_neigh_cache_run(void) ovs_mutex_lock(&mutex); CMAP_FOR_EACH(neigh, cmap_node, 
&table) { - if (neigh->expires <= time_now()) { + if (tnl_neigh_expired(neigh)) { tnl_neigh_delete(neigh); changed = true; } @@ -294,7 +314,7 @@ tnl_neigh_cache_add(struct unixctl_conn *conn, int argc OVS_UNUSED, return; } - tnl_neigh_set__(br_name, &ip6, mac); + tnl_neigh_set(br_name, &ip6, mac); unixctl_command_reply(conn, "OK"); } @@ -319,7 +339,7 @@ tnl_neigh_cache_show(struct unixctl_conn *conn, int argc OVS_UNUSED, ds_put_format(&ds, ETH_ADDR_FMT" %s", ETH_ADDR_ARGS(neigh->mac), neigh->br_name); - if (neigh->expires <= time_now()) { + if (tnl_neigh_expired(neigh)) { ds_put_format(&ds, " STALE"); } ds_put_char(&ds, '\n'); diff --git a/lib/tnl-neigh-cache.h b/lib/tnl-neigh-cache.h index e4b42b0594..877bca3127 100644 --- a/lib/tnl-neigh-cache.h +++ b/lib/tnl-neigh-cache.h @@ -32,7 +32,9 @@ #include "util.h" int tnl_neigh_snoop(const struct flow *flow, struct flow_wildcards *wc, - const char dev_name[IFNAMSIZ]); + const char dev_name[IFNAMSIZ], bool allow_update); +void tnl_neigh_set(const char name[IFNAMSIZ], const struct in6_addr *dst, + const struct eth_addr mac); int tnl_neigh_lookup(const char dev_name[IFNAMSIZ], const struct in6_addr *dst, struct eth_addr *mac); void tnl_neigh_cache_init(void); diff --git a/ofproto/bond.c b/ofproto/bond.c index a4116588f4..2c0ad5ef84 100644 --- a/ofproto/bond.c +++ b/ofproto/bond.c @@ -1253,7 +1253,7 @@ insert_bal(struct ovs_list *bals, struct bond_member *member) break; } } - ovs_list_insert(&pos->bal_node, &member->bal_node); + ovs_list_insert(pos ? 
&pos->bal_node : bals, &member->bal_node); } /* Removes 'member' from its current list and then inserts it into 'bals' so diff --git a/ofproto/ofproto-dpif-ipfix.c b/ofproto/ofproto-dpif-ipfix.c index 796eb6f881..92692470fd 100644 --- a/ofproto/ofproto-dpif-ipfix.c +++ b/ofproto/ofproto-dpif-ipfix.c @@ -926,17 +926,21 @@ dpif_ipfix_bridge_exporter_destroy(struct dpif_ipfix_bridge_exporter *exporter) static void dpif_ipfix_bridge_exporter_set_options( struct dpif_ipfix_bridge_exporter *exporter, - const struct ofproto_ipfix_bridge_exporter_options *options) + const struct ofproto_ipfix_bridge_exporter_options *options, + bool *options_changed) { - bool options_changed; - if (!options || sset_is_empty(&options->targets)) { /* No point in doing any work if there are no targets. */ - dpif_ipfix_bridge_exporter_clear(exporter); + if (exporter->options) { + dpif_ipfix_bridge_exporter_clear(exporter); + *options_changed = true; + } else { + *options_changed = false; + } return; } - options_changed = ( + *options_changed = ( !exporter->options || !ofproto_ipfix_bridge_exporter_options_equal( options, exporter->options)); @@ -945,7 +949,7 @@ dpif_ipfix_bridge_exporter_set_options( * shortchanged in collectors (which indicates that opening one or * more of the configured collectors failed, so that we should * retry). */ - if (options_changed + if (*options_changed || collectors_count(exporter->exporter.collectors) < sset_count(&options->targets)) { if (!dpif_ipfix_exporter_set_options( @@ -957,7 +961,7 @@ dpif_ipfix_bridge_exporter_set_options( } /* Avoid reconfiguring if options didn't change. 
*/ - if (!options_changed) { + if (!*options_changed) { return; } @@ -1015,17 +1019,21 @@ dpif_ipfix_flow_exporter_destroy(struct dpif_ipfix_flow_exporter *exporter) static bool dpif_ipfix_flow_exporter_set_options( struct dpif_ipfix_flow_exporter *exporter, - const struct ofproto_ipfix_flow_exporter_options *options) + const struct ofproto_ipfix_flow_exporter_options *options, + bool *options_changed) { - bool options_changed; - if (sset_is_empty(&options->targets)) { /* No point in doing any work if there are no targets. */ - dpif_ipfix_flow_exporter_clear(exporter); + if (exporter->options) { + dpif_ipfix_flow_exporter_clear(exporter); + *options_changed = true; + } else { + *options_changed = false; + } return true; } - options_changed = ( + *options_changed = ( !exporter->options || !ofproto_ipfix_flow_exporter_options_equal( options, exporter->options)); @@ -1034,7 +1042,7 @@ dpif_ipfix_flow_exporter_set_options( * shortchanged in collectors (which indicates that opening one or * more of the configured collectors failed, so that we should * retry). */ - if (options_changed + if (*options_changed || collectors_count(exporter->exporter.collectors) < sset_count(&options->targets)) { if (!dpif_ipfix_exporter_set_options( @@ -1046,7 +1054,7 @@ dpif_ipfix_flow_exporter_set_options( } /* Avoid reconfiguring if options didn't change. 
*/ - if (!options_changed) { + if (!*options_changed) { return true; } @@ -1069,7 +1077,7 @@ remove_flow_exporter(struct dpif_ipfix *di, free(node); } -void +bool dpif_ipfix_set_options( struct dpif_ipfix *di, const struct ofproto_ipfix_bridge_exporter_options *bridge_exporter_options, @@ -1077,16 +1085,19 @@ dpif_ipfix_set_options( size_t n_flow_exporters_options) OVS_EXCLUDED(mutex) { int i; + bool beo_changed, feo_changed, entry_changed; struct ofproto_ipfix_flow_exporter_options *options; struct dpif_ipfix_flow_exporter_map_node *node, *next; ovs_mutex_lock(&mutex); dpif_ipfix_bridge_exporter_set_options(&di->bridge_exporter, - bridge_exporter_options); + bridge_exporter_options, + &beo_changed); /* Add new flow exporters and update current flow exporters. */ options = (struct ofproto_ipfix_flow_exporter_options *) flow_exporters_options; + feo_changed = false; for (i = 0; i < n_flow_exporters_options; i++) { node = dpif_ipfix_find_flow_exporter_map_node( di, options->collector_set_id); @@ -1095,10 +1106,14 @@ dpif_ipfix_set_options( dpif_ipfix_flow_exporter_init(&node->exporter); hmap_insert(&di->flow_exporter_map, &node->node, hash_int(options->collector_set_id, 0)); + feo_changed = true; } - if (!dpif_ipfix_flow_exporter_set_options(&node->exporter, options)) { + if (!dpif_ipfix_flow_exporter_set_options(&node->exporter, + options, + &entry_changed)) { remove_flow_exporter(di, node); } + feo_changed = entry_changed ? true : feo_changed; options++; } @@ -1117,10 +1132,12 @@ dpif_ipfix_set_options( } if (i == n_flow_exporters_options) { /* Not found. 
*/ remove_flow_exporter(di, node); + feo_changed = true; } } ovs_mutex_unlock(&mutex); + return beo_changed || feo_changed; } struct dpif_ipfix * diff --git a/ofproto/ofproto-dpif-ipfix.h b/ofproto/ofproto-dpif-ipfix.h index 1f42cd5275..75c0ab81ac 100644 --- a/ofproto/ofproto-dpif-ipfix.h +++ b/ofproto/ofproto-dpif-ipfix.h @@ -48,7 +48,7 @@ bool dpif_ipfix_get_bridge_exporter_output_sampling(const struct dpif_ipfix *); bool dpif_ipfix_get_flow_exporter_tunnel_sampling(const struct dpif_ipfix *, const uint32_t); bool dpif_ipfix_is_tunnel_port(const struct dpif_ipfix *, odp_port_t); -void dpif_ipfix_set_options( +bool dpif_ipfix_set_options( struct dpif_ipfix *, const struct ofproto_ipfix_bridge_exporter_options *, const struct ofproto_ipfix_flow_exporter_options *, size_t); diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c index 864c136b5d..0f4a61ac6b 100644 --- a/ofproto/ofproto-dpif-sflow.c +++ b/ofproto/ofproto-dpif-sflow.c @@ -468,7 +468,8 @@ sflow_choose_agent_address(const char *agent_device, const char *target; SSET_FOR_EACH (target, targets) { struct sockaddr_storage ss; - if (inet_parse_active(target, SFL_DEFAULT_COLLECTOR_PORT, &ss, true)) { + if (inet_parse_active(target, SFL_DEFAULT_COLLECTOR_PORT, + &ss, true, NULL)) { /* sFlow only supports target in default routing table with * packet mark zero. 
*/ diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index 1c9c720f04..57f94df544 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -2971,11 +2971,11 @@ upcall_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED, } ds_put_char(&ds, '\n'); - for (i = 0; i < n_revalidators; i++) { + for (i = 0; i < udpif->n_revalidators; i++) { struct revalidator *revalidator = &udpif->revalidators[i]; int j, elements = 0; - for (j = i; j < N_UMAPS; j += n_revalidators) { + for (j = i; j < N_UMAPS; j += udpif->n_revalidators) { elements += cmap_count(&udpif->ukeys[j].cmap); } ds_put_format(&ds, " %u: (keys %d)\n", revalidator->id, elements); diff --git a/ofproto/ofproto-dpif-xlate-cache.c b/ofproto/ofproto-dpif-xlate-cache.c index dcc91cb380..9224ee2e6d 100644 --- a/ofproto/ofproto-dpif-xlate-cache.c +++ b/ofproto/ofproto-dpif-xlate-cache.c @@ -209,6 +209,7 @@ xlate_cache_clear_entry(struct xc_entry *entry) { switch (entry->type) { case XC_TABLE: + ofproto_unref(&(entry->table.ofproto->up)); break; case XC_RULE: ofproto_rule_unref(&entry->rule->up); @@ -231,6 +232,7 @@ xlate_cache_clear_entry(struct xc_entry *entry) free(entry->learn.ofm); break; case XC_NORMAL: + ofproto_unref(&(entry->normal.ofproto->up)); break; case XC_FIN_TIMEOUT: /* 'u.fin.rule' is always already held as a XC_RULE, which diff --git a/ofproto/ofproto-dpif-xlate-cache.h b/ofproto/ofproto-dpif-xlate-cache.h index 114aff8ea3..0fc6d2ea60 100644 --- a/ofproto/ofproto-dpif-xlate-cache.h +++ b/ofproto/ofproto-dpif-xlate-cache.h @@ -61,9 +61,8 @@ enum xc_type { * that a flow relates to, although they may be used for other effects as well * (for instance, refreshing hard timeouts for learned flows). * - * An explicit reference is taken to all pointers other than the ones for - * struct ofproto_dpif. ofproto_dpif pointers are explicitly protected by - * destroying all xlate caches before the ofproto is destroyed. 
*/ + * An explicit reference is taken to all pointers. + */ struct xc_entry { enum xc_type type; union { diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index a426fcfeb6..b8886105df 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -460,7 +460,7 @@ static void xlate_commit_actions(struct xlate_ctx *ctx); static void patch_port_output(struct xlate_ctx *ctx, const struct xport *in_dev, - struct xport *out_dev); + struct xport *out_dev, bool is_last_action); static void ctx_trigger_freeze(struct xlate_ctx *ctx) @@ -865,7 +865,7 @@ xlate_xbridge_init(struct xlate_cfg *xcfg, struct xbridge *xbridge) ovs_list_init(&xbridge->xbundles); hmap_init(&xbridge->xports); hmap_insert(&xcfg->xbridges, &xbridge->hmap_node, - hash_pointer(xbridge->ofproto, 0)); + uuid_hash(&xbridge->ofproto->uuid)); } static void @@ -1639,7 +1639,7 @@ xbridge_lookup(struct xlate_cfg *xcfg, const struct ofproto_dpif *ofproto) xbridges = &xcfg->xbridges; - HMAP_FOR_EACH_IN_BUCKET (xbridge, hmap_node, hash_pointer(ofproto, 0), + HMAP_FOR_EACH_IN_BUCKET (xbridge, hmap_node, uuid_hash(&ofproto->uuid), xbridges) { if (xbridge->ofproto == ofproto) { return xbridge; @@ -1661,6 +1661,23 @@ xbridge_lookup_by_uuid(struct xlate_cfg *xcfg, const struct uuid *uuid) return NULL; } +struct ofproto_dpif * +xlate_ofproto_lookup(const struct uuid *uuid) +{ + struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp); + struct xbridge *xbridge; + + if (!xcfg) { + return NULL; + } + + xbridge = xbridge_lookup_by_uuid(xcfg, uuid); + if (xbridge != NULL) { + return xbridge->ofproto; + } + return NULL; +} + static struct xbundle * xbundle_lookup(struct xlate_cfg *xcfg, const struct ofbundle *ofbundle) { @@ -2125,9 +2142,14 @@ mirror_packet(struct xlate_ctx *ctx, struct xbundle *xbundle, int snaplen; /* Get the details of the mirror represented by the rightmost 1-bit. 
*/ - ovs_assert(mirror_get(xbridge->mbridge, raw_ctz(mirrors), - &vlans, &dup_mirrors, - &out, &snaplen, &out_vlan)); + if (OVS_UNLIKELY(!mirror_get(xbridge->mbridge, raw_ctz(mirrors), + &vlans, &dup_mirrors, + &out, &snaplen, &out_vlan))) { + /* The mirror got reconfigured before we got to read its + * configuration. */ + mirrors = zero_rightmost_1bit(mirrors); + continue; + } /* If this mirror selects on the basis of VLAN, and it does not select @@ -3015,7 +3037,7 @@ xlate_normal(struct xlate_ctx *ctx) bool is_grat_arp = is_gratuitous_arp(flow, wc); if (ctx->xin->allow_side_effects && flow->packet_type == htonl(PT_ETH) - && in_port->pt_mode != NETDEV_PT_LEGACY_L3 + && in_port && in_port->pt_mode != NETDEV_PT_LEGACY_L3 ) { update_learning_table(ctx, in_xbundle, flow->dl_src, vlan, is_grat_arp); @@ -3024,12 +3046,14 @@ xlate_normal(struct xlate_ctx *ctx) struct xc_entry *entry; /* Save just enough info to update mac learning table later. */ - entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NORMAL); - entry->normal.ofproto = ctx->xbridge->ofproto; - entry->normal.in_port = flow->in_port.ofp_port; - entry->normal.dl_src = flow->dl_src; - entry->normal.vlan = vlan; - entry->normal.is_gratuitous_arp = is_grat_arp; + if (ofproto_try_ref(&ctx->xbridge->ofproto->up)) { + entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NORMAL); + entry->normal.ofproto = ctx->xbridge->ofproto; + entry->normal.in_port = flow->in_port.ofp_port; + entry->normal.dl_src = flow->dl_src; + entry->normal.vlan = vlan; + entry->normal.is_gratuitous_arp = is_grat_arp; + } } /* Determine output bundle. 
*/ @@ -3048,7 +3072,6 @@ xlate_normal(struct xlate_ctx *ctx) */ ctx->xout->slow |= SLOW_ACTION; - memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src); if (mcast_snooping_is_membership(flow->tp_src) || mcast_snooping_is_query(flow->tp_src)) { if (ctx->xin->allow_side_effects && ctx->xin->packet) { @@ -3272,7 +3295,9 @@ compose_ipfix_action(struct xlate_ctx *ctx, odp_port_t output_odp_port) struct dpif_ipfix *ipfix = ctx->xbridge->ipfix; odp_port_t tunnel_out_port = ODPP_NONE; - if (!ipfix || ctx->xin->flow.in_port.ofp_port == OFPP_NONE) { + if (!ipfix || + (output_odp_port == ODPP_NONE && + ctx->xin->flow.in_port.ofp_port == OFPP_NONE)) { return; } @@ -3521,6 +3546,9 @@ propagate_tunnel_data_to_flow__(struct flow *dst_flow, dst_flow->dl_dst = dmac; dst_flow->dl_src = smac; + /* Clear VLAN entries which do not apply for tunnel flows. */ + memset(dst_flow->vlans, 0, sizeof dst_flow->vlans); + dst_flow->packet_type = htonl(PT_ETH); dst_flow->nw_dst = src_flow->tunnel.ip_dst; dst_flow->nw_src = src_flow->tunnel.ip_src; @@ -3598,7 +3626,7 @@ propagate_tunnel_data_to_flow(struct xlate_ctx *ctx, struct eth_addr dmac, static int native_tunnel_output(struct xlate_ctx *ctx, const struct xport *xport, const struct flow *flow, odp_port_t tunnel_odp_port, - bool truncate) + bool truncate, bool is_last_action) { struct netdev_tnl_build_header_params tnl_params; struct ovs_action_push_tnl tnl_push_data; @@ -3728,7 +3756,7 @@ native_tunnel_output(struct xlate_ctx *ctx, const struct xport *xport, entry->tunnel_hdr.hdr_size = tnl_push_data.header_len; entry->tunnel_hdr.operation = ADD; - patch_port_output(ctx, xport, out_dev); + patch_port_output(ctx, xport, out_dev, is_last_action); /* Similar to the stats update in revalidation, the x_cache entries * are populated by the previous translation are used to update the @@ -3822,7 +3850,7 @@ xlate_flow_is_protected(const struct xlate_ctx *ctx, const struct flow *flow, co */ static void patch_port_output(struct xlate_ctx *ctx, const 
struct xport *in_dev, - struct xport *out_dev) + struct xport *out_dev, bool is_last_action) { struct flow *flow = &ctx->xin->flow; struct flow old_flow = ctx->xin->flow; @@ -3864,8 +3892,9 @@ patch_port_output(struct xlate_ctx *ctx, const struct xport *in_dev, if (!process_special(ctx, out_dev) && may_receive(out_dev, ctx)) { if (xport_stp_forward_state(out_dev) && xport_rstp_forward_state(out_dev)) { + xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true, - false, true, clone_xlate_actions); + false, is_last_action, clone_xlate_actions); if (!ctx->freezing) { xlate_action_set(ctx); } @@ -3880,7 +3909,7 @@ patch_port_output(struct xlate_ctx *ctx, const struct xport *in_dev, mirror_mask_t old_mirrors2 = ctx->mirrors; xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true, - false, true, clone_xlate_actions); + false, is_last_action, clone_xlate_actions); ctx->mirrors = old_mirrors2; ctx->base_flow = old_base_flow; ctx->odp_actions->size = old_size; @@ -4097,7 +4126,21 @@ terminate_native_tunnel(struct xlate_ctx *ctx, struct flow *flow, (flow->dl_type == htons(ETH_TYPE_ARP) || flow->nw_proto == IPPROTO_ICMPV6) && is_neighbor_reply_correct(ctx, flow)) { - tnl_neigh_snoop(flow, wc, ctx->xbridge->name); + tnl_neigh_snoop(flow, wc, ctx->xbridge->name, + ctx->xin->allow_side_effects); + } else if (*tnl_port != ODPP_NONE && + ctx->xin->allow_side_effects && + dl_type_is_ip_any(flow->dl_type)) { + struct eth_addr mac = flow->dl_src; + struct in6_addr s_ip6; + + if (flow->dl_type == htons(ETH_TYPE_IP)) { + in6_addr_set_mapped_ipv4(&s_ip6, flow->nw_src); + } else { + s_ip6 = flow->ipv6_src; + } + + tnl_neigh_set(ctx->xbridge->name, &s_ip6, mac); } } @@ -4107,7 +4150,7 @@ terminate_native_tunnel(struct xlate_ctx *ctx, struct flow *flow, static void compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, const struct xlate_bond_recirc *xr, bool check_stp, - bool is_last_action OVS_UNUSED, bool truncate) + bool is_last_action, bool truncate) { const 
struct xport *xport = get_ofp_port(ctx->xbridge, ofp_port); struct flow_wildcards *wc = ctx->wc; @@ -4137,6 +4180,10 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, if (xport->pt_mode == NETDEV_PT_LEGACY_L3) { flow->packet_type = PACKET_TYPE_BE(OFPHTN_ETHERTYPE, ntohs(flow->dl_type)); + if (ctx->pending_encap) { + /* The Ethernet header was not actually added yet. */ + ctx->pending_encap = false; + } } } @@ -4144,7 +4191,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, if (truncate) { xlate_report_error(ctx, "Cannot truncate output to patch port"); } - patch_port_output(ctx, xport, xport->peer); + patch_port_output(ctx, xport, xport->peer, is_last_action); return; } @@ -4239,7 +4286,8 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, xr->recirc_id); } else if (is_native_tunnel) { /* Output to native tunnel port. */ - native_tunnel_output(ctx, xport, flow, odp_port, truncate); + native_tunnel_output(ctx, xport, flow, odp_port, truncate, + is_last_action); flow->tunnel = flow_tnl; /* Restore tunnel metadata */ } else if (terminate_native_tunnel(ctx, flow, wc, @@ -6177,11 +6225,32 @@ static void compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc, bool is_last_action) { - ovs_u128 old_ct_label_mask = ctx->wc->masks.ct_label; - uint32_t old_ct_mark_mask = ctx->wc->masks.ct_mark; - size_t ct_offset; uint16_t zone; + if (ofc->zone_src.field) { + union mf_subvalue value; + memset(&value, 0xff, sizeof(value)); + + zone = mf_get_subfield(&ofc->zone_src, &ctx->xin->flow); + if (ctx->xin->frozen_state) { + /* If the upcall is a resume of a recirculation, we only need to + * unwildcard the fields that are not in the frozen_metadata, as + * when the rules update, OVS will generate a new recirc_id, + * which will invalidate the megaflow with the old recirc_id. 
+ */ + if (!mf_is_frozen_metadata(ofc->zone_src.field)) { + mf_write_subfield_flow(&ofc->zone_src, &value, + &ctx->wc->masks); + } + } else { + mf_write_subfield_flow(&ofc->zone_src, &value, &ctx->wc->masks); + } + } else { + zone = ofc->zone_imm; + } + size_t ct_offset; + ovs_u128 old_ct_label_mask = ctx->wc->masks.ct_label; + uint32_t old_ct_mark_mask = ctx->wc->masks.ct_mark; /* Ensure that any prior actions are applied before composing the new * conntrack action. */ xlate_commit_actions(ctx); @@ -6193,11 +6262,6 @@ compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc, do_xlate_actions(ofc->actions, ofpact_ct_get_action_len(ofc), ctx, is_last_action, false); - if (ofc->zone_src.field) { - zone = mf_get_subfield(&ofc->zone_src, &ctx->xin->flow); - } else { - zone = ofc->zone_imm; - } ct_offset = nl_msg_start_nested(ctx->odp_actions, OVS_ACTION_ATTR_CT); if (ofc->flags & NX_CT_F_COMMIT) { @@ -6333,6 +6397,7 @@ xlate_check_pkt_larger(struct xlate_ctx *ctx, * then ctx->exit would be true. Reset to false so that we can * do flow translation for 'IF_LESS_EQUAL' case. finish_freezing() * would have taken care of Undoing the changes done for freeze. 
*/ + bool old_exit = ctx->exit; ctx->exit = false; offset_attr = nl_msg_start_nested( @@ -6357,7 +6422,7 @@ xlate_check_pkt_larger(struct xlate_ctx *ctx, ctx->was_mpls = old_was_mpls; ctx->conntracked = old_conntracked; ctx->xin->flow = old_flow; - ctx->exit = true; + ctx->exit = old_exit; } static void @@ -6738,13 +6803,14 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, return; } + bool exit = false; OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) { struct ofpact_controller *controller; const struct ofpact_metadata *metadata; const struct ofpact_set_field *set_field; const struct mf_field *mf; bool last = is_last_action && ofpact_last(a, ofpacts, ofpacts_len) - && ctx->action_set.size; + && !ctx->action_set.size; if (ctx->error) { break; @@ -6752,7 +6818,7 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, recirc_for_mpls(a, ctx); - if (ctx->exit) { + if (ctx->exit || exit) { /* Check if need to store the remaining actions for later * execution. */ if (ctx->freezing) { @@ -7149,17 +7215,18 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, break; case OFPACT_CHECK_PKT_LARGER: { - if (last) { - /* If this is last action, then there is no need to - * translate the action. */ - break; - } const struct ofpact *remaining_acts = ofpact_next(a); size_t remaining_acts_len = ofpact_remaining_len(remaining_acts, ofpacts, ofpacts_len); xlate_check_pkt_larger(ctx, ofpact_get_CHECK_PKT_LARGER(a), remaining_acts, remaining_acts_len); + if (ctx->xbridge->support.check_pkt_len) { + /* If datapath supports check_pkt_len, then + * xlate_check_pkt_larger() does the translation for the + * ofpacts following 'a'. */ + exit = true; + } break; } } @@ -7623,6 +7690,12 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) goto exit; } + if (!xin->frozen_state + && xin->flow.ct_state + && xin->flow.ct_state & CS_TRACKED) { + ctx.conntracked = true; + } + /* Tunnel metadata in udpif format must be normalized before translation. 
*/ if (flow->tunnel.flags & FLOW_TNL_F_UDPIF) { const struct tun_table *tun_tab = ofproto_get_tun_tab( diff --git a/ofproto/ofproto-dpif-xlate.h b/ofproto/ofproto-dpif-xlate.h index 851088d794..2ba90e999c 100644 --- a/ofproto/ofproto-dpif-xlate.h +++ b/ofproto/ofproto-dpif-xlate.h @@ -176,6 +176,7 @@ void xlate_ofproto_set(struct ofproto_dpif *, const char *name, struct dpif *, bool forward_bpdu, bool has_in_band, const struct dpif_backer_support *support); void xlate_remove_ofproto(struct ofproto_dpif *); +struct ofproto_dpif *xlate_ofproto_lookup(const struct uuid *uuid); void xlate_bundle_set(struct ofproto_dpif *, struct ofbundle *, const char *name, enum port_vlan_mode, diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index cba49a99e1..59eae88d87 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -215,10 +215,6 @@ struct shash all_dpif_backers = SHASH_INITIALIZER(&all_dpif_backers); static struct hmap all_ofproto_dpifs_by_name = HMAP_INITIALIZER(&all_ofproto_dpifs_by_name); -/* All existing ofproto_dpif instances, indexed by ->uuid. 
*/ -static struct hmap all_ofproto_dpifs_by_uuid = - HMAP_INITIALIZER(&all_ofproto_dpifs_by_uuid); - static bool ofproto_use_tnl_push_pop = true; static void ofproto_unixctl_init(void); static void ct_zone_config_init(struct dpif_backer *backer); @@ -1682,9 +1678,6 @@ construct(struct ofproto *ofproto_) hmap_insert(&all_ofproto_dpifs_by_name, &ofproto->all_ofproto_dpifs_by_name_node, hash_string(ofproto->up.name, 0)); - hmap_insert(&all_ofproto_dpifs_by_uuid, - &ofproto->all_ofproto_dpifs_by_uuid_node, - uuid_hash(&ofproto->uuid)); memset(&ofproto->stats, 0, sizeof ofproto->stats); ofproto_init_tables(ofproto_, N_TABLES); @@ -1782,8 +1775,6 @@ destruct(struct ofproto *ofproto_, bool del) hmap_remove(&all_ofproto_dpifs_by_name, &ofproto->all_ofproto_dpifs_by_name_node); - hmap_remove(&all_ofproto_dpifs_by_uuid, - &ofproto->all_ofproto_dpifs_by_uuid_node); OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) { CLS_FOR_EACH (rule, up.cr, &table->cls) { @@ -1819,6 +1810,8 @@ destruct(struct ofproto *ofproto_, bool del) seq_destroy(ofproto->ams_seq); + /* Wait for all the meter destroy work to finish. */ + ovsrcu_barrier(); close_dpif_backer(ofproto->backer, del); } @@ -2308,6 +2301,7 @@ set_ipfix( struct dpif_ipfix *di = ofproto->ipfix; bool has_options = bridge_exporter_options || flow_exporters_options; bool new_di = false; + bool options_changed = false; if (has_options && !di) { di = ofproto->ipfix = dpif_ipfix_create(); @@ -2317,7 +2311,7 @@ set_ipfix( if (di) { /* Call set_options in any case to cleanly flush the flow * caches in the last exporters that are to be destroyed. 
*/ - dpif_ipfix_set_options( + options_changed = dpif_ipfix_set_options( di, bridge_exporter_options, flow_exporters_options, n_flow_exporters_options); @@ -2333,6 +2327,10 @@ set_ipfix( dpif_ipfix_unref(di); ofproto->ipfix = NULL; } + + if (new_di || options_changed) { + ofproto->backer->need_revalidate = REV_RECONFIGURE; + } } return 0; @@ -4433,12 +4431,14 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto, atomic_add_relaxed(&tbl->n_matched, stats->n_packets, &orig); } if (xcache) { - struct xc_entry *entry; + if (ofproto_try_ref(&ofproto->up)) { + struct xc_entry *entry; - entry = xlate_cache_add_entry(xcache, XC_TABLE); - entry->table.ofproto = ofproto; - entry->table.id = *table_id; - entry->table.match = true; + entry = xlate_cache_add_entry(xcache, XC_TABLE); + entry->table.ofproto = ofproto; + entry->table.id = *table_id; + entry->table.match = true; + } } return rule; } @@ -4469,12 +4469,14 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto, stats->n_packets, &orig); } if (xcache) { - struct xc_entry *entry; + if (ofproto_try_ref(&ofproto->up)) { + struct xc_entry *entry; - entry = xlate_cache_add_entry(xcache, XC_TABLE); - entry->table.ofproto = ofproto; - entry->table.id = next_id; - entry->table.match = (rule != NULL); + entry = xlate_cache_add_entry(xcache, XC_TABLE); + entry->table.ofproto = ofproto; + entry->table.id = next_id; + entry->table.match = (rule != NULL); + } } if (rule) { goto out; /* Match. 
*/ @@ -5556,6 +5558,7 @@ ct_set_zone_timeout_policy(const char *datapath_type, uint16_t zone_id, ct_timeout_policy_unref(backer, ct_zone->ct_tp); ct_zone->ct_tp = ct_tp; ct_tp->ref_count++; + backer->need_revalidate = REV_RECONFIGURE; } } else { struct ct_zone *new_ct_zone = ct_zone_alloc(zone_id); @@ -5563,6 +5566,7 @@ ct_set_zone_timeout_policy(const char *datapath_type, uint16_t zone_id, cmap_insert(&backer->ct_zones, &new_ct_zone->node, hash_int(zone_id, 0)); ct_tp->ref_count++; + backer->need_revalidate = REV_RECONFIGURE; } } @@ -5579,6 +5583,7 @@ ct_del_zone_timeout_policy(const char *datapath_type, uint16_t zone_id) if (ct_zone) { ct_timeout_policy_unref(backer, ct_zone->ct_tp); ct_zone_remove_and_destroy(backer, ct_zone); + backer->need_revalidate = REV_RECONFIGURE; } } @@ -5779,15 +5784,7 @@ ofproto_dpif_lookup_by_name(const char *name) struct ofproto_dpif * ofproto_dpif_lookup_by_uuid(const struct uuid *uuid) { - struct ofproto_dpif *ofproto; - - HMAP_FOR_EACH_WITH_HASH (ofproto, all_ofproto_dpifs_by_uuid_node, - uuid_hash(uuid), &all_ofproto_dpifs_by_uuid) { - if (uuid_equals(&ofproto->uuid, uuid)) { - return ofproto; - } - } - return NULL; + return xlate_ofproto_lookup(uuid); } static void @@ -6496,6 +6493,7 @@ ofproto_unixctl_dpif_show_dp_features(struct unixctl_conn *conn, dpif_show_support(&ofproto->backer->bt_support, &ds); unixctl_command_reply(conn, ds_cstr(&ds)); + ds_destroy(&ds); } static void diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h index 57c7d17cb2..47e96e62e1 100644 --- a/ofproto/ofproto-provider.h +++ b/ofproto/ofproto-provider.h @@ -66,6 +66,7 @@ struct bfd_cfg; struct meter; struct ofoperation; struct ofproto_packet_out; +struct rule_collection; struct smap; extern struct ovs_mutex ofproto_mutex; @@ -115,6 +116,9 @@ struct ofproto { /* List of expirable flows, in all flow tables. */ struct ovs_list expirable OVS_GUARDED_BY(ofproto_mutex); + /* List of flows to remove from flow tables. 
*/ + struct rule_collection *to_remove OVS_GUARDED_BY(ofproto_mutex); + /* Meter table. */ struct ofputil_meter_features meter_features; struct hmap meters; /* uint32_t indexed 'struct meter *'. */ @@ -139,6 +143,8 @@ struct ofproto { /* Variable length mf_field mapping. Stores all configured variable length * meta-flow fields (struct mf_field) in a switch. */ struct vl_mff_map vl_mff_map; + /* refcount to this ofproto, held by rule/group/xlate_caches */ + struct ovs_refcount refcount; }; void ofproto_init_tables(struct ofproto *, int n_tables); @@ -1962,6 +1968,7 @@ struct ofproto_flow_mod { bool modify_may_add_flow; bool modify_keep_counts; enum nx_flow_update_event event; + uint8_t table_id; /* These are only used during commit execution. * ofproto_flow_mod_uninit() does NOT clean these up. */ diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index bd6103b1c8..7e09a588a2 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -213,6 +213,8 @@ static void ofproto_rule_insert__(struct ofproto *, struct rule *) OVS_REQUIRES(ofproto_mutex); static void ofproto_rule_remove__(struct ofproto *, struct rule *) OVS_REQUIRES(ofproto_mutex); +static void remove_rules_postponed(struct rule_collection *) + OVS_REQUIRES(ofproto_mutex); /* The source of an OpenFlow request. 
* @@ -530,6 +532,8 @@ ofproto_create(const char *datapath_name, const char *datapath_type, hindex_init(&ofproto->cookies); hmap_init(&ofproto->learned_cookies); ovs_list_init(&ofproto->expirable); + ofproto->to_remove = xzalloc(sizeof *ofproto->to_remove); + rule_collection_init(ofproto->to_remove); ofproto->connmgr = connmgr_create(ofproto, datapath_name, datapath_name); ofproto->min_mtu = INT_MAX; cmap_init(&ofproto->groups); @@ -545,6 +549,7 @@ ofproto_create(const char *datapath_name, const char *datapath_type, ovs_mutex_init(&ofproto->vl_mff_map.mutex); cmap_init(&ofproto->vl_mff_map.cmap); + ovs_refcount_init(&ofproto->refcount); error = ofproto->ofproto_class->construct(ofproto); if (error) { @@ -1631,6 +1636,7 @@ ofproto_flush__(struct ofproto *ofproto, bool del) } ofproto_group_delete_all__(ofproto); meter_delete_all(ofproto); + remove_rules_postponed(ofproto->to_remove); /* XXX: Concurrent handler threads may insert new learned flows based on * learn actions of the now deleted flows right after we release * 'ofproto_mutex'. */ @@ -1682,12 +1688,41 @@ ofproto_destroy__(struct ofproto *ofproto) ovs_assert(hmap_is_empty(&ofproto->learned_cookies)); hmap_destroy(&ofproto->learned_cookies); + ovs_mutex_lock(&ofproto_mutex); + rule_collection_destroy(ofproto->to_remove); + free(ofproto->to_remove); + ovs_mutex_unlock(&ofproto_mutex); + ofproto->ofproto_class->dealloc(ofproto); } -/* Destroying rules is doubly deferred, must have 'ofproto' around for them. - * - 1st we defer the removal of the rules from the classifier - * - 2nd we defer the actual destruction of the rules. */ +/* + * Rule destruction requires ofproto to remain accessible. + * Depending on the rule destruction call (shown below), it can take several + * RCU grace periods before the ofproto reference is not needed anymore. + * The ofproto destruction callback is thus protected by a refcount, + * and such destruction is itself deferred. 
+ * + * remove_rules_postponed (one grace period) + * -> remove_rule_rcu + * -> remove_rule_rcu__ + * -> ofproto_rule_unref -> ref count != 1 + * -> ... more grace periods. + * -> rule_destroy_cb (> 2 grace periods) + * -> free + * + * NOTE: The original ofproto destruction is only deferred by two grace + * periods to keep ofproto accessible. By using refcount together the + * destruction can be deferred for longer time. Now ofproto has 3 states: + * + * state 1: alive, with refcount >= 1 + * state 2: dying, with refcount == 0, however pointer is valid + * state 3: died, memory freed, pointer might be dangling. + * + * We only need to add refcount to certain objects whose destruction can + * take several RCU grace periods (rule, group, xlate_cache). Other + * references to ofproto must be cleared before the 2 RCU grace periods. + */ static void ofproto_destroy_defer__(struct ofproto *ofproto) OVS_EXCLUDED(ofproto_mutex) @@ -1695,6 +1730,26 @@ ofproto_destroy_defer__(struct ofproto *ofproto) ovsrcu_postpone(ofproto_destroy__, ofproto); } +void +ofproto_ref(struct ofproto *ofproto) +{ + ovs_refcount_ref(&ofproto->refcount); +} + +bool +ofproto_try_ref(struct ofproto *ofproto) +{ + return ovs_refcount_try_ref_rcu(&ofproto->refcount); +} + +void +ofproto_unref(struct ofproto *ofproto) +{ + if (ofproto && ovs_refcount_unref(&ofproto->refcount) == 1) { + ovsrcu_postpone(ofproto_destroy_defer__, ofproto); + } +} + void ofproto_destroy(struct ofproto *p, bool del) OVS_EXCLUDED(ofproto_mutex) @@ -1726,8 +1781,7 @@ ofproto_destroy(struct ofproto *p, bool del) p->connmgr = NULL; ovs_mutex_unlock(&ofproto_mutex); - /* Destroying rules is deferred, must have 'ofproto' around for them. */ - ovsrcu_postpone(ofproto_destroy_defer__, p); + ofproto_unref(p); } /* Destroys the datapath with the respective 'name' and 'type'. 
With the Linux @@ -1878,6 +1932,9 @@ ofproto_run(struct ofproto *p) connmgr_run(p->connmgr, handle_openflow); + ovs_mutex_lock(&ofproto_mutex); + remove_rules_postponed(p->to_remove); + ovs_mutex_unlock(&ofproto_mutex); return error; } @@ -2916,6 +2973,9 @@ ofproto_rule_destroy__(struct rule *rule) cls_rule_destroy(CONST_CAST(struct cls_rule *, &rule->cr)); rule_actions_destroy(rule_get_actions(rule)); ovs_mutex_destroy(&rule->mutex); + /* ofproto_unref() must be called first. It is possible because ofproto + * destruction is deferred by an RCU grace period. */ + ofproto_unref(rule->ofproto); rule->ofproto->ofproto_class->rule_dealloc(rule); } @@ -3056,6 +3116,9 @@ group_destroy_cb(struct ofgroup *group) &group->props)); ofputil_bucket_list_destroy(CONST_CAST(struct ovs_list *, &group->buckets)); + /* ofproto_unref() must be called first. It is possible because ofproto + * destruction is deferred by an RCU grace period. */ + ofproto_unref(group->ofproto); group->ofproto->ofproto_class->group_dealloc(group); } @@ -4437,6 +4500,20 @@ rule_criteria_destroy(struct rule_criteria *criteria) criteria->version = OVS_VERSION_NOT_REMOVED; /* Mark as destroyed. */ } +/* Adds rules to the 'to_remove' collection, so they can be destroyed + * later all together. Destroys 'rules'. */ +static void +rules_mark_for_removal(struct ofproto *ofproto, struct rule_collection *rules) + OVS_REQUIRES(ofproto_mutex) +{ + struct rule *rule; + + RULE_COLLECTION_FOR_EACH (rule, rules) { + rule_collection_add(ofproto->to_remove, rule); + } + rule_collection_destroy(rules); +} + /* Schedules postponed removal of rules, destroys 'rules'. */ static void remove_rules_postponed(struct rule_collection *rules) @@ -5244,10 +5321,15 @@ ofproto_rule_create(struct ofproto *ofproto, struct cls_rule *cr, struct rule *rule; enum ofperr error; + if (!ofproto_try_ref(ofproto)) { + return OFPERR_OFPFMFC_UNKNOWN; + } + /* Allocate new rule. 
*/ rule = ofproto->ofproto_class->rule_alloc(); if (!rule) { cls_rule_destroy(cr); + ofproto_unref(ofproto); VLOG_WARN_RL(&rl, "%s: failed to allocate a rule.", ofproto->name); return OFPERR_OFPFMFC_UNKNOWN; } @@ -5833,7 +5915,7 @@ modify_flows_finish(struct ofproto *ofproto, struct ofproto_flow_mod *ofm, } } learned_cookies_flush(ofproto, &dead_cookies); - remove_rules_postponed(old_rules); + rules_mark_for_removal(ofproto, old_rules); } return error; @@ -5941,7 +6023,7 @@ delete_flows_finish__(struct ofproto *ofproto, learned_cookies_dec(ofproto, rule_get_actions(rule), &dead_cookies); } - remove_rules_postponed(rules); + rules_mark_for_removal(ofproto, rules); learned_cookies_flush(ofproto, &dead_cookies); } @@ -7312,8 +7394,13 @@ init_group(struct ofproto *ofproto, const struct ofputil_group_mod *gm, return OFPERR_OFPGMFC_BAD_TYPE; } + if (!ofproto_try_ref(ofproto)) { + return OFPERR_OFPFMFC_UNKNOWN; + } + *ofgroup = ofproto->ofproto_class->group_alloc(); if (!*ofgroup) { + ofproto_unref(ofproto); VLOG_WARN_RL(&rl, "%s: failed to allocate group", ofproto->name); return OFPERR_OFPGMFC_OUT_OF_GROUPS; } @@ -7350,6 +7437,7 @@ init_group(struct ofproto *ofproto, const struct ofputil_group_mod *gm, &(*ofgroup)->props)); ofputil_bucket_list_destroy(CONST_CAST(struct ovs_list *, &(*ofgroup)->buckets)); + ofproto_unref(ofproto); ofproto->ofproto_class->group_dealloc(*ofgroup); } return error; @@ -7967,6 +8055,7 @@ ofproto_flow_mod_init(struct ofproto *ofproto, struct ofproto_flow_mod *ofm, ofm->criteria.version = OVS_VERSION_NOT_REMOVED; ofm->conjs = NULL; ofm->n_conjs = 0; + ofm->table_id = fm->table_id; bool check_buffer_id = false; @@ -8104,6 +8193,33 @@ ofproto_flow_mod_finish(struct ofproto *ofproto, struct ofproto_flow_mod *ofm, return error; } +static void +ofproto_table_classifier_defer(struct ofproto *ofproto, + const struct ofproto_flow_mod *ofm) +{ + if (check_table_id(ofproto, ofm->table_id)) { + if (ofm->table_id == OFPTT_ALL) { + struct oftable *table; + + 
OFPROTO_FOR_EACH_TABLE (table, ofproto) { + classifier_defer(&table->cls); + } + } else { + classifier_defer(&ofproto->tables[ofm->table_id].cls); + } + } +} + +static void +ofproto_publish_classifiers(struct ofproto *ofproto) +{ + struct oftable *table; + + OFPROTO_FOR_EACH_TABLE (table, ofproto) { + classifier_publish(&table->cls); + } +} + /* Commit phases (all while locking ofproto_mutex): * * 1. Begin: Gather resources and make changes visible in the next version. @@ -8165,6 +8281,10 @@ do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags) /* Store the version in which the changes should take * effect. */ be->ofm.version = version; + /* Publishing of the classifier update for every flow + * modification in a bundle separately is expensive in + * CPU time and memory. Deferring. */ + ofproto_table_classifier_defer(ofproto, &be->ofm); error = ofproto_flow_mod_start(ofproto, &be->ofm); } else if (be->type == OFPTYPE_GROUP_MOD) { /* Store the version in which the changes should take @@ -8173,6 +8293,9 @@ do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags) error = ofproto_group_mod_start(ofproto, &be->ogm); } else if (be->type == OFPTYPE_PACKET_OUT) { be->opo.version = version; + /* Need to use current version of flows for packet-out, + * so publishing all classifiers now. */ + ofproto_publish_classifiers(ofproto); error = ofproto_packet_out_start(ofproto, &be->opo); } else { OVS_NOT_REACHED(); @@ -8183,6 +8306,9 @@ do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags) } } + /* Publishing all changes made to classifiers. */ + ofproto_publish_classifiers(ofproto); + if (error) { /* Send error referring to the original message. */ ofconn_send_error(ofconn, be->msg, error); @@ -8191,14 +8317,23 @@ do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags) /* 2. Revert. Undo all the changes made above. 
*/ LIST_FOR_EACH_REVERSE_CONTINUE(be, node, &bundle->msg_list) { if (be->type == OFPTYPE_FLOW_MOD) { + /* Publishing of the classifier update for every flow + * modification in a bundle separately is expensive in + * CPU time and memory. Deferring. */ + ofproto_table_classifier_defer(ofproto, &be->ofm); ofproto_flow_mod_revert(ofproto, &be->ofm); } else if (be->type == OFPTYPE_GROUP_MOD) { ofproto_group_mod_revert(ofproto, &be->ogm); } else if (be->type == OFPTYPE_PACKET_OUT) { + /* Need to use current version of flows for packet-out, + * so publishing all classifiers now. */ + ofproto_publish_classifiers(ofproto); ofproto_packet_out_revert(ofproto, &be->opo); } /* Nothing needs to be reverted for a port mod. */ } + /* Publishing all changes made to classifiers. */ + ofproto_publish_classifiers(ofproto); } else { /* 4. Finish. */ LIST_FOR_EACH (be, node, &bundle->msg_list) { diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h index b0262da2df..4e15167ab7 100644 --- a/ofproto/ofproto.h +++ b/ofproto/ofproto.h @@ -563,6 +563,10 @@ int ofproto_port_get_cfm_status(const struct ofproto *, enum ofputil_table_miss ofproto_table_get_miss_config(const struct ofproto *, uint8_t table_id); +void ofproto_ref(struct ofproto *); +void ofproto_unref(struct ofproto *); +bool ofproto_try_ref(struct ofproto *); + #ifdef __cplusplus } #endif diff --git a/ovsdb/file.c b/ovsdb/file.c index 59220824fa..ca80c28235 100644 --- a/ovsdb/file.c +++ b/ovsdb/file.c @@ -113,19 +113,17 @@ ovsdb_file_update_row_from_json(struct ovsdb_row *row, bool converting, if (row_contains_diff && !ovsdb_datum_is_default(&row->fields[column->index], &column->type)) { - struct ovsdb_datum new_datum; - - error = ovsdb_datum_apply_diff(&new_datum, + error = ovsdb_datum_apply_diff_in_place( &row->fields[column->index], &datum, &column->type); ovsdb_datum_destroy(&datum, &column->type); if (error) { return error; } - ovsdb_datum_swap(&datum, &new_datum); + } else { + ovsdb_datum_swap(&row->fields[column->index], 
&datum); + ovsdb_datum_destroy(&datum, &column->type); } - ovsdb_datum_swap(&row->fields[column->index], &datum); - ovsdb_datum_destroy(&datum, &column->type); } return NULL; @@ -526,6 +524,7 @@ ovsdb_file_read__(const char *filename, bool rw, error = ovsdb_txn_replay_commit(txn); if (error) { + ovsdb_error_destroy(error); ovsdb_storage_unread(storage); break; } diff --git a/ovsdb/monitor.c b/ovsdb/monitor.c index 532dedcb64..ab814cf20e 100644 --- a/ovsdb/monitor.c +++ b/ovsdb/monitor.c @@ -1231,6 +1231,15 @@ ovsdb_monitor_get_update( condition, ovsdb_monitor_compose_row_update2); if (!condition || !condition->conditional) { + if (json) { + struct json *json_serialized; + + /* Pre-serializing the object to avoid doing this + * for every client. */ + json_serialized = json_serialized_object_create(json); + json_destroy(json); + json = json_serialized; + } ovsdb_monitor_json_cache_insert(dbmon, version, mcs, json); } diff --git a/ovsdb/mutation.c b/ovsdb/mutation.c index 56edc5f000..03d1c3499e 100644 --- a/ovsdb/mutation.c +++ b/ovsdb/mutation.c @@ -383,7 +383,7 @@ ovsdb_mutation_set_execute(struct ovsdb_row *row, break; case OVSDB_M_INSERT: - ovsdb_datum_union(dst, arg, dst_type, false); + ovsdb_datum_union(dst, arg, dst_type); error = ovsdb_mutation_check_count(dst, dst_type); break; diff --git a/ovsdb/ovsdb-idlc.in b/ovsdb/ovsdb-idlc.in index 61cded16d3..a2ee10af1b 100755 --- a/ovsdb/ovsdb-idlc.in +++ b/ovsdb/ovsdb-idlc.in @@ -551,20 +551,20 @@ static void print(" smap_init(&row->%s);" % columnName) print(" for (size_t i = 0; i < datum->n; i++) {") print(" smap_add(&row->%s," % columnName) - print(" datum->keys[i].string,") - print(" datum->values[i].string);") + print(" datum->keys[i].s->string,") + print(" datum->values[i].s->string);") print(" }") elif (type.n_min == 1 and type.n_max == 1) or type.is_optional_pointer(): print("") print(" if (datum->n >= 1) {") if not type.key.ref_table: - print(" %s = datum->keys[0].%s;" % (keyVar, type.key.type.to_string())) + 
print(" %s = datum->keys[0].%s;" % (keyVar, type.key.type.to_rvalue_string())) else: print(" %s = %s%s_cast(ovsdb_idl_get_row_arc(row_, &%stable_%s, &datum->keys[0].uuid));" % (keyVar, prefix, type.key.ref_table.name.lower(), prefix, type.key.ref_table.name.lower())) if valueVar: if not type.value.ref_table: - print(" %s = datum->values[0].%s;" % (valueVar, type.value.type.to_string())) + print(" %s = datum->values[0].%s;" % (valueVar, type.value.type.to_rvalue_string())) else: print(" %s = %s%s_cast(ovsdb_idl_get_row_arc(row_, &%stable_%s, &datum->values[0].uuid));" % (valueVar, prefix, type.value.ref_table.name.lower(), prefix, type.value.ref_table.name.lower())) print(" } else {") @@ -592,7 +592,7 @@ static void """ % (prefix, type.key.ref_table.name.lower(), prefix, type.key.ref_table.name.lower(), prefix, type.key.ref_table.name.lower())) keySrc = "keyRow" else: - keySrc = "datum->keys[i].%s" % type.key.type.to_string() + keySrc = "datum->keys[i].%s" % type.key.type.to_rvalue_string() if type.value and type.value.ref_table: print("""\ struct %s%s *valueRow = %s%s_cast(ovsdb_idl_get_row_arc(row_, &%stable_%s, &datum->values[i].uuid)); @@ -602,7 +602,7 @@ static void """ % (prefix, type.value.ref_table.name.lower(), prefix, type.value.ref_table.name.lower(), prefix, type.value.ref_table.name.lower())) valueSrc = "valueRow" elif valueVar: - valueSrc = "datum->values[i].%s" % type.value.type.to_string() + valueSrc = "datum->values[i].%s" % type.value.type.to_rvalue_string() print(" if (!row->n_%s) {" % (columnName)) print(" %s = xmalloc(%s * sizeof *%s);" % ( @@ -910,45 +910,45 @@ void 'args': ', '.join(['%(type)s%(name)s' % m for m in members])}) if type.n_min == 1 and type.n_max == 1: - print(" union ovsdb_atom key;") + print(" union ovsdb_atom *key = xmalloc(sizeof *key);") if type.value: - print(" union ovsdb_atom value;") + print(" union ovsdb_atom *value = xmalloc(sizeof *value);") print("") print(" datum.n = 1;") - print(" datum.keys = &key;") - print(" " + 
type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), keyVar)) + print(" datum.keys = key;") + print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), keyVar)) if type.value: - print(" datum.values = &value;") - print(" "+ type.value.assign_c_value_casting_away_const("value.%s" % type.value.type.to_string(), valueVar)) + print(" datum.values = value;") + print(" " + type.value.copyCValue("value->%s" % type.value.type.to_lvalue_string(), valueVar)) else: print(" datum.values = NULL;") - txn_write_func = "ovsdb_idl_txn_write_clone" + txn_write_func = "ovsdb_idl_txn_write" elif type.is_optional_pointer(): - print(" union ovsdb_atom key;") print("") print(" if (%s) {" % keyVar) + print(" union ovsdb_atom *key = xmalloc(sizeof *key);") print(" datum.n = 1;") - print(" datum.keys = &key;") - print(" " + type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), keyVar)) + print(" datum.keys = key;") + print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), keyVar)) print(" } else {") print(" datum.n = 0;") print(" datum.keys = NULL;") print(" }") print(" datum.values = NULL;") - txn_write_func = "ovsdb_idl_txn_write_clone" + txn_write_func = "ovsdb_idl_txn_write" elif type.n_max == 1: - print(" union ovsdb_atom key;") print("") print(" if (%s) {" % nVar) + print(" union ovsdb_atom *key = xmalloc(sizeof *key);") print(" datum.n = 1;") - print(" datum.keys = &key;") - print(" " + type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), "*" + keyVar)) + print(" datum.keys = key;") + print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), "*" + keyVar)) print(" } else {") print(" datum.n = 0;") print(" datum.keys = NULL;") print(" }") print(" datum.values = NULL;") - txn_write_func = "ovsdb_idl_txn_write_clone" + txn_write_func = "ovsdb_idl_txn_write" else: print("") print(" datum.n = %s;" % nVar) @@ -958,9 +958,9 @@ void else: print(" 
datum.values = NULL;") print(" for (size_t i = 0; i < %s; i++) {" % nVar) - print(" " + type.key.copyCValue("datum.keys[i].%s" % type.key.type.to_string(), "%s[i]" % keyVar)) + print(" " + type.key.copyCValue("datum.keys[i].%s" % type.key.type.to_lvalue_string(), "%s[i]" % keyVar)) if type.value: - print(" " + type.value.copyCValue("datum.values[i].%s" % type.value.type.to_string(), "%s[i]" % valueVar)) + print(" " + type.value.copyCValue("datum.values[i].%s" % type.value.type.to_lvalue_string(), "%s[i]" % valueVar)) print(" }") if type.value: valueType = type.value.toAtomicType() @@ -996,9 +996,8 @@ void ''' % {'s': structName, 'c': columnName,'coltype':column.type.key.to_const_c_type(prefix), 'valtype':column.type.value.to_const_c_type(prefix), 'S': structName.upper(), 'C': columnName.upper(), 't': tableName}) - - print(" "+ type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_string(), "new_key")) - print(" "+ type.value.copyCValue("datum->values[0].%s" % type.value.type.to_string(), "new_value")) + print(" " + type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_lvalue_string(), "new_key")) + print(" " + type.value.copyCValue("datum->values[0].%s" % type.value.type.to_lvalue_string(), "new_value")) print(''' ovsdb_idl_txn_write_partial_map(&row->header_, &%(s)s_col_%(c)s, @@ -1022,8 +1021,7 @@ void ''' % {'s': structName, 'c': columnName,'coltype':column.type.key.to_const_c_type(prefix), 'valtype':column.type.value.to_const_c_type(prefix), 'S': structName.upper(), 'C': columnName.upper(), 't': tableName}) - - print(" "+ type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_string(), "delete_key")) + print(" " + type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_lvalue_string(), "delete_key")) print(''' ovsdb_idl_txn_delete_partial_map(&row->header_, &%(s)s_col_%(c)s, @@ -1049,8 +1047,7 @@ void datum->values = NULL; ''' % {'s': structName, 'c': columnName, 'valtype':column.type.key.to_const_c_type(prefix), 't': tableName}) - - print(" "+ 
type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_string(), "new_value")) + print(" " + type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_lvalue_string(), "new_value")) print(''' ovsdb_idl_txn_write_partial_set(&row->header_, &%(s)s_col_%(c)s, @@ -1074,8 +1071,7 @@ void ''' % {'s': structName, 'c': columnName,'coltype':column.type.key.to_const_c_type(prefix), 'valtype':column.type.key.to_const_c_type(prefix), 'S': structName.upper(), 'C': columnName.upper(), 't': tableName}) - - print(" "+ type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_string(), "delete_value")) + print(" " + type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_lvalue_string(), "delete_value")) print(''' ovsdb_idl_txn_delete_partial_set(&row->header_, &%(s)s_col_%(c)s, @@ -1143,37 +1139,37 @@ void print(" struct ovsdb_datum datum;") free = [] if type.n_min == 1 and type.n_max == 1: - print(" union ovsdb_atom key;") + print(" union ovsdb_atom *key = xmalloc(sizeof *key);") if type.value: - print(" union ovsdb_atom value;") + print(" union ovsdb_atom *value = xmalloc(sizeof *value);") print("") print(" datum.n = 1;") - print(" datum.keys = &key;") - print(" " + type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), keyVar, refTable=False)) + print(" datum.keys = key;") + print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), keyVar, refTable=False)) if type.value: - print(" datum.values = &value;") - print(" "+ type.value.assign_c_value_casting_away_const("value.%s" % type.value.type.to_string(), valueVar, refTable=False)) + print(" datum.values = value;") + print(" " + type.value.copyCValue("value->%s" % type.value.type.to_lvalue_string(), valueVar, refTable=False)) else: print(" datum.values = NULL;") elif type.is_optional_pointer(): - print(" union ovsdb_atom key;") print("") print(" if (%s) {" % keyVar) + print(" union ovsdb_atom *key = xmalloc(sizeof *key);") print(" datum.n = 1;") - print(" datum.keys = &key;") - print(" " +
type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), keyVar, refTable=False)) + print(" datum.keys = key;") + print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), keyVar, refTable=False)) print(" } else {") print(" datum.n = 0;") print(" datum.keys = NULL;") print(" }") print(" datum.values = NULL;") elif type.n_max == 1: - print(" union ovsdb_atom key;") print("") print(" if (%s) {" % nVar) + print(" union ovsdb_atom *key = xmalloc(sizeof *key);") print(" datum.n = 1;") - print(" datum.keys = &key;") - print(" " + type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), "*" + keyVar, refTable=False)) + print(" datum.keys = key;") + print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), "*" + keyVar, refTable=False)) print(" } else {") print(" datum.n = 0;") print(" datum.keys = NULL;") @@ -1182,16 +1178,14 @@ void else: print(" datum.n = %s;" % nVar) print(" datum.keys = %s ? xmalloc(%s * sizeof *datum.keys) : NULL;" % (nVar, nVar)) - free += ['datum.keys'] if type.value: print(" datum.values = xmalloc(%s * sizeof *datum.values);" % nVar) - free += ['datum.values'] else: print(" datum.values = NULL;") print(" for (size_t i = 0; i < %s; i++) {" % nVar) - print(" " + type.key.assign_c_value_casting_away_const("datum.keys[i].%s" % type.key.type.to_string(), "%s[i]" % keyVar, refTable=False)) + print(" " + type.key.copyCValue("datum.keys[i].%s" % type.key.type.to_lvalue_string(), "%s[i]" % keyVar, refTable=False)) if type.value: - print(" " + type.value.assign_c_value_casting_away_const("datum.values[i].%s" % type.value.type.to_string(), "%s[i]" % valueVar, refTable=False)) + print(" " + type.value.copyCValue("datum.values[i].%s" % type.value.type.to_lvalue_string(), "%s[i]" % valueVar, refTable=False)) print(" }") if type.value: valueType = type.value.toAtomicType() @@ -1211,8 +1205,8 @@ void 's': structName, 'S': structName.upper(), 'c': columnName}) - for var in
free: - print(" free(%s);" % var) + print(" ovsdb_datum_destroy(&datum, &%(s)s_col_%(c)s.type);" \ + % {'s': structName, 'c': columnName}) print("}") # Index table related functions @@ -1272,7 +1266,7 @@ struct ovsdb_idl_cursor struct ovsdb_idl_index *index, const struct %(s)s *target) { ovs_assert(index->table->class_ == &%(p)stable_%(tl)s); - return ovsdb_idl_cursor_first_ge(index, &target->header_); + return ovsdb_idl_cursor_first_ge(index, target ? &target->header_ : NULL); } struct %(s)s * @@ -1309,8 +1303,8 @@ struct %(s)s * i = 0; SMAP_FOR_EACH (node, %(c)s) { - datum->keys[i].string = node->key; - datum->values[i].string = node->value; + datum->keys[i].s = ovsdb_atom_string_create(node->key); + datum->values[i].s = ovsdb_atom_string_create(node->value); i++; } ovsdb_datum_sort_unique(datum, OVSDB_TYPE_STRING, OVSDB_TYPE_STRING); @@ -1359,10 +1353,10 @@ struct %(s)s * print() print(" datum.n = 1;") print(" datum.keys = key;") - print(" " + type.key.assign_c_value_casting_away_const("key->%s" % type.key.type.to_string(), keyVar)) + print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), keyVar)) if type.value: print(" datum.values = value;") - print(" "+ type.value.assign_c_value_casting_away_const("value->%s" % type.value.type.to_string(), valueVar)) + print(" " + type.value.copyCValue("value->%s" % type.value.type.to_lvalue_string(), valueVar)) else: print(" datum.values = NULL;") txn_write_func = "ovsdb_idl_index_write" @@ -1373,7 +1367,7 @@ struct %(s)s * print(" key = xmalloc(sizeof (union ovsdb_atom));") print(" datum.n = 1;") print(" datum.keys = key;") - print(" " + type.key.assign_c_value_casting_away_const("key->%s" % type.key.type.to_string(), keyVar)) + print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), keyVar)) print(" } else {") print(" datum.n = 0;") print(" datum.keys = NULL;") @@ -1387,7 +1381,7 @@ struct %(s)s * print(" key = xmalloc(sizeof(union ovsdb_atom));") print(" datum.n = 1;") print("
datum.keys = key;") - print(" " + type.key.assign_c_value_casting_away_const("key->%s" % type.key.type.to_string(), "*" + keyVar)) + print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), "*" + keyVar)) print(" } else {") print(" datum.n = 0;") print(" datum.keys = NULL;") @@ -1404,9 +1398,9 @@ struct %(s)s * else: print(" datum.values = NULL;") print(" for (i = 0; i < %s; i++) {" % nVar) - print(" " + type.key.copyCValue("datum.keys[i].%s" % type.key.type.to_string(), "%s[i]" % keyVar)) + print(" " + type.key.copyCValue("datum.keys[i].%s" % type.key.type.to_lvalue_string(), "%s[i]" % keyVar)) if type.value: - print(" " + type.value.copyCValue("datum.values[i].%s" % type.value.type.to_string(), "%s[i]" % valueVar)) + print(" " + type.value.copyCValue("datum.values[i].%s" % type.value.type.to_lvalue_string(), "%s[i]" % valueVar)) print(" }") if type.value: valueType = type.value.toAtomicType() diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c index 0b3d2bb714..5772955c92 100644 --- a/ovsdb/ovsdb-server.c +++ b/ovsdb/ovsdb-server.c @@ -26,6 +26,7 @@ #include "command-line.h" #include "daemon.h" #include "dirs.h" +#include "dns-resolve.h" #include "openvswitch/dynamic-string.h" #include "fatal-signal.h" #include "file.h" @@ -329,6 +330,7 @@ main(int argc, char *argv[]) service_start(&argc, &argv); fatal_ignore_sigpipe(); process_init(); + dns_resolve_init(true); bool active = false; parse_options(argc, argv, &db_filenames, &remotes, &unixctl_path, @@ -511,6 +513,7 @@ main(int argc, char *argv[]) run_command, process_status_msg(status)); } } + dns_resolve_destroy(); perf_counters_destroy(); service_stop(); return 0; @@ -904,8 +907,8 @@ query_db_string(const struct shash *all_dbs, const char *name, datum = &row->fields[column->index]; for (i = 0; i < datum->n; i++) { - if (datum->keys[i].string[0]) { - return datum->keys[i].string; + if (datum->keys[i].s->string[0]) { + return datum->keys[i].s->string; } } } @@ -1018,7 +1021,7 @@
query_db_remotes(const char *name, const struct shash *all_dbs, datum = &row->fields[column->index]; for (i = 0; i < datum->n; i++) { - add_remote(remotes, datum->keys[i].string); + add_remote(remotes, datum->keys[i].s->string); } } } else if (column->type.key.type == OVSDB_TYPE_UUID diff --git a/ovsdb/ovsdb-tool.c b/ovsdb/ovsdb-tool.c index 05a0223e71..d4a9e34cc4 100644 --- a/ovsdb/ovsdb-tool.c +++ b/ovsdb/ovsdb-tool.c @@ -919,7 +919,8 @@ print_raft_header(const struct raft_header *h, if (!uuid_is_zero(&h->snap.eid)) { printf(" prev_eid: %04x\n", uuid_prefix(&h->snap.eid, 4)); } - print_data("prev_", h->snap.data, schemap, names); + print_data("prev_", raft_entry_get_parsed_data(&h->snap), + schemap, names); } } @@ -973,11 +974,13 @@ raft_header_to_standalone_log(const struct raft_header *h, struct ovsdb_log *db_log_data) { if (h->snap_index) { - if (!h->snap.data || json_array(h->snap.data)->n != 2) { + const struct json *data = raft_entry_get_parsed_data(&h->snap); + + if (!data || json_array(data)->n != 2) { ovs_fatal(0, "Incorrect raft header data array length"); } - struct json_array *pa = json_array(h->snap.data); + struct json_array *pa = json_array(data); struct json *schema_json = pa->elems[0]; struct ovsdb_error *error = NULL; @@ -1373,7 +1376,7 @@ do_check_cluster(struct ovs_cmdl_context *ctx) } struct raft_entry *e = &s->entries[log_idx]; e->term = r->term; - e->data = r->entry.data; + raft_entry_set_parsed_data_nocopy(e, r->entry.data); e->eid = r->entry.eid; e->servers = r->entry.servers; break; diff --git a/ovsdb/ovsdb-util.c b/ovsdb/ovsdb-util.c index c4075cdae3..6d7be066b6 100644 --- a/ovsdb/ovsdb-util.c +++ b/ovsdb/ovsdb-util.c @@ -111,13 +111,13 @@ ovsdb_util_read_map_string_column(const struct ovsdb_row *row, for (i = 0; i < datum->n; i++) { atom_key = &datum->keys[i]; - if (!strcmp(atom_key->string, key)) { + if (!strcmp(atom_key->s->string, key)) { atom_value = &datum->values[i]; break; } } - return atom_value ? 
atom_value->string : NULL; + return atom_value ? atom_value->s->string : NULL; } /* Read string-uuid key-values from a map. Returns the row associated with @@ -143,7 +143,7 @@ ovsdb_util_read_map_string_uuid_column(const struct ovsdb_row *row, const struct ovsdb_datum *datum = &row->fields[column->index]; for (size_t i = 0; i < datum->n; i++) { union ovsdb_atom *atom_key = &datum->keys[i]; - if (!strcmp(atom_key->string, key)) { + if (!strcmp(atom_key->s->string, key)) { const union ovsdb_atom *atom_value = &datum->values[i]; return ovsdb_table_get_row(ref_table, &atom_value->uuid); } @@ -181,7 +181,7 @@ ovsdb_util_read_string_column(const struct ovsdb_row *row, const union ovsdb_atom *atom; atom = ovsdb_util_read_column(row, column_name, OVSDB_TYPE_STRING); - *stringp = atom ? atom->string : NULL; + *stringp = atom ? atom->s->string : NULL; return atom != NULL; } @@ -269,8 +269,10 @@ ovsdb_util_write_string_column(struct ovsdb_row *row, const char *column_name, const char *string) { if (string) { - const union ovsdb_atom atom = { .string = CONST_CAST(char *, string) }; + union ovsdb_atom atom = { + .s = ovsdb_atom_string_create(CONST_CAST(char *, string)) }; ovsdb_util_write_singleton(row, column_name, &atom, OVSDB_TYPE_STRING); + ovsdb_atom_destroy(&atom, OVSDB_TYPE_STRING); } else { ovsdb_util_clear_column(row, column_name); } @@ -305,8 +307,8 @@ ovsdb_util_write_string_string_column(struct ovsdb_row *row, datum->values = xmalloc(n * sizeof *datum->values); for (i = 0; i < n; ++i) { - datum->keys[i].string = keys[i]; - datum->values[i].string = values[i]; + datum->keys[i].s = ovsdb_atom_string_create_nocopy(keys[i]); + datum->values[i].s = ovsdb_atom_string_create_nocopy(values[i]); } /* Sort and check constraints. 
*/ diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c index 126d16a2f5..e6d866182c 100644 --- a/ovsdb/ovsdb.c +++ b/ovsdb/ovsdb.c @@ -422,6 +422,8 @@ ovsdb_create(struct ovsdb_schema *schema, struct ovsdb_storage *storage) ovs_list_init(&db->triggers); db->run_triggers_now = db->run_triggers = false; + db->n_atoms = 0; + db->is_relay = false; ovs_list_init(&db->txn_forward_new); hmap_init(&db->txn_forward_sent); @@ -518,6 +520,9 @@ ovsdb_get_memory_usage(const struct ovsdb *db, struct simap *usage) } simap_increase(usage, "cells", cells); + simap_increase(usage, "atoms", db->n_atoms); + simap_increase(usage, "txn-history", db->n_txn_history); + simap_increase(usage, "txn-history-atoms", db->n_txn_history_atoms); if (db->storage) { ovsdb_storage_get_memory_usage(db->storage, usage); diff --git a/ovsdb/ovsdb.h b/ovsdb/ovsdb.h index 4a7bd0f0ec..ec2d235ec2 100644 --- a/ovsdb/ovsdb.h +++ b/ovsdb/ovsdb.h @@ -90,8 +90,11 @@ struct ovsdb { /* History trasanctions for incremental monitor transfer. */ bool need_txn_history; /* Need to maintain history of transactions. */ unsigned int n_txn_history; /* Current number of history transactions. */ + unsigned int n_txn_history_atoms; /* Total number of atoms in history. */ struct ovs_list txn_history; /* Contains "struct ovsdb_txn_history_node. */ + size_t n_atoms; /* Total number of ovsdb atoms in the database. */ + /* Relay mode. */ bool is_relay; /* True, if database is in relay mode. */ /* List that holds transactions waiting to be forwarded to the server. */ diff --git a/ovsdb/raft-private.c b/ovsdb/raft-private.c index 26d39a087f..4145c8729f 100644 --- a/ovsdb/raft-private.c +++ b/ovsdb/raft-private.c @@ -18,11 +18,14 @@ #include "raft-private.h" +#include "coverage.h" #include "openvswitch/dynamic-string.h" #include "ovsdb-error.h" #include "ovsdb-parser.h" #include "socket-util.h" #include "sset.h" + +COVERAGE_DEFINE(raft_entry_serialize); /* Addresses of Raft servers. 
*/ @@ -33,7 +36,10 @@ raft_address_validate(const char *address) return NULL; } else if (!strncmp(address, "ssl:", 4) || !strncmp(address, "tcp:", 4)) { struct sockaddr_storage ss; - if (!inet_parse_active(address + 4, -1, &ss, true)) { + bool dns_failure = false; + + if (!inet_parse_active(address + 4, -1, &ss, true, &dns_failure) + && !dns_failure) { return ovsdb_error(NULL, "%s: syntax error in address", address); } return NULL; @@ -281,7 +287,8 @@ void raft_entry_clone(struct raft_entry *dst, const struct raft_entry *src) { dst->term = src->term; - dst->data = json_nullable_clone(src->data); + dst->data.full_json = json_nullable_clone(src->data.full_json); + dst->data.serialized = json_nullable_clone(src->data.serialized); dst->eid = src->eid; dst->servers = json_nullable_clone(src->servers); dst->election_timer = src->election_timer; @@ -291,7 +298,8 @@ void raft_entry_uninit(struct raft_entry *e) { if (e) { - json_destroy(e->data); + json_destroy(e->data.full_json); + json_destroy(e->data.serialized); json_destroy(e->servers); } } @@ -301,8 +309,9 @@ raft_entry_to_json(const struct raft_entry *e) { struct json *json = json_object_create(); raft_put_uint64(json, "term", e->term); - if (e->data) { - json_object_put(json, "data", json_clone(e->data)); + if (raft_entry_has_data(e)) { + json_object_put(json, "data", + json_clone(raft_entry_get_serialized_data(e))); json_object_put_format(json, "eid", UUID_FMT, UUID_ARGS(&e->eid)); } if (e->servers) { @@ -323,9 +332,10 @@ raft_entry_from_json(struct json *json, struct raft_entry *e) struct ovsdb_parser p; ovsdb_parser_init(&p, json, "raft log entry"); e->term = raft_parse_required_uint64(&p, "term"); - e->data = json_nullable_clone( + raft_entry_set_parsed_data(e, ovsdb_parser_member(&p, "data", OP_OBJECT | OP_ARRAY | OP_OPTIONAL)); - e->eid = e->data ? raft_parse_required_uuid(&p, "eid") : UUID_ZERO; + e->eid = raft_entry_has_data(e) + ? 
raft_parse_required_uuid(&p, "eid") : UUID_ZERO; e->servers = json_nullable_clone( ovsdb_parser_member(&p, "servers", OP_OBJECT | OP_OPTIONAL)); if (e->servers) { @@ -344,9 +354,72 @@ bool raft_entry_equals(const struct raft_entry *a, const struct raft_entry *b) { return (a->term == b->term - && json_equal(a->data, b->data) && uuid_equals(&a->eid, &b->eid) - && json_equal(a->servers, b->servers)); + && json_equal(a->servers, b->servers) + && json_equal(raft_entry_get_parsed_data(a), + raft_entry_get_parsed_data(b))); +} + +bool +raft_entry_has_data(const struct raft_entry *e) +{ + return e->data.full_json || e->data.serialized; +} + +static void +raft_entry_data_serialize(struct raft_entry *e) +{ + if (!raft_entry_has_data(e) || e->data.serialized) { + return; + } + COVERAGE_INC(raft_entry_serialize); + e->data.serialized = json_serialized_object_create(e->data.full_json); +} + +void +raft_entry_set_parsed_data_nocopy(struct raft_entry *e, struct json *json) +{ + ovs_assert(!json || json->type != JSON_SERIALIZED_OBJECT); + e->data.full_json = json; + e->data.serialized = NULL; +} + +void +raft_entry_set_parsed_data(struct raft_entry *e, const struct json *json) +{ + raft_entry_set_parsed_data_nocopy(e, json_nullable_clone(json)); +} + +/* Returns a pointer to the fully parsed json object of the data. + * Caller takes the ownership of the result. + * + * Entry will no longer contain a fully parsed json object. + * Subsequent calls for the same raft entry will return NULL. */ +struct json * OVS_WARN_UNUSED_RESULT +raft_entry_steal_parsed_data(struct raft_entry *e) +{ + /* Ensure that serialized version exists. */ + raft_entry_data_serialize(e); + + struct json *json = e->data.full_json; + e->data.full_json = NULL; + + return json; +} + +/* Returns a pointer to the fully parsed json object of the data, if any. 
*/ +const struct json * +raft_entry_get_parsed_data(const struct raft_entry *e) +{ + return e->data.full_json; +} + +/* Returns a pointer to the JSON_SERIALIZED_OBJECT of the data. */ +const struct json * +raft_entry_get_serialized_data(const struct raft_entry *e) +{ + raft_entry_data_serialize(CONST_CAST(struct raft_entry *, e)); + return e->data.serialized; } void @@ -402,8 +475,8 @@ raft_header_from_json__(struct raft_header *h, struct ovsdb_parser *p) * present, all of them must be. */ h->snap_index = raft_parse_optional_uint64(p, "prev_index"); if (h->snap_index) { - h->snap.data = json_nullable_clone( - ovsdb_parser_member(p, "prev_data", OP_ANY)); + raft_entry_set_parsed_data( + &h->snap, ovsdb_parser_member(p, "prev_data", OP_ANY)); h->snap.eid = raft_parse_required_uuid(p, "prev_eid"); h->snap.term = raft_parse_required_uint64(p, "prev_term"); h->snap.election_timer = raft_parse_optional_uint64( @@ -455,8 +528,9 @@ raft_header_to_json(const struct raft_header *h) if (h->snap_index) { raft_put_uint64(json, "prev_index", h->snap_index); raft_put_uint64(json, "prev_term", h->snap.term); - if (h->snap.data) { - json_object_put(json, "prev_data", json_clone(h->snap.data)); + if (raft_entry_has_data(&h->snap)) { + json_object_put(json, "prev_data", + json_clone(raft_entry_get_serialized_data(&h->snap))); } json_object_put_format(json, "prev_eid", UUID_FMT, UUID_ARGS(&h->snap.eid)); diff --git a/ovsdb/raft-private.h b/ovsdb/raft-private.h index a69e37e5c2..48c6df511f 100644 --- a/ovsdb/raft-private.h +++ b/ovsdb/raft-private.h @@ -118,7 +118,10 @@ void raft_servers_format(const struct hmap *servers, struct ds *ds); * entry. */ struct raft_entry { uint64_t term; - struct json *data; + struct { + struct json *full_json; /* Fully parsed JSON object. */ + struct json *serialized; /* JSON_SERIALIZED_OBJECT version of data. 
*/ + } data; struct uuid eid; struct json *servers; uint64_t election_timer; @@ -130,6 +133,13 @@ struct json *raft_entry_to_json(const struct raft_entry *); struct ovsdb_error *raft_entry_from_json(struct json *, struct raft_entry *) OVS_WARN_UNUSED_RESULT; bool raft_entry_equals(const struct raft_entry *, const struct raft_entry *); +bool raft_entry_has_data(const struct raft_entry *); +void raft_entry_set_parsed_data(struct raft_entry *, const struct json *); +void raft_entry_set_parsed_data_nocopy(struct raft_entry *, struct json *); +struct json *raft_entry_steal_parsed_data(struct raft_entry *) + OVS_WARN_UNUSED_RESULT; +const struct json *raft_entry_get_parsed_data(const struct raft_entry *); +const struct json *raft_entry_get_serialized_data(const struct raft_entry *); /* On disk data serialization and deserialization. */ diff --git a/ovsdb/raft.c b/ovsdb/raft.c index 2fb5156519..b70fbed5d4 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c @@ -74,9 +74,12 @@ enum raft_failure_test { FT_CRASH_BEFORE_SEND_EXEC_REQ, FT_CRASH_AFTER_SEND_EXEC_REQ, FT_CRASH_AFTER_RECV_APPEND_REQ_UPDATE, + FT_CRASH_BEFORE_SEND_SNAPSHOT_REP, FT_DELAY_ELECTION, FT_DONT_SEND_VOTE_REQUEST, FT_STOP_RAFT_RPC, + FT_TRANSFER_LEADERSHIP, + FT_TRANSFER_LEADERSHIP_AFTER_SEND_APPEND_REQ, }; static enum raft_failure_test failure_test; @@ -379,12 +382,19 @@ static bool raft_handle_write_error(struct raft *, struct ovsdb_error *); static void raft_run_reconfigure(struct raft *); static void raft_set_leader(struct raft *, const struct uuid *sid); + static struct raft_server * raft_find_server(const struct raft *raft, const struct uuid *sid) { return raft_server_find(&raft->servers, sid); } +static struct raft_server * +raft_find_new_server(struct raft *raft, const struct uuid *uuid) +{ + return raft_server_find(&raft->add_servers, uuid); +} + static char * raft_make_address_passive(const char *address_) { @@ -494,11 +504,11 @@ raft_create_cluster(const char *file_name, const char *name, .snap_index = 
index++, .snap = { .term = term, - .data = json_nullable_clone(data), .eid = uuid_random(), .servers = json_object_create(), }, }; + raft_entry_set_parsed_data(&h.snap, data); shash_add_nocopy(json_object(h.snap.servers), xasprintf(UUID_FMT, UUID_ARGS(&h.sid)), json_string_create(local_address)); @@ -727,10 +737,10 @@ raft_add_entry(struct raft *raft, uint64_t index = raft->log_end++; struct raft_entry *entry = &raft->entries[index - raft->log_start]; entry->term = term; - entry->data = data; entry->eid = eid ? *eid : UUID_ZERO; entry->servers = servers; entry->election_timer = election_timer; + raft_entry_set_parsed_data_nocopy(entry, data); return index; } @@ -741,13 +751,16 @@ raft_write_entry(struct raft *raft, uint64_t term, struct json *data, const struct uuid *eid, struct json *servers, uint64_t election_timer) { + uint64_t index = raft_add_entry(raft, term, data, eid, servers, + election_timer); + const struct json *entry_data = raft_entry_get_serialized_data( + &raft->entries[index - raft->log_start]); struct raft_record r = { .type = RAFT_REC_ENTRY, .term = term, .entry = { - .index = raft_add_entry(raft, term, data, eid, servers, - election_timer), - .data = data, + .index = index, + .data = CONST_CAST(struct json *, entry_data), .servers = servers, .election_timer = election_timer, .eid = eid ? *eid : UUID_ZERO, @@ -1864,6 +1877,8 @@ raft_open_conn(struct raft *raft, const char *address, const struct uuid *sid) static void raft_conn_close(struct raft_conn *conn) { + VLOG_DBG("closing connection to server %s (%s)", + conn->nickname, jsonrpc_session_get_name(conn->js)); jsonrpc_session_close(conn->js); ovs_list_remove(&conn->list_node); free(conn->nickname); @@ -1918,6 +1933,13 @@ raft_run(struct raft *raft) return; } + if (failure_test == FT_TRANSFER_LEADERSHIP) { + /* Using this function as it conveniently implements all we need and + * snapshotting is the main test scenario for leadership transfer. 
*/ + raft_notify_snapshot_recommended(raft); + failure_test = FT_NO_TEST; + } + raft_waiters_run(raft); if (!raft->listener && time_msec() >= raft->listen_backoff) { @@ -1954,16 +1976,30 @@ raft_run(struct raft *raft) } /* Close unneeded sessions. */ + struct raft_server *server; struct raft_conn *next; LIST_FOR_EACH_SAFE (conn, next, list_node, &raft->conns) { if (!raft_conn_should_stay_open(raft, conn)) { + server = raft_find_new_server(raft, &conn->sid); + if (server) { + /* We only have one incoming connection from joining servers, + * so if it's closed, we need to destroy the record about the + * server. This way the process can be started over on the + * next join request. */ + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); + VLOG_INFO_RL(&rl, "cluster "CID_FMT": server %s (%s) " + "disconnected while joining", + CID_ARGS(&raft->cid), + server->nickname, server->address); + hmap_remove(&raft->add_servers, &server->hmap_node); + raft_server_destroy(server); + } raft->n_disconnections++; raft_conn_close(conn); } } /* Open needed sessions. */ - struct raft_server *server; HMAP_FOR_EACH (server, hmap_node, &raft->servers) { raft_open_conn(raft, server->address, &server->sid); } @@ -2040,7 +2076,14 @@ raft_run(struct raft *raft) HMAP_FOR_EACH_SAFE (cmd, next_cmd, hmap_node, &raft->commands) { if (cmd->timestamp && now - cmd->timestamp > raft->election_timer * 2) { - raft_command_complete(raft, cmd, RAFT_CMD_TIMEOUT); + if (cmd->index && raft->role != RAFT_LEADER) { + /* This server lost leadership and command didn't complete + * in time. Likely, it wasn't replicated to the majority + * of servers before losing the leadership. 
*/ + raft_command_complete(raft, cmd, RAFT_CMD_LOST_LEADERSHIP); + } else { + raft_command_complete(raft, cmd, RAFT_CMD_TIMEOUT); + } } } raft_reset_ping_timer(raft); @@ -2161,7 +2204,7 @@ raft_get_eid(const struct raft *raft, uint64_t index) { for (; index >= raft->log_start; index--) { const struct raft_entry *e = raft_get_entry(raft, index); - if (e->data) { + if (raft_entry_has_data(e)) { return &e->eid; } } @@ -2232,6 +2275,9 @@ raft_command_initiate(struct raft *raft, if (failure_test == FT_CRASH_AFTER_SEND_APPEND_REQ) { ovs_fatal(0, "Raft test: crash after sending append_request."); } + if (failure_test == FT_TRANSFER_LEADERSHIP_AFTER_SEND_APPEND_REQ) { + failure_test = FT_TRANSFER_LEADERSHIP; + } raft_reset_ping_timer(raft); return cmd; @@ -2598,7 +2644,13 @@ raft_become_follower(struct raft *raft) * configuration is already part of the log. Possibly the configuration * log entry will not be committed, but until we know that we must use the * new configuration. Our AppendEntries processing will properly update - * the server configuration later, if necessary. */ + * the server configuration later, if necessary. + * + * Also we do not complete commands here, as they can still be completed + * if their log entries have already been replicated to other servers. + * If the entries were actually committed according to the new leader, our + * AppendEntries processing will complete the corresponding commands. 
+ */ struct raft_server *s; HMAP_FOR_EACH (s, hmap_node, &raft->add_servers) { raft_send_add_server_reply__(raft, &s->sid, s->address, false, @@ -2612,8 +2664,6 @@ raft_become_follower(struct raft *raft) raft_server_destroy(raft->remove_server); raft->remove_server = NULL; } - - raft_complete_all_commands(raft, RAFT_CMD_LOST_LEADERSHIP); } static void @@ -2826,8 +2876,8 @@ raft_truncate(struct raft *raft, uint64_t new_end) return servers_changed; } -static const struct json * -raft_peek_next_entry(struct raft *raft, struct uuid *eid) +static const struct raft_entry * +raft_peek_next_entry(struct raft *raft) { /* Invariant: log_start - 2 <= last_applied <= commit_index < log_end. */ ovs_assert(raft->log_start <= raft->last_applied + 2); @@ -2839,32 +2889,20 @@ raft_peek_next_entry(struct raft *raft, struct uuid *eid) } if (raft->log_start == raft->last_applied + 2) { - *eid = raft->snap.eid; - return raft->snap.data; + return &raft->snap; } while (raft->last_applied < raft->commit_index) { const struct raft_entry *e = raft_get_entry(raft, raft->last_applied + 1); - if (e->data) { - *eid = e->eid; - return e->data; + if (raft_entry_has_data(e)) { + return e; } raft->last_applied++; } return NULL; } -static const struct json * -raft_get_next_entry(struct raft *raft, struct uuid *eid) -{ - const struct json *data = raft_peek_next_entry(raft, eid); - if (data) { - raft->last_applied++; - } - return data; -} - /* Updates commit index in raft log. If commit index is already up-to-date * it does nothing and return false, otherwise, returns true. 
*/ static bool @@ -2874,61 +2912,56 @@ raft_update_commit_index(struct raft *raft, uint64_t new_commit_index) return false; } - if (raft->role == RAFT_LEADER) { - while (raft->commit_index < new_commit_index) { - uint64_t index = ++raft->commit_index; - const struct raft_entry *e = raft_get_entry(raft, index); - if (e->data) { - struct raft_command *cmd - = raft_find_command_by_eid(raft, &e->eid); - if (cmd) { - if (!cmd->index) { - VLOG_DBG("Command completed after role change from" - " follower to leader "UUID_FMT, - UUID_ARGS(&e->eid)); - cmd->index = index; - } - raft_command_complete(raft, cmd, RAFT_CMD_SUCCESS); + while (raft->commit_index < new_commit_index) { + uint64_t index = ++raft->commit_index; + const struct raft_entry *e = raft_get_entry(raft, index); + + if (raft_entry_has_data(e)) { + struct raft_command *cmd = raft_find_command_by_eid(raft, &e->eid); + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); + + if (cmd) { + if (!cmd->index && raft->role == RAFT_LEADER) { + VLOG_INFO_RL(&rl, + "command completed after role change from " + "follower to leader (eid: "UUID_FMT", " + "commit index: %"PRIu64")", UUID_ARGS(&e->eid), index); + } else if (!cmd->index && raft->role != RAFT_LEADER) { + /* This can happen when leader fail-over before sending + * execute_command_reply. */ + VLOG_INFO_RL(&rl, + "command completed without reply (eid: "UUID_FMT", " + "commit index: %"PRIu64")", UUID_ARGS(&e->eid), index); + } else if (cmd->index && raft->role != RAFT_LEADER) { + /* This can happen if current server lost leadership after + * sending append requests to the majority of servers, but + * before receiving majority of append replies. */ + VLOG_INFO_RL(&rl, + "command completed after role change from " + "leader to follower (eid: "UUID_FMT", " + "commit index: %"PRIu64")", UUID_ARGS(&e->eid), index); + /* Clearing 'sid' to avoid sending cmd execution reply. 
*/ + cmd->sid = UUID_ZERO; + } else { + /* (cmd->index && raft->role == RAFT_LEADER) + * Normal command completion on a leader. */ } - } - if (e->election_timer) { - VLOG_INFO("Election timer changed from %"PRIu64" to %"PRIu64, - raft->election_timer, e->election_timer); - raft->election_timer = e->election_timer; - raft->election_timer_new = 0; - raft_update_probe_intervals(raft); - } - if (e->servers) { - /* raft_run_reconfigure() can write a new Raft entry, which can - * reallocate raft->entries, which would invalidate 'e', so - * this case must be last, after the one for 'e->data'. */ - raft_run_reconfigure(raft); + cmd->index = index; + raft_command_complete(raft, cmd, RAFT_CMD_SUCCESS); } } - } else { - while (raft->commit_index < new_commit_index) { - uint64_t index = ++raft->commit_index; - const struct raft_entry *e = raft_get_entry(raft, index); - if (e->election_timer) { - VLOG_INFO("Election timer changed from %"PRIu64" to %"PRIu64, - raft->election_timer, e->election_timer); - raft->election_timer = e->election_timer; - raft_update_probe_intervals(raft); - } + if (e->election_timer) { + VLOG_INFO("Election timer changed from %"PRIu64" to %"PRIu64, + raft->election_timer, e->election_timer); + raft->election_timer = e->election_timer; + raft->election_timer_new = 0; + raft_update_probe_intervals(raft); } - /* Check if any pending command can be completed, and complete it. - * This can happen when leader fail-over before sending - * execute_command_reply. 
*/ - const struct uuid *eid = raft_get_eid(raft, new_commit_index); - struct raft_command *cmd = raft_find_command_by_eid(raft, eid); - if (cmd) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); - VLOG_INFO_RL(&rl, - "Command completed without reply (eid: "UUID_FMT", " - "commit index: %"PRIu64")", - UUID_ARGS(eid), new_commit_index); - cmd->index = new_commit_index; - raft_command_complete(raft, cmd, RAFT_CMD_SUCCESS); + if (e->servers && raft->role == RAFT_LEADER) { + /* raft_run_reconfigure() can write a new Raft entry, which can + * reallocate raft->entries, which would invalidate 'e', so + * this case must be last, after the one for 'e->data'. */ + raft_run_reconfigure(raft); } } @@ -3059,7 +3092,9 @@ raft_handle_append_entries(struct raft *raft, for (; i < n_entries; i++) { const struct raft_entry *e = &entries[i]; error = raft_write_entry(raft, e->term, - json_nullable_clone(e->data), &e->eid, + json_nullable_clone( + raft_entry_get_parsed_data(e)), + &e->eid, json_nullable_clone(e->servers), e->election_timer); if (error) { @@ -3314,20 +3349,29 @@ bool raft_has_next_entry(const struct raft *raft_) { struct raft *raft = CONST_CAST(struct raft *, raft_); - struct uuid eid; - return raft_peek_next_entry(raft, &eid) != NULL; + return raft_peek_next_entry(raft) != NULL; } /* Returns the next log entry or snapshot from 'raft', or NULL if there are - * none left to read. Stores the entry ID of the log entry in '*eid'. Stores - * true in '*is_snapshot' if the returned data is a snapshot, false if it is a - * log entry. */ -const struct json * -raft_next_entry(struct raft *raft, struct uuid *eid, bool *is_snapshot) + * none left to read. Stores the entry ID of the log entry in '*eid'. + * + * The caller takes ownership of the result. 
*/ +struct json * OVS_WARN_UNUSED_RESULT +raft_next_entry(struct raft *raft, struct uuid *eid) { - const struct json *data = raft_get_next_entry(raft, eid); - *is_snapshot = data == raft->snap.data; - return data; + const struct raft_entry *e = raft_peek_next_entry(raft); + + if (!e) { + return NULL; + } + + raft->last_applied++; + *eid = e->eid; + + /* DB will only read each entry once, so we don't need to store the fully + * parsed json object any longer. The serialized version is sufficient + * for sending to other cluster members or writing to the log. */ + return raft_entry_steal_parsed_data(CONST_CAST(struct raft_entry *, e)); } /* Returns the log index of the last-read snapshot or log entry. */ @@ -3352,12 +3396,6 @@ raft_find_peer(struct raft *raft, const struct uuid *uuid) return s && !uuid_equals(&raft->sid, &s->sid) ? s : NULL; } -static struct raft_server * -raft_find_new_server(struct raft *raft, const struct uuid *uuid) -{ - return raft_server_find(&raft->add_servers, uuid); -} - /* Figure 3.1: "If there exists an N such that N > commitIndex, a * majority of matchIndex[i] >= N, and log[N].term == currentTerm, set * commitIndex = N (sections 3.5 and 3.6)." 
*/ @@ -3420,6 +3458,7 @@ raft_send_install_snapshot_request(struct raft *raft, const struct raft_server *s, const char *comment) { + const struct json *data = raft_entry_get_serialized_data(&raft->snap); union raft_rpc rpc = { .install_snapshot_request = { .common = { @@ -3432,7 +3471,7 @@ raft_send_install_snapshot_request(struct raft *raft, .last_term = raft->snap.term, .last_servers = raft->snap.servers, .last_eid = raft->snap.eid, - .data = raft->snap.data, + .data = CONST_CAST(struct json *, data), .election_timer = raft->election_timer, /* use latest value */ } }; @@ -3980,6 +4019,10 @@ raft_write_snapshot(struct raft *raft, struct ovsdb_log *log, uint64_t new_log_start, const struct raft_entry *new_snapshot) { + /* Ensure that new snapshot contains serialized data object, so it will + * not be allocated while serializing the on-stack raft header object. */ + ovs_assert(raft_entry_get_serialized_data(new_snapshot)); + struct raft_header h = { .sid = raft->sid, .cid = raft->cid, @@ -3998,12 +4041,13 @@ raft_write_snapshot(struct raft *raft, struct ovsdb_log *log, /* Write log records. */ for (uint64_t index = new_log_start; index < raft->log_end; index++) { const struct raft_entry *e = &raft->entries[index - raft->log_start]; + const struct json *log_data = raft_entry_get_serialized_data(e); struct raft_record r = { .type = RAFT_REC_ENTRY, .term = e->term, .entry = { .index = index, - .data = e->data, + .data = CONST_CAST(struct json *, log_data), .servers = e->servers, .election_timer = e->election_timer, .eid = e->eid, @@ -4093,19 +4137,21 @@ raft_handle_install_snapshot_request__( /* Case 3: The new snapshot starts past the end of our current log, so * discard all of our current log. 
*/ - const struct raft_entry new_snapshot = { + struct raft_entry new_snapshot = { .term = rq->last_term, - .data = rq->data, .eid = rq->last_eid, - .servers = rq->last_servers, + .servers = json_clone(rq->last_servers), .election_timer = rq->election_timer, }; + raft_entry_set_parsed_data(&new_snapshot, rq->data); + struct ovsdb_error *error = raft_save_snapshot(raft, new_log_start, &new_snapshot); if (error) { char *error_s = ovsdb_error_to_string_free(error); VLOG_WARN("could not save snapshot: %s", error_s); free(error_s); + raft_entry_uninit(&new_snapshot); return false; } @@ -4120,7 +4166,7 @@ raft_handle_install_snapshot_request__( } raft_entry_uninit(&raft->snap); - raft_entry_clone(&raft->snap, &new_snapshot); + raft->snap = new_snapshot; raft_get_servers_from_log(raft, VLL_INFO); raft_get_election_timer_from_log(raft); @@ -4132,6 +4178,10 @@ static void raft_handle_install_snapshot_request( struct raft *raft, const struct raft_install_snapshot_request *rq) { + if (failure_test == FT_CRASH_BEFORE_SEND_SNAPSHOT_REP) { + ovs_fatal(0, "Raft test: crash before sending install_snapshot_reply"); + } + if (raft_handle_install_snapshot_request__(raft, rq)) { union raft_rpc rpy = { .install_snapshot_reply = { @@ -4216,7 +4266,7 @@ raft_may_snapshot(const struct raft *raft) && !raft->leaving && !raft->left && !raft->failed - && raft->role != RAFT_LEADER + && (raft->role == RAFT_FOLLOWER || hmap_count(&raft->servers) == 1) && raft->last_applied >= raft->log_start); } @@ -4265,11 +4315,12 @@ raft_store_snapshot(struct raft *raft, const struct json *new_snapshot_data) uint64_t new_log_start = raft->last_applied + 1; struct raft_entry new_snapshot = { .term = raft_get_term(raft, new_log_start - 1), - .data = json_clone(new_snapshot_data), .eid = *raft_get_eid(raft, new_log_start - 1), .servers = json_clone(raft_servers_for_index(raft, new_log_start - 1)), .election_timer = raft->election_timer, }; + raft_entry_set_parsed_data(&new_snapshot, new_snapshot_data); + struct 
ovsdb_error *error = raft_save_snapshot(raft, new_log_start, &new_snapshot); if (error) { @@ -4286,6 +4337,9 @@ raft_store_snapshot(struct raft *raft, const struct json *new_snapshot_data) memmove(&raft->entries[0], &raft->entries[new_log_start - raft->log_start], (raft->log_end - new_log_start) * sizeof *raft->entries); raft->log_start = new_log_start; + /* It's a snapshot of the current database state, ovsdb-server will not + * read it back. Destroying the parsed json object to not waste memory. */ + json_destroy(raft_entry_steal_parsed_data(&raft->snap)); return NULL; } @@ -4926,6 +4980,8 @@ raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED, failure_test = FT_CRASH_AFTER_SEND_EXEC_REQ; } else if (!strcmp(test, "crash-after-receiving-append-request-update")) { failure_test = FT_CRASH_AFTER_RECV_APPEND_REQ_UPDATE; + } else if (!strcmp(test, "crash-before-sending-install-snapshot-reply")) { + failure_test = FT_CRASH_BEFORE_SEND_SNAPSHOT_REP; } else if (!strcmp(test, "delay-election")) { failure_test = FT_DELAY_ELECTION; struct raft *raft; @@ -4938,6 +4994,11 @@ raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED, failure_test = FT_DONT_SEND_VOTE_REQUEST; } else if (!strcmp(test, "stop-raft-rpc")) { failure_test = FT_STOP_RAFT_RPC; + } else if (!strcmp(test, + "transfer-leadership-after-sending-append-request")) { + failure_test = FT_TRANSFER_LEADERSHIP_AFTER_SEND_APPEND_REQ; + } else if (!strcmp(test, "transfer-leadership")) { + failure_test = FT_TRANSFER_LEADERSHIP; } else if (!strcmp(test, "clear")) { failure_test = FT_NO_TEST; unixctl_command_reply(conn, "test dismissed"); diff --git a/ovsdb/raft.h b/ovsdb/raft.h index 3545c41c2c..599bc0ae86 100644 --- a/ovsdb/raft.h +++ b/ovsdb/raft.h @@ -132,8 +132,8 @@ bool raft_left(const struct raft *); bool raft_failed(const struct raft *); /* Reading snapshots and log entries. 
*/ -const struct json *raft_next_entry(struct raft *, struct uuid *eid, - bool *is_snapshot); +struct json *raft_next_entry(struct raft *, struct uuid *eid) + OVS_WARN_UNUSED_RESULT; bool raft_has_next_entry(const struct raft *); uint64_t raft_get_applied_index(const struct raft *); diff --git a/ovsdb/rbac.c b/ovsdb/rbac.c index 2986027c90..ff411675f0 100644 --- a/ovsdb/rbac.c +++ b/ovsdb/rbac.c @@ -53,8 +53,8 @@ ovsdb_find_row_by_string_key(const struct ovsdb_table *table, HMAP_FOR_EACH (row, hmap_node, &table->rows) { const struct ovsdb_datum *datum = &row->fields[column->index]; for (size_t i = 0; i < datum->n; i++) { - if (datum->keys[i].string[0] && - !strcmp(key, datum->keys[i].string)) { + if (datum->keys[i].s->string[0] && + !strcmp(key, datum->keys[i].s->string)) { return row; } } @@ -113,7 +113,7 @@ ovsdb_rbac_authorized(const struct ovsdb_row *perms, } for (i = 0; i < datum->n; i++) { - const char *name = datum->keys[i].string; + const char *name = datum->keys[i].s->string; const char *value = NULL; bool is_map; @@ -271,7 +271,7 @@ rbac_column_modification_permitted(const struct ovsdb_column *column, size_t i; for (i = 0; i < modifiable->n; i++) { - char *name = modifiable->keys[i].string; + char *name = modifiable->keys[i].s->string; if (!strcmp(name, column->name)) { return true; diff --git a/ovsdb/row.c b/ovsdb/row.c index 65a0546211..e83c60a218 100644 --- a/ovsdb/row.c +++ b/ovsdb/row.c @@ -38,8 +38,7 @@ allocate_row(const struct ovsdb_table *table) struct ovsdb_row *row = xmalloc(row_size); row->table = CONST_CAST(struct ovsdb_table *, table); row->txn_row = NULL; - ovs_list_init(&row->src_refs); - ovs_list_init(&row->dst_refs); + hmap_init(&row->dst_refs); row->n_refs = 0; return row; } @@ -61,6 +60,78 @@ ovsdb_row_create(const struct ovsdb_table *table) return row; } +static struct ovsdb_weak_ref * +ovsdb_weak_ref_clone(struct ovsdb_weak_ref *src) +{ + struct ovsdb_weak_ref *weak = xzalloc(sizeof *weak); + + hmap_node_nullify(&weak->dst_node); + 
ovs_list_init(&weak->src_node); + weak->src_table = src->src_table; + weak->src = src->src; + weak->dst_table = src->dst_table; + weak->dst = src->dst; + ovsdb_atom_clone(&weak->key, &src->key, src->type.key.type); + if (src->type.value.type != OVSDB_TYPE_VOID) { + ovsdb_atom_clone(&weak->value, &src->value, src->type.value.type); + } + ovsdb_type_clone(&weak->type, &src->type); + weak->column_idx = src->column_idx; + weak->by_key = src->by_key; + return weak; +} + +uint32_t +ovsdb_weak_ref_hash(const struct ovsdb_weak_ref *weak) +{ + return uuid_hash(&weak->src); +} + +static bool +ovsdb_weak_ref_equals(const struct ovsdb_weak_ref *a, + const struct ovsdb_weak_ref *b) +{ + if (a == b) { + return true; + } + return a->src_table == b->src_table + && a->dst_table == b->dst_table + && uuid_equals(&a->src, &b->src) + && uuid_equals(&a->dst, &b->dst) + && a->column_idx == b->column_idx + && a->by_key == b->by_key + && ovsdb_atom_equals(&a->key, &b->key, a->type.key.type); +} + +struct ovsdb_weak_ref * +ovsdb_row_find_weak_ref(const struct ovsdb_row *row, + const struct ovsdb_weak_ref *ref) +{ + struct ovsdb_weak_ref *weak; + HMAP_FOR_EACH_WITH_HASH (weak, dst_node, + ovsdb_weak_ref_hash(ref), &row->dst_refs) { + if (ovsdb_weak_ref_equals(weak, ref)) { + return weak; + } + } + return NULL; +} + +void +ovsdb_weak_ref_destroy(struct ovsdb_weak_ref *weak) +{ + if (!weak) { + return; + } + ovs_assert(ovs_list_is_empty(&weak->src_node)); + ovsdb_atom_destroy(&weak->key, weak->type.key.type); + if (weak->type.value.type != OVSDB_TYPE_VOID) { + ovsdb_atom_destroy(&weak->value, weak->type.value.type); + } + ovsdb_type_destroy(&weak->type); + free(weak); +} + struct ovsdb_row * ovsdb_row_clone(const struct ovsdb_row *old) { @@ -75,6 +146,13 @@ ovsdb_row_clone(const struct ovsdb_row *old) &old->fields[column->index], &column->type); } + + struct ovsdb_weak_ref *weak, *clone; + HMAP_FOR_EACH (weak, dst_node, &old->dst_refs) { + clone = ovsdb_weak_ref_clone(weak); + 
hmap_insert(&new->dst_refs, &clone->dst_node, + ovsdb_weak_ref_hash(clone)); + } return new; } @@ -85,20 +163,13 @@ ovsdb_row_destroy(struct ovsdb_row *row) { if (row) { const struct ovsdb_table *table = row->table; - struct ovsdb_weak_ref *weak, *next; + struct ovsdb_weak_ref *weak; const struct shash_node *node; - LIST_FOR_EACH_SAFE (weak, next, dst_node, &row->dst_refs) { - ovs_list_remove(&weak->src_node); - ovs_list_remove(&weak->dst_node); - free(weak); - } - - LIST_FOR_EACH_SAFE (weak, next, src_node, &row->src_refs) { - ovs_list_remove(&weak->src_node); - ovs_list_remove(&weak->dst_node); - free(weak); + HMAP_FOR_EACH_POP (weak, dst_node, &row->dst_refs) { + ovsdb_weak_ref_destroy(weak); } + hmap_destroy(&row->dst_refs); SHASH_FOR_EACH (node, &table->schema->columns) { const struct ovsdb_column *column = node->data; diff --git a/ovsdb/row.h b/ovsdb/row.h index 394ac8eb49..fe04555d0c 100644 --- a/ovsdb/row.h +++ b/ovsdb/row.h @@ -36,11 +36,28 @@ struct ovsdb_column_set; * ovsdb_weak_ref" structures are created for them. */ struct ovsdb_weak_ref { - struct ovs_list src_node; /* In src->src_refs list. */ - struct ovs_list dst_node; /* In destination row's dst_refs list. */ - struct ovsdb_row *src; /* Source row. */ - struct ovsdb_table *dst_table; /* Destination table. */ + struct hmap_node dst_node; /* In ovsdb_row's 'dst_refs' hmap. */ + struct ovs_list src_node; /* In txn_row's 'deleted/added_refs'. */ + + struct ovsdb_table *src_table; /* Source row table. */ + struct uuid src; /* Source row uuid. */ + + struct ovsdb_table *dst_table; /* Destination row table. */ struct uuid dst; /* Destination row uuid. */ + + /* Source row's key-value pair that created this reference. + * This information is needed in order to find and delete the reference + * from the source row. We need both key and value in order to avoid + * accidential deletion of an updated data, i.e. if value in datum got + * updated and the reference was created by the old value. 
+ * Storing column index in order to remove references from the correct + * column. 'by_key' flag allows to distinguish 2 references in a corner + * case where key and value are the same. */ + union ovsdb_atom key; + union ovsdb_atom value; + struct ovsdb_type type; /* Datum type of the key-value pair. */ + unsigned int column_idx; /* Row column index for this pair. */ + bool by_key; /* 'true' if reference is a 'key'. */ }; /* A row in a database table. */ @@ -50,8 +67,7 @@ struct ovsdb_row { struct ovsdb_txn_row *txn_row; /* Transaction that row is in, if any. */ /* Weak references. Updated and checked only at transaction commit. */ - struct ovs_list src_refs; /* Weak references from this row. */ - struct ovs_list dst_refs; /* Weak references to this row. */ + struct hmap dst_refs; /* Weak references to this row. */ /* Number of strong refs to this row from other rows, in this table or * other tables, through 'uuid' columns that have a 'refTable' constraint @@ -69,6 +85,12 @@ struct ovsdb_row { * index 'i' is contained in hmap table->indexes[i]. 
*/ }; +uint32_t ovsdb_weak_ref_hash(const struct ovsdb_weak_ref *); +struct ovsdb_weak_ref * ovsdb_row_find_weak_ref(const struct ovsdb_row *, + const struct ovsdb_weak_ref *); +void ovsdb_weak_ref_destroy(struct ovsdb_weak_ref *); + + struct ovsdb_row *ovsdb_row_create(const struct ovsdb_table *); struct ovsdb_row *ovsdb_row_clone(const struct ovsdb_row *); void ovsdb_row_destroy(struct ovsdb_row *); diff --git a/ovsdb/storage.c b/ovsdb/storage.c index d727b1eacd..d4984be250 100644 --- a/ovsdb/storage.c +++ b/ovsdb/storage.c @@ -268,9 +268,7 @@ ovsdb_storage_read(struct ovsdb_storage *storage, struct json *schema_json = NULL; struct json *txn_json = NULL; if (storage->raft) { - bool is_snapshot; - json = json_nullable_clone( - raft_next_entry(storage->raft, txnid, &is_snapshot)); + json = raft_next_entry(storage->raft, txnid); if (!json) { return NULL; } else if (json->type != JSON_ARRAY || json->array.n != 2) { @@ -509,7 +507,11 @@ schedule_next_snapshot(struct ovsdb_storage *storage, bool quick) long long int now = time_msec(); storage->next_snapshot_min = now + base + random_range(range); - storage->next_snapshot_max = now + 60LL * 60 * 24 * 1000; /* 1 day */ + if (!quick) { + long long int one_day = 60LL * 60 * 24 * 1000; + + storage->next_snapshot_max = now + one_day; + } } else { storage->next_snapshot_min = LLONG_MAX; storage->next_snapshot_max = LLONG_MAX; @@ -517,7 +519,7 @@ schedule_next_snapshot(struct ovsdb_storage *storage, bool quick) } bool -ovsdb_storage_should_snapshot(const struct ovsdb_storage *storage) +ovsdb_storage_should_snapshot(struct ovsdb_storage *storage) { if (storage->raft || storage->log) { /* If we haven't reached the minimum snapshot time, don't snapshot. 
*/ @@ -546,6 +548,15 @@ ovsdb_storage_should_snapshot(const struct ovsdb_storage *storage) } if (!snapshot_recommended) { + if (storage->raft) { + /* Re-scheduling with a quick retry in order to avoid condition + * where all the raft servers passed the minimal time already, + * but the log didn't grow a lot, so they are all checking on + * every iteration. This will randomize the time of the next + * attempt, so all the servers will not start snapshotting at + * the same time when the log reaches a critical size. */ + schedule_next_snapshot(storage, true); + } return false; } diff --git a/ovsdb/storage.h b/ovsdb/storage.h index e120094d7a..ff026b77fa 100644 --- a/ovsdb/storage.h +++ b/ovsdb/storage.h @@ -76,7 +76,7 @@ uint64_t ovsdb_write_get_commit_index(const struct ovsdb_write *); void ovsdb_write_wait(const struct ovsdb_write *); void ovsdb_write_destroy(struct ovsdb_write *); -bool ovsdb_storage_should_snapshot(const struct ovsdb_storage *); +bool ovsdb_storage_should_snapshot(struct ovsdb_storage *); struct ovsdb_error *ovsdb_storage_store_snapshot(struct ovsdb_storage *storage, const struct json *schema, const struct json *snapshot) diff --git a/ovsdb/transaction.c b/ovsdb/transaction.c index 8ffefcf7c9..db86d847c3 100644 --- a/ovsdb/transaction.c +++ b/ovsdb/transaction.c @@ -41,6 +41,9 @@ struct ovsdb_txn { struct ovs_list txn_tables; /* Contains "struct ovsdb_txn_table"s. */ struct ds comment; struct uuid txnid; /* For clustered mode only. It is the eid. */ + size_t n_atoms; /* Number of atoms in all transaction rows. */ + ssize_t n_atoms_diff; /* Difference between number of added and + * removed atoms. */ }; /* A table modified by a transaction. */ @@ -86,6 +89,10 @@ struct ovsdb_txn_row { struct uuid uuid; struct ovsdb_table *table; + /* Weak refs that needs to be added/deleted to/from destination rows. */ + struct ovs_list added_refs; + struct ovs_list deleted_refs; + /* Used by for_each_txn_row(). 
*/ unsigned int serial; /* Serial number of in-progress commit. */ @@ -151,6 +158,23 @@ ovsdb_txn_row_abort(struct ovsdb_txn *txn OVS_UNUSED, } else { hmap_replace(&new->table->rows, &new->hmap_node, &old->hmap_node); } + + struct ovsdb_weak_ref *weak, *next; + LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->deleted_refs) { + ovs_list_remove(&weak->src_node); + ovs_list_init(&weak->src_node); + if (hmap_node_is_null(&weak->dst_node)) { + ovsdb_weak_ref_destroy(weak); + } + } + LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->added_refs) { + ovs_list_remove(&weak->src_node); + ovs_list_init(&weak->src_node); + if (hmap_node_is_null(&weak->dst_node)) { + ovsdb_weak_ref_destroy(weak); + } + } + ovsdb_row_destroy(new); free(txn_row); @@ -266,9 +290,9 @@ ovsdb_txn_adjust_atom_refs(struct ovsdb_txn *txn, const struct ovsdb_row *r, static struct ovsdb_error * OVS_WARN_UNUSED_RESULT ovsdb_txn_adjust_row_refs(struct ovsdb_txn *txn, const struct ovsdb_row *r, - const struct ovsdb_column *column, int delta) + const struct ovsdb_column *column, + const struct ovsdb_datum *field, int delta) { - const struct ovsdb_datum *field = &r->fields[column->index]; struct ovsdb_error *error; error = ovsdb_txn_adjust_atom_refs(txn, r, column, &column->type.key, @@ -291,14 +315,39 @@ update_row_ref_count(struct ovsdb_txn *txn, struct ovsdb_txn_row *r) struct ovsdb_error *error; if (bitmap_is_set(r->changed, column->index)) { - if (r->old) { - error = ovsdb_txn_adjust_row_refs(txn, r->old, column, -1); + if (r->old && !r->new) { + error = ovsdb_txn_adjust_row_refs( + txn, r->old, column, + &r->old->fields[column->index], -1); if (error) { return OVSDB_WRAP_BUG("error decreasing refcount", error); } - } - if (r->new) { - error = ovsdb_txn_adjust_row_refs(txn, r->new, column, 1); + } else if (!r->old && r->new) { + error = ovsdb_txn_adjust_row_refs( + txn, r->new, column, + &r->new->fields[column->index], 1); + if (error) { + return error; + } + } else if (r->old && r->new) { + struct 
ovsdb_datum added, removed; + + ovsdb_datum_added_removed(&added, &removed, + &r->old->fields[column->index], + &r->new->fields[column->index], + &column->type); + + error = ovsdb_txn_adjust_row_refs( + txn, r->old, column, &removed, -1); + ovsdb_datum_destroy(&removed, &column->type); + if (error) { + ovsdb_datum_destroy(&added, &column->type); + return OVSDB_WRAP_BUG("error decreasing refcount", error); + } + + error = ovsdb_txn_adjust_row_refs( + txn, r->new, column, &added, 1); + ovsdb_datum_destroy(&added, &column->type); if (error) { return error; } @@ -459,93 +508,125 @@ static struct ovsdb_error * ovsdb_txn_update_weak_refs(struct ovsdb_txn *txn OVS_UNUSED, struct ovsdb_txn_row *txn_row) { - struct ovsdb_weak_ref *weak, *next; + struct ovsdb_weak_ref *weak, *next, *dst_weak; + struct ovsdb_row *dst_row; - /* Remove the weak references originating in the old version of the row. */ - if (txn_row->old) { - LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->old->src_refs) { - ovs_list_remove(&weak->src_node); - ovs_list_remove(&weak->dst_node); - free(weak); + /* Find and clean up deleted references from destination rows. */ + LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->deleted_refs) { + dst_row = CONST_CAST(struct ovsdb_row *, + ovsdb_table_get_row(weak->dst_table, &weak->dst)); + if (dst_row) { + dst_weak = ovsdb_row_find_weak_ref(dst_row, weak); + hmap_remove(&dst_row->dst_refs, &dst_weak->dst_node); + ovs_assert(ovs_list_is_empty(&dst_weak->src_node)); + ovsdb_weak_ref_destroy(dst_weak); + } + ovs_list_remove(&weak->src_node); + ovs_list_init(&weak->src_node); + if (hmap_node_is_null(&weak->dst_node)) { + ovsdb_weak_ref_destroy(weak); } } - /* Although the originating rows have the responsibility of updating the - * weak references in the dst, it is possible that some source rows aren't - * part of the transaction. In that situation this row needs to move the - * list of incoming weak references from the old row into the new one. 
- */ - if (txn_row->old && txn_row->new) { - /* Move the incoming weak references from old to new. */ - ovs_list_push_back_all(&txn_row->new->dst_refs, - &txn_row->old->dst_refs); - } - - /* Insert the weak references originating in the new version of the row. */ - struct ovsdb_row *dst_row; - if (txn_row->new) { - LIST_FOR_EACH (weak, src_node, &txn_row->new->src_refs) { - /* dst_row MUST exist. */ - dst_row = CONST_CAST(struct ovsdb_row *, + /* Insert the weak references added in the new version of the row. */ + LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->added_refs) { + dst_row = CONST_CAST(struct ovsdb_row *, ovsdb_table_get_row(weak->dst_table, &weak->dst)); - ovs_list_insert(&dst_row->dst_refs, &weak->dst_node); - } + + ovs_assert(!ovsdb_row_find_weak_ref(dst_row, weak)); + hmap_insert(&dst_row->dst_refs, &weak->dst_node, + ovsdb_weak_ref_hash(weak)); + ovs_list_remove(&weak->src_node); + ovs_list_init(&weak->src_node); } return NULL; } static void -add_weak_ref(const struct ovsdb_row *src_, const struct ovsdb_row *dst_) +add_weak_ref(struct ovsdb_txn_row *txn_row, const struct ovsdb_row *dst_, + struct ovs_list *ref_list, + const union ovsdb_atom *key, const union ovsdb_atom *value, + bool by_key, const struct ovsdb_column *column) { - struct ovsdb_row *src = CONST_CAST(struct ovsdb_row *, src_); struct ovsdb_row *dst = CONST_CAST(struct ovsdb_row *, dst_); struct ovsdb_weak_ref *weak; - if (src == dst) { + if (txn_row->new == dst) { return; } - if (!ovs_list_is_empty(&dst->dst_refs)) { - /* Omit duplicates. */ - weak = CONTAINER_OF(ovs_list_back(&dst->dst_refs), - struct ovsdb_weak_ref, dst_node); - if (weak->src == src) { - return; - } - } - - weak = xmalloc(sizeof *weak); - weak->src = src; + weak = xzalloc(sizeof *weak); + weak->src_table = txn_row->new->table; + weak->src = *ovsdb_row_get_uuid(txn_row->new); weak->dst_table = dst->table; weak->dst = *ovsdb_row_get_uuid(dst); - /* The dst_refs list is updated at commit time. 
*/ - ovs_list_init(&weak->dst_node); - ovs_list_push_back(&src->src_refs, &weak->src_node); + ovsdb_type_clone(&weak->type, &column->type); + ovsdb_atom_clone(&weak->key, key, column->type.key.type); + if (column->type.value.type != OVSDB_TYPE_VOID) { + ovsdb_atom_clone(&weak->value, value, column->type.value.type); + } + weak->by_key = by_key; + weak->column_idx = column->index; + hmap_node_nullify(&weak->dst_node); + ovs_list_push_back(ref_list, &weak->src_node); +} + +static void +find_and_add_weak_ref(struct ovsdb_txn_row *txn_row, + const union ovsdb_atom *key, + const union ovsdb_atom *value, + const struct ovsdb_column *column, + bool by_key, struct ovs_list *ref_list, + struct ovsdb_datum *not_found, bool *zero) +{ + const struct ovsdb_row *row = by_key + ? ovsdb_table_get_row(column->type.key.uuid.refTable, &key->uuid) + : ovsdb_table_get_row(column->type.value.uuid.refTable, &value->uuid); + + if (row) { + add_weak_ref(txn_row, row, ref_list, key, value, by_key, column); + } else if (not_found) { + if (uuid_is_zero(by_key ? &key->uuid : &value->uuid)) { + *zero = true; + } + ovsdb_datum_add_unsafe(not_found, key, value, &column->type, NULL); + } } static struct ovsdb_error * OVS_WARN_UNUSED_RESULT assess_weak_refs(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row) { + struct ovsdb_weak_ref *weak, *next; struct ovsdb_table *table; struct shash_node *node; if (txn_row->old && !txn_row->new) { /* Mark rows that have weak references to 'txn_row' as modified, so - * that their weak references will get reassessed. */ - struct ovsdb_weak_ref *weak, *next; - - LIST_FOR_EACH_SAFE (weak, next, dst_node, &txn_row->old->dst_refs) { - if (!weak->src->txn_row) { - ovsdb_txn_row_modify(txn, weak->src); + * that their weak references will get reassessed. Adding all weak + * refs to 'deleted_ref' lists of their source rows, so they will be + * cleaned up from datums and deleted on commit. 
*/ + + HMAP_FOR_EACH (weak, dst_node, &txn_row->old->dst_refs) { + struct ovsdb_txn_row *src_txn_row; + + src_txn_row = find_or_make_txn_row(txn, weak->src_table, + &weak->src); + if (!src_txn_row) { + /* Source row is also removed. */ + continue; } + ovs_assert(src_txn_row); + ovs_assert(ovs_list_is_empty(&weak->src_node)); + ovs_list_insert(&src_txn_row->deleted_refs, &weak->src_node); } } if (!txn_row->new) { - /* We don't have to do anything about references that originate at - * 'txn_row', because ovsdb_row_destroy() will remove those weak - * references. */ + /* Since all the atoms will be destroyed by the ovsdb_row_destroy(), + * there is no need to check them here. Source references queued + * into 'deleted_ref' while removing other rows will be cleaned up at + * commit time. */ return NULL; } @@ -553,50 +634,94 @@ assess_weak_refs(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row) SHASH_FOR_EACH (node, &table->schema->columns) { const struct ovsdb_column *column = node->data; struct ovsdb_datum *datum = &txn_row->new->fields[column->index]; + struct ovsdb_datum added, removed, deleted_refs; unsigned int orig_n, i; bool zero = false; orig_n = datum->n; + /* Collecting all key-value pairs that references deleted rows. */ + ovsdb_datum_init_empty(&deleted_refs); + LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->deleted_refs) { + if (column->index == weak->column_idx) { + ovsdb_datum_add_unsafe(&deleted_refs, &weak->key, &weak->value, + &column->type, NULL); + ovs_list_remove(&weak->src_node); + ovs_list_init(&weak->src_node); + } + } + ovsdb_datum_sort_unique(&deleted_refs, column->type.key.type, + column->type.value.type); + + /* Removing elements that references deleted rows. */ + ovsdb_datum_subtract(datum, &column->type, + &deleted_refs, &column->type); + ovsdb_datum_destroy(&deleted_refs, &column->type); + + /* Generating the difference between old and new data. 
*/ + if (txn_row->old) { + ovsdb_datum_added_removed(&added, &removed, + &txn_row->old->fields[column->index], + datum, &column->type); + } else { + ovsdb_datum_init_empty(&removed); + ovsdb_datum_clone(&added, datum, &column->type); + } + + /* Checking added data and creating new references. */ + ovsdb_datum_init_empty(&deleted_refs); if (ovsdb_base_type_is_weak_ref(&column->type.key)) { - for (i = 0; i < datum->n; ) { - const struct ovsdb_row *row; - - row = ovsdb_table_get_row(column->type.key.uuid.refTable, - &datum->keys[i].uuid); - if (row) { - add_weak_ref(txn_row->new, row); - i++; - } else { - if (uuid_is_zero(&datum->keys[i].uuid)) { - zero = true; - } - ovsdb_datum_remove_unsafe(datum, i, &column->type); - } + for (i = 0; i < added.n; i++) { + find_and_add_weak_ref(txn_row, &added.keys[i], + added.values ? &added.values[i] : NULL, + column, true, &txn_row->added_refs, + &deleted_refs, &zero); } } if (ovsdb_base_type_is_weak_ref(&column->type.value)) { - for (i = 0; i < datum->n; ) { - const struct ovsdb_row *row; - - row = ovsdb_table_get_row(column->type.value.uuid.refTable, - &datum->values[i].uuid); - if (row) { - add_weak_ref(txn_row->new, row); - i++; - } else { - if (uuid_is_zero(&datum->values[i].uuid)) { - zero = true; - } - ovsdb_datum_remove_unsafe(datum, i, &column->type); - } + for (i = 0; i < added.n; i++) { + find_and_add_weak_ref(txn_row, &added.keys[i], + &added.values[i], + column, false, &txn_row->added_refs, + &deleted_refs, &zero); + } + } + if (deleted_refs.n) { + /* Removing all the references that doesn't point to valid rows. */ + ovsdb_datum_sort_unique(&deleted_refs, column->type.key.type, + column->type.value.type); + ovsdb_datum_subtract(datum, &column->type, + &deleted_refs, &column->type); + ovsdb_datum_destroy(&deleted_refs, &column->type); + } + ovsdb_datum_destroy(&added, &column->type); + + /* Creating refs that needs to be removed on commit. 
This includes + * both: the references that got directly removed from the datum and + * references removed due to deletion of a referenced row. */ + if (ovsdb_base_type_is_weak_ref(&column->type.key)) { + for (i = 0; i < removed.n; i++) { + find_and_add_weak_ref(txn_row, &removed.keys[i], + removed.values + ? &removed.values[i] : NULL, + column, true, &txn_row->deleted_refs, + NULL, NULL); } } + if (ovsdb_base_type_is_weak_ref(&column->type.value)) { + for (i = 0; i < removed.n; i++) { + find_and_add_weak_ref(txn_row, &removed.keys[i], + &removed.values[i], + column, false, &txn_row->deleted_refs, + NULL, NULL); + } + } + ovsdb_datum_destroy(&removed, &column->type); + if (datum->n != orig_n) { bitmap_set1(txn_row->changed, column->index); - ovsdb_datum_sort_assert(datum, column->type.key.type); if (datum->n < column->type.n_min) { const struct uuid *row_uuid = ovsdb_row_get_uuid(txn_row->new); if (zero && !txn_row->old) { @@ -817,6 +942,37 @@ check_index_uniqueness(struct ovsdb_txn *txn OVS_UNUSED, return NULL; } +static struct ovsdb_error * OVS_WARN_UNUSED_RESULT +count_atoms(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row) +{ + struct ovsdb_table *table = txn_row->table; + ssize_t n_atoms_old = 0, n_atoms_new = 0; + struct shash_node *node; + + SHASH_FOR_EACH (node, &table->schema->columns) { + const struct ovsdb_column *column = node->data; + const struct ovsdb_type *type = &column->type; + unsigned int idx = column->index; + + if (txn_row->old) { + n_atoms_old += txn_row->old->fields[idx].n; + if (type->value.type != OVSDB_TYPE_VOID) { + n_atoms_old += txn_row->old->fields[idx].n; + } + } + if (txn_row->new) { + n_atoms_new += txn_row->new->fields[idx].n; + if (type->value.type != OVSDB_TYPE_VOID) { + n_atoms_new += txn_row->new->fields[idx].n; + } + } + } + + txn->n_atoms += n_atoms_old + n_atoms_new; + txn->n_atoms_diff += n_atoms_new - n_atoms_old; + return NULL; +} + static struct ovsdb_error * OVS_WARN_UNUSED_RESULT update_version(struct ovsdb_txn 
*txn OVS_UNUSED, struct ovsdb_txn_row *txn_row) { @@ -885,6 +1041,12 @@ ovsdb_txn_precommit(struct ovsdb_txn *txn) return error; } + /* Count atoms. */ + error = for_each_txn_row(txn, count_atoms); + if (error) { + return OVSDB_WRAP_BUG("can't happen", error); + } + /* Update _version for rows that changed. */ error = for_each_txn_row(txn, update_version); if (error) { @@ -900,6 +1062,8 @@ ovsdb_txn_clone(const struct ovsdb_txn *txn) struct ovsdb_txn *txn_cloned = xzalloc(sizeof *txn_cloned); ovs_list_init(&txn_cloned->txn_tables); txn_cloned->txnid = txn->txnid; + txn_cloned->n_atoms = txn->n_atoms; + txn_cloned->n_atoms_diff = txn->n_atoms_diff; struct ovsdb_txn_table *t; LIST_FOR_EACH (t, node, &txn->txn_tables) { @@ -958,6 +1122,7 @@ ovsdb_txn_add_to_history(struct ovsdb_txn *txn) node->txn = ovsdb_txn_clone(txn); ovs_list_push_back(&txn->db->txn_history, &node->node); txn->db->n_txn_history++; + txn->db->n_txn_history_atoms += txn->n_atoms; } } @@ -968,6 +1133,7 @@ ovsdb_txn_complete(struct ovsdb_txn *txn) if (!ovsdb_txn_is_empty(txn)) { txn->db->run_triggers_now = txn->db->run_triggers = true; + txn->db->n_atoms += txn->n_atoms_diff; ovsdb_monitors_commit(txn->db, txn); ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_update_weak_refs)); ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_row_commit)); @@ -1215,6 +1381,9 @@ ovsdb_txn_row_create(struct ovsdb_txn *txn, struct ovsdb_table *table, txn_row->n_refs = old ? old->n_refs : 0; txn_row->serial = serial - 1; + ovs_list_init(&txn_row->added_refs); + ovs_list_init(&txn_row->deleted_refs); + if (old) { old->txn_row = txn_row; } @@ -1423,12 +1592,20 @@ ovsdb_txn_history_run(struct ovsdb *db) if (!db->need_txn_history) { return; } - /* Remove old histories to limit the size of the history */ - while (db->n_txn_history > 100) { + /* Remove old histories to limit the size of the history. 
Removing until + * the number of ovsdb atoms in history becomes less than the number of + * atoms in the database, because it will be faster to just get a database + * snapshot than re-constructing changes from the history that big. + * Keeping at least one transaction to avoid sending UUID_ZERO as a last id + * if all entries got removed due to the size limit. */ + while (db->n_txn_history > 1 && + (db->n_txn_history > 100 || + db->n_txn_history_atoms > db->n_atoms)) { struct ovsdb_txn_history_node *txn_h_node = CONTAINER_OF( ovs_list_pop_front(&db->txn_history), struct ovsdb_txn_history_node, node); + db->n_txn_history_atoms -= txn_h_node->txn->n_atoms; ovsdb_txn_destroy_cloned(txn_h_node->txn); free(txn_h_node); db->n_txn_history--; @@ -1440,6 +1617,7 @@ ovsdb_txn_history_init(struct ovsdb *db, bool need_txn_history) { db->need_txn_history = need_txn_history; db->n_txn_history = 0; + db->n_txn_history_atoms = 0; ovs_list_init(&db->txn_history); } @@ -1458,4 +1636,5 @@ ovsdb_txn_history_destroy(struct ovsdb *db) free(txn_h_node); } db->n_txn_history = 0; + db->n_txn_history_atoms = 0; } diff --git a/python/ovs/db/data.py b/python/ovs/db/data.py index 2a2102d6be..99bf80ed62 100644 --- a/python/ovs/db/data.py +++ b/python/ovs/db/data.py @@ -204,7 +204,7 @@ class Atom(object): else: return '.boolean = false' elif self.type == ovs.db.types.StringType: - return '.string = "%s"' % escapeCString(self.value) + return '.s = %s' % escapeCString(self.value) elif self.type == ovs.db.types.UuidType: return '.uuid = %s' % ovs.ovsuuid.to_c_assignment(self.value) @@ -563,16 +563,41 @@ class Datum(object): if n == 0: return ["static struct ovsdb_datum %s = { .n = 0 };"] - s = ["static union ovsdb_atom %s_keys[%d] = {" % (name, n)] - for key in sorted(self.values): - s += [" { %s }," % key.cInitAtom(key)] - s += ["};"] + s = [] + if self.type.key.type == ovs.db.types.StringType: + s += ["static struct ovsdb_atom_string %s_key_strings[%d] = {" + % (name, n)] + for key in 
sorted(self.values): + s += [' { .string = "%s", .n_refs = 2 },' + % escapeCString(key.value)] + s += ["};"] + s += ["static union ovsdb_atom %s_keys[%d] = {" % (name, n)] + for i in range(n): + s += [" { .s = &%s_key_strings[%d] }," % (name, i)] + s += ["};"] + else: + s = ["static union ovsdb_atom %s_keys[%d] = {" % (name, n)] + for key in sorted(self.values): + s += [" { %s }," % key.cInitAtom(key)] + s += ["};"] if self.type.value: - s = ["static union ovsdb_atom %s_values[%d] = {" % (name, n)] - for k, v in sorted(self.values.items()): - s += [" { %s }," % v.cInitAtom(v)] - s += ["};"] + if self.type.value.type == ovs.db.types.StringType: + s += ["static struct ovsdb_atom_string %s_val_strings[%d] = {" + % (name, n)] + for k, v in sorted(self.values): + s += [' { .string = "%s", .n_refs = 2 },' + % escapeCString(v.value)] + s += ["};"] + s += ["static union ovsdb_atom %s_values[%d] = {" % (name, n)] + for i in range(n): + s += [" { .s = &%s_val_strings[%d] }," % (name, i)] + s += ["};"] + else: + s = ["static union ovsdb_atom %s_values[%d] = {" % (name, n)] + for k, v in sorted(self.values.items()): + s += [" { %s }," % v.cInitAtom(v)] + s += ["};"] s += ["static struct ovsdb_datum %s = {" % name] s += [" .n = %d," % n] diff --git a/python/ovs/db/idl.py b/python/ovs/db/idl.py index ecae5e1432..87ee06cdef 100644 --- a/python/ovs/db/idl.py +++ b/python/ovs/db/idl.py @@ -1505,6 +1505,11 @@ class Transaction(object): if self != self.idl.txn: return self._status + if self.idl.state != Idl.IDL_S_MONITORING: + self._status = Transaction.TRY_AGAIN + self.__disassemble() + return self._status + # If we need a lock but don't have it, give up quickly. 
if self.idl.lock_name and not self.idl.has_lock: self._status = Transaction.NOT_LOCKED diff --git a/python/ovs/db/types.py b/python/ovs/db/types.py index 626ae8fc44..3318a3b6f8 100644 --- a/python/ovs/db/types.py +++ b/python/ovs/db/types.py @@ -48,6 +48,16 @@ class AtomicType(object): def to_string(self): return self.name + def to_rvalue_string(self): + if self == StringType: + return 's->' + self.name + return self.name + + def to_lvalue_string(self): + if self == StringType: + return 's' + return self.name + def to_json(self): return self.name @@ -373,18 +383,7 @@ class BaseType(object): return "%(dst)s = *%(src)s;" % args return ("%(dst)s = %(src)s->header_.uuid;") % args elif self.type == StringType: - return "%(dst)s = xstrdup(%(src)s);" % args - else: - return "%(dst)s = %(src)s;" % args - - def assign_c_value_casting_away_const(self, dst, src, refTable=True): - args = {'dst': dst, 'src': src} - if self.ref_table_name: - if not refTable: - return "%(dst)s = *%(src)s;" % args - return ("%(dst)s = %(src)s->header_.uuid;") % args - elif self.type == StringType: - return "%(dst)s = CONST_CAST(char *, %(src)s);" % args + return "%(dst)s = ovsdb_atom_string_create(%(src)s);" % args else: return "%(dst)s = %(src)s;" % args diff --git a/python/ovs/poller.py b/python/ovs/poller.py index 3624ec8655..157719c3a4 100644 --- a/python/ovs/poller.py +++ b/python/ovs/poller.py @@ -26,9 +26,9 @@ if sys.platform == "win32": import ovs.winutils as winutils try: - from OpenSSL import SSL + import ssl except ImportError: - SSL = None + ssl = None try: from eventlet import patcher as eventlet_patcher @@ -73,7 +73,7 @@ class _SelectSelect(object): def register(self, fd, events): if isinstance(fd, socket.socket): fd = fd.fileno() - if SSL and isinstance(fd, SSL.Connection): + if ssl and isinstance(fd, ssl.SSLSocket): fd = fd.fileno() if sys.platform != 'win32': diff --git a/python/ovs/reconnect.py b/python/ovs/reconnect.py index c4c6c87e9f..6b0d023ae3 100644 --- 
a/python/ovs/reconnect.py +++ b/python/ovs/reconnect.py @@ -44,7 +44,7 @@ class Reconnect(object): is_connected = False @staticmethod - def deadline(fsm): + def deadline(fsm, now): return None @staticmethod @@ -56,7 +56,7 @@ class Reconnect(object): is_connected = False @staticmethod - def deadline(fsm): + def deadline(fsm, now): return None @staticmethod @@ -68,7 +68,7 @@ class Reconnect(object): is_connected = False @staticmethod - def deadline(fsm): + def deadline(fsm, now): return fsm.state_entered + fsm.backoff @staticmethod @@ -80,7 +80,7 @@ class Reconnect(object): is_connected = False @staticmethod - def deadline(fsm): + def deadline(fsm, now): return fsm.state_entered + max(1000, fsm.backoff) @staticmethod @@ -92,13 +92,24 @@ class Reconnect(object): is_connected = True @staticmethod - def deadline(fsm): + def deadline(fsm, now): if fsm.probe_interval: base = max(fsm.last_activity, fsm.state_entered) expiration = base + fsm.probe_interval - if (fsm.last_receive_attempt is None or + if (now < expiration or + fsm.last_receive_attempt is None or fsm.last_receive_attempt >= expiration): + # We still have time before the expiration or the time has + # already passed and there was no activity. In the first + # case we need to wait for the expiration, in the second - + # we're already past the deadline. */ return expiration + else: + # Time has already passed, but we didn't attempt to receive + # anything. We need to wake up and try to receive even if + # nothing is pending, so we can update the expiration time + # or transition to a different state. 
+ return now + 1 return None @staticmethod @@ -114,12 +125,15 @@ class Reconnect(object): is_connected = True @staticmethod - def deadline(fsm): + def deadline(fsm, now): if fsm.probe_interval: expiration = fsm.state_entered + fsm.probe_interval - if (fsm.last_receive_attempt is None or + if (now < expiration or + fsm.last_receive_attempt is None or fsm.last_receive_attempt >= expiration): return expiration + else: + return now + 1 return None @staticmethod @@ -134,7 +148,7 @@ class Reconnect(object): is_connected = False @staticmethod - def deadline(fsm): + def deadline(fsm, now): return fsm.state_entered @staticmethod @@ -545,7 +559,7 @@ class Reconnect(object): returned if the "probe interval" is nonzero--see self.set_probe_interval()).""" - deadline = self.state.deadline(self) + deadline = self.state.deadline(self, now) if deadline is not None and now >= deadline: return self.state.run(self, now) else: @@ -562,7 +576,7 @@ class Reconnect(object): """Returns the number of milliseconds after which self.run() should be called if nothing else notable happens in the meantime, or None if this is currently unnecessary.""" - deadline = self.state.deadline(self) + deadline = self.state.deadline(self, now) if deadline is not None: remaining = deadline - now return max(0, remaining) diff --git a/python/ovs/socket_util.py b/python/ovs/socket_util.py index 3faa64e9d7..651012bf06 100644 --- a/python/ovs/socket_util.py +++ b/python/ovs/socket_util.py @@ -222,8 +222,7 @@ def inet_parse_active(target, default_port): return (host_name, port) -def inet_open_active(style, target, default_port, dscp): - address = inet_parse_active(target, default_port) +def inet_create_socket_active(style, address): try: is_addr_inet = is_valid_ipv4_address(address[0]) if is_addr_inet: @@ -235,23 +234,32 @@ def inet_open_active(style, target, default_port, dscp): except socket.error as e: return get_exception_errno(e), None + return family, sock + + +def inet_connect_active(sock, address, family, 
dscp): try: set_nonblocking(sock) set_dscp(sock, family, dscp) - try: - sock.connect(address) - except socket.error as e: - error = get_exception_errno(e) - if sys.platform == 'win32' and error == errno.WSAEWOULDBLOCK: - # WSAEWOULDBLOCK would be the equivalent on Windows - # for EINPROGRESS on Unix. - error = errno.EINPROGRESS - if error != errno.EINPROGRESS: - raise - return 0, sock + error = sock.connect_ex(address) + if error not in (0, errno.EINPROGRESS, errno.EWOULDBLOCK): + sock.close() + return error + return 0 except socket.error as e: sock.close() - return get_exception_errno(e), None + return get_exception_errno(e) + + +def inet_open_active(style, target, default_port, dscp): + address = inet_parse_active(target, default_port) + family, sock = inet_create_socket_active(style, address) + if sock is None: + return family, sock + error = inet_connect_active(sock, address, family, dscp) + if error: + return error, None + return 0, sock def get_exception_errno(e): diff --git a/python/ovs/stream.py b/python/ovs/stream.py index f5a520862c..ac5b0fd0c6 100644 --- a/python/ovs/stream.py +++ b/python/ovs/stream.py @@ -22,9 +22,9 @@ import ovs.socket_util import ovs.vlog try: - from OpenSSL import SSL + import ssl except ImportError: - SSL = None + ssl = None if sys.platform == 'win32': import ovs.winutils as winutils @@ -322,6 +322,12 @@ class Stream(object): The recv function will not block waiting for data to arrive. 
If no data have been received, it returns (errno.EAGAIN, "") immediately.""" + try: + return self._recv(n) + except socket.error as e: + return (ovs.socket_util.get_exception_errno(e), "") + + def _recv(self, n): retval = self.connect() if retval != 0: return (retval, "") @@ -331,10 +337,7 @@ class Stream(object): if sys.platform == 'win32' and self.socket is None: return self.__recv_windows(n) - try: - return (0, self.socket.recv(n)) - except socket.error as e: - return (ovs.socket_util.get_exception_errno(e), "") + return (0, self.socket.recv(n)) def __recv_windows(self, n): if self._read_pending: @@ -396,6 +399,12 @@ class Stream(object): Will not block. If no bytes can be immediately accepted for transmission, returns -errno.EAGAIN immediately.""" + try: + return self._send(buf) + except socket.error as e: + return -ovs.socket_util.get_exception_errno(e) + + def _send(self, buf): retval = self.connect() if retval != 0: return -retval @@ -409,10 +418,7 @@ class Stream(object): if sys.platform == 'win32' and self.socket is None: return self.__send_windows(buf) - try: - return self.socket.send(buf) - except socket.error as e: - return -ovs.socket_util.get_exception_errno(e) + return self.socket.send(buf) def __send_windows(self, buf): if self._write_pending: @@ -769,35 +775,42 @@ class SSLStream(Stream): def check_connection_completion(sock): try: return Stream.check_connection_completion(sock) - except SSL.SysCallError as e: + except ssl.SSLSyscallError as e: return ovs.socket_util.get_exception_errno(e) @staticmethod def needs_probes(): return True - @staticmethod - def verify_cb(conn, cert, errnum, depth, ok): - return ok - @staticmethod def _open(suffix, dscp): - error, sock = TCPStream._open(suffix, dscp) - if error: - return error, None + address = ovs.socket_util.inet_parse_active(suffix, 0) + family, sock = ovs.socket_util.inet_create_socket_active( + socket.SOCK_STREAM, address) + if sock is None: + return family, sock # Create an SSL context - ctx = 
SSL.Context(SSL.SSLv23_METHOD) - ctx.set_verify(SSL.VERIFY_PEER, SSLStream.verify_cb) - ctx.set_options(SSL.OP_NO_SSLv2 | SSL.OP_NO_SSLv3) + ctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23) + ctx.verify_mode = ssl.CERT_REQUIRED + ctx.options |= ssl.OP_NO_SSLv2 + ctx.options |= ssl.OP_NO_SSLv3 # If the client has not set the SSL configuration files # exception would be raised. - ctx.use_privatekey_file(Stream._SSL_private_key_file) - ctx.use_certificate_file(Stream._SSL_certificate_file) ctx.load_verify_locations(Stream._SSL_ca_cert_file) + ctx.load_cert_chain(Stream._SSL_certificate_file, + Stream._SSL_private_key_file) + ssl_sock = ctx.wrap_socket(sock, do_handshake_on_connect=False) - ssl_sock = SSL.Connection(ctx, sock) - ssl_sock.set_connect_state() + # Connect + error = ovs.socket_util.inet_connect_active(ssl_sock, address, family, + dscp) + if not error: + try: + ssl_sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) + except socket.error as e: + ssl_sock.close() + return ovs.socket_util.get_exception_errno(e), None return error, ssl_sock def connect(self): @@ -809,40 +822,44 @@ class SSLStream(Stream): # TCP Connection is successful. 
Now do the SSL handshake try: self.socket.do_handshake() - except SSL.WantReadError: + except ssl.SSLWantReadError: return errno.EAGAIN - except SSL.SysCallError as e: + except ssl.SSLSyscallError as e: return ovs.socket_util.get_exception_errno(e) return 0 def recv(self, n): try: - return super(SSLStream, self).recv(n) - except SSL.WantReadError: + return super(SSLStream, self)._recv(n) + except ssl.SSLWantReadError: return (errno.EAGAIN, "") - except SSL.SysCallError as e: + except ssl.SSLSyscallError as e: return (ovs.socket_util.get_exception_errno(e), "") - except SSL.ZeroReturnError: + except ssl.SSLZeroReturnError: return (0, "") + except socket.error as e: + return (ovs.socket_util.get_exception_errno(e), "") def send(self, buf): try: - return super(SSLStream, self).send(buf) - except SSL.WantWriteError: + return super(SSLStream, self)._send(buf) + except ssl.SSLWantWriteError: return -errno.EAGAIN - except SSL.SysCallError as e: + except ssl.SSLSyscallError as e: + return -ovs.socket_util.get_exception_errno(e) + except socket.error as e: return -ovs.socket_util.get_exception_errno(e) def close(self): if self.socket: try: - self.socket.shutdown() - except SSL.Error: + self.socket.shutdown(socket.SHUT_RDWR) + except socket.error: pass return super(SSLStream, self).close() -if SSL: +if ssl: # Register SSL only if the OpenSSL module is available Stream.register_method("ssl", SSLStream) diff --git a/tests/alb.at b/tests/alb.at index 903238fcb2..67eb14f473 100644 --- a/tests/alb.at +++ b/tests/alb.at @@ -86,6 +86,52 @@ OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance OVS_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([ALB - cross-numa]) +OVS_VSWITCHD_START([add-port br0 p0 \ + -- set Interface p0 type=dummy-pmd options:n_rxq=4 \ + -- set Interface p0 options:numa_id=0 \ + -- set Open_vSwitch . other_config:pmd-cpu-mask=0x3 \ + -- set open_vswitch . other_config:pmd-rxq-assign=group \ + -- set open_vswitch . 
other_config:pmd-rxq-isolate=false \ + -- set open_vswitch . other_config:pmd-auto-lb="true" \ + -- set open_vswitch . other_config:pmd-auto-lb-load-threshold=0], + [], [], [--dummy-numa 1,2,1,2]) +OVS_WAIT_UNTIL([grep "PMD auto load balance is enabled" ovs-vswitchd.log]) +AT_CHECK([ovs-appctl vlog/set dpif_netdev:dbg]) + +# no pinned rxqs - cross-numa pmd could change +get_log_next_line_num +ovs-appctl time/warp 600000 10000 +OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance performing dry run."]) +OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance detected cross-numa polling"]) + +# all pinned rxqs - cross-numa pmd will not change +AT_CHECK([ovs-vsctl set Interface p0 other_config:pmd-rxq-affinity='0:0,1:0,2:1,3:1']) +get_log_next_line_num +ovs-appctl time/warp 600000 10000 +OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance performing dry run."]) +OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "Variance improvement 0%."]) + +# mix of pinned (non-isolated) and non-pinned rxqs - cross-numa pmd could change +AT_CHECK([ovs-vsctl remove Interface p0 other_config pmd-rxq-affinity]) +AT_CHECK([ovs-vsctl set Interface p0 other_config:pmd-rxq-affinity='0:0,1:0,2:1']) +get_log_next_line_num +ovs-appctl time/warp 600000 10000 +OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance performing dry run."]) +OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance detected cross-numa polling"]) + +# mix of pinned (isolated) and non-pinned rxqs - cross-numa pmd could change +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0xf]) +AT_CHECK([ovs-vsctl set Interface p0 options:n_rxq=6]) +AT_CHECK([ovs-vsctl set Open_vSwitch . 
other_config:pmd-rxq-isolate=true]) +get_log_next_line_num +ovs-appctl time/warp 600000 10000 +OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance performing dry run."]) +OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance detected cross-numa polling"]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([ALB - PMD/RxQ assignment type]) OVS_VSWITCHD_START([add-port br0 p0 \ -- set Interface p0 type=dummy-pmd options:n_rxq=3 \ diff --git a/tests/classifier.at b/tests/classifier.at index cdcd72c156..f652b59837 100644 --- a/tests/classifier.at +++ b/tests/classifier.at @@ -129,6 +129,31 @@ Datapath actions: 3 OVS_VSWITCHD_STOP(["/'prefixes' with incompatible field: ipv6_label/d"]) AT_CLEANUP +AT_SETUP([flow classifier - ipv6 ND dependency]) +OVS_VSWITCHD_START +add_of_ports br0 1 2 +AT_DATA([flows.txt], [dnl + table=0,priority=100,ipv6,ipv6_src=1000::/10 actions=resubmit(,1) + table=0,priority=0 actions=NORMAL + table=1,priority=110,ipv6,ipv6_dst=1000::3 actions=resubmit(,2) + table=1,priority=100,ipv6,ipv6_dst=1000::4 actions=resubmit(,2) + table=1,priority=0 actions=NORMAL + table=2,priority=120,icmp6,nw_ttl=255,icmp_type=135,icmp_code=0,nd_target=1000::1 actions=NORMAL + table=2,priority=100,tcp actions=NORMAL + table=2,priority=100,icmp6 actions=NORMAL + table=2,priority=0 actions=NORMAL +]) +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) + +# test ICMPv6 echo request (which should have no nd_target field) +AT_CHECK([ovs-appctl ofproto/trace br0 "in_port=1,eth_src=f6:d2:b0:19:5e:7b,eth_dst=d2:49:19:91:78:fe,dl_type=0x86dd,ipv6_src=1000::3,ipv6_dst=1000::4,nw_proto=58,icmpv6_type=128,icmpv6_code=0"], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,eth,icmp6,in_port=1,dl_src=f6:d2:b0:19:5e:7b,dl_dst=d2:49:19:91:78:fe,ipv6_src=1000::/10,ipv6_dst=1000::4,nw_ttl=0,nw_frag=no +Datapath actions: 100,2 +]) +OVS_VSWITCHD_STOP +AT_CLEANUP + AT_BANNER([conjunctive match]) AT_SETUP([single conjunctive 
match]) diff --git a/tests/drop-stats.at b/tests/drop-stats.at index f3e19cd83b..1d3af98dab 100644 --- a/tests/drop-stats.at +++ b/tests/drop-stats.at @@ -83,6 +83,9 @@ AT_CHECK([ ovs-ofctl -Oopenflow13 add-flows br0 flows.txt ovs-ofctl -Oopenflow13 dump-flows br0 | ofctl_strip | sort | grep actions ], [0], [ignore]) +ovs-appctl time/warp 15000 +AT_CHECK([ovs-appctl revalidator/wait]) + AT_CHECK([ ovs-appctl netdev-dummy/receive p1 'in_port(1),packet_type(ns=0,id=0),eth(src=3a:6d:d2:09:9c:ab,dst=1e:2c:e9:2a:66:9e),ipv4(src=192.168.10.10,dst=192.168.10.30,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' ], [0], [ignore]) diff --git a/tests/flowgen.py b/tests/flowgen.py index 7ef32d13cb..cb0e9df388 100755 --- a/tests/flowgen.py +++ b/tests/flowgen.py @@ -135,7 +135,7 @@ def output(attrs): 12893) # urgent pointer if attrs['TP_PROTO'] == 'TCP+options': tcp = (tcp[:12] - + struct.pack('H', (6 << 12) | 0x02 | 0x10) + + struct.pack('>H', (6 << 12) | 0x02 | 0x10) + tcp[14:]) tcp += struct.pack('>BBH', 2, 4, 1975) # MSS option tcp += b'payload' @@ -166,15 +166,15 @@ def output(attrs): ip = ip[:2] + struct.pack('>H', len(ip)) + ip[4:] packet += ip if attrs['DL_HEADER'].startswith('802.2'): - packet_len = len(packet) + packet_len = len(packet) - 14 if flow['DL_VLAN'] != 0xffff: packet_len -= 4 packet = (packet[:len_ofs] + struct.pack('>H', packet_len) + packet[len_ofs + 2:]) - print(' '.join(['%s=%s' for k, v in attrs.items()])) - print(' '.join(['%s=%s' for k, v in flow.items()])) + print(' '.join(['%s=%s' % (k, v) for k, v in attrs.items()])) + print(' '.join(['%s=%s' % (k, v) for k, v in flow.items()])) print() flows.write(struct.pack('>LH', diff --git a/tests/library.at b/tests/library.at index 1702b7556b..e27d9e8bce 100644 --- a/tests/library.at +++ b/tests/library.at @@ -247,7 +247,7 @@ AT_CHECK([ovstest test-ofpbuf], [0], []) AT_CLEANUP AT_SETUP([rcu]) -AT_CHECK([ovstest test-rcu-quiesce], [0], []) +AT_CHECK([ovstest test-rcu], [0], []) AT_CLEANUP AT_SETUP([stopwatch 
module]) diff --git a/tests/mcast-snooping.at b/tests/mcast-snooping.at index 757cf7186e..fe475e7b38 100644 --- a/tests/mcast-snooping.at +++ b/tests/mcast-snooping.at @@ -216,3 +216,70 @@ AT_CHECK([ovs-appctl mdb/show br0], [0], [dnl ]) AT_CLEANUP + + +AT_SETUP([mcast - igmp flood for non-snoop enabled]) +OVS_VSWITCHD_START([]) + +AT_CHECK([ + ovs-vsctl set bridge br0 \ + datapath_type=dummy], [0]) + +add_of_ports br0 1 2 + +AT_CHECK([ovs-ofctl add-flow br0 action=normal]) + +ovs-appctl time/stop + +dnl Basic scenario - needs to flood for IGMP followed by unicast ICMP +dnl in reverse direction +AT_CHECK([ovs-appctl netdev-dummy/receive p1 \ + '0101000c29a0aa55aa550001080046c00028000040000102d3494565eb4ae0000016940400002200f9020000000104000000e00000fb000000000000']) +AT_CHECK([ovs-appctl netdev-dummy/receive p2 \ + 'aa55aa5500010101000c29a008004500001c00010000400164dc0a0101010a0101020800f7ffffffffff']) + + +AT_CHECK([ovs-appctl dpctl/dump-flows | grep -e .*ipv4 | sort | dnl + strip_stats | strip_used | strip_recirc | dnl + sed -e 's/,packet_type(ns=[[0-9]]*,id=[[0-9]]*),/,/'], + [0], [dnl +recirc_id(),in_port(1),eth(src=aa:55:aa:55:00:01,dst=01:01:00:0c:29:a0),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:100,2 +recirc_id(),in_port(2),eth(src=01:01:00:0c:29:a0,dst=aa:55:aa:55:00:01),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:1 +]) + +ovs-appctl time/warp 100000 + +dnl Next we should clear the flows and install a complex case +AT_CHECK([ovs-ofctl del-flows br0]) + +AT_DATA([flows.txt], [dnl +table=0, arp actions=NORMAL +table=0, ip,in_port=1 actions=ct(table=1,zone=64000) +table=0, in_port=2 actions=output:1 +table=1, ip,ct_state=+trk+inv actions=drop +table=1 ip,in_port=1,icmp,ct_state=+trk+new actions=output:2 +table=1, in_port=1,ip,ct_state=+trk+new actions=controller(userdata=00.de.ad.be.ef.ca.fe.01) +table=1, in_port=1,ip,ct_state=+trk+est actions=output:2 +]) +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) 
+ +ovs-appctl time/warp 100000 + +dnl Send the IGMP, followed by a unicast ICMP - ensure we won't black hole +AT_CHECK([ovs-appctl netdev-dummy/receive p1 \ + '0101000c29a0aa55aa550001080046c00028000040000102d3494565eb4ae0000016940400002200f9020000000104000000e00000fb000000000000']) +AT_CHECK([ovs-appctl netdev-dummy/receive p1 \ + 'aa55aa550001aa55aa55000208004500001c00010000400164dc0a0101010a0101020800f7ffffffffff']) + + +AT_CHECK([ovs-appctl dpctl/dump-flows | grep -e .*ipv4 | sort | dnl + strip_stats | strip_used | strip_recirc | dnl + sed 's/pid=[[0-9]]*,// + s/,packet_type(ns=[[0-9]]*,id=[[0-9]]*),/,/'], + [0], [dnl +ct_state(+new-inv+trk),recirc_id(),in_port(1),eth_type(0x0800),ipv4(proto=1,frag=no), packets:0, bytes:0, used:never, actions:2 +ct_state(+new-inv+trk),recirc_id(),in_port(1),eth_type(0x0800),ipv4(proto=2,frag=no), packets:0, bytes:0, used:never, actions:userspace(controller(reason=1,dont_send=0,continuation=0,recirc_id=,rule_cookie=0,controller_id=0,max_len=65535)) +recirc_id(),in_port(1),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:0.0s, actions:ct(zone=64000),recirc() +]) + +AT_CLEANUP diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index 956a69e1fa..43cded03b8 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -81,11 +81,12 @@ recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=ff: ovs-appctl netdev-dummy/set-admin-state p1 up ovs-appctl time/warp 100 -OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl +OVS_WAIT_UNTIL_EQUAL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [dnl ---- bond0 ---- bond_mode: active-backup bond may use recirculation: no, bond-hash-basis: 0 +lb_output action: disabled, bond-id: -1 updelay: 0 ms downdelay: 0 ms lacp_status: off @@ -99,7 +100,6 @@ member p1: enabled member p2: enabled may_enable: true - ]) OVS_VSWITCHD_STOP @@ -129,11 +129,12 @@ ovs-appctl time/warp 100 OVS_WAIT_UNTIL([test -n 
"`ovs-appctl bond/show | fgrep 'member p1: disabled'`"]) ovs-appctl netdev-dummy/set-admin-state p1 up ovs-appctl time/warp 100 -OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl +OVS_WAIT_UNTIL_EQUAL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [dnl ---- bond0 ---- bond_mode: active-backup bond may use recirculation: no, bond-hash-basis: 0 +lb_output action: disabled, bond-id: -1 updelay: 0 ms downdelay: 0 ms lacp_status: off @@ -150,7 +151,6 @@ member p2: enabled member p3: enabled may_enable: true - ]) dnl Now delete the primary and verify that the output shows that the @@ -171,11 +171,12 @@ ovs-vsctl \ --id=@p1 create Interface name=p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 -- \ set Port bond0 interfaces="$uuids, @p1]" ovs-appctl time/warp 100 -OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl +OVS_WAIT_UNTIL_EQUAL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [dnl ---- bond0 ---- bond_mode: active-backup bond may use recirculation: no, bond-hash-basis: 0 +lb_output action: disabled, bond-id: -1 updelay: 0 ms downdelay: 0 ms lacp_status: off @@ -192,17 +193,17 @@ member p2: enabled member p3: enabled may_enable: true - ]) dnl Switch to another primary ovs-vsctl set port bond0 other_config:bond-primary=p2 ovs-appctl time/warp 100 -OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl +OVS_WAIT_UNTIL_EQUAL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [dnl ---- bond0 ---- bond_mode: active-backup bond may use recirculation: no, bond-hash-basis: 0 +lb_output action: disabled, bond-id: -1 updelay: 0 ms downdelay: 0 ms lacp_status: off @@ -211,25 +212,25 @@ active-backup primary: p2 member p1: enabled - active member may_enable: true member p2: enabled + active member may_enable: true member p3: enabled may_enable: true - ]) dnl Remove the 
"bond-primary" config directive from the bond. AT_CHECK([ovs-vsctl remove Port bond0 other_config bond-primary]) ovs-appctl time/warp 100 -OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl +OVS_WAIT_UNTIL_EQUAL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [dnl ---- bond0 ---- bond_mode: active-backup bond may use recirculation: no, bond-hash-basis: 0 +lb_output action: disabled, bond-id: -1 updelay: 0 ms downdelay: 0 ms lacp_status: off @@ -238,15 +239,14 @@ active-backup primary: member p1: enabled - active member may_enable: true member p2: enabled + active member may_enable: true member p3: enabled may_enable: true - ]) OVS_VSWITCHD_STOP @@ -4862,6 +4862,54 @@ recirc_id(0),in_port(90),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(proto=6,fr OVS_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([ofproto-dpif - handling of malformed TCP packets]) +OVS_VSWITCHD_START +add_of_ports br0 1 90 + +dnl drop packet has tcp port 0-f but allow other tcp packets +AT_DATA([flows.txt], [dnl +priority=75 tcp tp_dst=0/0xfff0 actions=drop +priority=50 tcp actions=output:1 +]) +AT_CHECK([ovs-ofctl replace-flows br0 flows.txt]) + +dnl good tcp pkt, tcp(sport=100,dpor=16) +pkt1="be95df40fb57fa163e5ee3570800450000280001000040063e940a0a0a0a141414140064001000000000000000005002200053330000" + +dnl malformed tcp pkt(tcp_hdr < 20 byte), tcp(sport=100,dport=16,dataofs=1) +pkt2="be95df40fb57fa163e5ee3570800450000280001000040063e940a0a0a0a141414140064001000000000000000001002200093330000" + +dnl malformed tcp pkt(tcp_hdr > pkt_len), tcp(sport=100,dport=16,dataofs=15) +pkt3="be95df40fb57fa163e5ee3570800450000280001000040063e940a0a0a0a14141414006400100000000000000000f002200093330000" + +AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg]) + +AT_CHECK([ovs-appctl netdev-dummy/receive p90 "$pkt1"], [0], [stdout]) +dnl for good tcp pkt, ovs can extract the tp_dst=16 +AT_CHECK([ovs-appctl dpctl/dump-flows filter=in_port\(90\),tcp], [0], [dnl 
+flow-dump from the main thread: +recirc_id(0),in_port(90),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(proto=6,frag=no),tcp(dst=16/0xfff0), packets:0, bytes:0, used:never, actions:1 +]) + +AT_CHECK([ovs-appctl revalidator/purge], [0], [stdout]) +AT_CHECK([ovs-appctl netdev-dummy/receive p90 "$pkt2"], [0], [stdout]) +dnl for malformed tcp pkt(tcp_hdr < 20 byte), ovs uses default value tp_dst=0 +AT_CHECK([ovs-appctl dpctl/dump-flows filter=in_port\(90\),tcp], [0], [dnl +flow-dump from the main thread: +recirc_id(0),in_port(90),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(proto=6,frag=no),tcp(dst=0/0xfff0), packets:0, bytes:0, used:never, actions:drop +]) + +AT_CHECK([ovs-appctl revalidator/purge], [0], [stdout]) +AT_CHECK([ovs-appctl netdev-dummy/receive p90 "$pkt3"], [0], [stdout]) +dnl for malformed tcp pkt(tcp_hdr > pkt_len), ovs uses default value tp_dst=0 +AT_CHECK([ovs-appctl dpctl/dump-flows filter=in_port\(90\),tcp], [0], [dnl +flow-dump from the main thread: +recirc_id(0),in_port(90),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(proto=6,frag=no),tcp(dst=0/0xfff0), packets:0, bytes:0, used:never, actions:drop +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([ofproto-dpif - exit]) OVS_VSWITCHD_START add_of_ports br0 1 2 3 10 11 12 13 14 @@ -5525,7 +5573,36 @@ check_flows () { echo "n_packets=$n" test "$n" = 1 } -OVS_WAIT_UNTIL([check_flows], [ovs dump-flows br0]) +OVS_WAIT_UNTIL([check_flows], [ovs-ofctl dump-flows br0]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + +# Checks for regression against a bug in which OVS crashed +# with in_port=OFPP_NONE or in_port=OFPP_CONTROLLER and +# recirculation is involved. 
+AT_SETUP([ofproto-dpif - packet-out recirculation with OFPP_NONE and OFPP_CONTROLLER]) +OVS_VSWITCHD_START +add_of_ports br0 1 2 + +AT_DATA([flows.txt], [dnl +table=0 ip actions=mod_dl_dst:83:83:83:83:83:83,ct(table=1) +table=1 ip actions=ct(commit),normal +]) +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) + +packet=ffffffffffff00102030405008004500001c00000000401100000a000002ffffffff0035111100080000 +AT_CHECK([ovs-ofctl packet-out br0 "in_port=none,packet=$packet actions=table"]) +AT_CHECK([ovs-ofctl packet-out br0 "in_port=controller,packet=$packet actions=table"]) + +# Dumps out the flow table, extracts the number of packets that have gone +# through the (single) flow in table 1, and returns success if it's exactly 2. +check_flows () { + n=$(ovs-ofctl dump-flows br0 table=1 | sed -n 's/.*n_packets=\([[0-9]]\{1,\}\).*/\1/p') + echo "n_packets=$n" + test "$n" = 2 +} +OVS_WAIT_UNTIL([check_flows], [ovs-ofctl dump-flows br0]) OVS_VSWITCHD_STOP AT_CLEANUP @@ -7524,13 +7601,28 @@ dnl configure bridge IPFIX and ensure that sample action generation works at the dnl datapath level. AT_SETUP([ofproto-dpif - Bridge IPFIX sanity check]) OVS_VSWITCHD_START -add_of_ports br0 1 2 +dnl first revalidation triggered by add interface +AT_CHECK([ovs-appctl coverage/read-counter rev_reconfigure], [0], [dnl +1 +]) + +add_of_ports br0 1 2 3 +AT_CHECK([ovs-appctl coverage/read-counter rev_reconfigure], [0], [dnl +2 +]) dnl Sample every packet using bridge-based sampling. AT_CHECK([ovs-vsctl -- set bridge br0 ipfix=@fix -- \ --id=@fix create ipfix targets=\"127.0.0.1:4739\" \ - sampling=1], [0], [ignore]) + sampling=2], [0], [ignore]) +AT_CHECK([ovs-appctl coverage/read-counter rev_reconfigure], [0], [dnl +3 +]) +AT_CHECK([ovs-vsctl set ipfix `ovs-vsctl get bridge br0 ipfix` sampling=1], [0]) +AT_CHECK([ovs-appctl coverage/read-counter rev_reconfigure], [0], [dnl +4 +]) dnl Send some packets that should be sampled. 
for i in `seq 1 3`; do AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800)']) @@ -7540,6 +7632,28 @@ flow-dump from the main thread: packets:2, bytes:68, used:0.001s, actions:userspace(pid=0,ipfix(output_port=4294967295)) ]) +AT_CHECK([ovs-appctl revalidator/purge]) + +dnl Check sample is performed even if only one of the ports is present. +AT_DATA([flows.txt], [dnl +table=0,in_port=3,tcp actions=load:0xffff->NXM_OF_IN_PORT[],ct(zone=1,table=1) +table=1,tcp, actions=output:2 +]) +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) + +for i in `seq 1 3`; do + AT_CHECK([ovs-appctl netdev-dummy/receive p3 'in_port(3),eth(src=50:54:00:00:00:08,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=192.168.0.2,dst=192.168.0.1,proto=6,tos=0,ttl=64,frag=no)']) +done + +AT_CHECK([ovs-appctl dpctl/dump-flows | sed 's/.*\(packets:\)/\1/' | sed 's/used:[[0-9]].[[0-9]]*s/used:0.001s/'], [0], [dnl +flow-dump from the main thread: +packets:2, bytes:236, used:0.001s, actions:userspace(pid=0,ipfix(output_port=2)),2 +packets:2, bytes:236, used:0.001s, actions:userspace(pid=0,ipfix(output_port=4294967295)),ct(zone=1),recirc(0x1) +]) + +AT_CHECK([ovs-ofctl del-flows br0 in_port=3]) +AT_CHECK([ovs-ofctl del-flows br0 table=1]) + AT_CHECK([ovs-appctl revalidator/purge]) dnl dnl Add a slowpath meter. The userspace action should be metered. 
@@ -8591,6 +8705,34 @@ AT_CHECK([sed -n 's/=[[0-9]][[0-9]]\(\.[[0-9]][[0-9]]*\)\{0,1\}s/=?s/p' stdout], OVS_VSWITCHD_STOP AT_CLEANUP + +AT_SETUP([ofproto-dpif - patch ports - meter (clone)]) + +OVS_VSWITCHD_START( + [add-port br0 p0 -- set Interface p0 type=dummy ofport_request=1 -- \ + add-port br0 p1 -- set Interface p1 type=patch \ + options:peer=p2 ofport_request=2 -- \ + add-br br1 -- \ + set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- \ + set bridge br1 datapath-type=dummy other-config:datapath-id=1234 \ + fail-mode=secure -- \ + add-port br1 p2 -- set Interface p2 type=patch \ + options:peer=p1 -- \ + add-port br1 p3 -- set Interface p3 type=dummy ofport_request=3]) + +AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br1 'meter=1 pktps stats bands=type=drop rate=2']) +AT_CHECK([ovs-ofctl del-flows br0]) +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 in_port=local,ip,actions=2,1]) +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br1 in_port=1,ip,actions=meter:1,3]) + +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(100),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=10.1.1.22,dst=10.0.0.3,proto=6,tos=0,ttl=64,frag=no),tcp(src=53295,dst=8080)'], [0], [stdout]) +AT_CHECK([tail -1 stdout], [0], + [Datapath actions: clone(meter(0),3),1 +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + dnl ---------------------------------------------------------------------- AT_BANNER([ofproto-dpif -- megaflows]) @@ -9695,6 +9837,26 @@ OFPST_TABLE reply (OF1.3) (xid=0x2): OVS_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([ofproto-dpif packet-out table meter drop]) +OVS_VSWITCHD_START +add_of_ports br0 1 2 + +AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br0 'meter=1 pktps bands=type=drop rate=1']) +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 'in_port=1 action=meter:1,output:2']) + +ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000400080000 actions=resubmit(,0)" +ovs-ofctl -O OpenFlow13 
packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000400080000 actions=resubmit(,0)" + +# Check that vswitchd hasn't crashed by dumping the meter added above +AT_CHECK([ovs-ofctl -O OpenFlow13 dump-meters br0 | ofctl_strip], [0], [dnl +OFPST_METER_CONFIG reply (OF1.3): +meter=1 pktps bands= +type=drop rate=1 +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([ofproto-dpif - ICMPv6]) OVS_VSWITCHD_START add_of_ports br0 1 @@ -11404,6 +11566,23 @@ Megaflow: recirc_id=0x3,eth,ip,in_port=1,nw_frag=no Datapath actions: 4 ]) +ovs-ofctl del-flows br0 + +AT_DATA([flows.txt], [dnl +table=0,in_port=1 actions=load:0x1->NXM_NX_REG1[[]],resubmit(,1),load:0x2->NXM_NX_REG1[[]],resubmit(,1),load:0x3->NXM_NX_REG1[[]],resubmit(,1) +table=1,in_port=1,reg1=0x1 actions=check_pkt_larger(200)->NXM_NX_REG0[[0]],resubmit(,4) +table=1,in_port=1,reg1=0x2 actions=output:2 +table=1,in_port=1,reg1=0x3 actions=output:4 +table=4,in_port=1 actions=output:3 +]) + +AT_CHECK([ovs-ofctl --protocols=OpenFlow10 add-flows br0 flows.txt]) +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.10.10.2,dst=10.10.10.1,proto=1,tos=1,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout]) +AT_CHECK([cat stdout | grep Datapath -B1], [0], [dnl +Megaflow: recirc_id=0,eth,ip,in_port=1,nw_frag=no +Datapath actions: check_pkt_len(size=200,gt(3),le(3)),2,4 +]) + OVS_VSWITCHD_STOP AT_CLEANUP diff --git a/tests/ofproto-macros.at b/tests/ofproto-macros.at index 736d9809cb..f906b5c3b5 100644 --- a/tests/ofproto-macros.at +++ b/tests/ofproto-macros.at @@ -134,6 +134,21 @@ strip_ufid () { sed 's/mega_ufid:[[-0-9a-f]]* // s/ufid:[[-0-9a-f]]* //' } + +# Zeroes the 'packets:' and 'bytes:' counters in the output +strip_stats () { + sed 's/packets:[[0-9]]*/packets:0/ + s/bytes:[[0-9]]*/bytes:0/' +} + +# Changes 'recirc_id(...)', 'recirc_id=...' and 'recirc(...)' to say +# 'recirc_id()', 'recirc_id=' and 'recirc()' respectively.
This should make output easier to +# compare. +strip_recirc() { + sed 's/recirc_id([[x0-9]]*)/recirc_id()/ + s/recirc_id=[[x0-9]]*/recirc_id=/ + s/recirc([[x0-9]]*)/recirc()/' +} m4_divert_pop([PREPARE_TESTS]) m4_define([TESTABLE_LOG], [-vPATTERN:ANY:'%c|%p|%m']) diff --git a/tests/ovs-macros.at b/tests/ovs-macros.at index 66545da572..e6c5bc6e94 100644 --- a/tests/ovs-macros.at +++ b/tests/ovs-macros.at @@ -259,7 +259,20 @@ dnl Executes shell COMMAND in a loop until it returns zero. If COMMAND does dnl not return zero within a reasonable time limit, executes the commands dnl in IF-FAILED (if provided) and fails the test. m4_define([OVS_WAIT_UNTIL], - [OVS_WAIT([$1], [$2], [AT_LINE], [until $1])]) + [AT_FAIL_IF([test "$#" -ge 3]) + dnl The second argument should not be a number (confused with AT_CHECK ?). + AT_FAIL_IF([test "$#" -eq 2 && test "$2" -eq "$2" 2>/dev/null]) + OVS_WAIT([$1], [$2], [AT_LINE], [until $1])]) + +dnl OVS_WAIT_UNTIL_EQUAL(COMMAND, OUTPUT) +dnl +dnl Executes shell COMMAND in a loop until it returns zero and the output +dnl equals OUTPUT. If COMMAND does not return zero or a desired output within +dnl a reasonable time limit, fails the test. +m4_define([OVS_WAIT_UNTIL_EQUAL], + [AT_FAIL_IF([test "$#" -ge 3]) + echo "$2" > wait_until_expected + OVS_WAIT_UNTIL([$1 | diff -u wait_until_expected - ])]) dnl OVS_WAIT_WHILE(COMMAND, [IF-FAILED]) dnl @@ -267,7 +280,10 @@ dnl Executes shell COMMAND in a loop until it returns nonzero. If COMMAND does dnl not return nonzero within a reasonable time limit, executes the commands dnl in IF-FAILED (if provided) and fails the test. m4_define([OVS_WAIT_WHILE], - [OVS_WAIT([if $1; then return 1; else return 0; fi], [$2], + [AT_FAIL_IF([test "$#" -ge 3]) + dnl The second argument should not be a number (confused with AT_CHECK ?). 
+ AT_FAIL_IF([test "$#" -eq 2 && test "$2" -eq "$2" 2>/dev/null]) + OVS_WAIT([if $1; then return 1; else return 0; fi], [$2], [AT_LINE], [while $1])]) dnl OVS_APP_EXIT_AND_WAIT(DAEMON) diff --git a/tests/ovs-ofctl.at b/tests/ovs-ofctl.at index 604f15c2d1..c93cb9f16c 100644 --- a/tests/ovs-ofctl.at +++ b/tests/ovs-ofctl.at @@ -3246,3 +3246,22 @@ dnl because we need ovs-vswitchd to have the controller config before starting dnl the controller to 'snoop' the OpenFlow messages from beginning OVS_VSWITCHD_STOP(["/connection failed (No such file or directory)/d"]) AT_CLEANUP + + +AT_SETUP([ovs-ofctl show-flows - Oversized flow]) +OVS_VSWITCHD_START + +printf " priority=90,icmp,reg15=0x8005,metadata=0x1,nw_dst=11.0.0.1,icmp_type=8,icmp_code=0 actions=" > flow.txt +for i in `seq 1 1022`; do printf "set_field:0x399->reg13,set_field:0x$i->reg15,resubmit(,39),"; done >> flow.txt +printf "resubmit(,39)\n" >> flow.txt + +AT_CHECK([ovs-ofctl -O OpenFlow15 add-flows br0 flow.txt]) + +AT_CHECK([ovs-ofctl -O OpenFlow10 dump-flows br0 | ofctl_strip | sed '/NXST_FLOW/d' | sort], [0], []) +OVS_WAIT_UNTIL([grep -q "ofp_flow|WARN|Flow exceeded the maximum flow statistics reply size and was excluded from the response set" ovs-vswitchd.log]) + +cat flow.txt > expout +AT_CHECK([ovs-ofctl -O OpenFlow15 dump-flows br0 | ofctl_strip | sed '/OFPST_FLOW/d' | sort], [0], [expout]) + +OVS_VSWITCHD_STOP(["/Flow exceeded the maximum flow statistics reply size and was excluded from the response set/d"]) +AT_CLEANUP diff --git a/tests/ovsdb-client.at b/tests/ovsdb-client.at index 06b671df8c..2d14f1ac26 100644 --- a/tests/ovsdb-client.at +++ b/tests/ovsdb-client.at @@ -3,6 +3,7 @@ AT_BANNER([OVSDB -- ovsdb-client commands]) AT_SETUP([ovsdb-client get-schema-version and get-schema-cksum]) AT_KEYWORDS([ovsdb client positive]) ordinal_schema > schema +on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-tool create db schema], [0], [ignore], [ignore]) AT_CHECK([ovsdb-server --detach --no-chdir --pidfile 
--remote=punix:socket db], [0], [ignore], [ignore]) AT_CHECK([ovsdb-client get-schema-version unix:socket ordinals], [0], [5.1.3 @@ -14,6 +15,7 @@ AT_CLEANUP AT_SETUP([ovsdb-client needs-conversion (no conversion needed)]) AT_KEYWORDS([ovsdb client file positive]) +on_exit 'kill `cat *.pid`' ordinal_schema > schema touch .db.~lock~ AT_CHECK([ovsdb-tool create db schema], [0], [], [ignore]) @@ -27,6 +29,7 @@ AT_SETUP([ovsdb-client needs-conversion (conversion needed)]) AT_KEYWORDS([ovsdb client file positive]) ordinal_schema > schema touch .db.~lock~ +on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-tool create db schema], [0], [], [ignore]) AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=punix:socket db], [0], [ignore], [ignore]) sed 's/5\.1\.3/5.1.4/' < schema > schema2 diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at index fc6253cfe9..0f7076a052 100644 --- a/tests/ovsdb-cluster.at +++ b/tests/ovsdb-cluster.at @@ -400,6 +400,61 @@ done AT_CLEANUP +AT_BANNER([OVSDB - cluster failure while joining]) +AT_SETUP([OVSDB cluster - follower crash while joining]) +AT_KEYWORDS([ovsdb server negative unix cluster join]) + +n=3 +schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` +ordinal_schema > schema +AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db dnl + $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr]) +cid=`ovsdb-tool db-cid s1.db` +schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` +for i in `seq 2 $n`; do + AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft]) +done + +on_exit 'kill `cat *.pid`' + +dnl Starting followers first, so we can configure them to crash on join. 
+for j in `seq $n`; do + i=$(($n + 1 - $j)) + AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off dnl + --detach --no-chdir --log-file=s$i.log dnl + --pidfile=s$i.pid --unixctl=s$i dnl + --remote=punix:s$i.ovsdb s$i.db]) + if test $i != 1; then + OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s$i dnl + cluster/failure-test crash-before-sending-install-snapshot-reply dnl + | grep -q "engaged"]) + fi +done + +dnl Make sure that followers really crashed. +for i in `seq 2 $n`; do + OVS_WAIT_WHILE([test -s s$i.pid]) +done + +dnl Bring them back. +for i in `seq 2 $n`; do + AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off dnl + --detach --no-chdir --log-file=s$i.log dnl + --pidfile=s$i.pid --unixctl=s$i dnl + --remote=punix:s$i.ovsdb s$i.db]) +done + +dnl Make sure that all servers joined the cluster. +for i in `seq $n`; do + AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected]) +done + +for i in `seq $n`; do + OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid]) +done + +AT_CLEANUP + OVS_START_SHELL_HELPERS @@ -413,12 +468,12 @@ ovsdb_cluster_failure_test () { if test "$crash_node" == "1"; then new_leader=$5 fi + log_grep=$6 cp $top_srcdir/vswitchd/vswitch.ovsschema schema schema=`ovsdb-tool schema-name schema` - AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [dnl -ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral to persistent, including 'status' column in 'Manager' table, because clusters do not support ephemeral columns -]) + AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [stderr]) + AT_CHECK([sed < stderr "/ovsdb|WARN|schema: changed .* columns in 'Open_vSwitch' database from ephemeral to persistent/d"]) n=3 join_cluster() { @@ -434,7 +489,7 @@ ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral start_server() { local i=$1 printf "\ns$i: starting\n" - AT_CHECK([ovsdb-server -vjsonrpc 
-vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) + AT_CHECK([ovsdb-server -vjsonrpc -vraft -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) } connect_server() { local i=$1 @@ -460,14 +515,23 @@ ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral fi AT_CHECK([ovs-appctl -t "`pwd`"/s$delay_election_node cluster/failure-test delay-election], [0], [ignore]) fi + + # Initializing the database separately to avoid extra 'wait' operation + # in later transactions. + AT_CHECK([ovs-vsctl -v --db="$db" --no-leader-only --no-shuffle-remotes --no-wait init], [0], [ignore], [ignore]) + AT_CHECK([ovs-appctl -t "`pwd`"/s$crash_node cluster/failure-test $crash_command], [0], [ignore]) AT_CHECK([ovs-vsctl -v --db="$db" --no-leader-only --no-shuffle-remotes --no-wait create QoS type=x], [0], [ignore], [ignore]) - # Make sure that the node really crashed. - AT_CHECK([ls s$crash_node.ovsdb], [2], [ignore], [ignore]) - # XXX: Client will fail if remotes contains unix socket that doesn't exist (killed). - if test "$remote_1" = "$crash_node"; then - db=unix:s$remote_2.ovsdb + # Make sure that the node really crashed or has specific log message. + if test -z "$log_grep"; then + AT_CHECK([ls s$crash_node.ovsdb], [2], [ignore], [ignore]) + # XXX: Client will fail if remotes contains unix socket that doesn't exist (killed). 
+ if test "$remote_1" = "$crash_node"; then + db=unix:s$remote_2.ovsdb + fi + else + OVS_WAIT_UNTIL([grep -q "$log_grep" s${crash_node}.log]) fi AT_CHECK([ovs-vsctl --db="$db" --no-leader-only --no-wait --columns=type --bare list QoS], [0], [x ]) @@ -563,6 +627,11 @@ AT_KEYWORDS([ovsdb server negative unix cluster pending-txn]) ovsdb_cluster_failure_test 2 2 3 crash-after-receiving-append-request-update AT_CLEANUP +AT_SETUP([OVSDB cluster - txn on leader, leader transfers leadership after sending appendReq]) +AT_KEYWORDS([ovsdb server negative unix cluster pending-txn transfer]) +ovsdb_cluster_failure_test 1 2 1 transfer-leadership-after-sending-append-request -1 "Transferring leadership" +AT_CLEANUP + AT_SETUP([OVSDB cluster - competing candidates]) AT_KEYWORDS([ovsdb server negative unix cluster competing-candidates]) @@ -629,9 +698,8 @@ ovsdb_torture_test () { local variant=$3 # 'kill' and restart or 'remove' and add cp $top_srcdir/vswitchd/vswitch.ovsschema schema schema=`ovsdb-tool schema-name schema` - AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [dnl -ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral to persistent, including 'status' column in 'Manager' table, because clusters do not support ephemeral columns -]) + AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [stderr]) + AT_CHECK([sed < stderr "/ovsdb|WARN|schema: changed .* columns in 'Open_vSwitch' database from ephemeral to persistent/d"]) join_cluster() { local i=$1 diff --git a/tests/ovsdb-data.at b/tests/ovsdb-data.at index 8cd2a26cb3..25c6acdac6 100644 --- a/tests/ovsdb-data.at +++ b/tests/ovsdb-data.at @@ -846,18 +846,21 @@ OVSDB_CHECK_POSITIVE([generate and apply diff -- integer], [[diff-data '["integer"]' '[0]' '[2]']], [[diff: 2 apply diff: 2 +apply diff in place: 2 OK]]) OVSDB_CHECK_POSITIVE([generate and apply diff -- boolean], [[diff-data '["boolean"]' 
'[true]' '[false]']], [[diff: false apply diff: false +apply diff in place: false OK]]) OVSDB_CHECK_POSITIVE([generate and apply diff -- string], [[diff-data '["string"]' '["AAA"]' '["BBB"]']], [[diff: "BBB" apply diff: "BBB" +apply diff in place: "BBB" OK]]) dnl Test set modifications. @@ -870,15 +873,19 @@ OVSDB_CHECK_POSITIVE([generate and apply diff -- set], ]], [[diff: ["set",[0,2]] apply diff: ["set",[1,2]] +apply diff in place: ["set",[1,2]] OK diff: 0 apply diff: 1 +apply diff in place: 1 OK diff: ["set",[0,1]] apply diff: ["set",[0,1]] +apply diff in place: ["set",[0,1]] OK diff: ["set",[0,1]] apply diff: ["set",[]] +apply diff in place: ["set",[]] OK]]) dnl Test set modifications causes data to violate set size constrain. @@ -898,18 +905,23 @@ OVSDB_CHECK_POSITIVE([generate and apply diff -- map], ]], [[diff: ["map",[["2 gills","1 chopin"],["2 pints","1 quart"]]] apply diff: ["map",[["2 pints","1 quart"]]] +apply diff in place: ["map",[["2 pints","1 quart"]]] OK diff: ["map",[]] apply diff: ["map",[["2 gills","1 chopin"]]] +apply diff in place: ["map",[["2 gills","1 chopin"]]] OK diff: ["map",[["2 gills","1 chopin"]]] apply diff: ["map",[]] +apply diff in place: ["map",[]] OK diff: ["map",[["2 pints","1 quart"]]] apply diff: ["map",[["2 pints","1 quart"]]] +apply diff in place: ["map",[["2 pints","1 quart"]]] OK diff: ["map",[["2 gills","1 gallon"]]] apply diff: ["map",[["2 gills","1 gallon"]]] +apply diff in place: ["map",[["2 gills","1 gallon"]]] OK]]) OVSDB_CHECK_NEGATIVE([generate and apply diff with map -- size error], diff --git a/tests/ovsdb-idl.at b/tests/ovsdb-idl.at index 1386f13770..91d34d0de6 100644 --- a/tests/ovsdb-idl.at +++ b/tests/ovsdb-idl.at @@ -225,7 +225,7 @@ m4_define([OVSDB_CHECK_IDL_TCP6_MULTIPLE_REMOTES_PY], m4_define([OVSDB_CHECK_IDL_SSL_PY], [AT_SETUP([$1 - Python3 - SSL]) AT_SKIP_IF([test "$HAVE_OPENSSL" = no]) - $PYTHON3 -c "import OpenSSL.SSL" + $PYTHON3 -c "import ssl" SSL_PRESENT=$? 
AT_SKIP_IF([test $SSL_PRESENT != 0]) AT_KEYWORDS([ovsdb server idl positive Python with ssl socket $5]) @@ -2309,7 +2309,7 @@ OVSDB_CHECK_CLUSTER_IDL_C([simple idl, monitor_cond_since, cluster disconnect], 'condition simple [["i","==",2]]' \ 'condition simple [["i","==",1]]' \ '+reconnect' \ - '["idltest", + '?["idltest", {"op": "update", "table": "simple", "where": [["i", "==", 1]], @@ -2320,7 +2320,7 @@ OVSDB_CHECK_CLUSTER_IDL_C([simple idl, monitor_cond_since, cluster disconnect], 003: table simple: i=2 r=1 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 004: change conditions 005: reconnect -006: table simple: i=2 r=1 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +006: table simple 007: {"error":null,"result":[{"count":1}]} 008: table simple: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> 009: done diff --git a/tests/ovsdb-server.at b/tests/ovsdb-server.at index ac243d6a79..e672c13b27 100644 --- a/tests/ovsdb-server.at +++ b/tests/ovsdb-server.at @@ -4,7 +4,7 @@ m4_define([OVSDB_SERVER_SHUTDOWN], [OVS_APP_EXIT_AND_WAIT_BY_TARGET([ovsdb-server], [ovsdb-server.pid])]) m4_define([OVSDB_SERVER_SHUTDOWN_N], - [cp pid$1 savepid$1 + [cp $1.pid savepid$1 AT_CHECK([ovs-appctl -t "`pwd`"/unixctl$1 -e exit], [0], [ignore], [ignore]) OVS_WAIT_WHILE([kill -0 `cat savepid$1`], [kill `cat savepid$1`])]) @@ -30,14 +30,13 @@ m4_define([OVSDB_CHECK_EXECUTION], AT_KEYWORDS([ovsdb server positive unix $5]) $2 > schema AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore]) + on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=punix:socket db], [0], [ignore], [ignore]) m4_foreach([txn], [$3], - [AT_CHECK([ovsdb-client transact unix:socket 'txn'], [0], [stdout], [ignore], - [test ! -e pid || kill `cat pid`]) + [AT_CHECK([ovsdb-client transact unix:socket 'txn'], [0], [stdout], [ignore]) cat stdout >> output ]) - AT_CHECK([uuidfilt output], [0], [$4], [ignore], - [test ! 
-e pid || kill `cat pid`]) + AT_CHECK([uuidfilt output], [0], [$4], [ignore]) OVSDB_SERVER_SHUTDOWN AT_CLEANUP]) @@ -88,8 +87,7 @@ AT_CHECK([uuidfilt output], [0], [[[{"uuid":["uuid","<0>"]}] [{"uuid":["uuid","<1>"]}] [{"rows":[{"_uuid":["uuid","<0>"],"_version":["uuid","<2>"],"name":"zero","number":0},{"_uuid":["uuid","<1>"],"_version":["uuid","<3>"],"name":"one","number":1}]}] -]], [], - [test ! -e pid || kill `cat pid`]) +]], []) AT_CLEANUP AT_SETUP([truncating database log with bad transaction]) @@ -136,8 +134,7 @@ AT_CHECK([uuidfilt output], [0], [[[{"uuid":["uuid","<0>"]}] [{"uuid":["uuid","<1>"]}] [{"rows":[{"_uuid":["uuid","<0>"],"_version":["uuid","<2>"],"name":"zero","number":0},{"_uuid":["uuid","<1>"],"_version":["uuid","<3>"],"name":"one","number":1}]}] -]], [], - [test ! -e pid || kill `cat pid`]) +]], []) AT_CLEANUP dnl CHECK_DBS([databases]) @@ -159,6 +156,7 @@ ordinal_schema > schema1 constraint_schema > schema2 AT_CHECK([ovsdb-tool create db1 schema1], [0], [ignore], [ignore]) AT_CHECK([ovsdb-tool create db2 schema2], [0], [ignore], [ignore]) +on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=punix:db.sock db1 db2], [0], [ignore], [ignore]) CHECK_DBS([constraints ordinals @@ -166,7 +164,7 @@ ordinals AT_CHECK( [[ovstest test-jsonrpc request unix:db.sock get_schema [\"nonexistent\"]]], [0], [[{"error":{"details":"get_schema request specifies unknown database nonexistent","error":"unknown database","syntax":"[\"nonexistent\"]"},"id":0,"result":null} -]], [], [test ! 
-e pid || kill `cat pid`]) +]], []) OVSDB_SERVER_SHUTDOWN AT_CLEANUP @@ -393,7 +391,7 @@ AT_CHECK( "table": "Manager", "uuid-name": "x", "row": {"target": "punix:socket2"}}]']], [0], [ignore], [ignore]) -on_exit 'kill `cat ovsdb-server.pid`' +on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=db:mydb,Root,managers --remote=db:mydb,Root,manager_options --log-file db], [0], [ignore], [ignore]) ovs-appctl -t ovsdb-server time/warp 6000 1000 AT_CHECK( @@ -686,6 +684,7 @@ ovsdb_check_online_compaction() { ovsdb-tool create-cluster db schema unix:s1.raft fi]) dnl Start ovsdb-server. + on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-server -vvlog:off -vconsole:off --detach --no-chdir --pidfile --remote=punix:socket --log-file db], [0]) AT_CHECK([ovsdb_client_wait unix:socket ordinals connected]) AT_CAPTURE_FILE([ovsdb-server.log]) @@ -832,7 +831,7 @@ _uuid name number <0> five 5 <1> four 4 <2> three 3 -], [], [test ! -e pid || kill `cat pid`]) +], []) OVSDB_SERVER_SHUTDOWN } OVS_END_SHELL_HELPERS @@ -1228,6 +1227,71 @@ AT_CHECK([test $logged_updates -lt $logged_nonblock_updates]) AT_CHECK_UNQUOTED([ovs-vsctl get open_vswitch . system_version], [0], [xyzzy$counter ]) +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) +AT_CLEANUP + +AT_SETUP([ovsdb-server transaction history size]) +on_exit 'kill `cat *.pid`' + +dnl Start an ovsdb-server with the clustered vswitchd schema. +AT_CHECK([ovsdb-tool create-cluster db dnl + $abs_top_srcdir/vswitchd/vswitch.ovsschema unix:s1.raft], + [0], [ignore], [ignore]) +AT_CHECK([ovsdb-server --detach --no-chdir --pidfile dnl + --log-file --remote=punix:db.sock db], + [0], [ignore], [ignore]) +AT_CHECK([ovs-vsctl --no-wait init]) + +dnl Create a bridge with N ports per transaction. Increase N every 4 +dnl iterations. And then remove the bridges. 
By increasing the size of +dnl transactions, we ensure that they take up a significant percentage of +dnl the total database size, so the transaction history will not be able +dnl to hold all of them. +dnl +dnl The test verifies that the number of atoms in the transaction history +dnl never exceeds the number of atoms in the database, except for +dnl a case where there is only one transaction in a history. +get_memory_value () { + n=$(ovs-appctl -t ovsdb-server memory/show dnl + | tr ' ' '\n' | grep "^$1:" | cut -d ':' -f 2) + if test X"$n" = "X"; then + n=0 + fi + echo $n +} + +check_atoms () { + if test $(get_memory_value txn-history) -eq 1; then return; fi + n_db_atoms=$(get_memory_value atoms) + n_txn_history_atoms=$(get_memory_value txn-history-atoms) + echo "n_db_atoms: $n_db_atoms" + echo "n_txn_history_atoms: $n_txn_history_atoms" + AT_CHECK([test $n_txn_history_atoms -le $n_db_atoms]) +} + +add_ports () { + for j in $(seq 1 $2); do + printf " -- add-port br$1 p$1-%d" $j + done +} + +initial_db_atoms=$(get_memory_value atoms) + +for i in $(seq 1 100); do + cmd=$(add_ports $i $(($i / 4 + 1))) + AT_CHECK([ovs-vsctl --no-wait add-br br$i $cmd]) + check_atoms +done + +for i in $(seq 1 100); do + AT_CHECK([ovs-vsctl --no-wait del-br br$i]) + check_atoms +done + +dnl After removing all the bridges, the number of atoms in the database +dnl should return to its initial value.
+AT_CHECK([test $(get_memory_value atoms) -eq $initial_db_atoms]) + OVS_APP_EXIT_AND_WAIT([ovsdb-server]) AT_CLEANUP @@ -1254,15 +1318,14 @@ m4_define([OVSDB_CHECK_EXECUTION], $2 > schema PKIDIR=$abs_top_builddir/tests AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore]) + on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-server --log-file --detach --no-chdir --pidfile --private-key=$PKIDIR/testpki-privkey2.pem --certificate=$PKIDIR/testpki-cert2.pem --ca-cert=$PKIDIR/testpki-cacert.pem --remote=pssl:0:127.0.0.1 db], [0], [ignore], [ignore]) PARSE_LISTENING_PORT([ovsdb-server.log], [SSL_PORT]) m4_foreach([txn], [$3], - [AT_CHECK([ovsdb-client --private-key=$PKIDIR/testpki-privkey.pem --certificate=$PKIDIR/testpki-cert.pem --ca-cert=$PKIDIR/testpki-cacert.pem transact ssl:127.0.0.1:$SSL_PORT 'txn'], [0], [stdout], [ignore], - [test ! -e pid || kill `cat pid`]) + [AT_CHECK([ovsdb-client --private-key=$PKIDIR/testpki-privkey.pem --certificate=$PKIDIR/testpki-cert.pem --ca-cert=$PKIDIR/testpki-cacert.pem transact ssl:127.0.0.1:$SSL_PORT 'txn'], [0], [stdout], [ignore]) cat stdout >> output ]) - AT_CHECK([uuidfilt output], [0], [$4], [ignore], - [test ! 
-e pid || kill `cat pid`]) + AT_CHECK([uuidfilt output], [0], [$4], [ignore]) OVSDB_SERVER_SHUTDOWN AT_CLEANUP]) @@ -1291,16 +1354,15 @@ m4_define([OVSDB_CHECK_EXECUTION], AT_SKIP_IF([test $HAVE_IPV6 = no]) $2 > schema PKIDIR=$abs_top_builddir/tests + on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-server --log-file --detach --no-chdir --pidfile --private-key=$PKIDIR/testpki-privkey2.pem --certificate=$PKIDIR/testpki-cert2.pem --ca-cert=$PKIDIR/testpki-cacert.pem --remote=pssl:0:[[::1]] db], [0], [ignore], [ignore]) PARSE_LISTENING_PORT([ovsdb-server.log], [SSL_PORT]) m4_foreach([txn], [$3], - [AT_CHECK([ovsdb-client --private-key=$PKIDIR/testpki-privkey.pem --certificate=$PKIDIR/testpki-cert.pem --ca-cert=$PKIDIR/testpki-cacert.pem transact ssl:[[::1]]:$SSL_PORT 'txn'], [0], [stdout], [ignore], - [test ! -e pid || kill `cat pid`]) + [AT_CHECK([ovsdb-client --private-key=$PKIDIR/testpki-privkey.pem --certificate=$PKIDIR/testpki-cert.pem --ca-cert=$PKIDIR/testpki-cacert.pem transact ssl:[[::1]]:$SSL_PORT 'txn'], [0], [stdout], [ignore]) cat stdout >> output ]) - AT_CHECK([uuidfilt output], [0], [$4], [ignore], - [test ! -e pid || kill `cat pid`]) + AT_CHECK([uuidfilt output], [0], [$4], [ignore]) OVSDB_SERVER_SHUTDOWN AT_CLEANUP]) @@ -1327,16 +1389,15 @@ m4_define([OVSDB_CHECK_EXECUTION], AT_KEYWORDS([ovsdb server positive tcp $5]) $2 > schema PKIDIR=$abs_top_builddir/tests + on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-server --log-file --detach --no-chdir --pidfile --remote=ptcp:0:127.0.0.1 db], [0], [ignore], [ignore]) PARSE_LISTENING_PORT([ovsdb-server.log], [TCP_PORT]) m4_foreach([txn], [$3], - [AT_CHECK([ovsdb-client transact tcp:127.0.0.1:$TCP_PORT 'txn'], [0], [stdout], [ignore], - [test ! 
-e pid || kill `cat pid`]) + [AT_CHECK([ovsdb-client transact tcp:127.0.0.1:$TCP_PORT 'txn'], [0], [stdout], [ignore]) cat stdout >> output ]) - AT_CHECK([uuidfilt output], [0], [$4], [ignore], - [test ! -e pid || kill `cat pid`]) + AT_CHECK([uuidfilt output], [0], [$4], [ignore]) OVSDB_SERVER_SHUTDOWN AT_CLEANUP]) @@ -1364,16 +1425,15 @@ m4_define([OVSDB_CHECK_EXECUTION], AT_SKIP_IF([test $HAVE_IPV6 = no]) $2 > schema PKIDIR=$abs_top_builddir/tests + on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-server --log-file --detach --no-chdir --pidfile --remote=ptcp:0:[[::1]] db], [0], [ignore], [ignore]) PARSE_LISTENING_PORT([ovsdb-server.log], [TCP_PORT]) m4_foreach([txn], [$3], - [AT_CHECK([ovsdb-client transact tcp:[[::1]]:$TCP_PORT 'txn'], [0], [stdout], [ignore], - [test ! -e pid || kill `cat pid`]) + [AT_CHECK([ovsdb-client transact tcp:[[::1]]:$TCP_PORT 'txn'], [0], [stdout], [ignore]) cat stdout >> output ]) - AT_CHECK([uuidfilt output], [0], [$4], [ignore], - [test ! -e pid || kill `cat pid`]) + AT_CHECK([uuidfilt output], [0], [$4], [ignore]) OVSDB_SERVER_SHUTDOWN AT_CLEANUP]) @@ -1453,9 +1513,9 @@ m4_define([OVSDB_CHECK_EXECUTION], target=4 $2 > schema schema_name=`ovsdb-tool schema-name schema` + on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-tool create db1 schema], [0], [stdout], [ignore]) - on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log dnl --pidfile --remote=punix:db1.sock db1 ], [0], [ignore], [ignore]) @@ -1511,12 +1571,11 @@ m4_define([OVSDB_CHECK_EXECUTION], AT_CHECK([ovsdb-tool create db1 schema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-tool create db2 schema], [0], [stdout], [ignore]) + on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock db1], [0], [ignore], [ignore]) i - on_exit 'test ! 
-e pid || kill `cat pid`' - AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=pid2 --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore]) - on_exit 'test ! -e pid2 || kill `cat pid2`' + AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore]) m4_foreach([txn], [$3], [AT_CHECK([ovsdb-client transact 'txn'], [0], [stdout], [ignore]) @@ -1557,11 +1616,10 @@ m4_define([OVSDB_CHECK_REPLICATION], AT_CHECK([ovsdb-tool create db1 schema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-tool create db2 schema], [0], [stdout], [ignore]) + on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock db1], [0], [ignore], [ignore]) - on_exit 'test ! -e pid || kill `cat pid`' - AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=pid2 --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock --sync-exclude-tables=mydb:b db2], [0], [ignore], [ignore]) - on_exit 'test ! 
-e pid2 || kill `cat pid2`' + AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock --sync-exclude-tables=mydb:b db2], [0], [ignore], [ignore]) m4_foreach([txn], [$3], [AT_CHECK([ ovsdb-client transact 'txn' ], [0], [stdout], [ignore]) @@ -1629,6 +1687,7 @@ AT_CLEANUP #ovsdb-server/set-sync-exclude-tables command AT_SETUP([ovsdb-server/set-sync-exclude-tables]) +on_exit 'kill `cat *.pid`' AT_KEYWORDS([ovsdb server replication set-exclude-tables]) AT_SKIP_IF([test $DIFF_SUPPORTS_NORMAL_FORMAT = no]) @@ -1637,12 +1696,10 @@ AT_CHECK([ovsdb-tool create db1 schema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-tool create db2 schema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock db1], [0], [ignore], [ignore]) -on_exit 'test ! -e pid || kill `cat pid`' -AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=pid2 --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore]) -on_exit 'test ! -e pid2 || kill `cat pid2`' +AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore]) -AT_CHECK([ovs-appctl -t "`pwd`"/unixctl2 ovsdb-server/set-sync-exclude-tables mydb:b], [0], [ignore], [ignore], [test ! -e pid || kill `cat pid`; test ! -e pid2 || kill `cat pid2`]) +AT_CHECK([ovs-appctl -t "`pwd`"/unixctl2 ovsdb-server/set-sync-exclude-tables mydb:b], [0], [ignore], [ignore]) AT_CHECK([ovsdb-client transact unix:db.sock \ '[["mydb", @@ -1651,11 +1708,9 @@ AT_CHECK([ovsdb-client transact unix:db.sock \ "row": {"number": 0, "name": "zero"}}, {"op": "insert", "table": "b", - "row": {"number": 1, "name": "one"}}]]'], [0], [stdout], [ignore], - [test ! -e pid || kill `cat pid`; test ! 
-e pid2 || kill `cat pid2`]) + "row": {"number": 1, "name": "one"}}]]'], [0], [stdout], [ignore]) -AT_CHECK([ovsdb-client dump unix:db.sock], [0], [stdout], [ignore], - [test ! -e pid || kill `cat pid`; test ! -e pid2 || kill `cat pid2`]) +AT_CHECK([ovsdb-client dump unix:db.sock], [0], [stdout], [ignore]) cat stdout > dump1 OVS_WAIT_UNTIL([ ovsdb-client dump unix:db2.sock | grep zero ]) AT_CHECK([ovsdb-client dump unix:db2.sock], [0], [stdout], [ignore]) @@ -1679,16 +1734,15 @@ AT_CLEANUP #ovsdb-server/connect-active-ovsdb-server AT_SETUP([ovsdb-server/connect-active-server]) +on_exit 'kill `cat *.pid`' AT_KEYWORDS([ovsdb server replication connect-active-server]) replication_schema > schema AT_CHECK([ovsdb-tool create db1 schema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-tool create db2 schema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock db1], [0], [ignore], [ignore]) -on_exit 'test ! -e pid || kill `cat pid`' -AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=pid2 --remote=punix:db2.sock --unixctl=unixctl2 db2], [0], [ignore], [ignore]) -on_exit 'test ! -e pid2 || kill `cat pid2`' +AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl=unixctl2 db2], [0], [ignore], [ignore]) dnl Try to connect without specifying the active server. 
AT_CHECK([ovs-appctl -t "`pwd`"/unixctl2 ovsdb-server/connect-active-ovsdb-server], [0], @@ -1718,6 +1772,7 @@ AT_CLEANUP #ovsdb-server/disconnect-active-server command AT_SETUP([ovsdb-server/disconnect-active-server]) +on_exit 'kill `cat *.pid`' AT_KEYWORDS([ovsdb server replication disconnect-active-server]) AT_SKIP_IF([test $DIFF_SUPPORTS_NORMAL_FORMAT = no]) @@ -1726,10 +1781,8 @@ AT_CHECK([ovsdb-tool create db1 schema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-tool create db2 schema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock db1], [0], [ignore], [ignore]) -on_exit 'test ! -e pid || kill `cat pid`' -AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=pid2 --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore]) -on_exit 'test ! -e pid2 || kill `cat pid2`' +AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore]) AT_CHECK([ovsdb-client transact unix:db.sock \ '[["mydb", @@ -1775,7 +1828,7 @@ AT_CHECK([uuidfilt output], [0], [7,9c7,8 --- > _uuid name number > ----- ---- ------ -], [ignore], [test ! -e pid || kill `cat pid`; test ! -e pid2 || kill `cat pid2`]) +], [ignore]) dnl The backup server now become active, and can accept write transactions. AT_CHECK([ovsdb-client transact unix:db2.sock \ @@ -1826,13 +1879,12 @@ dnl Start both 'db1' and 'db2' in backup mode. Let them backup from each dnl other. This is not an supported operation state, but to simulate a start dnl up condition where an HA manger can select which one to be an active dnl server soon after. 
-AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile="`pwd`"/pid --remote=punix:db.sock --unixctl="`pwd`"/unixctl db1 --sync-from=unix:db2.sock --active ], [0], [ignore], [ignore]) -on_exit 'test ! -e pid || kill `cat pid`' +on_exit 'kill `cat *.pid`' +AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock --unixctl="`pwd`"/unixctl db1 --sync-from=unix:db2.sock --active ], [0], [ignore], [ignore]) AT_CHECK([ovs-appctl -t "`pwd`"/unixctl ovsdb-server/connect-active-ovsdb-server]) -AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile="`pwd`"/pid2 --remote=punix:db2.sock --unixctl="`pwd`"/unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore]) -on_exit 'test ! -e pid2 || kill `cat pid2`' +AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl="`pwd`"/unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore]) dnl dnl make sure both servers reached the replication state @@ -1900,8 +1952,8 @@ AT_CHECK([ovsdb-tool transact db \ "row": {"number": 9, "name": "nine"}}]]'], [0], [ignore], [ignore]) dnl Start 'db', then try to be a back up server of itself. -AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server.log --pidfile="`pwd`"/pid --remote=punix:db.sock --unixctl="`pwd`"/unixctl db --sync-from=unix:db.sock --active ], [0], [ignore], [ignore]) -on_exit 'test ! 
-e pid || kill `cat pid`' +on_exit 'kill `cat *.pid`' +AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server.log --pidfile --remote=punix:db.sock --unixctl="`pwd`"/unixctl db --sync-from=unix:db.sock --active ], [0], [ignore], [ignore]) dnl Save the current content AT_CHECK([ovsdb-client dump unix:db.sock], [0], [stdout]) @@ -1919,6 +1971,7 @@ AT_CHECK([diff dump1 dump2]) AT_CLEANUP AT_SETUP([ovsdb-server/read-only db:ptcp connection]) +on_exit 'kill `cat *.pid`' AT_KEYWORDS([ovsdb server read-only]) AT_DATA([schema], [[{"name": "mydb", @@ -2007,12 +2060,10 @@ AT_CHECK([ovsdb-tool transact db2 \ "row": {"number": 10, "name": "ten"}}]]'], [0], [ignore], [ignore]) dnl Start both 'db1' and 'db2'. -AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile="`pwd`"/pid --remote=punix:db.sock --unixctl="`pwd`"/unixctl db1 --active ], [0], [ignore], [ignore]) -on_exit 'test ! -e pid || kill `cat pid`' - +on_exit 'kill `cat *.pid`' +AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock --unixctl="`pwd`"/unixctl db1 --active ], [0], [ignore], [ignore]) -AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile="`pwd`"/pid2 --remote=punix:db2.sock --unixctl="`pwd`"/unixctl2 db2], [0], [ignore], [ignore]) -on_exit 'test ! -e pid2 || kill `cat pid2`' +AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl="`pwd`"/unixctl2 db2], [0], [ignore], [ignore]) OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/unixctl ovsdb-server/sync-status |grep active]) OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/unixctl2 ovsdb-server/sync-status |grep active]) @@ -2112,7 +2163,7 @@ dnl Starting a dummy server only to reserve some tcp port. 
AT_CHECK([cp db db.tmp]) AT_CHECK([ovsdb-server -vfile -vvlog:off --log-file=listener.log dnl --detach --no-chdir dnl - --pidfile=pid2 --unixctl=unixctl2 dnl + --pidfile=2.pid --unixctl=unixctl2 dnl --remote=ptcp:0:127.0.0.1 dnl db.tmp], [0], [stdout], [stderr]) PARSE_LISTENING_PORT([listener.log], [BAD_TCP_PORT]) diff --git a/tests/pmd.at b/tests/pmd.at index 225d4ee3a4..a7cbf9a81b 100644 --- a/tests/pmd.at +++ b/tests/pmd.at @@ -199,7 +199,7 @@ pmd thread numa_id core_id : OVS_VSWITCHD_STOP AT_CLEANUP -AT_SETUP([PMD - pmd-cpu-mask - NUMA]) +AT_SETUP([PMD - pmd-cpu-mask - dual NUMA]) OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy-pmd options:n_rxq=8 options:numa_id=1 -- set Open_vSwitch . other_config:pmd-cpu-mask=1], [], [], [--dummy-numa 1,1,0,0]) @@ -359,6 +359,44 @@ pmd thread numa_id 1 core_id 0: OVS_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([PMD - pmd-cpu-mask - multi NUMA]) +OVS_VSWITCHD_START([add-port br0 p0 \ + -- set Interface p0 type=dummy-pmd options:n_rxq=4 \ + -- set Interface p0 options:numa_id=0 \ + -- set Open_vSwitch . other_config:pmd-cpu-mask=0xf \ + -- set open_vswitch . other_config:pmd-rxq-assign=cycles], + [], [], [--dummy-numa 1,2,1,2]) + +TMP=$(($(cat ovs-vswitchd.log | wc -l | tr -d [[:blank:]])+1)) +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-rxq-assign=group]) + +OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "Performing pmd to rx queue assignment using group algorithm"]) +OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "There's no available (non-isolated) pmd thread on numa node 0."]) + +# check all pmds from both non-local numas are assigned an rxq +AT_CHECK([test `ovs-appctl dpif-netdev/pmd-rxq-show | awk '/AVAIL$/ { printf("%s\t", $0); next } 1' | parse_pmd_rxq_show_group | wc -l` -eq 4]) + +TMP=$(($(cat ovs-vswitchd.log | wc -l | tr -d [[:blank:]])+1)) +AT_CHECK([ovs-vsctl set Open_vSwitch . 
other_config:pmd-rxq-assign=cycles]) + +OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "Performing pmd to rx queue assignment using cycles algorithm"]) +OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "There's no available (non-isolated) pmd thread on numa node 0."]) + +# check all pmds from both non-local numas are assigned an rxq +AT_CHECK([test `ovs-appctl dpif-netdev/pmd-rxq-show | awk '/AVAIL$/ { printf("%s\t", $0); next } 1' | parse_pmd_rxq_show_group | wc -l` -eq 4]) + +TMP=$(($(cat ovs-vswitchd.log | wc -l | tr -d [[:blank:]])+1)) +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-rxq-assign=roundrobin]) + +OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "Performing pmd to rx queue assignment using roundrobin algorithm"]) +OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "There's no available (non-isolated) pmd thread on numa node 0."]) + +# check all pmds from both non-local numas are assigned an rxq +AT_CHECK([test `ovs-appctl dpif-netdev/pmd-rxq-show | awk '/AVAIL$/ { printf("%s\t", $0); next } 1' | parse_pmd_rxq_show_group | wc -l` -eq 4]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([PMD - stats]) OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 ofport_request=7 type=dummy-pmd options:n_rxq=4], [], [], [DUMMY_NUMA]) diff --git a/tests/reconnect.at b/tests/reconnect.at index 0f74709f5a..5bca84351c 100644 --- a/tests/reconnect.at +++ b/tests/reconnect.at @@ -39,8 +39,19 @@ run connected # Try timeout without noting that we tried to receive. -# (This does nothing since we never timeout in this case.) +# Timeout should be scheduled to the next probe interval. timeout +run + +# Once we reached the timeout, it should not expire until the receive actually +# attempted. However, we still need to wake up as soon as possible in order to +# have a chance to mark the receive attempt even if nothing was received. +timeout +run + +# Short time advance past the original probe interval, but not expired still. 
+timeout +run # Now disable the receive-attempted feature and timeout again. receive-attempted LLONG_MAX @@ -67,18 +78,37 @@ connected last connected 0 ms ago, connected 0 ms total # Try timeout without noting that we tried to receive. -# (This does nothing since we never timeout in this case.) -timeout - no timeout - -# Now disable the receive-attempted feature and timeout again. -receive-attempted LLONG_MAX +# Timeout should be scheduled to the next probe interval. timeout advance 5000 ms ### t=6000 ### in ACTIVE for 5000 ms (0 ms backoff) run + +# Once we reached the timeout, it should not expire until the receive actually +# attempted. However, we still need to wake up as soon as possible in order to +# have a chance to mark the receive attempt even if nothing was received. +timeout + advance 1 ms + +### t=6001 ### + in ACTIVE for 5001 ms (0 ms backoff) +run + +# Short time advance past the original probe interval, but not expired still. +timeout + advance 1 ms + +### t=6002 ### + in ACTIVE for 5002 ms (0 ms backoff) +run + +# Now disable the receive-attempted feature and timeout again. +receive-attempted LLONG_MAX +timeout + advance 0 ms +run should send probe in IDLE for 0 ms (0 ms backoff) @@ -86,7 +116,7 @@ run timeout advance 5000 ms -### t=11000 ### +### t=11002 ### in IDLE for 5000 ms (0 ms backoff) run should disconnect @@ -94,7 +124,7 @@ disconnected in BACKOFF for 0 ms (1000 ms backoff) 1 successful connections out of 1 attempts, seqno 2 disconnected - disconnected at 11000 ms (0 ms ago) + disconnected at 11002 ms (0 ms ago) ]) ###################################################################### @@ -111,8 +141,19 @@ run connected # Try timeout without noting that we tried to receive. -# (This does nothing since we never timeout in this case.) +# Timeout should be scheduled to the next probe interval. +timeout +run + +# Once we reached the timeout, it should not expire until the receive actually +# attempted. 
However, we still need to wake up as soon as possible in order to +# have a chance to mark the receive attempt even if nothing was received. +timeout +run + +# Short time advance past the original probe interval, but not expired still. timeout +run # Now disable the receive-attempted feature and timeout again. receive-attempted LLONG_MAX @@ -148,18 +189,37 @@ connected last connected 0 ms ago, connected 0 ms total # Try timeout without noting that we tried to receive. -# (This does nothing since we never timeout in this case.) -timeout - no timeout - -# Now disable the receive-attempted feature and timeout again. -receive-attempted LLONG_MAX +# Timeout should be scheduled to the next probe interval. timeout advance 5000 ms ### t=6500 ### in ACTIVE for 5000 ms (0 ms backoff) run + +# Once we reached the timeout, it should not expire until the receive actually +# attempted. However, we still need to wake up as soon as possible in order to +# have a chance to mark the receive attempt even if nothing was received. +timeout + advance 1 ms + +### t=6501 ### + in ACTIVE for 5001 ms (0 ms backoff) +run + +# Short time advance past the original probe interval, but not expired still. +timeout + advance 1 ms + +### t=6502 ### + in ACTIVE for 5002 ms (0 ms backoff) +run + +# Now disable the receive-attempted feature and timeout again. +receive-attempted LLONG_MAX +timeout + advance 0 ms +run should send probe in IDLE for 0 ms (0 ms backoff) @@ -167,7 +227,7 @@ run timeout advance 5000 ms -### t=11500 ### +### t=11502 ### in IDLE for 5000 ms (0 ms backoff) run should disconnect @@ -175,7 +235,7 @@ disconnected in BACKOFF for 0 ms (1000 ms backoff) 1 successful connections out of 1 attempts, seqno 2 disconnected - disconnected at 11500 ms (0 ms ago) + disconnected at 11502 ms (0 ms ago) ]) ###################################################################### @@ -1271,14 +1331,14 @@ activity created 1000, last activity 3000, last connected 2000 # Connection times out. 
-timeout - no timeout -receive-attempted LLONG_MAX timeout advance 5000 ms ### t=8000 ### in ACTIVE for 6000 ms (1000 ms backoff) +receive-attempted LLONG_MAX +timeout + advance 0 ms run should send probe in IDLE for 0 ms (1000 ms backoff) diff --git a/tests/system-common-macros.at b/tests/system-common-macros.at index 19a0b125b9..8b9f5c7525 100644 --- a/tests/system-common-macros.at +++ b/tests/system-common-macros.at @@ -281,6 +281,14 @@ m4_define([OVS_START_L7], # m4_define([OFPROTO_CLEAR_DURATION_IDLE], [[sed -e 's/duration=.*s,/duration=,/g' -e 's/idle_age=[0-9]*,/idle_age=,/g']]) +# OVS_CHECK_TUNNEL_TSO() +# +# Macro to be used in general tunneling tests that could be also +# used by system-tso. In that case, tunneling is not supported and +# the test should be skipped. +m4_define([OVS_CHECK_TUNNEL_TSO], + [m4_ifdef([CHECK_SYSTEM_TSO], [AT_SKIP_IF(:)])]) + # OVS_CHECK_VXLAN() # # Do basic check for vxlan functionality, skip the test if it's not there. diff --git a/tests/system-dpdk.at b/tests/system-dpdk.at index e0e750fde5..512aa87d4c 100644 --- a/tests/system-dpdk.at +++ b/tests/system-dpdk.at @@ -248,6 +248,10 @@ AT_CHECK([ovs-vsctl show], [], [stdout]) AT_SKIP_IF([! ovs-appctl dpif-netdev/miniflow-parser-get | sed 1,4d | grep "True"], [], [dnl ]) +AT_CHECK([ovs-appctl dpif-netdev/dpif-impl-set dpif_avx512], [0], [dnl +DPIF implementation set to dpif_avx512. +]) + AT_CHECK([ovs-appctl dpif-netdev/miniflow-parser-set autovalidator], [0], [dnl Miniflow extract implementation set to autovalidator. ]) @@ -275,6 +279,10 @@ AT_CHECK([ovs-vsctl show], [], [stdout]) AT_SKIP_IF([! ovs-appctl dpif-netdev/miniflow-parser-get | sed 1,4d | grep "True"], [], [dnl ]) +AT_CHECK([ovs-appctl dpif-netdev/dpif-impl-set dpif_avx512], [0], [dnl +DPIF implementation set to dpif_avx512. +]) + AT_CHECK([ovs-appctl dpif-netdev/miniflow-parser-set autovalidator], [0], [dnl Miniflow extract implementation set to autovalidator. 
]) diff --git a/tests/system-route.at b/tests/system-route.at index 1714273e35..270956d13f 100644 --- a/tests/system-route.at +++ b/tests/system-route.at @@ -14,10 +14,9 @@ dnl Add ip address. AT_CHECK([ip addr add 10.0.0.17/24 dev p1-route], [0], [stdout]) dnl Check that OVS catches route updates. -OVS_WAIT_UNTIL([ovs-appctl ovs/route/show | grep 'p1-route' | sort], [0], [dnl -Cached: 10.0.0.17/24 dev p1-route SRC 10.0.0.17 -Cached: 10.0.0.17/32 dev p1-route SRC 10.0.0.17 local -]) +OVS_WAIT_UNTIL_EQUAL([ovs-appctl ovs/route/show | grep 'p1-route' | sort], [dnl +Cached: 10.0.0.0/24 dev p1-route SRC 10.0.0.17 +Cached: 10.0.0.17/32 dev p1-route SRC 10.0.0.17 local]) dnl Delete ip address. AT_CHECK([ip addr del 10.0.0.17/24 dev p1-route], [0], [stdout]) diff --git a/tests/system-traffic.at b/tests/system-traffic.at index f400cfabc9..4c368eded4 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -218,6 +218,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over vxlan tunnel]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_VXLAN() OVS_TRAFFIC_VSWITCHD_START() @@ -258,7 +259,55 @@ NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PI OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([datapath - ping vlan over vxlan tunnel]) +OVS_CHECK_TUNNEL_TSO() +OVS_CHECK_VXLAN() + +OVS_TRAFFIC_VSWITCHD_START() +ADD_BR([br-underlay]) + +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"]) +AT_CHECK([ovs-ofctl add-flow br-underlay "actions=normal"]) + +ADD_NAMESPACES(at_ns0) + +dnl Set up underlay link from host into the namespace using veth pair. +ADD_VETH(p0, at_ns0, br-underlay, "172.31.2.1/24") +AT_CHECK([ip addr add dev br-underlay "172.31.1.100/24"]) +AT_CHECK([ip link set dev br-underlay up]) + +dnl Set up tunnel endpoints on OVS outside the namespace and with a native +dnl linux device inside the namespace. 
+ADD_OVS_TUNNEL([vxlan], [br0], [at_vxlan0], [172.31.1.1], [10.1.1.100/24]) +ADD_NATIVE_TUNNEL([vxlan], [at_vxlan1], [at_ns0], [172.31.1.100], [10.2.1.1/24], + [id 0 dstport 4789]) + +AT_CHECK([ovs-vsctl set port br0 tag=100]) +AT_CHECK([ovs-vsctl set port br-underlay tag=42]) + +ADD_VLAN(at_vxlan1, at_ns0, 100, "10.1.1.1/24") +ADD_VLAN(p0, at_ns0, 42, "172.31.1.1/24") + +dnl First, check the underlay +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 172.31.1.100 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +dnl Okay, now check the overlay with different packet sizes +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +NS_CHECK_EXEC([at_ns0], [ping -s 1600 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([datapath - ping over vxlan6 tunnel]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_VXLAN_UDP6ZEROCSUM() OVS_TRAFFIC_VSWITCHD_START() @@ -302,6 +351,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over gre tunnel]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) OVS_CHECK_GRE() @@ -343,6 +393,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over ip6gre L2 tunnel]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) OVS_CHECK_GRE() OVS_CHECK_ERSPAN() @@ -383,6 +434,7 @@ AT_CLEANUP AT_SETUP([datapath - ping over erspan v1 tunnel]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) OVS_CHECK_GRE() OVS_CHECK_ERSPAN() @@ -419,6 +471,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over erspan v2 tunnel]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) 
OVS_CHECK_GRE() OVS_CHECK_ERSPAN() @@ -455,6 +508,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over ip6erspan v1 tunnel]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) OVS_CHECK_GRE() OVS_CHECK_ERSPAN() @@ -494,6 +548,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over ip6erspan v2 tunnel]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) OVS_CHECK_GRE() OVS_CHECK_ERSPAN() @@ -534,6 +589,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over geneve tunnel]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_GENEVE() OVS_TRAFFIC_VSWITCHD_START() @@ -575,6 +631,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over geneve tunnel, delete flow regression]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_GENEVE() OVS_TRAFFIC_VSWITCHD_START() @@ -629,6 +686,7 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/|ERR|/d AT_CLEANUP AT_SETUP([datapath - flow resume with geneve tun_metadata]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_GENEVE() OVS_TRAFFIC_VSWITCHD_START() @@ -680,6 +738,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over geneve6 tunnel]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_GENEVE_UDP6ZEROCSUM() OVS_TRAFFIC_VSWITCHD_START() @@ -723,6 +782,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over gre tunnel by simulated packets]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_MIN_KERNEL(3, 10) OVS_TRAFFIC_VSWITCHD_START() @@ -769,6 +829,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over erspan v1 tunnel by simulated packets]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_MIN_KERNEL(3, 10) OVS_TRAFFIC_VSWITCHD_START() @@ -817,6 +878,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over erspan v2 tunnel by simulated packets]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_MIN_KERNEL(3, 10) OVS_TRAFFIC_VSWITCHD_START() @@ -870,6 +932,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over ip6erspan v1 tunnel by simulated packets]) +OVS_CHECK_TUNNEL_TSO() 
OVS_CHECK_MIN_KERNEL(3, 10) OVS_TRAFFIC_VSWITCHD_START() @@ -925,6 +988,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over ip6erspan v2 tunnel by simulated packets]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_MIN_KERNEL(3, 10) OVS_TRAFFIC_VSWITCHD_START() @@ -1981,6 +2045,111 @@ tcp,orig=(src=10.1.1.3,dst=10.1.1.4,sport=,dport=),reply=(src= OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([conntrack - zones from other field]) +CHECK_CONNTRACK() +OVS_TRAFFIC_VSWITCHD_START() + +ADD_NAMESPACES(at_ns0, at_ns1) + +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24") + +dnl Allow any traffic from ns0->ns1. Only allow nd, return traffic from ns1->ns0. +AT_DATA([flows.txt], [dnl +priority=1,action=drop +priority=10,arp,action=normal +priority=10,icmp,action=normal +priority=100,in_port=1,tcp,ct_state=-trk,action=ct(zone=5,table=0) +priority=100,in_port=1,tcp,ct_state=+trk,action=ct(commit,zone=NXM_NX_CT_ZONE[]),2 +priority=100,in_port=2,ct_state=-trk,tcp,action=ct(table=0,zone=5) +priority=100,in_port=2,ct_state=+trk,ct_zone=5,tcp,action=1 +]) + +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) + +OVS_START_L7([at_ns1], [http]) + +dnl HTTP requests from p0->p1 should work fine. +NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl +tcp,dnl +orig=(src=10.1.1.1,dst=10.1.1.2,sport=,dport=),dnl +reply=(src=10.1.1.2,dst=10.1.1.1,sport=,dport=),dnl +zone=5,protoinfo=(state=) +]) + +dnl This is to test that, when the zoneid is set by a field variable like +dnl NXM_NX_CT_ZONE, the OVS xlate should generate a megaflow with a form of +dnl "ct_zone(5), ... actions: ct(commit, zone=5)". The match "ct_zone(5)" +dnl is needed because, if we change the zoneid to 15 below, the old +dnl "ct_zone(5), ... actions: ct(commit, zone=5)" megaflow will not get hit, +dnl and OVS will generate a new megaflow with the match "ct_zone(0xf)".
+dnl This will make sure that the new packets are committing to zoneid 15 +dnl rather than old 5. +AT_CHECK([ovs-appctl dpctl/dump-flows --names filter=in_port=ovs-p0 dnl + | grep "+trk" | grep -q "ct_zone(0x5)" ], [0], []) + +AT_CHECK([ovs-ofctl mod-flows br0 dnl + 'priority=100,ct_state=-trk,tcp,in_port="ovs-p0" actions=ct(table=0,zone=15)']) + +NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log]) + +AT_CHECK([ovs-appctl dpctl/dump-flows --names filter=in_port=ovs-p0 dnl + | grep "+trk" | grep -q "ct_zone(0xf)" ], [0], []) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + +AT_SETUP([conntrack - zones from other field, more tests]) +CHECK_CONNTRACK() +OVS_TRAFFIC_VSWITCHD_START() + +ADD_NAMESPACES(at_ns0, at_ns1) + +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24") + +dnl Allow any traffic from ns0->ns1. Only allow nd, return traffic from ns1->ns0. +AT_DATA([flows.txt], [dnl +priority=1,action=drop +priority=10,arp,action=normal +priority=10,icmp,action=normal +priority=100,in_port=1,tcp,ct_state=-trk,action=ct(zone=5,table=0,commit,exec(load:0xffff0005->NXM_NX_CT_LABEL[[0..31]])) +priority=100,in_port=1,tcp,ct_state=+trk,action=ct(commit,zone=NXM_NX_CT_LABEL[[0..15]]),2 +priority=100,in_port=2,ct_state=-trk,tcp,action=ct(table=0,zone=5) +priority=100,in_port=2,ct_state=+trk,ct_zone=5,tcp,action=1 +]) + +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) + +OVS_START_L7([at_ns1], [http]) + +dnl HTTP requests from p0->p1 should work fine. 
+NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl +tcp,dnl +orig=(src=10.1.1.1,dst=10.1.1.2,sport=,dport=),dnl +reply=(src=10.1.1.2,dst=10.1.1.1,sport=,dport=),dnl +zone=5,labels=0xffff0005,protoinfo=(state=) +]) + +AT_CHECK([ovs-appctl dpctl/dump-flows --names filter=in_port=ovs-p0 dnl + | grep "+trk" | sed 's/0xffff0005\/0xffff/0x5\/0xffff/' dnl + | grep -q "ct_label(0x5/0xffff)" ], [0], []) + +AT_CHECK([ovs-ofctl mod-flows br0 'priority=100,ct_state=-trk,tcp,in_port="ovs-p0" actions=ct(table=0,zone=15,commit,exec(load:0xffff000f->NXM_NX_CT_LABEL[[0..31]]))']) + +NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log]) + +AT_CHECK([ovs-appctl dpctl/dump-flows --names filter=in_port=ovs-p0 dnl + | grep "+trk" | sed 's/0xffff000f\/0xffff/0xf\/0xffff/' dnl + | grep -q "ct_label(0xf/0xffff)" ], [0], []) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([conntrack - multiple bridges]) CHECK_CONNTRACK() OVS_TRAFFIC_VSWITCHD_START( @@ -3305,6 +3474,46 @@ NS_CHECK_EXEC([at_ns0], [ping6 -s 3200 -q -c 3 -i 0.3 -w 2 fc00::2 | FORMAT_PING OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([conntrack - IPv4 Fragmentation + NAT]) +AT_SKIP_IF([test $HAVE_TCPDUMP = no]) +CHECK_CONNTRACK() + +OVS_TRAFFIC_VSWITCHD_START( + [set-fail-mode br0 secure -- ]) + +ADD_NAMESPACES(at_ns0, at_ns1) + +ADD_VETH(p0, at_ns0, br0, "10.2.1.1/24") +ADD_VETH(p1, at_ns1, br0, "10.2.1.2/24") + +dnl Create a dummy route for NAT +NS_CHECK_EXEC([at_ns1], [ip addr add 10.1.1.2/32 dev lo]) +NS_CHECK_EXEC([at_ns0], [ip route add 10.1.1.0/24 via 10.2.1.2]) +NS_CHECK_EXEC([at_ns1], [ip route add 10.1.1.0/24 via 10.2.1.1]) + +dnl Solely for debugging when things go wrong +NS_EXEC([at_ns0], [tcpdump -l -n -xx -U -i p0 -w p0.pcap >tcpdump.out 2>/dev/null &]) +NS_EXEC([at_ns1], [tcpdump -l -n -xx -U -i p1 -w p1.pcap >tcpdump.out 2>/dev/null &]) + +AT_DATA([flows.txt], 
[dnl +table=0,arp,actions=normal +table=0,ct_state=-trk,ip,in_port=ovs-p0, actions=ct(table=1, nat) +table=0,ct_state=-trk,ip,in_port=ovs-p1, actions=ct(table=1, nat) +table=1,ct_state=+trk+new,ip,in_port=ovs-p0, actions=ct(commit, nat(src=10.1.1.1)),ovs-p1 +table=1,ct_state=+trk+est,ip,in_port=ovs-p0, actions=ovs-p1 +table=1,ct_state=+trk+est,ip,in_port=ovs-p1, actions=ovs-p0 +]) + +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) + +dnl Check connectivity +NS_CHECK_EXEC([at_ns0], [ping -c 1 10.1.1.2 -M dont -s 4500 | FORMAT_PING], [0], [dnl +1 packets transmitted, 1 received, 0% packet loss, time 0ms +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([conntrack - resubmit to ct multiple times]) CHECK_CONNTRACK() @@ -3464,15 +3673,15 @@ action=normal AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) -AT_CHECK([ovs-ofctl packet-out br0 "packet=52540003287c525400444ab586dd6006f70605b02c4020010001000000000000000000000020200100010000000000000000000000101100000134e88deb13891389080803136161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl +AT_CHECK([ovs-ofctl packet-out br0 
"in_port=42,packet=52540003287c525400444ab586dd6006f70605b02c4020010001000000000000000000000020200100010000000000000000000000101100000134e88deb13891389080803136161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl "16161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161"dnl 
"61616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl "1616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161, actions=ct(table=1)"]) -AT_CHECK([ovs-ofctl packet-out br0 
"packet=52540003287c525400444ab586dd6006f70602682c402001000100000000000000000000002020010001000000000000000000000010110005a834e88deb6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl +AT_CHECK([ovs-ofctl packet-out br0 "in_port=42,packet=52540003287c525400444ab586dd6006f70602682c402001000100000000000000000000002020010001000000000000000000000010110005a834e88deb6161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl 
"161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161, actions=ct(table=1)"]) -AT_CHECK([ovs-ofctl packet-out br0 "packet=52540003287c525400444ab586dd6006f706033d1140200100010000000000000000000000202001000100000000000000000000001013891389033d923861616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl +AT_CHECK([ovs-ofctl packet-out br0 
"in_port=42,packet=52540003287c525400444ab586dd6006f706033d1140200100010000000000000000000000202001000100000000000000000000001013891389033d923861616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616"dnl "1616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161610a, actions=ct(table=1)"]) AT_CHECK([ovs-appctl dpctl/dump-flows | head -2 | tail -1 | grep -q -e ["]udp[(]src=5001["]]) @@ -5817,7 +6026,7 @@ on_exit 'ovs-appctl revalidator/purge' on_exit 'ovs-appctl 
dpif/dump-flows br0' dnl Should work with the virtual IP address through NAT -for i in 1 2 3 4 5 6 7 8 9 10 11 12; do +for i in $(seq 1 50); do echo Request $i NS_CHECK_EXEC([at_ns1], [wget 10.1.1.64 -t 5 -T 1 --retry-connrefused -v -o wget$i.log]) done @@ -6106,6 +6315,132 @@ AT_CHECK([ovs-ofctl dump-flows br0 | grep table=2, | OFPROTO_CLEAR_DURATION_IDLE OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([conntrack - can match and clear ct_state from outside OVS]) +CHECK_CONNTRACK_LOCAL_STACK() +OVS_CHECK_TUNNEL_TSO() +OVS_CHECK_GENEVE() + +OVS_TRAFFIC_VSWITCHD_START() +ADD_BR([br-underlay], [set bridge br-underlay other-config:hwaddr=\"f0:00:00:01:01:02\"]) + +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"]) +AT_CHECK([ovs-ofctl add-flow br-underlay "priority=100,ct_state=+trk,actions=ct_clear,resubmit(,0)"]) +AT_CHECK([ovs-ofctl add-flow br-underlay "priority=10,actions=normal"]) + +ADD_NAMESPACES(at_ns0) + +dnl Set up underlay link from host into the namespace using veth pair. +ADD_VETH(p0, at_ns0, br-underlay, "172.31.1.1/24", "f0:00:00:01:01:01") +AT_CHECK([ip addr add dev br-underlay "172.31.1.100/24"]) +AT_CHECK([ip link set dev br-underlay up]) + +dnl Set up tunnel endpoints on OVS outside the namespace and with a native +dnl linux device inside the namespace. 
+ADD_OVS_TUNNEL([geneve], [br0], [at_gnv0], [172.31.1.1], [10.1.1.100/24]) +ADD_NATIVE_TUNNEL([geneve], [ns_gnv0], [at_ns0], [172.31.1.100], [10.1.1.1/24], + [vni 0]) + +dnl First, check the underlay +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 172.31.1.100 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +dnl Okay, now check the overlay +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +dnl Confirm that the ct_state and ct_clear action found its way to the dp +AT_CHECK([ovs-appctl dpctl/dump-flows --names | grep ct_clear | sort | dnl + grep 'eth(src=f0:00:00:01:01:02,dst=f0:00:00:01:01:01)' | dnl + strip_stats | strip_used | dnl + sed 's/,packet_type(ns=[[0-9]]*,id=[[0-9]]*),/,/'], + [0], [dnl +recirc_id(0),in_port(br-underlay),ct_state(+trk),eth(src=f0:00:00:01:01:02,dst=f0:00:00:01:01:01),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:0.0s, actions:ct_clear,ovs-p0 +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + +AT_BANNER([IGMP]) + +AT_SETUP([IGMP - flood under normal action]) + +OVS_TRAFFIC_VSWITCHD_START() +ADD_NAMESPACES(at_ns0, at_ns1) + +ADD_VETH(p1, at_ns0, br0, "10.1.1.1/24", "f0:00:00:01:01:01") +ADD_VETH(p2, at_ns1, br0, "10.1.1.2/24", "f0:00:00:01:01:02") + +AT_CHECK([ovs-ofctl add-flow br0 "actions=NORMAL"]) + +NS_CHECK_EXEC([at_ns0], [$PYTHON3 $srcdir/sendpkt.py p1 01 00 5e 01 01 03 dnl +f0 00 00 01 01 01 08 00 46 c0 00 28 00 00 40 00 01 02 d3 49 45 65 eb 4a e0 dnl +00 00 16 94 04 00 00 22 00 f9 02 00 00 00 01 04 00 00 00 e0 00 00 fb 00 00 dnl +00 00 00 00 > /dev/null]) + +AT_CHECK([ovs-appctl dpctl/dump-flows --names | grep -e .*ipv4 | sort | dnl + strip_stats | strip_used | strip_recirc | dnl + sed 's/,packet_type(ns=[[0-9]]*,id=[[0-9]]*),/,/'], + [0], [dnl +recirc_id(),in_port(ovs-p1),eth(src=f0:00:00:01:01:01,dst=01:00:5e:01:01:03),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, 
used:never, actions:br0,ovs-p2 +]) +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + +AT_SETUP([IGMP - forward with ICMP]) + +OVS_TRAFFIC_VSWITCHD_START() +ADD_NAMESPACES(at_ns0, at_ns1) + +ADD_VETH(p1, at_ns0, br0, "10.1.1.1/24", "f0:00:00:01:01:01") +ADD_VETH(p2, at_ns1, br0, "10.1.1.2/24", "f0:00:00:01:01:02") + +AT_DATA([flows.txt], [dnl +table=0, arp actions=NORMAL +table=0, ip,in_port=1 actions=ct(table=1,zone=64000) +table=0, in_port=2 actions=output:1 +table=1, ip,ct_state=+trk+inv actions=drop +table=1 ip,in_port=1,icmp,ct_state=+trk+new actions=output:2 +table=1, in_port=1,ip,ct_state=+trk+new actions=controller(userdata=00.de.ad.be.ef.ca.fe.01) +table=1, in_port=1,ip,ct_state=+trk+est actions=output:2 +]) +AT_CHECK([ovs-ofctl del-flows br0]) +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) + +dnl Send the IGMP, followed by a unicast ICMP - ensure we won't black hole + +NS_CHECK_EXEC([at_ns0], [$PYTHON3 $srcdir/sendpkt.py p1 f0 00 00 01 01 02 dnl +f0 00 00 01 01 01 08 00 46 c0 00 28 00 00 40 00 01 02 d3 49 45 65 eb 4a e0 dnl +00 00 16 94 04 00 00 22 00 f9 02 00 00 00 01 04 00 00 00 e0 00 00 fb 00 00 dnl +00 00 00 00 > /dev/null]) + +NS_CHECK_EXEC([at_ns0], [$PYTHON3 $srcdir/sendpkt.py p1 f0 00 00 01 01 02 dnl +f0 00 00 01 01 01 08 00 45 00 00 1c 00 01 00 00 40 01 64 dc 0a 01 01 01 0a dnl +01 01 02 08 00 f7 ff ff ff ff ff > /dev/null]) + +sleep 1 + +dnl Prefer the OpenFlow rules, because different datapaths will behave slightly +dnl differently with respect to the exact dp rules. 
+dnl +dnl This is also why we clear n_bytes / n_packets - some kernels with ipv6 +dnl enabled will bump some of these counters non-deterministically + +AT_CHECK([ovs-ofctl dump-flows br0 | grep -v NXST | dnl + strip_duration | grep -v arp | grep -v n_packets=0 | dnl + grep -v 'in_port=2 actions=output:1' | dnl + sed 's/n_bytes=[[0-9]]*/n_bytes=0/ + s/idle_age=[[0-9]]*/idle_age=0/ + s/n_packets=[[1-9]][[0-9]]*/n_packets=0/' | sort], [0], [dnl + cookie=0x0, table=0, n_packets=0, n_bytes=0, idle_age=0, ip,in_port=1 actions=ct(table=1,zone=64000) + cookie=0x0, table=1, n_packets=0, n_bytes=0, idle_age=0, ct_state=+new+trk,icmp,in_port=1 actions=output:2 + cookie=0x0, table=1, n_packets=0, n_bytes=0, idle_age=0, ct_state=+new+trk,ip,in_port=1 actions=controller(userdata=00.de.ad.be.ef.ca.fe.01) +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + AT_BANNER([802.1ad]) AT_SETUP([802.1ad - vlan_limit]) diff --git a/tests/system-tso-macros.at b/tests/system-tso-macros.at index 406334f3e0..1a80047619 100644 --- a/tests/system-tso-macros.at +++ b/tests/system-tso-macros.at @@ -29,3 +29,5 @@ m4_define([CONFIGURE_VETH_OFFLOADS], [AT_CHECK([ethtool -K $1 sg on], [0], [ignore], [ignore])] [AT_CHECK([ethtool -K $1 tso on], [0], [ignore], [ignore])] ) + +m4_define([CHECK_SYSTEM_TSO], []) diff --git a/tests/test-cmap.c b/tests/test-cmap.c index 0705475606..f8cc4dd80a 100644 --- a/tests/test-cmap.c +++ b/tests/test-cmap.c @@ -74,6 +74,7 @@ check_cmap(struct cmap *cmap, const int values[], size_t n, cmap_values[i++] = e->value; } assert(i == n); + assert(e == NULL); /* Here we test iteration with cmap_next_position() */ i = 0; @@ -107,6 +108,7 @@ check_cmap(struct cmap *cmap, const int values[], size_t n, count += e->value == values[i]; } assert(count == 1); + assert(e == NULL); } /* Check that all the values are there in batched lookup.
*/ @@ -130,6 +132,7 @@ check_cmap(struct cmap *cmap, const int values[], size_t n, CMAP_NODE_FOR_EACH (e, node, nodes[k]) { count += e->value == values[i + k]; } + assert(e == NULL); } assert(count == j); /* j elements in a batch. */ } diff --git a/tests/test-hindex.c b/tests/test-hindex.c index af06be5fcc..95e49284ee 100644 --- a/tests/test-hindex.c +++ b/tests/test-hindex.c @@ -265,6 +265,11 @@ test_hindex_for_each_safe(hash_func *hash) i = 0; n_remaining = n; HINDEX_FOR_EACH_SAFE (e, next, node, &hindex) { + if (hindex_next(&hindex, &e->node) == NULL) { + assert(next == NULL); + } else { + assert(&next->node == hindex_next(&hindex, &e->node)); + } assert(i < n); if (pattern & (1ul << e->value)) { size_t j; @@ -281,6 +286,7 @@ test_hindex_for_each_safe(hash_func *hash) i++; } assert(i == n); + assert(next == NULL); for (i = 0; i < n; i++) { if (pattern & (1ul << i)) { diff --git a/tests/test-hmap.c b/tests/test-hmap.c index 9259b0b3fc..47b4755386 100644 --- a/tests/test-hmap.c +++ b/tests/test-hmap.c @@ -62,6 +62,7 @@ check_hmap(struct hmap *hmap, const int values[], size_t n, hmap_values[i++] = e->value; } assert(i == n); + assert(e == NULL); memcpy(sort_values, values, sizeof *sort_values * n); qsort(sort_values, n, sizeof *sort_values, compare_ints); @@ -82,6 +83,7 @@ check_hmap(struct hmap *hmap, const int values[], size_t n, count += e->value == values[i]; } assert(count == 1); + assert(e == NULL); } /* Check counters. 
*/ @@ -243,6 +245,11 @@ test_hmap_for_each_safe(hash_func *hash) i = 0; n_remaining = n; HMAP_FOR_EACH_SAFE (e, next, node, &hmap) { + if (hmap_next(&hmap, &e->node) == NULL) { + assert(next == NULL); + } else { + assert(&next->node == hmap_next(&hmap, &e->node)); + } assert(i < n); if (pattern & (1ul << e->value)) { size_t j; @@ -259,6 +266,8 @@ test_hmap_for_each_safe(hash_func *hash) i++; } assert(i == n); + assert(next == NULL); + assert(e == NULL); for (i = 0; i < n; i++) { if (pattern & (1ul << i)) { @@ -308,6 +317,7 @@ test_hmap_for_each_pop(hash_func *hash) i++; } assert(i == n); + assert(e == NULL); hmap_destroy(&hmap); } diff --git a/tests/test-json.c b/tests/test-json.c index a7ee595e0b..072a537252 100644 --- a/tests/test-json.c +++ b/tests/test-json.c @@ -22,6 +22,8 @@ #include #include #include "ovstest.h" +#include "random.h" +#include "timeval.h" #include "util.h" /* --pretty: If set, the JSON output is pretty-printed, instead of printed as @@ -157,3 +159,69 @@ test_json_main(int argc, char *argv[]) } OVSTEST_REGISTER("test-json", test_json_main); + +static void +json_string_benchmark_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) +{ + struct { + int n; + int quote_probability; + int special_probability; + int iter; + } configs[] = { + { 100000, 0, 0, 1000, }, + { 100000, 2, 1, 1000, }, + { 100000, 10, 1, 1000, }, + { 10000000, 0, 0, 100, }, + { 10000000, 2, 1, 100, }, + { 10000000, 10, 1, 100, }, + { 100000000, 0, 0, 10,
}, + { 100000000, 2, 1, 10, }, + { 100000000, 10, 1, 10, }, + }; + + printf(" SIZE Q S TIME\n"); + printf("--------------------------------------\n"); + + for (int i = 0; i < ARRAY_SIZE(configs); i++) { + int iter = configs[i].iter; + int n = configs[i].n; + char *str = xzalloc(n); + + for (int j = 0; j < n - 1; j++) { + int r = random_range(100); + + if (r < configs[i].special_probability) { + str[j] = random_range(' ' - 1) + 1; + } else if (r < (configs[i].special_probability + + configs[i].quote_probability)) { + str[j] = '"'; + } else { + str[j] = random_range(256 - ' ') + ' '; + } + } + + printf("%-11d %-2d %-2d: ", n, configs[i].quote_probability, + configs[i].special_probability); + fflush(stdout); + + struct json *json = json_string_create_nocopy(str); + uint64_t start = time_msec(); + + char **res = xzalloc(iter * sizeof *res); + for (int j = 0; j < iter; j++) { + res[j] = json_to_string(json, 0); + } + + printf("%16.3lf ms\n", (double) (time_msec() - start) / iter); + json_destroy(json); + for (int j = 0; j < iter; j++) { + free(res[j]); + } + free(res); + } + + exit(0); +} + +OVSTEST_REGISTER("json-string-benchmark", json_string_benchmark_main); diff --git a/tests/test-list.c b/tests/test-list.c index 6f1fb059bc..648e02a5e2 100644 --- a/tests/test-list.c +++ b/tests/test-list.c @@ -61,7 +61,7 @@ check_list(struct ovs_list *list, const int values[], size_t n) assert(e->value == values[i]); i++; } - assert(&e->node == list); + assert(e == NULL); assert(i == n); i = 0; @@ -70,7 +70,7 @@ check_list(struct ovs_list *list, const int values[], size_t n) assert(e->value == values[n - i - 1]); i++; } - assert(&e->node == list); + assert(e == NULL); assert(i == n); assert(ovs_list_is_empty(list) == !n); @@ -135,6 +135,13 @@ test_list_for_each_safe(void) values_idx = 0; n_remaining = n; LIST_FOR_EACH_SAFE (e, next, node, &list) { + /* "next" is valid as long as it's not pointing to &list.
*/ + if (&e->node == list.prev) { + assert(next == NULL); + } else { + assert(&next->node == e->node.next); + } + assert(i < n); if (pattern & (1ul << i)) { ovs_list_remove(&e->node); @@ -148,7 +155,8 @@ test_list_for_each_safe(void) i++; } assert(i == n); - assert(&e->node == &list); + assert(e == NULL); + assert(next == NULL); for (i = 0; i < n; i++) { if (pattern & (1ul << i)) { diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c index daa55dab7b..57572cd3ed 100644 --- a/tests/test-ovsdb.c +++ b/tests/test-ovsdb.c @@ -512,6 +512,18 @@ do_diff_data(struct ovs_cmdl_context *ctx) ovs_fatal(0, "failed to apply diff"); } + /* Apply diff to 'old' in place. */ + error = ovsdb_datum_apply_diff_in_place(&old, &diff, &type); + if (error) { + char *string = ovsdb_error_to_string_free(error); + ovs_fatal(0, "%s", string); + } + + /* Test to make sure 'old' equals 'new' now. */ + if (!ovsdb_datum_equals(&new, &old, &type)) { + ovs_fatal(0, "failed to apply diff in place"); + } + /* Print diff */ json = ovsdb_datum_to_json(&diff, &type); printf ("diff: "); @@ -522,6 +534,11 @@ do_diff_data(struct ovs_cmdl_context *ctx) printf ("apply diff: "); print_and_free_json(json); + /* Print updated 'old' */ + json = ovsdb_datum_to_json(&old, &type); + printf ("apply diff in place: "); + print_and_free_json(json); + ovsdb_datum_destroy(&new, &type); ovsdb_datum_destroy(&old, &type); ovsdb_datum_destroy(&diff, &type); @@ -1862,7 +1879,8 @@ print_and_log(const char *format, ...) } static char * -format_idl_row(const struct ovsdb_idl_row *row, int step, const char *contents) +format_idl_row(const struct ovsdb_idl_row *row, int step, const char *contents, + bool terse) { const char *change_str = !ovsdb_idl_track_is_set(row->table) @@ -1873,9 +1891,13 @@ format_idl_row(const struct ovsdb_idl_row *row, int step, const char *contents) ? 
"deleted row: " : ""; - return xasprintf("%03d: table %s: %s%s uuid=" UUID_FMT, - step, row->table->class_->name, change_str, contents, - UUID_ARGS(&row->uuid)); + if (terse) { + return xasprintf("%03d: table %s", step, row->table->class_->name); + } else { + return xasprintf("%03d: table %s: %s%s uuid=" UUID_FMT, + step, row->table->class_->name, change_str, + contents, UUID_ARGS(&row->uuid)); + } } static void @@ -1998,7 +2020,7 @@ print_idl_row_updated_singleton(const struct idltest_singleton *sng, int step) } static void -print_idl_row_simple(const struct idltest_simple *s, int step) +print_idl_row_simple(const struct idltest_simple *s, int step, bool terse) { struct ds msg = DS_EMPTY_INITIALIZER; ds_put_format(&msg, "i=%"PRId64" r=%g b=%s s=%s u="UUID_FMT" ia=[", @@ -2025,7 +2047,7 @@ print_idl_row_simple(const struct idltest_simple *s, int step) } ds_put_cstr(&msg, "]"); - char *row_msg = format_idl_row(&s->header_, step, ds_cstr(&msg)); + char *row_msg = format_idl_row(&s->header_, step, ds_cstr(&msg), terse); print_and_log("%s", row_msg); ds_destroy(&msg); free(row_msg); @@ -2034,7 +2056,7 @@ print_idl_row_simple(const struct idltest_simple *s, int step) } static void -print_idl_row_link1(const struct idltest_link1 *l1, int step) +print_idl_row_link1(const struct idltest_link1 *l1, int step, bool terse) { struct ds msg = DS_EMPTY_INITIALIZER; ds_put_format(&msg, "i=%"PRId64" k=", l1->i); @@ -2053,7 +2075,7 @@ print_idl_row_link1(const struct idltest_link1 *l1, int step) ds_put_format(&msg, "%"PRId64, l1->l2->i); } - char *row_msg = format_idl_row(&l1->header_, step, ds_cstr(&msg)); + char *row_msg = format_idl_row(&l1->header_, step, ds_cstr(&msg), terse); print_and_log("%s", row_msg); ds_destroy(&msg); free(row_msg); @@ -2062,7 +2084,7 @@ print_idl_row_link1(const struct idltest_link1 *l1, int step) } static void -print_idl_row_link2(const struct idltest_link2 *l2, int step) +print_idl_row_link2(const struct idltest_link2 *l2, int step, bool terse) { 
struct ds msg = DS_EMPTY_INITIALIZER; ds_put_format(&msg, "i=%"PRId64" l1=", l2->i); @@ -2070,7 +2092,7 @@ print_idl_row_link2(const struct idltest_link2 *l2, int step) ds_put_format(&msg, "%"PRId64, l2->l1->i); } - char *row_msg = format_idl_row(&l2->header_, step, ds_cstr(&msg)); + char *row_msg = format_idl_row(&l2->header_, step, ds_cstr(&msg), terse); print_and_log("%s", row_msg); ds_destroy(&msg); free(row_msg); @@ -2079,7 +2101,7 @@ print_idl_row_link2(const struct idltest_link2 *l2, int step) } static void -print_idl_row_simple3(const struct idltest_simple3 *s3, int step) +print_idl_row_simple3(const struct idltest_simple3 *s3, int step, bool terse) { struct ds msg = DS_EMPTY_INITIALIZER; size_t i; @@ -2098,7 +2120,7 @@ print_idl_row_simple3(const struct idltest_simple3 *s3, int step) } ds_put_cstr(&msg, "]"); - char *row_msg = format_idl_row(&s3->header_, step, ds_cstr(&msg)); + char *row_msg = format_idl_row(&s3->header_, step, ds_cstr(&msg), terse); print_and_log("%s", row_msg); ds_destroy(&msg); free(row_msg); @@ -2107,12 +2129,12 @@ print_idl_row_simple3(const struct idltest_simple3 *s3, int step) } static void -print_idl_row_simple4(const struct idltest_simple4 *s4, int step) +print_idl_row_simple4(const struct idltest_simple4 *s4, int step, bool terse) { struct ds msg = DS_EMPTY_INITIALIZER; ds_put_format(&msg, "name=%s", s4->name); - char *row_msg = format_idl_row(&s4->header_, step, ds_cstr(&msg)); + char *row_msg = format_idl_row(&s4->header_, step, ds_cstr(&msg), terse); print_and_log("%s", row_msg); ds_destroy(&msg); free(row_msg); @@ -2121,7 +2143,7 @@ print_idl_row_simple4(const struct idltest_simple4 *s4, int step) } static void -print_idl_row_simple6(const struct idltest_simple6 *s6, int step) +print_idl_row_simple6(const struct idltest_simple6 *s6, int step, bool terse) { struct ds msg = DS_EMPTY_INITIALIZER; ds_put_format(&msg, "name=%s ", s6->name); @@ -2132,7 +2154,7 @@ print_idl_row_simple6(const struct idltest_simple6 *s6, int step) } 
ds_put_cstr(&msg, "]"); - char *row_msg = format_idl_row(&s6->header_, step, ds_cstr(&msg)); + char *row_msg = format_idl_row(&s6->header_, step, ds_cstr(&msg), terse); print_and_log("%s", row_msg); ds_destroy(&msg); free(row_msg); @@ -2141,12 +2163,13 @@ print_idl_row_simple6(const struct idltest_simple6 *s6, int step) } static void -print_idl_row_singleton(const struct idltest_singleton *sng, int step) +print_idl_row_singleton(const struct idltest_singleton *sng, int step, + bool terse) { struct ds msg = DS_EMPTY_INITIALIZER; ds_put_format(&msg, "name=%s", sng->name); - char *row_msg = format_idl_row(&sng->header_, step, ds_cstr(&msg)); + char *row_msg = format_idl_row(&sng->header_, step, ds_cstr(&msg), terse); print_and_log("%s", row_msg); ds_destroy(&msg); free(row_msg); @@ -2155,7 +2178,7 @@ print_idl_row_singleton(const struct idltest_singleton *sng, int step) } static void -print_idl(struct ovsdb_idl *idl, int step) +print_idl(struct ovsdb_idl *idl, int step, bool terse) { const struct idltest_simple3 *s3; const struct idltest_simple4 *s4; @@ -2167,31 +2190,31 @@ print_idl(struct ovsdb_idl *idl, int step) int n = 0; IDLTEST_SIMPLE_FOR_EACH (s, idl) { - print_idl_row_simple(s, step); + print_idl_row_simple(s, step, terse); n++; } IDLTEST_LINK1_FOR_EACH (l1, idl) { - print_idl_row_link1(l1, step); + print_idl_row_link1(l1, step, terse); n++; } IDLTEST_LINK2_FOR_EACH (l2, idl) { - print_idl_row_link2(l2, step); + print_idl_row_link2(l2, step, terse); n++; } IDLTEST_SIMPLE3_FOR_EACH (s3, idl) { - print_idl_row_simple3(s3, step); + print_idl_row_simple3(s3, step, terse); n++; } IDLTEST_SIMPLE4_FOR_EACH (s4, idl) { - print_idl_row_simple4(s4, step); + print_idl_row_simple4(s4, step, terse); n++; } IDLTEST_SIMPLE6_FOR_EACH (s6, idl) { - print_idl_row_simple6(s6, step); + print_idl_row_simple6(s6, step, terse); n++; } IDLTEST_SINGLETON_FOR_EACH (sng, idl) { - print_idl_row_singleton(sng, step); + print_idl_row_singleton(sng, step, terse); n++; } if (!n) { @@ 
-2200,7 +2223,7 @@ print_idl(struct ovsdb_idl *idl, int step) } static void -print_idl_track(struct ovsdb_idl *idl, int step) +print_idl_track(struct ovsdb_idl *idl, int step, bool terse) { const struct idltest_simple3 *s3; const struct idltest_simple4 *s4; @@ -2211,27 +2234,27 @@ print_idl_track(struct ovsdb_idl *idl, int step) int n = 0; IDLTEST_SIMPLE_FOR_EACH_TRACKED (s, idl) { - print_idl_row_simple(s, step); + print_idl_row_simple(s, step, terse); n++; } IDLTEST_LINK1_FOR_EACH_TRACKED (l1, idl) { - print_idl_row_link1(l1, step); + print_idl_row_link1(l1, step, terse); n++; } IDLTEST_LINK2_FOR_EACH_TRACKED (l2, idl) { - print_idl_row_link2(l2, step); + print_idl_row_link2(l2, step, terse); n++; } IDLTEST_SIMPLE3_FOR_EACH_TRACKED (s3, idl) { - print_idl_row_simple3(s3, step); + print_idl_row_simple3(s3, step, terse); n++; } IDLTEST_SIMPLE4_FOR_EACH_TRACKED (s4, idl) { - print_idl_row_simple4(s4, step); + print_idl_row_simple4(s4, step, terse); n++; } IDLTEST_SIMPLE6_FOR_EACH_TRACKED (s6, idl) { - print_idl_row_simple6(s6, step); + print_idl_row_simple6(s6, step, terse); n++; } @@ -2634,6 +2657,13 @@ do_idl(struct ovs_cmdl_context *ctx) char *arg = ctx->argv[i]; struct jsonrpc_msg *request, *reply; + bool terse = false; + if (*arg == '?') { + /* We're only interested in terse table contents. */ + terse = true; + arg++; + } + if (*arg == '+') { /* The previous transaction didn't change anything. */ arg++; @@ -2654,10 +2684,10 @@ do_idl(struct ovs_cmdl_context *ctx) /* Print update. 
*/ if (track) { - print_idl_track(idl, step++); + print_idl_track(idl, step++, terse); ovsdb_idl_track_clear(idl); } else { - print_idl(idl, step++); + print_idl(idl, step++, terse); } } seqno = ovsdb_idl_get_seqno(idl); @@ -2710,7 +2740,7 @@ do_idl(struct ovs_cmdl_context *ctx) ovsdb_idl_wait(idl); poll_block(); } - print_idl(idl, step++); + print_idl(idl, step++, false); ovsdb_idl_track_clear(idl); ovsdb_idl_destroy(idl); print_and_log("%03d: done", step); @@ -2727,13 +2757,15 @@ print_idl_row_simple2(const struct idltest_simple2 *s, int step) printf("%03d: name=%s smap=[", step, s->name); for (i = 0; i < smap->n; i++) { - printf("[%s : %s]%s", smap->keys[i].string, smap->values[i].string, - i < smap->n-1? ",": ""); + printf("[%s : %s]%s", + smap->keys[i].s->string, smap->values[i].s->string, + i < smap->n - 1 ? "," : ""); } printf("] imap=["); for (i = 0; i < imap->n; i++) { - printf("[%"PRId64" : %s]%s", imap->keys[i].integer, imap->values[i].string, - i < imap->n-1? ",":""); + printf("[%"PRId64" : %s]%s", + imap->keys[i].integer, imap->values[i].s->string, + i < imap->n - 1 ? 
"," : ""); } printf("]\n"); } @@ -2802,8 +2834,8 @@ do_idl_partial_update_map_column(struct ovs_cmdl_context *ctx) myTxn = ovsdb_idl_txn_create(idl); smap = idltest_simple2_get_smap(myRow, OVSDB_TYPE_STRING, OVSDB_TYPE_STRING); - strcpy(key_to_delete, smap->keys[0].string); - idltest_simple2_update_smap_delkey(myRow, smap->keys[0].string); + ovs_strlcpy(key_to_delete, smap->keys[0].s->string, sizeof key_to_delete); + idltest_simple2_update_smap_delkey(myRow, smap->keys[0].s->string); ovsdb_idl_txn_commit_block(myTxn); ovsdb_idl_txn_destroy(myTxn); ovsdb_idl_get_initial_snapshot(idl); @@ -2829,7 +2861,7 @@ dump_simple3(struct ovsdb_idl *idl, int step) { IDLTEST_SIMPLE3_FOR_EACH(myRow, idl) { - print_idl_row_simple3(myRow, step); + print_idl_row_simple3(myRow, step, false); } } @@ -2971,7 +3003,7 @@ do_idl_compound_index_with_ref(struct ovs_cmdl_context *ctx) idltest_simple3_index_set_uref(equal, &myRow2, 1); printf("%03d: Query using index with reference\n", step++); IDLTEST_SIMPLE3_FOR_EACH_EQUAL (myRow, equal, index) { - print_idl_row_simple3(myRow, step++); + print_idl_row_simple3(myRow, step++, false); } idltest_simple3_index_destroy_row(equal); diff --git a/tests/test-ovsdb.py b/tests/test-ovsdb.py index 5bc0bf6814..853264f22b 100644 --- a/tests/test-ovsdb.py +++ b/tests/test-ovsdb.py @@ -232,75 +232,87 @@ def get_singleton_table_printable_row(row): return "name=%s" % row.name -def print_row(table, row, step, contents): - s = "%03d: table %s: %s " % (step, table, contents) - s += get_simple_printable_row_string(row, ["uuid"]) +def print_row(table, row, step, contents, terse): + if terse: + s = "%03d: table %s" % (step, table) + else: + s = "%03d: table %s: %s " % (step, table, contents) + s += get_simple_printable_row_string(row, ["uuid"]) print(s) -def print_idl(idl, step): +def print_idl(idl, step, terse=False): n = 0 if "simple" in idl.tables: simple = idl.tables["simple"].rows for row in simple.values(): print_row("simple", row, step, - 
get_simple_table_printable_row(row)) + get_simple_table_printable_row(row), + terse) n += 1 if "simple2" in idl.tables: simple2 = idl.tables["simple2"].rows for row in simple2.values(): print_row("simple2", row, step, - get_simple2_table_printable_row(row)) + get_simple2_table_printable_row(row), + terse) n += 1 if "simple3" in idl.tables: simple3 = idl.tables["simple3"].rows for row in simple3.values(): print_row("simple3", row, step, - get_simple3_table_printable_row(row)) + get_simple3_table_printable_row(row), + terse) n += 1 if "simple4" in idl.tables: simple4 = idl.tables["simple4"].rows for row in simple4.values(): print_row("simple4", row, step, - get_simple4_table_printable_row(row)) + get_simple4_table_printable_row(row), + terse) n += 1 if "simple5" in idl.tables: simple5 = idl.tables["simple5"].rows for row in simple5.values(): print_row("simple5", row, step, - get_simple5_table_printable_row(row)) + get_simple5_table_printable_row(row), + terse) n += 1 if "simple6" in idl.tables: simple6 = idl.tables["simple6"].rows for row in simple6.values(): print_row("simple6", row, step, - get_simple6_table_printable_row(row)) + get_simple6_table_printable_row(row), + terse) n += 1 if "link1" in idl.tables: l1 = idl.tables["link1"].rows for row in l1.values(): print_row("link1", row, step, - get_link1_table_printable_row(row)) + get_link1_table_printable_row(row), + terse) n += 1 if "link2" in idl.tables: l2 = idl.tables["link2"].rows for row in l2.values(): print_row("link2", row, step, - get_link2_table_printable_row(row)) + get_link2_table_printable_row(row), + terse) n += 1 if "singleton" in idl.tables: sng = idl.tables["singleton"].rows for row in sng.values(): print_row("singleton", row, step, - get_singleton_table_printable_row(row)) + get_singleton_table_printable_row(row), + terse) n += 1 if not n: @@ -701,6 +713,12 @@ def do_idl(schema_file, remote, *commands): step += 1 for command in commands: + terse = False + if command.startswith("?"): + # We're 
only interested in terse table contents. + terse = True + command = command[1:] + if command.startswith("+"): # The previous transaction didn't change anything. command = command[1:] @@ -714,7 +732,7 @@ def do_idl(schema_file, remote, *commands): rpc.wait(poller) poller.block() - print_idl(idl, step) + print_idl(idl, step, terse) step += 1 seqno = idl.change_seqno diff --git a/tests/test-rcu.c b/tests/test-rcu.c index 965f3c49f3..bb17092bf0 100644 --- a/tests/test-rcu.c +++ b/tests/test-rcu.c @@ -35,7 +35,7 @@ quiescer_main(void *aux OVS_UNUSED) } static void -test_rcu_quiesce(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) +test_rcu_quiesce(void) { pthread_t quiescer; @@ -48,4 +48,29 @@ test_rcu_quiesce(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) xpthread_join(quiescer, NULL); } -OVSTEST_REGISTER("test-rcu-quiesce", test_rcu_quiesce); +static void +add_count(void *_count) +{ + unsigned *count = (unsigned *)_count; + (*count) ++; +} + +static void +test_rcu_barrier(void) +{ + unsigned count = 0; + for (int i = 0; i < 10; i ++) { + ovsrcu_postpone(add_count, &count); + } + + ovsrcu_barrier(); + ovs_assert(count == 10); +} + +static void +test_rcu(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { + test_rcu_quiesce(); + test_rcu_barrier(); +} + +OVSTEST_REGISTER("test-rcu", test_rcu); diff --git a/tests/tunnel-push-pop-ipv6.at b/tests/tunnel-push-pop-ipv6.at index 59723e63b8..c7665a1aeb 100644 --- a/tests/tunnel-push-pop-ipv6.at +++ b/tests/tunnel-push-pop-ipv6.at @@ -432,6 +432,42 @@ AT_CHECK([ovs-appctl dpif/dump-flows int-br | grep 'in_port(6081)'], [0], [dnl tunnel(tun_id=0x7b,ipv6_src=2001:cafe::92,ipv6_dst=2001:cafe::88,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(-df-csum+key)),recirc_id(0),in_port(6081),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=3,rule_cookie=0,controller_id=0,max_len=65535)) ]) +dnl 
Receive VXLAN with different MAC and verify that the neigh cache gets updated +AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000f8bc1244cafe86dd60000000003a11402001cafe0000000000000000000000922001cafe000000000000000000000088c85312b5003abc700c00000300007b00ffffffffffff00000000000008004500001c0001000040117cce7f0000017f0000010035003500080172']) + +ovs-appctl time/warp 1000 +ovs-appctl time/warp 1000 + +dnl Check VXLAN tunnel push +AT_CHECK([ovs-ofctl add-flow int-br action=2]) +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=36:b1:ee:7c:01:01,dst=36:b1:ee:7c:01:02),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout]) +AT_CHECK([tail -1 stdout], [0], + [Datapath actions: clone(tnl_push(tnl_port(4789),header(size=70,type=4,eth(dst=f8:bc:12:44:ca:fe,src=aa:55:aa:55:00:00,dl_type=0x86dd),ipv6(src=2001:cafe::88,dst=2001:cafe::92,label=0,proto=17,tclass=0x0,hlimit=64),udp(src=0,dst=4789,csum=0xffff),vxlan(flags=0x8000000,vni=0x7b)),out_port(100)),1) +]) + +AT_CHECK([ovs-appctl tnl/arp/show | tail -n+3 | sort], [0], [dnl +2001:cafe::92 f8:bc:12:44:ca:fe br0 +2001:cafe::93 f8:bc:12:44:34:b7 br0 +]) + +dnl Restore and check the cache entries +AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000f8bc124434b686dd60000000003a11402001cafe0000000000000000000000922001cafe000000000000000000000088c85312b5003abc700c00000300007b00ffffffffffff00000000000008004500001c0001000040117cce7f0000017f0000010035003500080172']) + +ovs-appctl time/warp 1000 +ovs-appctl time/warp 1000 + +dnl Check VXLAN tunnel push +AT_CHECK([ovs-ofctl add-flow int-br action=2]) +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=36:b1:ee:7c:01:01,dst=36:b1:ee:7c:01:02),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout]) +AT_CHECK([tail -1 stdout], [0], + [Datapath actions: 
clone(tnl_push(tnl_port(4789),header(size=70,type=4,eth(dst=f8:bc:12:44:34:b6,src=aa:55:aa:55:00:00,dl_type=0x86dd),ipv6(src=2001:cafe::88,dst=2001:cafe::92,label=0,proto=17,tclass=0x0,hlimit=64),udp(src=0,dst=4789,csum=0xffff),vxlan(flags=0x8000000,vni=0x7b)),out_port(100)),1) +]) + +AT_CHECK([ovs-appctl tnl/arp/show | tail -n+3 | sort], [0], [dnl +2001:cafe::92 f8:bc:12:44:34:b6 br0 +2001:cafe::93 f8:bc:12:44:34:b7 br0 +]) + ovs-appctl time/warp 10000 AT_CHECK([ovs-vsctl del-port int-br t3 \ diff --git a/tests/tunnel-push-pop.at b/tests/tunnel-push-pop.at index 48c5de9d19..a441de3ef2 100644 --- a/tests/tunnel-push-pop.at +++ b/tests/tunnel-push-pop.at @@ -499,6 +499,28 @@ AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port [[37]]' | sort], [0], [dnl port 7: rx pkts=5, bytes=434, drop=?, errs=?, frame=?, over=?, crc=? ]) +dnl Send out packets received from L3GRE tunnel back to L3GRE tunnel +AT_CHECK([ovs-ofctl del-flows int-br]) +AT_CHECK([ovs-ofctl add-flow int-br "in_port=7,actions=set_field:3->in_port,7"]) +AT_CHECK([ovs-vsctl -- set Interface br0 options:pcap=br0.pcap]) + +AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637']) +AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637']) +AT_CHECK([ovs-appctl netdev-dummy/receive p0 
'aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637']) + +ovs-appctl time/warp 1000 + +AT_CHECK([ovs-pcap p0.pcap > p0.pcap.txt 2>&1]) +AT_CHECK([tail -6 p0.pcap.txt], [0], [dnl +aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637 +001b213cab64aa55aa55000008004500007000004000402f33aa010102580101025c20000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637 +aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637 +001b213cab64aa55aa55000008004500007000004000402f33aa010102580101025c20000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637 +aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637 +001b213cab64aa55aa55000008004500007000004000402f33aa010102580101025c20000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637 +]) + + dnl Check decapsulation of Geneve packet with options 
AT_CAPTURE_FILE([ofctl_monitor.log]) AT_CHECK([ovs-ofctl monitor int-br 65534 --detach --no-chdir --pidfile 2> ofctl_monitor.log]) @@ -518,8 +540,43 @@ icmp,vlan_tci=0x0000,dl_src=be:b6:f4:e1:49:4a,dl_dst=fe:71:d8:83:72:4f,nw_src=30 AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port 5'], [0], [dnl port 5: rx pkts=1, bytes=98, drop=?, errs=?, frame=?, over=?, crc=? ]) -AT_CHECK([ovs-appctl dpif/dump-flows int-br | grep 'in_port(6081)'], [0], [dnl -tunnel(tun_id=0x7b,src=1.1.2.92,dst=1.1.2.88,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(-df-csum+key)),recirc_id(0),in_port(6081),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=2,rule_cookie=0,controller_id=0,max_len=65535)) +AT_CHECK([ovs-appctl dpif/dump-flows int-br | grep 'in_port(6081)' | sed -e 's/recirc_id=[[0-9]]*/recirc_id=/g'], [0], [dnl +tunnel(tun_id=0x7b,src=1.1.2.92,dst=1.1.2.88,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(-df-csum+key)),recirc_id(0),in_port(6081),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=,rule_cookie=0,controller_id=0,max_len=65535)) +]) + +dnl Receive VXLAN with different MAC and verify that the neigh cache gets updated +AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000f8bc1244cafe08004500004e00010000401173e90101025c01010258c85312b5003a8cd40c00000300007b00ffffffffffff00000000000008004500001c0001000040117cce7f0000017f0000010035003500080172']) + +ovs-appctl time/warp 1000 +ovs-appctl time/warp 1000 + +dnl Check VXLAN tunnel push +AT_CHECK([ovs-ofctl add-flow int-br action=2]) +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=36:b1:ee:7c:01:01,dst=36:b1:ee:7c:01:02),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], 
[stdout]) +AT_CHECK([tail -1 stdout], [0], + [Datapath actions: clone(tnl_push(tnl_port(4789),header(size=50,type=4,eth(dst=f8:bc:12:44:ca:fe,src=aa:55:aa:55:00:00,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=17,tos=0,ttl=64,frag=0x4000),udp(src=0,dst=4789,csum=0x0),vxlan(flags=0x8000000,vni=0x7b)),out_port(100)),1) +]) + +AT_CHECK([ovs-appctl tnl/neigh/show | tail -n+3 | sort], [0], [dnl +1.1.2.92 f8:bc:12:44:ca:fe br0 +1.1.2.93 f8:bc:12:44:34:b7 br0 +]) + +dnl Restore and check the cache entries +AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000f8bc124434b608004500004e00010000401173e90101025c01010258c85312b5003a8cd40c00000300007b00ffffffffffff00000000000008004500001c0001000040117cce7f0000017f0000010035003500080172']) + +ovs-appctl time/warp 1000 +ovs-appctl time/warp 1000 + +dnl Check VXLAN tunnel push +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=36:b1:ee:7c:01:01,dst=36:b1:ee:7c:01:02),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout]) +AT_CHECK([tail -1 stdout], [0], + [Datapath actions: clone(tnl_push(tnl_port(4789),header(size=50,type=4,eth(dst=f8:bc:12:44:34:b6,src=aa:55:aa:55:00:00,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=17,tos=0,ttl=64,frag=0x4000),udp(src=0,dst=4789,csum=0x0),vxlan(flags=0x8000000,vni=0x7b)),out_port(100)),1) +]) + +AT_CHECK([ovs-appctl tnl/neigh/show | tail -n+3 | sort], [0], [dnl +1.1.2.92 f8:bc:12:44:34:b6 br0 +1.1.2.93 f8:bc:12:44:34:b7 br0 ]) ovs-appctl time/warp 10000 @@ -595,6 +652,64 @@ OVS_WAIT_UNTIL([test `ovs-pcap p0.pcap | grep 50540000000a5054000000091235 | wc OVS_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([tunnel_push_pop - packet_out debug_slow]) + +OVS_VSWITCHD_START( + [add-port br0 p0 dnl + -- set Interface p0 type=dummy ofport_request=1 dnl + other-config:hwaddr=aa:55:aa:55:00:00]) +AT_CHECK([ovs-appctl vlog/set dpif_netdev:dbg]) +AT_CHECK([ovs-vsctl add-br int-br -- set bridge int-br datapath_type=dummy]) +AT_CHECK([ovs-vsctl 
add-port int-br t2 dnl + -- set Interface t2 type=geneve options:remote_ip=1.1.2.92 dnl + options:key=123 ofport_request=2]) + +dnl First setup dummy interface IP address, then add the route +dnl so that tnl-port table can get valid IP address for the device. +AT_CHECK([ovs-appctl netdev-dummy/ip4addr br0 1.1.2.88/24], [0], [OK +]) +AT_CHECK([ovs-appctl ovs/route/add 1.1.2.92/24 br0], [0], [OK +]) +AT_CHECK([ovs-ofctl add-flow br0 action=normal]) + +dnl This ARP reply from p0 has two effects: +dnl 1. The ARP cache will learn that 1.1.2.92 is at f8:bc:12:44:34:b6. +dnl 2. The br0 mac learning will learn that f8:bc:12:44:34:b6 is on p0. +AT_CHECK([ + ovs-appctl netdev-dummy/receive p0 dnl + 'recirc_id(0),in_port(2),dnl + eth(src=f8:bc:12:44:34:b6,dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),dnl + arp(sip=1.1.2.92,tip=1.1.2.88,op=2,sha=f8:bc:12:44:34:b6,tha=00:00:00:00:00:00)' +]) + +AT_CHECK([ovs-vsctl -- set Interface p0 options:tx_pcap=p0.pcap]) + +packet=50540000000a505400000009123 +dnl Source port is based on a packet hash, so it may differ depending on the +dnl compiler flags and CPU type. Masked with '....'. +encap=f8bc124434b6aa55aa5500000800450000320000400040113406010102580101025c....17c1001e00000000655800007b00 + +dnl Output to tunnel from an int-br internal port. +dnl Checking that the packet arrived and it was correctly encapsulated. +AT_CHECK([ovs-ofctl add-flow int-br "in_port=LOCAL,actions=debug_slow,output:2"]) +AT_CHECK([ovs-appctl netdev-dummy/receive int-br "${packet}4"]) +OVS_WAIT_UNTIL([test `ovs-pcap p0.pcap | egrep "${encap}${packet}4" | wc -l` -ge 1]) +dnl Sending again to exercise the non-miss upcall path. +AT_CHECK([ovs-appctl netdev-dummy/receive int-br "${packet}4"]) +OVS_WAIT_UNTIL([test `ovs-pcap p0.pcap | egrep "${encap}${packet}4" | wc -l` -ge 2]) + +dnl Output to tunnel from the controller. 
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out int-br CONTROLLER "debug_slow,output:2" "${packet}5"]) +OVS_WAIT_UNTIL([test `ovs-pcap p0.pcap | egrep "${encap}${packet}5" | wc -l` -ge 1]) + +dnl Datapath actions should not have tunnel push action. +AT_CHECK([ovs-appctl dpctl/dump-flows | grep -q tnl_push], [1]) +dnl There should be slow_path action instead. +AT_CHECK([ovs-appctl dpctl/dump-flows | grep -q 'slow_path(action)'], [0]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([tunnel_push_pop - underlay bridge match]) OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy ofport_request=1 other-config:hwaddr=aa:55:aa:55:00:00]) @@ -645,3 +760,54 @@ NXST_FLOW reply: OVS_VSWITCHD_STOP AT_CLEANUP + +AT_SETUP([tunnel_push_pop - VXLAN access port]) + +dnl Create bridge that has a MAC address. +OVS_VSWITCHD_START([set bridge br0 datapath_type=dummy dnl + -- set Interface br0 other-config:hwaddr=aa:55:aa:55:00:00]) +AT_CHECK([ovs-vsctl add-port br0 p8 dnl + -- set Interface p8 type=dummy ofport_request=8]) + +dnl Create another bridge. +AT_CHECK([ovs-vsctl add-br ovs-tun0 -- set bridge ovs-tun0 datapath_type=dummy]) + +dnl Add VXLAN port to this bridge. +AT_CHECK([ovs-vsctl add-port ovs-tun0 tun0 dnl + -- set int tun0 type=vxlan options:remote_ip=10.0.0.11 dnl + -- add-port ovs-tun0 p7 dnl + -- set interface p7 type=dummy ofport_request=7]) + +dnl Set VLAN tags, so that br0 and its port p8 have the same tag, +dnl but ovs-tun0's port p7 has a different tag. +AT_CHECK([ovs-vsctl set port p8 tag=42 dnl + -- set port br0 tag=42 dnl + -- set port p7 tag=200]) + +dnl Set IP address and route for br0. +AT_CHECK([ovs-appctl netdev-dummy/ip4addr br0 10.0.0.2/24], [0], [OK +]) +AT_CHECK([ovs-appctl ovs/route/add 10.0.0.11/24 br0], [0], [OK +]) + +dnl Send an ARP reply to port p8 on br0, so that packets will be forwarded +dnl to learned port. 
+AT_CHECK([ovs-ofctl add-flow br0 action=normal]) + +AT_CHECK([ovs-appctl netdev-dummy/receive p8 'in_port(8),dnl + eth(src=aa:55:aa:66:00:00,dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),dnl + arp(sip=10.0.0.11,tip=10.0.0.2,op=2,sha=aa:55:aa:66:00:00,tha=00:00:00:00:00:00)']) + +AT_CHECK([ovs-appctl ofproto/trace ovs-tun0 in_port=p7], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], [dnl +Megaflow: recirc_id=0,eth,in_port=7,dl_src=00:00:00:00:00:00,dnl +dl_dst=00:00:00:00:00:00,dl_type=0x0000 +Datapath actions: push_vlan(vid=200,pcp=0),1,clone(tnl_push(tnl_port(4789),dnl +header(size=50,type=4,eth(dst=aa:55:aa:66:00:00,src=aa:55:aa:55:00:00,dnl +dl_type=0x0800),ipv4(src=10.0.0.2,dst=10.0.0.11,proto=17,tos=0,ttl=64,dnl +frag=0x4000),udp(src=0,dst=4789,csum=0x0),vxlan(flags=0x8000000,vni=0x0)),dnl +out_port(100)),8) +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP diff --git a/tests/tunnel.at b/tests/tunnel.at index b8ae7caa9b..fd482aa872 100644 --- a/tests/tunnel.at +++ b/tests/tunnel.at @@ -126,7 +126,7 @@ AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl AT_CHECK([ovs-appctl dpctl/add-flow "tunnel(dst=1.1.1.1,src=3.3.3.200/255.255.255.0,tp_dst=123,tp_src=1,ttl=64),recirc_id(0),in_port(1),eth(),eth_type(0x0800),ipv4()" "2"]) AT_CHECK([ovs-appctl dpctl/dump-flows | tail -1], [0], [dnl -tunnel(src=3.3.3.200/255.255.255.0,dst=1.1.1.1,ttl=64,tp_src=1,tp_dst=123),recirc_id(0),in_port(1),eth_type(0x0800), packets:0, bytes:0, used:never, actions:2 +tunnel(src=3.3.3.200/255.255.255.0,dst=1.1.1.1,ttl=64,tp_src=1,tp_dst=123),recirc_id(0),in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:2 ]) OVS_VSWITCHD_STOP diff --git a/utilities/ovs-ctl.in b/utilities/ovs-ctl.in index 71800795c0..e6e07f4763 100644 --- a/utilities/ovs-ctl.in +++ b/utilities/ovs-ctl.in @@ -421,7 +421,9 @@ Less important options for "start", "restart" and "force-reload-kmod": --no-force-corefiles do not force on core dumps for OVS daemons --no-mlockall do not lock all of ovs-vswitchd into memory 
--ovsdb-server-priority=NICE set ovsdb-server's niceness (default: $OVSDB_SERVER_PRIORITY) + --ovsdb-server-options=OPTIONS additional options for ovsdb-server (example: '-vconsole:dbg -vfile:dbg') --ovs-vswitchd-priority=NICE set ovs-vswitchd's niceness (default: $OVS_VSWITCHD_PRIORITY) + --ovs-vswitchd-options=OPTIONS additional options for ovs-vswitchd (example: '-vconsole:dbg -vfile:dbg') --no-full-hostname set short hostname instead of full hostname --no-record-hostname do not attempt to determine/record system hostname as part of start command diff --git a/utilities/ovs-lib.in b/utilities/ovs-lib.in index 3eda01d3c1..13477a6a9e 100644 --- a/utilities/ovs-lib.in +++ b/utilities/ovs-lib.in @@ -519,13 +519,13 @@ join_cluster() { LOCAL_ADDR="$3" REMOTE_ADDR="$4" - if test ! -e "$DB_FILE"; then - ovsdb_tool join-cluster "$DB_FILE" "$SCHEMA_NAME" "$LOCAL_ADDR" "$REMOTE_ADDR" - elif ovsdb_tool db-is-standalone "$DB_FILE"; then - # Backup standalone database and join cluster. + if test -e "$DB_FILE" && ovsdb_tool db-is-standalone "$DB_FILE"; then backup_db || return 1 + rm $DB_FILE + fi + if test ! -e "$DB_FILE"; then action "Joining $DB_FILE to cluster" \ - ovsdb_tool join-cluster "$DB_FILE" "$SCHEMA_NAME" "$LOCAL_ADDR" + ovsdb_tool join-cluster "$DB_FILE" "$SCHEMA_NAME" "$LOCAL_ADDR" "$REMOTE_ADDR" fi } diff --git a/utilities/ovs-save b/utilities/ovs-save index 27ce3a9aad..a190902f4d 100755 --- a/utilities/ovs-save +++ b/utilities/ovs-save @@ -102,7 +102,7 @@ save_interfaces () { get_highest_ofp_version() { ovs-vsctl get bridge "$1" protocols | \ sed 's/[][]//g' | sed 's/\ //g' | \ - awk -F ',' '{ print (NF>1)? $(NF) : "OpenFlow14" }' + awk -F ',' '{ print (NF>0)? 
$(NF) : "OpenFlow14" }' } save_flows () { @@ -150,7 +150,10 @@ save_flows () { ovs-ofctl -O $ofp_version dump-flows --no-names --no-stats "$bridge" | \ sed -e '/NXST_FLOW/d' \ -e '/OFPST_FLOW/d' \ - -e 's/\(idle\|hard\)_age=[^,]*,//g' > \ + -e 's/\(idle\|hard\)_age=[^,]*,//g' \ + -e 's/igmp_type/tp_src/g' \ + -e 's/igmp_code/tp_dst/g' \ + -e 's/igmp/ip,nw_proto=2/g' > \ "$workdir/$bridge.flows.dump" done echo "rm -rf \"$workdir\"" diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index cb7c5cb769..c790a56adf 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -4229,7 +4229,7 @@ bridge_configure_aa(struct bridge *br) union ovsdb_atom atom; atom.integer = m->isid; - if (ovsdb_datum_find_key(mc, &atom, OVSDB_TYPE_INTEGER) == UINT_MAX) { + if (!ovsdb_datum_find_key(mc, &atom, OVSDB_TYPE_INTEGER, NULL)) { VLOG_INFO("Deleting isid=%"PRIu32", vlan=%"PRIu16, m->isid, m->vlan); bridge_aa_mapping_destroy(m); @@ -4826,7 +4826,7 @@ queue_ids_include(const struct ovsdb_datum *queues, int64_t target) union ovsdb_atom atom; atom.integer = target; - return ovsdb_datum_find_key(queues, &atom, OVSDB_TYPE_INTEGER) != UINT_MAX; + return ovsdb_datum_find_key(queues, &atom, OVSDB_TYPE_INTEGER, NULL); } static void @@ -5020,7 +5020,7 @@ bridge_configure_mirrors(struct bridge *br) union ovsdb_atom atom; atom.uuid = m->uuid; - if (ovsdb_datum_find_key(mc, &atom, OVSDB_TYPE_UUID) == UINT_MAX) { + if (!ovsdb_datum_find_key(mc, &atom, OVSDB_TYPE_UUID, NULL)) { mirror_destroy(m); } }