diff --git a/.ci/linux-build.sh b/.ci/linux-build.sh index 6cd38ff3ef..c4ec93a398 100755 --- a/.ci/linux-build.sh +++ b/.ci/linux-build.sh @@ -220,7 +220,7 @@ fi if [ "$DPDK" ] || [ "$DPDK_SHARED" ]; then if [ -z "$DPDK_VER" ]; then - DPDK_VER="21.11" + DPDK_VER="21.11.1" fi install_dpdk $DPDK_VER fi diff --git a/.cirrus.yml b/.cirrus.yml index a7ae793bc4..a4d2a5bbcd 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -2,8 +2,8 @@ freebsd_build_task: freebsd_instance: matrix: - image_family: freebsd-12-2-snap - image_family: freebsd-11-4-snap + image_family: freebsd-12-3-snap + image_family: freebsd-13-0-snap cpu: 4 memory: 4G diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst index af524251ff..319ee38c7d 100644 --- a/Documentation/faq/releases.rst +++ b/Documentation/faq/releases.rst @@ -208,9 +208,9 @@ Q: What DPDK version does each Open vSwitch release work with? 2.12.x 18.11.9 2.13.x 19.11.10 2.14.x 19.11.10 - 2.15.x 20.11.1 - 2.16.x 20.11.1 - 2.17.x 21.11.0 + 2.15.x 20.11.4 + 2.16.x 20.11.4 + 2.17.x 21.11.1 ============ ======== Q: Are all the DPDK releases that OVS versions work with maintained? diff --git a/Documentation/intro/install/dpdk.rst b/Documentation/intro/install/dpdk.rst index d9f44055db..f8f01bfadd 100644 --- a/Documentation/intro/install/dpdk.rst +++ b/Documentation/intro/install/dpdk.rst @@ -42,7 +42,7 @@ Build requirements In addition to the requirements described in :doc:`general`, building Open vSwitch with DPDK will require the following: -- DPDK 21.11 +- DPDK 21.11.1 - A `DPDK supported NIC`_ @@ -73,9 +73,9 @@ Install DPDK #. Download the `DPDK sources`_, extract the file and set ``DPDK_DIR``:: $ cd /usr/src/ - $ wget https://fast.dpdk.org/rel/dpdk-21.11.tar.xz - $ tar xf dpdk-21.11.tar.xz - $ export DPDK_DIR=/usr/src/dpdk-21.11 + $ wget https://fast.dpdk.org/rel/dpdk-21.11.1.tar.xz + $ tar xf dpdk-21.11.1.tar.xz + $ export DPDK_DIR=/usr/src/dpdk-stable-21.11.1 $ cd $DPDK_DIR #. 
Configure and install DPDK using Meson diff --git a/Documentation/intro/install/general.rst b/Documentation/intro/install/general.rst index c4300cd53e..a297aadac8 100644 --- a/Documentation/intro/install/general.rst +++ b/Documentation/intro/install/general.rst @@ -169,7 +169,7 @@ other than plain text, only if you have the following: If you are going to extensively modify Open vSwitch, consider installing the following to obtain better warnings: -- "sparse" version 0.5.1 or later +- "sparse" version 0.6.2 or later (https://git.kernel.org/pub/scm/devel/sparse/sparse.git/). - GNU make. diff --git a/NEWS b/NEWS index c10e9bfacc..7c71284f97 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,33 @@ +v2.17.3 - xx xxx xxxx +--------------------- + - OVSDB: + * New Local_Config schema added to support Connections (--remote) + configuration in a clustered database independently for each server. + E.g. for listening on unique addresses. See the ovsdb.local-config.5 + manpage for schema details. + +v2.17.2 - 15 Jun 2022 +--------------------- + - Bug fixes + - DPDK: + * OVS validated with DPDK 21.11.1. It is recommended to use this version + until further releases. + +v2.17.1 - 08 Apr 2022 +--------------------- + - Bug fixes + - libopenvswitch API change: + * To fix the Undefined Behavior issue causing the compiler to incorrectly + optimize important parts of code, container iteration macros (e.g., + LIST_FOR_EACH) have been re-implemented in a UB-safe way. + * Backwards compatibility has mostly been preserved, however the + user-provided pointer is now set to NULL after the loop (unless it + exited via "break;") + * Users of libopenvswitch will need to double-check the use of such loop + macros before compiling with a new version. + * Since the change is limited to the definitions within the headers, the + ABI is not affected. 
+ v2.17.0 - 17 Feb 2022 --------------------- - Userspace datapath: diff --git a/acinclude.m4 b/acinclude.m4 index 0c360fd1ef..61e88105f5 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -305,6 +305,13 @@ AC_DEFUN([OVS_CHECK_LINUX_TC], [ ])], [AC_DEFINE([HAVE_TCA_SKBEDIT_FLAGS], [1], [Define to 1 if TCA_SKBEDIT_FLAGS is available.])]) + + AC_COMPILE_IFELSE([ + AC_LANG_PROGRAM([#include <linux/gen_stats.h>], [ + int x = TCA_STATS_PKT64; + ])], + [AC_DEFINE([HAVE_TCA_STATS_PKT64], [1], + [Define to 1 if TCA_STATS_PKT64 is available.])]) ]) dnl OVS_CHECK_LINUX_SCTP_CT @@ -1424,7 +1431,7 @@ AC_DEFUN([OVS_ENABLE_SPARSE], : ${SPARSE=sparse} AC_SUBST([SPARSE]) AC_CONFIG_COMMANDS_PRE( - [CC='$(if $(C:0=),env REAL_CC="'"$CC"'" CHECK="$(SPARSE) $(SPARSE_WERROR) -I $(top_srcdir)/include/sparse $(SPARSEFLAGS) $(SPARSE_EXTRA_INCLUDES) " cgcc $(CGCCFLAGS),'"$CC"')']) + [CC='$(if $(C:0=),env REAL_CC="'"$CC"'" CHECK="$(SPARSE) $(SPARSE_WERROR) -I $(top_srcdir)/include/sparse -I $(top_srcdir)/include $(SPARSEFLAGS) $(SPARSE_EXTRA_INCLUDES) " cgcc $(CGCCFLAGS),'"$CC"')']) AC_ARG_ENABLE( [sparse], diff --git a/configure.ac b/configure.ac index 4e9bcce272..5cc3f4801e 100644 --- a/configure.ac +++ b/configure.ac @@ -13,7 +13,7 @@ # limitations under the License. 
AC_PREREQ(2.63) -AC_INIT(openvswitch, 2.17.0, bugs@openvswitch.org) +AC_INIT(openvswitch, 2.17.3, bugs@openvswitch.org) AC_CONFIG_SRCDIR([datapath/datapath.c]) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_AUX_DIR([build-aux]) diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c index 70ac0a0e56..218e7db814 100644 --- a/datapath-windows/ovsext/Actions.c +++ b/datapath-windows/ovsext/Actions.c @@ -1712,6 +1712,15 @@ OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx, ipHdr->ttl = ipAttr->ipv4_ttl; key->ipKey.nwTtl = ipAttr->ipv4_ttl; } + if (ipHdr->dscp != (ipAttr->ipv4_tos & 0xfc)) { + /* ECN + DSCP */ + UINT8 newTos = (ipHdr->tos & 0x3) | (ipAttr->ipv4_tos & 0xfc); + if (ipHdr->check != 0) { + ipHdr->check = ChecksumUpdate16(ipHdr->check, ipHdr->tos, newTos); + } + ipHdr->tos = newTos; + key->ipKey.nwTos = newTos; + } return NDIS_STATUS_SUCCESS; } diff --git a/datapath-windows/ovsext/PacketIO.c b/datapath-windows/ovsext/PacketIO.c index cc0840704a..2a206305ec 100644 --- a/datapath-windows/ovsext/PacketIO.c +++ b/datapath-windows/ovsext/PacketIO.c @@ -45,7 +45,9 @@ extern NDIS_STRING ovsExtFriendlyNameUC; static VOID OvsFinalizeCompletionList(OvsCompletionList *completionList); static VOID OvsCompleteNBLIngress(POVS_SWITCH_CONTEXT switchContext, - PNET_BUFFER_LIST netBufferLists, ULONG sendCompleteFlags); + PNET_BUFFER_LIST netBufferLists, + ULONG sendCompleteFlags, + BOOLEAN isSendComplete); VOID OvsInitCompletionList(OvsCompletionList *completionList, @@ -155,7 +157,7 @@ OvsSendNBLIngress(POVS_SWITCH_CONTEXT switchContext, OvsReportNBLIngressError(switchContext, netBufferLists, &filterReason, NDIS_STATUS_PAUSED); OvsCompleteNBLIngress(switchContext, netBufferLists, - sendCompleteFlags); + sendCompleteFlags, FALSE); return; } @@ -175,6 +177,79 @@ OvsSendNBLIngress(POVS_SWITCH_CONTEXT switchContext, NDIS_DEFAULT_PORT_NUMBER, sendFlags); } +static __inline BOOLEAN +OvsCheckNBLSingleSource(PNET_BUFFER_LIST netBufferLists) +{ + UINT32 sourcePortId 
= 0; + BOOLEAN singleSource = TRUE; + PNET_BUFFER_LIST curNbl = netBufferLists; + PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info; + + while (curNbl != NULL) { + info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl); + if (info == NULL) { + /* We are not able to determine the source port ID */ + singleSource = FALSE; + OVS_LOG_INFO("nbl %p has no source port", curNbl); + break; + } + if (curNbl == netBufferLists) { + sourcePortId = info->SourcePortId; + } else if (info->SourcePortId != sourcePortId) { + singleSource = FALSE; + OVS_LOG_INFO("Source port in nbl %p is %u, not from %u", + curNbl, info->SourcePortId, sourcePortId); + break; + } + curNbl = NET_BUFFER_LIST_NEXT_NBL(curNbl); + } + + return singleSource; +} + +/* + * SendNetBufferListsCompleteHandler releases the NetBufferLists with flag + * NDIS_SEND_COMPLETE_FLAGS_SWITCH_SINGLE_SOURCE if all the NBLs have same + * source port, for cloned NBLs, source port might be changed, although the + * cloned NBLs have same source port, their parent NBLs may have different + * source ports, so we should have a check before passing the flag to + * NdisFSendNetBufferListsComplete. + */ +static __inline VOID +OvsCompleteUpperLayerNBL(NDIS_HANDLE ndisHandle, + PNET_BUFFER_LIST netBufferLists, + ULONG sendCompleteFlags, + BOOLEAN isSendComplete) +{ + BOOLEAN singleSource = TRUE; + PNET_BUFFER_LIST curNbl, nextNbl; + + /* To check whether the NBLs are from the same source port */ + if (isSendComplete && + (sendCompleteFlags & NDIS_SEND_COMPLETE_FLAGS_SWITCH_SINGLE_SOURCE)) { + singleSource = OvsCheckNBLSingleSource(netBufferLists); + } + + if (singleSource) { + NdisFSendNetBufferListsComplete(ndisHandle, + netBufferLists, + sendCompleteFlags); + } else { + /* + * Not from a single source port, releasing the NBLs without flag + * NDIS_SEND_COMPLETE_FLAGS_SWITCH_SINGLE_SOURCE doesn't help, so + * let's release them one by one. 
+ */ + for (curNbl = netBufferLists; curNbl != NULL; curNbl = nextNbl) { + nextNbl = NET_BUFFER_LIST_NEXT_NBL(curNbl); + NET_BUFFER_LIST_NEXT_NBL(curNbl) = NULL; + NdisFSendNetBufferListsComplete(ndisHandle, + curNbl, + sendCompleteFlags); + } + } +} + static __inline VOID OvsStartNBLIngressError(POVS_SWITCH_CONTEXT switchContext, PNET_BUFFER_LIST nblList, @@ -184,8 +259,8 @@ OvsStartNBLIngressError(POVS_SWITCH_CONTEXT switchContext, { ASSERT(error); OvsReportNBLIngressError(switchContext, nblList, filterReason, error); - NdisFSendNetBufferListsComplete(switchContext->NdisFilterHandle, nblList, - sendCompleteFlags); + OvsCompleteUpperLayerNBL(switchContext->NdisFilterHandle, nblList, + sendCompleteFlags, FALSE); } static VOID @@ -427,7 +502,8 @@ OvsExtSendNBL(NDIS_HANDLE filterModuleContext, static VOID OvsCompleteNBLIngress(POVS_SWITCH_CONTEXT switchContext, PNET_BUFFER_LIST netBufferLists, - ULONG sendCompleteFlags) + ULONG sendCompleteFlags, + BOOLEAN isSendComplete) { PNET_BUFFER_LIST curNbl = NULL, nextNbl = NULL; OvsCompletionList newList; @@ -449,8 +525,10 @@ OvsCompleteNBLIngress(POVS_SWITCH_CONTEXT switchContext, /* Complete the NBL's that were sent by the upper layer. 
*/ if (newList.dropNbl != NULL) { - NdisFSendNetBufferListsComplete(switchContext->NdisFilterHandle, newList.dropNbl, - sendCompleteFlags); + OvsCompleteUpperLayerNBL(switchContext->NdisFilterHandle, + newList.dropNbl, + sendCompleteFlags, + isSendComplete); } } @@ -466,7 +544,7 @@ OvsExtSendNBLComplete(NDIS_HANDLE filterModuleContext, ULONG sendCompleteFlags) { OvsCompleteNBLIngress((POVS_SWITCH_CONTEXT)filterModuleContext, - netBufferLists, sendCompleteFlags); + netBufferLists, sendCompleteFlags, TRUE); } @@ -476,7 +554,8 @@ OvsFinalizeCompletionList(OvsCompletionList *completionList) if (completionList->dropNbl != NULL) { OvsCompleteNBLIngress(completionList->switchContext, completionList->dropNbl, - completionList->sendCompleteFlags); + completionList->sendCompleteFlags, + FALSE); completionList->dropNbl = NULL; completionList->dropNblNext = &completionList->dropNbl; diff --git a/debian/changelog b/debian/changelog index 3e0d3a66e3..5ddd655d6c 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,21 @@ +openvswitch (2.17.3-1) unstable; urgency=low + [ Open vSwitch team ] + * New upstream version + + -- Open vSwitch team Wed, 15 Jun 2022 12:04:07 +0200 + +openvswitch (2.17.2-1) unstable; urgency=low + [ Open vSwitch team ] + * New upstream version + + -- Open vSwitch team Wed, 15 Jun 2022 12:04:07 +0200 + +openvswitch (2.17.1-1) unstable; urgency=low + [ Open vSwitch team ] + * New upstream version + + -- Open vSwitch team Fri, 08 Apr 2022 14:57:49 +0200 + openvswitch (2.17.0-1) unstable; urgency=low * New upstream version diff --git a/debian/openvswitch-switch.install b/debian/openvswitch-switch.install index 6a6e9a5435..5ac3df77b1 100644 --- a/debian/openvswitch-switch.install +++ b/debian/openvswitch-switch.install @@ -14,4 +14,5 @@ usr/share/openvswitch/scripts/ovs-check-dead-ifs usr/share/openvswitch/scripts/ovs-ctl usr/share/openvswitch/scripts/ovs-kmod-ctl usr/share/openvswitch/scripts/ovs-save +usr/share/openvswitch/local-config.ovsschema 
usr/share/openvswitch/vswitch.ovsschema diff --git a/debian/openvswitch-switch.manpages b/debian/openvswitch-switch.manpages index 7fd7bc55da..088734b0dc 100644 --- a/debian/openvswitch-switch.manpages +++ b/debian/openvswitch-switch.manpages @@ -1,5 +1,6 @@ ovsdb/ovsdb-server.1 ovsdb/ovsdb-server.5 +ovsdb/ovsdb.local-config.5 debian/tmp/usr/share/man/man8/ovs-ctl.8 utilities/ovs-dpctl-top.8 utilities/ovs-dpctl.8 diff --git a/dpdk/lib/vhost/vhost_user.c b/dpdk/lib/vhost/vhost_user.c index a781346c4d..550b0ee8b5 100644 --- a/dpdk/lib/vhost/vhost_user.c +++ b/dpdk/lib/vhost/vhost_user.c @@ -1603,6 +1603,9 @@ vhost_user_get_inflight_fd(struct virtio_net **pdev, int numa_node = SOCKET_ID_ANY; void *addr; + if (validate_msg_fds(msg, 0) != 0) + return RTE_VHOST_MSG_RESULT_ERR; + if (msg->size != sizeof(msg->payload.inflight)) { VHOST_LOG_CONFIG(ERR, "invalid get_inflight_fd message size is %d\n", @@ -1704,6 +1707,9 @@ vhost_user_set_inflight_fd(struct virtio_net **pdev, VhostUserMsg *msg, int fd, i; int numa_node = SOCKET_ID_ANY; + if (validate_msg_fds(msg, 1) != 0) + return RTE_VHOST_MSG_RESULT_ERR; + fd = msg->fds[0]; if (msg->size != sizeof(msg->payload.inflight) || fd < 0) { VHOST_LOG_CONFIG(ERR, @@ -2873,6 +2879,9 @@ vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev, case VHOST_USER_SET_VRING_ADDR: vring_idx = msg->payload.addr.index; break; + case VHOST_USER_SET_INFLIGHT_FD: + vring_idx = msg->payload.inflight.num_queues - 1; + break; default: return 0; } diff --git a/include/linux/automake.mk b/include/linux/automake.mk index 8f063f482e..f857c7e088 100644 --- a/include/linux/automake.mk +++ b/include/linux/automake.mk @@ -2,6 +2,7 @@ noinst_HEADERS += \ include/linux/netlink.h \ include/linux/netfilter/nf_conntrack_sctp.h \ include/linux/pkt_cls.h \ + include/linux/gen_stats.h \ include/linux/tc_act/tc_mpls.h \ include/linux/tc_act/tc_pedit.h \ include/linux/tc_act/tc_skbedit.h \ diff --git a/include/linux/gen_stats.h b/include/linux/gen_stats.h new 
file mode 100644 index 0000000000..6fae6f727c --- /dev/null +++ b/include/linux/gen_stats.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LINUX_GEN_STATS_WRAPPER_H +#define __LINUX_GEN_STATS_WRAPPER_H 1 + +#if defined(__KERNEL__) || defined(HAVE_TCA_STATS_PKT64) +#include_next <linux/gen_stats.h> +#else +#include <linux/types.h> + +enum { + TCA_STATS_UNSPEC, + TCA_STATS_BASIC, + TCA_STATS_RATE_EST, + TCA_STATS_QUEUE, + TCA_STATS_APP, + TCA_STATS_RATE_EST64, + TCA_STATS_PAD, + TCA_STATS_BASIC_HW, + TCA_STATS_PKT64, + __TCA_STATS_MAX, +}; +#define TCA_STATS_MAX (__TCA_STATS_MAX - 1) + +/** + * struct gnet_stats_basic - byte/packet throughput statistics + * @bytes: number of seen bytes + * @packets: number of seen packets + */ +struct gnet_stats_basic { + __u64 bytes; + __u32 packets; +}; + +/** + * struct gnet_stats_rate_est - rate estimator + * @bps: current byte rate + * @pps: current packet rate + */ +struct gnet_stats_rate_est { + __u32 bps; + __u32 pps; +}; + +/** + * struct gnet_stats_rate_est64 - rate estimator + * @bps: current byte rate + * @pps: current packet rate + */ +struct gnet_stats_rate_est64 { + __u64 bps; + __u64 pps; +}; + +/** + * struct gnet_stats_queue - queuing statistics + * @qlen: queue length + * @backlog: backlog size of queue + * @drops: number of dropped packets + * @requeues: number of requeues + * @overlimits: number of enqueues over the limit + */ +struct gnet_stats_queue { + __u32 qlen; + __u32 backlog; + __u32 drops; + __u32 requeues; + __u32 overlimits; +}; + +/** + * struct gnet_estimator - rate estimator configuration + * @interval: sampling period + * @ewma_log: the log of measurement window weight + */ +struct gnet_estimator { + signed char interval; + unsigned char ewma_log; +}; + +#endif /* __KERNEL__ || HAVE_TCA_STATS_PKT64 */ +#endif /* __LINUX_GEN_STATS_WRAPPER_H */ diff --git a/include/openvswitch/flow.h b/include/openvswitch/flow.h index 3054015d93..df10cf579e 100644 --- 
a/include/openvswitch/flow.h +++ 
b/include/openvswitch/flow.h @@ -141,15 +141,14 @@ struct flow { uint8_t nw_tos; /* IP ToS (including DSCP and ECN). */ uint8_t nw_ttl; /* IP TTL/Hop Limit. */ uint8_t nw_proto; /* IP protocol or low 8 bits of ARP opcode. */ + /* L4 (64-bit aligned) */ struct in6_addr nd_target; /* IPv6 neighbor discovery (ND) target. */ struct eth_addr arp_sha; /* ARP/ND source hardware address. */ struct eth_addr arp_tha; /* ARP/ND target hardware address. */ - ovs_be16 tcp_flags; /* TCP flags/ICMPv6 ND options type. - * With L3 to avoid matching L4. */ + ovs_be16 tcp_flags; /* TCP flags/ICMPv6 ND options type. */ ovs_be16 pad2; /* Pad to 64 bits. */ struct ovs_key_nsh nsh; /* Network Service Header keys */ - /* L4 (64-bit aligned) */ ovs_be16 tp_src; /* TCP/UDP/SCTP source port/ICMP type. */ ovs_be16 tp_dst; /* TCP/UDP/SCTP destination port/ICMP code. */ ovs_be16 ct_tp_src; /* CT original tuple source port/ICMP type. */ @@ -179,7 +178,7 @@ BUILD_ASSERT_DECL(offsetof(struct flow, igmp_group_ip4) + sizeof(uint32_t) enum { FLOW_SEGMENT_1_ENDS_AT = offsetof(struct flow, dl_dst), FLOW_SEGMENT_2_ENDS_AT = offsetof(struct flow, nw_src), - FLOW_SEGMENT_3_ENDS_AT = offsetof(struct flow, tp_src), + FLOW_SEGMENT_3_ENDS_AT = offsetof(struct flow, nd_target), }; BUILD_ASSERT_DECL(FLOW_SEGMENT_1_ENDS_AT % sizeof(uint64_t) == 0); BUILD_ASSERT_DECL(FLOW_SEGMENT_2_ENDS_AT % sizeof(uint64_t) == 0); diff --git a/include/openvswitch/hmap.h b/include/openvswitch/hmap.h index 4e001cc692..beb48295b9 100644 --- a/include/openvswitch/hmap.h +++ b/include/openvswitch/hmap.h @@ -134,17 +134,17 @@ struct hmap_node *hmap_random_node(const struct hmap *); * without using 'break', NODE will be NULL. This is true for all of the * HMAP_FOR_EACH_*() macros. 
*/ -#define HMAP_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, HMAP) \ - for (INIT_CONTAINER(NODE, hmap_first_with_hash(HMAP, HASH), MEMBER); \ - (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ - || ((NODE = NULL), false); \ - ASSIGN_CONTAINER(NODE, hmap_next_with_hash(&(NODE)->MEMBER), \ - MEMBER)) -#define HMAP_FOR_EACH_IN_BUCKET(NODE, MEMBER, HASH, HMAP) \ - for (INIT_CONTAINER(NODE, hmap_first_in_bucket(HMAP, HASH), MEMBER); \ - (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ - || ((NODE = NULL), false); \ - ASSIGN_CONTAINER(NODE, hmap_next_in_bucket(&(NODE)->MEMBER), MEMBER)) +#define HMAP_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, HMAP) \ + for (INIT_MULTIVAR(NODE, MEMBER, hmap_first_with_hash(HMAP, HASH), \ + struct hmap_node); \ + CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ + UPDATE_MULTIVAR(NODE, hmap_next_with_hash(ITER_VAR(NODE)))) + +#define HMAP_FOR_EACH_IN_BUCKET(NODE, MEMBER, HASH, HMAP) \ + for (INIT_MULTIVAR(NODE, MEMBER, hmap_first_in_bucket(HMAP, HASH), \ + struct hmap_node); \ + CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ + UPDATE_MULTIVAR(NODE, hmap_next_in_bucket(ITER_VAR(NODE)))) static inline struct hmap_node *hmap_first_with_hash(const struct hmap *, size_t hash); @@ -170,54 +170,80 @@ bool hmap_contains(const struct hmap *, const struct hmap_node *); /* Iterates through every node in HMAP. */ #define HMAP_FOR_EACH(NODE, MEMBER, HMAP) \ HMAP_FOR_EACH_INIT(NODE, MEMBER, HMAP, (void) 0) -#define HMAP_FOR_EACH_INIT(NODE, MEMBER, HMAP, ...) \ - for (INIT_CONTAINER(NODE, hmap_first(HMAP), MEMBER), __VA_ARGS__; \ - (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ - || ((NODE = NULL), false); \ - ASSIGN_CONTAINER(NODE, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER)) +#define HMAP_FOR_EACH_INIT(NODE, MEMBER, HMAP, ...) 
\ + for (INIT_MULTIVAR_EXP(NODE, MEMBER, hmap_first(HMAP), struct hmap_node, \ + __VA_ARGS__); \ + CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ + UPDATE_MULTIVAR(NODE, hmap_next(HMAP, ITER_VAR(NODE)))) /* Safe when NODE may be freed (not needed when NODE may be removed from the * hash map but its members remain accessible and intact). */ -#define HMAP_FOR_EACH_SAFE(NODE, NEXT, MEMBER, HMAP) \ - HMAP_FOR_EACH_SAFE_INIT(NODE, NEXT, MEMBER, HMAP, (void) 0) -#define HMAP_FOR_EACH_SAFE_INIT(NODE, NEXT, MEMBER, HMAP, ...) \ - for (INIT_CONTAINER(NODE, hmap_first(HMAP), MEMBER), __VA_ARGS__; \ - ((NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ - || ((NODE = NULL), false) \ - ? INIT_CONTAINER(NEXT, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER), 1 \ - : 0); \ - (NODE) = (NEXT)) +#define HMAP_FOR_EACH_SAFE_LONG(NODE, NEXT, MEMBER, HMAP) \ + HMAP_FOR_EACH_SAFE_LONG_INIT (NODE, NEXT, MEMBER, HMAP, (void) NEXT) + +#define HMAP_FOR_EACH_SAFE_LONG_INIT(NODE, NEXT, MEMBER, HMAP, ...) \ + for (INIT_MULTIVAR_SAFE_LONG_EXP(NODE, NEXT, MEMBER, hmap_first(HMAP), \ + struct hmap_node, __VA_ARGS__); \ + CONDITION_MULTIVAR_SAFE_LONG(NODE, NEXT, MEMBER, \ + ITER_VAR(NODE) != NULL, \ + ITER_VAR(NEXT) = hmap_next(HMAP, ITER_VAR(NODE)), \ + ITER_VAR(NEXT) != NULL); \ + UPDATE_MULTIVAR_SAFE_LONG(NODE, NEXT)) + +/* Short versions of HMAP_FOR_EACH_SAFE. */ +#define HMAP_FOR_EACH_SAFE_SHORT(NODE, MEMBER, HMAP) \ + HMAP_FOR_EACH_SAFE_SHORT_INIT (NODE, MEMBER, HMAP, (void) 0) + +#define HMAP_FOR_EACH_SAFE_SHORT_INIT(NODE, MEMBER, HMAP, ...) \ + for (INIT_MULTIVAR_SAFE_SHORT_EXP(NODE, MEMBER, hmap_first(HMAP), \ + struct hmap_node, __VA_ARGS__); \ + CONDITION_MULTIVAR_SAFE_SHORT(NODE, MEMBER, \ + ITER_VAR(NODE) != NULL, \ + ITER_NEXT_VAR(NODE) = hmap_next(HMAP, ITER_VAR(NODE))); \ + UPDATE_MULTIVAR_SAFE_SHORT(NODE)) + +#define HMAP_FOR_EACH_SAFE(...) 
\ + OVERLOAD_SAFE_MACRO(HMAP_FOR_EACH_SAFE_LONG, \ + HMAP_FOR_EACH_SAFE_SHORT, \ + 4, __VA_ARGS__) + /* Continues an iteration from just after NODE. */ #define HMAP_FOR_EACH_CONTINUE(NODE, MEMBER, HMAP) \ HMAP_FOR_EACH_CONTINUE_INIT(NODE, MEMBER, HMAP, (void) 0) -#define HMAP_FOR_EACH_CONTINUE_INIT(NODE, MEMBER, HMAP, ...) \ - for (ASSIGN_CONTAINER(NODE, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER), \ - __VA_ARGS__; \ - (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ - || ((NODE = NULL), false); \ - ASSIGN_CONTAINER(NODE, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER)) +#define HMAP_FOR_EACH_CONTINUE_INIT(NODE, MEMBER, HMAP, ...) \ + for (INIT_MULTIVAR_EXP(NODE, MEMBER, hmap_next(HMAP, &(NODE)->MEMBER), \ + struct hmap_node, __VA_ARGS__); \ + CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ + UPDATE_MULTIVAR(NODE, hmap_next(HMAP, ITER_VAR(NODE)))) + +struct hmap_pop_helper_iter__ { + size_t bucket; + struct hmap_node *node; +}; -static inline struct hmap_node * -hmap_pop_helper__(struct hmap *hmap, size_t *bucket) { +static inline void +hmap_pop_helper__(struct hmap *hmap, struct hmap_pop_helper_iter__ *iter) { - for (; *bucket <= hmap->mask; (*bucket)++) { - struct hmap_node *node = hmap->buckets[*bucket]; + for (; iter->bucket <= hmap->mask; (iter->bucket)++) { + struct hmap_node *node = hmap->buckets[iter->bucket]; if (node) { hmap_remove(hmap, node); - return node; + iter->node = node; + return; } } - - return NULL; + iter->node = NULL; } -#define HMAP_FOR_EACH_POP(NODE, MEMBER, HMAP) \ - for (size_t bucket__ = 0; \ - INIT_CONTAINER(NODE, hmap_pop_helper__(HMAP, &bucket__), MEMBER), \ - (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ - || ((NODE = NULL), false);) +#define HMAP_FOR_EACH_POP(NODE, MEMBER, HMAP) \ + for (struct hmap_pop_helper_iter__ ITER_VAR(NODE) = { 0, NULL }; \ + hmap_pop_helper__(HMAP, &ITER_VAR(NODE)), \ + (ITER_VAR(NODE).node != NULL) ? 
\ + (((NODE) = OBJECT_CONTAINING(ITER_VAR(NODE).node, \ + NODE, MEMBER)),1): \ + (((NODE) = NULL), 0);) static inline struct hmap_node *hmap_first(const struct hmap *); static inline struct hmap_node *hmap_next(const struct hmap *, diff --git a/include/openvswitch/list.h b/include/openvswitch/list.h index 8ad5eeb327..6272d340cf 100644 --- a/include/openvswitch/list.h +++ b/include/openvswitch/list.h @@ -72,37 +72,74 @@ static inline bool ovs_list_is_empty(const struct ovs_list *); static inline bool ovs_list_is_singleton(const struct ovs_list *); static inline bool ovs_list_is_short(const struct ovs_list *); -#define LIST_FOR_EACH(ITER, MEMBER, LIST) \ - for (INIT_CONTAINER(ITER, (LIST)->next, MEMBER); \ - &(ITER)->MEMBER != (LIST); \ - ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.next, MEMBER)) -#define LIST_FOR_EACH_CONTINUE(ITER, MEMBER, LIST) \ - for (ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.next, MEMBER); \ - &(ITER)->MEMBER != (LIST); \ - ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.next, MEMBER)) -#define LIST_FOR_EACH_REVERSE(ITER, MEMBER, LIST) \ - for (INIT_CONTAINER(ITER, (LIST)->prev, MEMBER); \ - &(ITER)->MEMBER != (LIST); \ - ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER)) -#define LIST_FOR_EACH_REVERSE_SAFE(ITER, PREV, MEMBER, LIST) \ - for (INIT_CONTAINER(ITER, (LIST)->prev, MEMBER); \ - (&(ITER)->MEMBER != (LIST) \ - ? INIT_CONTAINER(PREV, (ITER)->MEMBER.prev, MEMBER), 1 \ - : 0); \ - (ITER) = (PREV)) -#define LIST_FOR_EACH_REVERSE_CONTINUE(ITER, MEMBER, LIST) \ - for (ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER); \ - &(ITER)->MEMBER != (LIST); \ - ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER)) -#define LIST_FOR_EACH_SAFE(ITER, NEXT, MEMBER, LIST) \ - for (INIT_CONTAINER(ITER, (LIST)->next, MEMBER); \ - (&(ITER)->MEMBER != (LIST) \ - ? 
INIT_CONTAINER(NEXT, (ITER)->MEMBER.next, MEMBER), 1 \ - : 0); \ - (ITER) = (NEXT)) -#define LIST_FOR_EACH_POP(ITER, MEMBER, LIST) \ - while (!ovs_list_is_empty(LIST) \ - && (INIT_CONTAINER(ITER, ovs_list_pop_front(LIST), MEMBER), 1)) +#define LIST_FOR_EACH(VAR, MEMBER, LIST) \ + for (INIT_MULTIVAR(VAR, MEMBER, (LIST)->next, struct ovs_list); \ + CONDITION_MULTIVAR(VAR, MEMBER, ITER_VAR(VAR) != (LIST)); \ + UPDATE_MULTIVAR(VAR, ITER_VAR(VAR)->next)) + +#define LIST_FOR_EACH_CONTINUE(VAR, MEMBER, LIST) \ + for (INIT_MULTIVAR(VAR, MEMBER, VAR->MEMBER.next, struct ovs_list); \ + CONDITION_MULTIVAR(VAR, MEMBER, ITER_VAR(VAR) != (LIST)); \ + UPDATE_MULTIVAR(VAR, ITER_VAR(VAR)->next)) + +#define LIST_FOR_EACH_REVERSE(VAR, MEMBER, LIST) \ + for (INIT_MULTIVAR(VAR, MEMBER, (LIST)->prev, struct ovs_list); \ + CONDITION_MULTIVAR(VAR, MEMBER, ITER_VAR(VAR) != (LIST)); \ + UPDATE_MULTIVAR(VAR, ITER_VAR(VAR)->prev)) + +#define LIST_FOR_EACH_REVERSE_CONTINUE(VAR, MEMBER, LIST) \ + for (INIT_MULTIVAR(VAR, MEMBER, VAR->MEMBER.prev, struct ovs_list); \ + CONDITION_MULTIVAR(VAR, MEMBER, ITER_VAR(VAR) != (LIST)); \ + UPDATE_MULTIVAR(VAR, ITER_VAR(VAR)->prev)) + +/* LONG version of SAFE iterators. */ +#define LIST_FOR_EACH_REVERSE_SAFE_LONG(VAR, PREV, MEMBER, LIST) \ + for (INIT_MULTIVAR_SAFE_LONG(VAR, PREV, MEMBER, (LIST)->prev, \ + struct ovs_list); \ + CONDITION_MULTIVAR_SAFE_LONG(VAR, PREV, MEMBER, \ + ITER_VAR(VAR) != (LIST), \ + ITER_VAR(PREV) = ITER_VAR(VAR)->prev, \ + ITER_VAR(PREV) != (LIST)); \ + UPDATE_MULTIVAR_SAFE_LONG(VAR, PREV)) + +#define LIST_FOR_EACH_SAFE_LONG(VAR, NEXT, MEMBER, LIST) \ + for (INIT_MULTIVAR_SAFE_LONG(VAR, NEXT, MEMBER, (LIST)->next, \ + struct ovs_list); \ + CONDITION_MULTIVAR_SAFE_LONG(VAR, NEXT, MEMBER, \ + ITER_VAR(VAR) != (LIST), \ + ITER_VAR(NEXT) = ITER_VAR(VAR)->next, \ + ITER_VAR(NEXT) != (LIST)); \ + UPDATE_MULTIVAR_SAFE_LONG(VAR, NEXT)) + +/* SHORT version of SAFE iterators. 
*/ +#define LIST_FOR_EACH_REVERSE_SAFE_SHORT(VAR, MEMBER, LIST) \ + for (INIT_MULTIVAR_SAFE_SHORT(VAR, MEMBER, (LIST)->prev, struct ovs_list);\ + CONDITION_MULTIVAR_SAFE_SHORT(VAR, MEMBER, \ + ITER_VAR(VAR) != (LIST), \ + ITER_NEXT_VAR(VAR) = ITER_VAR(VAR)->prev); \ + UPDATE_MULTIVAR_SAFE_SHORT(VAR)) + +#define LIST_FOR_EACH_SAFE_SHORT(VAR, MEMBER, LIST) \ + for (INIT_MULTIVAR_SAFE_SHORT(VAR, MEMBER, (LIST)->next, struct ovs_list);\ + CONDITION_MULTIVAR_SAFE_SHORT(VAR, MEMBER, \ + ITER_VAR(VAR) != (LIST), \ + ITER_NEXT_VAR(VAR) = ITER_VAR(VAR)->next); \ + UPDATE_MULTIVAR_SAFE_SHORT(VAR)) + +#define LIST_FOR_EACH_SAFE(...) \ + OVERLOAD_SAFE_MACRO(LIST_FOR_EACH_SAFE_LONG, \ + LIST_FOR_EACH_SAFE_SHORT, \ + 4, __VA_ARGS__) + +#define LIST_FOR_EACH_REVERSE_SAFE(...) \ + OVERLOAD_SAFE_MACRO(LIST_FOR_EACH_REVERSE_SAFE_LONG, \ + LIST_FOR_EACH_REVERSE_SAFE_SHORT, \ + 4, __VA_ARGS__) + +#define LIST_FOR_EACH_POP(ITER, MEMBER, LIST) \ + while (!ovs_list_is_empty(LIST) ? \ + (INIT_CONTAINER(ITER, ovs_list_pop_front(LIST), MEMBER), 1) : \ + (ITER = NULL, 0)) /* Inline implementations. */ diff --git a/include/openvswitch/ofp-actions.h b/include/openvswitch/ofp-actions.h index 41bcb55d20..b7231c7bb3 100644 --- a/include/openvswitch/ofp-actions.h +++ b/include/openvswitch/ofp-actions.h @@ -218,7 +218,9 @@ struct ofpact *ofpact_next_flattened(const struct ofpact *); static inline struct ofpact * ofpact_end(const struct ofpact *ofpacts, size_t ofpacts_len) { - return ALIGNED_CAST(struct ofpact *, (uint8_t *) ofpacts + ofpacts_len); + return ofpacts + ? 
ALIGNED_CAST(struct ofpact *, (uint8_t *) ofpacts + ofpacts_len) + : NULL; } static inline bool diff --git a/include/openvswitch/ofpbuf.h b/include/openvswitch/ofpbuf.h index 1136ba04c8..32f03ea837 100644 --- a/include/openvswitch/ofpbuf.h +++ b/include/openvswitch/ofpbuf.h @@ -179,7 +179,11 @@ static inline void ofpbuf_delete(struct ofpbuf *b) static inline void *ofpbuf_at(const struct ofpbuf *b, size_t offset, size_t size) { - return offset + size <= b->size ? (char *) b->data + offset : NULL; + if (offset + size <= b->size) { + ovs_assert(b->data); + return (char *) b->data + offset; + } + return NULL; } /* Returns a pointer to byte 'offset' in 'b', which must contain at least @@ -188,20 +192,23 @@ static inline void *ofpbuf_at_assert(const struct ofpbuf *b, size_t offset, size_t size) { ovs_assert(offset + size <= b->size); - return ((char *) b->data) + offset; + ovs_assert(b->data); + return (char *) b->data + offset; } /* Returns a pointer to byte following the last byte of data in use in 'b'. */ static inline void *ofpbuf_tail(const struct ofpbuf *b) { - return (char *) b->data + b->size; + ovs_assert(b->data || !b->size); + return b->data ? (char *) b->data + b->size : NULL; } /* Returns a pointer to byte following the last byte allocated for use (but * not necessarily in use) in 'b'. */ static inline void *ofpbuf_end(const struct ofpbuf *b) { - return (char *) b->base + b->allocated; + ovs_assert(b->base || !b->allocated); + return b->base ? 
(char *) b->base + b->allocated : NULL; } /* Returns the number of bytes of headroom in 'b', that is, the number of bytes @@ -249,6 +256,11 @@ static inline void *ofpbuf_pull(struct ofpbuf *b, size_t size) { ovs_assert(b->size >= size); void *data = b->data; + + if (!size) { + return data; + } + b->data = (char*)b->data + size; b->size = b->size - size; return data; @@ -270,7 +282,7 @@ static inline struct ofpbuf *ofpbuf_from_list(const struct ovs_list *list) static inline bool ofpbuf_equal(const struct ofpbuf *a, const struct ofpbuf *b) { return a->size == b->size && - memcmp(a->data, b->data, a->size) == 0; + (a->size == 0 || memcmp(a->data, b->data, a->size) == 0); } static inline bool ofpbuf_oversized(const struct ofpbuf *ofpacts) diff --git a/include/openvswitch/shash.h b/include/openvswitch/shash.h index c249e13e1f..4e7badd4dc 100644 --- a/include/openvswitch/shash.h +++ b/include/openvswitch/shash.h @@ -41,13 +41,24 @@ struct shash { BUILD_ASSERT_TYPE(SHASH_NODE, struct shash_node *), \ BUILD_ASSERT_TYPE(SHASH, struct shash *)) -#define SHASH_FOR_EACH_SAFE(SHASH_NODE, NEXT, SHASH) \ - HMAP_FOR_EACH_SAFE_INIT ( \ +#define SHASH_FOR_EACH_SAFE_SHORT(SHASH_NODE, SHASH) \ + HMAP_FOR_EACH_SAFE_SHORT_INIT ( \ + SHASH_NODE, node, &(SHASH)->map, \ + BUILD_ASSERT_TYPE(SHASH_NODE, struct shash_node *), \ + BUILD_ASSERT_TYPE(SHASH, struct shash *)) + +#define SHASH_FOR_EACH_SAFE_LONG(SHASH_NODE, NEXT, SHASH) \ + HMAP_FOR_EACH_SAFE_LONG_INIT ( \ SHASH_NODE, NEXT, node, &(SHASH)->map, \ BUILD_ASSERT_TYPE(SHASH_NODE, struct shash_node *), \ BUILD_ASSERT_TYPE(NEXT, struct shash_node *), \ BUILD_ASSERT_TYPE(SHASH, struct shash *)) +#define SHASH_FOR_EACH_SAFE(...) 
\ + OVERLOAD_SAFE_MACRO(SHASH_FOR_EACH_SAFE_LONG, \ + SHASH_FOR_EACH_SAFE_SHORT, \ + 3, __VA_ARGS__) + void shash_init(struct shash *); void shash_destroy(struct shash *); void shash_destroy_free_data(struct shash *); diff --git a/include/openvswitch/util.h b/include/openvswitch/util.h index 228b185c3a..8e6c46a85f 100644 --- a/include/openvswitch/util.h +++ b/include/openvswitch/util.h @@ -145,6 +145,150 @@ OVS_NO_RETURN void ovs_assert_failure(const char *, const char *, const char *); #define INIT_CONTAINER(OBJECT, POINTER, MEMBER) \ ((OBJECT) = NULL, ASSIGN_CONTAINER(OBJECT, POINTER, MEMBER)) +/* Multi-variable container iterators. + * + * The following macros facilitate safe iteration over data structures + * contained in objects. It does so by using an internal iterator variable of + * the type of the member object pointer (i.e: pointer to the data structure). + */ + +/* Multi-variable iterator variable name. + * Returns the name of the internal iterator variable. + */ +#define ITER_VAR(NAME) NAME ## __iterator__ + +/* Multi-variable initialization. Creates an internal iterator variable that + * points to the provided pointer. The type of the iterator variable is + * ITER_TYPE*. It must be the same type as &VAR->MEMBER. + * + * The _EXP version evaluates the extra expressions once. + */ +#define INIT_MULTIVAR(VAR, MEMBER, POINTER, ITER_TYPE) \ + INIT_MULTIVAR_EXP(VAR, MEMBER, POINTER, ITER_TYPE, (void) 0) + +#define INIT_MULTIVAR_EXP(VAR, MEMBER, POINTER, ITER_TYPE, ...) \ + ITER_TYPE *ITER_VAR(VAR) = ( __VA_ARGS__ , (ITER_TYPE *) POINTER) + +/* Multi-variable condition. + * Evaluates the condition expression (that must be based on the internal + * iterator variable). Only if the result of expression is true, the OBJECT is + * set to the object containing the current value of the iterator variable. + * + * It is up to the caller to make sure it is safe to run OBJECT_CONTAINING on + * the pointers that verify the condition. 
+ */ +#define CONDITION_MULTIVAR(VAR, MEMBER, EXPR) \ + ((EXPR) ? \ + (((VAR) = OBJECT_CONTAINING(ITER_VAR(VAR), VAR, MEMBER)), 1) : \ + (((VAR) = NULL), 0)) + +/* Multi-variable update. + * Sets the iterator value to NEXT_ITER. + */ +#define UPDATE_MULTIVAR(VAR, NEXT_ITER) \ + (ITER_VAR(VAR) = NEXT_ITER) + +/* In the safe version of the multi-variable container iteration, the next + * value of the iterator is precalculated on the condition expression. + * This allows for the iterator to be freed inside the loop. + * + * Two versions of the macros are provided: + * + * * In the _SHORT version, the user does not have to provide a variable to + * store the next value of the iterator. Instead, a second iterator variable + * is declared in the INIT_ macro and its name is determined by + * ITER_NEXT_VAR(OBJECT). + * + * * In the _LONG version, the user provides another variable of the same type + * as the iterator object variable to store the next containing object. + * We still declare an iterator variable inside the loop but in this case its + * name is derived from the name of the next containing variable. + * The value of the next containing object will only be set + * (via OBJECT_CONTAINING) if an additional condition is satisfied. This + * second condition must ensure it is safe to call OBJECT_CONTAINING on the + * next iterator variable. + * With respect to the value of the next containing object: + * - Inside of the loop: the variable is either NULL or safe to use. + * - Outside of the loop: the variable is NULL if the loop ends normally. + * If the loop ends with a "break;" statement, rules of Inside the loop + * apply. + */ +#define ITER_NEXT_VAR(NAME) NAME ## __iterator__next__ + +/* Safe initialization declares both iterators. */ +#define INIT_MULTIVAR_SAFE_SHORT(VAR, MEMBER, POINTER, ITER_TYPE) \ + INIT_MULTIVAR_SAFE_SHORT_EXP(VAR, MEMBER, POINTER, ITER_TYPE, (void) 0) + +#define INIT_MULTIVAR_SAFE_SHORT_EXP(VAR, MEMBER, POINTER, ITER_TYPE, ...) 
\ + ITER_TYPE *ITER_VAR(VAR) = ( __VA_ARGS__ , (ITER_TYPE *) POINTER), \ + *ITER_NEXT_VAR(VAR) = NULL + +/* Evaluate the condition expression and, if satisfied, update the _next_ + * iterator with the NEXT_EXPR. + * Both EXPR and NEXT_EXPR should only use ITER_VAR(VAR) and + * ITER_NEXT_VAR(VAR). + */ +#define CONDITION_MULTIVAR_SAFE_SHORT(VAR, MEMBER, EXPR, NEXT_EXPR) \ + ((EXPR) ? \ + (((VAR) = OBJECT_CONTAINING(ITER_VAR(VAR), VAR, MEMBER)), \ + (NEXT_EXPR), 1) : \ + (((VAR) = NULL), 0)) + +#define UPDATE_MULTIVAR_SAFE_SHORT(VAR) \ + UPDATE_MULTIVAR(VAR, ITER_NEXT_VAR(VAR)) + +/* _LONG versions of the macros. */ + +#define INIT_MULTIVAR_SAFE_LONG(VAR, NEXT_VAR, MEMBER, POINTER, ITER_TYPE) \ + INIT_MULTIVAR_SAFE_LONG_EXP(VAR, NEXT_VAR, MEMBER, POINTER, ITER_TYPE, \ + (void) 0) \ + +#define INIT_MULTIVAR_SAFE_LONG_EXP(VAR, NEXT_VAR, MEMBER, POINTER, \ + ITER_TYPE, ...) \ + ITER_TYPE *ITER_VAR(VAR) = ( __VA_ARGS__ , (ITER_TYPE *) POINTER), \ + *ITER_VAR(NEXT_VAR) = NULL + +/* Evaluate the condition expression and, if satisfied, update the _next_ + * iterator with the NEXT_EXPR. After, evaluate the NEXT_COND and, if + * satisfied, set the value to NEXT_VAR. NEXT_COND must use ITER_VAR(NEXT_VAR). + * + * Both EXPR and NEXT_EXPR should only use ITER_VAR(VAR) and + * ITER_VAR(NEXT_VAR). + */ +#define CONDITION_MULTIVAR_SAFE_LONG(VAR, NEXT_VAR, MEMBER, EXPR, NEXT_EXPR, \ + NEXT_COND) \ + ((EXPR) ? \ + (((VAR) = OBJECT_CONTAINING(ITER_VAR(VAR), VAR, MEMBER)), \ + (NEXT_EXPR), ((NEXT_COND) ? \ + ((NEXT_VAR) = \ + OBJECT_CONTAINING(ITER_VAR(NEXT_VAR), NEXT_VAR, MEMBER)) : \ + ((NEXT_VAR) = NULL)), 1) : \ + (((VAR) = NULL), ((NEXT_VAR) = NULL), 0)) + +#define UPDATE_MULTIVAR_SAFE_LONG(VAR, NEXT_VAR) \ + UPDATE_MULTIVAR(VAR, ITER_VAR(NEXT_VAR)) + +/* Helpers to allow overloading the *_SAFE iterator macros and select either + * the LONG or the SHORT version depending on the number of arguments. + */ +#define GET_SAFE_MACRO2(_1, _2, NAME, ...) 
NAME +#define GET_SAFE_MACRO3(_1, _2, _3, NAME, ...) NAME +#define GET_SAFE_MACRO4(_1, _2, _3, _4, NAME, ...) NAME +#define GET_SAFE_MACRO5(_1, _2, _3, _4, _5, NAME, ...) NAME +#define GET_SAFE_MACRO6(_1, _2, _3, _4, _5, _6, NAME, ...) NAME +#define GET_SAFE_MACRO(MAX_ARGS) GET_SAFE_MACRO ## MAX_ARGS + +/* MSVC treats __VA_ARGS__ as a simple token in argument lists. Introduce + * a level of indirection to work around that. */ +#define EXPAND_MACRO(name, args) name args + +/* Overload the LONG and the SHORT version of the macros. MAX_ARGS is the + * maximum number of arguments (i.e: the number of arguments of the LONG + * version). */ +#define OVERLOAD_SAFE_MACRO(LONG, SHORT, MAX_ARGS, ...) \ + EXPAND_MACRO(GET_SAFE_MACRO(MAX_ARGS), \ + (__VA_ARGS__, LONG, SHORT))(__VA_ARGS__) + /* Returns the number of elements in ARRAY. */ #define ARRAY_SIZE(ARRAY) __ARRAY_SIZE(ARRAY) @@ -285,6 +429,9 @@ is_pow2(uintmax_t x) * segfault, so it is important to be aware of correct alignment. */ #define ALIGNED_CAST(TYPE, ATTR) ((TYPE) (void *) (ATTR)) +#define IS_PTR_ALIGNED(OBJ) \ + (!(OBJ) || (uintptr_t) (OBJ) % __alignof__(OVS_TYPEOF(OBJ)) == 0) + #ifdef __cplusplus } #endif diff --git a/ipsec/ovs-monitor-ipsec.in b/ipsec/ovs-monitor-ipsec.in index a8b0705d9f..631a8fca80 100755 --- a/ipsec/ovs-monitor-ipsec.in +++ b/ipsec/ovs-monitor-ipsec.in @@ -337,7 +337,14 @@ conn prevent_unencrypted_vxlan Once strongSwan vici bindings will be distributed with major Linux distributions this function could be simplified.""" vlog.info("Refreshing StrongSwan configuration") - subprocess.call([self.IPSEC, "update"]) + proc = subprocess.Popen([self.IPSEC, "update"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + outs, errs = proc.communicate() + if proc.returncode != 0: + vlog.err("StrongSwan failed to update configuration:\n" + "%s \n %s" % (str(outs), str(errs))) + subprocess.call([self.IPSEC, "rereadsecrets"]) # "ipsec update" command does not remove those tunnels that were # updated or 
that disappeared from the ipsec.conf file. So, we have @@ -708,6 +715,11 @@ conn prevent_unencrypted_vxlan not re.match(r".*need --listen.*", pout): break + if re.match(r".*[F|f]ailed to initiate connection.*", pout): + vlog.err('Failed to initiate connection through' + ' Interface %s.\n' % (conn.split('-')[0])) + vlog.err(pout) + def _nss_clear_database(self): """Remove all OVS IPsec related state from the NSS database""" try: diff --git a/lib/automake.mk b/lib/automake.mk index a23cdc4ade..e9a5978e88 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -38,8 +38,6 @@ lib_libopenvswitchavx512_la_CFLAGS = \ -fPIC \ $(AM_CFLAGS) lib_libopenvswitchavx512_la_SOURCES = \ - lib/cpu.c \ - lib/cpu.h \ lib/dpif-netdev-lookup-avx512-gather.c \ lib/dpif-netdev-extract-avx512.c \ lib/dpif-netdev-avx512.c @@ -89,6 +87,8 @@ lib_libopenvswitch_la_SOURCES = \ lib/conntrack.h \ lib/coverage.c \ lib/coverage.h \ + lib/cpu.c \ + lib/cpu.h \ lib/crc32c.c \ lib/crc32c.h \ lib/csum.c \ diff --git a/lib/cfm.c b/lib/cfm.c index cc43e70e31..c3742f3de2 100644 --- a/lib/cfm.c +++ b/lib/cfm.c @@ -416,7 +416,7 @@ cfm_run(struct cfm *cfm) OVS_EXCLUDED(mutex) ovs_mutex_lock(&mutex); if (timer_expired(&cfm->fault_timer)) { long long int interval = cfm_fault_interval(cfm); - struct remote_mp *rmp, *rmp_next; + struct remote_mp *rmp; enum cfm_fault_reason old_cfm_fault = cfm->fault; uint64_t old_flap_count = cfm->flap_count; int old_health = cfm->health; @@ -475,7 +475,7 @@ cfm_run(struct cfm *cfm) OVS_EXCLUDED(mutex) cfm->rx_packets = rx_packets; } - HMAP_FOR_EACH_SAFE (rmp, rmp_next, node, &cfm->remote_mps) { + HMAP_FOR_EACH_SAFE (rmp, node, &cfm->remote_mps) { if (!rmp->recv) { VLOG_INFO("%s: Received no CCM from RMP %"PRIu64" in the last" " %lldms", cfm->name, rmp->mpid, diff --git a/lib/classifier.c b/lib/classifier.c index c4790ee6ba..0a89626cc3 100644 --- a/lib/classifier.c +++ b/lib/classifier.c @@ -916,9 +916,9 @@ free_conjunctive_matches(struct hmap *matches, struct conjunctive_match 
*cm_stubs, size_t n_cm_stubs) { if (hmap_count(matches) > n_cm_stubs) { - struct conjunctive_match *cm, *next; + struct conjunctive_match *cm; - HMAP_FOR_EACH_SAFE (cm, next, hmap_node, matches) { + HMAP_FOR_EACH_SAFE (cm, hmap_node, matches) { if (!(cm >= cm_stubs && cm < &cm_stubs[n_cm_stubs])) { free(cm); } diff --git a/lib/cmap.h b/lib/cmap.h index c502d23112..72e2ec5f71 100644 --- a/lib/cmap.h +++ b/lib/cmap.h @@ -108,6 +108,8 @@ size_t cmap_replace(struct cmap *, struct cmap_node *old_node, * * CMAP and HASH are evaluated only once. NODE is evaluated many times. * + * After a normal exit of the loop (not through a "break;" statement) NODE is + * NULL. * * Thread-safety * ============= @@ -128,15 +130,15 @@ size_t cmap_replace(struct cmap *, struct cmap_node *old_node, * CMAP_FOR_EACH_WITH_HASH_PROTECTED may only be used if CMAP is guaranteed not * to change during iteration. It may be very slightly faster. */ -#define CMAP_NODE_FOR_EACH(NODE, MEMBER, CMAP_NODE) \ - for (INIT_CONTAINER(NODE, CMAP_NODE, MEMBER); \ - (NODE) != OBJECT_CONTAINING(NULL, NODE, MEMBER); \ - ASSIGN_CONTAINER(NODE, cmap_node_next(&(NODE)->MEMBER), MEMBER)) -#define CMAP_NODE_FOR_EACH_PROTECTED(NODE, MEMBER, CMAP_NODE) \ - for (INIT_CONTAINER(NODE, CMAP_NODE, MEMBER); \ - (NODE) != OBJECT_CONTAINING(NULL, NODE, MEMBER); \ - ASSIGN_CONTAINER(NODE, cmap_node_next_protected(&(NODE)->MEMBER), \ - MEMBER)) +#define CMAP_NODE_FOR_EACH(NODE, MEMBER, CMAP_NODE) \ + for (INIT_MULTIVAR(NODE, MEMBER, CMAP_NODE, struct cmap_node); \ + CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ + UPDATE_MULTIVAR(NODE, cmap_node_next(ITER_VAR(NODE)))) +#define CMAP_NODE_FOR_EACH_PROTECTED(NODE, MEMBER, CMAP_NODE) \ + for (INIT_MULTIVAR(NODE, MEMBER, CMAP_NODE, struct cmap_node); \ + CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ + UPDATE_MULTIVAR(NODE, cmap_node_next_protected(ITER_VAR(NODE)))) + #define CMAP_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, CMAP) \ CMAP_NODE_FOR_EACH(NODE, MEMBER, 
cmap_find(CMAP, HASH)) #define CMAP_FOR_EACH_WITH_HASH_PROTECTED(NODE, MEMBER, HASH, CMAP) \ @@ -223,7 +225,7 @@ unsigned long cmap_find_batch(const struct cmap *cmap, unsigned long map, ? (INIT_CONTAINER(NODE, (CURSOR)->node, MEMBER), \ cmap_cursor_advance(CURSOR), \ true) \ - : false) + : (NODE = NULL, false)) #define CMAP_CURSOR_FOR_EACH(NODE, MEMBER, CURSOR, CMAP) \ for (*(CURSOR) = cmap_cursor_start(CMAP); \ diff --git a/lib/conntrack.c b/lib/conntrack.c index 33a1a92953..0103fb5396 100644 --- a/lib/conntrack.c +++ b/lib/conntrack.c @@ -1526,14 +1526,14 @@ set_label(struct dp_packet *pkt, struct conn *conn, static long long ct_sweep(struct conntrack *ct, long long now, size_t limit) { - struct conn *conn, *next; + struct conn *conn; long long min_expiration = LLONG_MAX; size_t count = 0; ovs_mutex_lock(&ct->ct_lock); for (unsigned i = 0; i < N_CT_TM; i++) { - LIST_FOR_EACH_SAFE (conn, next, exp_node, &ct->exp_lists[i]) { + LIST_FOR_EACH_SAFE (conn, exp_node, &ct->exp_lists[i]) { ovs_mutex_lock(&conn->lock); if (now < conn->expiration || count >= limit) { min_expiration = MIN(min_expiration, conn->expiration); @@ -2242,7 +2242,7 @@ nat_range_hash(const struct conn *conn, uint32_t basis, hash = ct_addr_hash_add(hash, &nat_info->min_addr); hash = ct_addr_hash_add(hash, &nat_info->max_addr); hash = hash_add(hash, - (nat_info->max_port << 16) + ((uint32_t) nat_info->max_port << 16) | nat_info->min_port); hash = ct_endpoint_hash_add(hash, &conn->key.src); hash = ct_endpoint_hash_add(hash, &conn->key.dst); @@ -2265,8 +2265,16 @@ set_sport_range(const struct nat_action_info_t *ni, const struct conn_key *k, if (((ni->nat_action & NAT_ACTION_SNAT_ALL) == NAT_ACTION_SRC) || ((ni->nat_action & NAT_ACTION_DST))) { *curr = ntohs(k->src.port); - *min = MIN_NAT_EPHEMERAL_PORT; - *max = MAX_NAT_EPHEMERAL_PORT; + if (*curr < 512) { + *min = 1; + *max = 511; + } else if (*curr < 1024) { + *min = 600; + *max = 1023; + } else { + *min = MIN_NAT_EPHEMERAL_PORT; + *max = 
MAX_NAT_EPHEMERAL_PORT; + } } else { *min = ni->min_port; *max = ni->max_port; @@ -2389,6 +2397,26 @@ next_addr_in_range_guarded(union ct_addr *curr, union ct_addr *min, return exhausted; } +static bool +nat_get_unique_l4(struct conntrack *ct, struct conn *nat_conn, + ovs_be16 *port, uint16_t curr, uint16_t min, + uint16_t max) +{ + uint16_t orig = curr; + + FOR_EACH_PORT_IN_RANGE (curr, min, max) { + *port = htons(curr); + if (!conn_lookup(ct, &nat_conn->rev_key, + time_msec(), NULL, NULL)) { + return true; + } + } + + *port = htons(orig); + + return false; +} + /* This function tries to get a unique tuple. * Every iteration checks that the reverse tuple doesn't * collide with any existing one. @@ -2403,9 +2431,11 @@ next_addr_in_range_guarded(union ct_addr *curr, union ct_addr *min, * * In case of DNAT: * - For each dst IP address in the range (if any). - * - For each dport in range (if any). - * - Try to find a source port in the ephemeral range - * (after testing the port used by the sender). + * - For each dport in range (if any) tries to find + * an unique tuple. + * - Eventually, if the previous attempt fails, + * tries to find a source port in the ephemeral + * range (after testing the port used by the sender). * * If none can be found, return exhaustion to the caller. 
*/ static bool @@ -2436,6 +2466,11 @@ nat_get_unique_tuple(struct conntrack *ct, const struct conn *conn, set_dport_range(nat_info, &conn->key, hash, &curr_dport, &min_dport, &max_dport); + if (pat_proto) { + nat_conn->rev_key.src.port = htons(curr_dport); + nat_conn->rev_key.dst.port = htons(curr_sport); + } + another_round: store_addr_to_key(&curr_addr, &nat_conn->rev_key, nat_info->nat_action); @@ -2449,15 +2484,19 @@ another_round: goto next_addr; } - FOR_EACH_PORT_IN_RANGE(curr_dport, min_dport, max_dport) { - nat_conn->rev_key.src.port = htons(curr_dport); - FOR_EACH_PORT_IN_RANGE(curr_sport, min_sport, max_sport) { - nat_conn->rev_key.dst.port = htons(curr_sport); - if (!conn_lookup(ct, &nat_conn->rev_key, - time_msec(), NULL, NULL)) { - return true; - } - } + bool found = false; + if (nat_info->nat_action & NAT_ACTION_DST_PORT) { + found = nat_get_unique_l4(ct, nat_conn, &nat_conn->rev_key.src.port, + curr_dport, min_dport, max_dport); + } + + if (!found) { + found = nat_get_unique_l4(ct, nat_conn, &nat_conn->rev_key.dst.port, + curr_sport, min_sport, max_sport); + } + + if (found) { + return true; } /* Check if next IP is in range and respin. 
Otherwise, notify @@ -2857,8 +2896,8 @@ expectation_clean(struct conntrack *ct, const struct conn_key *parent_key) { ovs_rwlock_wrlock(&ct->resources_lock); - struct alg_exp_node *node, *next; - HINDEX_FOR_EACH_WITH_HASH_SAFE (node, next, node_ref, + struct alg_exp_node *node; + HINDEX_FOR_EACH_WITH_HASH_SAFE (node, node_ref, conn_key_hash(parent_key, ct->hash_basis), &ct->alg_expectation_refs) { if (!conn_key_cmp(&node->parent_key, parent_key)) { diff --git a/lib/dns-resolve.c b/lib/dns-resolve.c index d344514343..1afcc65adb 100644 --- a/lib/dns-resolve.c +++ b/lib/dns-resolve.c @@ -189,8 +189,8 @@ dns_resolve_destroy(void) ub_ctx_delete(ub_ctx__); ub_ctx__ = NULL; - struct resolve_request *req, *next; - HMAP_FOR_EACH_SAFE (req, next, hmap_node, &all_reqs__) { + struct resolve_request *req; + HMAP_FOR_EACH_SAFE (req, hmap_node, &all_reqs__) { ub_resolve_free(req->ub_result); free(req->addr); free(req->name); @@ -265,7 +265,7 @@ resolve_callback__(void *req_, int err, struct ub_result *result) if (err != 0 || (result->qtype == ns_t_aaaa && !result->havedata)) { ub_resolve_free(result); req->state = RESOLVE_ERROR; - VLOG_ERR_RL(&rl, "%s: failed to resolve", req->name); + VLOG_WARN_RL(&rl, "%s: failed to resolve", req->name); return; } diff --git a/lib/dpif-netdev-avx512.c b/lib/dpif-netdev-avx512.c index b7131ba3f1..82a4138184 100644 --- a/lib/dpif-netdev-avx512.c +++ b/lib/dpif-netdev-avx512.c @@ -20,7 +20,6 @@ #include -#include "cpu.h" #include "dpif-netdev.h" #include "dpif-netdev-perf.h" #include "dpif-netdev-private.h" @@ -59,19 +58,6 @@ struct dpif_userdata { struct pkt_flow_meta pkt_meta[NETDEV_MAX_BURST]; }; -int32_t -dp_netdev_input_outer_avx512_probe(void) -{ - bool avx512f_available = cpu_has_isa(OVS_CPU_ISA_X86_AVX512F); - bool bmi2_available = cpu_has_isa(OVS_CPU_ISA_X86_BMI2); - - if (!avx512f_available || !bmi2_available) { - return -ENOTSUP; - } - - return 0; -} - int32_t dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd, struct 
dp_packet_batch *packets, @@ -159,7 +145,7 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd, mf_mask = mfex_func(packets, keys, batch_size, in_port, pmd); } - uint32_t lookup_pkts_bitmask = (1ULL << batch_size) - 1; + uint32_t lookup_pkts_bitmask = (UINT64_C(1) << batch_size) - 1; uint32_t iter = lookup_pkts_bitmask; while (iter) { uint32_t i = raw_ctz(iter); @@ -183,7 +169,7 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd, * classifed by vector mfex else do a scalar miniflow extract * for that packet. */ - bool mfex_hit = !!(mf_mask & (1 << i)); + bool mfex_hit = !!(mf_mask & (UINT32_C(1) << i)); /* Check for a partial hardware offload match. */ if (hwol_enabled) { @@ -204,7 +190,7 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd, pkt_meta[i].bytes = dp_packet_size(packet); phwol_hits++; - hwol_emc_smc_hitmask |= (1 << i); + hwol_emc_smc_hitmask |= (UINT32_C(1) << i); continue; } } @@ -227,7 +213,7 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd, if (f) { rules[i] = &f->cr; emc_hits++; - hwol_emc_smc_hitmask |= (1 << i); + hwol_emc_smc_hitmask |= (UINT32_C(1) << i); continue; } } @@ -237,7 +223,7 @@ dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd, if (f) { rules[i] = &f->cr; smc_hits++; - smc_hitmask |= (1 << i); + smc_hitmask |= (UINT32_C(1) << i); continue; } } diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c index c1c1fefb6a..92980ca1b9 100644 --- a/lib/dpif-netdev-extract-avx512.c +++ b/lib/dpif-netdev-extract-avx512.c @@ -42,7 +42,6 @@ #include #include -#include "cpu.h" #include "flow.h" #include "dpif-netdev-private-dpcls.h" @@ -544,7 +543,11 @@ mfex_avx512_process(struct dp_packet_batch *packets, */ __m512i v512_zeros = _mm512_setzero_si512(); __m512i v_blk0; +#if __GNUC__ >= 4 if (__builtin_constant_p(use_vbmi) && use_vbmi) { +#else + if (use_vbmi) { +#endif v_blk0 = _mm512_maskz_permutexvar_epi8_wrap(k_shuf, v_shuf, v_pkt0); } else { @@ -619,7 
+622,7 @@ mfex_avx512_process(struct dp_packet_batch *packets, }; /* This packet has its miniflow created, add to hitmask. */ - hitmask |= 1 << i; + hitmask |= UINT32_C(1) << i; } return hitmask; @@ -659,47 +662,5 @@ DECLARE_MFEX_FUNC(ip_udp, PROFILE_ETH_IPV4_UDP) DECLARE_MFEX_FUNC(ip_tcp, PROFILE_ETH_IPV4_TCP) DECLARE_MFEX_FUNC(dot1q_ip_udp, PROFILE_ETH_VLAN_IPV4_UDP) DECLARE_MFEX_FUNC(dot1q_ip_tcp, PROFILE_ETH_VLAN_IPV4_TCP) - - -static int32_t -avx512_isa_probe(uint32_t needs_vbmi) -{ - static enum ovs_cpu_isa isa_required[] = { - OVS_CPU_ISA_X86_AVX512F, - OVS_CPU_ISA_X86_AVX512BW, - OVS_CPU_ISA_X86_BMI2, - }; - - int32_t ret = 0; - for (uint32_t i = 0; i < ARRAY_SIZE(isa_required); i++) { - if (!cpu_has_isa(isa_required[i])) { - ret = -ENOTSUP; - } - } - - if (needs_vbmi) { - if (!cpu_has_isa(OVS_CPU_ISA_X86_AVX512VBMI)) { - ret = -ENOTSUP; - } - } - - return ret; -} - -/* Probe functions to check ISA requirements. */ -int32_t -mfex_avx512_probe(void) -{ - const uint32_t needs_vbmi = 0; - return avx512_isa_probe(needs_vbmi); -} - -int32_t -mfex_avx512_vbmi_probe(void) -{ - const uint32_t needs_vbmi = 1; - return avx512_isa_probe(needs_vbmi); -} - #endif /* __CHECKER__ */ #endif /* __x86_64__ */ diff --git a/lib/dpif-netdev-lookup-avx512-gather.c b/lib/dpif-netdev-lookup-avx512-gather.c index 7bc1e9e9a5..fb2084392a 100644 --- a/lib/dpif-netdev-lookup-avx512-gather.c +++ b/lib/dpif-netdev-lookup-avx512-gather.c @@ -23,7 +23,6 @@ #include "dpif-netdev-lookup.h" #include "cmap.h" -#include "cpu.h" #include "flow.h" #include "pvector.h" #include "openvswitch/vlog.h" @@ -396,18 +395,11 @@ dpcls_avx512_gather_mf_any(struct dpcls_subtable *subtable, uint32_t keys_map, } dpcls_subtable_lookup_func -dpcls_subtable_avx512_gather_probe(uint32_t u0_bits, uint32_t u1_bits) +dpcls_subtable_avx512_gather_probe__(uint32_t u0_bits, uint32_t u1_bits, + bool use_vpop) { dpcls_subtable_lookup_func f = NULL; - int avx512f_available = cpu_has_isa(OVS_CPU_ISA_X86_AVX512F); - int 
bmi2_available = cpu_has_isa(OVS_CPU_ISA_X86_BMI2); - if (!avx512f_available || !bmi2_available) { - return NULL; - } - - int use_vpop = cpu_has_isa(OVS_CPU_ISA_X86_VPOPCNTDQ); - CHECK_LOOKUP_FUNCTION(9, 4, use_vpop); CHECK_LOOKUP_FUNCTION(9, 1, use_vpop); CHECK_LOOKUP_FUNCTION(5, 3, use_vpop); diff --git a/lib/dpif-netdev-lookup.c b/lib/dpif-netdev-lookup.c index bd0a99abe7..b1d2801575 100644 --- a/lib/dpif-netdev-lookup.c +++ b/lib/dpif-netdev-lookup.c @@ -18,10 +18,25 @@ #include #include "dpif-netdev-lookup.h" +#include "cpu.h" #include "openvswitch/vlog.h" VLOG_DEFINE_THIS_MODULE(dpif_netdev_lookup); +#if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__) +static dpcls_subtable_lookup_func +dpcls_subtable_avx512_gather_probe(uint32_t u0_bits, uint32_t u1_bits) +{ + if (!cpu_has_isa(OVS_CPU_ISA_X86_AVX512F) + || !cpu_has_isa(OVS_CPU_ISA_X86_BMI2)) { + return NULL; + } + + return dpcls_subtable_avx512_gather_probe__(u0_bits, u1_bits, + cpu_has_isa(OVS_CPU_ISA_X86_VPOPCNTDQ)); +} +#endif + /* Actual list of implementations goes here */ static struct dpcls_subtable_lookup_info_t subtable_lookups[] = { /* The autovalidator implementation will not be used by default, it must diff --git a/lib/dpif-netdev-lookup.h b/lib/dpif-netdev-lookup.h index 59f51faa0e..5d2d845945 100644 --- a/lib/dpif-netdev-lookup.h +++ b/lib/dpif-netdev-lookup.h @@ -44,7 +44,8 @@ dpcls_subtable_generic_probe(uint32_t u0_bit_count, uint32_t u1_bit_count); /* Probe function for AVX-512 gather implementation */ dpcls_subtable_lookup_func -dpcls_subtable_avx512_gather_probe(uint32_t u0_bit_cnt, uint32_t u1_bit_cnt); +dpcls_subtable_avx512_gather_probe__(uint32_t u0_bit_cnt, uint32_t u1_bit_cnt, + bool use_vpop); /* Subtable registration and iteration helpers */ diff --git a/lib/dpif-netdev-private-dpif.c b/lib/dpif-netdev-private-dpif.c index 84d4ec156e..5ae119a308 100644 --- a/lib/dpif-netdev-private-dpif.c +++ b/lib/dpif-netdev-private-dpif.c @@ -22,6 +22,7 @@ #include #include 
+#include "cpu.h" #include "openvswitch/dynamic-string.h" #include "openvswitch/vlog.h" #include "util.h" @@ -33,6 +34,19 @@ enum dpif_netdev_impl_info_idx { DPIF_NETDEV_IMPL_AVX512 }; +#if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__) +static int32_t +dp_netdev_input_outer_avx512_probe(void) +{ + if (!cpu_has_isa(OVS_CPU_ISA_X86_AVX512F) + || !cpu_has_isa(OVS_CPU_ISA_X86_BMI2)) { + return -ENOTSUP; + } + + return 0; +} +#endif + /* Actual list of implementations goes here. */ static struct dpif_netdev_impl_info_t dpif_impls[] = { /* The default scalar C code implementation. */ diff --git a/lib/dpif-netdev-private-dpif.h b/lib/dpif-netdev-private-dpif.h index 0da639c55a..3e38630f53 100644 --- a/lib/dpif-netdev-private-dpif.h +++ b/lib/dpif-netdev-private-dpif.h @@ -67,10 +67,7 @@ dp_netdev_input(struct dp_netdev_pmd_thread *pmd, struct dp_packet_batch *packets, odp_port_t in_port); -/* AVX512 enabled DPIF implementation and probe functions. */ -int32_t -dp_netdev_input_outer_avx512_probe(void); - +/* AVX512 enabled DPIF implementation function. */ int32_t dp_netdev_input_outer_avx512(struct dp_netdev_pmd_thread *pmd, struct dp_packet_batch *packets, diff --git a/lib/dpif-netdev-private-extract.c b/lib/dpif-netdev-private-extract.c index a29bdcfa78..fe04ea80ff 100644 --- a/lib/dpif-netdev-private-extract.c +++ b/lib/dpif-netdev-private-extract.c @@ -19,6 +19,7 @@ #include #include +#include "cpu.h" #include "dp-packet.h" #include "dpif-netdev-private-dpcls.h" #include "dpif-netdev-private-extract.h" @@ -33,6 +34,43 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev_extract); /* Variable to hold the default MFEX implementation. 
*/ static ATOMIC(miniflow_extract_func) default_mfex_func; +#if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__) +static int32_t +avx512_isa_probe(bool needs_vbmi) +{ + static enum ovs_cpu_isa isa_required[] = { + OVS_CPU_ISA_X86_AVX512F, + OVS_CPU_ISA_X86_AVX512BW, + OVS_CPU_ISA_X86_BMI2, + }; + + for (uint32_t i = 0; i < ARRAY_SIZE(isa_required); i++) { + if (!cpu_has_isa(isa_required[i])) { + return -ENOTSUP; + } + } + + if (needs_vbmi && !cpu_has_isa(OVS_CPU_ISA_X86_AVX512VBMI)) { + return -ENOTSUP; + } + + return 0; +} + +/* Probe functions to check ISA requirements. */ +static int32_t +mfex_avx512_probe(void) +{ + return avx512_isa_probe(false); +} + +static int32_t +mfex_avx512_vbmi_probe(void) +{ + return avx512_isa_probe(true); +} +#endif + /* Implementations of available extract options and * the implementations are always in order of preference. */ diff --git a/lib/dpif-netdev-private-extract.h b/lib/dpif-netdev-private-extract.h index f9a757ba41..3e06148c5a 100644 --- a/lib/dpif-netdev-private-extract.h +++ b/lib/dpif-netdev-private-extract.h @@ -176,10 +176,8 @@ mfex_study_traffic(struct dp_packet_batch *packets, int mfex_set_study_pkt_cnt(uint32_t pkt_cmp_count, const char *name); -/* AVX512 MFEX Probe and Implementations functions. */ +/* AVX512 MFEX Implementation functions. */ #ifdef __x86_64__ -int32_t mfex_avx512_probe(void); -int32_t mfex_avx512_vbmi_probe(void); #define DECLARE_AVX512_MFEX_PROTOTYPE(name) \ uint32_t \ diff --git a/lib/dpif-netdev-private-flow.h b/lib/dpif-netdev-private-flow.h index 66016eb099..7425dd44e7 100644 --- a/lib/dpif-netdev-private-flow.h +++ b/lib/dpif-netdev-private-flow.h @@ -104,6 +104,7 @@ struct dp_netdev_flow { bool dead; uint32_t mark; /* Unique flow mark for netdev offloading. */ uint64_t simple_match_mark; /* Unique flow mark for the simple match. */ + odp_port_t orig_in_port; /* Statistics. 
*/ struct dp_netdev_flow_stats stats; diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 9f35713ef5..3d9d8929f7 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -93,7 +93,8 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev); /* Auto Load Balancing Defaults */ #define ALB_IMPROVEMENT_THRESHOLD 25 #define ALB_LOAD_THRESHOLD 95 -#define ALB_REBALANCE_INTERVAL 1 /* 1 Min */ +#define ALB_REBALANCE_INTERVAL 1 /* 1 Min */ +#define MAX_ALB_REBALANCE_INTERVAL 20000 /* 20000 Min */ #define MIN_TO_MSEC 60000 #define FLOW_DUMP_MAX_BATCH 50 @@ -1932,13 +1933,13 @@ static void dp_netdev_free(struct dp_netdev *dp) OVS_REQUIRES(dp_netdev_mutex) { - struct dp_netdev_port *port, *next; + struct dp_netdev_port *port; struct tx_bond *bond; shash_find_and_delete(&dp_netdevs, dp->name); ovs_rwlock_wrlock(&dp->port_rwlock); - HMAP_FOR_EACH_SAFE (port, next, node, &dp->ports) { + HMAP_FOR_EACH_SAFE (port, node, &dp->ports) { do_del_port(dp, port); } ovs_rwlock_unlock(&dp->port_rwlock); @@ -3006,7 +3007,7 @@ static void queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd, struct dp_netdev_flow *flow, struct match *match, const struct nlattr *actions, size_t actions_len, - odp_port_t orig_in_port, int op) + int op) { struct dp_offload_thread_item *item; struct dp_offload_flow_item *flow_offload; @@ -3021,7 +3022,7 @@ queue_netdev_flow_put(struct dp_netdev_pmd_thread *pmd, flow_offload->actions = xmalloc(actions_len); memcpy(flow_offload->actions, actions, actions_len); flow_offload->actions_len = actions_len; - flow_offload->orig_in_port = orig_in_port; + flow_offload->orig_in_port = flow->orig_in_port; item->timestamp = pmd->ctx.now; dp_netdev_offload_flow_enqueue(item); @@ -4095,6 +4096,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, flow->dead = false; flow->batch = NULL; flow->mark = INVALID_FLOW_MARK; + flow->orig_in_port = orig_in_port; *CONST_CAST(unsigned *, &flow->pmd_id) = pmd->core_id; *CONST_CAST(struct flow *, &flow->flow) = match->flow; *CONST_CAST(ovs_u128 *, 
&flow->ufid) = *ufid; @@ -4129,7 +4131,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, } queue_netdev_flow_put(pmd, flow, match, actions, actions_len, - orig_in_port, DP_NETDEV_FLOW_OFFLOAD_OP_ADD); + DP_NETDEV_FLOW_OFFLOAD_OP_ADD); log_netdev_flow_change(flow, match, NULL, actions, actions_len); return flow; @@ -4171,7 +4173,7 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd, ovsrcu_set(&netdev_flow->actions, new_actions); queue_netdev_flow_put(pmd, netdev_flow, match, - put->actions, put->actions_len, ODPP_NONE, + put->actions, put->actions_len, DP_NETDEV_FLOW_OFFLOAD_OP_MOD); log_netdev_flow_change(netdev_flow, match, old_actions, put->actions, put->actions_len); @@ -4778,8 +4780,8 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config) uint32_t insert_min, cur_min; uint32_t tx_flush_interval, cur_tx_flush_interval; uint64_t rebalance_intvl; - uint8_t rebalance_load, cur_rebalance_load; - uint8_t rebalance_improve; + uint8_t cur_rebalance_load; + uint32_t rebalance_load, rebalance_improve; bool log_autolb = false; enum sched_assignment_type pmd_rxq_assign_type; @@ -4880,8 +4882,12 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config) struct pmd_auto_lb *pmd_alb = &dp->pmd_alb; - rebalance_intvl = smap_get_int(other_config, "pmd-auto-lb-rebal-interval", - ALB_REBALANCE_INTERVAL); + rebalance_intvl = smap_get_ullong(other_config, + "pmd-auto-lb-rebal-interval", + ALB_REBALANCE_INTERVAL); + if (rebalance_intvl > MAX_ALB_REBALANCE_INTERVAL) { + rebalance_intvl = ALB_REBALANCE_INTERVAL; + } /* Input is in min, convert it to msec. 
*/ rebalance_intvl = @@ -4894,21 +4900,21 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config) log_autolb = true; } - rebalance_improve = smap_get_int(other_config, - "pmd-auto-lb-improvement-threshold", - ALB_IMPROVEMENT_THRESHOLD); + rebalance_improve = smap_get_uint(other_config, + "pmd-auto-lb-improvement-threshold", + ALB_IMPROVEMENT_THRESHOLD); if (rebalance_improve > 100) { rebalance_improve = ALB_IMPROVEMENT_THRESHOLD; } if (rebalance_improve != pmd_alb->rebalance_improve_thresh) { pmd_alb->rebalance_improve_thresh = rebalance_improve; VLOG_INFO("PMD auto load balance improvement threshold set to " - "%"PRIu8"%%", rebalance_improve); + "%"PRIu32"%%", rebalance_improve); log_autolb = true; } - rebalance_load = smap_get_int(other_config, "pmd-auto-lb-load-threshold", - ALB_LOAD_THRESHOLD); + rebalance_load = smap_get_uint(other_config, "pmd-auto-lb-load-threshold", + ALB_LOAD_THRESHOLD); if (rebalance_load > 100) { rebalance_load = ALB_LOAD_THRESHOLD; } @@ -4916,7 +4922,7 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config) if (rebalance_load != cur_rebalance_load) { atomic_store_relaxed(&pmd_alb->rebalance_load_thresh, rebalance_load); - VLOG_INFO("PMD auto load balance load threshold set to %"PRIu8"%%", + VLOG_INFO("PMD auto load balance load threshold set to %"PRIu32"%%", rebalance_load); log_autolb = true; } @@ -5684,23 +5690,28 @@ sched_numa_list_put_in_place(struct sched_numa_list *numa_list) } } +/* Returns 'true' if OVS rxq scheduling algorithm assigned any unpinned rxq to + * a PMD thread core on a non-local numa node. */ static bool sched_numa_list_cross_numa_polling(struct sched_numa_list *numa_list) { struct sched_numa *numa; - /* For each numa */ HMAP_FOR_EACH (numa, node, &numa_list->numas) { - /* For each pmd */ for (int i = 0; i < numa->n_pmds; i++) { struct sched_pmd *sched_pmd; sched_pmd = &numa->pmds[i]; - /* For each rxq. 
*/ + if (sched_pmd->isolated) { + /* All rxqs on this PMD thread core are pinned. */ + continue; + } for (unsigned k = 0; k < sched_pmd->n_rxq; k++) { struct dp_netdev_rxq *rxq = sched_pmd->rxqs[k]; - - if (!sched_pmd->isolated && + /* Check if the rxq is not pinned to a specific PMD thread core + * by the user AND the PMD thread core that OVS assigned is + * non-local to the rxq port. */ + if (rxq->core_id == OVS_CORE_UNSPEC && rxq->pmd->numa_id != netdev_get_numa_id(rxq->port->netdev)) { return true; @@ -6000,10 +6011,10 @@ sched_numa_list_schedule(struct sched_numa_list *numa_list, /* Find any numa with available PMDs. */ for (int j = 0; j < n_numa; j++) { numa = sched_numa_list_next(numa_list, last_cross_numa); + last_cross_numa = numa; if (sched_numa_noniso_pmd_count(numa)) { break; } - last_cross_numa = numa; numa = NULL; } } @@ -6111,7 +6122,7 @@ sched_numa_list_variance(struct sched_numa_list *numa_list) * pmd_rebalance_dry_run() can be avoided when it is not needed. */ static bool -pmd_reblance_dry_run_needed(struct dp_netdev *dp) +pmd_rebalance_dry_run_needed(struct dp_netdev *dp) OVS_REQ_RDLOCK(dp->port_rwlock) { struct dp_netdev_pmd_thread *pmd; @@ -6342,11 +6353,11 @@ pmd_remove_stale_ports(struct dp_netdev *dp, OVS_EXCLUDED(pmd->port_mutex) OVS_REQ_RDLOCK(dp->port_rwlock) { - struct rxq_poll *poll, *poll_next; - struct tx_port *tx, *tx_next; + struct rxq_poll *poll; + struct tx_port *tx; ovs_mutex_lock(&pmd->port_mutex); - HMAP_FOR_EACH_SAFE (poll, poll_next, node, &pmd->poll_list) { + HMAP_FOR_EACH_SAFE (poll, node, &pmd->poll_list) { struct dp_netdev_port *port = poll->rxq->port; if (port->need_reconfigure @@ -6354,7 +6365,7 @@ pmd_remove_stale_ports(struct dp_netdev *dp, dp_netdev_del_rxq_from_pmd(pmd, poll); } } - HMAP_FOR_EACH_SAFE (tx, tx_next, node, &pmd->tx_ports) { + HMAP_FOR_EACH_SAFE (tx, node, &pmd->tx_ports) { struct dp_netdev_port *port = tx->port; if (port->need_reconfigure @@ -6430,8 +6441,7 @@ reconfigure_datapath(struct dp_netdev 
*dp) /* We only reconfigure the ports that we determined above, because they're * not being used by any pmd thread at the moment. If a port fails to * reconfigure we remove it from the datapath. */ - struct dp_netdev_port *next_port; - HMAP_FOR_EACH_SAFE (port, next_port, node, &dp->ports) { + HMAP_FOR_EACH_SAFE (port, node, &dp->ports) { int err; if (!port->need_reconfigure) { @@ -6487,10 +6497,10 @@ reconfigure_datapath(struct dp_netdev *dp) } CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { - struct rxq_poll *poll, *poll_next; + struct rxq_poll *poll; ovs_mutex_lock(&pmd->port_mutex); - HMAP_FOR_EACH_SAFE (poll, poll_next, node, &pmd->poll_list) { + HMAP_FOR_EACH_SAFE (poll, node, &pmd->poll_list) { if (poll->rxq->pmd != pmd) { dp_netdev_del_rxq_from_pmd(pmd, poll); @@ -6682,7 +6692,7 @@ dpif_netdev_run(struct dpif *dpif) if (pmd_rebalance && !dp_netdev_is_reconf_required(dp) && !ports_require_restart(dp) && - pmd_reblance_dry_run_needed(dp) && + pmd_rebalance_dry_run_needed(dp) && pmd_rebalance_dry_run(dp)) { VLOG_INFO("PMD auto load balance dry run. " "Requesting datapath reconfigure."); @@ -7364,15 +7374,15 @@ static struct dp_netdev_pmd_thread * dp_netdev_get_pmd(struct dp_netdev *dp, unsigned core_id) { struct dp_netdev_pmd_thread *pmd; - const struct cmap_node *pnode; - pnode = cmap_find(&dp->poll_threads, hash_int(core_id, 0)); - if (!pnode) { - return NULL; + CMAP_FOR_EACH_WITH_HASH (pmd, node, hash_int(core_id, 0), + &dp->poll_threads) { + if (pmd->core_id == core_id) { + return dp_netdev_pmd_try_ref(pmd) ? pmd : NULL; + } } - pmd = CONTAINER_OF(pnode, struct dp_netdev_pmd_thread, node); - return dp_netdev_pmd_try_ref(pmd) ? pmd : NULL; + return NULL; } /* Sets the 'struct dp_netdev_pmd_thread' for non-pmd threads. 
*/ @@ -7505,6 +7515,7 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd) seq_destroy(pmd->reload_seq); ovs_mutex_destroy(&pmd->port_mutex); ovs_mutex_destroy(&pmd->bond_mutex); + free(pmd->netdev_input_func_userdata); free(pmd); } diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 71e35ccdda..06e1e8ca02 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -85,7 +85,7 @@ enum { MAX_PORTS = USHRT_MAX }; #define EPOLLEXCLUSIVE (1u << 28) #endif -#define OVS_DP_F_UNSUPPORTED (1 << 31); +#define OVS_DP_F_UNSUPPORTED (1u << 31); /* This PID is not used by the kernel datapath when using dispatch per CPU, * but it is required to be set (not zero). */ diff --git a/lib/dynamic-string.c b/lib/dynamic-string.c index fd0127ed17..3b4520f87c 100644 --- a/lib/dynamic-string.c +++ b/lib/dynamic-string.c @@ -152,7 +152,10 @@ ds_put_format_valist(struct ds *ds, const char *format, va_list args_) va_copy(args, args_); available = ds->string ? ds->allocated - ds->length + 1 : 0; - needed = vsnprintf(&ds->string[ds->length], available, format, args); + needed = vsnprintf(ds->string + ? &ds->string[ds->length] + : NULL, + available, format, args); va_end(args); if (needed < available) { @@ -162,7 +165,8 @@ ds_put_format_valist(struct ds *ds, const char *format, va_list args_) va_copy(args, args_); available = ds->allocated - ds->length + 1; - needed = vsnprintf(&ds->string[ds->length], available, format, args); + needed = vsnprintf(&ds->string[ds->length], + available, format, args); va_end(args); ovs_assert(needed < available); @@ -198,10 +202,11 @@ ds_put_strftime_msec(struct ds *ds, const char *template, long long int when, localtime_msec(when, &tm); } + ds_reserve(ds, 64); for (;;) { - size_t avail = ds->string ? 
ds->allocated - ds->length + 1 : 0; - size_t used = strftime_msec(&ds->string[ds->length], avail, template, - &tm); + size_t avail = ds->allocated - ds->length + 1; + char *dest = &ds->string[ds->length]; + size_t used = strftime_msec(dest, avail, template, &tm); if (used) { ds->length += used; return; diff --git a/lib/fat-rwlock.c b/lib/fat-rwlock.c index d913b2088f..771ccc9737 100644 --- a/lib/fat-rwlock.c +++ b/lib/fat-rwlock.c @@ -97,14 +97,14 @@ fat_rwlock_init(struct fat_rwlock *rwlock) void fat_rwlock_destroy(struct fat_rwlock *rwlock) { - struct fat_rwlock_slot *slot, *next; + struct fat_rwlock_slot *slot; /* Order is important here. By destroying the thread-specific data first, * before we destroy the slots, we ensure that the thread-specific * data destructor can't race with our loop below. */ ovsthread_key_delete(rwlock->key); - LIST_FOR_EACH_SAFE (slot, next, list_node, &rwlock->threads) { + LIST_FOR_EACH_SAFE (slot, list_node, &rwlock->threads) { free_slot(slot); } ovs_mutex_destroy(&rwlock->mutex); diff --git a/lib/hindex.h b/lib/hindex.h index 876c5a9e39..ea7402587e 100644 --- a/lib/hindex.h +++ b/lib/hindex.h @@ -128,18 +128,38 @@ void hindex_remove(struct hindex *, struct hindex_node *); * Evaluates HASH only once. */ #define HINDEX_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, HINDEX) \ - for (INIT_CONTAINER(NODE, hindex_node_with_hash(HINDEX, HASH), MEMBER); \ - NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER); \ - ASSIGN_CONTAINER(NODE, (NODE)->MEMBER.s, MEMBER)) + for (INIT_MULTIVAR(NODE, MEMBER, hindex_node_with_hash(HINDEX, HASH), \ + struct hindex_node); \ + CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ + UPDATE_MULTIVAR(NODE, ITER_VAR(NODE)->s)) /* Safe when NODE may be freed (not needed when NODE may be removed from the * hash map but its members remain accessible and intact). 
*/ -#define HINDEX_FOR_EACH_WITH_HASH_SAFE(NODE, NEXT, MEMBER, HASH, HINDEX) \ - for (INIT_CONTAINER(NODE, hindex_node_with_hash(HINDEX, HASH), MEMBER); \ - (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER) \ - ? INIT_CONTAINER(NEXT, (NODE)->MEMBER.s, MEMBER), 1 \ - : 0); \ - (NODE) = (NEXT)) +#define HINDEX_FOR_EACH_WITH_HASH_SAFE_LONG(NODE, NEXT, MEMBER, HASH, HINDEX) \ + for (INIT_MULTIVAR_SAFE_LONG(NODE, NEXT, MEMBER, \ + hindex_node_with_hash(HINDEX, HASH), \ + struct hindex_node); \ + CONDITION_MULTIVAR_SAFE_LONG(NODE, NEXT, MEMBER, \ + ITER_VAR(NODE) != NULL, \ + ITER_VAR(NEXT) = ITER_VAR(NODE)->s, \ + ITER_VAR(NEXT) != NULL); \ + UPDATE_MULTIVAR_SAFE_LONG(NODE, NEXT)) + +/* Short version of HINDEX_FOR_EACH_WITH_HASH_SAFE. */ +#define HINDEX_FOR_EACH_WITH_HASH_SAFE_SHORT(NODE, MEMBER, HASH, HINDEX) \ + for (INIT_MULTIVAR_SAFE_SHORT(NODE, MEMBER, \ + hindex_node_with_hash(HINDEX, HASH), \ + struct hindex_node); \ + CONDITION_MULTIVAR_SAFE_SHORT(NODE, MEMBER, \ + ITER_VAR(NODE) != NULL, \ + ITER_NEXT_VAR(NODE) = ITER_VAR(NODE)->s); \ + UPDATE_MULTIVAR_SAFE_SHORT(NODE)) + +#define HINDEX_FOR_EACH_WITH_HASH_SAFE(...) \ + OVERLOAD_SAFE_MACRO(HINDEX_FOR_EACH_WITH_HASH_SAFE_LONG, \ + HINDEX_FOR_EACH_WITH_HASH_SAFE_SHORT, \ + 5, __VA_ARGS__) + /* Returns the head node in 'hindex' with the given 'hash', or a null pointer * if no nodes have that hash value. */ @@ -157,19 +177,36 @@ hindex_node_with_hash(const struct hindex *hindex, size_t hash) /* Iteration. */ /* Iterates through every node in HINDEX. 
*/ -#define HINDEX_FOR_EACH(NODE, MEMBER, HINDEX) \ - for (INIT_CONTAINER(NODE, hindex_first(HINDEX), MEMBER); \ - NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER); \ - ASSIGN_CONTAINER(NODE, hindex_next(HINDEX, &(NODE)->MEMBER), MEMBER)) +#define HINDEX_FOR_EACH(NODE, MEMBER, HINDEX) \ + for (INIT_MULTIVAR(NODE, MEMBER, hindex_first(HINDEX), \ + struct hindex_node); \ + CONDITION_MULTIVAR(NODE, MEMBER, ITER_VAR(NODE) != NULL); \ + UPDATE_MULTIVAR(NODE, hindex_next(HINDEX, ITER_VAR(NODE)))) /* Safe when NODE may be freed (not needed when NODE may be removed from the * hash index but its members remain accessible and intact). */ -#define HINDEX_FOR_EACH_SAFE(NODE, NEXT, MEMBER, HINDEX) \ - for (INIT_CONTAINER(NODE, hindex_first(HINDEX), MEMBER); \ - (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER) \ - ? INIT_CONTAINER(NEXT, hindex_next(HINDEX, &(NODE)->MEMBER), MEMBER), 1 \ - : 0); \ - (NODE) = (NEXT)) +#define HINDEX_FOR_EACH_SAFE_LONG(NODE, NEXT, MEMBER, HINDEX) \ + for (INIT_MULTIVAR_SAFE_LONG(NODE, NEXT, MEMBER, hindex_first(HINDEX), \ + struct hindex_node); \ + CONDITION_MULTIVAR_SAFE_LONG(NODE, NEXT, MEMBER, \ + ITER_VAR(NODE) != NULL, \ + ITER_VAR(NEXT) = hindex_next(HINDEX, ITER_VAR(NODE)), \ + ITER_VAR(NEXT) != NULL); \ + UPDATE_MULTIVAR_SAFE_LONG(NODE, NEXT)) + +/* Short version of HINDEX_FOR_EACH_SAFE. */ +#define HINDEX_FOR_EACH_SAFE_SHORT(NODE, MEMBER, HINDEX) \ + for (INIT_MULTIVAR_SAFE_SHORT(NODE, MEMBER, hindex_first(HINDEX), \ + struct hindex_node); \ + CONDITION_MULTIVAR_SAFE_SHORT(NODE, MEMBER, \ + ITER_VAR(NODE) != NULL, \ + ITER_NEXT_VAR(NODE) = hindex_next(HINDEX, ITER_VAR(NODE))); \ + UPDATE_MULTIVAR_SAFE_SHORT(NODE)) + +#define HINDEX_FOR_EACH_SAFE(...) 
\ + OVERLOAD_SAFE_MACRO(HINDEX_FOR_EACH_SAFE_LONG, \ + HINDEX_FOR_EACH_SAFE_SHORT, \ + 4, __VA_ARGS__) struct hindex_node *hindex_first(const struct hindex *); struct hindex_node *hindex_next(const struct hindex *, diff --git a/lib/hmapx.c b/lib/hmapx.c index eadfe640ac..68192fc2c5 100644 --- a/lib/hmapx.c +++ b/lib/hmapx.c @@ -123,9 +123,9 @@ hmapx_add_assert(struct hmapx *map, void *data) void hmapx_clear(struct hmapx *map) { - struct hmapx_node *node, *next; + struct hmapx_node *node; - HMAPX_FOR_EACH_SAFE (node, next, map) { + HMAPX_FOR_EACH_SAFE (node, map) { hmapx_delete(map, node); } } diff --git a/lib/hmapx.h b/lib/hmapx.h index 06a6bbe67d..0b03411491 100644 --- a/lib/hmapx.h +++ b/lib/hmapx.h @@ -67,10 +67,20 @@ bool hmapx_equals(const struct hmapx *, const struct hmapx *); /* Safe when NODE may be freed (not needed when NODE may be removed from the * hash map but its members remain accessible and intact). */ -#define HMAPX_FOR_EACH_SAFE(NODE, NEXT, HMAPX) \ - HMAP_FOR_EACH_SAFE_INIT(NODE, NEXT, hmap_node, &(HMAPX)->map, \ +#define HMAPX_FOR_EACH_SAFE_SHORT(NODE, HMAPX) \ + HMAP_FOR_EACH_SAFE_SHORT_INIT (NODE, hmap_node, &(HMAPX)->map, \ + BUILD_ASSERT_TYPE(NODE, struct hmapx_node *), \ + BUILD_ASSERT_TYPE(HMAPX, struct hmapx *)) + +#define HMAPX_FOR_EACH_SAFE_LONG(NODE, NEXT, HMAPX) \ + HMAP_FOR_EACH_SAFE_LONG_INIT (NODE, NEXT, hmap_node, &(HMAPX)->map, \ BUILD_ASSERT_TYPE(NODE, struct hmapx_node *), \ BUILD_ASSERT_TYPE(NEXT, struct hmapx_node *), \ BUILD_ASSERT_TYPE(HMAPX, struct hmapx *)) +#define HMAPX_FOR_EACH_SAFE(...) 
\ + OVERLOAD_SAFE_MACRO(HMAPX_FOR_EACH_SAFE_LONG, \ + HMAPX_FOR_EACH_SAFE_SHORT, \ + 3, __VA_ARGS__) + #endif /* hmapx.h */ diff --git a/lib/id-fpool.c b/lib/id-fpool.c index 15cef5d003..7108c104a4 100644 --- a/lib/id-fpool.c +++ b/lib/id-fpool.c @@ -166,11 +166,10 @@ void id_fpool_destroy(struct id_fpool *pool) { struct id_slab *slab; - struct id_slab *next; size_t i; id_fpool_lock(&pool->pool_lock); - LIST_FOR_EACH_SAFE (slab, next, node, &pool->free_slabs) { + LIST_FOR_EACH_SAFE (slab, node, &pool->free_slabs) { free(slab); } ovs_list_poison(&pool->free_slabs); diff --git a/lib/ipf.c b/lib/ipf.c index 507db2aea2..d452663743 100644 --- a/lib/ipf.c +++ b/lib/ipf.c @@ -1058,9 +1058,9 @@ ipf_send_completed_frags(struct ipf *ipf, struct dp_packet_batch *pb, } ovs_mutex_lock(&ipf->ipf_lock); - struct ipf_list *ipf_list, *next; + struct ipf_list *ipf_list; - LIST_FOR_EACH_SAFE (ipf_list, next, list_node, &ipf->frag_complete_list) { + LIST_FOR_EACH_SAFE (ipf_list, list_node, &ipf->frag_complete_list) { if (ipf_send_frags_in_list(ipf, ipf_list, pb, IPF_FRAG_COMPLETED_LIST, v6, now)) { ipf_completed_list_clean(&ipf->frag_lists, ipf_list); @@ -1090,10 +1090,10 @@ ipf_send_expired_frags(struct ipf *ipf, struct dp_packet_batch *pb, } ovs_mutex_lock(&ipf->ipf_lock); - struct ipf_list *ipf_list, *next; + struct ipf_list *ipf_list; size_t lists_removed = 0; - LIST_FOR_EACH_SAFE (ipf_list, next, list_node, &ipf->frag_exp_list) { + LIST_FOR_EACH_SAFE (ipf_list, list_node, &ipf->frag_exp_list) { if (now <= ipf_list->expiration || lists_removed >= IPF_FRAG_LIST_MAX_EXPIRED) { break; @@ -1121,9 +1121,9 @@ ipf_execute_reass_pkts(struct ipf *ipf, struct dp_packet_batch *pb) } ovs_mutex_lock(&ipf->ipf_lock); - struct reassembled_pkt *rp, *next; + struct reassembled_pkt *rp; - LIST_FOR_EACH_SAFE (rp, next, rp_list_node, &ipf->reassembled_pkt_list) { + LIST_FOR_EACH_SAFE (rp, rp_list_node, &ipf->reassembled_pkt_list) { if (!rp->list->reass_execute_ctx && ipf_dp_packet_batch_add(pb, 
rp->pkt, false)) { rp->list->reass_execute_ctx = rp->pkt; @@ -1144,9 +1144,9 @@ ipf_post_execute_reass_pkts(struct ipf *ipf, } ovs_mutex_lock(&ipf->ipf_lock); - struct reassembled_pkt *rp, *next; + struct reassembled_pkt *rp; - LIST_FOR_EACH_SAFE (rp, next, rp_list_node, &ipf->reassembled_pkt_list) { + LIST_FOR_EACH_SAFE (rp, rp_list_node, &ipf->reassembled_pkt_list) { const size_t pb_cnt = dp_packet_batch_size(pb); int pb_idx; struct dp_packet *pkt; @@ -1271,15 +1271,15 @@ ipf_clean_thread_main(void *f) ovs_mutex_lock(&ipf->ipf_lock); - struct ipf_list *ipf_list, *next; - LIST_FOR_EACH_SAFE (ipf_list, next, list_node, + struct ipf_list *ipf_list; + LIST_FOR_EACH_SAFE (ipf_list, list_node, &ipf->frag_exp_list) { if (ipf_purge_list_check(ipf, ipf_list, now)) { ipf_expiry_list_clean(&ipf->frag_lists, ipf_list); } } - LIST_FOR_EACH_SAFE (ipf_list, next, list_node, + LIST_FOR_EACH_SAFE (ipf_list, list_node, &ipf->frag_complete_list) { if (ipf_purge_list_check(ipf, ipf_list, now)) { ipf_completed_list_clean(&ipf->frag_lists, ipf_list); diff --git a/lib/json.c b/lib/json.c index 720c73d940..042aab83b3 100644 --- a/lib/json.c +++ b/lib/json.c @@ -397,9 +397,9 @@ json_destroy__(struct json *json) static void json_destroy_object(struct shash *object) { - struct shash_node *node, *next; + struct shash_node *node; - SHASH_FOR_EACH_SAFE (node, next, object) { + SHASH_FOR_EACH_SAFE (node, object) { struct json *value = node->data; json_destroy(value); diff --git a/lib/lacp.c b/lib/lacp.c index 89d711225f..3252f17ebf 100644 --- a/lib/lacp.c +++ b/lib/lacp.c @@ -280,10 +280,10 @@ void lacp_unref(struct lacp *lacp) OVS_EXCLUDED(mutex) { if (lacp && ovs_refcount_unref_relaxed(&lacp->ref_cnt) == 1) { - struct member *member, *next; + struct member *member; lacp_lock(); - HMAP_FOR_EACH_SAFE (member, next, node, &lacp->members) { + HMAP_FOR_EACH_SAFE (member, node, &lacp->members) { member_destroy(member); } diff --git a/lib/lldp/lldpd-structs.c b/lib/lldp/lldpd-structs.c index 
499b441746..a8c7fad098 100644 --- a/lib/lldp/lldpd-structs.c +++ b/lib/lldp/lldpd-structs.c @@ -64,11 +64,11 @@ lldpd_remote_cleanup(struct lldpd_hardware *hw, struct lldpd_port *), bool all) { - struct lldpd_port *port, *port_next; + struct lldpd_port *port; time_t now = time_now(); VLOG_DBG("cleanup remote port on %s", hw->h_ifname); - LIST_FOR_EACH_SAFE (port, port_next, p_entries, &hw->h_rports) { + LIST_FOR_EACH_SAFE (port, p_entries, &hw->h_rports) { bool del = all; if (!all && expire && (now >= port->p_lastupdate + port->p_chassis->c_ttl)) { @@ -99,11 +99,10 @@ static void lldpd_aa_maps_cleanup(struct lldpd_port *port) { struct lldpd_aa_isid_vlan_maps_tlv *isid_vlan_map = NULL; - struct lldpd_aa_isid_vlan_maps_tlv *isid_vlan_map_next = NULL; if (!ovs_list_is_empty(&port->p_isid_vlan_maps)) { - LIST_FOR_EACH_SAFE (isid_vlan_map, isid_vlan_map_next, m_entries, + LIST_FOR_EACH_SAFE (isid_vlan_map, m_entries, &port->p_isid_vlan_maps) { ovs_list_remove(&isid_vlan_map->m_entries); diff --git a/lib/lldp/lldpd.c b/lib/lldp/lldpd.c index a024dc5e58..4bff7b017f 100644 --- a/lib/lldp/lldpd.c +++ b/lib/lldp/lldpd.c @@ -134,24 +134,20 @@ lldpd_hardware_cleanup(struct lldpd *cfg, struct lldpd_hardware *hardware) void lldpd_cleanup(struct lldpd *cfg) { - struct lldpd_hardware *hw, *hw_next; - struct lldpd_chassis *chassis, *chassis_next; + struct lldpd_hardware *hw; + struct lldpd_chassis *chassis; VLOG_DBG("cleanup all ports"); - LIST_FOR_EACH_SAFE (hw, hw_next, h_entries, &cfg->g_hardware) { - if (!hw->h_flags) { - ovs_list_remove(&hw->h_entries); - lldpd_remote_cleanup(hw, NULL, true); - lldpd_hardware_cleanup(cfg, hw); - } else { - lldpd_remote_cleanup(hw, NULL, false); - } + LIST_FOR_EACH_SAFE (hw, h_entries, &cfg->g_hardware) { + ovs_list_remove(&hw->h_entries); + lldpd_remote_cleanup(hw, NULL, true); + lldpd_hardware_cleanup(cfg, hw); } VLOG_DBG("cleanup all chassis"); - LIST_FOR_EACH_SAFE (chassis, chassis_next, list, &cfg->g_chassis) { + LIST_FOR_EACH_SAFE 
(chassis, list, &cfg->g_chassis) { if (chassis->c_refcount == 0) { ovs_list_remove(&chassis->list); lldpd_chassis_cleanup(chassis, 1); diff --git a/lib/mac-learning.c b/lib/mac-learning.c index 3fcd7d9b77..a60794fb26 100644 --- a/lib/mac-learning.c +++ b/lib/mac-learning.c @@ -244,10 +244,10 @@ void mac_learning_unref(struct mac_learning *ml) { if (ml && ovs_refcount_unref(&ml->ref_cnt) == 1) { - struct mac_entry *e, *next; + struct mac_entry *e; ovs_rwlock_wrlock(&ml->rwlock); - HMAP_FOR_EACH_SAFE (e, next, hmap_node, &ml->table) { + HMAP_FOR_EACH_SAFE (e, hmap_node, &ml->table) { mac_learning_expire(ml, e); } hmap_destroy(&ml->table); diff --git a/lib/mcast-snooping.c b/lib/mcast-snooping.c index 6730301b67..029ca28558 100644 --- a/lib/mcast-snooping.c +++ b/lib/mcast-snooping.c @@ -356,11 +356,11 @@ mcast_snooping_prune_expired(struct mcast_snooping *ms, OVS_REQ_WRLOCK(ms->rwlock) { int expired; - struct mcast_group_bundle *b, *next_b; + struct mcast_group_bundle *b; time_t timenow = time_now(); expired = 0; - LIST_FOR_EACH_SAFE (b, next_b, bundle_node, &grp->bundle_lru) { + LIST_FOR_EACH_SAFE (b, bundle_node, &grp->bundle_lru) { /* This list is sorted on expiration time. 
*/ if (b->expires > timenow) { break; @@ -946,15 +946,15 @@ mcast_snooping_wait(struct mcast_snooping *ms) void mcast_snooping_flush_bundle(struct mcast_snooping *ms, void *port) { - struct mcast_group *g, *next_g; - struct mcast_mrouter_bundle *m, *next_m; + struct mcast_group *g; + struct mcast_mrouter_bundle *m; if (!mcast_snooping_enabled(ms)) { return; } ovs_rwlock_wrlock(&ms->rwlock); - LIST_FOR_EACH_SAFE (g, next_g, group_node, &ms->group_lru) { + LIST_FOR_EACH_SAFE (g, group_node, &ms->group_lru) { if (mcast_group_delete_bundle(ms, g, port)) { ms->need_revalidate = true; @@ -964,7 +964,7 @@ mcast_snooping_flush_bundle(struct mcast_snooping *ms, void *port) } } - LIST_FOR_EACH_SAFE (m, next_m, mrouter_node, &ms->mrouter_lru) { + LIST_FOR_EACH_SAFE (m, mrouter_node, &ms->mrouter_lru) { if (m->port == port) { mcast_snooping_flush_mrouter(m); ms->need_revalidate = true; diff --git a/lib/meta-flow.c b/lib/meta-flow.c index e03cd8d0c5..c576ae6202 100644 --- a/lib/meta-flow.c +++ b/lib/meta-flow.c @@ -3442,7 +3442,9 @@ mf_get_vl_mff(const struct mf_field *mff, const struct vl_mff_map *vl_mff_map) { if (mff && mff->variable_len && vl_mff_map) { - return &mf_get_vl_mff__(mff->id, vl_mff_map)->mf; + struct vl_mf_field *vl_mff = mf_get_vl_mff__(mff->id, vl_mff_map); + + return vl_mff ? 
&vl_mff->mf : NULL; } return NULL; diff --git a/lib/namemap.c b/lib/namemap.c index 785cda4c27..dd317ea52e 100644 --- a/lib/namemap.c +++ b/lib/namemap.c @@ -90,9 +90,9 @@ void namemap_destroy(struct namemap *map) { if (map) { - struct namemap_node *node, *next; + struct namemap_node *node; - HMAP_FOR_EACH_SAFE (node, next, name_node, &map->by_name) { + HMAP_FOR_EACH_SAFE (node, name_node, &map->by_name) { hmap_remove(&map->by_name, &node->name_node); hmap_remove(&map->by_number, &node->number_node); free(node->name); diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c index 482400d8d1..ca3f2431ea 100644 --- a/lib/netdev-afxdp.c +++ b/lib/netdev-afxdp.c @@ -235,11 +235,11 @@ netdev_afxdp_cleanup_unused_pool(struct unused_pool *pool) static void netdev_afxdp_sweep_unused_pools(void *aux OVS_UNUSED) { - struct unused_pool *pool, *next; + struct unused_pool *pool; unsigned int count; ovs_mutex_lock(&unused_pools_mutex); - LIST_FOR_EACH_SAFE (pool, next, list_node, &unused_pools) { + LIST_FOR_EACH_SAFE (pool, list_node, &unused_pools) { count = umem_pool_count(&pool->umem_info->mpool); ovs_assert(count + pool->lost_in_rings <= NUM_FRAMES); diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index b6b29c75e3..e28e397d7e 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -622,9 +622,9 @@ dpdk_mp_full(const struct rte_mempool *mp) OVS_REQUIRES(dpdk_mp_mutex) static void dpdk_mp_sweep(void) OVS_REQUIRES(dpdk_mp_mutex) { - struct dpdk_mp *dmp, *next; + struct dpdk_mp *dmp; - LIST_FOR_EACH_SAFE (dmp, next, list_node, &dpdk_mp_list) { + LIST_FOR_EACH_SAFE (dmp, list_node, &dpdk_mp_list) { if (!dmp->refcount && dpdk_mp_full(dmp->mp)) { VLOG_DBG("Freeing mempool \"%s\"", dmp->mp->name); ovs_list_remove(&dmp->list_node); @@ -2561,90 +2561,6 @@ netdev_dpdk_vhost_update_tx_counters(struct netdev_dpdk *dev, } } -static void -__netdev_dpdk_vhost_send(struct netdev *netdev, int qid, - struct dp_packet **pkts, int cnt) -{ - struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); - 
struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts; - struct netdev_dpdk_sw_stats sw_stats_add; - unsigned int n_packets_to_free = cnt; - unsigned int total_packets = cnt; - int i, retries = 0; - int max_retries = VHOST_ENQ_RETRY_MIN; - int vid = netdev_dpdk_get_vid(dev); - - qid = dev->tx_q[qid % netdev->n_txq].map; - - if (OVS_UNLIKELY(vid < 0 || !dev->vhost_reconfigured || qid < 0 - || !(dev->flags & NETDEV_UP))) { - rte_spinlock_lock(&dev->stats_lock); - dev->stats.tx_dropped+= cnt; - rte_spinlock_unlock(&dev->stats_lock); - goto out; - } - - if (OVS_UNLIKELY(!rte_spinlock_trylock(&dev->tx_q[qid].tx_lock))) { - COVERAGE_INC(vhost_tx_contention); - rte_spinlock_lock(&dev->tx_q[qid].tx_lock); - } - - sw_stats_add.tx_invalid_hwol_drops = cnt; - if (userspace_tso_enabled()) { - cnt = netdev_dpdk_prep_hwol_batch(dev, cur_pkts, cnt); - } - - sw_stats_add.tx_invalid_hwol_drops -= cnt; - sw_stats_add.tx_mtu_exceeded_drops = cnt; - cnt = netdev_dpdk_filter_packet_len(dev, cur_pkts, cnt); - sw_stats_add.tx_mtu_exceeded_drops -= cnt; - - /* Check has QoS has been configured for the netdev */ - sw_stats_add.tx_qos_drops = cnt; - cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt, true); - sw_stats_add.tx_qos_drops -= cnt; - - n_packets_to_free = cnt; - - do { - int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ; - unsigned int tx_pkts; - - tx_pkts = rte_vhost_enqueue_burst(vid, vhost_qid, cur_pkts, cnt); - if (OVS_LIKELY(tx_pkts)) { - /* Packets have been sent.*/ - cnt -= tx_pkts; - /* Prepare for possible retry.*/ - cur_pkts = &cur_pkts[tx_pkts]; - if (OVS_UNLIKELY(cnt && !retries)) { - /* - * Read max retries as there are packets not sent - * and no retries have already occurred. 
- */ - atomic_read_relaxed(&dev->vhost_tx_retries_max, &max_retries); - } - } else { - /* No packets sent - do not retry.*/ - break; - } - } while (cnt && (retries++ < max_retries)); - - rte_spinlock_unlock(&dev->tx_q[qid].tx_lock); - - sw_stats_add.tx_failure_drops = cnt; - sw_stats_add.tx_retries = MIN(retries, max_retries); - - rte_spinlock_lock(&dev->stats_lock); - netdev_dpdk_vhost_update_tx_counters(dev, pkts, total_packets, - &sw_stats_add); - rte_spinlock_unlock(&dev->stats_lock); - -out: - for (i = 0; i < n_packets_to_free; i++) { - dp_packet_delete(pkts[i]); - } -} - static void netdev_dpdk_extbuf_free(void *addr OVS_UNUSED, void *opaque) { @@ -2749,76 +2665,69 @@ dpdk_copy_dp_packet_to_mbuf(struct rte_mempool *mp, struct dp_packet *pkt_orig) return pkt_dest; } -/* Tx function. Transmit packets indefinitely */ -static void -dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch) - OVS_NO_THREAD_SAFETY_ANALYSIS -{ - const size_t batch_cnt = dp_packet_batch_size(batch); -#if !defined(__CHECKER__) && !defined(_WIN32) - const size_t PKT_ARRAY_SIZE = batch_cnt; -#else - /* Sparse or MSVC doesn't like variable length array. */ - enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST }; -#endif +/* Replace packets in a 'batch' with their corresponding copies using + * DPDK memory. + * + * Returns the number of good packets in the batch. */ +static size_t +dpdk_copy_batch_to_mbuf(struct netdev *netdev, struct dp_packet_batch *batch) +{ struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); - struct dp_packet *pkts[PKT_ARRAY_SIZE]; - struct netdev_dpdk_sw_stats *sw_stats = dev->sw_stats; - uint32_t cnt = batch_cnt; - uint32_t dropped = 0; - uint32_t tx_failure = 0; - uint32_t mtu_drops = 0; - uint32_t qos_drops = 0; - - if (dev->type != DPDK_DEV_VHOST) { - /* Check if QoS has been configured for this netdev. 
*/ - cnt = netdev_dpdk_qos_run(dev, (struct rte_mbuf **) batch->packets, - batch_cnt, false); - qos_drops = batch_cnt - cnt; - } - - uint32_t txcnt = 0; - - for (uint32_t i = 0; i < cnt; i++) { - struct dp_packet *packet = batch->packets[i]; - uint32_t size = dp_packet_size(packet); - - if (size > dev->max_packet_len - && !(packet->mbuf.ol_flags & RTE_MBUF_F_TX_TCP_SEG)) { - VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d", size, - dev->max_packet_len); - mtu_drops++; - continue; - } + size_t i, size = dp_packet_batch_size(batch); + struct dp_packet *packet; - pkts[txcnt] = dpdk_copy_dp_packet_to_mbuf(dev->dpdk_mp->mp, packet); - if (OVS_UNLIKELY(!pkts[txcnt])) { - dropped = cnt - i; - break; - } + DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, batch) { + if (OVS_UNLIKELY(packet->source == DPBUF_DPDK)) { + dp_packet_batch_refill(batch, packet, i); + } else { + struct dp_packet *pktcopy; - txcnt++; - } + pktcopy = dpdk_copy_dp_packet_to_mbuf(dev->dpdk_mp->mp, packet); + if (pktcopy) { + dp_packet_batch_refill(batch, pktcopy, i); + } - if (OVS_LIKELY(txcnt)) { - if (dev->type == DPDK_DEV_VHOST) { - __netdev_dpdk_vhost_send(netdev, qid, pkts, txcnt); - } else { - tx_failure += netdev_dpdk_eth_tx_burst(dev, qid, - (struct rte_mbuf **)pkts, - txcnt); + dp_packet_delete(packet); } } - dropped += qos_drops + mtu_drops + tx_failure; - if (OVS_UNLIKELY(dropped)) { - rte_spinlock_lock(&dev->stats_lock); - dev->stats.tx_dropped += dropped; - sw_stats->tx_failure_drops += tx_failure; - sw_stats->tx_mtu_exceeded_drops += mtu_drops; - sw_stats->tx_qos_drops += qos_drops; - rte_spinlock_unlock(&dev->stats_lock); + return dp_packet_batch_size(batch); +} + +static size_t +netdev_dpdk_common_send(struct netdev *netdev, struct dp_packet_batch *batch, + struct netdev_dpdk_sw_stats *stats) +{ + struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets; + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); + size_t cnt, pkt_cnt = dp_packet_batch_size(batch); + + memset(stats, 
0, sizeof *stats); + + /* Copy dp-packets to mbufs. */ + if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) { + cnt = dpdk_copy_batch_to_mbuf(netdev, batch); + stats->tx_failure_drops += pkt_cnt - cnt; + pkt_cnt = cnt; } + + /* Drop oversized packets. */ + cnt = netdev_dpdk_filter_packet_len(dev, pkts, pkt_cnt); + stats->tx_mtu_exceeded_drops += pkt_cnt - cnt; + pkt_cnt = cnt; + + /* Prepare each mbuf for hardware offloading. */ + if (userspace_tso_enabled()) { + cnt = netdev_dpdk_prep_hwol_batch(dev, pkts, pkt_cnt); + stats->tx_invalid_hwol_drops += pkt_cnt - cnt; + pkt_cnt = cnt; + } + + /* Apply Quality of Service policy. */ + cnt = netdev_dpdk_qos_run(dev, pkts, pkt_cnt, true); + stats->tx_qos_drops += pkt_cnt - cnt; + + return cnt; } static int @@ -2826,25 +2735,92 @@ netdev_dpdk_vhost_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch, bool concurrent_txq OVS_UNUSED) { + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); + int max_retries = VHOST_ENQ_RETRY_MIN; + int cnt, batch_cnt, vhost_batch_cnt; + int vid = netdev_dpdk_get_vid(dev); + struct netdev_dpdk_sw_stats stats; + struct rte_mbuf **pkts; + int retries; - if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) { - dpdk_do_tx_copy(netdev, qid, batch); + batch_cnt = cnt = dp_packet_batch_size(batch); + qid = dev->tx_q[qid % netdev->n_txq].map; + if (OVS_UNLIKELY(vid < 0 || !dev->vhost_reconfigured || qid < 0 + || !(dev->flags & NETDEV_UP))) { + rte_spinlock_lock(&dev->stats_lock); + dev->stats.tx_dropped += cnt; + rte_spinlock_unlock(&dev->stats_lock); dp_packet_delete_batch(batch, true); - } else { - __netdev_dpdk_vhost_send(netdev, qid, batch->packets, - dp_packet_batch_size(batch)); + return 0; + } + + if (OVS_UNLIKELY(!rte_spinlock_trylock(&dev->tx_q[qid].tx_lock))) { + COVERAGE_INC(vhost_tx_contention); + rte_spinlock_lock(&dev->tx_q[qid].tx_lock); + } + + cnt = netdev_dpdk_common_send(netdev, batch, &stats); + + pkts = (struct rte_mbuf **) batch->packets; + 
vhost_batch_cnt = cnt; + retries = 0; + do { + int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ; + int tx_pkts; + + tx_pkts = rte_vhost_enqueue_burst(vid, vhost_qid, pkts, cnt); + if (OVS_LIKELY(tx_pkts)) { + /* Packets have been sent.*/ + cnt -= tx_pkts; + /* Prepare for possible retry.*/ + pkts = &pkts[tx_pkts]; + if (OVS_UNLIKELY(cnt && !retries)) { + /* + * Read max retries as there are packets not sent + * and no retries have already occurred. + */ + atomic_read_relaxed(&dev->vhost_tx_retries_max, &max_retries); + } + } else { + /* No packets sent - do not retry.*/ + break; + } + } while (cnt && (retries++ < max_retries)); + + rte_spinlock_unlock(&dev->tx_q[qid].tx_lock); + + stats.tx_failure_drops += cnt; + stats.tx_retries = MIN(retries, max_retries); + + rte_spinlock_lock(&dev->stats_lock); + netdev_dpdk_vhost_update_tx_counters(dev, batch->packets, batch_cnt, + &stats); + rte_spinlock_unlock(&dev->stats_lock); + + pkts = (struct rte_mbuf **) batch->packets; + for (int i = 0; i < vhost_batch_cnt; i++) { + rte_pktmbuf_free(pkts[i]); } + return 0; } -static inline void -netdev_dpdk_send__(struct netdev_dpdk *dev, int qid, - struct dp_packet_batch *batch, - bool concurrent_txq) +static int +netdev_dpdk_eth_send(struct netdev *netdev, int qid, + struct dp_packet_batch *batch, bool concurrent_txq) { + struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets; + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); + int batch_cnt = dp_packet_batch_size(batch); + struct netdev_dpdk_sw_stats stats; + int cnt, dropped; + if (OVS_UNLIKELY(!(dev->flags & NETDEV_UP))) { + rte_spinlock_lock(&dev->stats_lock); + dev->stats.tx_dropped += dp_packet_batch_size(batch); + rte_spinlock_unlock(&dev->stats_lock); dp_packet_delete_batch(batch, true); - return; + return 0; } if (OVS_UNLIKELY(concurrent_txq)) { @@ -2852,56 +2828,27 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid, rte_spinlock_lock(&dev->tx_q[qid].tx_lock); } - if (OVS_UNLIKELY(batch->packets[0]->source != 
DPBUF_DPDK)) { - struct netdev *netdev = &dev->up; + cnt = netdev_dpdk_common_send(netdev, batch, &stats); - dpdk_do_tx_copy(netdev, qid, batch); - dp_packet_delete_batch(batch, true); - } else { + dropped = batch_cnt - cnt; + + dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt); + if (OVS_UNLIKELY(dropped)) { struct netdev_dpdk_sw_stats *sw_stats = dev->sw_stats; - int dropped; - int tx_failure, mtu_drops, qos_drops, hwol_drops; - int batch_cnt = dp_packet_batch_size(batch); - struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets; - hwol_drops = batch_cnt; - if (userspace_tso_enabled()) { - batch_cnt = netdev_dpdk_prep_hwol_batch(dev, pkts, batch_cnt); - } - hwol_drops -= batch_cnt; - mtu_drops = batch_cnt; - batch_cnt = netdev_dpdk_filter_packet_len(dev, pkts, batch_cnt); - mtu_drops -= batch_cnt; - qos_drops = batch_cnt; - batch_cnt = netdev_dpdk_qos_run(dev, pkts, batch_cnt, true); - qos_drops -= batch_cnt; - - tx_failure = netdev_dpdk_eth_tx_burst(dev, qid, pkts, batch_cnt); - - dropped = tx_failure + mtu_drops + qos_drops + hwol_drops; - if (OVS_UNLIKELY(dropped)) { - rte_spinlock_lock(&dev->stats_lock); - dev->stats.tx_dropped += dropped; - sw_stats->tx_failure_drops += tx_failure; - sw_stats->tx_mtu_exceeded_drops += mtu_drops; - sw_stats->tx_qos_drops += qos_drops; - sw_stats->tx_invalid_hwol_drops += hwol_drops; - rte_spinlock_unlock(&dev->stats_lock); - } + rte_spinlock_lock(&dev->stats_lock); + dev->stats.tx_dropped += dropped; + sw_stats->tx_failure_drops += stats.tx_failure_drops; + sw_stats->tx_mtu_exceeded_drops += stats.tx_mtu_exceeded_drops; + sw_stats->tx_qos_drops += stats.tx_qos_drops; + sw_stats->tx_invalid_hwol_drops += stats.tx_invalid_hwol_drops; + rte_spinlock_unlock(&dev->stats_lock); } if (OVS_UNLIKELY(concurrent_txq)) { rte_spinlock_unlock(&dev->tx_q[qid].tx_lock); } -} - -static int -netdev_dpdk_eth_send(struct netdev *netdev, int qid, - struct dp_packet_batch *batch, bool concurrent_txq) -{ - struct netdev_dpdk *dev = 
netdev_dpdk_cast(netdev); - netdev_dpdk_send__(dev, qid, batch, concurrent_txq); return 0; } @@ -4686,11 +4633,11 @@ trtcm_policer_qos_construct(const struct smap *details, static void trtcm_policer_qos_destruct(struct qos_conf *conf) { - struct trtcm_policer_queue *queue, *next_queue; + struct trtcm_policer_queue *queue; struct trtcm_policer *policer = CONTAINER_OF(conf, struct trtcm_policer, qos_conf); - HMAP_FOR_EACH_SAFE (queue, next_queue, hmap_node, &policer->queues) { + HMAP_FOR_EACH_SAFE (queue, hmap_node, &policer->queues) { hmap_remove(&policer->queues, &queue->hmap_node); free(queue); } diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 620a451dec..2766b3f2bf 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -5331,11 +5331,11 @@ static void hfsc_tc_destroy(struct tc *tc) { struct hfsc *hfsc; - struct hfsc_class *hc, *next; + struct hfsc_class *hc; hfsc = CONTAINER_OF(tc, struct hfsc, tc); - HMAP_FOR_EACH_SAFE (hc, next, tc_queue.hmap_node, &hfsc->tc.queues) { + HMAP_FOR_EACH_SAFE (hc, tc_queue.hmap_node, &hfsc->tc.queues) { hmap_remove(&hfsc->tc.queues, &hc->tc_queue.hmap_node); free(hc); } @@ -6295,7 +6295,14 @@ get_stats_via_netlink(const struct netdev *netdev_, struct netdev_stats *stats) if (ofpbuf_try_pull(reply, NLMSG_HDRLEN + sizeof(struct ifinfomsg))) { const struct nlattr *a = nl_attr_find(reply, 0, IFLA_STATS64); if (a && nl_attr_get_size(a) >= sizeof(struct rtnl_link_stats64)) { - netdev_stats_from_rtnl_link_stats64(stats, nl_attr_get(a)); + const struct rtnl_link_stats64 *lstats = nl_attr_get(a); + struct rtnl_link_stats64 aligned_lstats; + + if (!IS_PTR_ALIGNED(lstats)) { + memcpy(&aligned_lstats, lstats, sizeof aligned_lstats); + lstats = &aligned_lstats; + } + netdev_stats_from_rtnl_link_stats64(stats, lstats); error = 0; } else { a = nl_attr_find(reply, 0, IFLA_STATS); diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index 94dc6a9b74..12d299603a 100644 --- a/lib/netdev-offload-dpdk.c +++ 
b/lib/netdev-offload-dpdk.c @@ -363,6 +363,8 @@ dump_flow_pattern(struct ds *s, ds_put_cstr(s, "eth "); if (eth_spec) { + uint32_t has_vlan_mask; + if (!eth_mask) { eth_mask = &rte_flow_item_eth_mask; } @@ -377,6 +379,9 @@ dump_flow_pattern(struct ds *s, DUMP_PATTERN_ITEM(eth_mask->type, false, "type", "0x%04"PRIx16, ntohs(eth_spec->type), ntohs(eth_mask->type), 0); + has_vlan_mask = eth_mask->has_vlan ? UINT32_MAX : 0; + DUMP_PATTERN_ITEM(has_vlan_mask, false, "has_vlan", "%d", + eth_spec->has_vlan, eth_mask->has_vlan, 0); } ds_put_cstr(s, "/ "); } else if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { @@ -1369,6 +1374,7 @@ parse_flow_match(struct netdev *netdev, struct flow_patterns *patterns, struct match *match) { + struct rte_flow_item_eth *eth_spec = NULL, *eth_mask = NULL; struct flow *consumed_masks; uint8_t proto = 0; @@ -1414,6 +1420,11 @@ parse_flow_match(struct netdev *netdev, memset(&consumed_masks->dl_src, 0, sizeof consumed_masks->dl_src); consumed_masks->dl_type = 0; + spec->has_vlan = 0; + mask->has_vlan = 1; + eth_spec = spec; + eth_mask = mask; + add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, spec, mask, NULL); } @@ -1427,8 +1438,14 @@ parse_flow_match(struct netdev *netdev, spec->tci = match->flow.vlans[0].tci & ~htons(VLAN_CFI); mask->tci = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI); - /* Match any protocols. 
*/ - mask->inner_type = 0; + if (eth_spec && eth_mask) { + eth_spec->has_vlan = 1; + eth_mask->has_vlan = 1; + spec->inner_type = eth_spec->type; + mask->inner_type = eth_mask->type; + eth_spec->type = match->flow.vlans[0].tpid; + eth_mask->type = match->wc.masks.vlans[0].tpid; + } add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_VLAN, spec, mask, NULL); } diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index 9845e8d3fe..262faf3c62 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -417,11 +417,11 @@ delete_chains_from_netdev(struct netdev *netdev, struct tcf_id *id) static int netdev_tc_flow_flush(struct netdev *netdev) { - struct ufid_tc_data *data, *next; + struct ufid_tc_data *data; int err; ovs_mutex_lock(&ufid_lock); - HMAP_FOR_EACH_SAFE (data, next, tc_to_ufid_node, &tc_to_ufid) { + HMAP_FOR_EACH_SAFE (data, tc_to_ufid_node, &tc_to_ufid) { if (data->netdev != netdev) { continue; } @@ -481,10 +481,10 @@ netdev_tc_flow_dump_destroy(struct netdev_flow_dump *dump) static void parse_flower_rewrite_to_netlink_action(struct ofpbuf *buf, - struct tc_flower *flower) + struct tc_action *action) { - char *mask = (char *) &flower->rewrite.mask; - char *data = (char *) &flower->rewrite.key; + char *mask = (char *) &action->rewrite.mask; + char *data = (char *) &action->rewrite.key; for (int type = 0; type < ARRAY_SIZE(set_flower_map); type++) { char *put = NULL; @@ -585,8 +585,10 @@ parse_tc_flower_to_stats(struct tc_flower *flower, } memset(stats, 0, sizeof *stats); - stats->n_packets = get_32aligned_u64(&flower->stats.n_packets); - stats->n_bytes = get_32aligned_u64(&flower->stats.n_bytes); + stats->n_packets = get_32aligned_u64(&flower->stats_sw.n_packets); + stats->n_packets += get_32aligned_u64(&flower->stats_hw.n_packets); + stats->n_bytes = get_32aligned_u64(&flower->stats_sw.n_bytes); + stats->n_bytes += get_32aligned_u64(&flower->stats_hw.n_bytes); stats->used = flower->lastused; } @@ -877,7 +879,7 @@ 
parse_tc_flower_to_match(struct tc_flower *flower, } break; case TC_ACT_PEDIT: { - parse_flower_rewrite_to_netlink_action(buf, flower); + parse_flower_rewrite_to_netlink_action(buf, action); } break; case TC_ACT_ENCAP: { @@ -1222,8 +1224,8 @@ parse_put_flow_set_masked_action(struct tc_flower *flower, uint64_t set_stub[1024 / 8]; struct ofpbuf set_buf = OFPBUF_STUB_INITIALIZER(set_stub); char *set_data, *set_mask; - char *key = (char *) &flower->rewrite.key; - char *mask = (char *) &flower->rewrite.mask; + char *key = (char *) &action->rewrite.key; + char *mask = (char *) &action->rewrite.mask; const struct nlattr *attr; int i, j, type; size_t size; @@ -1265,14 +1267,6 @@ parse_put_flow_set_masked_action(struct tc_flower *flower, } } - if (!is_all_zeros(&flower->rewrite, sizeof flower->rewrite)) { - if (flower->rewrite.rewrite == false) { - flower->rewrite.rewrite = true; - action->type = TC_ACT_PEDIT; - flower->action_count++; - } - } - if (hasmask && !is_all_zeros(set_mask, size)) { VLOG_DBG_RL(&rl, "unsupported sub attribute of set action type %d", type); @@ -1281,6 +1275,8 @@ parse_put_flow_set_masked_action(struct tc_flower *flower, } ofpbuf_uninit(&set_buf); + action->type = TC_ACT_PEDIT; + flower->action_count++; return 0; } @@ -1541,6 +1537,12 @@ parse_match_ct_state_to_flower(struct tc_flower *flower, struct match *match) flower->key.ct_state &= ~(TCA_FLOWER_KEY_CT_FLAGS_NEW); flower->mask.ct_state &= ~(TCA_FLOWER_KEY_CT_FLAGS_NEW); } + + if (flower->key.ct_state && + !(flower->key.ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED)) { + flower->key.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_TRACKED; + flower->mask.ct_state |= TCA_FLOWER_KEY_CT_FLAGS_TRACKED; + } } if (mask->ct_zone) { @@ -1638,7 +1640,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, if (mask->vlans[0].tpid && eth_type_vlan(key->vlans[0].tpid)) { flower.key.encap_eth_type[0] = flower.key.eth_type; - flower.mask.encap_eth_type[0] = flower.mask.eth_type; + flower.mask.encap_eth_type[0] = 
CONSTANT_HTONS(0xffff); flower.key.eth_type = key->vlans[0].tpid; flower.mask.eth_type = mask->vlans[0].tpid; } @@ -1841,7 +1843,25 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, VLOG_DBG_RL(&rl, "Can't find netdev for output port %d", port); return ENODEV; } + + if (!netdev_flow_api_equals(netdev, outdev)) { + VLOG_DBG_RL(&rl, + "Flow API provider mismatch between ingress (%s) " + "and egress (%s) ports", + netdev_get_name(netdev), netdev_get_name(outdev)); + netdev_close(outdev); + return EOPNOTSUPP; + } + action->out.ifindex_out = netdev_get_ifindex(outdev); + if (action->out.ifindex_out < 0) { + VLOG_DBG_RL(&rl, + "Can't find ifindex for output port %s, error %d", + netdev_get_name(outdev), action->out.ifindex_out); + netdev_close(outdev); + return -action->out.ifindex_out; + } + action->out.ingress = is_internal_port(netdev_get_type(outdev)); action->type = TC_ACT_OUTPUT; flower.action_count++; @@ -2015,9 +2035,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED, if (stats) { memset(stats, 0, sizeof *stats); if (!tc_get_flower(&id, &flower)) { - stats->n_packets = get_32aligned_u64(&flower.stats.n_packets); - stats->n_bytes = get_32aligned_u64(&flower.stats.n_bytes); - stats->used = flower.lastused; + parse_tc_flower_to_stats(&flower, stats); } } diff --git a/lib/odp-util.c b/lib/odp-util.c index 9a705cffa3..2d2a6893c6 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -3429,16 +3429,16 @@ format_eth(struct ds *ds, const char *name, const struct eth_addr key, static void format_be64(struct ds *ds, const char *name, ovs_be64 key, - const ovs_be64 *mask, bool verbose) + const ovs_32aligned_be64 *mask_, bool verbose) { - bool mask_empty = mask && !*mask; + ovs_be64 mask = mask_ ? 
get_32aligned_be64(mask_) : htonll(0); - if (verbose || !mask_empty) { - bool mask_full = !mask || *mask == OVS_BE64_MAX; + if (verbose || !mask_ || mask) { + bool mask_full = !mask_ || mask == OVS_BE64_MAX; ds_put_format(ds, "%s=0x%"PRIx64, name, ntohll(key)); if (!mask_full) { /* Partially masked. */ - ds_put_format(ds, "/%#"PRIx64, ntohll(*mask)); + ds_put_format(ds, "/%#"PRIx64, ntohll(mask)); } ds_put_char(ds, ','); } @@ -4630,6 +4630,11 @@ odp_flow_format(const struct nlattr *key, size_t key_len, ds_put_char(ds, ','); } ds_put_cstr(ds, "eth()"); + } else if (attr_type == OVS_KEY_ATTR_PACKET_TYPE && is_wildcard) { + /* See the above help text, however in the case where the + * packet type is not shown, we still need to display the + * eth() header if the packet type is wildcarded. */ + has_packet_type_key = false; } ofpbuf_clear(&ofp); } @@ -7161,11 +7166,6 @@ parse_l2_5_onward(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1], } } } - } else if (src_flow->nw_proto == IPPROTO_IGMP - && src_flow->dl_type == htons(ETH_TYPE_IP)) { - /* OVS userspace parses the IGMP type, code, and group, but its - * datapaths do not, so there is always missing information.
*/ - return ODP_FIT_TOO_LITTLE; } if (is_mask && expected_bit != OVS_KEY_ATTR_UNSPEC) { if ((flow->tp_src || flow->tp_dst) && flow->nw_proto != 0xff) { @@ -7188,7 +7188,8 @@ parse_8021q_onward(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1], uint64_t present_attrs, int out_of_range_attr, uint64_t expected_attrs, struct flow *flow, const struct nlattr *key, size_t key_len, - const struct flow *src_flow, char **errorp) + const struct flow *src_flow, char **errorp, + bool ignore_vlan_limit) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); bool is_mask = src_flow != flow; @@ -7196,9 +7197,11 @@ parse_8021q_onward(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1], const struct nlattr *encap; enum odp_key_fitness encap_fitness; enum odp_key_fitness fitness = ODP_FIT_ERROR; + int vlan_limit; int encaps = 0; - while (encaps < flow_vlan_limit && + vlan_limit = ignore_vlan_limit ? FLOW_MAX_VLAN_HEADERS : flow_vlan_limit; + while (encaps < vlan_limit && (is_mask ? (src_flow->vlans[encaps].tci & htons(VLAN_CFI)) != 0 : eth_type_vlan(flow->dl_type))) { @@ -7259,6 +7262,14 @@ parse_8021q_onward(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1], } expected_attrs = 0; + /* For OVS to be backward compatible with newer datapath + * implementations, we should ignore out of range attributes. 
*/ + if (out_of_range_attr) { + VLOG_DBG("Flow key decode found unknown OVS_KEY_ATTR, %d", + out_of_range_attr); + out_of_range_attr = 0; + } + if (!parse_ethertype(attrs, present_attrs, &expected_attrs, flow, src_flow, errorp)) { return ODP_FIT_ERROR; @@ -7281,7 +7292,7 @@ parse_8021q_onward(const struct nlattr *attrs[OVS_KEY_ATTR_MAX + 1], static enum odp_key_fitness odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len, struct flow *flow, const struct flow *src_flow, - char **errorp) + char **errorp, bool ignore_vlan_limit) { /* New "struct flow" fields that are visible to the datapath (including all * data fields) should be translated from equivalent datapath flow fields @@ -7308,6 +7319,14 @@ odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len, } expected_attrs = 0; + /* For OVS to be backward compatible with newer datapath implementations, + * we should ignore out of range attributes. */ + if (out_of_range_attr) { + VLOG_DBG("Flow key decode found unknown OVS_KEY_ATTR, %d", + out_of_range_attr); + out_of_range_attr = 0; + } + /* Metadata. 
*/ if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_RECIRC_ID)) { flow->recirc_id = nl_attr_get_u32(attrs[OVS_KEY_ATTR_RECIRC_ID]); @@ -7431,7 +7450,7 @@ odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len, : eth_type_vlan(src_flow->dl_type)) { fitness = parse_8021q_onward(attrs, present_attrs, out_of_range_attr, expected_attrs, flow, key, key_len, - src_flow, errorp); + src_flow, errorp, ignore_vlan_limit); } else { if (is_mask) { /* A missing VLAN mask means exact match on vlan_tci 0 (== no @@ -7497,7 +7516,7 @@ enum odp_key_fitness odp_flow_key_to_flow(const struct nlattr *key, size_t key_len, struct flow *flow, char **errorp) { - return odp_flow_key_to_flow__(key, key_len, flow, flow, errorp); + return odp_flow_key_to_flow__(key, key_len, flow, flow, errorp, false); } /* Converts the 'mask_key_len' bytes of OVS_KEY_ATTR_* attributes in 'mask_key' @@ -7509,14 +7528,16 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len, * If 'errorp' is nonnull, this function uses it for detailed error reports: if * the return value is ODP_FIT_ERROR, it stores a malloc()'d error string in * '*errorp', otherwise NULL. 
*/ -enum odp_key_fitness -odp_flow_key_to_mask(const struct nlattr *mask_key, size_t mask_key_len, - struct flow_wildcards *mask, const struct flow *src_flow, - char **errorp) +static enum odp_key_fitness +odp_flow_key_to_mask__(const struct nlattr *mask_key, size_t mask_key_len, + struct flow_wildcards *mask, + const struct flow *src_flow, + char **errorp, bool ignore_vlan_limit) { if (mask_key_len) { return odp_flow_key_to_flow__(mask_key, mask_key_len, - &mask->masks, src_flow, errorp); + &mask->masks, src_flow, errorp, + ignore_vlan_limit); } else { if (errorp) { *errorp = NULL; @@ -7530,6 +7551,15 @@ odp_flow_key_to_mask(const struct nlattr *mask_key, size_t mask_key_len, } } +enum odp_key_fitness +odp_flow_key_to_mask(const struct nlattr *mask_key, size_t mask_key_len, + struct flow_wildcards *mask, + const struct flow *src_flow, char **errorp) +{ + return odp_flow_key_to_mask__(mask_key, mask_key_len, mask, src_flow, + errorp, false); +} + /* Converts the netlink formated key/mask to match. * Fails if odp_flow_key_from_key/mask and odp_flow_key_key/mask * disagree on the acceptable form of flow */ @@ -7540,12 +7570,15 @@ parse_key_and_mask_to_match(const struct nlattr *key, size_t key_len, { enum odp_key_fitness fitness; - fitness = odp_flow_key_to_flow(key, key_len, &match->flow, NULL); + fitness = odp_flow_key_to_flow__(key, key_len, &match->flow, &match->flow, + NULL, true); if (fitness) { - /* This should not happen: it indicates that - * odp_flow_key_from_flow() and odp_flow_key_to_flow() disagree on - * the acceptable form of a flow. Log the problem as an error, - * with enough details to enable debugging. */ + /* This will happen when the odp_flow_key_to_flow() function can't + * parse the netlink message to a match structure. 
It will return + * ODP_FIT_TOO_LITTLE if there is not enough information to parse the + * content successfully, ODP_FIT_TOO_MUCH if there is too much netlink + * data and we do not know how to safely ignore it, and ODP_FIT_ERROR + * in any other case. */ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); if (!VLOG_DROP_ERR(&rl)) { @@ -7553,20 +7586,18 @@ parse_key_and_mask_to_match(const struct nlattr *key, size_t key_len, ds_init(&s); odp_flow_format(key, key_len, NULL, 0, NULL, &s, true); - VLOG_ERR("internal error parsing flow key %s", ds_cstr(&s)); + VLOG_ERR("internal error parsing flow key %s (%s)", + ds_cstr(&s), odp_key_fitness_to_string(fitness)); ds_destroy(&s); } return EINVAL; } - fitness = odp_flow_key_to_mask(mask, mask_len, &match->wc, &match->flow, - NULL); + fitness = odp_flow_key_to_mask__(mask, mask_len, &match->wc, &match->flow, + NULL, true); if (fitness) { - /* This should not happen: it indicates that - * odp_flow_key_from_mask() and odp_flow_key_to_mask() - * disagree on the acceptable form of a mask. Log the problem - * as an error, with enough details to enable debugging. */ + /* This should not happen, see comment above. 
*/ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); if (!VLOG_DROP_ERR(&rl)) { diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c index 006837c2e1..c13f97b5c9 100644 --- a/lib/ofp-actions.c +++ b/lib/ofp-actions.c @@ -853,7 +853,9 @@ decode_NXAST_RAW_CONTROLLER2(const struct ext_action_header *eah, case NXAC2PT_REASON: { uint8_t u8; error = ofpprop_parse_u8(&payload, &u8); - oc->reason = u8; + if (!error) { + oc->reason = u8; + } break; } @@ -3200,16 +3202,21 @@ set_field_split_str(char *arg, char **key, char **value, char **delim) { char *value_end; + *key = NULL; *value = arg; - value_end = strstr(arg, "->"); - *key = value_end + strlen("->"); if (delim) { - *delim = value_end; + *delim = NULL; } + value_end = strstr(arg, "->"); if (!value_end) { return xasprintf("%s: missing `->'", arg); } + + *key = value_end + strlen("->"); + if (delim) { + *delim = value_end; + } if (strlen(value_end) <= strlen("->")) { return xasprintf("%s: missing field name following `->'", arg); } diff --git a/lib/ofp-msgs.c b/lib/ofp-msgs.c index fec54f75f8..93aa812978 100644 --- a/lib/ofp-msgs.c +++ b/lib/ofp-msgs.c @@ -1123,8 +1123,8 @@ ofpmp_partial_error(struct hmap *assembler, struct ofpmp_partial *p, void ofpmp_assembler_clear(struct hmap *assembler) { - struct ofpmp_partial *p, *next; - HMAP_FOR_EACH_SAFE (p, next, hmap_node, assembler) { + struct ofpmp_partial *p; + HMAP_FOR_EACH_SAFE (p, hmap_node, assembler) { ofpmp_partial_destroy(assembler, p); } } @@ -1290,8 +1290,8 @@ ofpmp_assembler_execute(struct hmap *assembler, struct ofpbuf *msg, * on either side by parts with 0-byte bodies. We remove the 0-byte * ones here to simplify processing later. 
*/ - struct ofpbuf *b, *next; - LIST_FOR_EACH_SAFE (b, next, list_node, out) { + struct ofpbuf *b; + LIST_FOR_EACH_SAFE (b, list_node, out) { if (b->size <= min_len && !ovs_list_is_short(out)) { ovs_list_remove(&b->list_node); ofpbuf_delete(b); diff --git a/lib/ofp-packet.c b/lib/ofp-packet.c index 4579548ee1..9485ddfc93 100644 --- a/lib/ofp-packet.c +++ b/lib/ofp-packet.c @@ -133,7 +133,9 @@ decode_nx_packet_in2(const struct ofp_header *oh, bool loose, case NXPINT_FULL_LEN: { uint32_t u32; error = ofpprop_parse_u32(&payload, &u32); - *total_len = u32; + if (!error) { + *total_len = u32; + } break; } @@ -152,7 +154,9 @@ decode_nx_packet_in2(const struct ofp_header *oh, bool loose, case NXPINT_REASON: { uint8_t reason; error = ofpprop_parse_u8(&payload, &reason); - pin->reason = reason; + if (!error) { + pin->reason = reason; + } break; } @@ -883,7 +887,9 @@ ofputil_decode_packet_in_private(const struct ofp_header *oh, bool loose, case NXCPT_ODP_PORT: { uint32_t value; error = ofpprop_parse_u32(&payload, &value); - pin->odp_port = u32_to_odp(value); + if (!error) { + pin->odp_port = u32_to_odp(value); + } break; } diff --git a/lib/ofpbuf.c b/lib/ofpbuf.c index 271105bdea..879275a7a3 100644 --- a/lib/ofpbuf.c +++ b/lib/ofpbuf.c @@ -426,6 +426,10 @@ void ofpbuf_reserve(struct ofpbuf *b, size_t size) { ovs_assert(!b->size); + + if (!size) { + return; + } ofpbuf_prealloc_tailroom(b, size); b->data = (char*)b->data + size; } @@ -436,6 +440,10 @@ ofpbuf_reserve(struct ofpbuf *b, size_t size) void * ofpbuf_push_uninit(struct ofpbuf *b, size_t size) { + if (!size) { + return b->data; + } + ofpbuf_prealloc_headroom(b, size); b->data = (char*)b->data - size; b->size += size; diff --git a/lib/ovs-lldp.c b/lib/ovs-lldp.c index 162311fa45..2d13e971ed 100644 --- a/lib/ovs-lldp.c +++ b/lib/ovs-lldp.c @@ -559,9 +559,9 @@ aa_mapping_unregister_mapping(struct lldp *lldp, struct lldpd_hardware *hw, struct aa_mapping_internal *m) { - struct lldpd_aa_isid_vlan_maps_tlv *lm, *lm_next; + 
struct lldpd_aa_isid_vlan_maps_tlv *lm; - LIST_FOR_EACH_SAFE (lm, lm_next, m_entries, + LIST_FOR_EACH_SAFE (lm, m_entries, &hw->h_lport.p_isid_vlan_maps) { uint32_t isid = lm->isid_vlan_data.isid; @@ -738,6 +738,14 @@ lldp_put_packet(struct lldp *lldp, struct dp_packet *packet, ovs_mutex_unlock(&mutex); } +/* Is LLDP enabled? + */ +bool +lldp_is_enabled(struct lldp *lldp) +{ + return lldp ? lldp->enabled : false; +} + /* Configures the LLDP stack. */ bool @@ -953,8 +961,8 @@ lldp_ref(const struct lldp *lldp_) void lldp_destroy_dummy(struct lldp *lldp) { - struct lldpd_hardware *hw, *hw_next; - struct lldpd_chassis *chassis, *chassis_next; + struct lldpd_hardware *hw; + struct lldpd_chassis *chassis; struct lldpd *cfg; if (!lldp) { @@ -963,13 +971,13 @@ lldp_destroy_dummy(struct lldp *lldp) cfg = lldp->lldpd; - LIST_FOR_EACH_SAFE (hw, hw_next, h_entries, &cfg->g_hardware) { + LIST_FOR_EACH_SAFE (hw, h_entries, &cfg->g_hardware) { ovs_list_remove(&hw->h_entries); free(hw->h_lport.p_lastframe); free(hw); } - LIST_FOR_EACH_SAFE (chassis, chassis_next, list, &cfg->g_chassis) { + LIST_FOR_EACH_SAFE (chassis, list, &cfg->g_chassis) { ovs_list_remove(&chassis->list); free(chassis); } diff --git a/lib/ovs-lldp.h b/lib/ovs-lldp.h index 0e536e8c27..661ac4e18a 100644 --- a/lib/ovs-lldp.h +++ b/lib/ovs-lldp.h @@ -86,6 +86,7 @@ void lldp_run(struct lldpd *cfg); bool lldp_should_send_packet(struct lldp *cfg); bool lldp_should_process_flow(struct lldp *lldp, const struct flow *flow); bool lldp_configure(struct lldp *lldp, const struct smap *cfg); +bool lldp_is_enabled(struct lldp *lldp); void lldp_process_packet(struct lldp *cfg, const struct dp_packet *); void lldp_put_packet(struct lldp *lldp, struct dp_packet *packet, const struct eth_addr eth_src); diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h index ecc251a7ff..83bd10cca5 100644 --- a/lib/ovs-numa.h +++ b/lib/ovs-numa.h @@ -68,9 +68,9 @@ void ovs_numa_dump_destroy(struct ovs_numa_dump *); int 
ovs_numa_thread_setaffinity_core(unsigned core_id); #define FOR_EACH_CORE_ON_DUMP(ITER, DUMP) \ - HMAP_FOR_EACH((ITER), hmap_node, &(DUMP)->cores) + HMAP_FOR_EACH (ITER, hmap_node, &(DUMP)->cores) #define FOR_EACH_NUMA_ON_DUMP(ITER, DUMP) \ - HMAP_FOR_EACH((ITER), hmap_node, &(DUMP)->numas) + HMAP_FOR_EACH (ITER, hmap_node, &(DUMP)->numas) #endif /* ovs-numa.h */ diff --git a/lib/ovs-rcu.c b/lib/ovs-rcu.c index 1866bd3088..946aa04d18 100644 --- a/lib/ovs-rcu.c +++ b/lib/ovs-rcu.c @@ -444,3 +444,40 @@ ovsrcu_init_module(void) ovsthread_once_done(&once); } } + +static void +ovsrcu_barrier_func(void *seq_) +{ + struct seq *seq = (struct seq *) seq_; + seq_change(seq); +} + +/* Similar to the kernel rcu_barrier, ovsrcu_barrier waits for all outstanding + * RCU callbacks to complete. However, unlike the kernel rcu_barrier, which + * might return immediately if there are no outstanding RCU callbacks, + * this API will at least wait for a grace period. + * + * Another issue the caller might need to know is that the barrier is just + * for "one-shot", i.e. if inside some RCU callbacks, another RCU callback is + * registered, this API only guarantees the first round of RCU callbacks have + * been executed after it returns. + */ +void +ovsrcu_barrier(void) +{ + struct seq *seq = seq_create(); + /* First let all threads flush their cbsets. */ + ovsrcu_synchronize(); + + /* Then register a new cbset, ensure this cbset + * is at the tail of the global list. */ + uint64_t seqno = seq_read(seq); + ovsrcu_postpone__(ovsrcu_barrier_func, (void *) seq); + + do { + seq_wait(seq, seqno); + poll_block(); + } while (seqno == seq_read(seq)); + + seq_destroy(seq); +} diff --git a/lib/ovs-rcu.h b/lib/ovs-rcu.h index ecc4c92010..8b397b7fb0 100644 --- a/lib/ovs-rcu.h +++ b/lib/ovs-rcu.h @@ -155,6 +155,19 @@ * port_delete(id); * } * + * Use ovsrcu_barrier() to wait for all the outstanding RCU callbacks to + * finish. 
This is useful when you have to destroy some resources however + * these resources are referenced in the outstanding RCU callbacks. + * + * void rcu_cb(void *A) { + * do_something(A); + * } + * + * void destroy_A() { + * ovsrcu_postpone(rcu_cb, A); // will use A later + * ovsrcu_barrier(); // wait for rcu_cb done + * do_destroy_A(); // free A + * } */ #include "compiler.h" @@ -310,4 +323,6 @@ void ovsrcu_synchronize(void); void ovsrcu_exit(void); +void ovsrcu_barrier(void); + #endif /* ovs-rcu.h */ diff --git a/lib/ovs-router.c b/lib/ovs-router.c index 09b81c6e5a..5d0fbd503e 100644 --- a/lib/ovs-router.c +++ b/lib/ovs-router.c @@ -164,9 +164,10 @@ static void rt_init_match(struct match *match, uint32_t mark, match->flow.pkt_mark = mark; } -static int -get_src_addr(const struct in6_addr *ip6_dst, - const char output_bridge[], struct in6_addr *psrc) +int +ovs_router_get_netdev_source_address(const struct in6_addr *ip6_dst, + const char output_bridge[], + struct in6_addr *psrc) { struct in6_addr *mask, *addr6; int err, n_in6, i, max_plen = -1; @@ -235,9 +236,11 @@ ovs_router_insert__(uint32_t mark, uint8_t priority, bool local, p->plen = plen; p->local = local; p->priority = priority; - err = get_src_addr(ip6_dst, output_bridge, &p->src_addr); + err = ovs_router_get_netdev_source_address(ip6_dst, output_bridge, + &p->src_addr); if (err && ipv6_addr_is_set(gw)) { - err = get_src_addr(gw, output_bridge, &p->src_addr); + err = ovs_router_get_netdev_source_address(gw, output_bridge, + &p->src_addr); } if (err) { struct ds ds = DS_EMPTY_INITIALIZER; diff --git a/lib/ovs-router.h b/lib/ovs-router.h index 34ea163eef..d8ce3c00de 100644 --- a/lib/ovs-router.h +++ b/lib/ovs-router.h @@ -37,6 +37,10 @@ void ovs_router_flush(void); void ovs_router_disable_system_routing_table(void); +int ovs_router_get_netdev_source_address(const struct in6_addr *ip6_dst, + const char output_bridge[], + struct in6_addr *psrc); + #ifdef __cplusplus } #endif diff --git a/lib/ovsdb-cs.c 
b/lib/ovsdb-cs.c index dead31275d..9713c7dc7c 100644 --- a/lib/ovsdb-cs.c +++ b/lib/ovsdb-cs.c @@ -900,8 +900,8 @@ ovsdb_cs_db_get_table(struct ovsdb_cs_db *db, const char *table) static void ovsdb_cs_db_destroy_tables(struct ovsdb_cs_db *db) { - struct ovsdb_cs_db_table *table, *next; - HMAP_FOR_EACH_SAFE (table, next, hmap_node, &db->tables) { + struct ovsdb_cs_db_table *table; + HMAP_FOR_EACH_SAFE (table, hmap_node, &db->tables) { json_destroy(table->ack_cond); json_destroy(table->req_cond); json_destroy(table->new_cond); @@ -1793,8 +1793,8 @@ ovsdb_cs_update_server_row(struct server_row *row, static void ovsdb_cs_clear_server_rows(struct ovsdb_cs *cs) { - struct server_row *row, *next; - HMAP_FOR_EACH_SAFE (row, next, hmap_node, &cs->server_rows) { + struct server_row *row; + HMAP_FOR_EACH_SAFE (row, hmap_node, &cs->server_rows) { ovsdb_cs_delete_server_row(cs, row); } } @@ -2128,9 +2128,9 @@ void ovsdb_cs_free_schema(struct shash *schema) { if (schema) { - struct shash_node *node, *next; + struct shash_node *node; - SHASH_FOR_EACH_SAFE (node, next, schema) { + SHASH_FOR_EACH_SAFE (node, schema) { struct sset *sset = node->data; sset_destroy(sset); free(sset); diff --git a/lib/ovsdb-data.c b/lib/ovsdb-data.c index 6b1c20ff85..61ad7679a6 100644 --- a/lib/ovsdb-data.c +++ b/lib/ovsdb-data.c @@ -1957,6 +1957,19 @@ ovsdb_datum_add_unsafe(struct ovsdb_datum *datum, } } +void +ovsdb_datum_add_from_index_unsafe(struct ovsdb_datum *dst, + const struct ovsdb_datum *src, + size_t idx, + const struct ovsdb_type *type) +{ + const union ovsdb_atom *key = &src->keys[idx]; + const union ovsdb_atom *value = type->value.type != OVSDB_TYPE_VOID + ? &src->values[idx] + : NULL; + ovsdb_datum_add_unsafe(dst, key, value, type, NULL); +} + /* Adds 'n' atoms starting from index 'start_idx' from 'src' to the end of * 'dst'. 'dst' should have enough memory allocated to hold the additional * 'n' atoms. Atoms are not cloned, i.e. 'dst' will reference the same data. 
@@ -2165,12 +2178,10 @@ ovsdb_datum_added_removed(struct ovsdb_datum *added, int c = ovsdb_atom_compare_3way(&old->keys[oi], &new->keys[ni], type->key.type); if (c < 0) { - ovsdb_datum_add_unsafe(removed, &old->keys[oi], &old->values[oi], - type, NULL); + ovsdb_datum_add_from_index_unsafe(removed, old, oi, type); oi++; } else if (c > 0) { - ovsdb_datum_add_unsafe(added, &new->keys[ni], &new->values[ni], - type, NULL); + ovsdb_datum_add_from_index_unsafe(added, new, ni, type); ni++; } else { if (type->value.type != OVSDB_TYPE_VOID && @@ -2186,13 +2197,11 @@ ovsdb_datum_added_removed(struct ovsdb_datum *added, } for (; oi < old->n; oi++) { - ovsdb_datum_add_unsafe(removed, &old->keys[oi], &old->values[oi], - type, NULL); + ovsdb_datum_add_from_index_unsafe(removed, old, oi, type); } for (; ni < new->n; ni++) { - ovsdb_datum_add_unsafe(added, &new->keys[ni], &new->values[ni], - type, NULL); + ovsdb_datum_add_from_index_unsafe(added, new, ni, type); } } @@ -2228,12 +2237,10 @@ ovsdb_datum_diff(struct ovsdb_datum *diff, int c = ovsdb_atom_compare_3way(&old->keys[oi], &new->keys[ni], type->key.type); if (c < 0) { - ovsdb_datum_add_unsafe(diff, &old->keys[oi], &old->values[oi], - type, NULL); + ovsdb_datum_add_from_index_unsafe(diff, old, oi, type); oi++; } else if (c > 0) { - ovsdb_datum_add_unsafe(diff, &new->keys[ni], &new->values[ni], - type, NULL); + ovsdb_datum_add_from_index_unsafe(diff, new, ni, type); ni++; } else { if (type->value.type != OVSDB_TYPE_VOID && @@ -2247,13 +2254,11 @@ ovsdb_datum_diff(struct ovsdb_datum *diff, } for (; oi < old->n; oi++) { - ovsdb_datum_add_unsafe(diff, &old->keys[oi], &old->values[oi], - type, NULL); + ovsdb_datum_add_from_index_unsafe(diff, old, oi, type); } for (; ni < new->n; ni++) { - ovsdb_datum_add_unsafe(diff, &new->keys[ni], &new->values[ni], - type, NULL); + ovsdb_datum_add_from_index_unsafe(diff, new, ni, type); } } diff --git a/lib/ovsdb-data.h b/lib/ovsdb-data.h index 47115a7b85..ba5d179a65 100644 --- a/lib/ovsdb-data.h 
+++ b/lib/ovsdb-data.h @@ -280,6 +280,10 @@ void ovsdb_datum_add_unsafe(struct ovsdb_datum *, const union ovsdb_atom *value, const struct ovsdb_type *, const union ovsdb_atom *range_end_atom); +void ovsdb_datum_add_from_index_unsafe(struct ovsdb_datum *dst, + const struct ovsdb_datum *src, + size_t idx, + const struct ovsdb_type *type); /* Transactions with named-uuid row names. */ struct json *ovsdb_datum_to_json_with_row_names(const struct ovsdb_datum *, diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c index c19128d55c..882ede7559 100644 --- a/lib/ovsdb-idl.c +++ b/lib/ovsdb-idl.c @@ -389,25 +389,25 @@ ovsdb_idl_clear(struct ovsdb_idl *db) */ for (size_t i = 0; i < db->class_->n_tables; i++) { struct ovsdb_idl_table *table = &db->tables[i]; - struct ovsdb_idl_row *row, *next_row; + struct ovsdb_idl_row *row; if (hmap_is_empty(&table->rows)) { continue; } - HMAP_FOR_EACH_SAFE (row, next_row, hmap_node, &table->rows) { - struct ovsdb_idl_arc *arc, *next_arc; + HMAP_FOR_EACH_SAFE (row, hmap_node, &table->rows) { + struct ovsdb_idl_arc *arc; if (!ovsdb_idl_row_is_orphan(row)) { ovsdb_idl_remove_from_indexes(row); ovsdb_idl_row_unparse(row); } - LIST_FOR_EACH_SAFE (arc, next_arc, src_node, &row->src_arcs) { + LIST_FOR_EACH_SAFE (arc, src_node, &row->src_arcs) { ovs_list_remove(&arc->src_node); ovs_list_remove(&arc->dst_node); free(arc); } - LIST_FOR_EACH_SAFE (arc, next_arc, dst_node, &row->dst_arcs) { + LIST_FOR_EACH_SAFE (arc, dst_node, &row->dst_arcs) { ovs_list_remove(&arc->src_node); ovs_list_remove(&arc->dst_node); free(arc); @@ -1041,8 +1041,8 @@ ovsdb_idl_condition_destroy(struct ovsdb_idl_condition *cond) void ovsdb_idl_condition_clear(struct ovsdb_idl_condition *cond) { - struct ovsdb_idl_clause *clause, *next; - HMAP_FOR_EACH_SAFE (clause, next, hmap_node, &cond->clauses) { + struct ovsdb_idl_clause *clause; + HMAP_FOR_EACH_SAFE (clause, hmap_node, &cond->clauses) { hmap_remove(&cond->clauses, &clause->hmap_node); ovsdb_idl_clause_destroy(clause); } @@ 
-1345,9 +1345,9 @@ ovsdb_idl_track_clear__(struct ovsdb_idl *idl, bool flush_all) struct ovsdb_idl_table *table = &idl->tables[i]; if (!ovs_list_is_empty(&table->track_list)) { - struct ovsdb_idl_row *row, *next; + struct ovsdb_idl_row *row; - LIST_FOR_EACH_SAFE(row, next, track_node, &table->track_list) { + LIST_FOR_EACH_SAFE (row, track_node, &table->track_list) { if (row->updated) { free(row->updated); row->updated = NULL; @@ -1480,9 +1480,9 @@ ovsdb_idl_parse_update(struct ovsdb_idl *idl, static void ovsdb_idl_reparse_deleted(struct ovsdb_idl *db) { - struct ovsdb_idl_row *row, *next; + struct ovsdb_idl_row *row; - LIST_FOR_EACH_SAFE (row, next, track_node, &db->deleted_untracked_rows) { + LIST_FOR_EACH_SAFE (row, track_node, &db->deleted_untracked_rows) { ovsdb_idl_row_untrack_change(row); add_tracked_change_for_references(row); ovsdb_idl_row_reparse_backrefs(row); @@ -1906,8 +1906,8 @@ ovsdb_idl_index_create2(struct ovsdb_idl *idl, static void ovsdb_idl_destroy_indexes(struct ovsdb_idl_table *table) { - struct ovsdb_idl_index *index, *next; - LIST_FOR_EACH_SAFE (index, next, node, &table->indexes) { + struct ovsdb_idl_index *index; + LIST_FOR_EACH_SAFE (index, node, &table->indexes) { skiplist_destroy(index->skiplist, NULL); free(index->columns); free(index); @@ -2145,12 +2145,12 @@ ovsdb_idl_row_clear_new(struct ovsdb_idl_row *row) static void ovsdb_idl_row_clear_arcs(struct ovsdb_idl_row *row, bool destroy_dsts) { - struct ovsdb_idl_arc *arc, *next; + struct ovsdb_idl_arc *arc; /* Delete all forward arcs. If 'destroy_dsts', destroy any orphaned rows * that this causes to be unreferenced. 
*/ - LIST_FOR_EACH_SAFE (arc, next, src_node, &row->src_arcs) { + LIST_FOR_EACH_SAFE (arc, src_node, &row->src_arcs) { ovs_list_remove(&arc->dst_node); if (destroy_dsts && ovsdb_idl_row_is_orphan(arc->dst) @@ -2166,7 +2166,7 @@ ovsdb_idl_row_clear_arcs(struct ovsdb_idl_row *row, bool destroy_dsts) static void ovsdb_idl_row_reparse_backrefs(struct ovsdb_idl_row *row) { - struct ovsdb_idl_arc *arc, *next; + struct ovsdb_idl_arc *arc; /* This is trickier than it looks. ovsdb_idl_row_clear_arcs() will destroy * 'arc', so we need to use the "safe" variant of list traversal. However, @@ -2178,7 +2178,7 @@ ovsdb_idl_row_reparse_backrefs(struct ovsdb_idl_row *row) * (If duplicate arcs were possible then we would need to make sure that * 'next' didn't also point into 'arc''s destination, but we forbid * duplicate arcs.) */ - LIST_FOR_EACH_SAFE (arc, next, dst_node, &row->dst_arcs) { + LIST_FOR_EACH_SAFE (arc, dst_node, &row->dst_arcs) { struct ovsdb_idl_row *ref = arc->src; ovsdb_idl_row_unparse(ref); @@ -2329,9 +2329,9 @@ ovsdb_idl_row_destroy_postprocess(struct ovsdb_idl *idl) struct ovsdb_idl_table *table = &idl->tables[i]; if (!ovs_list_is_empty(&table->track_list)) { - struct ovsdb_idl_row *row, *next; + struct ovsdb_idl_row *row; - LIST_FOR_EACH_SAFE(row, next, track_node, &table->track_list) { + LIST_FOR_EACH_SAFE (row, track_node, &table->track_list) { if (!ovsdb_idl_track_is_set(row->table)) { ovs_list_remove(&row->track_node); ovsdb_idl_row_unparse(row); @@ -2729,7 +2729,7 @@ ovsdb_idl_txn_increment(struct ovsdb_idl_txn *txn, void ovsdb_idl_txn_destroy(struct ovsdb_idl_txn *txn) { - struct ovsdb_idl_txn_insert *insert, *next; + struct ovsdb_idl_txn_insert *insert; if (txn->status == TXN_INCOMPLETE) { ovsdb_cs_forget_transaction(txn->idl->cs, txn->request_id); @@ -2739,7 +2739,7 @@ ovsdb_idl_txn_destroy(struct ovsdb_idl_txn *txn) ovsdb_idl_txn_abort(txn); ds_destroy(&txn->comment); free(txn->error); - HMAP_FOR_EACH_SAFE (insert, next, hmap_node, 
&txn->inserted_rows) { + HMAP_FOR_EACH_SAFE (insert, hmap_node, &txn->inserted_rows) { free(insert); } hmap_destroy(&txn->inserted_rows); @@ -2824,7 +2824,7 @@ substitute_uuids(struct json *json, const struct ovsdb_idl_txn *txn) static void ovsdb_idl_txn_disassemble(struct ovsdb_idl_txn *txn) { - struct ovsdb_idl_row *row, *next; + struct ovsdb_idl_row *row; /* This must happen early. Otherwise, ovsdb_idl_row_parse() will call an * ovsdb_idl_column's 'parse' function, which will call @@ -2832,7 +2832,7 @@ ovsdb_idl_txn_disassemble(struct ovsdb_idl_txn *txn) * transaction and fail to update the graph. */ txn->idl->txn = NULL; - HMAP_FOR_EACH_SAFE (row, next, txn_node, &txn->txn_rows) { + HMAP_FOR_EACH_SAFE (row, txn_node, &txn->txn_rows) { enum { INSERTED, MODIFIED, DELETED } op = (!row->new_datum ? DELETED : !row->old_datum ? INSERTED diff --git a/lib/ovsdb-map-op.c b/lib/ovsdb-map-op.c index 7b90ba84f9..795066e8ef 100644 --- a/lib/ovsdb-map-op.c +++ b/lib/ovsdb-map-op.c @@ -91,8 +91,8 @@ map_op_list_create(void) void map_op_list_destroy(struct map_op_list *list, const struct ovsdb_type *type) { - struct map_op *map_op, *next; - HMAP_FOR_EACH_SAFE (map_op, next, node, &list->hmap) { + struct map_op *map_op; + HMAP_FOR_EACH_SAFE (map_op, node, &list->hmap) { map_op_destroy(map_op, type); } hmap_destroy(&list->hmap); diff --git a/lib/ovsdb-set-op.c b/lib/ovsdb-set-op.c index 62c4621181..321043282e 100644 --- a/lib/ovsdb-set-op.c +++ b/lib/ovsdb-set-op.c @@ -90,8 +90,8 @@ set_op_list_create(void) void set_op_list_destroy(struct set_op_list *list, const struct ovsdb_type *type) { - struct set_op *set_op, *next; - HMAP_FOR_EACH_SAFE (set_op, next, node, &list->hmap) { + struct set_op *set_op; + HMAP_FOR_EACH_SAFE (set_op, node, &list->hmap) { set_op_destroy(set_op, type); } hmap_destroy(&list->hmap); diff --git a/lib/packets.c b/lib/packets.c index d0fba81766..874066e3c6 100644 --- a/lib/packets.c +++ b/lib/packets.c @@ -427,9 +427,9 @@ add_mpls(struct dp_packet 
*packet, ovs_be16 ethtype, ovs_be32 lse, } if (!l3_encap) { - ovs_be32 *header = dp_packet_push_uninit(packet, MPLS_HLEN); + struct mpls_hdr *header = dp_packet_push_uninit(packet, MPLS_HLEN); - *header = lse; + put_16aligned_be32(&header->mpls_lse, lse); packet->l2_5_ofs = 0; packet->packet_type = PACKET_TYPE_BE(OFPHTN_ETHERTYPE, ntohs(ethtype)); diff --git a/lib/pcap-file.c b/lib/pcap-file.c index 41835f6f4d..3ed7ea4880 100644 --- a/lib/pcap-file.c +++ b/lib/pcap-file.c @@ -344,9 +344,9 @@ tcp_reader_open(void) void tcp_reader_close(struct tcp_reader *r) { - struct tcp_stream *stream, *next_stream; + struct tcp_stream *stream; - HMAP_FOR_EACH_SAFE (stream, next_stream, hmap_node, &r->streams) { + HMAP_FOR_EACH_SAFE (stream, hmap_node, &r->streams) { tcp_stream_destroy(r, stream); } hmap_destroy(&r->streams); diff --git a/lib/perf-counter.c b/lib/perf-counter.c index e4eca58d03..6952fcb594 100644 --- a/lib/perf-counter.c +++ b/lib/perf-counter.c @@ -178,14 +178,14 @@ perf_counters_clear(void) void perf_counters_destroy(void) { - struct shash_node *node, *next; + struct shash_node *node; if (fd__ != -1) { ioctl(fd__, PERF_EVENT_IOC_DISABLE, 0); close(fd__); } - SHASH_FOR_EACH_SAFE (node, next, &perf_counters) { + SHASH_FOR_EACH_SAFE (node, &perf_counters) { shash_delete(&perf_counters, node); } diff --git a/lib/poll-loop.c b/lib/poll-loop.c index 4e751ff2c7..70fabeb8a3 100644 --- a/lib/poll-loop.c +++ b/lib/poll-loop.c @@ -298,9 +298,9 @@ log_wakeup(const char *where, const struct pollfd *pollfd, int timeout) static void free_poll_nodes(struct poll_loop *loop) { - struct poll_node *node, *next; + struct poll_node *node; - HMAP_FOR_EACH_SAFE (node, next, hmap_node, &loop->poll_nodes) { + HMAP_FOR_EACH_SAFE (node, hmap_node, &loop->poll_nodes) { hmap_remove(&loop->poll_nodes, &node->hmap_node); #ifdef _WIN32 if (node->wevent && node->pollfd.fd) { diff --git a/lib/rculist.h b/lib/rculist.h index 1072b87af2..c0d77acf94 100644 --- a/lib/rculist.h +++ b/lib/rculist.h @@ 
-365,35 +365,57 @@ rculist_is_singleton_protected(const struct rculist *list) return list_next == list->prev && list_next != list; } -#define RCULIST_FOR_EACH(ITER, MEMBER, RCULIST) \ - for (INIT_CONTAINER(ITER, rculist_next(RCULIST), MEMBER); \ - &(ITER)->MEMBER != (RCULIST); \ - ASSIGN_CONTAINER(ITER, rculist_next(&(ITER)->MEMBER), MEMBER)) -#define RCULIST_FOR_EACH_CONTINUE(ITER, MEMBER, RCULIST) \ - for (ASSIGN_CONTAINER(ITER, rculist_next(&(ITER)->MEMBER), MEMBER); \ - &(ITER)->MEMBER != (RCULIST); \ - ASSIGN_CONTAINER(ITER, rculist_next(&(ITER)->MEMBER), MEMBER)) - -#define RCULIST_FOR_EACH_REVERSE_PROTECTED(ITER, MEMBER, RCULIST) \ - for (INIT_CONTAINER(ITER, (RCULIST)->prev, MEMBER); \ - &(ITER)->MEMBER != (RCULIST); \ - ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER)) -#define RCULIST_FOR_EACH_REVERSE_PROTECTED_CONTINUE(ITER, MEMBER, RCULIST) \ - for (ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER); \ - &(ITER)->MEMBER != (RCULIST); \ - ASSIGN_CONTAINER(ITER, (ITER)->MEMBER.prev, MEMBER)) - -#define RCULIST_FOR_EACH_PROTECTED(ITER, MEMBER, RCULIST) \ - for (INIT_CONTAINER(ITER, rculist_next_protected(RCULIST), MEMBER); \ - &(ITER)->MEMBER != (RCULIST); \ - ASSIGN_CONTAINER(ITER, rculist_next_protected(&(ITER)->MEMBER), \ - MEMBER)) - -#define RCULIST_FOR_EACH_SAFE_PROTECTED(ITER, NEXT, MEMBER, RCULIST) \ - for (INIT_CONTAINER(ITER, rculist_next_protected(RCULIST), MEMBER); \ - (&(ITER)->MEMBER != (RCULIST) \ - ? 
INIT_CONTAINER(NEXT, rculist_next_protected(&(ITER)->MEMBER), \ - MEMBER), 1 : 0); \ - (ITER) = (NEXT)) +#define RCULIST_FOR_EACH(ITER, MEMBER, RCULIST) \ + for (INIT_MULTIVAR(ITER, MEMBER, rculist_next(RCULIST), \ + const struct rculist); \ + CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST)); \ + UPDATE_MULTIVAR(ITER, rculist_next(ITER_VAR(ITER)))) + +#define RCULIST_FOR_EACH_CONTINUE(ITER, MEMBER, RCULIST) \ + for (INIT_MULTIVAR(ITER, MEMBER, rculist_next(&(ITER)->MEMBER), \ + const struct rculist); \ + CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST)); \ + UPDATE_MULTIVAR(ITER, rculist_next(ITER_VAR(ITER)))) + +#define RCULIST_FOR_EACH_REVERSE_PROTECTED(ITER, MEMBER, RCULIST) \ + for (INIT_MULTIVAR(ITER, MEMBER, (RCULIST)->prev, struct rculist); \ + CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST)); \ + UPDATE_MULTIVAR(ITER, ITER_VAR(ITER)->prev)) + +#define RCULIST_FOR_EACH_REVERSE_PROTECTED_CONTINUE(ITER, MEMBER, RCULIST) \ + for (INIT_MULTIVAR(ITER, MEMBER, (ITER)->MEMBER.prev, struct rculist); \ + CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST)); \ + UPDATE_MULTIVAR(ITER, ITER_VAR(ITER)->prev)) + +#define RCULIST_FOR_EACH_PROTECTED(ITER, MEMBER, RCULIST) \ + for (INIT_MULTIVAR(ITER, MEMBER, rculist_next_protected(RCULIST), \ + struct rculist); \ + CONDITION_MULTIVAR(ITER, MEMBER, ITER_VAR(ITER) != (RCULIST)); \ + UPDATE_MULTIVAR(ITER, rculist_next_protected(ITER_VAR(ITER)))) + +#define RCULIST_FOR_EACH_SAFE_SHORT_PROTECTED(ITER, MEMBER, RCULIST) \ + for (INIT_MULTIVAR_SAFE_SHORT(ITER, MEMBER, \ + rculist_next_protected(RCULIST), \ + struct rculist); \ + CONDITION_MULTIVAR_SAFE_SHORT(ITER, MEMBER, \ + ITER_VAR(ITER) != (RCULIST), \ + ITER_NEXT_VAR(ITER) = rculist_next_protected(ITER_VAR(ITER))); \ + UPDATE_MULTIVAR_SAFE_SHORT(ITER)) + +#define RCULIST_FOR_EACH_SAFE_LONG_PROTECTED(ITER, NEXT, MEMBER, RCULIST) \ + for (INIT_MULTIVAR_SAFE_LONG(ITER, NEXT, MEMBER, \ + rculist_next_protected(RCULIST), \ + struct rculist); \ + 
CONDITION_MULTIVAR_SAFE_LONG(ITER, NEXT, MEMBER, \ + ITER_VAR(ITER) != (RCULIST), \ + ITER_VAR(NEXT) = rculist_next_protected(ITER_VAR(ITER)), \ + ITER_VAR(NEXT) != (RCULIST)); \ + UPDATE_MULTIVAR_SAFE_LONG(ITER, NEXT)) + +#define RCULIST_FOR_EACH_SAFE_PROTECTED(...) \ + OVERLOAD_SAFE_MACRO(RCULIST_FOR_EACH_SAFE_LONG_PROTECTED, \ + RCULIST_FOR_EACH_SAFE_SHORT_PROTECTED, \ + 4, __VA_ARGS__) + #endif /* rculist.h */ diff --git a/lib/reconnect.c b/lib/reconnect.c index a929ddfd2d..89a0bcaf95 100644 --- a/lib/reconnect.c +++ b/lib/reconnect.c @@ -75,7 +75,8 @@ struct reconnect { static void reconnect_transition__(struct reconnect *, long long int now, enum state state); -static long long int reconnect_deadline__(const struct reconnect *); +static long long int reconnect_deadline__(const struct reconnect *, + long long int now); static bool reconnect_may_retry(struct reconnect *); static const char * @@ -539,7 +540,7 @@ reconnect_transition__(struct reconnect *fsm, long long int now, } static long long int -reconnect_deadline__(const struct reconnect *fsm) +reconnect_deadline__(const struct reconnect *fsm, long long int now) { ovs_assert(fsm->state_entered != LLONG_MIN); switch (fsm->state) { @@ -557,8 +558,18 @@ reconnect_deadline__(const struct reconnect *fsm) if (fsm->probe_interval) { long long int base = MAX(fsm->last_activity, fsm->state_entered); long long int expiration = base + fsm->probe_interval; - if (fsm->last_receive_attempt >= expiration) { + if (now < expiration || fsm->last_receive_attempt >= expiration) { + /* We still have time before the expiration or the time has + * already passed and there was no activity. In the first case + * we need to wait for the expiration, in the second - we're + * already past the deadline. */ return expiration; + } else { + /* Time has already passed, but we didn't attempt to receive + * anything. 
We need to wake up and try to receive even if + * nothing is pending, so we can update the expiration time or + * transition to a different state. */ + return now + 1; } } return LLONG_MAX; @@ -566,8 +577,10 @@ reconnect_deadline__(const struct reconnect *fsm) case S_IDLE: if (fsm->probe_interval) { long long int expiration = fsm->state_entered + fsm->probe_interval; - if (fsm->last_receive_attempt >= expiration) { + if (now < expiration || fsm->last_receive_attempt >= expiration) { return expiration; + } else { + return now + 1; } } return LLONG_MAX; @@ -618,7 +631,7 @@ reconnect_deadline__(const struct reconnect *fsm) enum reconnect_action reconnect_run(struct reconnect *fsm, long long int now) { - if (now >= reconnect_deadline__(fsm)) { + if (now >= reconnect_deadline__(fsm, now)) { switch (fsm->state) { case S_VOID: return 0; @@ -671,7 +684,7 @@ reconnect_wait(struct reconnect *fsm, long long int now) int reconnect_timeout(struct reconnect *fsm, long long int now) { - long long int deadline = reconnect_deadline__(fsm); + long long int deadline = reconnect_deadline__(fsm, now); if (deadline != LLONG_MAX) { long long int remaining = deadline - now; return MAX(0, MIN(INT_MAX, remaining)); diff --git a/lib/seq.c b/lib/seq.c index 6581cb06ba..99e5bf8bd1 100644 --- a/lib/seq.c +++ b/lib/seq.c @@ -297,9 +297,9 @@ static void seq_thread_woke(struct seq_thread *thread) OVS_REQUIRES(seq_mutex) { - struct seq_waiter *waiter, *next_waiter; + struct seq_waiter *waiter; - LIST_FOR_EACH_SAFE (waiter, next_waiter, list_node, &thread->waiters) { + LIST_FOR_EACH_SAFE (waiter, list_node, &thread->waiters) { ovs_assert(waiter->thread == thread); seq_waiter_destroy(waiter); } @@ -319,9 +319,9 @@ static void seq_wake_waiters(struct seq *seq) OVS_REQUIRES(seq_mutex) { - struct seq_waiter *waiter, *next_waiter; + struct seq_waiter *waiter; - HMAP_FOR_EACH_SAFE (waiter, next_waiter, hmap_node, &seq->waiters) { + HMAP_FOR_EACH_SAFE (waiter, hmap_node, &seq->waiters) { 
latch_set(&waiter->thread->latch); seq_waiter_destroy(waiter); } diff --git a/lib/shash.c b/lib/shash.c index a8433629ab..a7b2c64582 100644 --- a/lib/shash.c +++ b/lib/shash.c @@ -68,9 +68,9 @@ shash_moved(struct shash *sh) void shash_clear(struct shash *sh) { - struct shash_node *node, *next; + struct shash_node *node; - SHASH_FOR_EACH_SAFE (node, next, sh) { + SHASH_FOR_EACH_SAFE (node, sh) { hmap_remove(&sh->map, &node->node); free(node->name); free(node); @@ -81,9 +81,9 @@ shash_clear(struct shash *sh) void shash_clear_free_data(struct shash *sh) { - struct shash_node *node, *next; + struct shash_node *node; - SHASH_FOR_EACH_SAFE (node, next, sh) { + SHASH_FOR_EACH_SAFE (node, sh) { hmap_remove(&sh->map, &node->node); free(node->data); free(node->name); diff --git a/lib/simap.c b/lib/simap.c index f404ece677..0ee08d74d5 100644 --- a/lib/simap.c +++ b/lib/simap.c @@ -63,9 +63,9 @@ simap_moved(struct simap *simap) void simap_clear(struct simap *simap) { - struct simap_node *node, *next; + struct simap_node *node; - SIMAP_FOR_EACH_SAFE (node, next, simap) { + SIMAP_FOR_EACH_SAFE (node, simap) { hmap_remove(&simap->map, &node->node); free(node->name); free(node); diff --git a/lib/simap.h b/lib/simap.h index 5e646e6607..8db7bea7c9 100644 --- a/lib/simap.h +++ b/lib/simap.h @@ -41,12 +41,22 @@ struct simap_node { BUILD_ASSERT_TYPE(SIMAP_NODE, struct simap_node *), \ BUILD_ASSERT_TYPE(SIMAP, struct simap *)) -#define SIMAP_FOR_EACH_SAFE(SIMAP_NODE, NEXT, SIMAP) \ - HMAP_FOR_EACH_SAFE_INIT (SIMAP_NODE, NEXT, node, &(SIMAP)->map, \ +#define SIMAP_FOR_EACH_SAFE_SHORT(SIMAP_NODE, SIMAP) \ + HMAP_FOR_EACH_SAFE_SHORT_INIT (SIMAP_NODE, node, &(SIMAP)->map, \ BUILD_ASSERT_TYPE(SIMAP_NODE, struct simap_node *), \ - BUILD_ASSERT_TYPE(NEXT, struct simap_node *), \ BUILD_ASSERT_TYPE(SIMAP, struct simap *)) +#define SIMAP_FOR_EACH_SAFE_LONG(SIMAP_NODE, NEXT, SIMAP) \ + HMAP_FOR_EACH_SAFE_LONG_INIT (SIMAP_NODE, NEXT, node, &(SIMAP)->map, \ + BUILD_ASSERT_TYPE(SIMAP_NODE, struct 
simap_node *), \ + BUILD_ASSERT_TYPE(NEXT, struct simap_node *), \ + BUILD_ASSERT_TYPE(SIMAP, struct simap *)) + +#define SIMAP_FOR_EACH_SAFE(...) \ + OVERLOAD_SAFE_MACRO(SIMAP_FOR_EACH_SAFE_LONG, \ + SIMAP_FOR_EACH_SAFE_SHORT, \ + 3, __VA_ARGS__) + void simap_init(struct simap *); void simap_destroy(struct simap *); void simap_swap(struct simap *, struct simap *); diff --git a/lib/smap.c b/lib/smap.c index e82261497c..b23eeb52d3 100644 --- a/lib/smap.c +++ b/lib/smap.c @@ -185,9 +185,9 @@ smap_steal(struct smap *smap, struct smap_node *node, void smap_clear(struct smap *smap) { - struct smap_node *node, *next; + struct smap_node *node; - SMAP_FOR_EACH_SAFE (node, next, smap) { + SMAP_FOR_EACH_SAFE (node, smap) { smap_remove_node(smap, node); } } diff --git a/lib/smap.h b/lib/smap.h index a921159667..2fe6c540a7 100644 --- a/lib/smap.h +++ b/lib/smap.h @@ -45,13 +45,24 @@ struct smap_node { BUILD_ASSERT_TYPE(SMAP_NODE, struct smap_node *), \ BUILD_ASSERT_TYPE(SMAP, struct smap *)) -#define SMAP_FOR_EACH_SAFE(SMAP_NODE, NEXT, SMAP) \ - HMAP_FOR_EACH_SAFE_INIT ( \ +#define SMAP_FOR_EACH_SAFE_SHORT(SMAP_NODE, SMAP) \ + HMAP_FOR_EACH_SAFE_SHORT_INIT ( \ + SMAP_NODE, node, &(SMAP)->map, \ + BUILD_ASSERT_TYPE(SMAP_NODE, struct smap_node *), \ + BUILD_ASSERT_TYPE(SMAP, struct smap *)) + +#define SMAP_FOR_EACH_SAFE_LONG(SMAP_NODE, NEXT, SMAP) \ + HMAP_FOR_EACH_SAFE_LONG_INIT ( \ SMAP_NODE, NEXT, node, &(SMAP)->map, \ BUILD_ASSERT_TYPE(SMAP_NODE, struct smap_node *), \ BUILD_ASSERT_TYPE(NEXT, struct smap_node *), \ BUILD_ASSERT_TYPE(SMAP, struct smap *)) +#define SMAP_FOR_EACH_SAFE(...) \ + OVERLOAD_SAFE_MACRO(SMAP_FOR_EACH_SAFE_LONG, \ + SMAP_FOR_EACH_SAFE_SHORT, \ + 3, __VA_ARGS__) + /* Initializer for an immutable struct smap 'SMAP' that contains one or two * key-value pairs, e.g. 
* diff --git a/lib/socket-util.c b/lib/socket-util.c index 4f1ffecf5d..38705cc51e 100644 --- a/lib/socket-util.c +++ b/lib/socket-util.c @@ -62,7 +62,8 @@ static bool parse_sockaddr_components(struct sockaddr_storage *ss, const char *port_s, uint16_t default_port, const char *s, - bool resolve_host); + bool resolve_host, + bool *dns_failure); /* Sets 'fd' to non-blocking mode. Returns 0 if successful, otherwise a * positive errno value. */ @@ -438,7 +439,7 @@ parse_sockaddr_components_dns(struct sockaddr_storage *ss OVS_UNUSED, dns_resolve(host_s, &tmp_host_s); if (tmp_host_s != NULL) { parse_sockaddr_components(ss, tmp_host_s, port_s, - default_port, s, false); + default_port, s, false, NULL); free(tmp_host_s); return true; } @@ -450,11 +451,15 @@ parse_sockaddr_components(struct sockaddr_storage *ss, char *host_s, const char *port_s, uint16_t default_port, const char *s, - bool resolve_host) + bool resolve_host, bool *dns_failure) { struct sockaddr_in *sin = sin_cast(sa_cast(ss)); int port; + if (dns_failure) { + *dns_failure = false; + } + if (port_s && port_s[0]) { if (!str_to_int(port_s, 10, &port) || port < 0 || port > 65535) { VLOG_ERR("%s: bad port number \"%s\"", s, port_s); @@ -501,10 +506,15 @@ parse_sockaddr_components(struct sockaddr_storage *ss, return true; resolve: - if (resolve_host && parse_sockaddr_components_dns(ss, host_s, port_s, - default_port, s)) { - return true; - } else if (!resolve_host) { + if (resolve_host) { + if (parse_sockaddr_components_dns(ss, host_s, port_s, + default_port, s)) { + return true; + } + if (dns_failure) { + *dns_failure = true; + } + } else { VLOG_ERR("%s: bad IP address \"%s\"", s, host_s); } exit: @@ -521,10 +531,12 @@ exit: * It resolves the host if 'resolve_host' is true. * * On success, returns true and stores the parsed remote address into '*ss'. - * On failure, logs an error, stores zeros into '*ss', and returns false. 
*/ + * On failure, logs an error, stores zeros into '*ss', and returns false, + * '*dns_failure' indicates if the host resolution failed. */ bool inet_parse_active(const char *target_, int default_port, - struct sockaddr_storage *ss, bool resolve_host) + struct sockaddr_storage *ss, + bool resolve_host, bool *dns_failure) { char *target = xstrdup(target_); char *port, *host; @@ -539,7 +551,7 @@ inet_parse_active(const char *target_, int default_port, ok = false; } else { ok = parse_sockaddr_components(ss, host, port, default_port, - target_, resolve_host); + target_, resolve_host, dns_failure); } if (!ok) { memset(ss, 0, sizeof *ss); @@ -576,7 +588,7 @@ inet_open_active(int style, const char *target, int default_port, int error; /* Parse. */ - if (!inet_parse_active(target, default_port, &ss, true)) { + if (!inet_parse_active(target, default_port, &ss, true, NULL)) { error = EAFNOSUPPORT; goto exit; } @@ -660,7 +672,7 @@ inet_parse_passive(const char *target_, int default_port, ok = false; } else { ok = parse_sockaddr_components(ss, host, port, default_port, - target_, true); + target_, true, NULL); } if (!ok) { memset(ss, 0, sizeof *ss); @@ -783,7 +795,8 @@ inet_parse_address(const char *target_, struct sockaddr_storage *ss) { char *target = xstrdup(target_); char *host = unbracket(target); - bool ok = parse_sockaddr_components(ss, host, NULL, 0, target_, false); + bool ok = parse_sockaddr_components(ss, host, NULL, 0, + target_, false, NULL); if (!ok) { memset(ss, 0, sizeof *ss); } diff --git a/lib/socket-util.h b/lib/socket-util.h index 9ccb7d4cc4..bf66393df9 100644 --- a/lib/socket-util.h +++ b/lib/socket-util.h @@ -49,7 +49,8 @@ ovs_be32 guess_netmask(ovs_be32 ip); void inet_parse_host_port_tokens(char *s, char **hostp, char **portp); void inet_parse_port_host_tokens(char *s, char **portp, char **hostp); bool inet_parse_active(const char *target, int default_port, - struct sockaddr_storage *ssp, bool resolve_host); + struct sockaddr_storage *ssp, + bool 
resolve_host, bool *dns_failure); int inet_open_active(int style, const char *target, int default_port, struct sockaddr_storage *ssp, int *fdp, uint8_t dscp); diff --git a/lib/sset.c b/lib/sset.c index b2e3f43ec9..6fbaa9d60d 100644 --- a/lib/sset.c +++ b/lib/sset.c @@ -212,9 +212,9 @@ sset_add_array(struct sset *set, char **names, size_t n) void sset_clear(struct sset *set) { - const char *name, *next; + const char *name; - SSET_FOR_EACH_SAFE (name, next, set) { + SSET_FOR_EACH_SAFE (name, set) { sset_delete(set, SSET_NODE_FROM_NAME(name)); } } @@ -312,7 +312,9 @@ sset_at_position(const struct sset *set, struct sset_position *pos) struct hmap_node *hmap_node; hmap_node = hmap_at_position(&set->map, &pos->pos); - return SSET_NODE_FROM_HMAP_NODE(hmap_node); + return hmap_node + ? SSET_NODE_FROM_HMAP_NODE(hmap_node) + : NULL; } /* Replaces 'a' by the intersection of 'a' and 'b'. That is, removes from 'a' @@ -320,9 +322,9 @@ sset_at_position(const struct sset *set, struct sset_position *pos) void sset_intersect(struct sset *a, const struct sset *b) { - const char *name, *next; + const char *name; - SSET_FOR_EACH_SAFE (name, next, a) { + SSET_FOR_EACH_SAFE (name, a) { if (!sset_contains(b, name)) { sset_delete(a, SSET_NODE_FROM_NAME(name)); } diff --git a/lib/sset.h b/lib/sset.h index f0bb8b5344..214d6fb41c 100644 --- a/lib/sset.h +++ b/lib/sset.h @@ -87,13 +87,26 @@ void sset_intersect(struct sset *, const struct sset *); NAME != NULL; \ (NAME) = SSET_NEXT(SSET, NAME)) -#define SSET_FOR_EACH_SAFE(NAME, NEXT, SSET) \ +#define SSET_FOR_EACH_SAFE_LONG(NAME, NEXT, SSET) \ for ((NAME) = SSET_FIRST(SSET); \ (NAME != NULL \ ? (NEXT) = SSET_NEXT(SSET, NAME), true \ : false); \ (NAME) = (NEXT)) +#define SSET_FOR_EACH_SAFE_SHORT(NAME, SSET) \ + for (const char * NAME__next = \ + ((NAME) = SSET_FIRST(SSET), NULL); \ + (NAME != NULL \ + ? (NAME__next = SSET_NEXT(SSET, NAME), true) \ + : (NAME__next = NULL, false)); \ + (NAME) = NAME__next) + +#define SSET_FOR_EACH_SAFE(...) 
\ + OVERLOAD_SAFE_MACRO(SSET_FOR_EACH_SAFE_LONG, \ + SSET_FOR_EACH_SAFE_SHORT, \ + 3, __VA_ARGS__) + const char **sset_array(const struct sset *); const char **sset_sort(const struct sset *); diff --git a/lib/stopwatch.c b/lib/stopwatch.c index 1c71df1a12..ec567603b1 100644 --- a/lib/stopwatch.c +++ b/lib/stopwatch.c @@ -464,7 +464,7 @@ stopwatch_thread(void *ign OVS_UNUSED) static void stopwatch_exit(void) { - struct shash_node *node, *node_next; + struct shash_node *node; struct stopwatch_packet *pkt = stopwatch_packet_create(OP_SHUTDOWN); stopwatch_packet_write(pkt); xpthread_join(stopwatch_thread_id, NULL); @@ -473,7 +473,7 @@ stopwatch_exit(void) * other competing thread. We are now the sole owners * of all data in the file. */ - SHASH_FOR_EACH_SAFE (node, node_next, &stopwatches) { + SHASH_FOR_EACH_SAFE (node, &stopwatches) { struct stopwatch *sw = node->data; shash_delete(&stopwatches, node); free(sw); diff --git a/lib/stream.c b/lib/stream.c index fcaddf10ad..71039e24f1 100644 --- a/lib/stream.c +++ b/lib/stream.c @@ -788,7 +788,7 @@ stream_parse_target_with_default_port(const char *target, int default_port, struct sockaddr_storage *ss) { return ((!strncmp(target, "tcp:", 4) || !strncmp(target, "ssl:", 4)) - && inet_parse_active(target + 4, default_port, ss, true)); + && inet_parse_active(target + 4, default_port, ss, true, NULL)); } /* Attempts to guess the content type of a stream whose first few bytes were diff --git a/lib/tc.c b/lib/tc.c index adb2d3182a..bbb8c86f7b 100644 --- a/lib/tc.c +++ b/lib/tc.c @@ -568,16 +568,17 @@ nl_parse_flower_vlan(struct nlattr **attrs, struct tc_flower *flower) flower->key.encap_eth_type[0] = nl_attr_get_be16(attrs[TCA_FLOWER_KEY_ETH_TYPE]); + flower->mask.encap_eth_type[0] = CONSTANT_HTONS(0xffff); if (attrs[TCA_FLOWER_KEY_VLAN_ID]) { flower->key.vlan_id[0] = nl_attr_get_u16(attrs[TCA_FLOWER_KEY_VLAN_ID]); - flower->mask.vlan_id[0] = 0xffff; + flower->mask.vlan_id[0] = VLAN_VID_MASK >> VLAN_VID_SHIFT; } if 
(attrs[TCA_FLOWER_KEY_VLAN_PRIO]) { flower->key.vlan_prio[0] = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_VLAN_PRIO]); - flower->mask.vlan_prio[0] = 0xff; + flower->mask.vlan_prio[0] = VLAN_PCP_MASK >> VLAN_PCP_SHIFT; } if (!attrs[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) { @@ -590,17 +591,18 @@ nl_parse_flower_vlan(struct nlattr **attrs, struct tc_flower *flower) } flower->key.encap_eth_type[1] = flower->key.encap_eth_type[0]; + flower->mask.encap_eth_type[1] = CONSTANT_HTONS(0xffff); flower->key.encap_eth_type[0] = encap_ethtype; if (attrs[TCA_FLOWER_KEY_CVLAN_ID]) { flower->key.vlan_id[1] = nl_attr_get_u16(attrs[TCA_FLOWER_KEY_CVLAN_ID]); - flower->mask.vlan_id[1] = 0xffff; + flower->mask.vlan_id[1] = VLAN_VID_MASK >> VLAN_VID_SHIFT; } if (attrs[TCA_FLOWER_KEY_CVLAN_PRIO]) { flower->key.vlan_prio[1] = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_CVLAN_PRIO]); - flower->mask.vlan_prio[1] = 0xff; + flower->mask.vlan_prio[1] = VLAN_PCP_MASK >> VLAN_PCP_SHIFT; } } @@ -937,24 +939,21 @@ nl_parse_flower_ip(struct nlattr **attrs, struct tc_flower *flower) { key->icmp_code = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_CODE]); mask->icmp_code = - nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_CODE]); + nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_CODE_MASK]); } if (attrs[TCA_FLOWER_KEY_ICMPV4_TYPE_MASK]) { - key->icmp_type = - nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_TYPE_MASK]); + key->icmp_type = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_TYPE]); mask->icmp_type = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV4_TYPE_MASK]); } } else if (ip_proto == IPPROTO_ICMPV6) { if (attrs[TCA_FLOWER_KEY_ICMPV6_CODE_MASK]) { - key->icmp_code = - nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_CODE]); + key->icmp_code = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_CODE]); mask->icmp_code = - nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_CODE]); + nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_CODE_MASK]); } if (attrs[TCA_FLOWER_KEY_ICMPV6_TYPE_MASK]) { - key->icmp_type = - nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_TYPE_MASK]); + 
key->icmp_type = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_TYPE]); mask->icmp_type = nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ICMPV6_TYPE_MASK]); } @@ -1006,14 +1005,14 @@ static const struct nl_policy pedit_policy[] = { static int nl_parse_act_pedit(struct nlattr *options, struct tc_flower *flower) { - struct tc_action *action; + struct tc_action *action = &flower->actions[flower->action_count++]; struct nlattr *pe_attrs[ARRAY_SIZE(pedit_policy)]; const struct tc_pedit *pe; const struct tc_pedit_key *keys; const struct nlattr *nla, *keys_ex, *ex_type; const void *keys_attr; - char *rewrite_key = (void *) &flower->rewrite.key; - char *rewrite_mask = (void *) &flower->rewrite.mask; + char *rewrite_key = (void *) &action->rewrite.key; + char *rewrite_mask = (void *) &action->rewrite.mask; size_t keys_ex_size, left; int type, i = 0, err; @@ -1092,7 +1091,6 @@ nl_parse_act_pedit(struct nlattr *options, struct tc_flower *flower) i++; } - action = &flower->actions[flower->action_count++]; action->type = TC_ACT_PEDIT; return 0; @@ -1314,8 +1312,8 @@ nl_parse_act_gact(struct nlattr *options, struct tc_flower *flower) struct nlattr *gact_attrs[ARRAY_SIZE(gact_policy)]; const struct tc_gact *p; struct nlattr *gact_parms; - const struct tcf_t *tm; struct tc_action *action; + struct tcf_t tm; if (!nl_parse_nested(options, gact_policy, gact_attrs, ARRAY_SIZE(gact_policy))) { @@ -1335,8 +1333,9 @@ nl_parse_act_gact(struct nlattr *options, struct tc_flower *flower) return EINVAL; } - tm = nl_attr_get_unspec(gact_attrs[TCA_GACT_TM], sizeof *tm); - nl_parse_tcf(tm, flower); + memcpy(&tm, nl_attr_get_unspec(gact_attrs[TCA_GACT_TM], sizeof tm), + sizeof tm); + nl_parse_tcf(&tm, flower); return 0; } @@ -1357,9 +1356,9 @@ nl_parse_act_mirred(struct nlattr *options, struct tc_flower *flower) struct nlattr *mirred_attrs[ARRAY_SIZE(mirred_policy)]; const struct tc_mirred *m; const struct nlattr *mirred_parms; - const struct tcf_t *tm; struct nlattr *mirred_tm; struct tc_action *action; + 
struct tcf_t tm; if (!nl_parse_nested(options, mirred_policy, mirred_attrs, ARRAY_SIZE(mirred_policy))) { @@ -1387,8 +1386,8 @@ nl_parse_act_mirred(struct nlattr *options, struct tc_flower *flower) action->type = TC_ACT_OUTPUT; mirred_tm = mirred_attrs[TCA_MIRRED_TM]; - tm = nl_attr_get_unspec(mirred_tm, sizeof *tm); - nl_parse_tcf(tm, flower); + memcpy(&tm, nl_attr_get_unspec(mirred_tm, sizeof tm), sizeof tm); + nl_parse_tcf(&tm, flower); return 0; } @@ -1487,7 +1486,9 @@ nl_parse_act_ct(struct nlattr *options, struct tc_flower *flower) if (ipv4_max) { ovs_be32 addr = nl_attr_get_be32(ipv4_max); - action->ct.range.ipv4.max = addr; + if (action->ct.range.ipv4.min != addr) { + action->ct.range.ipv4.max = addr; + } } } else if (ipv6_min) { action->ct.range.ip_family = AF_INET6; @@ -1496,7 +1497,9 @@ nl_parse_act_ct(struct nlattr *options, struct tc_flower *flower) if (ipv6_max) { struct in6_addr addr = nl_attr_get_in6_addr(ipv6_max); - action->ct.range.ipv6.max = addr; + if (!ipv6_addr_equals(&action->ct.range.ipv6.min, &addr)) { + action->ct.range.ipv6.max = addr; + } } } @@ -1504,6 +1507,10 @@ nl_parse_act_ct(struct nlattr *options, struct tc_flower *flower) action->ct.range.port.min = nl_attr_get_be16(port_min); if (port_max) { action->ct.range.port.max = nl_attr_get_be16(port_max); + if (action->ct.range.port.min == + action->ct.range.port.max) { + action->ct.range.port.max = 0; + } } } } @@ -1702,6 +1709,9 @@ static const struct nl_policy stats_policy[] = { [TCA_STATS_BASIC] = { .type = NL_A_UNSPEC, .min_len = sizeof(struct gnet_stats_basic), .optional = false, }, + [TCA_STATS_BASIC_HW] = { .type = NL_A_UNSPEC, + .min_len = sizeof(struct gnet_stats_basic), + .optional = true, }, }; static int @@ -1714,8 +1724,9 @@ nl_parse_single_action(struct nlattr *action, struct tc_flower *flower, const char *act_kind; struct nlattr *action_attrs[ARRAY_SIZE(act_policy)]; struct nlattr *stats_attrs[ARRAY_SIZE(stats_policy)]; - struct ovs_flow_stats *stats = &flower->stats; - 
const struct gnet_stats_basic *bs; + struct ovs_flow_stats *stats_sw = &flower->stats_sw; + struct ovs_flow_stats *stats_hw = &flower->stats_hw; + struct gnet_stats_basic bs_all, bs_hw, bs_sw; int err = 0; if (!nl_parse_nested(action, act_policy, action_attrs, @@ -1771,10 +1782,30 @@ nl_parse_single_action(struct nlattr *action, struct tc_flower *flower, return EPROTO; } - bs = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC], sizeof *bs); - if (bs->packets) { - put_32aligned_u64(&stats->n_packets, bs->packets); - put_32aligned_u64(&stats->n_bytes, bs->bytes); + memcpy(&bs_all, + nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC], sizeof bs_all), + sizeof bs_all); + if (stats_attrs[TCA_STATS_BASIC_HW]) { + memcpy(&bs_hw, nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC_HW], + sizeof bs_hw), + sizeof bs_hw); + + bs_sw.packets = bs_all.packets - bs_hw.packets; + bs_sw.bytes = bs_all.bytes - bs_hw.bytes; + } else { + bs_sw.packets = bs_all.packets; + bs_sw.bytes = bs_all.bytes; + } + + if (bs_sw.packets > get_32aligned_u64(&stats_sw->n_packets)) { + put_32aligned_u64(&stats_sw->n_packets, bs_sw.packets); + put_32aligned_u64(&stats_sw->n_bytes, bs_sw.bytes); + } + + if (stats_attrs[TCA_STATS_BASIC_HW] + && bs_hw.packets > get_32aligned_u64(&stats_hw->n_packets)) { + put_32aligned_u64(&stats_hw->n_packets, bs_hw.packets); + put_32aligned_u64(&stats_hw->n_bytes, bs_hw.bytes); } return 0; @@ -2399,14 +2430,14 @@ nl_msg_put_act_flags(struct ofpbuf *request) { * first_word_mask/last_word_mask - the mask to use for the first/last read * (as we read entire words). 
*/ static void -calc_offsets(struct tc_flower *flower, struct flower_key_to_pedit *m, +calc_offsets(struct tc_action *action, struct flower_key_to_pedit *m, int *cur_offset, int *cnt, ovs_be32 *last_word_mask, ovs_be32 *first_word_mask, ovs_be32 **mask, ovs_be32 **data) { int start_offset, max_offset, total_size; int diff, right_zero_bits, left_zero_bits; - char *rewrite_key = (void *) &flower->rewrite.key; - char *rewrite_mask = (void *) &flower->rewrite.mask; + char *rewrite_key = (void *) &action->rewrite.key; + char *rewrite_mask = (void *) &action->rewrite.mask; max_offset = m->offset + m->size; start_offset = ROUND_DOWN(m->offset, 4); @@ -2473,7 +2504,8 @@ csum_update_flag(struct tc_flower *flower, static int nl_msg_put_flower_rewrite_pedits(struct ofpbuf *request, - struct tc_flower *flower) + struct tc_flower *flower, + struct tc_action *action) { struct { struct tc_pedit sel; @@ -2497,7 +2529,7 @@ nl_msg_put_flower_rewrite_pedits(struct ofpbuf *request, continue; } - calc_offsets(flower, m, &cur_offset, &cnt, &last_word_mask, + calc_offsets(action, m, &cur_offset, &cnt, &last_word_mask, &first_word_mask, &mask, &data); for (j = 0; j < cnt; j++, mask++, data++, cur_offset += 4) { @@ -2556,6 +2588,29 @@ nl_msg_put_flower_acts_release(struct ofpbuf *request, uint16_t act_index) nl_msg_end_nested(request, act_offset); } +/* Aggregates all previous successive pedit actions csum_update_flags + * to flower->csum_update_flags. Only append one csum action to the + * last pedit action. */ +static void +nl_msg_put_csum_act(struct ofpbuf *request, struct tc_flower *flower, + uint16_t *act_index) +{ + size_t act_offset; + + /* No pedit actions or processed already. */ + if (!flower->csum_update_flags) { + return; + } + + act_offset = nl_msg_start_nested(request, (*act_index)++); + nl_msg_put_act_csum(request, flower->csum_update_flags); + nl_msg_put_act_flags(request); + nl_msg_end_nested(request, act_offset); + + /* Clear it. 
So we can have another series of pedit actions. */ + flower->csum_update_flags = 0; +} + static int nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower) { @@ -2572,20 +2627,22 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower) action = flower->actions; for (i = 0; i < flower->action_count; i++, action++) { + if (action->type != TC_ACT_PEDIT) { + nl_msg_put_csum_act(request, flower, &act_index); + } switch (action->type) { case TC_ACT_PEDIT: { act_offset = nl_msg_start_nested(request, act_index++); - error = nl_msg_put_flower_rewrite_pedits(request, flower); + error = nl_msg_put_flower_rewrite_pedits(request, flower, + action); if (error) { return error; } nl_msg_end_nested(request, act_offset); - if (flower->csum_update_flags) { - act_offset = nl_msg_start_nested(request, act_index++); - nl_msg_put_act_csum(request, flower->csum_update_flags); - nl_msg_put_act_flags(request); - nl_msg_end_nested(request, act_offset); + if (i == flower->action_count - 1) { + /* If this is the last action check csum calc again. 
*/ + nl_msg_put_csum_act(request, flower, &act_index); } } break; @@ -2914,13 +2971,13 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower) FLOWER_PUT_MASKED_VALUE(icmp_code, TCA_FLOWER_KEY_ICMPV6_CODE); FLOWER_PUT_MASKED_VALUE(icmp_type, TCA_FLOWER_KEY_ICMPV6_TYPE); } - - FLOWER_PUT_MASKED_VALUE(ct_state, TCA_FLOWER_KEY_CT_STATE); - FLOWER_PUT_MASKED_VALUE(ct_zone, TCA_FLOWER_KEY_CT_ZONE); - FLOWER_PUT_MASKED_VALUE(ct_mark, TCA_FLOWER_KEY_CT_MARK); - FLOWER_PUT_MASKED_VALUE(ct_label, TCA_FLOWER_KEY_CT_LABELS); } + FLOWER_PUT_MASKED_VALUE(ct_state, TCA_FLOWER_KEY_CT_STATE); + FLOWER_PUT_MASKED_VALUE(ct_zone, TCA_FLOWER_KEY_CT_ZONE); + FLOWER_PUT_MASKED_VALUE(ct_mark, TCA_FLOWER_KEY_CT_MARK); + FLOWER_PUT_MASKED_VALUE(ct_label, TCA_FLOWER_KEY_CT_LABELS); + if (host_eth_type == ETH_P_IP) { FLOWER_PUT_MASKED_VALUE(ipv4.ipv4_src, TCA_FLOWER_KEY_IPV4_SRC); FLOWER_PUT_MASKED_VALUE(ipv4.ipv4_dst, TCA_FLOWER_KEY_IPV4_DST); @@ -2993,12 +3050,79 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower) return 0; } +static void +log_tc_flower_match(const char *msg, + const struct tc_flower *a, + const struct tc_flower *b) +{ + uint8_t key_a[sizeof(struct tc_flower_key)]; + uint8_t key_b[sizeof(struct tc_flower_key)]; + struct ds s = DS_EMPTY_INITIALIZER; + + for (int i = 0; i < sizeof a->key; i++) { + uint8_t mask_a = ((uint8_t *) &a->mask)[i]; + uint8_t mask_b = ((uint8_t *) &b->mask)[i]; + + key_a[i] = ((uint8_t *) &a->key)[i] & mask_a; + key_b[i] = ((uint8_t *) &b->key)[i] & mask_b; + } + ds_put_cstr(&s, "\nExpected Mask:\n"); + ds_put_hex(&s, &a->mask, sizeof a->mask); + ds_put_cstr(&s, "\nReceived Mask:\n"); + ds_put_hex(&s, &b->mask, sizeof b->mask); + ds_put_cstr(&s, "\nExpected Key:\n"); + ds_put_hex(&s, &a->key, sizeof a->key); + ds_put_cstr(&s, "\nReceived Key:\n"); + ds_put_hex(&s, &b->key, sizeof b->key); + ds_put_cstr(&s, "\nExpected Masked Key:\n"); + ds_put_hex(&s, key_a, sizeof key_a); + ds_put_cstr(&s, 
"\nReceived Masked Key:\n"); + ds_put_hex(&s, key_b, sizeof key_b); + + if (a->action_count != b->action_count) { + /* If action count is not equal, we print all actions to see which + * ones are missing. */ + const struct tc_action *action; + int i; + + ds_put_cstr(&s, "\nExpected Actions:\n"); + for (i = 0, action = a->actions; i < a->action_count; i++, action++) { + ds_put_cstr(&s, " - "); + ds_put_hex(&s, action, sizeof *action); + ds_put_cstr(&s, "\n"); + } + ds_put_cstr(&s, "Received Actions:\n"); + for (i = 0, action = b->actions; i < b->action_count; i++, action++) { + ds_put_cstr(&s, " - "); + ds_put_hex(&s, action, sizeof *action); + ds_put_cstr(&s, "\n"); + } + } else { + /* Only dump the delta in actions. */ + const struct tc_action *action_a = a->actions; + const struct tc_action *action_b = b->actions; + + for (int i = 0; i < a->action_count; i++, action_a++, action_b++) { + if (memcmp(action_a, action_b, sizeof *action_a)) { + ds_put_format(&s, + "\nAction %d mismatch:\n - Expected Action: ", + i); + ds_put_hex(&s, action_a, sizeof *action_a); + ds_put_cstr(&s, "\n - Received Action: "); + ds_put_hex(&s, action_b, sizeof *action_b); + } + } + } + VLOG_DBG_RL(&error_rl, "%s%s", msg, ds_cstr(&s)); + ds_destroy(&s); +} + static bool cmp_tc_flower_match_action(const struct tc_flower *a, const struct tc_flower *b) { if (memcmp(&a->mask, &b->mask, sizeof a->mask)) { - VLOG_DBG_RL(&error_rl, "tc flower compare failed mask compare"); + log_tc_flower_match("tc flower compare failed mask compare:", a, b); return false; } @@ -3011,8 +3135,8 @@ cmp_tc_flower_match_action(const struct tc_flower *a, uint8_t key_b = ((uint8_t *)&b->key)[i] & mask; if (key_a != key_b) { - VLOG_DBG_RL(&error_rl, "tc flower compare failed key compare at " - "%d", i); + log_tc_flower_match("tc flower compare failed masked key compare:", + a, b); return false; } } @@ -3022,14 +3146,15 @@ cmp_tc_flower_match_action(const struct tc_flower *a, const struct tc_action *action_b = b->actions; 
if (a->action_count != b->action_count) { - VLOG_DBG_RL(&error_rl, "tc flower compare failed action length check"); + log_tc_flower_match("tc flower compare failed action length check", + a, b); return false; } for (int i = 0; i < a->action_count; i++, action_a++, action_b++) { if (memcmp(action_a, action_b, sizeof *action_a)) { - VLOG_DBG_RL(&error_rl, "tc flower compare failed action compare " - "for %d", i); + log_tc_flower_match("tc flower compare failed action compare", + a, b); return false; } } diff --git a/lib/tc.h b/lib/tc.h index a147ca461d..d6cdddd169 100644 --- a/lib/tc.h +++ b/lib/tc.h @@ -256,11 +256,23 @@ struct tc_action { bool force; bool commit; } ct; + + struct { + struct tc_flower_key key; + struct tc_flower_key mask; + } rewrite; }; enum tc_action_type type; }; +/* assert that if we overflow with a masked write of uint32_t to the last byte + * of action.rewrite we overflow inside struct tc_action. + * shouldn't happen unless someone moves rewrite to the end of action */ +BUILD_ASSERT_DECL(offsetof(struct tc_action, rewrite) + + MEMBER_SIZEOF(struct tc_action, rewrite) + + sizeof(uint32_t) - 2 < sizeof(struct tc_action)); + enum tc_offloaded_state { TC_OFFLOADED_STATE_UNDEFINED, TC_OFFLOADED_STATE_IN_HW, @@ -330,15 +342,10 @@ struct tc_flower { int action_count; struct tc_action actions[TCA_ACT_MAX_NUM]; - struct ovs_flow_stats stats; + struct ovs_flow_stats stats_sw; + struct ovs_flow_stats stats_hw; uint64_t lastused; - struct { - bool rewrite; - struct tc_flower_key key; - struct tc_flower_key mask; - } rewrite; - uint32_t csum_update_flags; bool tunnel; @@ -352,13 +359,6 @@ struct tc_flower { enum tc_offload_policy tc_policy; }; -/* assert that if we overflow with a masked write of uint32_t to the last byte - * of flower.rewrite we overflow inside struct flower. 
- * shouldn't happen unless someone moves rewrite to the end of flower */ -BUILD_ASSERT_DECL(offsetof(struct tc_flower, rewrite) - + MEMBER_SIZEOF(struct tc_flower, rewrite) - + sizeof(uint32_t) - 2 < sizeof(struct tc_flower)); - int tc_replace_flower(struct tcf_id *id, struct tc_flower *flower); int tc_del_filter(struct tcf_id *id); int tc_get_flower(struct tcf_id *id, struct tc_flower *flower); diff --git a/lib/tnl-ports.c b/lib/tnl-ports.c index 58269d3b16..050eafa6b8 100644 --- a/lib/tnl-ports.c +++ b/lib/tnl-ports.c @@ -71,7 +71,7 @@ tnl_port_cast(const struct cls_rule *cr) { BUILD_ASSERT_DECL(offsetof(struct tnl_port_in, cr) == 0); - return CONTAINER_OF(cr, struct tnl_port_in, cr); + return cr ? CONTAINER_OF(cr, struct tnl_port_in, cr) : NULL; } static void @@ -259,14 +259,14 @@ ipdev_map_delete(struct ip_device *ip_dev, ovs_be16 tp_port, uint8_t nw_proto) void tnl_port_map_delete(odp_port_t port, const char type[]) { - struct tnl_port *p, *next; + struct tnl_port *p; struct ip_device *ip_dev; uint8_t nw_proto; nw_proto = tnl_type_to_nw_proto(type); ovs_mutex_lock(&mutex); - LIST_FOR_EACH_SAFE(p, next, node, &port_list) { + LIST_FOR_EACH_SAFE (p, node, &port_list) { if (p->port == port && p->nw_proto == nw_proto && ovs_refcount_unref_relaxed(&p->ref_cnt) == 1) { ovs_list_remove(&p->node); @@ -444,11 +444,11 @@ delete_ipdev(struct ip_device *ip_dev) void tnl_port_map_insert_ipdev(const char dev_name[]) { - struct ip_device *ip_dev, *next; + struct ip_device *ip_dev; ovs_mutex_lock(&mutex); - LIST_FOR_EACH_SAFE(ip_dev, next, node, &addr_list) { + LIST_FOR_EACH_SAFE (ip_dev, node, &addr_list) { if (!strcmp(netdev_get_name(ip_dev->dev), dev_name)) { if (ip_dev->change_seq == netdev_get_change_seq(ip_dev->dev)) { goto out; @@ -466,10 +466,10 @@ out: void tnl_port_map_delete_ipdev(const char dev_name[]) { - struct ip_device *ip_dev, *next; + struct ip_device *ip_dev; ovs_mutex_lock(&mutex); - LIST_FOR_EACH_SAFE(ip_dev, next, node, &addr_list) { + LIST_FOR_EACH_SAFE 
(ip_dev, node, &addr_list) { if (!strcmp(netdev_get_name(ip_dev->dev), dev_name)) { delete_ipdev(ip_dev); } @@ -480,10 +480,10 @@ tnl_port_map_delete_ipdev(const char dev_name[]) void tnl_port_map_run(void) { - struct ip_device *ip_dev, *next; + struct ip_device *ip_dev; ovs_mutex_lock(&mutex); - LIST_FOR_EACH_SAFE(ip_dev, next, node, &addr_list) { + LIST_FOR_EACH_SAFE (ip_dev, node, &addr_list) { char dev_name[IFNAMSIZ]; if (ip_dev->change_seq == netdev_get_change_seq(ip_dev->dev)) { diff --git a/lib/unixctl.c b/lib/unixctl.c index 69aed6722c..103357ee91 100644 --- a/lib/unixctl.c +++ b/lib/unixctl.c @@ -390,8 +390,8 @@ unixctl_server_run(struct unixctl_server *server) } } - struct unixctl_conn *conn, *next; - LIST_FOR_EACH_SAFE (conn, next, node, &server->conns) { + struct unixctl_conn *conn; + LIST_FOR_EACH_SAFE (conn, node, &server->conns) { int error = run_connection(conn); if (error && error != EAGAIN) { kill_connection(conn); @@ -422,9 +422,9 @@ void unixctl_server_destroy(struct unixctl_server *server) { if (server) { - struct unixctl_conn *conn, *next; + struct unixctl_conn *conn; - LIST_FOR_EACH_SAFE (conn, next, node, &server->conns) { + LIST_FOR_EACH_SAFE (conn, node, &server->conns) { kill_connection(conn); } diff --git a/lib/vconn.c b/lib/vconn.c index 7415e6291f..b556762277 100644 --- a/lib/vconn.c +++ b/lib/vconn.c @@ -960,8 +960,8 @@ vconn_transact_multipart(struct vconn *vconn, ovs_list_init(replies); /* Send all the requests. 
*/ - struct ofpbuf *b, *next; - LIST_FOR_EACH_SAFE (b, next, list_node, requests) { + struct ofpbuf *b; + LIST_FOR_EACH_SAFE (b, list_node, requests) { ovs_list_remove(&b->list_node); int error = vconn_send_block(vconn, b); if (error) { diff --git a/ofproto/bond.c b/ofproto/bond.c index cdfdf0b9d8..845f69e21d 100644 --- a/ofproto/bond.c +++ b/ofproto/bond.c @@ -338,7 +338,7 @@ static void update_recirc_rules__(struct bond *bond) { struct match match; - struct bond_pr_rule_op *pr_op, *next_op; + struct bond_pr_rule_op *pr_op; uint64_t ofpacts_stub[128 / 8]; struct ofpbuf ofpacts; int i; @@ -372,7 +372,7 @@ update_recirc_rules__(struct bond *bond) ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub); - HMAP_FOR_EACH_SAFE(pr_op, next_op, hmap_node, &bond->pr_rule_ops) { + HMAP_FOR_EACH_SAFE (pr_op, hmap_node, &bond->pr_rule_ops) { int error; switch (pr_op->op) { case ADD: @@ -1258,7 +1258,7 @@ insert_bal(struct ovs_list *bals, struct bond_member *member) break; } } - ovs_list_insert(&pos->bal_node, &member->bal_node); + ovs_list_insert(pos ? 
&pos->bal_node : bals, &member->bal_node); } /* Removes 'member' from its current list and then inserts it into 'bals' so diff --git a/ofproto/connmgr.c b/ofproto/connmgr.c index fa8f6cd0e8..172a58cfb7 100644 --- a/ofproto/connmgr.c +++ b/ofproto/connmgr.c @@ -310,8 +310,8 @@ connmgr_destroy(struct connmgr *mgr) return; } - struct ofservice *ofservice, *next_ofservice; - HMAP_FOR_EACH_SAFE (ofservice, next_ofservice, hmap_node, &mgr->services) { + struct ofservice *ofservice; + HMAP_FOR_EACH_SAFE (ofservice, hmap_node, &mgr->services) { ofservice_destroy(ofservice); } hmap_destroy(&mgr->services); @@ -351,8 +351,8 @@ connmgr_run(struct connmgr *mgr, } } - struct ofconn *ofconn, *next_ofconn; - LIST_FOR_EACH_SAFE (ofconn, next_ofconn, connmgr_node, &mgr->conns) { + struct ofconn *ofconn; + LIST_FOR_EACH_SAFE (ofconn, connmgr_node, &mgr->conns) { ofconn_run(ofconn, handle_openflow); } ofmonitor_run(mgr); @@ -592,8 +592,8 @@ connmgr_set_controllers(struct connmgr *mgr, struct shash *controllers) /* Delete services that are no longer configured. * Update configuration of all now-existing services. 
*/ - struct ofservice *ofservice, *next_ofservice; - HMAP_FOR_EACH_SAFE (ofservice, next_ofservice, hmap_node, &mgr->services) { + struct ofservice *ofservice; + HMAP_FOR_EACH_SAFE (ofservice, hmap_node, &mgr->services) { const char *target = ofservice->target; struct ofproto_controller *c = shash_find_data(controllers, target); if (!c) { @@ -1137,9 +1137,9 @@ ofconn_remove_bundle(struct ofconn *ofconn, struct ofp_bundle *bundle) static void bundle_remove_all(struct ofconn *ofconn) { - struct ofp_bundle *b, *next; + struct ofp_bundle *b; - HMAP_FOR_EACH_SAFE (b, next, node, &ofconn->bundles) { + HMAP_FOR_EACH_SAFE (b, node, &ofconn->bundles) { ofp_bundle_remove__(ofconn, b); } } @@ -1149,8 +1149,8 @@ bundle_remove_expired(struct ofconn *ofconn, long long int now) { long long int limit = now - bundle_idle_timeout; - struct ofp_bundle *b, *next; - HMAP_FOR_EACH_SAFE (b, next, node, &ofconn->bundles) { + struct ofp_bundle *b; + HMAP_FOR_EACH_SAFE (b, node, &ofconn->bundles) { if (b->used <= limit) { ofconn_send_error(ofconn, b->msg, OFPERR_OFPBFC_TIMEOUT); ofp_bundle_remove__(ofconn, b); @@ -1247,8 +1247,8 @@ ofconn_destroy(struct ofconn *ofconn) free(ofconn->async_cfg); - struct ofmonitor *monitor, *next_monitor; - HMAP_FOR_EACH_SAFE (monitor, next_monitor, ofconn_node, + struct ofmonitor *monitor; + HMAP_FOR_EACH_SAFE (monitor, ofconn_node, &ofconn->monitors) { ofmonitor_destroy(monitor); } @@ -1953,8 +1953,8 @@ static void ofservice_close_all(struct ofservice *ofservice) OVS_REQUIRES(ofproto_mutex) { - struct ofconn *ofconn, *next; - LIST_FOR_EACH_SAFE (ofconn, next, ofservice_node, &ofservice->conns) { + struct ofconn *ofconn; + LIST_FOR_EACH_SAFE (ofconn, ofservice_node, &ofservice->conns) { ofconn_destroy(ofconn); } } diff --git a/ofproto/in-band.c b/ofproto/in-band.c index 82d8dfa147..3992251f5f 100644 --- a/ofproto/in-band.c +++ b/ofproto/in-band.c @@ -377,7 +377,7 @@ in_band_run(struct in_band *ib) uint64_t ofpacts_stub[128 / 8]; struct ofpbuf ofpacts; - 
struct in_band_rule *rule, *next; + struct in_band_rule *rule; ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub); @@ -391,7 +391,7 @@ in_band_run(struct in_band *ib) update_rules(ib); - HMAP_FOR_EACH_SAFE (rule, next, hmap_node, &ib->rules) { + HMAP_FOR_EACH_SAFE (rule, hmap_node, &ib->rules) { switch (rule->op) { case ADD: ofproto_add_flow(ib->ofproto, &rule->match, rule->priority, diff --git a/ofproto/netflow.c b/ofproto/netflow.c index ed58de17de..aad9f9c77a 100644 --- a/ofproto/netflow.c +++ b/ofproto/netflow.c @@ -299,7 +299,7 @@ static void netflow_run__(struct netflow *nf) OVS_REQUIRES(mutex) { long long int now = time_msec(); - struct netflow_flow *nf_flow, *next; + struct netflow_flow *nf_flow; if (nf->packet.size) { collectors_send(nf->collectors, nf->packet.data, nf->packet.size); @@ -312,7 +312,7 @@ netflow_run__(struct netflow *nf) OVS_REQUIRES(mutex) nf->next_timeout = now + 1000; - HMAP_FOR_EACH_SAFE (nf_flow, next, hmap_node, &nf->flows) { + HMAP_FOR_EACH_SAFE (nf_flow, hmap_node, &nf->flows) { if (now > nf_flow->last_expired + nf->active_timeout) { bool idle = nf_flow->used < nf_flow->last_expired; netflow_expire__(nf, nf_flow); @@ -416,8 +416,8 @@ netflow_unref(struct netflow *nf) collectors_destroy(nf->collectors); ofpbuf_uninit(&nf->packet); - struct netflow_flow *nf_flow, *next; - HMAP_FOR_EACH_SAFE (nf_flow, next, hmap_node, &nf->flows) { + struct netflow_flow *nf_flow; + HMAP_FOR_EACH_SAFE (nf_flow, hmap_node, &nf->flows) { hmap_remove(&nf->flows, &nf_flow->hmap_node); free(nf_flow); } diff --git a/ofproto/ofproto-dpif-ipfix.c b/ofproto/ofproto-dpif-ipfix.c index 9280e008ea..742eed3998 100644 --- a/ofproto/ofproto-dpif-ipfix.c +++ b/ofproto/ofproto-dpif-ipfix.c @@ -926,17 +926,21 @@ dpif_ipfix_bridge_exporter_destroy(struct dpif_ipfix_bridge_exporter *exporter) static void dpif_ipfix_bridge_exporter_set_options( struct dpif_ipfix_bridge_exporter *exporter, - const struct ofproto_ipfix_bridge_exporter_options *options) + const 
struct ofproto_ipfix_bridge_exporter_options *options, + bool *options_changed) { - bool options_changed; - if (!options || sset_is_empty(&options->targets)) { /* No point in doing any work if there are no targets. */ - dpif_ipfix_bridge_exporter_clear(exporter); + if (exporter->options) { + dpif_ipfix_bridge_exporter_clear(exporter); + *options_changed = true; + } else { + *options_changed = false; + } return; } - options_changed = ( + *options_changed = ( !exporter->options || !ofproto_ipfix_bridge_exporter_options_equal( options, exporter->options)); @@ -945,7 +949,7 @@ dpif_ipfix_bridge_exporter_set_options( * shortchanged in collectors (which indicates that opening one or * more of the configured collectors failed, so that we should * retry). */ - if (options_changed + if (*options_changed || collectors_count(exporter->exporter.collectors) < sset_count(&options->targets)) { if (!dpif_ipfix_exporter_set_options( @@ -957,7 +961,7 @@ dpif_ipfix_bridge_exporter_set_options( } /* Avoid reconfiguring if options didn't change. */ - if (!options_changed) { + if (!*options_changed) { return; } @@ -1015,17 +1019,21 @@ dpif_ipfix_flow_exporter_destroy(struct dpif_ipfix_flow_exporter *exporter) static bool dpif_ipfix_flow_exporter_set_options( struct dpif_ipfix_flow_exporter *exporter, - const struct ofproto_ipfix_flow_exporter_options *options) + const struct ofproto_ipfix_flow_exporter_options *options, + bool *options_changed) { - bool options_changed; - if (sset_is_empty(&options->targets)) { /* No point in doing any work if there are no targets. 
*/ - dpif_ipfix_flow_exporter_clear(exporter); + if (exporter->options) { + dpif_ipfix_flow_exporter_clear(exporter); + *options_changed = true; + } else { + *options_changed = false; + } return true; } - options_changed = ( + *options_changed = ( !exporter->options || !ofproto_ipfix_flow_exporter_options_equal( options, exporter->options)); @@ -1034,7 +1042,7 @@ dpif_ipfix_flow_exporter_set_options( * shortchanged in collectors (which indicates that opening one or * more of the configured collectors failed, so that we should * retry). */ - if (options_changed + if (*options_changed || collectors_count(exporter->exporter.collectors) < sset_count(&options->targets)) { if (!dpif_ipfix_exporter_set_options( @@ -1046,7 +1054,7 @@ dpif_ipfix_flow_exporter_set_options( } /* Avoid reconfiguring if options didn't change. */ - if (!options_changed) { + if (!*options_changed) { return true; } @@ -1069,7 +1077,7 @@ remove_flow_exporter(struct dpif_ipfix *di, free(node); } -void +bool dpif_ipfix_set_options( struct dpif_ipfix *di, const struct ofproto_ipfix_bridge_exporter_options *bridge_exporter_options, @@ -1077,16 +1085,19 @@ dpif_ipfix_set_options( size_t n_flow_exporters_options) OVS_EXCLUDED(mutex) { int i; + bool beo_changed, feo_changed, entry_changed; struct ofproto_ipfix_flow_exporter_options *options; - struct dpif_ipfix_flow_exporter_map_node *node, *next; + struct dpif_ipfix_flow_exporter_map_node *node; ovs_mutex_lock(&mutex); dpif_ipfix_bridge_exporter_set_options(&di->bridge_exporter, - bridge_exporter_options); + bridge_exporter_options, + &beo_changed); /* Add new flow exporters and update current flow exporters. 
*/ options = (struct ofproto_ipfix_flow_exporter_options *) flow_exporters_options; + feo_changed = false; for (i = 0; i < n_flow_exporters_options; i++) { node = dpif_ipfix_find_flow_exporter_map_node( di, options->collector_set_id); @@ -1095,15 +1106,19 @@ dpif_ipfix_set_options( dpif_ipfix_flow_exporter_init(&node->exporter); hmap_insert(&di->flow_exporter_map, &node->node, hash_int(options->collector_set_id, 0)); + feo_changed = true; } - if (!dpif_ipfix_flow_exporter_set_options(&node->exporter, options)) { + if (!dpif_ipfix_flow_exporter_set_options(&node->exporter, + options, + &entry_changed)) { remove_flow_exporter(di, node); } + feo_changed = entry_changed ? true : feo_changed; options++; } /* Remove dropped flow exporters, if any needs to be removed. */ - HMAP_FOR_EACH_SAFE (node, next, node, &di->flow_exporter_map) { + HMAP_FOR_EACH_SAFE (node, node, &di->flow_exporter_map) { /* This is slow but doesn't take any extra memory, and * this table is not supposed to contain many rows anyway. */ options = (struct ofproto_ipfix_flow_exporter_options *) @@ -1117,10 +1132,12 @@ dpif_ipfix_set_options( } if (i == n_flow_exporters_options) { /* Not found. 
*/ remove_flow_exporter(di, node); + feo_changed = true; } } ovs_mutex_unlock(&mutex); + return beo_changed || feo_changed; } struct dpif_ipfix * @@ -1215,7 +1232,7 @@ static void dpif_ipfix_clear(struct dpif_ipfix *di) OVS_REQUIRES(mutex) { struct dpif_ipfix_flow_exporter_map_node *exp_node; - struct dpif_ipfix_port *dip, *next; + struct dpif_ipfix_port *dip; dpif_ipfix_bridge_exporter_clear(&di->bridge_exporter); @@ -1224,7 +1241,7 @@ dpif_ipfix_clear(struct dpif_ipfix *di) OVS_REQUIRES(mutex) free(exp_node); } - HMAP_FOR_EACH_SAFE (dip, next, hmap_node, &di->ports) { + HMAP_FOR_EACH_SAFE (dip, hmap_node, &di->ports) { dpif_ipfix_del_port__(di, dip); } } @@ -2799,7 +2816,7 @@ dpif_ipfix_cache_expire(struct dpif_ipfix_exporter *exporter, bool forced_end, const uint64_t export_time_usec, const uint32_t export_time_sec) { - struct ipfix_flow_cache_entry *entry, *next_entry; + struct ipfix_flow_cache_entry *entry; uint64_t max_flow_start_timestamp_usec; bool template_msg_sent = false; enum ipfix_flow_end_reason flow_end_reason; @@ -2811,7 +2828,7 @@ dpif_ipfix_cache_expire(struct dpif_ipfix_exporter *exporter, max_flow_start_timestamp_usec = export_time_usec - 1000000LL * exporter->cache_active_timeout; - LIST_FOR_EACH_SAFE (entry, next_entry, cache_flow_start_timestamp_list_node, + LIST_FOR_EACH_SAFE (entry, cache_flow_start_timestamp_list_node, &exporter->cache_flow_start_timestamp_list) { if (forced_end) { flow_end_reason = FORCED_END; diff --git a/ofproto/ofproto-dpif-ipfix.h b/ofproto/ofproto-dpif-ipfix.h index 1f42cd5275..75c0ab81ac 100644 --- a/ofproto/ofproto-dpif-ipfix.h +++ b/ofproto/ofproto-dpif-ipfix.h @@ -48,7 +48,7 @@ bool dpif_ipfix_get_bridge_exporter_output_sampling(const struct dpif_ipfix *); bool dpif_ipfix_get_flow_exporter_tunnel_sampling(const struct dpif_ipfix *, const uint32_t); bool dpif_ipfix_is_tunnel_port(const struct dpif_ipfix *, odp_port_t); -void dpif_ipfix_set_options( +bool dpif_ipfix_set_options( struct dpif_ipfix *, const struct 
ofproto_ipfix_bridge_exporter_options *, const struct ofproto_ipfix_flow_exporter_options *, size_t); diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c index 30e7caf54a..e8e1de920b 100644 --- a/ofproto/ofproto-dpif-sflow.c +++ b/ofproto/ofproto-dpif-sflow.c @@ -468,7 +468,8 @@ sflow_choose_agent_address(const char *agent_device, const char *target; SSET_FOR_EACH (target, targets) { struct sockaddr_storage ss; - if (inet_parse_active(target, SFL_DEFAULT_COLLECTOR_PORT, &ss, true)) { + if (inet_parse_active(target, SFL_DEFAULT_COLLECTOR_PORT, + &ss, true, NULL)) { /* sFlow only supports target in default routing table with * packet mark zero. */ @@ -590,10 +591,10 @@ void dpif_sflow_unref(struct dpif_sflow *ds) OVS_EXCLUDED(mutex) { if (ds && ovs_refcount_unref_relaxed(&ds->ref_cnt) == 1) { - struct dpif_sflow_port *dsp, *next; + struct dpif_sflow_port *dsp; dpif_sflow_clear(ds); - HMAP_FOR_EACH_SAFE (dsp, next, hmap_node, &ds->ports) { + HMAP_FOR_EACH_SAFE (dsp, hmap_node, &ds->ports) { dpif_sflow_del_port__(ds, dsp); } hmap_destroy(&ds->ports); diff --git a/ofproto/ofproto-dpif-trace.c b/ofproto/ofproto-dpif-trace.c index 78a54c715d..109940ad2a 100644 --- a/ofproto/ofproto-dpif-trace.c +++ b/ofproto/ofproto-dpif-trace.c @@ -65,8 +65,8 @@ static void oftrace_node_list_destroy(struct ovs_list *nodes) { if (nodes) { - struct oftrace_node *node, *next; - LIST_FOR_EACH_SAFE (node, next, node, nodes) { + struct oftrace_node *node; + LIST_FOR_EACH_SAFE (node, node, nodes) { ovs_list_remove(&node->node); oftrace_node_destroy(node); } diff --git a/ofproto/ofproto-dpif-xlate-cache.c b/ofproto/ofproto-dpif-xlate-cache.c index dcc91cb380..9224ee2e6d 100644 --- a/ofproto/ofproto-dpif-xlate-cache.c +++ b/ofproto/ofproto-dpif-xlate-cache.c @@ -209,6 +209,7 @@ xlate_cache_clear_entry(struct xc_entry *entry) { switch (entry->type) { case XC_TABLE: + ofproto_unref(&(entry->table.ofproto->up)); break; case XC_RULE: ofproto_rule_unref(&entry->rule->up); @@ 
-231,6 +232,7 @@ xlate_cache_clear_entry(struct xc_entry *entry) free(entry->learn.ofm); break; case XC_NORMAL: + ofproto_unref(&(entry->normal.ofproto->up)); break; case XC_FIN_TIMEOUT: /* 'u.fin.rule' is always already held as a XC_RULE, which diff --git a/ofproto/ofproto-dpif-xlate-cache.h b/ofproto/ofproto-dpif-xlate-cache.h index 114aff8ea3..0fc6d2ea60 100644 --- a/ofproto/ofproto-dpif-xlate-cache.h +++ b/ofproto/ofproto-dpif-xlate-cache.h @@ -61,9 +61,8 @@ enum xc_type { * that a flow relates to, although they may be used for other effects as well * (for instance, refreshing hard timeouts for learned flows). * - * An explicit reference is taken to all pointers other than the ones for - * struct ofproto_dpif. ofproto_dpif pointers are explicitly protected by - * destroying all xlate caches before the ofproto is destroyed. */ + * An explicit reference is taken to all pointers. + */ struct xc_entry { enum xc_type type; union { diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index 578cbfe581..7716c22f49 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -865,7 +865,7 @@ xlate_xbridge_init(struct xlate_cfg *xcfg, struct xbridge *xbridge) ovs_list_init(&xbridge->xbundles); hmap_init(&xbridge->xports); hmap_insert(&xcfg->xbridges, &xbridge->hmap_node, - hash_pointer(xbridge->ofproto, 0)); + uuid_hash(&xbridge->ofproto->uuid)); } static void @@ -1222,13 +1222,13 @@ xlate_txn_start(void) static void xlate_xcfg_free(struct xlate_cfg *xcfg) { - struct xbridge *xbridge, *next_xbridge; + struct xbridge *xbridge; if (!xcfg) { return; } - HMAP_FOR_EACH_SAFE (xbridge, next_xbridge, hmap_node, &xcfg->xbridges) { + HMAP_FOR_EACH_SAFE (xbridge, hmap_node, &xcfg->xbridges) { xlate_xbridge_remove(xcfg, xbridge); } @@ -1282,18 +1282,18 @@ xlate_ofproto_set(struct ofproto_dpif *ofproto, const char *name, static void xlate_xbridge_remove(struct xlate_cfg *xcfg, struct xbridge *xbridge) { - struct xbundle *xbundle, *next_xbundle; 
- struct xport *xport, *next_xport; + struct xbundle *xbundle; + struct xport *xport; if (!xbridge) { return; } - HMAP_FOR_EACH_SAFE (xport, next_xport, ofp_node, &xbridge->xports) { + HMAP_FOR_EACH_SAFE (xport, ofp_node, &xbridge->xports) { xlate_xport_remove(xcfg, xport); } - LIST_FOR_EACH_SAFE (xbundle, next_xbundle, list_node, &xbridge->xbundles) { + LIST_FOR_EACH_SAFE (xbundle, list_node, &xbridge->xbundles) { xlate_xbundle_remove(xcfg, xbundle); } @@ -1639,7 +1639,7 @@ xbridge_lookup(struct xlate_cfg *xcfg, const struct ofproto_dpif *ofproto) xbridges = &xcfg->xbridges; - HMAP_FOR_EACH_IN_BUCKET (xbridge, hmap_node, hash_pointer(ofproto, 0), + HMAP_FOR_EACH_IN_BUCKET (xbridge, hmap_node, uuid_hash(&ofproto->uuid), xbridges) { if (xbridge->ofproto == ofproto) { return xbridge; @@ -1661,6 +1661,23 @@ xbridge_lookup_by_uuid(struct xlate_cfg *xcfg, const struct uuid *uuid) return NULL; } +struct ofproto_dpif * +xlate_ofproto_lookup(const struct uuid *uuid) +{ + struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp); + struct xbridge *xbridge; + + if (!xcfg) { + return NULL; + } + + xbridge = xbridge_lookup_by_uuid(xcfg, uuid); + if (xbridge != NULL) { + return xbridge->ofproto; + } + return NULL; +} + static struct xbundle * xbundle_lookup(struct xlate_cfg *xcfg, const struct ofbundle *ofbundle) { @@ -2125,9 +2142,14 @@ mirror_packet(struct xlate_ctx *ctx, struct xbundle *xbundle, int snaplen; /* Get the details of the mirror represented by the rightmost 1-bit. */ - ovs_assert(mirror_get(xbridge->mbridge, raw_ctz(mirrors), - &vlans, &dup_mirrors, - &out, &snaplen, &out_vlan)); + if (OVS_UNLIKELY(!mirror_get(xbridge->mbridge, raw_ctz(mirrors), + &vlans, &dup_mirrors, + &out, &snaplen, &out_vlan))) { + /* The mirror got reconfigured before we got to read it's + * configuration. 
*/ + mirrors = zero_rightmost_1bit(mirrors); + continue; + } /* If this mirror selects on the basis of VLAN, and it does not select @@ -3015,7 +3037,7 @@ xlate_normal(struct xlate_ctx *ctx) bool is_grat_arp = is_gratuitous_arp(flow, wc); if (ctx->xin->allow_side_effects && flow->packet_type == htonl(PT_ETH) - && in_port->pt_mode != NETDEV_PT_LEGACY_L3 + && in_port && in_port->pt_mode != NETDEV_PT_LEGACY_L3 ) { update_learning_table(ctx, in_xbundle, flow->dl_src, vlan, is_grat_arp); @@ -3024,12 +3046,14 @@ xlate_normal(struct xlate_ctx *ctx) struct xc_entry *entry; /* Save just enough info to update mac learning table later. */ - entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NORMAL); - entry->normal.ofproto = ctx->xbridge->ofproto; - entry->normal.in_port = flow->in_port.ofp_port; - entry->normal.dl_src = flow->dl_src; - entry->normal.vlan = vlan; - entry->normal.is_gratuitous_arp = is_grat_arp; + if (ofproto_try_ref(&ctx->xbridge->ofproto->up)) { + entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NORMAL); + entry->normal.ofproto = ctx->xbridge->ofproto; + entry->normal.in_port = flow->in_port.ofp_port; + entry->normal.dl_src = flow->dl_src; + entry->normal.vlan = vlan; + entry->normal.is_gratuitous_arp = is_grat_arp; + } } /* Determine output bundle. */ @@ -3048,7 +3072,6 @@ xlate_normal(struct xlate_ctx *ctx) */ ctx->xout->slow |= SLOW_ACTION; - memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src); if (mcast_snooping_is_membership(flow->tp_src) || mcast_snooping_is_query(flow->tp_src)) { if (ctx->xin->allow_side_effects && ctx->xin->packet) { @@ -3523,6 +3546,9 @@ propagate_tunnel_data_to_flow__(struct flow *dst_flow, dst_flow->dl_dst = dmac; dst_flow->dl_src = smac; + /* Clear VLAN entries which do not apply for tunnel flows. 
*/ + memset(dst_flow->vlans, 0, sizeof dst_flow->vlans); + dst_flow->packet_type = htonl(PT_ETH); dst_flow->nw_dst = src_flow->tunnel.ip_dst; dst_flow->nw_src = src_flow->tunnel.ip_src; @@ -3654,14 +3680,27 @@ native_tunnel_output(struct xlate_ctx *ctx, const struct xport *xport, err = tnl_neigh_lookup(out_dev->xbridge->name, &d_ip6, &dmac); if (err) { + struct in6_addr nh_s_ip6 = in6addr_any; + xlate_report(ctx, OFT_DETAIL, "neighbor cache miss for %s on bridge %s, " "sending %s request", buf_dip6, out_dev->xbridge->name, d_ip ? "ARP" : "ND"); + + err = ovs_router_get_netdev_source_address(&d_ip6, + out_dev->xbridge->name, + &nh_s_ip6); + if (err) { + nh_s_ip6 = s_ip6; + } + if (d_ip) { - tnl_send_arp_request(ctx, out_dev, smac, s_ip, d_ip); + ovs_be32 nh_s_ip; + + nh_s_ip = in6_addr_get_mapped_ipv4(&nh_s_ip6); + tnl_send_arp_request(ctx, out_dev, smac, nh_s_ip, d_ip); } else { - tnl_send_nd_request(ctx, out_dev, smac, &s_ip6, &d_ip6); + tnl_send_nd_request(ctx, out_dev, smac, &nh_s_ip6, &d_ip6); } return err; } @@ -4176,6 +4215,10 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, if (xport->pt_mode == NETDEV_PT_LEGACY_L3) { flow->packet_type = PACKET_TYPE_BE(OFPHTN_ETHERTYPE, ntohs(flow->dl_type)); + if (ctx->pending_encap) { + /* The Ethernet header was not actually added yet. */ + ctx->pending_encap = false; + } } } @@ -5622,7 +5665,8 @@ xlate_sample_action(struct xlate_ctx *ctx, /* Scale the probability from 16-bit to 32-bit while representing * the same percentage. */ - uint32_t probability = (os->probability << 16) | os->probability; + uint32_t probability = + ((uint32_t) os->probability << 16) | os->probability; /* If ofp_port in flow sample action is equel to ofp_port, * this sample action is a input port action. 
*/ @@ -7784,6 +7828,12 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) goto exit; } + if (!xin->frozen_state + && xin->flow.ct_state + && xin->flow.ct_state & CS_TRACKED) { + ctx.conntracked = true; + } + /* Tunnel metadata in udpif format must be normalized before translation. */ if (flow->tunnel.flags & FLOW_TNL_F_UDPIF) { const struct tun_table *tun_tab = ofproto_get_tun_tab( diff --git a/ofproto/ofproto-dpif-xlate.h b/ofproto/ofproto-dpif-xlate.h index 851088d794..2ba90e999c 100644 --- a/ofproto/ofproto-dpif-xlate.h +++ b/ofproto/ofproto-dpif-xlate.h @@ -176,6 +176,7 @@ void xlate_ofproto_set(struct ofproto_dpif *, const char *name, struct dpif *, bool forward_bpdu, bool has_in_band, const struct dpif_backer_support *support); void xlate_remove_ofproto(struct ofproto_dpif *); +struct ofproto_dpif *xlate_ofproto_lookup(const struct uuid *uuid); void xlate_bundle_set(struct ofproto_dpif *, struct ofbundle *, const char *name, enum port_vlan_mode, diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 8143dd965f..f9562dee87 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -215,10 +215,6 @@ struct shash all_dpif_backers = SHASH_INITIALIZER(&all_dpif_backers); static struct hmap all_ofproto_dpifs_by_name = HMAP_INITIALIZER(&all_ofproto_dpifs_by_name); -/* All existing ofproto_dpif instances, indexed by ->uuid. */ -static struct hmap all_ofproto_dpifs_by_uuid = - HMAP_INITIALIZER(&all_ofproto_dpifs_by_uuid); - static bool ofproto_use_tnl_push_pop = true; static void ofproto_unixctl_init(void); static void ct_zone_config_init(struct dpif_backer *backer); @@ -1663,7 +1659,7 @@ static int construct(struct ofproto *ofproto_) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); - struct shash_node *node, *next; + struct shash_node *node; int error; /* Tunnel module can get used right after the udpif threads are running. 
*/ @@ -1701,7 +1697,7 @@ construct(struct ofproto *ofproto_) ofproto->ams_seqno = seq_read(ofproto->ams_seq); - SHASH_FOR_EACH_SAFE (node, next, &init_ofp_ports) { + SHASH_FOR_EACH_SAFE (node, &init_ofp_ports) { struct iface_hint *iface_hint = node->data; if (!strcmp(iface_hint->br_name, ofproto->up.name)) { @@ -1720,9 +1716,6 @@ construct(struct ofproto *ofproto_) hmap_insert(&all_ofproto_dpifs_by_name, &ofproto->all_ofproto_dpifs_by_name_node, hash_string(ofproto->up.name, 0)); - hmap_insert(&all_ofproto_dpifs_by_uuid, - &ofproto->all_ofproto_dpifs_by_uuid_node, - uuid_hash(&ofproto->uuid)); memset(&ofproto->stats, 0, sizeof ofproto->stats); ofproto_init_tables(ofproto_, N_TABLES); @@ -1820,8 +1813,6 @@ destruct(struct ofproto *ofproto_, bool del) hmap_remove(&all_ofproto_dpifs_by_name, &ofproto->all_ofproto_dpifs_by_name_node); - hmap_remove(&all_ofproto_dpifs_by_uuid, - &ofproto->all_ofproto_dpifs_by_uuid_node); OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) { CLS_FOR_EACH (rule, up.cr, &table->cls) { @@ -1857,6 +1848,8 @@ destruct(struct ofproto *ofproto_, bool del) seq_destroy(ofproto->ams_seq); + /* Wait for all the meter destroy work to finish. */ + ovsrcu_barrier(); close_dpif_backer(ofproto->backer, del); } @@ -1945,7 +1938,7 @@ run(struct ofproto *ofproto_) new_dump_seq = seq_read(udpif_dump_seq(ofproto->backer->udpif)); if (ofproto->dump_seq != new_dump_seq) { - struct rule *rule, *next_rule; + struct rule *rule; long long now = time_msec(); /* We know stats are relatively fresh, so now is a good time to do some @@ -1955,7 +1948,7 @@ run(struct ofproto *ofproto_) /* Expire OpenFlow flows whose idle_timeout or hard_timeout * has passed. 
*/ ovs_mutex_lock(&ofproto_mutex); - LIST_FOR_EACH_SAFE (rule, next_rule, expirable, + LIST_FOR_EACH_SAFE (rule, expirable, &ofproto->up.expirable) { rule_expire(rule_dpif_cast(rule), now); } @@ -2346,6 +2339,7 @@ set_ipfix( struct dpif_ipfix *di = ofproto->ipfix; bool has_options = bridge_exporter_options || flow_exporters_options; bool new_di = false; + bool options_changed = false; if (has_options && !di) { di = ofproto->ipfix = dpif_ipfix_create(); @@ -2355,7 +2349,7 @@ set_ipfix( if (di) { /* Call set_options in any case to cleanly flush the flow * caches in the last exporters that are to be destroyed. */ - dpif_ipfix_set_options( + options_changed = dpif_ipfix_set_options( di, bridge_exporter_options, flow_exporters_options, n_flow_exporters_options); @@ -2371,6 +2365,10 @@ set_ipfix( dpif_ipfix_unref(di); ofproto->ipfix = NULL; } + + if (new_di || options_changed) { + ofproto->backer->need_revalidate = REV_RECONFIGURE; + } } return 0; @@ -2493,11 +2491,11 @@ set_lldp(struct ofport *ofport_, { struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto); + bool old_enable = lldp_is_enabled(ofport->lldp); int error = 0; - if (cfg) { + if (cfg && !smap_is_empty(cfg)) { if (!ofport->lldp) { - ofproto->backer->need_revalidate = REV_RECONFIGURE; ofport->lldp = lldp_create(ofport->up.netdev, ofport_->mtu, cfg); } @@ -2509,6 +2507,9 @@ set_lldp(struct ofport *ofport_, } else if (ofport->lldp) { lldp_unref(ofport->lldp); ofport->lldp = NULL; + } + + if (lldp_is_enabled(ofport->lldp) != old_enable) { ofproto->backer->need_revalidate = REV_RECONFIGURE; } @@ -3106,11 +3107,11 @@ bundle_flush_macs(struct ofbundle *bundle, bool all_ofprotos) { struct ofproto_dpif *ofproto = bundle->ofproto; struct mac_learning *ml = ofproto->ml; - struct mac_entry *mac, *next_mac; + struct mac_entry *mac; ofproto->backer->need_revalidate = REV_RECONFIGURE; ovs_rwlock_wrlock(&ml->rwlock); - LIST_FOR_EACH_SAFE (mac, next_mac, 
lru_node, &ml->lrus) { + LIST_FOR_EACH_SAFE (mac, lru_node, &ml->lrus) { if (mac_entry_get_port(ml, mac) == bundle) { if (all_ofprotos) { struct ofproto_dpif *o; @@ -3141,13 +3142,13 @@ bundle_move(struct ofbundle *old, struct ofbundle *new) { struct ofproto_dpif *ofproto = old->ofproto; struct mac_learning *ml = ofproto->ml; - struct mac_entry *mac, *next_mac; + struct mac_entry *mac; ovs_assert(new->ofproto == old->ofproto); ofproto->backer->need_revalidate = REV_RECONFIGURE; ovs_rwlock_wrlock(&ml->rwlock); - LIST_FOR_EACH_SAFE (mac, next_mac, lru_node, &ml->lrus) { + LIST_FOR_EACH_SAFE (mac, lru_node, &ml->lrus) { if (mac_entry_get_port(ml, mac) == old) { mac_entry_set_port(ml, mac, new); } @@ -3244,7 +3245,7 @@ static void bundle_destroy(struct ofbundle *bundle) { struct ofproto_dpif *ofproto; - struct ofport_dpif *port, *next_port; + struct ofport_dpif *port; if (!bundle) { return; @@ -3257,7 +3258,7 @@ bundle_destroy(struct ofbundle *bundle) xlate_bundle_remove(bundle); xlate_txn_commit(); - LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) { + LIST_FOR_EACH_SAFE (port, bundle_node, &bundle->ports) { bundle_del_port(port); } @@ -3347,9 +3348,7 @@ bundle_set(struct ofproto *ofproto_, void *aux, } } if (!ok || ovs_list_size(&bundle->ports) != s->n_members) { - struct ofport_dpif *next_port; - - LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) { + LIST_FOR_EACH_SAFE (port, bundle_node, &bundle->ports) { for (i = 0; i < s->n_members; i++) { if (s->members[i] == port->up.ofp_port) { goto found; @@ -3963,6 +3962,10 @@ port_add(struct ofproto *ofproto_, struct netdev *netdev) simap_put(&ofproto->backer->tnl_backers, dp_port_name, odp_to_u32(port_no)); } + } else { + struct dpif *dpif = ofproto->backer->dpif; + const char *dpif_type_str = dpif_normalize_type(dpif_type(dpif)); + netdev_set_dpif_type(netdev, dpif_type_str); } if (netdev_get_tunnel_config(netdev)) { @@ -4471,12 +4474,14 @@ rule_dpif_lookup_from_table(struct ofproto_dpif 
*ofproto, atomic_add_relaxed(&tbl->n_matched, stats->n_packets, &orig); } if (xcache) { - struct xc_entry *entry; + if (ofproto_try_ref(&ofproto->up)) { + struct xc_entry *entry; - entry = xlate_cache_add_entry(xcache, XC_TABLE); - entry->table.ofproto = ofproto; - entry->table.id = *table_id; - entry->table.match = true; + entry = xlate_cache_add_entry(xcache, XC_TABLE); + entry->table.ofproto = ofproto; + entry->table.id = *table_id; + entry->table.match = true; + } } return rule; } @@ -4507,12 +4512,14 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto, stats->n_packets, &orig); } if (xcache) { - struct xc_entry *entry; + if (ofproto_try_ref(&ofproto->up)) { + struct xc_entry *entry; - entry = xlate_cache_add_entry(xcache, XC_TABLE); - entry->table.ofproto = ofproto; - entry->table.id = next_id; - entry->table.match = (rule != NULL); + entry = xlate_cache_add_entry(xcache, XC_TABLE); + entry->table.ofproto = ofproto; + entry->table.id = next_id; + entry->table.match = (rule != NULL); + } } if (rule) { goto out; /* Match. 
*/ @@ -5550,9 +5557,9 @@ ct_zone_timeout_policy_sweep(struct dpif_backer *backer) { if (!ovs_list_is_empty(&backer->ct_tp_kill_list) && time_msec() >= timeout_policy_cleanup_timer) { - struct ct_timeout_policy *ct_tp, *next; + struct ct_timeout_policy *ct_tp; - LIST_FOR_EACH_SAFE (ct_tp, next, list_node, &backer->ct_tp_kill_list) { + LIST_FOR_EACH_SAFE (ct_tp, list_node, &backer->ct_tp_kill_list) { if (!ct_dpif_del_timeout_policy(backer->dpif, ct_tp->tp_id)) { ovs_list_remove(&ct_tp->list_node); ct_timeout_policy_destroy(ct_tp, backer->tp_ids); @@ -5594,6 +5601,7 @@ ct_set_zone_timeout_policy(const char *datapath_type, uint16_t zone_id, ct_timeout_policy_unref(backer, ct_zone->ct_tp); ct_zone->ct_tp = ct_tp; ct_tp->ref_count++; + backer->need_revalidate = REV_RECONFIGURE; } } else { struct ct_zone *new_ct_zone = ct_zone_alloc(zone_id); @@ -5601,6 +5609,7 @@ ct_set_zone_timeout_policy(const char *datapath_type, uint16_t zone_id, cmap_insert(&backer->ct_zones, &new_ct_zone->node, hash_int(zone_id, 0)); ct_tp->ref_count++; + backer->need_revalidate = REV_RECONFIGURE; } } @@ -5617,6 +5626,7 @@ ct_del_zone_timeout_policy(const char *datapath_type, uint16_t zone_id) if (ct_zone) { ct_timeout_policy_unref(backer, ct_zone->ct_tp); ct_zone_remove_and_destroy(backer, ct_zone); + backer->need_revalidate = REV_RECONFIGURE; } } @@ -5818,15 +5828,7 @@ ofproto_dpif_lookup_by_name(const char *name) struct ofproto_dpif * ofproto_dpif_lookup_by_uuid(const struct uuid *uuid) { - struct ofproto_dpif *ofproto; - - HMAP_FOR_EACH_WITH_HASH (ofproto, all_ofproto_dpifs_by_uuid_node, - uuid_hash(uuid), &all_ofproto_dpifs_by_uuid) { - if (uuid_equals(&ofproto->uuid, uuid)) { - return ofproto; - } - } - return NULL; + return xlate_ofproto_lookup(uuid); } static void diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h index 14b909973d..47e96e62e1 100644 --- a/ofproto/ofproto-provider.h +++ b/ofproto/ofproto-provider.h @@ -143,6 +143,8 @@ struct ofproto { /* Variable length 
mf_field mapping. Stores all configured variable length * meta-flow fields (struct mf_field) in a switch. */ struct vl_mff_map vl_mff_map; + /* refcount to this ofproto, held by rule/group/xlate_caches */ + struct ovs_refcount refcount; }; void ofproto_init_tables(struct ofproto *, int n_tables); diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 56aeac7209..933f7de2dc 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -549,6 +549,7 @@ ofproto_create(const char *datapath_name, const char *datapath_type, ovs_mutex_init(&ofproto->vl_mff_map.mutex); cmap_init(&ofproto->vl_mff_map.cmap); + ovs_refcount_init(&ofproto->refcount); error = ofproto->ofproto_class->construct(ofproto); if (error) { @@ -1695,9 +1696,33 @@ ofproto_destroy__(struct ofproto *ofproto) ofproto->ofproto_class->dealloc(ofproto); } -/* Destroying rules is doubly deferred, must have 'ofproto' around for them. - * - 1st we defer the removal of the rules from the classifier - * - 2nd we defer the actual destruction of the rules. */ +/* + * Rule destruction requires ofproto to remain accessible. + * Depending on the rule destruction call (shown in below), it can take several + * RCU grace periods before the ofproto reference is not needed anymore. + * The ofproto destruction callback is thus protected by a refcount, + * and such destruction is itself deferred. + * + * remove_rules_postponed (one grace period) + * -> remove_rule_rcu + * -> remove_rule_rcu__ + * -> ofproto_rule_unref -> ref count != 1 + * -> ... more grace periods. + * -> rule_destroy_cb (> 2 grace periods) + * -> free + * + * NOTE: The original ofproto destruction is only deferred by two grace + * periods to keep ofproto accessible. By using refcount together the + * destruction can be deferred for longer time. Now ofproto has 3 states: + * + * state 1: alive, with refcount >= 1 + * state 2: dying, with refcount == 0, however pointer is valid + * state 3: died, memory freed, pointer might be dangling. 
+ * + * We only need to add refcount to certain objects whose destruction can + * take several RCU grace periods (rule, group, xlate_cache). Other + * references to ofproto must be cleared before the 2 RCU grace periods. + */ static void ofproto_destroy_defer__(struct ofproto *ofproto) OVS_EXCLUDED(ofproto_mutex) @@ -1705,11 +1730,31 @@ ofproto_destroy_defer__(struct ofproto *ofproto) ovsrcu_postpone(ofproto_destroy__, ofproto); } +void +ofproto_ref(struct ofproto *ofproto) +{ + ovs_refcount_ref(&ofproto->refcount); +} + +bool +ofproto_try_ref(struct ofproto *ofproto) +{ + return ovs_refcount_try_ref_rcu(&ofproto->refcount); +} + +void +ofproto_unref(struct ofproto *ofproto) +{ + if (ofproto && ovs_refcount_unref(&ofproto->refcount) == 1) { + ovsrcu_postpone(ofproto_destroy_defer__, ofproto); + } +} + void ofproto_destroy(struct ofproto *p, bool del) OVS_EXCLUDED(ofproto_mutex) { - struct ofport *ofport, *next_ofport; + struct ofport *ofport; struct ofport_usage *usage; if (!p) { @@ -1717,7 +1762,7 @@ ofproto_destroy(struct ofproto *p, bool del) } ofproto_flush__(p, del); - HMAP_FOR_EACH_SAFE (ofport, next_ofport, hmap_node, &p->ports) { + HMAP_FOR_EACH_SAFE (ofport, hmap_node, &p->ports) { ofport_destroy(ofport, del); } @@ -1736,8 +1781,7 @@ ofproto_destroy(struct ofproto *p, bool del) p->connmgr = NULL; ovs_mutex_unlock(&ofproto_mutex); - /* Destroying rules is deferred, must have 'ofproto' around for them. */ - ovsrcu_postpone(ofproto_destroy_defer__, p); + ofproto_unref(p); } /* Destroys the datapath with the respective 'name' and 'type'. 
With the Linux @@ -2782,7 +2826,7 @@ init_ports(struct ofproto *p) { struct ofproto_port_dump dump; struct ofproto_port ofproto_port; - struct shash_node *node, *next; + struct shash_node *node; OFPROTO_PORT_FOR_EACH (&ofproto_port, &dump, p) { const char *name = ofproto_port.name; @@ -2813,7 +2857,7 @@ init_ports(struct ofproto *p) } } - SHASH_FOR_EACH_SAFE(node, next, &init_ofp_ports) { + SHASH_FOR_EACH_SAFE (node, &init_ofp_ports) { struct iface_hint *iface_hint = node->data; if (!strcmp(iface_hint->br_name, p->name)) { @@ -2929,6 +2973,9 @@ ofproto_rule_destroy__(struct rule *rule) cls_rule_destroy(CONST_CAST(struct cls_rule *, &rule->cr)); rule_actions_destroy(rule_get_actions(rule)); ovs_mutex_destroy(&rule->mutex); + /* ofproto_unref() must be called first. It is possible because ofproto + * destruction is deferred by an RCU grace period. */ + ofproto_unref(rule->ofproto); rule->ofproto->ofproto_class->rule_dealloc(rule); } @@ -3069,6 +3116,9 @@ group_destroy_cb(struct ofgroup *group) &group->props)); ofputil_bucket_list_destroy(CONST_CAST(struct ovs_list *, &group->buckets)); + /* ofproto_unref() must be called first. It is possible because ofproto + * destruction is deferred by an RCU grace period. */ + ofproto_unref(group->ofproto); group->ofproto->ofproto_class->group_dealloc(group); } @@ -5271,10 +5321,15 @@ ofproto_rule_create(struct ofproto *ofproto, struct cls_rule *cr, struct rule *rule; enum ofperr error; + if (!ofproto_try_ref(ofproto)) { + return OFPERR_OFPFMFC_UNKNOWN; + } + /* Allocate new rule. 
*/ rule = ofproto->ofproto_class->rule_alloc(); if (!rule) { cls_rule_destroy(cr); + ofproto_unref(ofproto); VLOG_WARN_RL(&rl, "%s: failed to allocate a rule.", ofproto->name); return OFPERR_OFPFMFC_UNKNOWN; } @@ -6797,9 +6852,9 @@ static void meter_delete_all(struct ofproto *ofproto) OVS_REQUIRES(ofproto_mutex) { - struct meter *meter, *next; + struct meter *meter; - HMAP_FOR_EACH_SAFE (meter, next, node, &ofproto->meters) { + HMAP_FOR_EACH_SAFE (meter, node, &ofproto->meters) { hmap_remove(&ofproto->meters, &meter->node); meter_destroy(ofproto, meter); } @@ -7339,8 +7394,13 @@ init_group(struct ofproto *ofproto, const struct ofputil_group_mod *gm, return OFPERR_OFPGMFC_BAD_TYPE; } + if (!ofproto_try_ref(ofproto)) { + return OFPERR_OFPFMFC_UNKNOWN; + } + *ofgroup = ofproto->ofproto_class->group_alloc(); if (!*ofgroup) { + ofproto_unref(ofproto); VLOG_WARN_RL(&rl, "%s: failed to allocate group", ofproto->name); return OFPERR_OFPGMFC_OUT_OF_GROUPS; } @@ -7377,6 +7437,7 @@ init_group(struct ofproto *ofproto, const struct ofputil_group_mod *gm, &(*ofgroup)->props)); ofputil_bucket_list_destroy(CONST_CAST(struct ovs_list *, &(*ofgroup)->buckets)); + ofproto_unref(ofproto); ofproto->ofproto_class->group_dealloc(*ofgroup); } return error; @@ -8902,7 +8963,7 @@ eviction_group_hash_rule(struct rule *rule) hash = table->eviction_group_id_basis; miniflow_expand(rule->cr.match.flow, &flow); for (sf = table->eviction_fields; - sf < &table->eviction_fields[table->n_eviction_fields]; + sf && sf < &table->eviction_fields[table->n_eviction_fields]; sf++) { if (mf_are_prereqs_ok(sf->field, &flow, NULL)) { @@ -9138,8 +9199,8 @@ oftable_configure_eviction(struct oftable *table, unsigned int eviction, /* Destroy existing eviction groups, then destroy and recreate data * structures to recover memory. 
*/ - struct eviction_group *evg, *next; - HMAP_FOR_EACH_SAFE (evg, next, id_node, &table->eviction_groups_by_id) { + struct eviction_group *evg; + HMAP_FOR_EACH_SAFE (evg, id_node, &table->eviction_groups_by_id) { eviction_group_destroy(table, evg); } hmap_destroy(&table->eviction_groups_by_id); diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h index b0262da2df..4e15167ab7 100644 --- a/ofproto/ofproto.h +++ b/ofproto/ofproto.h @@ -563,6 +563,10 @@ int ofproto_port_get_cfm_status(const struct ofproto *, enum ofputil_table_miss ofproto_table_get_miss_config(const struct ofproto *, uint8_t table_id); +void ofproto_ref(struct ofproto *); +void ofproto_unref(struct ofproto *); +bool ofproto_try_ref(struct ofproto *); + #ifdef __cplusplus } #endif diff --git a/ovsdb/.gitignore b/ovsdb/.gitignore index fbcefafc6e..a4f9d38f11 100644 --- a/ovsdb/.gitignore +++ b/ovsdb/.gitignore @@ -1,5 +1,7 @@ /_server.ovsschema.inc /_server.ovsschema.stamp +/local-config.ovsschema.stamp +/ovsdb.local-config.5 /ovsdb-client /ovsdb-client.1 /ovsdb-doc diff --git a/ovsdb/automake.mk b/ovsdb/automake.mk index 62cc02686f..3b3140102b 100644 --- a/ovsdb/automake.mk +++ b/ovsdb/automake.mk @@ -148,4 +148,25 @@ ovsdb/ovsdb-server.5: \ $(srcdir)/ovsdb/_server.xml > $@.tmp && \ mv $@.tmp $@ +EXTRA_DIST += ovsdb/local-config.ovsschema +pkgdata_DATA += ovsdb/local-config.ovsschema + +# Version checking for local-config.ovsschema. +ALL_LOCAL += ovsdb/local-config.ovsschema.stamp +ovsdb/local-config.ovsschema.stamp: ovsdb/local-config.ovsschema + $(srcdir)/build-aux/cksum-schema-check $? 
$@ +CLEANFILES += ovsdb/local-config.ovsschema.stamp + +# Local_Config schema documentation +EXTRA_DIST += ovsdb/local-config.xml +CLEANFILES += ovsdb/ovsdb.local-config.5 +man_MANS += ovsdb/ovsdb.local-config.5 +ovsdb/ovsdb.local-config.5: \ + ovsdb/ovsdb-doc ovsdb/ ovsdb/local-config.xml ovsdb/local-config.ovsschema + $(AM_V_GEN)$(OVSDB_DOC) \ + --version=$(VERSION) \ + $(srcdir)/ovsdb/local-config.ovsschema \ + $(srcdir)/ovsdb/local-config.xml > $@.tmp && \ + mv $@.tmp $@ + EXTRA_DIST += ovsdb/TODO.rst diff --git a/ovsdb/condition.c b/ovsdb/condition.c index 388dd54a16..9aa3788dbb 100644 --- a/ovsdb/condition.c +++ b/ovsdb/condition.c @@ -220,13 +220,13 @@ ovsdb_condition_optimize(struct ovsdb_condition *cnd) static void ovsdb_condition_optimize_destroy(struct ovsdb_condition *cnd) { - struct shash_node *node, *next; + struct shash_node *node; - SHASH_FOR_EACH_SAFE (node, next, &cnd->o_columns) { + SHASH_FOR_EACH_SAFE (node, &cnd->o_columns) { struct ovsdb_o_column *o_column = node->data; - struct ovsdb_o_clause *c, *c_next; + struct ovsdb_o_clause *c; - HMAP_FOR_EACH_SAFE(c, c_next, hmap_node, &o_column->o_clauses) { + HMAP_FOR_EACH_SAFE (c, hmap_node, &o_column->o_clauses) { hmap_remove(&o_column->o_clauses, &c->hmap_node); free(c); } diff --git a/ovsdb/file.c b/ovsdb/file.c index 9f44007d97..ca80c28235 100644 --- a/ovsdb/file.c +++ b/ovsdb/file.c @@ -524,6 +524,7 @@ ovsdb_file_read__(const char *filename, bool rw, error = ovsdb_txn_replay_commit(txn); if (error) { + ovsdb_error_destroy(error); ovsdb_storage_unread(storage); break; } diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c index 351c39d8aa..916a1f414e 100644 --- a/ovsdb/jsonrpc-server.c +++ b/ovsdb/jsonrpc-server.c @@ -197,9 +197,9 @@ ovsdb_jsonrpc_server_remove_db(struct ovsdb_jsonrpc_server *svr, void ovsdb_jsonrpc_server_destroy(struct ovsdb_jsonrpc_server *svr) { - struct shash_node *node, *next; + struct shash_node *node; - SHASH_FOR_EACH_SAFE (node, next, &svr->remotes) { + 
SHASH_FOR_EACH_SAFE (node, &svr->remotes) { ovsdb_jsonrpc_server_del_remote(node); } shash_destroy(&svr->remotes); @@ -227,9 +227,9 @@ void ovsdb_jsonrpc_server_set_remotes(struct ovsdb_jsonrpc_server *svr, const struct shash *new_remotes) { - struct shash_node *node, *next; + struct shash_node *node; - SHASH_FOR_EACH_SAFE (node, next, &svr->remotes) { + SHASH_FOR_EACH_SAFE (node, &svr->remotes) { struct ovsdb_jsonrpc_remote *remote = node->data; struct ovsdb_jsonrpc_options *options = shash_find_data(new_remotes, node->name); @@ -585,9 +585,9 @@ ovsdb_jsonrpc_session_set_options(struct ovsdb_jsonrpc_session *session, static void ovsdb_jsonrpc_session_run_all(struct ovsdb_jsonrpc_remote *remote) { - struct ovsdb_jsonrpc_session *s, *next; + struct ovsdb_jsonrpc_session *s; - LIST_FOR_EACH_SAFE (s, next, node, &remote->sessions) { + LIST_FOR_EACH_SAFE (s, node, &remote->sessions) { int error = ovsdb_jsonrpc_session_run(s); if (error) { ovsdb_jsonrpc_session_close(s); @@ -642,9 +642,9 @@ ovsdb_jsonrpc_session_get_memory_usage_all( static void ovsdb_jsonrpc_session_close_all(struct ovsdb_jsonrpc_remote *remote) { - struct ovsdb_jsonrpc_session *s, *next; + struct ovsdb_jsonrpc_session *s; - LIST_FOR_EACH_SAFE (s, next, node, &remote->sessions) { + LIST_FOR_EACH_SAFE (s, node, &remote->sessions) { ovsdb_jsonrpc_session_close(s); } } @@ -660,9 +660,9 @@ static void ovsdb_jsonrpc_session_reconnect_all(struct ovsdb_jsonrpc_remote *remote, bool force, const char *comment) { - struct ovsdb_jsonrpc_session *s, *next; + struct ovsdb_jsonrpc_session *s; - LIST_FOR_EACH_SAFE (s, next, node, &remote->sessions) { + LIST_FOR_EACH_SAFE (s, node, &remote->sessions) { if (force || !s->db_change_aware) { jsonrpc_session_force_reconnect(s->js); if (comment && jsonrpc_session_is_connected(s->js)) { @@ -909,9 +909,9 @@ error: static void ovsdb_jsonrpc_session_unlock_all(struct ovsdb_jsonrpc_session *s) { - struct ovsdb_lock_waiter *waiter, *next; + struct ovsdb_lock_waiter *waiter; - 
HMAP_FOR_EACH_SAFE (waiter, next, session_node, &s->up.waiters) { + HMAP_FOR_EACH_SAFE (waiter, session_node, &s->up.waiters) { ovsdb_jsonrpc_session_unlock__(waiter); } } @@ -1198,8 +1198,8 @@ static void ovsdb_jsonrpc_trigger_remove__(struct ovsdb_jsonrpc_session *s, struct ovsdb *db) { - struct ovsdb_jsonrpc_trigger *t, *next; - HMAP_FOR_EACH_SAFE (t, next, hmap_node, &s->triggers) { + struct ovsdb_jsonrpc_trigger *t; + HMAP_FOR_EACH_SAFE (t, hmap_node, &s->triggers) { if (!db || t->trigger.db == db) { ovsdb_jsonrpc_trigger_complete(t); } @@ -1226,8 +1226,8 @@ ovsdb_jsonrpc_trigger_complete_all(struct ovsdb_jsonrpc_session *s) static void ovsdb_jsonrpc_trigger_complete_done(struct ovsdb_jsonrpc_session *s) { - struct ovsdb_jsonrpc_trigger *trigger, *next; - LIST_FOR_EACH_SAFE (trigger, next, trigger.node, &s->up.completions) { + struct ovsdb_jsonrpc_trigger *trigger; + LIST_FOR_EACH_SAFE (trigger, trigger.node, &s->up.completions) { ovsdb_jsonrpc_trigger_complete(trigger); } } @@ -1688,8 +1688,8 @@ ovsdb_jsonrpc_monitor_preremove_db(struct ovsdb_jsonrpc_session *s, { ovs_assert(db); - struct ovsdb_jsonrpc_monitor *m, *next; - HMAP_FOR_EACH_SAFE (m, next, node, &s->monitors) { + struct ovsdb_jsonrpc_monitor *m; + HMAP_FOR_EACH_SAFE (m, node, &s->monitors) { if (m->db == db) { ovsdb_jsonrpc_monitor_destroy(m, true); } @@ -1700,9 +1700,9 @@ ovsdb_jsonrpc_monitor_preremove_db(struct ovsdb_jsonrpc_session *s, static void ovsdb_jsonrpc_monitor_remove_all(struct ovsdb_jsonrpc_session *s) { - struct ovsdb_jsonrpc_monitor *m, *next; + struct ovsdb_jsonrpc_monitor *m; - HMAP_FOR_EACH_SAFE (m, next, node, &s->monitors) { + HMAP_FOR_EACH_SAFE (m, node, &s->monitors) { ovsdb_jsonrpc_monitor_destroy(m, false); } } diff --git a/ovsdb/local-config.ovsschema b/ovsdb/local-config.ovsschema new file mode 100644 index 0000000000..bd86d0f4f6 --- /dev/null +++ b/ovsdb/local-config.ovsschema @@ -0,0 +1,43 @@ +{ + "name": "Local_Config", + "version": "1.0.0", + "cksum": "2048726482 
1858", + "tables": { + "Config": { + "columns": { + "connections": { + "type": {"key": {"type": "uuid", + "refTable": "Connection"}, + "min": 0, + "max": "unlimited"}}}, + "maxRows": 1, + "isRoot": true}, + "Connection": { + "columns": { + "target": {"type": "string"}, + "max_backoff": {"type": {"key": {"type": "integer", + "minInteger": 1000}, + "min": 0, + "max": 1}}, + "inactivity_probe": {"type": {"key": "integer", + "min": 0, + "max": 1}}, + "read_only": {"type": "boolean"}, + "role": {"type": "string"}, + "other_config": {"type": {"key": "string", + "value": "string", + "min": 0, + "max": "unlimited"}}, + "external_ids": {"type": {"key": "string", + "value": "string", + "min": 0, + "max": "unlimited"}}, + "is_connected": {"type": "boolean", "ephemeral": true}, + "status": {"type": {"key": "string", + "value": "string", + "min": 0, + "max": "unlimited"}, + "ephemeral": true}}, + "indexes": [["target"]]} + } +} diff --git a/ovsdb/local-config.xml b/ovsdb/local-config.xml new file mode 100644 index 0000000000..b502aea4cf --- /dev/null +++ b/ovsdb/local-config.xml @@ -0,0 +1,296 @@ + + +

+ This database is for local configuration of an ovsdb-server. The + database is meant to be unique, even among multiple clustered db + servers, so that configuration that is local to that server can + be configured separately (e.g. Connection information). +

+ + +

+ The root local configuration table for an ovsdb-server. This table + must have exactly one row. +

+ + + Database clients to which the Open vSwitch database server should + connect or on which it should listen, along with options for how these + connections should be configured. See the + Connection table for more information. + + +
+ + +

+ Configuration for a database connection to an Open vSwitch database + (OVSDB) client. +

+ +

+ This table primarily configures the Open vSwitch database server + (ovsdb-server). +

+ +

+ The Open vSwitch database server can initiate and maintain active + connections to remote clients. It can also listen for database + connections. +

+ + + +

Connection methods for clients.

+

+ The following connection methods are currently supported: +

+
+
+ ssl:host[:port] +
+
+

+ The specified SSL port on the host at the given + host, which can either be a DNS name (if built with + unbound library) or an IP address. A valid SSL configuration must + be provided when this form is used, this configuration can be + specified via command-line options or the + table. +

+

+ If port is not specified, it defaults to 6640. +

+

+ SSL support is an optional feature that is not always + built as part of Open vSwitch. +

+
+ +
+ tcp:host[:port] +
+
+

+ The specified TCP port on the host at the given + host, which can either be a DNS name (if built with + unbound library) or an IP address. If host is an IPv6 + address, wrap it in square brackets, e.g. + tcp:[::1]:6640. +

+

+ If port is not specified, it defaults to 6640. +

+
+
+ pssl:[port][:host] +
+
+

+ Listens for SSL connections on the specified TCP port. + Specify 0 for port to have the kernel automatically + choose an available port. If host, which can either + be a DNS name (if built with unbound library) or an IP address, + is specified, then connections are restricted to the resolved or + specified local IP address (either IPv4 or IPv6 address). If + host is an IPv6 address, wrap it in square brackets, + e.g. pssl:6640:[::1]. If host is not + specified then it listens only on IPv4 (but not IPv6) addresses. + A valid SSL configuration must be provided when this form is + used, this can be specified either via command-line options or + the table. +

+

+ If port is not specified, it defaults to 6640. +

+

+ SSL support is an optional feature that is not always built as + part of Open vSwitch. +

+
+
+ ptcp:[port][:host] +
+
+

+ Listens for connections on the specified TCP port. + Specify 0 for port to have the kernel automatically + choose an available port. If host, which can either + be a DNS name (if built with unbound library) or an IP address, + is specified, then connections are restricted to the resolved or + specified local IP address (either IPv4 or IPv6 address). If + host is an IPv6 address, wrap it in square brackets, + e.g. ptcp:6640:[::1]. If host is not + specified then it listens only on IPv4 addresses. +

+

+ If port is not specified, it defaults to 6640. +

+
+
+

When multiple clients are configured, the + values must be unique. Duplicate values yield + unspecified results.

+
+ + + true to restrict these connections to read-only + transactions, false to allow them to modify the database. + + + + String containing role name for this connection entry. + +
+ + + + Maximum number of milliseconds to wait between connection attempts. + Default is implementation-specific. + + + + Maximum number of milliseconds of idle time on connection to the client + before sending an inactivity probe message. If Open vSwitch does not + communicate with the client for the specified number of milliseconds, it + will send a probe. If a response is not received for the same + additional amount of time, Open vSwitch assumes the connection has been + broken and attempts to reconnect. Default is implementation-specific. + A value of 0 disables inactivity probes. + + + + +

+ Key-value pair of is always updated. + Other key-value pairs in the status columns may be updated depending + on the type. +

+ +

+ When specifies a connection method that + listens for inbound connections (e.g. ptcp: or + punix:), both and + may also be updated while the + remaining key-value pairs are omitted. +

+ +

+ On the other hand, when specifies an + outbound connection, all key-value pairs may be updated, except + the above-mentioned two key-value pairs associated with inbound + connection targets. They are omitted. +

+ + + true if currently connected to this client, + false otherwise. + + + + A human-readable description of the last error on the connection + to the client; i.e. strerror(errno). This key + will exist only if an error has occurred. + + + +

+ The state of the connection to the client: +

+
+
VOID
+
Connection is disabled.
+ +
BACKOFF
+
Attempting to reconnect at an increasing period.
+ +
CONNECTING
+
Attempting to connect.
+ +
ACTIVE
+
Connected, remote host responsive.
+ +
IDLE
+
Connection is idle. Waiting for response to keep-alive.
+
+

+ These values may change in the future. They are provided only for + human consumption. +

+
+ + + The amount of time since this client last successfully connected + to the database (in seconds). Value is empty if client has never + successfully been connected. + + + + The amount of time since this client last disconnected from the + database (in seconds). Value is empty if client has never + disconnected. + + + + Space-separated list of the names of OVSDB locks that the connection + holds. Omitted if the connection does not hold any locks. + + + + Space-separated list of the names of OVSDB locks that the connection is + currently waiting to acquire. Omitted if the connection is not waiting + for any locks. + + + + Space-separated list of the names of OVSDB locks that the connection + has had stolen by another OVSDB client. Omitted if no locks have been + stolen from this connection. + + + + When specifies a connection method that + listens for inbound connections (e.g. ptcp: or + pssl:) and more than one connection is actually active, + the value is the number of active connections. Otherwise, this + key-value pair is omitted. + + + + When is ptcp: or + pssl:, this is the TCP port on which the OVSDB server is + listening. (This is particularly useful when specifies a port of 0, allowing the kernel to + choose any available port.) + +
+ + + + The Differentiated Service Code Point (DSCP) is specified using 6 bits + in the Type of Service (TOS) field in the IP header. DSCP provides a + mechanism to classify the network traffic and provide Quality of + Service (QoS) on IP networks. + + The DSCP value specified here is used when establishing the connection + between the manager and the Open vSwitch. If no value is specified, a + default value of 48 is chosen. Valid DSCP values must be in the range + 0 to 63. + + + + + External configuration options + + + External client-defined key-value pairs + + +
+
diff --git a/ovsdb/monitor.c b/ovsdb/monitor.c index 0f222cc992..952fa902e4 100644 --- a/ovsdb/monitor.c +++ b/ovsdb/monitor.c @@ -638,14 +638,14 @@ ovsdb_monitor_change_set_destroy(struct ovsdb_monitor_change_set *mcs) { ovs_list_remove(&mcs->list_node); - struct ovsdb_monitor_change_set_for_table *mcst, *next_mcst; - LIST_FOR_EACH_SAFE (mcst, next_mcst, list_in_change_set, + struct ovsdb_monitor_change_set_for_table *mcst; + LIST_FOR_EACH_SAFE (mcst, list_in_change_set, &mcs->change_set_for_tables) { ovs_list_remove(&mcst->list_in_change_set); ovs_list_remove(&mcst->list_in_mt); - struct ovsdb_monitor_row *row, *next; - HMAP_FOR_EACH_SAFE (row, next, hmap_node, &mcst->rows) { + struct ovsdb_monitor_row *row; + HMAP_FOR_EACH_SAFE (row, hmap_node, &mcst->rows) { hmap_remove(&mcst->rows, &row->hmap_node); ovsdb_monitor_row_destroy(mcst->mt, row, mcst->n_columns); } @@ -700,13 +700,13 @@ void ovsdb_monitor_session_condition_destroy( struct ovsdb_monitor_session_condition *condition) { - struct shash_node *node, *next; + struct shash_node *node; if (!condition) { return; } - SHASH_FOR_EACH_SAFE (node, next, &condition->tables) { + SHASH_FOR_EACH_SAFE (node, &condition->tables) { struct ovsdb_monitor_table_condition *mtc = node->data; ovsdb_condition_destroy(&mtc->new_condition); @@ -1122,11 +1122,11 @@ ovsdb_monitor_compose_update( json = NULL; struct ovsdb_monitor_change_set_for_table *mcst; LIST_FOR_EACH (mcst, list_in_change_set, &mcs->change_set_for_tables) { - struct ovsdb_monitor_row *row, *next; + struct ovsdb_monitor_row *row; struct json *table_json = NULL; struct ovsdb_monitor_table *mt = mcst->mt; - HMAP_FOR_EACH_SAFE (row, next, hmap_node, &mcst->rows) { + HMAP_FOR_EACH_SAFE (row, hmap_node, &mcst->rows) { struct json *row_json; row_json = (*row_update)(mt, condition, OVSDB_MONITOR_ROW, row, initial, changed, mcst->n_columns); @@ -1711,8 +1711,8 @@ ovsdb_monitor_destroy(struct ovsdb_monitor *dbmon) ovsdb_monitor_json_cache_flush(dbmon); 
hmap_destroy(&dbmon->json_cache); - struct ovsdb_monitor_change_set *cs, *cs_next; - LIST_FOR_EACH_SAFE (cs, cs_next, list_node, &dbmon->change_sets) { + struct ovsdb_monitor_change_set *cs; + LIST_FOR_EACH_SAFE (cs, list_node, &dbmon->change_sets) { ovsdb_monitor_change_set_destroy(cs); } @@ -1760,14 +1760,14 @@ ovsdb_monitors_commit(struct ovsdb *db, const struct ovsdb_txn *txn) void ovsdb_monitors_remove(struct ovsdb *db) { - struct ovsdb_monitor *m, *next_m; + struct ovsdb_monitor *m; - LIST_FOR_EACH_SAFE (m, next_m, list_node, &db->monitors) { - struct jsonrpc_monitor_node *jm, *next_jm; + LIST_FOR_EACH_SAFE (m, list_node, &db->monitors) { + struct jsonrpc_monitor_node *jm; /* Delete all front-end monitors. Removing the last front-end monitor * will also destroy the corresponding ovsdb_monitor. */ - LIST_FOR_EACH_SAFE (jm, next_jm, node, &m->jsonrpc_monitors) { + LIST_FOR_EACH_SAFE (jm, node, &m->jsonrpc_monitors) { ovsdb_jsonrpc_monitor_destroy(jm->jsonrpc_monitor, false); } } @@ -1789,14 +1789,14 @@ ovsdb_monitor_get_memory_usage(struct simap *usage) void ovsdb_monitor_prereplace_db(struct ovsdb *db) { - struct ovsdb_monitor *m, *next_m; + struct ovsdb_monitor *m; - LIST_FOR_EACH_SAFE (m, next_m, list_node, &db->monitors) { - struct jsonrpc_monitor_node *jm, *next_jm; + LIST_FOR_EACH_SAFE (m, list_node, &db->monitors) { + struct jsonrpc_monitor_node *jm; /* Delete all front-end monitors. Removing the last front-end monitor * will also destroy the corresponding ovsdb_monitor. 
*/ - LIST_FOR_EACH_SAFE (jm, next_jm, node, &m->jsonrpc_monitors) { + LIST_FOR_EACH_SAFE (jm, node, &m->jsonrpc_monitors) { ovsdb_jsonrpc_monitor_destroy(jm->jsonrpc_monitor, true); } } diff --git a/ovsdb/ovsdb-idlc.in b/ovsdb/ovsdb-idlc.in index 10a70ae26f..13c5359395 100755 --- a/ovsdb/ovsdb-idlc.in +++ b/ovsdb/ovsdb-idlc.in @@ -251,10 +251,18 @@ const struct %(s)s *%(s)s_table_first(const struct %(s)s_table *); for ((ROW) = %(s)s_table_first(TABLE); \\ (ROW); \\ (ROW) = %(s)s_next(ROW)) -#define %(S)s_TABLE_FOR_EACH_SAFE(ROW, NEXT, TABLE) \\ +#define %(S)s_TABLE_FOR_EACH_SAFE_LONG(ROW, NEXT, TABLE) \\ for ((ROW) = %(s)s_table_first(TABLE); \\ (ROW) ? ((NEXT) = %(s)s_next(ROW), 1) : 0; \\ (ROW) = (NEXT)) +#define %(S)s_TABLE_FOR_EACH_SAFE_SHORT(ROW, TABLE) \\ + for (const struct %(s)s * ROW__next = ((ROW) = %(s)s_table_first(TABLE), NULL); \\ + (ROW) ? (ROW__next = %(s)s_next(ROW), 1) : (ROW__next = NULL, 0); \\ + (ROW) = ROW__next) +#define %(S)s_TABLE_FOR_EACH_SAFE(...) \\ + OVERLOAD_SAFE_MACRO(%(S)s_TABLE_FOR_EACH_SAFE_LONG, \\ + %(S)s_TABLE_FOR_EACH_SAFE_SHORT, 3, __VA_ARGS__) + const struct %(s)s *%(s)s_get_for_uuid(const struct ovsdb_idl *, const struct uuid *); const struct %(s)s *%(s)s_table_get_for_uuid(const struct %(s)s_table *, const struct uuid *); @@ -264,10 +272,17 @@ const struct %(s)s *%(s)s_next(const struct %(s)s *); for ((ROW) = %(s)s_first(IDL); \\ (ROW); \\ (ROW) = %(s)s_next(ROW)) -#define %(S)s_FOR_EACH_SAFE(ROW, NEXT, IDL) \\ +#define %(S)s_FOR_EACH_SAFE_LONG(ROW, NEXT, IDL) \\ for ((ROW) = %(s)s_first(IDL); \\ (ROW) ? ((NEXT) = %(s)s_next(ROW), 1) : 0; \\ (ROW) = (NEXT)) +#define %(S)s_FOR_EACH_SAFE_SHORT(ROW, IDL) \\ + for (const struct %(s)s * ROW__next = ((ROW) = %(s)s_first(IDL), NULL); \\ + (ROW) ? (ROW__next = %(s)s_next(ROW), 1) : (ROW__next = NULL, 0); \\ + (ROW) = ROW__next) +#define %(S)s_FOR_EACH_SAFE(...) 
\\ + OVERLOAD_SAFE_MACRO(%(S)s_FOR_EACH_SAFE_LONG, \\ + %(S)s_FOR_EACH_SAFE_SHORT, 3, __VA_ARGS__) unsigned int %(s)s_get_seqno(const struct ovsdb_idl *); unsigned int %(s)s_row_get_seqno(const struct %(s)s *row, enum ovsdb_idl_change change); diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c index 9fe90592ea..774416fc7a 100644 --- a/ovsdb/ovsdb-server.c +++ b/ovsdb/ovsdb-server.c @@ -26,6 +26,7 @@ #include "command-line.h" #include "daemon.h" #include "dirs.h" +#include "dns-resolve.h" #include "openvswitch/dynamic-string.h" #include "fatal-signal.h" #include "file.h" @@ -228,8 +229,7 @@ main_loop(struct server_config *config, ovsdb_relay_run(); - struct shash_node *next; - SHASH_FOR_EACH_SAFE (node, next, all_dbs) { + SHASH_FOR_EACH_SAFE (node, all_dbs) { struct db *db = node->data; ovsdb_txn_history_run(db->db); ovsdb_storage_run(db->db->storage); @@ -321,7 +321,7 @@ main(int argc, char *argv[]) FILE *config_tmpfile; struct server_config server_config; struct shash all_dbs; - struct shash_node *node, *next; + struct shash_node *node; int replication_probe_interval = REPLICATION_DEFAULT_PROBE_INTERVAL; ovs_cmdl_proctitle_init(argc, argv); @@ -329,6 +329,7 @@ main(int argc, char *argv[]) service_start(&argc, &argv); fatal_ignore_sigpipe(); process_init(); + dns_resolve_init(true); bool active = false; parse_options(argc, argv, &db_filenames, &remotes, &unixctl_path, @@ -490,7 +491,7 @@ main(int argc, char *argv[]) main_loop(&server_config, jsonrpc, &all_dbs, unixctl, &remotes, run_process, &exiting, &is_backup); - SHASH_FOR_EACH_SAFE(node, next, &all_dbs) { + SHASH_FOR_EACH_SAFE (node, &all_dbs) { struct db *db = node->data; close_db(&server_config, db, NULL); shash_delete(&all_dbs, node); @@ -511,6 +512,7 @@ main(int argc, char *argv[]) run_command, process_status_msg(status)); } } + dns_resolve_destroy(); perf_counters_destroy(); service_stop(); return 0; @@ -1240,8 +1242,8 @@ update_server_status(struct shash *all_dbs) /* Update rows for databases that 
still exist. * Delete rows for databases that no longer exist. */ - const struct ovsdb_row *row, *next_row; - HMAP_FOR_EACH_SAFE (row, next_row, hmap_node, &database_table->rows) { + const struct ovsdb_row *row; + HMAP_FOR_EACH_SAFE (row, hmap_node, &database_table->rows) { const char *name; ovsdb_util_read_string_column(row, "name", &name); struct db *db = shash_find_data(all_dbs, name); diff --git a/ovsdb/ovsdb-tool.c b/ovsdb/ovsdb-tool.c index d4a9e34cc4..df2e373c3c 100644 --- a/ovsdb/ovsdb-tool.c +++ b/ovsdb/ovsdb-tool.c @@ -1579,15 +1579,14 @@ do_check_cluster(struct ovs_cmdl_context *ctx) } free(c.servers); - struct commit *next_commit; - HMAP_FOR_EACH_SAFE (commit, next_commit, hmap_node, &c.commits) { + HMAP_FOR_EACH_SAFE (commit, hmap_node, &c.commits) { hmap_remove(&c.commits, &commit->hmap_node); free(commit); } hmap_destroy(&c.commits); - struct leader *leader, *next_leader; - HMAP_FOR_EACH_SAFE (leader, next_leader, hmap_node, &c.leaders) { + struct leader *leader; + HMAP_FOR_EACH_SAFE (leader, hmap_node, &c.leaders) { hmap_remove(&c.leaders, &leader->hmap_node); free(leader); } diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c index e6d866182c..91b4a01af8 100644 --- a/ovsdb/ovsdb.c +++ b/ovsdb/ovsdb.c @@ -571,8 +571,8 @@ ovsdb_replace(struct ovsdb *dst, struct ovsdb *src) ovsdb_monitor_prereplace_db(dst); /* Cancel triggers. */ - struct ovsdb_trigger *trigger, *next; - LIST_FOR_EACH_SAFE (trigger, next, node, &dst->triggers) { + struct ovsdb_trigger *trigger; + LIST_FOR_EACH_SAFE (trigger, node, &dst->triggers) { ovsdb_trigger_prereplace_db(trigger); } diff --git a/ovsdb/query.c b/ovsdb/query.c index de74519989..eebe564127 100644 --- a/ovsdb/query.c +++ b/ovsdb/query.c @@ -40,9 +40,9 @@ ovsdb_query(struct ovsdb_table *table, const struct ovsdb_condition *cnd, } } else { /* Linear scan. 
*/ - const struct ovsdb_row *row, *next; + const struct ovsdb_row *row; - HMAP_FOR_EACH_SAFE (row, next, hmap_node, &table->rows) { + HMAP_FOR_EACH_SAFE (row, hmap_node, &table->rows) { if (ovsdb_condition_match_every_clause(row, cnd) && !output_row(row, aux)) { break; diff --git a/ovsdb/raft-private.c b/ovsdb/raft-private.c index 30760233ee..e685c8103b 100644 --- a/ovsdb/raft-private.c +++ b/ovsdb/raft-private.c @@ -36,7 +36,10 @@ raft_address_validate(const char *address) return NULL; } else if (!strncmp(address, "ssl:", 4) || !strncmp(address, "tcp:", 4)) { struct sockaddr_storage ss; - if (!inet_parse_active(address + 4, -1, &ss, true)) { + bool dns_failure = false; + + if (!inet_parse_active(address + 4, -1, &ss, true, &dns_failure) + && !dns_failure) { return ovsdb_error(NULL, "%s: syntax error in address", address); } return NULL; @@ -147,8 +150,8 @@ raft_server_destroy(struct raft_server *s) void raft_servers_destroy(struct hmap *servers) { - struct raft_server *s, *next; - HMAP_FOR_EACH_SAFE (s, next, hmap_node, servers) { + struct raft_server *s; + HMAP_FOR_EACH_SAFE (s, hmap_node, servers) { hmap_remove(servers, &s->hmap_node); raft_server_destroy(s); } diff --git a/ovsdb/raft.c b/ovsdb/raft.c index 1a3447a8dd..856d083f21 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c @@ -74,9 +74,12 @@ enum raft_failure_test { FT_CRASH_BEFORE_SEND_EXEC_REQ, FT_CRASH_AFTER_SEND_EXEC_REQ, FT_CRASH_AFTER_RECV_APPEND_REQ_UPDATE, + FT_CRASH_BEFORE_SEND_SNAPSHOT_REP, FT_DELAY_ELECTION, FT_DONT_SEND_VOTE_REQUEST, FT_STOP_RAFT_RPC, + FT_TRANSFER_LEADERSHIP, + FT_TRANSFER_LEADERSHIP_AFTER_SEND_APPEND_REQ, }; static enum raft_failure_test failure_test; @@ -379,12 +382,19 @@ static bool raft_handle_write_error(struct raft *, struct ovsdb_error *); static void raft_run_reconfigure(struct raft *); static void raft_set_leader(struct raft *, const struct uuid *sid); + static struct raft_server * raft_find_server(const struct raft *raft, const struct uuid *sid) { return 
raft_server_find(&raft->servers, sid); } +static struct raft_server * +raft_find_new_server(struct raft *raft, const struct uuid *uuid) +{ + return raft_server_find(&raft->add_servers, uuid); +} + static char * raft_make_address_passive(const char *address_) { @@ -692,8 +702,8 @@ static void raft_set_servers(struct raft *raft, const struct hmap *new_servers, enum vlog_level level) { - struct raft_server *s, *next; - HMAP_FOR_EACH_SAFE (s, next, hmap_node, &raft->servers) { + struct raft_server *s; + HMAP_FOR_EACH_SAFE (s, hmap_node, &raft->servers) { if (!raft_server_find(new_servers, &s->sid)) { ovs_assert(s != raft->remove_server); @@ -703,7 +713,7 @@ raft_set_servers(struct raft *raft, const struct hmap *new_servers, } } - HMAP_FOR_EACH_SAFE (s, next, hmap_node, new_servers) { + HMAP_FOR_EACH_SAFE (s, hmap_node, new_servers) { if (!raft_find_server(raft, &s->sid)) { VLOG(level, "server %s added to configuration", s->nickname); @@ -1376,8 +1386,8 @@ raft_close__(struct raft *raft) raft->remove_server = NULL; } - struct raft_conn *conn, *next; - LIST_FOR_EACH_SAFE (conn, next, list_node, &raft->conns) { + struct raft_conn *conn; + LIST_FOR_EACH_SAFE (conn, list_node, &raft->conns) { raft_conn_close(conn); } } @@ -1713,8 +1723,8 @@ raft_waiters_run(struct raft *raft) } uint64_t cur = ovsdb_log_commit_progress(raft->log); - struct raft_waiter *w, *next; - LIST_FOR_EACH_SAFE (w, next, list_node, &raft->waiters) { + struct raft_waiter *w; + LIST_FOR_EACH_SAFE (w, list_node, &raft->waiters) { if (cur < w->commit_ticket) { break; } @@ -1736,8 +1746,8 @@ raft_waiters_wait(struct raft *raft) static void raft_waiters_destroy(struct raft *raft) { - struct raft_waiter *w, *next; - LIST_FOR_EACH_SAFE (w, next, list_node, &raft->waiters) { + struct raft_waiter *w; + LIST_FOR_EACH_SAFE (w, list_node, &raft->waiters) { raft_waiter_destroy(w); } } @@ -1867,6 +1877,8 @@ raft_open_conn(struct raft *raft, const char *address, const struct uuid *sid) static void 
raft_conn_close(struct raft_conn *conn) { + VLOG_DBG("closing connection to server %s (%s)", + conn->nickname, jsonrpc_session_get_name(conn->js)); jsonrpc_session_close(conn->js); ovs_list_remove(&conn->list_node); free(conn->nickname); @@ -1921,6 +1933,13 @@ raft_run(struct raft *raft) return; } + if (failure_test == FT_TRANSFER_LEADERSHIP) { + /* Using this function as it conveniently implements all we need and + * snapshotting is the main test scenario for leadership transfer. */ + raft_notify_snapshot_recommended(raft); + failure_test = FT_NO_TEST; + } + raft_waiters_run(raft); if (!raft->listener && time_msec() >= raft->listen_backoff) { @@ -1957,16 +1976,29 @@ raft_run(struct raft *raft) } /* Close unneeded sessions. */ - struct raft_conn *next; - LIST_FOR_EACH_SAFE (conn, next, list_node, &raft->conns) { + struct raft_server *server; + LIST_FOR_EACH_SAFE (conn, list_node, &raft->conns) { if (!raft_conn_should_stay_open(raft, conn)) { + server = raft_find_new_server(raft, &conn->sid); + if (server) { + /* We only have one incoming connection from joining servers, + * so if it's closed, we need to destroy the record about the + * server. This way the process can be started over on the + * next join request. */ + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); + VLOG_INFO_RL(&rl, "cluster "CID_FMT": server %s (%s) " + "disconnected while joining", + CID_ARGS(&raft->cid), + server->nickname, server->address); + hmap_remove(&raft->add_servers, &server->hmap_node); + raft_server_destroy(server); + } raft->n_disconnections++; raft_conn_close(conn); } } /* Open needed sessions. */ - struct raft_server *server; HMAP_FOR_EACH (server, hmap_node, &raft->servers) { raft_open_conn(raft, server->address, &server->sid); } @@ -2039,11 +2071,18 @@ raft_run(struct raft *raft) * commands becomes new leader: the pending commands can still complete * if the crashed leader has replicated the transactions to majority of * followers before it crashed. 
*/ - struct raft_command *cmd, *next_cmd; - HMAP_FOR_EACH_SAFE (cmd, next_cmd, hmap_node, &raft->commands) { + struct raft_command *cmd; + HMAP_FOR_EACH_SAFE (cmd, hmap_node, &raft->commands) { if (cmd->timestamp && now - cmd->timestamp > raft->election_timer * 2) { - raft_command_complete(raft, cmd, RAFT_CMD_TIMEOUT); + if (cmd->index && raft->role != RAFT_LEADER) { + /* This server lost leadership and command didn't complete + * in time. Likely, it wasn't replicated to the majority + * of servers before losing the leadership. */ + raft_command_complete(raft, cmd, RAFT_CMD_LOST_LEADERSHIP); + } else { + raft_command_complete(raft, cmd, RAFT_CMD_TIMEOUT); + } } } raft_reset_ping_timer(raft); @@ -2235,6 +2274,9 @@ raft_command_initiate(struct raft *raft, if (failure_test == FT_CRASH_AFTER_SEND_APPEND_REQ) { ovs_fatal(0, "Raft test: crash after sending append_request."); } + if (failure_test == FT_TRANSFER_LEADERSHIP_AFTER_SEND_APPEND_REQ) { + failure_test = FT_TRANSFER_LEADERSHIP; + } raft_reset_ping_timer(raft); return cmd; @@ -2243,8 +2285,12 @@ raft_command_initiate(struct raft *raft, static void log_all_commands(struct raft *raft) { - struct raft_command *cmd, *next; - HMAP_FOR_EACH_SAFE (cmd, next, hmap_node, &raft->commands) { + if (!VLOG_IS_DBG_ENABLED()) { + return; + } + + struct raft_command *cmd; + HMAP_FOR_EACH (cmd, hmap_node, &raft->commands) { VLOG_DBG("raft command eid: "UUID_FMT, UUID_ARGS(&cmd->eid)); } } @@ -2398,8 +2444,8 @@ raft_command_complete(struct raft *raft, static void raft_complete_all_commands(struct raft *raft, enum raft_command_status status) { - struct raft_command *cmd, *next; - HMAP_FOR_EACH_SAFE (cmd, next, hmap_node, &raft->commands) { + struct raft_command *cmd; + HMAP_FOR_EACH_SAFE (cmd, hmap_node, &raft->commands) { raft_command_complete(raft, cmd, status); } } @@ -2601,7 +2647,13 @@ raft_become_follower(struct raft *raft) * configuration is already part of the log. 
Possibly the configuration * log entry will not be committed, but until we know that we must use the * new configuration. Our AppendEntries processing will properly update - * the server configuration later, if necessary. */ + * the server configuration later, if necessary. + * + * Also we do not complete commands here, as they can still be completed + * if their log entries have already been replicated to other servers. + * If the entries were actually committed according to the new leader, our + * AppendEntries processing will complete the corresponding commands. + */ struct raft_server *s; HMAP_FOR_EACH (s, hmap_node, &raft->add_servers) { raft_send_add_server_reply__(raft, &s->sid, s->address, false, @@ -2615,8 +2667,6 @@ raft_become_follower(struct raft *raft) raft_server_destroy(raft->remove_server); raft->remove_server = NULL; } - - raft_complete_all_commands(raft, RAFT_CMD_LOST_LEADERSHIP); } static void @@ -2865,61 +2915,56 @@ raft_update_commit_index(struct raft *raft, uint64_t new_commit_index) return false; } - if (raft->role == RAFT_LEADER) { - while (raft->commit_index < new_commit_index) { - uint64_t index = ++raft->commit_index; - const struct raft_entry *e = raft_get_entry(raft, index); - if (raft_entry_has_data(e)) { - struct raft_command *cmd - = raft_find_command_by_eid(raft, &e->eid); - if (cmd) { - if (!cmd->index) { - VLOG_DBG("Command completed after role change from" - " follower to leader "UUID_FMT, - UUID_ARGS(&e->eid)); - cmd->index = index; - } - raft_command_complete(raft, cmd, RAFT_CMD_SUCCESS); + while (raft->commit_index < new_commit_index) { + uint64_t index = ++raft->commit_index; + const struct raft_entry *e = raft_get_entry(raft, index); + + if (raft_entry_has_data(e)) { + struct raft_command *cmd = raft_find_command_by_eid(raft, &e->eid); + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); + + if (cmd) { + if (!cmd->index && raft->role == RAFT_LEADER) { + VLOG_INFO_RL(&rl, + "command completed after role change 
from " + "follower to leader (eid: "UUID_FMT", " + "commit index: %"PRIu64")", UUID_ARGS(&e->eid), index); + } else if (!cmd->index && raft->role != RAFT_LEADER) { + /* This can happen when the leader fails over before + * sending execute_command_reply. */ + VLOG_INFO_RL(&rl, + "command completed without reply (eid: "UUID_FMT", " + "commit index: %"PRIu64")", UUID_ARGS(&e->eid), index); + } else if (cmd->index && raft->role != RAFT_LEADER) { + /* This can happen if current server lost leadership after + * sending append requests to the majority of servers, but + * before receiving majority of append replies. */ + VLOG_INFO_RL(&rl, + "command completed after role change from " + "leader to follower (eid: "UUID_FMT", " + "commit index: %"PRIu64")", UUID_ARGS(&e->eid), index); + /* Clearing 'sid' to avoid sending cmd execution reply. */ + cmd->sid = UUID_ZERO; + } else { + /* (cmd->index && raft->role == RAFT_LEADER) + * Normal command completion on a leader. */ } - } - if (e->election_timer) { - VLOG_INFO("Election timer changed from %"PRIu64" to %"PRIu64, - raft->election_timer, e->election_timer); - raft->election_timer = e->election_timer; - raft->election_timer_new = 0; - raft_update_probe_intervals(raft); - } - if (e->servers) { - /* raft_run_reconfigure() can write a new Raft entry, which can - * reallocate raft->entries, which would invalidate 'e', so - * this case must be last, after the one for 'e->data'.
*/ - raft_run_reconfigure(raft); + cmd->index = index; + raft_command_complete(raft, cmd, RAFT_CMD_SUCCESS); } } - } else { - while (raft->commit_index < new_commit_index) { - uint64_t index = ++raft->commit_index; - const struct raft_entry *e = raft_get_entry(raft, index); - if (e->election_timer) { - VLOG_INFO("Election timer changed from %"PRIu64" to %"PRIu64, - raft->election_timer, e->election_timer); - raft->election_timer = e->election_timer; - raft_update_probe_intervals(raft); - } + if (e->election_timer) { + VLOG_INFO("Election timer changed from %"PRIu64" to %"PRIu64, + raft->election_timer, e->election_timer); + raft->election_timer = e->election_timer; + raft->election_timer_new = 0; + raft_update_probe_intervals(raft); } - /* Check if any pending command can be completed, and complete it. - * This can happen when leader fail-over before sending - * execute_command_reply. */ - const struct uuid *eid = raft_get_eid(raft, new_commit_index); - struct raft_command *cmd = raft_find_command_by_eid(raft, eid); - if (cmd) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); - VLOG_INFO_RL(&rl, - "Command completed without reply (eid: "UUID_FMT", " - "commit index: %"PRIu64")", - UUID_ARGS(eid), new_commit_index); - cmd->index = new_commit_index; - raft_command_complete(raft, cmd, RAFT_CMD_SUCCESS); + if (e->servers && raft->role == RAFT_LEADER) { + /* raft_run_reconfigure() can write a new Raft entry, which can + * reallocate raft->entries, which would invalidate 'e', so + * this case must be last, after the one for 'e->data'. */ + raft_run_reconfigure(raft); } } @@ -3354,12 +3399,6 @@ raft_find_peer(struct raft *raft, const struct uuid *uuid) return s && !uuid_equals(&raft->sid, &s->sid) ? 
s : NULL; } -static struct raft_server * -raft_find_new_server(struct raft *raft, const struct uuid *uuid) -{ - return raft_server_find(&raft->add_servers, uuid); -} - /* Figure 3.1: "If there exists an N such that N > commitIndex, a * majority of matchIndex[i] >= N, and log[N].term == currentTerm, set * commitIndex = N (sections 3.5 and 3.6)." */ @@ -4142,6 +4181,10 @@ static void raft_handle_install_snapshot_request( struct raft *raft, const struct raft_install_snapshot_request *rq) { + if (failure_test == FT_CRASH_BEFORE_SEND_SNAPSHOT_REP) { + ovs_fatal(0, "Raft test: crash before sending install_snapshot_reply"); + } + if (raft_handle_install_snapshot_request__(raft, rq)) { union raft_rpc rpy = { .install_snapshot_reply = { @@ -4940,6 +4983,8 @@ raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED, failure_test = FT_CRASH_AFTER_SEND_EXEC_REQ; } else if (!strcmp(test, "crash-after-receiving-append-request-update")) { failure_test = FT_CRASH_AFTER_RECV_APPEND_REQ_UPDATE; + } else if (!strcmp(test, "crash-before-sending-install-snapshot-reply")) { + failure_test = FT_CRASH_BEFORE_SEND_SNAPSHOT_REP; } else if (!strcmp(test, "delay-election")) { failure_test = FT_DELAY_ELECTION; struct raft *raft; @@ -4952,6 +4997,11 @@ raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED, failure_test = FT_DONT_SEND_VOTE_REQUEST; } else if (!strcmp(test, "stop-raft-rpc")) { failure_test = FT_STOP_RAFT_RPC; + } else if (!strcmp(test, + "transfer-leadership-after-sending-append-request")) { + failure_test = FT_TRANSFER_LEADERSHIP_AFTER_SEND_APPEND_REQ; + } else if (!strcmp(test, "transfer-leadership")) { + failure_test = FT_TRANSFER_LEADERSHIP; } else if (!strcmp(test, "clear")) { failure_test = FT_NO_TEST; unixctl_command_reply(conn, "test dismissed"); diff --git a/ovsdb/relay.c b/ovsdb/relay.c index ef0e44d340..122ee8c52f 100644 --- a/ovsdb/relay.c +++ b/ovsdb/relay.c @@ -269,9 +269,9 @@ ovsdb_relay_clear(struct ovsdb *db) SHASH_FOR_EACH (table_node, 
&db->tables) { struct ovsdb_table *table = table_node->data; - struct ovsdb_row *row, *next; + struct ovsdb_row *row; - HMAP_FOR_EACH_SAFE (row, next, hmap_node, &table->rows) { + HMAP_FOR_EACH_SAFE (row, hmap_node, &table->rows) { ovsdb_txn_row_delete(txn, row); } } diff --git a/ovsdb/replication.c b/ovsdb/replication.c index d8b56d8131..477c69d701 100644 --- a/ovsdb/replication.c +++ b/ovsdb/replication.c @@ -549,8 +549,8 @@ reset_database(struct ovsdb *db) /* Delete all rows if the table is not excluded. */ if (!excluded_tables_find(db->schema->name, table_node->name)) { struct ovsdb_table *table = table_node->data; - struct ovsdb_row *row, *next; - HMAP_FOR_EACH_SAFE (row, next, hmap_node, &table->rows) { + struct ovsdb_row *row; + HMAP_FOR_EACH_SAFE (row, hmap_node, &table->rows) { ovsdb_txn_row_delete(txn, row); } } @@ -769,9 +769,9 @@ replication_dbs_destroy(void) return; } - struct shash_node *node, *next; + struct shash_node *node; - SHASH_FOR_EACH_SAFE (node, next, replication_dbs) { + SHASH_FOR_EACH_SAFE (node, replication_dbs) { hmap_remove(&replication_dbs->map, &node->node); struct replication_db *rdb = node->data; if (rdb->active_db_schema) { diff --git a/ovsdb/table.c b/ovsdb/table.c index 455a3663fe..2184701ec1 100644 --- a/ovsdb/table.c +++ b/ovsdb/table.c @@ -309,10 +309,10 @@ void ovsdb_table_destroy(struct ovsdb_table *table) { if (table) { - struct ovsdb_row *row, *next; + struct ovsdb_row *row; size_t i; - HMAP_FOR_EACH_SAFE (row, next, hmap_node, &table->rows) { + HMAP_FOR_EACH_SAFE (row, hmap_node, &table->rows) { ovsdb_row_destroy(row); } hmap_destroy(&table->rows); diff --git a/ovsdb/transaction-forward.c b/ovsdb/transaction-forward.c index d15f2f1d6d..963e937957 100644 --- a/ovsdb/transaction-forward.c +++ b/ovsdb/transaction-forward.c @@ -126,10 +126,10 @@ ovsdb_txn_forward_steal_reply(struct ovsdb_txn_forward *txn_fwd) void ovsdb_txn_forward_run(struct ovsdb *db, struct ovsdb_cs *cs) { - struct ovsdb_txn_forward *t, *next; + struct 
ovsdb_txn_forward *t; /* Send all transactions that needs to be forwarded. */ - LIST_FOR_EACH_SAFE (t, next, new_node, &db->txn_forward_new) { + LIST_FOR_EACH_SAFE (t, new_node, &db->txn_forward_new) { if (!ovsdb_cs_may_send_transaction(cs)) { break; } @@ -167,9 +167,9 @@ ovsdb_txn_forward_cancel(struct ovsdb *db, struct ovsdb_txn_forward *txn_fwd) void ovsdb_txn_forward_cancel_all(struct ovsdb *db, bool sent_only) { - struct ovsdb_txn_forward *t, *next; + struct ovsdb_txn_forward *t; - HMAP_FOR_EACH_SAFE (t, next, sent_node, &db->txn_forward_sent) { + HMAP_FOR_EACH_SAFE (t, sent_node, &db->txn_forward_sent) { ovsdb_txn_forward_cancel(db, t); } @@ -177,7 +177,7 @@ ovsdb_txn_forward_cancel_all(struct ovsdb *db, bool sent_only) return; } - LIST_FOR_EACH_SAFE (t, next, new_node, &db->txn_forward_new) { + LIST_FOR_EACH_SAFE (t, new_node, &db->txn_forward_new) { ovsdb_txn_forward_cancel(db, t); } } diff --git a/ovsdb/transaction.c b/ovsdb/transaction.c index db86d847c3..3a6ddfa1df 100644 --- a/ovsdb/transaction.c +++ b/ovsdb/transaction.c @@ -159,15 +159,15 @@ ovsdb_txn_row_abort(struct ovsdb_txn *txn OVS_UNUSED, hmap_replace(&new->table->rows, &new->hmap_node, &old->hmap_node); } - struct ovsdb_weak_ref *weak, *next; - LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->deleted_refs) { + struct ovsdb_weak_ref *weak; + LIST_FOR_EACH_SAFE (weak, src_node, &txn_row->deleted_refs) { ovs_list_remove(&weak->src_node); ovs_list_init(&weak->src_node); if (hmap_node_is_null(&weak->dst_node)) { ovsdb_weak_ref_destroy(weak); } } - LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->added_refs) { + LIST_FOR_EACH_SAFE (weak, src_node, &txn_row->added_refs) { ovs_list_remove(&weak->src_node); ovs_list_init(&weak->src_node); if (hmap_node_is_null(&weak->dst_node)) { @@ -508,11 +508,11 @@ static struct ovsdb_error * ovsdb_txn_update_weak_refs(struct ovsdb_txn *txn OVS_UNUSED, struct ovsdb_txn_row *txn_row) { - struct ovsdb_weak_ref *weak, *next, *dst_weak; + struct ovsdb_weak_ref 
*weak, *dst_weak; struct ovsdb_row *dst_row; /* Find and clean up deleted references from destination rows. */ - LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->deleted_refs) { + LIST_FOR_EACH_SAFE (weak, src_node, &txn_row->deleted_refs) { dst_row = CONST_CAST(struct ovsdb_row *, ovsdb_table_get_row(weak->dst_table, &weak->dst)); if (dst_row) { @@ -529,7 +529,7 @@ ovsdb_txn_update_weak_refs(struct ovsdb_txn *txn OVS_UNUSED, } /* Insert the weak references added in the new version of the row. */ - LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->added_refs) { + LIST_FOR_EACH_SAFE (weak, src_node, &txn_row->added_refs) { dst_row = CONST_CAST(struct ovsdb_row *, ovsdb_table_get_row(weak->dst_table, &weak->dst)); @@ -597,7 +597,7 @@ find_and_add_weak_ref(struct ovsdb_txn_row *txn_row, static struct ovsdb_error * OVS_WARN_UNUSED_RESULT assess_weak_refs(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row) { - struct ovsdb_weak_ref *weak, *next; + struct ovsdb_weak_ref *weak; struct ovsdb_table *table; struct shash_node *node; @@ -642,7 +642,7 @@ assess_weak_refs(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row) /* Collecting all key-value pairs that references deleted rows. 
*/ ovsdb_datum_init_empty(&deleted_refs); - LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->deleted_refs) { + LIST_FOR_EACH_SAFE (weak, src_node, &txn_row->deleted_refs) { if (column->index == weak->column_idx) { ovsdb_datum_add_unsafe(&deleted_refs, &weak->key, &weak->value, &column->type, NULL); @@ -1094,10 +1094,10 @@ static void ovsdb_txn_destroy_cloned(struct ovsdb_txn *txn) { ovs_assert(!txn->db); - struct ovsdb_txn_table *t, *next_txn_table; - LIST_FOR_EACH_SAFE (t, next_txn_table, node, &txn->txn_tables) { - struct ovsdb_txn_row *r, *next_txn_row; - HMAP_FOR_EACH_SAFE (r, next_txn_row, hmap_node, &t->txn_rows) { + struct ovsdb_txn_table *t; + LIST_FOR_EACH_SAFE (t, node, &txn->txn_tables) { + struct ovsdb_txn_row *r; + HMAP_FOR_EACH_SAFE (r, hmap_node, &t->txn_rows) { if (r->old) { ovsdb_row_destroy(r->old); } @@ -1549,19 +1549,19 @@ for_each_txn_row(struct ovsdb_txn *txn, serial++; do { - struct ovsdb_txn_table *t, *next_txn_table; + struct ovsdb_txn_table *t; any_work = false; - LIST_FOR_EACH_SAFE (t, next_txn_table, node, &txn->txn_tables) { + LIST_FOR_EACH_SAFE (t, node, &txn->txn_tables) { if (t->serial != serial) { t->serial = serial; t->n_processed = 0; } while (t->n_processed < hmap_count(&t->txn_rows)) { - struct ovsdb_txn_row *r, *next_txn_row; + struct ovsdb_txn_row *r; - HMAP_FOR_EACH_SAFE (r, next_txn_row, hmap_node, &t->txn_rows) { + HMAP_FOR_EACH_SAFE (r, hmap_node, &t->txn_rows) { if (r->serial != serial) { struct ovsdb_error *error; @@ -1629,8 +1629,8 @@ ovsdb_txn_history_destroy(struct ovsdb *db) return; } - struct ovsdb_txn_history_node *txn_h_node, *next; - LIST_FOR_EACH_SAFE (txn_h_node, next, node, &db->txn_history) { + struct ovsdb_txn_history_node *txn_h_node; + LIST_FOR_EACH_SAFE (txn_h_node, node, &db->txn_history) { ovs_list_remove(&txn_h_node->node); ovsdb_txn_destroy_cloned(txn_h_node->txn); free(txn_h_node); diff --git a/ovsdb/trigger.c b/ovsdb/trigger.c index 726c138bf0..7d3003bca3 100644 --- a/ovsdb/trigger.c +++ 
b/ovsdb/trigger.c @@ -146,14 +146,14 @@ ovsdb_trigger_prereplace_db(struct ovsdb_trigger *trigger) bool ovsdb_trigger_run(struct ovsdb *db, long long int now) { - struct ovsdb_trigger *t, *next; + struct ovsdb_trigger *t; bool run_triggers = db->run_triggers; db->run_triggers_now = db->run_triggers = false; bool disconnect_all = false; - LIST_FOR_EACH_SAFE (t, next, node, &db->triggers) { + LIST_FOR_EACH_SAFE (t, node, &db->triggers) { if (run_triggers || now - t->created >= t->timeout_msec || t->progress || t->txn_forward) { diff --git a/python/ovs/db/idl.py b/python/ovs/db/idl.py index 4ecdcaa197..b87099ff52 100644 --- a/python/ovs/db/idl.py +++ b/python/ovs/db/idl.py @@ -140,6 +140,47 @@ class ConditionState(object): return False +class IdlTable(object): + def __init__(self, idl, table): + assert(isinstance(table, ovs.db.schema.TableSchema)) + self._table = table + self.need_table = False + self.rows = custom_index.IndexedRows(self) + self.idl = idl + self._condition_state = ConditionState() + self.columns = {k: IdlColumn(v) for k, v in table.columns.items()} + + def __getattr__(self, attr): + return getattr(self._table, attr) + + @property + def condition_state(self): + # read-only, no setter + return self._condition_state + + @property + def condition(self): + return self.condition_state.latest + + @condition.setter + def condition(self, condition): + assert(isinstance(condition, list)) + self.idl.cond_change(self.name, condition) + + @classmethod + def schema_tables(cls, idl, schema): + return {k: cls(idl, v) for k, v in schema.tables.items()} + + +class IdlColumn(object): + def __init__(self, column): + self._column = column + self.alert = True + + def __getattr__(self, attr): + return getattr(self._column, attr) + + class Idl(object): """Open vSwitch Database Interface Definition Language (OVSDB IDL). 
@@ -241,7 +282,7 @@ class Idl(object): assert isinstance(schema_helper, SchemaHelper) schema = schema_helper.get_idl_schema() - self.tables = schema.tables + self.tables = IdlTable.schema_tables(self, schema) self.readonly = schema.readonly self._db = schema remotes = self._parse_remotes(remote) @@ -282,15 +323,6 @@ class Idl(object): self.cond_changed = False self.cond_seqno = 0 - for table in schema.tables.values(): - for column in table.columns.values(): - if not hasattr(column, 'alert'): - column.alert = True - table.need_table = False - table.rows = custom_index.IndexedRows(table) - table.idl = self - table.condition = ConditionState() - def _parse_remotes(self, remote): # If remote is - # "tcp:10.0.0.1:6641,unix:/tmp/db.sock,t,s,tcp:10.0.0.2:6642" @@ -330,7 +362,7 @@ class Idl(object): def ack_conditions(self): """Mark all requested table conditions as acked""" for table in self.tables.values(): - table.condition.ack() + table.condition_state.ack() def sync_conditions(self): """Synchronize condition state when the FSM is restarted @@ -356,14 +388,17 @@ class Idl(object): flushing the local cached DB contents. 
""" ack_all = self.last_id == str(uuid.UUID(int=0)) + if ack_all: + self.cond_changed = False + for table in self.tables.values(): if ack_all: - table.condition.request() - table.condition.ack() + table.condition_state.request() + table.condition_state.ack() else: - if table.condition.reset(): + if table.condition_state.reset(): self.last_id = str(uuid.UUID(int=0)) - self.cond_changed = True + self.cond_changed = True def restart_fsm(self): # Resync data DB table conditions to avoid missing updated due to @@ -482,7 +517,7 @@ class Idl(object): sh.register_table(self._server_db_table) schema = sh.get_idl_schema() self._server_db = schema - self.server_tables = schema.tables + self.server_tables = IdlTable.schema_tables(self, schema) self.__send_server_monitor_request() except error.Error as e: vlog.err("%s: error receiving server schema: %s" @@ -588,10 +623,10 @@ class Idl(object): for table in self.tables.values(): # Always use the most recent conditions set by the IDL client when # requesting monitor_cond_change - if table.condition.new is not None: + if table.condition_state.new is not None: change_requests[table.name] = [ - {"where": table.condition.new}] - table.condition.request() + {"where": table.condition_state.new}] + table.condition_state.request() if not change_requests: return @@ -627,19 +662,20 @@ class Idl(object): cond = [False] # Compare the new condition to the last known condition - if table.condition.latest != cond: - table.condition.init(cond) + if table.condition_state.latest != cond: + table.condition_state.init(cond) self.cond_changed = True # New condition will be sent out after all already requested ones # are acked. 
- if table.condition.new: - any_reqs = any(t.condition.request for t in self.tables.values()) + if table.condition_state.new: + any_reqs = any(t.condition_state.request + for t in self.tables.values()) return self.cond_seqno + int(any_reqs) + 1 # Already requested conditions should be up to date at # self.cond_seqno + 1 while acked conditions are already up to date - return self.cond_seqno + int(bool(table.condition.requested)) + return self.cond_seqno + int(bool(table.condition_state.requested)) def wait(self, poller): """Arranges for poller.block() to wake up when self.run() has something @@ -811,8 +847,8 @@ class Idl(object): columns.append(column) monitor_request = {"columns": columns} if method in ("monitor_cond", "monitor_cond_since") and ( - not ConditionState.is_true(table.condition.acked)): - monitor_request["where"] = table.condition.acked + not ConditionState.is_true(table.condition_state.acked)): + monitor_request["where"] = table.condition_state.acked monitor_requests[table.name] = [monitor_request] args = [self._db.name, str(self.uuid), monitor_requests] @@ -1148,13 +1184,6 @@ class Idl(object): return True -def _uuid_to_row(atom, base): - if base.ref_table: - return base.ref_table.rows.get(atom) - else: - return atom - - def _row_to_uuid(value): if isinstance(value, Row): return value.uuid @@ -1268,6 +1297,17 @@ class Row(object): data=", ".join("{col}={val}".format(col=c, val=getattr(self, c)) for c in sorted(self._table.columns))) + def _uuid_to_row(self, atom, base): + if base.ref_table: + try: + table = self._idl.tables[base.ref_table.name] + except KeyError as e: + msg = "Table {} is not registered".format(base.ref_table.name) + raise AttributeError(msg) from e + return table.rows.get(atom) + else: + return atom + def __getattr__(self, column_name): assert self._changes is not None assert self._mutations is not None @@ -1309,7 +1349,7 @@ class Row(object): datum = data.Datum.from_python(column.type, dlist, _row_to_uuid) elif 
column.type.is_map(): - dmap = datum.to_python(_uuid_to_row) + dmap = datum.to_python(self._uuid_to_row) if inserts is not None: dmap.update(inserts) if removes is not None: @@ -1326,7 +1366,7 @@ class Row(object): else: datum = inserts - return datum.to_python(_uuid_to_row) + return datum.to_python(self._uuid_to_row) def __setattr__(self, column_name, value): assert self._changes is not None @@ -1410,7 +1450,7 @@ class Row(object): if value: try: old_value = data.Datum.to_python(self._data[column_name], - _uuid_to_row) + self._uuid_to_row) except error.Error: return if key not in old_value: diff --git a/python/ovs/reconnect.py b/python/ovs/reconnect.py index c4c6c87e9f..6b0d023ae3 100644 --- a/python/ovs/reconnect.py +++ b/python/ovs/reconnect.py @@ -44,7 +44,7 @@ class Reconnect(object): is_connected = False @staticmethod - def deadline(fsm): + def deadline(fsm, now): return None @staticmethod @@ -56,7 +56,7 @@ class Reconnect(object): is_connected = False @staticmethod - def deadline(fsm): + def deadline(fsm, now): return None @staticmethod @@ -68,7 +68,7 @@ class Reconnect(object): is_connected = False @staticmethod - def deadline(fsm): + def deadline(fsm, now): return fsm.state_entered + fsm.backoff @staticmethod @@ -80,7 +80,7 @@ class Reconnect(object): is_connected = False @staticmethod - def deadline(fsm): + def deadline(fsm, now): return fsm.state_entered + max(1000, fsm.backoff) @staticmethod @@ -92,13 +92,24 @@ class Reconnect(object): is_connected = True @staticmethod - def deadline(fsm): + def deadline(fsm, now): if fsm.probe_interval: base = max(fsm.last_activity, fsm.state_entered) expiration = base + fsm.probe_interval - if (fsm.last_receive_attempt is None or + if (now < expiration or + fsm.last_receive_attempt is None or fsm.last_receive_attempt >= expiration): + # We still have time before the expiration or the time has + # already passed and there was no activity. 
In the first + # case we need to wait for the expiration, in the second - + # we're already past the deadline. return expiration + else: + # Time has already passed, but we didn't attempt to receive + # anything. We need to wake up and try to receive even if + # nothing is pending, so we can update the expiration time + # or transition to a different state. + return now + 1 return None @staticmethod @@ -114,12 +125,15 @@ class Reconnect(object): is_connected = True @staticmethod - def deadline(fsm): + def deadline(fsm, now): if fsm.probe_interval: expiration = fsm.state_entered + fsm.probe_interval - if (fsm.last_receive_attempt is None or + if (now < expiration or + fsm.last_receive_attempt is None or fsm.last_receive_attempt >= expiration): return expiration + else: + return now + 1 return None @staticmethod @@ -134,7 +148,7 @@ class Reconnect(object): is_connected = False @staticmethod - def deadline(fsm): + def deadline(fsm, now): return fsm.state_entered @staticmethod @@ -545,7 +559,7 @@ class Reconnect(object): returned if the "probe interval" is nonzero--see self.set_probe_interval()).""" - deadline = self.state.deadline(self) + deadline = self.state.deadline(self, now) if deadline is not None and now >= deadline: return self.state.run(self, now) else: @@ -562,7 +576,7 @@ class Reconnect(object): """Returns the number of milliseconds after which self.run() should be called if nothing else notable happens in the meantime, or None if this is currently unnecessary.""" - deadline = self.state.deadline(self) + deadline = self.state.deadline(self, now) if deadline is not None: remaining = deadline - now return max(0, remaining) diff --git a/rhel/openvswitch-fedora.spec.in b/rhel/openvswitch-fedora.spec.in index 16ef1ac3ab..d0ae78e4ed 100644 --- a/rhel/openvswitch-fedora.spec.in +++ b/rhel/openvswitch-fedora.spec.in @@ -455,6 +455,7 @@ fi %{_datadir}/openvswitch/scripts/ovs-ctl %{_datadir}/openvswitch/scripts/ovs-kmod-ctl
%{_datadir}/openvswitch/scripts/ovs-systemd-reload +%config %{_datadir}/openvswitch/local-config.ovsschema %config %{_datadir}/openvswitch/vswitch.ovsschema %config %{_datadir}/openvswitch/vtep.ovsschema %{_bindir}/ovs-appctl @@ -476,6 +477,7 @@ fi %{_mandir}/man1/ovsdb-server.1* %{_mandir}/man1/ovsdb-tool.1* %{_mandir}/man5/ovsdb-server.5* +%{_mandir}/man5/ovsdb.local-config.5* %{_mandir}/man5/ovs-vswitchd.conf.db.5* %{_mandir}/man5/ovsdb.5* %{_mandir}/man5/vtep.5* diff --git a/rhel/openvswitch.spec.in b/rhel/openvswitch.spec.in index 220e5c7472..2d8ff18bb0 100644 --- a/rhel/openvswitch.spec.in +++ b/rhel/openvswitch.spec.in @@ -229,6 +229,7 @@ exit 0 /usr/share/man/man1/ovsdb-client.1.gz /usr/share/man/man1/ovsdb-server.1.gz /usr/share/man/man1/ovsdb-tool.1.gz +/usr/share/man/man5/ovsdb.local-config.5.gz /usr/share/man/man5/ovsdb-server.5.gz /usr/share/man/man5/ovs-vswitchd.conf.db.5.gz %{_mandir}/man5/ovsdb.5* @@ -262,6 +263,7 @@ exit 0 /usr/share/openvswitch/scripts/ovs-vtep /usr/share/openvswitch/scripts/sysconfig.template /usr/share/openvswitch/scripts/ovs-monitor-ipsec +/usr/share/openvswitch/local-config.ovsschema /usr/share/openvswitch/vswitch.ovsschema /usr/share/openvswitch/vtep.ovsschema %doc NOTICE README.rst NEWS rhel/README.RHEL.rst diff --git a/tests/alb.at b/tests/alb.at index 2bef06f39c..922185d61d 100644 --- a/tests/alb.at +++ b/tests/alb.at @@ -96,6 +96,52 @@ OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance OVS_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([ALB - cross-numa]) +OVS_VSWITCHD_START([add-port br0 p0 \ + -- set Interface p0 type=dummy-pmd options:n_rxq=4 \ + -- set Interface p0 options:numa_id=0 \ + -- set Open_vSwitch . other_config:pmd-cpu-mask=0x3 \ + -- set open_vswitch . other_config:pmd-rxq-assign=group \ + -- set open_vswitch . other_config:pmd-rxq-isolate=false \ + -- set open_vswitch . other_config:pmd-auto-lb="true" \ + -- set open_vswitch . 
other_config:pmd-auto-lb-load-threshold=0], + [], [], [--dummy-numa 1,2,1,2]) +OVS_WAIT_UNTIL([grep "PMD auto load balance is enabled" ovs-vswitchd.log]) +AT_CHECK([ovs-appctl vlog/set dpif_netdev:dbg]) + +# no pinned rxqs - cross-numa pmd could change +get_log_next_line_num +ovs-appctl time/warp 600000 10000 +OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance performing dry run."]) +OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance detected cross-numa polling"]) + +# all pinned rxqs - cross-numa pmd will not change +AT_CHECK([ovs-vsctl set Interface p0 other_config:pmd-rxq-affinity='0:0,1:0,2:1,3:1']) +get_log_next_line_num +ovs-appctl time/warp 600000 10000 +OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance performing dry run."]) +OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "Variance improvement 0%."]) + +# mix of pinned (non-isolated) and non-pinned rxqs - cross-numa pmd could change +AT_CHECK([ovs-vsctl remove Interface p0 other_config pmd-rxq-affinity]) +AT_CHECK([ovs-vsctl set Interface p0 other_config:pmd-rxq-affinity='0:0,1:0,2:1']) +get_log_next_line_num +ovs-appctl time/warp 600000 10000 +OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance performing dry run."]) +OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance detected cross-numa polling"]) + +# mix of pinned (isolated) and non-pinned rxqs - cross-numa pmd could change +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0xf]) +AT_CHECK([ovs-vsctl set Interface p0 options:n_rxq=6]) +AT_CHECK([ovs-vsctl set Open_vSwitch . 
other_config:pmd-rxq-isolate=true]) +get_log_next_line_num +ovs-appctl time/warp 600000 10000 +OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance performing dry run."]) +OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD auto load balance detected cross-numa polling"]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([ALB - PMD/RxQ assignment type]) OVS_VSWITCHD_START([add-port br0 p0 \ -- set Interface p0 type=dummy-pmd options:n_rxq=3 \ @@ -197,7 +243,25 @@ get_log_next_line_num AT_CHECK([ovs-vsctl set open_vswitch . other_config:pmd-auto-lb-rebal-interval="0"]) CHECK_ALB_PARAM([interval], [1 mins], [+$LINENUM]) -# No check for above max as it is only a documented max value and not a hard limit +# Set new value +get_log_next_line_num +AT_CHECK([ovs-vsctl set open_vswitch . other_config:pmd-auto-lb-rebal-interval="100"]) +CHECK_ALB_PARAM([interval], [100 mins], [+$LINENUM]) + +# Set above max value +get_log_next_line_num +AT_CHECK([ovs-vsctl set open_vswitch . other_config:pmd-auto-lb-rebal-interval="20001"]) +CHECK_ALB_PARAM([interval], [1 mins], [+$LINENUM]) + +# Set new value +get_log_next_line_num +AT_CHECK([ovs-vsctl set open_vswitch . other_config:pmd-auto-lb-rebal-interval="1000"]) +CHECK_ALB_PARAM([interval], [1000 mins], [+$LINENUM]) + +# Set Negative value +get_log_next_line_num +AT_CHECK([ovs-vsctl set open_vswitch . 
other_config:pmd-auto-lb-rebal-interval="-1"]) +CHECK_ALB_PARAM([interval], [1 mins], [+$LINENUM]) OVS_VSWITCHD_STOP AT_CLEANUP diff --git a/tests/classifier.at b/tests/classifier.at index cdcd72c156..f652b59837 100644 --- a/tests/classifier.at +++ b/tests/classifier.at @@ -129,6 +129,31 @@ Datapath actions: 3 OVS_VSWITCHD_STOP(["/'prefixes' with incompatible field: ipv6_label/d"]) AT_CLEANUP +AT_SETUP([flow classifier - ipv6 ND dependency]) +OVS_VSWITCHD_START +add_of_ports br0 1 2 +AT_DATA([flows.txt], [dnl + table=0,priority=100,ipv6,ipv6_src=1000::/10 actions=resubmit(,1) + table=0,priority=0 actions=NORMAL + table=1,priority=110,ipv6,ipv6_dst=1000::3 actions=resubmit(,2) + table=1,priority=100,ipv6,ipv6_dst=1000::4 actions=resubmit(,2) + table=1,priority=0 actions=NORMAL + table=2,priority=120,icmp6,nw_ttl=255,icmp_type=135,icmp_code=0,nd_target=1000::1 actions=NORMAL + table=2,priority=100,tcp actions=NORMAL + table=2,priority=100,icmp6 actions=NORMAL + table=2,priority=0 actions=NORMAL +]) +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) + +# test ICMPv6 echo request (which should have no nd_target field) +AT_CHECK([ovs-appctl ofproto/trace br0 "in_port=1,eth_src=f6:d2:b0:19:5e:7b,eth_dst=d2:49:19:91:78:fe,dl_type=0x86dd,ipv6_src=1000::3,ipv6_dst=1000::4,nw_proto=58,icmpv6_type=128,icmpv6_code=0"], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,eth,icmp6,in_port=1,dl_src=f6:d2:b0:19:5e:7b,dl_dst=d2:49:19:91:78:fe,ipv6_src=1000::/10,ipv6_dst=1000::4,nw_ttl=0,nw_frag=no +Datapath actions: 100,2 +]) +OVS_VSWITCHD_STOP +AT_CLEANUP + AT_BANNER([conjunctive match]) AT_SETUP([single conjunctive match]) diff --git a/tests/drop-stats.at b/tests/drop-stats.at index f3e19cd83b..1d3af98dab 100644 --- a/tests/drop-stats.at +++ b/tests/drop-stats.at @@ -83,6 +83,9 @@ AT_CHECK([ ovs-ofctl -Oopenflow13 add-flows br0 flows.txt ovs-ofctl -Oopenflow13 dump-flows br0 | ofctl_strip | sort | grep actions ], [0], [ignore]) +ovs-appctl time/warp 15000 
+AT_CHECK([ovs-appctl revalidator/wait]) + AT_CHECK([ ovs-appctl netdev-dummy/receive p1 'in_port(1),packet_type(ns=0,id=0),eth(src=3a:6d:d2:09:9c:ab,dst=1e:2c:e9:2a:66:9e),ipv4(src=192.168.10.10,dst=192.168.10.30,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' ], [0], [ignore]) diff --git a/tests/library.at b/tests/library.at index db4997d8f0..6489be2c15 100644 --- a/tests/library.at +++ b/tests/library.at @@ -252,7 +252,7 @@ AT_CHECK([ovstest test-barrier], [0], []) AT_CLEANUP AT_SETUP([rcu]) -AT_CHECK([ovstest test-rcu-quiesce], [0], []) +AT_CHECK([ovstest test-rcu], [0], []) AT_CLEANUP AT_SETUP([stopwatch module]) diff --git a/tests/mcast-snooping.at b/tests/mcast-snooping.at index 757cf7186e..fe475e7b38 100644 --- a/tests/mcast-snooping.at +++ b/tests/mcast-snooping.at @@ -216,3 +216,70 @@ AT_CHECK([ovs-appctl mdb/show br0], [0], [dnl ]) AT_CLEANUP + + +AT_SETUP([mcast - igmp flood for non-snoop enabled]) +OVS_VSWITCHD_START([]) + +AT_CHECK([ + ovs-vsctl set bridge br0 \ + datapath_type=dummy], [0]) + +add_of_ports br0 1 2 + +AT_CHECK([ovs-ofctl add-flow br0 action=normal]) + +ovs-appctl time/stop + +dnl Basic scenario - needs to flood for IGMP followed by unicast ICMP +dnl in reverse direction +AT_CHECK([ovs-appctl netdev-dummy/receive p1 \ + '0101000c29a0aa55aa550001080046c00028000040000102d3494565eb4ae0000016940400002200f9020000000104000000e00000fb000000000000']) +AT_CHECK([ovs-appctl netdev-dummy/receive p2 \ + 'aa55aa5500010101000c29a008004500001c00010000400164dc0a0101010a0101020800f7ffffffffff']) + + +AT_CHECK([ovs-appctl dpctl/dump-flows | grep -e .*ipv4 | sort | dnl + strip_stats | strip_used | strip_recirc | dnl + sed -e 's/,packet_type(ns=[[0-9]]*,id=[[0-9]]*),/,/'], + [0], [dnl +recirc_id(),in_port(1),eth(src=aa:55:aa:55:00:01,dst=01:01:00:0c:29:a0),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:100,2 +recirc_id(),in_port(2),eth(src=01:01:00:0c:29:a0,dst=aa:55:aa:55:00:01),eth_type(0x0800),ipv4(frag=no), packets:0, 
bytes:0, used:never, actions:1 +]) + +ovs-appctl time/warp 100000 + +dnl Next we should clear the flows and install a complex case +AT_CHECK([ovs-ofctl del-flows br0]) + +AT_DATA([flows.txt], [dnl +table=0, arp actions=NORMAL +table=0, ip,in_port=1 actions=ct(table=1,zone=64000) +table=0, in_port=2 actions=output:1 +table=1, ip,ct_state=+trk+inv actions=drop +table=1 ip,in_port=1,icmp,ct_state=+trk+new actions=output:2 +table=1, in_port=1,ip,ct_state=+trk+new actions=controller(userdata=00.de.ad.be.ef.ca.fe.01) +table=1, in_port=1,ip,ct_state=+trk+est actions=output:2 +]) +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) + +ovs-appctl time/warp 100000 + +dnl Send the IGMP, followed by a unicast ICMP - ensure we won't black hole +AT_CHECK([ovs-appctl netdev-dummy/receive p1 \ + '0101000c29a0aa55aa550001080046c00028000040000102d3494565eb4ae0000016940400002200f9020000000104000000e00000fb000000000000']) +AT_CHECK([ovs-appctl netdev-dummy/receive p1 \ + 'aa55aa550001aa55aa55000208004500001c00010000400164dc0a0101010a0101020800f7ffffffffff']) + + +AT_CHECK([ovs-appctl dpctl/dump-flows | grep -e .*ipv4 | sort | dnl + strip_stats | strip_used | strip_recirc | dnl + sed 's/pid=[[0-9]]*,// + s/,packet_type(ns=[[0-9]]*,id=[[0-9]]*),/,/'], + [0], [dnl +ct_state(+new-inv+trk),recirc_id(),in_port(1),eth_type(0x0800),ipv4(proto=1,frag=no), packets:0, bytes:0, used:never, actions:2 +ct_state(+new-inv+trk),recirc_id(),in_port(1),eth_type(0x0800),ipv4(proto=2,frag=no), packets:0, bytes:0, used:never, actions:userspace(controller(reason=1,dont_send=0,continuation=0,recirc_id=,rule_cookie=0,controller_id=0,max_len=65535)) +recirc_id(),in_port(1),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:0.0s, actions:ct(zone=64000),recirc() +]) + +AT_CLEANUP diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index 7c2edeb9d4..c923ed6606 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -29,6 +29,39 @@ AT_CHECK([ovs-appctl revalidator/wait]) OVS_VSWITCHD_STOP 
AT_CLEANUP +AT_SETUP([ofproto-dpif - lldp revalidator event(REV_RECONFIGURE)]) +OVS_VSWITCHD_START( + [add-port br0 p1 -- set interface p1 ofport_request=1 type=dummy] +) +dnl first revalidation triggered by add interface +AT_CHECK([ovs-appctl coverage/read-counter rev_reconfigure], [0], [dnl +1 +]) + +dnl enable lldp +AT_CHECK([ovs-vsctl set interface p1 lldp:enable=true]) +AT_CHECK([ovs-appctl revalidator/wait]) +AT_CHECK([ovs-appctl coverage/read-counter rev_reconfigure], [0], [dnl +2 +]) + +dnl disable lldp +AT_CHECK([ovs-vsctl set interface p1 lldp:enable=false]) +AT_CHECK([ovs-appctl revalidator/wait]) +AT_CHECK([ovs-appctl coverage/read-counter rev_reconfigure], [0], [dnl +3 +]) + +dnl remove lldp, no revalidation as lldp was disabled +AT_CHECK([ovs-vsctl remove interface p1 lldp enable]) +AT_CHECK([ovs-appctl revalidator/wait]) +AT_CHECK([ovs-appctl coverage/read-counter rev_reconfigure], [0], [dnl +3 +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([ofproto-dpif - active-backup bonding (with primary)]) dnl Create br0 with members p1, p2 and p7, creating bond0 with p1 and @@ -81,11 +114,12 @@ recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=ff: ovs-appctl netdev-dummy/set-admin-state p1 up ovs-appctl time/warp 100 -OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl +OVS_WAIT_UNTIL_EQUAL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [dnl ---- bond0 ---- bond_mode: active-backup bond may use recirculation: no, bond-hash-basis: 0 +lb_output action: disabled, bond-id: -1 updelay: 0 ms downdelay: 0 ms lacp_status: off @@ -99,7 +133,6 @@ member p1: enabled member p2: enabled may_enable: true - ]) OVS_VSWITCHD_STOP @@ -129,11 +162,12 @@ ovs-appctl time/warp 100 OVS_WAIT_UNTIL([test -n "`ovs-appctl bond/show | fgrep 'member p1: disabled'`"]) ovs-appctl netdev-dummy/set-admin-state p1 up ovs-appctl time/warp 100 -OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | 
STRIP_ACTIVE_MEMBER_MAC], [0], [dnl +OVS_WAIT_UNTIL_EQUAL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [dnl ---- bond0 ---- bond_mode: active-backup bond may use recirculation: no, bond-hash-basis: 0 +lb_output action: disabled, bond-id: -1 updelay: 0 ms downdelay: 0 ms lacp_status: off @@ -150,7 +184,6 @@ member p2: enabled member p3: enabled may_enable: true - ]) dnl Now delete the primary and verify that the output shows that the @@ -171,11 +204,12 @@ ovs-vsctl \ --id=@p1 create Interface name=p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 -- \ set Port bond0 interfaces="$uuids, @p1]" ovs-appctl time/warp 100 -OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl +OVS_WAIT_UNTIL_EQUAL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [dnl ---- bond0 ---- bond_mode: active-backup bond may use recirculation: no, bond-hash-basis: 0 +lb_output action: disabled, bond-id: -1 updelay: 0 ms downdelay: 0 ms lacp_status: off @@ -192,17 +226,17 @@ member p2: enabled member p3: enabled may_enable: true - ]) dnl Switch to another primary ovs-vsctl set port bond0 other_config:bond-primary=p2 ovs-appctl time/warp 100 -OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl +OVS_WAIT_UNTIL_EQUAL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [dnl ---- bond0 ---- bond_mode: active-backup bond may use recirculation: no, bond-hash-basis: 0 +lb_output action: disabled, bond-id: -1 updelay: 0 ms downdelay: 0 ms lacp_status: off @@ -211,25 +245,25 @@ active-backup primary: p2 member p1: enabled - active member may_enable: true member p2: enabled + active member may_enable: true member p3: enabled may_enable: true - ]) dnl Remove the "bond-primary" config directive from the bond. 
AT_CHECK([ovs-vsctl remove Port bond0 other_config bond-primary]) ovs-appctl time/warp 100 -OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl +OVS_WAIT_UNTIL_EQUAL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [dnl ---- bond0 ---- bond_mode: active-backup bond may use recirculation: no, bond-hash-basis: 0 +lb_output action: disabled, bond-id: -1 updelay: 0 ms downdelay: 0 ms lacp_status: off @@ -238,15 +272,14 @@ active-backup primary: member p1: enabled - active member may_enable: true member p2: enabled + active member may_enable: true member p3: enabled may_enable: true - ]) OVS_VSWITCHD_STOP @@ -5573,7 +5606,36 @@ check_flows () { echo "n_packets=$n" test "$n" = 1 } -OVS_WAIT_UNTIL([check_flows], [ovs dump-flows br0]) +OVS_WAIT_UNTIL([check_flows], [ovs-ofctl dump-flows br0]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + +# Checks for regression against a bug in which OVS crashed +# with in_port=OFPP_NONE or in_port=OFPP_CONTROLLER and +# recirculation is involved. +AT_SETUP([ofproto-dpif - packet-out recirculation with OFPP_NONE and OFPP_CONTROLLER]) +OVS_VSWITCHD_START +add_of_ports br0 1 2 + +AT_DATA([flows.txt], [dnl +table=0 ip actions=mod_dl_dst:83:83:83:83:83:83,ct(table=1) +table=1 ip actions=ct(commit),normal +]) +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) + +packet=ffffffffffff00102030405008004500001c00000000401100000a000002ffffffff0035111100080000 +AT_CHECK([ovs-ofctl packet-out br0 "in_port=none,packet=$packet actions=table"]) +AT_CHECK([ovs-ofctl packet-out br0 "in_port=controller,packet=$packet actions=table"]) + +# Dumps out the flow table, extracts the number of packets that have gone +# through the (single) flow in table 1, and returns success if it's exactly 2. 
+check_flows () { + n=$(ovs-ofctl dump-flows br0 table=1 | sed -n 's/.*n_packets=\([[0-9]]\{1,\}\).*/\1/p') + echo "n_packets=$n" + test "$n" = 2 +} +OVS_WAIT_UNTIL([check_flows], [ovs-ofctl dump-flows br0]) OVS_VSWITCHD_STOP AT_CLEANUP @@ -7600,13 +7662,28 @@ dnl configure bridge IPFIX and ensure that sample action generation works at the dnl datapath level. AT_SETUP([ofproto-dpif - Bridge IPFIX sanity check]) OVS_VSWITCHD_START +dnl first revalidation triggered by add interface +AT_CHECK([ovs-appctl coverage/read-counter rev_reconfigure], [0], [dnl +1 +]) + add_of_ports br0 1 2 3 +AT_CHECK([ovs-appctl coverage/read-counter rev_reconfigure], [0], [dnl +2 +]) dnl Sample every packet using bridge-based sampling. AT_CHECK([ovs-vsctl -- set bridge br0 ipfix=@fix -- \ --id=@fix create ipfix targets=\"127.0.0.1:4739\" \ - sampling=1], [0], [ignore]) + sampling=2], [0], [ignore]) +AT_CHECK([ovs-appctl coverage/read-counter rev_reconfigure], [0], [dnl +3 +]) +AT_CHECK([ovs-vsctl set ipfix `ovs-vsctl get bridge br0 ipfix` sampling=1], [0]) +AT_CHECK([ovs-appctl coverage/read-counter rev_reconfigure], [0], [dnl +4 +]) dnl Send some packets that should be sampled. for i in `seq 1 3`; do AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800)']) diff --git a/tests/ofproto-macros.at b/tests/ofproto-macros.at index 736d9809cb..b18f0fbc1e 100644 --- a/tests/ofproto-macros.at +++ b/tests/ofproto-macros.at @@ -134,6 +134,21 @@ strip_ufid () { sed 's/mega_ufid:[[-0-9a-f]]* // s/ufid:[[-0-9a-f]]* //' } + +# Strips packets: and bytes: from output +strip_stats () { + sed 's/packets:[[0-9]]*/packets:0/ + s/bytes:[[0-9]]*/bytes:0/' +} + +# Changes all 'recirc(...)' and 'recirc=...' to say 'recirc()' and +# 'recirc=' respectively. This should make output easier to +# compare. 
+strip_recirc() { + sed 's/recirc_id([[x0-9]]*)/recirc_id()/ + s/recirc_id=[[x0-9]]*/recirc_id=/ + s/recirc([[x0-9]]*)/recirc()/' +} m4_divert_pop([PREPARE_TESTS]) m4_define([TESTABLE_LOG], [-vPATTERN:ANY:'%c|%p|%m']) @@ -175,6 +190,7 @@ m4_define([_OVS_VSWITCHD_START], /dpdk|INFO|DPDK Disabled - Use other_config:dpdk-init to enable/d /netlink_socket|INFO|netlink: could not enable listening to all nsid/d /probe tc:/d +/setting extended ack support failed/d /tc: Using policy/d']]) ]) @@ -239,6 +255,7 @@ check_logs () { /timeval.*context switches: [[0-9]]* voluntary, [[0-9]]* involuntary/d /ovs_rcu.*blocked [[0-9]]* ms waiting for .* to quiesce/d /Dropped [[0-9]]* log messages/d +/setting extended ack support failed/d /|WARN|/p /|ERR|/p /|EMER|/p" ${logs} diff --git a/tests/ovs-macros.at b/tests/ovs-macros.at index 66545da572..e6c5bc6e94 100644 --- a/tests/ovs-macros.at +++ b/tests/ovs-macros.at @@ -259,7 +259,20 @@ dnl Executes shell COMMAND in a loop until it returns zero. If COMMAND does dnl not return zero within a reasonable time limit, executes the commands dnl in IF-FAILED (if provided) and fails the test. m4_define([OVS_WAIT_UNTIL], - [OVS_WAIT([$1], [$2], [AT_LINE], [until $1])]) + [AT_FAIL_IF([test "$#" -ge 3]) + dnl The second argument should not be a number (confused with AT_CHECK ?). + AT_FAIL_IF([test "$#" -eq 2 && test "$2" -eq "$2" 2>/dev/null]) + OVS_WAIT([$1], [$2], [AT_LINE], [until $1])]) + +dnl OVS_WAIT_UNTIL_EQUAL(COMMAND, OUTPUT) +dnl +dnl Executes shell COMMAND in a loop until it returns zero and the output +dnl equals OUTPUT. If COMMAND does not return zero or a desired output within +dnl a reasonable time limit, fails the test. +m4_define([OVS_WAIT_UNTIL_EQUAL], + [AT_FAIL_IF([test "$#" -ge 3]) + echo "$2" > wait_until_expected + OVS_WAIT_UNTIL([$1 | diff -u wait_until_expected - ])]) dnl OVS_WAIT_WHILE(COMMAND, [IF-FAILED]) dnl @@ -267,7 +280,10 @@ dnl Executes shell COMMAND in a loop until it returns nonzero. 
If COMMAND does dnl not return nonzero within a reasonable time limit, executes the commands dnl in IF-FAILED (if provided) and fails the test. m4_define([OVS_WAIT_WHILE], - [OVS_WAIT([if $1; then return 1; else return 0; fi], [$2], + [AT_FAIL_IF([test "$#" -ge 3]) + dnl The second argument should not be a number (confused with AT_CHECK ?). + AT_FAIL_IF([test "$#" -eq 2 && test "$2" -eq "$2" 2>/dev/null]) + OVS_WAIT([if $1; then return 1; else return 0; fi], [$2], [AT_LINE], [while $1])]) dnl OVS_APP_EXIT_AND_WAIT(DAEMON) diff --git a/tests/ovs-vswitchd.at b/tests/ovs-vswitchd.at index bba4fea2bc..977b2eba1f 100644 --- a/tests/ovs-vswitchd.at +++ b/tests/ovs-vswitchd.at @@ -121,6 +121,7 @@ OVS_APP_EXIT_AND_WAIT_BY_TARGET(["`pwd`"/unixctl2], [ovs-vswitchd-2.pid]) # the process. AT_CHECK([sed -n " /|ERR|another ovs-vswitchd process is running/d +/setting extended ack support failed/d /|WARN|/p /|ERR|/p /|EMER|/p" fakelog @@ -148,6 +149,7 @@ AT_CHECK([grep "wakeup due to" ovs-vswitchd.log], [ignore]) # check the log, should not see any WARN/ERR/EMER log. 
AT_CHECK([sed -n " +/setting extended ack support failed/d /|WARN|/p /|ERR|/p /|EMER|/p" ovs-vswitchd.log diff --git a/tests/ovsdb-client.at b/tests/ovsdb-client.at index 06b671df8c..2d14f1ac26 100644 --- a/tests/ovsdb-client.at +++ b/tests/ovsdb-client.at @@ -3,6 +3,7 @@ AT_BANNER([OVSDB -- ovsdb-client commands]) AT_SETUP([ovsdb-client get-schema-version and get-schema-cksum]) AT_KEYWORDS([ovsdb client positive]) ordinal_schema > schema +on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-tool create db schema], [0], [ignore], [ignore]) AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=punix:socket db], [0], [ignore], [ignore]) AT_CHECK([ovsdb-client get-schema-version unix:socket ordinals], [0], [5.1.3 @@ -14,6 +15,7 @@ AT_CLEANUP AT_SETUP([ovsdb-client needs-conversion (no conversion needed)]) AT_KEYWORDS([ovsdb client file positive]) +on_exit 'kill `cat *.pid`' ordinal_schema > schema touch .db.~lock~ AT_CHECK([ovsdb-tool create db schema], [0], [], [ignore]) @@ -27,6 +29,7 @@ AT_SETUP([ovsdb-client needs-conversion (conversion needed)]) AT_KEYWORDS([ovsdb client file positive]) ordinal_schema > schema touch .db.~lock~ +on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-tool create db schema], [0], [], [ignore]) AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=punix:socket db], [0], [ignore], [ignore]) sed 's/5\.1\.3/5.1.4/' < schema > schema2 diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at index fc6253cfe9..920b833b72 100644 --- a/tests/ovsdb-cluster.at +++ b/tests/ovsdb-cluster.at @@ -1,12 +1,25 @@ OVS_START_SHELL_HELPERS -# ovsdb_check_cluster N_SERVERS SCHEMA_FUNC OUTPUT TRANSACTION... +# ovsdb_check_cluster N_SERVERS SCHEMA_FUNC OUTPUT USE_LOCAL_CONFIG TRANSACTION... 
ovsdb_check_cluster () { - local n=$1 schema_func=$2 output=$3 - shift; shift; shift + set -x + local n=$1 schema_func=$2 output=$3 local_config=$4 + shift; shift; shift; shift $schema_func > schema schema=`ovsdb-tool schema-name schema` AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [stderr]) + if test X$local_config == X"yes"; then + for i in `seq $n`; do + AT_CHECK([ovsdb-tool create c$i.db $top_srcdir/ovsdb/local-config.ovsschema], [0], [], [stderr]) + local ctxn="[[\"Local_Config\", + {\"op\": \"insert\", \"table\": \"Config\", + \"row\": {\"connections\": [\"named-uuid\",\"conn$n\"]}}, + {\"op\": \"insert\", \"table\": \"Connection\", \"uuid-name\": \"conn$n\", + \"row\": {\"target\": \"punix:s$i.ovsdb\"}}]]" + + AT_CHECK([ovsdb-tool transact c$i.db "$ctxn"], [0], [ignore], [stderr]) + done + fi AT_CHECK([grep -v 'from ephemeral to persistent' stderr], [1]) cid=`ovsdb-tool db-cid s1.db` for i in `seq 2 $n`; do @@ -15,7 +28,13 @@ ovsdb_check_cluster () { on_exit 'kill `cat *.pid`' for i in `seq $n`; do - AT_CHECK([ovsdb-server -vraft -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) + local remote=punix:s$i.ovsdb + local config_db= + if test X$local_config == X"yes"; then + remote=db:Local_Config,Config,connections + config_db=c$i.db + fi + AT_CHECK([ovsdb-server -vraft -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=$remote s$i.db $config_db]) done for i in `seq $n`; do AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema connected]) @@ -40,7 +59,7 @@ AT_BANNER([OVSDB - clustered transactions (1 server)]) m4_define([OVSDB_CHECK_EXECUTION], [AT_SETUP([$1 - cluster of 1]) AT_KEYWORDS([ovsdb server positive unix cluster cluster1 $5]) - ovsdb_check_cluster 1 "$2" '$4' m4_foreach([txn], [$3], ['txn' ]) + ovsdb_check_cluster 1 "$2" '$4' no m4_foreach([txn], [$3], ['txn' ]) 
AT_CLEANUP]) EXECUTION_EXAMPLES @@ -49,7 +68,7 @@ AT_BANNER([OVSDB - clustered transactions (3 servers)]) m4_define([OVSDB_CHECK_EXECUTION], [AT_SETUP([$1 - cluster of 3]) AT_KEYWORDS([ovsdb server positive unix cluster cluster3 $5]) - ovsdb_check_cluster 3 "$2" '$4' m4_foreach([txn], [$3], ['txn' ]) + ovsdb_check_cluster 3 "$2" '$4' no m4_foreach([txn], [$3], ['txn' ]) AT_CLEANUP]) EXECUTION_EXAMPLES @@ -58,7 +77,16 @@ AT_BANNER([OVSDB - clustered transactions (5 servers)]) m4_define([OVSDB_CHECK_EXECUTION], [AT_SETUP([$1 - cluster of 5]) AT_KEYWORDS([ovsdb server positive unix cluster cluster5 $5]) - ovsdb_check_cluster 5 "$2" '$4' m4_foreach([txn], [$3], ['txn' ]) + ovsdb_check_cluster 5 "$2" '$4' no m4_foreach([txn], [$3], ['txn' ]) + AT_CLEANUP]) +EXECUTION_EXAMPLES + +# Test a 3-server cluster using a Local_Config db. +AT_BANNER([OVSDB - clustered transactions Local_Config (3 servers)]) +m4_define([OVSDB_CHECK_EXECUTION], + [AT_SETUP([$1 - cluster of 3]) + AT_KEYWORDS([ovsdb server positive unix cluster cluster3 Local_Config $5]) + ovsdb_check_cluster 3 "$2" '$4' yes m4_foreach([txn], [$3], ['txn' ]) AT_CLEANUP]) EXECUTION_EXAMPLES @@ -400,6 +428,61 @@ done AT_CLEANUP +AT_BANNER([OVSDB - cluster failure while joining]) +AT_SETUP([OVSDB cluster - follower crash while joining]) +AT_KEYWORDS([ovsdb server negative unix cluster join]) + +n=3 +schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` +ordinal_schema > schema +AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db dnl + $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr]) +cid=`ovsdb-tool db-cid s1.db` +schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` +for i in `seq 2 $n`; do + AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft]) +done + +on_exit 'kill `cat *.pid`' + +dnl Starting followers first, so we can configure them to crash on join. 
+for j in `seq $n`; do + i=$(($n + 1 - $j)) + AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off dnl + --detach --no-chdir --log-file=s$i.log dnl + --pidfile=s$i.pid --unixctl=s$i dnl + --remote=punix:s$i.ovsdb s$i.db]) + if test $i != 1; then + OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s$i dnl + cluster/failure-test crash-before-sending-install-snapshot-reply dnl + | grep -q "engaged"]) + fi +done + +dnl Make sure that followers really crashed. +for i in `seq 2 $n`; do + OVS_WAIT_WHILE([test -s s$i.pid]) +done + +dnl Bring them back. +for i in `seq 2 $n`; do + AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off dnl + --detach --no-chdir --log-file=s$i.log dnl + --pidfile=s$i.pid --unixctl=s$i dnl + --remote=punix:s$i.ovsdb s$i.db]) +done + +dnl Make sure that all servers joined the cluster. +for i in `seq $n`; do + AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected]) +done + +for i in `seq $n`; do + OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid]) +done + +AT_CLEANUP + OVS_START_SHELL_HELPERS @@ -413,12 +496,12 @@ ovsdb_cluster_failure_test () { if test "$crash_node" == "1"; then new_leader=$5 fi + log_grep=$6 cp $top_srcdir/vswitchd/vswitch.ovsschema schema schema=`ovsdb-tool schema-name schema` - AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [dnl -ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral to persistent, including 'status' column in 'Manager' table, because clusters do not support ephemeral columns -]) + AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [stderr]) + AT_CHECK([sed < stderr "/ovsdb|WARN|schema: changed .* columns in 'Open_vSwitch' database from ephemeral to persistent/d"]) n=3 join_cluster() { @@ -434,7 +517,7 @@ ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral start_server() { local i=$1 printf "\ns$i: starting\n" - AT_CHECK([ovsdb-server -vjsonrpc 
-vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) + AT_CHECK([ovsdb-server -vjsonrpc -vraft -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) } connect_server() { local i=$1 @@ -460,14 +543,23 @@ ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral fi AT_CHECK([ovs-appctl -t "`pwd`"/s$delay_election_node cluster/failure-test delay-election], [0], [ignore]) fi + + # Initializing the database separately to avoid extra 'wait' operation + # in later transactions. + AT_CHECK([ovs-vsctl -v --db="$db" --no-leader-only --no-shuffle-remotes --no-wait init], [0], [ignore], [ignore]) + AT_CHECK([ovs-appctl -t "`pwd`"/s$crash_node cluster/failure-test $crash_command], [0], [ignore]) AT_CHECK([ovs-vsctl -v --db="$db" --no-leader-only --no-shuffle-remotes --no-wait create QoS type=x], [0], [ignore], [ignore]) - # Make sure that the node really crashed. - AT_CHECK([ls s$crash_node.ovsdb], [2], [ignore], [ignore]) - # XXX: Client will fail if remotes contains unix socket that doesn't exist (killed). - if test "$remote_1" = "$crash_node"; then - db=unix:s$remote_2.ovsdb + # Make sure that the node really crashed or has specific log message. + if test -z "$log_grep"; then + AT_CHECK([ls s$crash_node.ovsdb], [2], [ignore], [ignore]) + # XXX: Client will fail if remotes contains unix socket that doesn't exist (killed). 
+ if test "$remote_1" = "$crash_node"; then + db=unix:s$remote_2.ovsdb + fi + else + OVS_WAIT_UNTIL([grep -q "$log_grep" s${crash_node}.log]) fi AT_CHECK([ovs-vsctl --db="$db" --no-leader-only --no-wait --columns=type --bare list QoS], [0], [x ]) @@ -563,6 +655,11 @@ AT_KEYWORDS([ovsdb server negative unix cluster pending-txn]) ovsdb_cluster_failure_test 2 2 3 crash-after-receiving-append-request-update AT_CLEANUP +AT_SETUP([OVSDB cluster - txn on leader, leader transfers leadership after sending appendReq]) +AT_KEYWORDS([ovsdb server negative unix cluster pending-txn transfer]) +ovsdb_cluster_failure_test 1 2 1 transfer-leadership-after-sending-append-request -1 "Transferring leadership" +AT_CLEANUP + AT_SETUP([OVSDB cluster - competing candidates]) AT_KEYWORDS([ovsdb server negative unix cluster competing-candidates]) @@ -629,9 +726,8 @@ ovsdb_torture_test () { local variant=$3 # 'kill' and restart or 'remove' and add cp $top_srcdir/vswitchd/vswitch.ovsschema schema schema=`ovsdb-tool schema-name schema` - AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [dnl -ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral to persistent, including 'status' column in 'Manager' table, because clusters do not support ephemeral columns -]) + AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db schema unix:s1.raft], [0], [], [stderr]) + AT_CHECK([sed < stderr "/ovsdb|WARN|schema: changed .* columns in 'Open_vSwitch' database from ephemeral to persistent/d"]) join_cluster() { local i=$1 diff --git a/tests/ovsdb-server.at b/tests/ovsdb-server.at index 876cb836cd..e672c13b27 100644 --- a/tests/ovsdb-server.at +++ b/tests/ovsdb-server.at @@ -4,7 +4,7 @@ m4_define([OVSDB_SERVER_SHUTDOWN], [OVS_APP_EXIT_AND_WAIT_BY_TARGET([ovsdb-server], [ovsdb-server.pid])]) m4_define([OVSDB_SERVER_SHUTDOWN_N], - [cp pid$1 savepid$1 + [cp $1.pid savepid$1 AT_CHECK([ovs-appctl -t "`pwd`"/unixctl$1 -e 
exit], [0], [ignore], [ignore]) OVS_WAIT_WHILE([kill -0 `cat savepid$1`], [kill `cat savepid$1`])]) @@ -30,14 +30,13 @@ m4_define([OVSDB_CHECK_EXECUTION], AT_KEYWORDS([ovsdb server positive unix $5]) $2 > schema AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore]) + on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=punix:socket db], [0], [ignore], [ignore]) m4_foreach([txn], [$3], - [AT_CHECK([ovsdb-client transact unix:socket 'txn'], [0], [stdout], [ignore], - [test ! -e pid || kill `cat pid`]) + [AT_CHECK([ovsdb-client transact unix:socket 'txn'], [0], [stdout], [ignore]) cat stdout >> output ]) - AT_CHECK([uuidfilt output], [0], [$4], [ignore], - [test ! -e pid || kill `cat pid`]) + AT_CHECK([uuidfilt output], [0], [$4], [ignore]) OVSDB_SERVER_SHUTDOWN AT_CLEANUP]) @@ -88,8 +87,7 @@ AT_CHECK([uuidfilt output], [0], [[[{"uuid":["uuid","<0>"]}] [{"uuid":["uuid","<1>"]}] [{"rows":[{"_uuid":["uuid","<0>"],"_version":["uuid","<2>"],"name":"zero","number":0},{"_uuid":["uuid","<1>"],"_version":["uuid","<3>"],"name":"one","number":1}]}] -]], [], - [test ! -e pid || kill `cat pid`]) +]], []) AT_CLEANUP AT_SETUP([truncating database log with bad transaction]) @@ -136,8 +134,7 @@ AT_CHECK([uuidfilt output], [0], [[[{"uuid":["uuid","<0>"]}] [{"uuid":["uuid","<1>"]}] [{"rows":[{"_uuid":["uuid","<0>"],"_version":["uuid","<2>"],"name":"zero","number":0},{"_uuid":["uuid","<1>"],"_version":["uuid","<3>"],"name":"one","number":1}]}] -]], [], - [test ! 
-e pid || kill `cat pid`]) +]], []) AT_CLEANUP dnl CHECK_DBS([databases]) @@ -159,6 +156,7 @@ ordinal_schema > schema1 constraint_schema > schema2 AT_CHECK([ovsdb-tool create db1 schema1], [0], [ignore], [ignore]) AT_CHECK([ovsdb-tool create db2 schema2], [0], [ignore], [ignore]) +on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=punix:db.sock db1 db2], [0], [ignore], [ignore]) CHECK_DBS([constraints ordinals @@ -166,7 +164,7 @@ ordinals AT_CHECK( [[ovstest test-jsonrpc request unix:db.sock get_schema [\"nonexistent\"]]], [0], [[{"error":{"details":"get_schema request specifies unknown database nonexistent","error":"unknown database","syntax":"[\"nonexistent\"]"},"id":0,"result":null} -]], [], [test ! -e pid || kill `cat pid`]) +]], []) OVSDB_SERVER_SHUTDOWN AT_CLEANUP @@ -393,7 +391,7 @@ AT_CHECK( "table": "Manager", "uuid-name": "x", "row": {"target": "punix:socket2"}}]']], [0], [ignore], [ignore]) -on_exit 'kill `cat ovsdb-server.pid`' +on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=db:mydb,Root,managers --remote=db:mydb,Root,manager_options --log-file db], [0], [ignore], [ignore]) ovs-appctl -t ovsdb-server time/warp 6000 1000 AT_CHECK( @@ -686,6 +684,7 @@ ovsdb_check_online_compaction() { ovsdb-tool create-cluster db schema unix:s1.raft fi]) dnl Start ovsdb-server. + on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-server -vvlog:off -vconsole:off --detach --no-chdir --pidfile --remote=punix:socket --log-file db], [0]) AT_CHECK([ovsdb_client_wait unix:socket ordinals connected]) AT_CAPTURE_FILE([ovsdb-server.log]) @@ -832,7 +831,7 @@ _uuid name number <0> five 5 <1> four 4 <2> three 3 -], [], [test ! 
-e pid || kill `cat pid`]) +], []) OVSDB_SERVER_SHUTDOWN } OVS_END_SHELL_HELPERS @@ -1319,15 +1318,14 @@ m4_define([OVSDB_CHECK_EXECUTION], $2 > schema PKIDIR=$abs_top_builddir/tests AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore]) + on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-server --log-file --detach --no-chdir --pidfile --private-key=$PKIDIR/testpki-privkey2.pem --certificate=$PKIDIR/testpki-cert2.pem --ca-cert=$PKIDIR/testpki-cacert.pem --remote=pssl:0:127.0.0.1 db], [0], [ignore], [ignore]) PARSE_LISTENING_PORT([ovsdb-server.log], [SSL_PORT]) m4_foreach([txn], [$3], - [AT_CHECK([ovsdb-client --private-key=$PKIDIR/testpki-privkey.pem --certificate=$PKIDIR/testpki-cert.pem --ca-cert=$PKIDIR/testpki-cacert.pem transact ssl:127.0.0.1:$SSL_PORT 'txn'], [0], [stdout], [ignore], - [test ! -e pid || kill `cat pid`]) + [AT_CHECK([ovsdb-client --private-key=$PKIDIR/testpki-privkey.pem --certificate=$PKIDIR/testpki-cert.pem --ca-cert=$PKIDIR/testpki-cacert.pem transact ssl:127.0.0.1:$SSL_PORT 'txn'], [0], [stdout], [ignore]) cat stdout >> output ]) - AT_CHECK([uuidfilt output], [0], [$4], [ignore], - [test ! 
-e pid || kill `cat pid`]) + AT_CHECK([uuidfilt output], [0], [$4], [ignore]) OVSDB_SERVER_SHUTDOWN AT_CLEANUP]) @@ -1356,16 +1354,15 @@ m4_define([OVSDB_CHECK_EXECUTION], AT_SKIP_IF([test $HAVE_IPV6 = no]) $2 > schema PKIDIR=$abs_top_builddir/tests + on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-server --log-file --detach --no-chdir --pidfile --private-key=$PKIDIR/testpki-privkey2.pem --certificate=$PKIDIR/testpki-cert2.pem --ca-cert=$PKIDIR/testpki-cacert.pem --remote=pssl:0:[[::1]] db], [0], [ignore], [ignore]) PARSE_LISTENING_PORT([ovsdb-server.log], [SSL_PORT]) m4_foreach([txn], [$3], - [AT_CHECK([ovsdb-client --private-key=$PKIDIR/testpki-privkey.pem --certificate=$PKIDIR/testpki-cert.pem --ca-cert=$PKIDIR/testpki-cacert.pem transact ssl:[[::1]]:$SSL_PORT 'txn'], [0], [stdout], [ignore], - [test ! -e pid || kill `cat pid`]) + [AT_CHECK([ovsdb-client --private-key=$PKIDIR/testpki-privkey.pem --certificate=$PKIDIR/testpki-cert.pem --ca-cert=$PKIDIR/testpki-cacert.pem transact ssl:[[::1]]:$SSL_PORT 'txn'], [0], [stdout], [ignore]) cat stdout >> output ]) - AT_CHECK([uuidfilt output], [0], [$4], [ignore], - [test ! -e pid || kill `cat pid`]) + AT_CHECK([uuidfilt output], [0], [$4], [ignore]) OVSDB_SERVER_SHUTDOWN AT_CLEANUP]) @@ -1392,16 +1389,15 @@ m4_define([OVSDB_CHECK_EXECUTION], AT_KEYWORDS([ovsdb server positive tcp $5]) $2 > schema PKIDIR=$abs_top_builddir/tests + on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-server --log-file --detach --no-chdir --pidfile --remote=ptcp:0:127.0.0.1 db], [0], [ignore], [ignore]) PARSE_LISTENING_PORT([ovsdb-server.log], [TCP_PORT]) m4_foreach([txn], [$3], - [AT_CHECK([ovsdb-client transact tcp:127.0.0.1:$TCP_PORT 'txn'], [0], [stdout], [ignore], - [test ! 
-e pid || kill `cat pid`]) + [AT_CHECK([ovsdb-client transact tcp:127.0.0.1:$TCP_PORT 'txn'], [0], [stdout], [ignore]) cat stdout >> output ]) - AT_CHECK([uuidfilt output], [0], [$4], [ignore], - [test ! -e pid || kill `cat pid`]) + AT_CHECK([uuidfilt output], [0], [$4], [ignore]) OVSDB_SERVER_SHUTDOWN AT_CLEANUP]) @@ -1429,16 +1425,15 @@ m4_define([OVSDB_CHECK_EXECUTION], AT_SKIP_IF([test $HAVE_IPV6 = no]) $2 > schema PKIDIR=$abs_top_builddir/tests + on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-server --log-file --detach --no-chdir --pidfile --remote=ptcp:0:[[::1]] db], [0], [ignore], [ignore]) PARSE_LISTENING_PORT([ovsdb-server.log], [TCP_PORT]) m4_foreach([txn], [$3], - [AT_CHECK([ovsdb-client transact tcp:[[::1]]:$TCP_PORT 'txn'], [0], [stdout], [ignore], - [test ! -e pid || kill `cat pid`]) + [AT_CHECK([ovsdb-client transact tcp:[[::1]]:$TCP_PORT 'txn'], [0], [stdout], [ignore]) cat stdout >> output ]) - AT_CHECK([uuidfilt output], [0], [$4], [ignore], - [test ! -e pid || kill `cat pid`]) + AT_CHECK([uuidfilt output], [0], [$4], [ignore]) OVSDB_SERVER_SHUTDOWN AT_CLEANUP]) @@ -1518,9 +1513,9 @@ m4_define([OVSDB_CHECK_EXECUTION], target=4 $2 > schema schema_name=`ovsdb-tool schema-name schema` + on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-tool create db1 schema], [0], [stdout], [ignore]) - on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log dnl --pidfile --remote=punix:db1.sock db1 ], [0], [ignore], [ignore]) @@ -1576,12 +1571,11 @@ m4_define([OVSDB_CHECK_EXECUTION], AT_CHECK([ovsdb-tool create db1 schema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-tool create db2 schema], [0], [stdout], [ignore]) + on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock db1], [0], [ignore], [ignore]) i - on_exit 'test ! 
-e pid || kill `cat pid`' - AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=pid2 --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore]) - on_exit 'test ! -e pid2 || kill `cat pid2`' + AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore]) m4_foreach([txn], [$3], [AT_CHECK([ovsdb-client transact 'txn'], [0], [stdout], [ignore]) @@ -1622,11 +1616,10 @@ m4_define([OVSDB_CHECK_REPLICATION], AT_CHECK([ovsdb-tool create db1 schema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-tool create db2 schema], [0], [stdout], [ignore]) + on_exit 'kill `cat *.pid`' AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock db1], [0], [ignore], [ignore]) - on_exit 'test ! -e pid || kill `cat pid`' - AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=pid2 --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock --sync-exclude-tables=mydb:b db2], [0], [ignore], [ignore]) - on_exit 'test ! 
-e pid2 || kill `cat pid2`' + AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock --sync-exclude-tables=mydb:b db2], [0], [ignore], [ignore]) m4_foreach([txn], [$3], [AT_CHECK([ ovsdb-client transact 'txn' ], [0], [stdout], [ignore]) @@ -1694,6 +1687,7 @@ AT_CLEANUP #ovsdb-server/set-sync-exclude-tables command AT_SETUP([ovsdb-server/set-sync-exclude-tables]) +on_exit 'kill `cat *.pid`' AT_KEYWORDS([ovsdb server replication set-exclude-tables]) AT_SKIP_IF([test $DIFF_SUPPORTS_NORMAL_FORMAT = no]) @@ -1702,12 +1696,10 @@ AT_CHECK([ovsdb-tool create db1 schema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-tool create db2 schema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock db1], [0], [ignore], [ignore]) -on_exit 'test ! -e pid || kill `cat pid`' -AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=pid2 --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore]) -on_exit 'test ! -e pid2 || kill `cat pid2`' +AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore]) -AT_CHECK([ovs-appctl -t "`pwd`"/unixctl2 ovsdb-server/set-sync-exclude-tables mydb:b], [0], [ignore], [ignore], [test ! -e pid || kill `cat pid`; test ! -e pid2 || kill `cat pid2`]) +AT_CHECK([ovs-appctl -t "`pwd`"/unixctl2 ovsdb-server/set-sync-exclude-tables mydb:b], [0], [ignore], [ignore]) AT_CHECK([ovsdb-client transact unix:db.sock \ '[["mydb", @@ -1716,11 +1708,9 @@ AT_CHECK([ovsdb-client transact unix:db.sock \ "row": {"number": 0, "name": "zero"}}, {"op": "insert", "table": "b", - "row": {"number": 1, "name": "one"}}]]'], [0], [stdout], [ignore], - [test ! -e pid || kill `cat pid`; test ! 
-e pid2 || kill `cat pid2`]) + "row": {"number": 1, "name": "one"}}]]'], [0], [stdout], [ignore]) -AT_CHECK([ovsdb-client dump unix:db.sock], [0], [stdout], [ignore], - [test ! -e pid || kill `cat pid`; test ! -e pid2 || kill `cat pid2`]) +AT_CHECK([ovsdb-client dump unix:db.sock], [0], [stdout], [ignore]) cat stdout > dump1 OVS_WAIT_UNTIL([ ovsdb-client dump unix:db2.sock | grep zero ]) AT_CHECK([ovsdb-client dump unix:db2.sock], [0], [stdout], [ignore]) @@ -1744,16 +1734,15 @@ AT_CLEANUP #ovsdb-server/connect-active-ovsdb-server AT_SETUP([ovsdb-server/connect-active-server]) +on_exit 'kill `cat *.pid`' AT_KEYWORDS([ovsdb server replication connect-active-server]) replication_schema > schema AT_CHECK([ovsdb-tool create db1 schema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-tool create db2 schema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock db1], [0], [ignore], [ignore]) -on_exit 'test ! -e pid || kill `cat pid`' -AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=pid2 --remote=punix:db2.sock --unixctl=unixctl2 db2], [0], [ignore], [ignore]) -on_exit 'test ! -e pid2 || kill `cat pid2`' +AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl=unixctl2 db2], [0], [ignore], [ignore]) dnl Try to connect without specifying the active server. 
AT_CHECK([ovs-appctl -t "`pwd`"/unixctl2 ovsdb-server/connect-active-ovsdb-server], [0], @@ -1783,6 +1772,7 @@ AT_CLEANUP #ovsdb-server/disconnect-active-server command AT_SETUP([ovsdb-server/disconnect-active-server]) +on_exit 'kill `cat *.pid`' AT_KEYWORDS([ovsdb server replication disconnect-active-server]) AT_SKIP_IF([test $DIFF_SUPPORTS_NORMAL_FORMAT = no]) @@ -1791,10 +1781,8 @@ AT_CHECK([ovsdb-tool create db1 schema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-tool create db2 schema], [0], [stdout], [ignore]) AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock db1], [0], [ignore], [ignore]) -on_exit 'test ! -e pid || kill `cat pid`' -AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=pid2 --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore]) -on_exit 'test ! -e pid2 || kill `cat pid2`' +AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl=unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore]) AT_CHECK([ovsdb-client transact unix:db.sock \ '[["mydb", @@ -1840,7 +1828,7 @@ AT_CHECK([uuidfilt output], [0], [7,9c7,8 --- > _uuid name number > ----- ---- ------ -], [ignore], [test ! -e pid || kill `cat pid`; test ! -e pid2 || kill `cat pid2`]) +], [ignore]) dnl The backup server now become active, and can accept write transactions. AT_CHECK([ovsdb-client transact unix:db2.sock \ @@ -1891,13 +1879,12 @@ dnl Start both 'db1' and 'db2' in backup mode. Let them backup from each dnl other. This is not an supported operation state, but to simulate a start dnl up condition where an HA manger can select which one to be an active dnl server soon after. 
-AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile="`pwd`"/pid --remote=punix:db.sock --unixctl="`pwd`"/unixctl db1 --sync-from=unix:db2.sock --active ], [0], [ignore], [ignore]) -on_exit 'test ! -e pid || kill `cat pid`' +on_exit 'kill `cat *.pid`' +AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock --unixctl="`pwd`"/unixctl db1 --sync-from=unix:db2.sock --active ], [0], [ignore], [ignore]) AT_CHECK([ovs-appctl -t "`pwd`"/unixctl ovsdb-server/connect-active-ovsdb-server]) -AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile="`pwd`"/pid2 --remote=punix:db2.sock --unixctl="`pwd`"/unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore]) -on_exit 'test ! -e pid2 || kill `cat pid2`' +AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl="`pwd`"/unixctl2 --sync-from=unix:db.sock db2], [0], [ignore], [ignore]) dnl dnl make sure both servers reached the replication state @@ -1965,8 +1952,8 @@ AT_CHECK([ovsdb-tool transact db \ "row": {"number": 9, "name": "nine"}}]]'], [0], [ignore], [ignore]) dnl Start 'db', then try to be a back up server of itself. -AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server.log --pidfile="`pwd`"/pid --remote=punix:db.sock --unixctl="`pwd`"/unixctl db --sync-from=unix:db.sock --active ], [0], [ignore], [ignore]) -on_exit 'test ! 
-e pid || kill `cat pid`' +on_exit 'kill `cat *.pid`' +AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server.log --pidfile --remote=punix:db.sock --unixctl="`pwd`"/unixctl db --sync-from=unix:db.sock --active ], [0], [ignore], [ignore]) dnl Save the current content AT_CHECK([ovsdb-client dump unix:db.sock], [0], [stdout]) @@ -1984,6 +1971,7 @@ AT_CHECK([diff dump1 dump2]) AT_CLEANUP AT_SETUP([ovsdb-server/read-only db:ptcp connection]) +on_exit 'kill `cat *.pid`' AT_KEYWORDS([ovsdb server read-only]) AT_DATA([schema], [[{"name": "mydb", @@ -2072,12 +2060,10 @@ AT_CHECK([ovsdb-tool transact db2 \ "row": {"number": 10, "name": "ten"}}]]'], [0], [ignore], [ignore]) dnl Start both 'db1' and 'db2'. -AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile="`pwd`"/pid --remote=punix:db.sock --unixctl="`pwd`"/unixctl db1 --active ], [0], [ignore], [ignore]) -on_exit 'test ! -e pid || kill `cat pid`' - +on_exit 'kill `cat *.pid`' +AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server1.log --pidfile --remote=punix:db.sock --unixctl="`pwd`"/unixctl db1 --active ], [0], [ignore], [ignore]) -AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile="`pwd`"/pid2 --remote=punix:db2.sock --unixctl="`pwd`"/unixctl2 db2], [0], [ignore], [ignore]) -on_exit 'test ! -e pid2 || kill `cat pid2`' +AT_CHECK([ovsdb-server --detach --no-chdir --log-file=ovsdb-server2.log --pidfile=2.pid --remote=punix:db2.sock --unixctl="`pwd`"/unixctl2 db2], [0], [ignore], [ignore]) OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/unixctl ovsdb-server/sync-status |grep active]) OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/unixctl2 ovsdb-server/sync-status |grep active]) @@ -2177,7 +2163,7 @@ dnl Starting a dummy server only to reserve some tcp port. 
AT_CHECK([cp db db.tmp]) AT_CHECK([ovsdb-server -vfile -vvlog:off --log-file=listener.log dnl --detach --no-chdir dnl - --pidfile=pid2 --unixctl=unixctl2 dnl + --pidfile=2.pid --unixctl=unixctl2 dnl --remote=ptcp:0:127.0.0.1 dnl db.tmp], [0], [stdout], [stderr]) PARSE_LISTENING_PORT([listener.log], [BAD_TCP_PORT]) diff --git a/tests/pmd.at b/tests/pmd.at index a2f9d34a2a..3962dd2bd9 100644 --- a/tests/pmd.at +++ b/tests/pmd.at @@ -199,7 +199,7 @@ pmd thread numa_id core_id : OVS_VSWITCHD_STOP AT_CLEANUP -AT_SETUP([PMD - pmd-cpu-mask - NUMA]) +AT_SETUP([PMD - pmd-cpu-mask - dual NUMA]) OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy-pmd options:n_rxq=8 options:numa_id=1 -- set Open_vSwitch . other_config:pmd-cpu-mask=1], [], [], [--dummy-numa 1,1,0,0]) @@ -359,6 +359,44 @@ pmd thread numa_id 1 core_id 0: OVS_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([PMD - pmd-cpu-mask - multi NUMA]) +OVS_VSWITCHD_START([add-port br0 p0 \ + -- set Interface p0 type=dummy-pmd options:n_rxq=4 \ + -- set Interface p0 options:numa_id=0 \ + -- set Open_vSwitch . other_config:pmd-cpu-mask=0xf \ + -- set open_vswitch . other_config:pmd-rxq-assign=cycles], + [], [], [--dummy-numa 1,2,1,2]) + +TMP=$(($(cat ovs-vswitchd.log | wc -l | tr -d [[:blank:]])+1)) +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-rxq-assign=group]) + +OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "Performing pmd to rx queue assignment using group algorithm"]) +OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "There's no available (non-isolated) pmd thread on numa node 0."]) + +# check all pmds from both non-local numas are assigned an rxq +AT_CHECK([test `ovs-appctl dpif-netdev/pmd-rxq-show | awk '/AVAIL$/ { printf("%s\t", $0); next } 1' | parse_pmd_rxq_show_group | wc -l` -eq 4]) + +TMP=$(($(cat ovs-vswitchd.log | wc -l | tr -d [[:blank:]])+1)) +AT_CHECK([ovs-vsctl set Open_vSwitch . 
other_config:pmd-rxq-assign=cycles]) + +OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "Performing pmd to rx queue assignment using cycles algorithm"]) +OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "There's no available (non-isolated) pmd thread on numa node 0."]) + +# check all pmds from both non-local numas are assigned an rxq +AT_CHECK([test `ovs-appctl dpif-netdev/pmd-rxq-show | awk '/AVAIL$/ { printf("%s\t", $0); next } 1' | parse_pmd_rxq_show_group | wc -l` -eq 4]) + +TMP=$(($(cat ovs-vswitchd.log | wc -l | tr -d [[:blank:]])+1)) +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:pmd-rxq-assign=roundrobin]) + +OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "Performing pmd to rx queue assignment using roundrobin algorithm"]) +OVS_WAIT_UNTIL([tail -n +$TMP ovs-vswitchd.log | grep "There's no available (non-isolated) pmd thread on numa node 0."]) + +# check all pmds from both non-local numas are assigned an rxq +AT_CHECK([test `ovs-appctl dpif-netdev/pmd-rxq-show | awk '/AVAIL$/ { printf("%s\t", $0); next } 1' | parse_pmd_rxq_show_group | wc -l` -eq 4]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([PMD - stats]) OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 ofport_request=7 type=dummy-pmd options:n_rxq=4], [], [], [DUMMY_NUMA]) @@ -1075,15 +1113,15 @@ AT_SETUP([PMD - dpif configuration]) OVS_VSWITCHD_START([], [], [], [--dummy-numa 0,0]) AT_CHECK([ovs-vsctl add-port br0 p1 -- set Interface p1 type=dummy-pmd]) +AT_CHECK([ovs-appctl dpif-netdev/dpif-impl-set dpif_scalar], [0], [dnl +DPIF implementation set to dpif_scalar. +]) + AT_CHECK([ovs-vsctl show], [], [stdout]) AT_CHECK([ovs-appctl dpif-netdev/dpif-impl-get | grep "dpif_scalar"], [], [dnl dpif_scalar (pmds: 0) ]) -AT_CHECK([ovs-appctl dpif-netdev/dpif-impl-set dpif_scalar], [0], [dnl -DPIF implementation set to dpif_scalar. 
-]) - OVS_VSWITCHD_STOP AT_CLEANUP @@ -1092,13 +1130,6 @@ OVS_VSWITCHD_START([], [], [], [--dummy-numa 0,0]) AT_CHECK([ovs-vsctl add-port br0 p1 -- set Interface p1 type=dummy-pmd]) AT_CHECK([ovs-vsctl show], [], [stdout]) -AT_CHECK([ovs-appctl dpif-netdev/subtable-lookup-prio-get | grep generic], [], [dnl - 1 : generic -]) - -AT_CHECK([ovs-appctl dpif-netdev/subtable-lookup-prio-get | grep autovalidator], [], [dnl - 0 : autovalidator -]) AT_CHECK([ovs-appctl dpif-netdev/subtable-lookup-prio-set autovalidator 3], [0], [dnl Lookup priority change affected 0 dpcls ports and 0 subtables. diff --git a/tests/reconnect.at b/tests/reconnect.at index 0f74709f5a..5bca84351c 100644 --- a/tests/reconnect.at +++ b/tests/reconnect.at @@ -39,8 +39,19 @@ run connected # Try timeout without noting that we tried to receive. -# (This does nothing since we never timeout in this case.) +# Timeout should be scheduled to the next probe interval. timeout +run + +# Once we reached the timeout, it should not expire until the receive actually +# attempted. However, we still need to wake up as soon as possible in order to +# have a chance to mark the receive attempt even if nothing was received. +timeout +run + +# Short time advance past the original probe interval, but not expired still. +timeout +run # Now disable the receive-attempted feature and timeout again. receive-attempted LLONG_MAX @@ -67,18 +78,37 @@ connected last connected 0 ms ago, connected 0 ms total # Try timeout without noting that we tried to receive. -# (This does nothing since we never timeout in this case.) -timeout - no timeout - -# Now disable the receive-attempted feature and timeout again. -receive-attempted LLONG_MAX +# Timeout should be scheduled to the next probe interval. timeout advance 5000 ms ### t=6000 ### in ACTIVE for 5000 ms (0 ms backoff) run + +# Once we reached the timeout, it should not expire until the receive actually +# attempted. 
However, we still need to wake up as soon as possible in order to +# have a chance to mark the receive attempt even if nothing was received. +timeout + advance 1 ms + +### t=6001 ### + in ACTIVE for 5001 ms (0 ms backoff) +run + +# Short time advance past the original probe interval, but not expired still. +timeout + advance 1 ms + +### t=6002 ### + in ACTIVE for 5002 ms (0 ms backoff) +run + +# Now disable the receive-attempted feature and timeout again. +receive-attempted LLONG_MAX +timeout + advance 0 ms +run should send probe in IDLE for 0 ms (0 ms backoff) @@ -86,7 +116,7 @@ run timeout advance 5000 ms -### t=11000 ### +### t=11002 ### in IDLE for 5000 ms (0 ms backoff) run should disconnect @@ -94,7 +124,7 @@ disconnected in BACKOFF for 0 ms (1000 ms backoff) 1 successful connections out of 1 attempts, seqno 2 disconnected - disconnected at 11000 ms (0 ms ago) + disconnected at 11002 ms (0 ms ago) ]) ###################################################################### @@ -111,8 +141,19 @@ run connected # Try timeout without noting that we tried to receive. -# (This does nothing since we never timeout in this case.) +# Timeout should be scheduled to the next probe interval. +timeout +run + +# Once we reached the timeout, it should not expire until the receive actually +# attempted. However, we still need to wake up as soon as possible in order to +# have a chance to mark the receive attempt even if nothing was received. +timeout +run + +# Short time advance past the original probe interval, but not expired still. timeout +run # Now disable the receive-attempted feature and timeout again. receive-attempted LLONG_MAX @@ -148,18 +189,37 @@ connected last connected 0 ms ago, connected 0 ms total # Try timeout without noting that we tried to receive. -# (This does nothing since we never timeout in this case.) -timeout - no timeout - -# Now disable the receive-attempted feature and timeout again. 
-receive-attempted LLONG_MAX +# Timeout should be scheduled to the next probe interval. timeout advance 5000 ms ### t=6500 ### in ACTIVE for 5000 ms (0 ms backoff) run + +# Once we reached the timeout, it should not expire until the receive actually +# attempted. However, we still need to wake up as soon as possible in order to +# have a chance to mark the receive attempt even if nothing was received. +timeout + advance 1 ms + +### t=6501 ### + in ACTIVE for 5001 ms (0 ms backoff) +run + +# Short time advance past the original probe interval, but not expired still. +timeout + advance 1 ms + +### t=6502 ### + in ACTIVE for 5002 ms (0 ms backoff) +run + +# Now disable the receive-attempted feature and timeout again. +receive-attempted LLONG_MAX +timeout + advance 0 ms +run should send probe in IDLE for 0 ms (0 ms backoff) @@ -167,7 +227,7 @@ run timeout advance 5000 ms -### t=11500 ### +### t=11502 ### in IDLE for 5000 ms (0 ms backoff) run should disconnect @@ -175,7 +235,7 @@ disconnected in BACKOFF for 0 ms (1000 ms backoff) 1 successful connections out of 1 attempts, seqno 2 disconnected - disconnected at 11500 ms (0 ms ago) + disconnected at 11502 ms (0 ms ago) ]) ###################################################################### @@ -1271,14 +1331,14 @@ activity created 1000, last activity 3000, last connected 2000 # Connection times out. 
-timeout - no timeout -receive-attempted LLONG_MAX timeout advance 5000 ms ### t=8000 ### in ACTIVE for 6000 ms (1000 ms backoff) +receive-attempted LLONG_MAX +timeout + advance 0 ms run should send probe in IDLE for 0 ms (1000 ms backoff) diff --git a/tests/system-common-macros.at b/tests/system-common-macros.at index 19a0b125b9..8b9f5c7525 100644 --- a/tests/system-common-macros.at +++ b/tests/system-common-macros.at @@ -281,6 +281,14 @@ m4_define([OVS_START_L7], # m4_define([OFPROTO_CLEAR_DURATION_IDLE], [[sed -e 's/duration=.*s,/duration=,/g' -e 's/idle_age=[0-9]*,/idle_age=,/g']]) +# OVS_CHECK_TUNNEL_TSO() +# +# Macro to be used in general tunneling tests that could be also +# used by system-tso. In that case, tunneling is not supported and +# the test should be skipped. +m4_define([OVS_CHECK_TUNNEL_TSO], + [m4_ifdef([CHECK_SYSTEM_TSO], [AT_SKIP_IF(:)])]) + # OVS_CHECK_VXLAN() # # Do basic check for vxlan functionality, skip the test if it's not there. diff --git a/tests/system-dpdk.at b/tests/system-dpdk.at index c3ee6990ca..7d2715c4a7 100644 --- a/tests/system-dpdk.at +++ b/tests/system-dpdk.at @@ -237,6 +237,10 @@ AT_CHECK([ovs-vsctl show], [], [stdout]) AT_SKIP_IF([! ovs-appctl dpif-netdev/miniflow-parser-get | sed 1,4d | grep "True"], [], [dnl ]) +AT_CHECK([ovs-appctl dpif-netdev/dpif-impl-set dpif_avx512], [0], [dnl +DPIF implementation set to dpif_avx512. +]) + AT_CHECK([ovs-appctl dpif-netdev/miniflow-parser-set autovalidator], [0], [dnl Miniflow extract implementation set to autovalidator. ]) @@ -265,6 +269,10 @@ AT_CHECK([ovs-vsctl show], [], [stdout]) AT_SKIP_IF([! ovs-appctl dpif-netdev/miniflow-parser-get | sed 1,4d | grep "True"], [], [dnl ]) +AT_CHECK([ovs-appctl dpif-netdev/dpif-impl-set dpif_avx512], [0], [dnl +DPIF implementation set to dpif_avx512. +]) + AT_CHECK([ovs-appctl dpif-netdev/miniflow-parser-set autovalidator], [0], [dnl Miniflow extract implementation set to autovalidator. 
]) diff --git a/tests/system-route.at b/tests/system-route.at index 1714273e35..270956d13f 100644 --- a/tests/system-route.at +++ b/tests/system-route.at @@ -14,10 +14,9 @@ dnl Add ip address. AT_CHECK([ip addr add 10.0.0.17/24 dev p1-route], [0], [stdout]) dnl Check that OVS catches route updates. -OVS_WAIT_UNTIL([ovs-appctl ovs/route/show | grep 'p1-route' | sort], [0], [dnl -Cached: 10.0.0.17/24 dev p1-route SRC 10.0.0.17 -Cached: 10.0.0.17/32 dev p1-route SRC 10.0.0.17 local -]) +OVS_WAIT_UNTIL_EQUAL([ovs-appctl ovs/route/show | grep 'p1-route' | sort], [dnl +Cached: 10.0.0.0/24 dev p1-route SRC 10.0.0.17 +Cached: 10.0.0.17/32 dev p1-route SRC 10.0.0.17 local]) dnl Delete ip address. AT_CHECK([ip addr del 10.0.0.17/24 dev p1-route], [0], [stdout]) diff --git a/tests/system-traffic.at b/tests/system-traffic.at index f22d86e466..36e10aa4a8 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -218,6 +218,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over vxlan tunnel]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_VXLAN() OVS_TRAFFIC_VSWITCHD_START() @@ -258,7 +259,55 @@ NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PI OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([datapath - ping vlan over vxlan tunnel]) +OVS_CHECK_TUNNEL_TSO() +OVS_CHECK_VXLAN() + +OVS_TRAFFIC_VSWITCHD_START() +ADD_BR([br-underlay]) + +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"]) +AT_CHECK([ovs-ofctl add-flow br-underlay "actions=normal"]) + +ADD_NAMESPACES(at_ns0) + +dnl Set up underlay link from host into the namespace using veth pair. +ADD_VETH(p0, at_ns0, br-underlay, "172.31.2.1/24") +AT_CHECK([ip addr add dev br-underlay "172.31.1.100/24"]) +AT_CHECK([ip link set dev br-underlay up]) + +dnl Set up tunnel endpoints on OVS outside the namespace and with a native +dnl linux device inside the namespace. 
+ADD_OVS_TUNNEL([vxlan], [br0], [at_vxlan0], [172.31.1.1], [10.1.1.100/24]) +ADD_NATIVE_TUNNEL([vxlan], [at_vxlan1], [at_ns0], [172.31.1.100], [10.2.1.1/24], + [id 0 dstport 4789]) + +AT_CHECK([ovs-vsctl set port br0 tag=100]) +AT_CHECK([ovs-vsctl set port br-underlay tag=42]) + +ADD_VLAN(at_vxlan1, at_ns0, 100, "10.1.1.1/24") +ADD_VLAN(p0, at_ns0, 42, "172.31.1.1/24") + +dnl First, check the underlay +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 172.31.1.100 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +dnl Okay, now check the overlay with different packet sizes +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +NS_CHECK_EXEC([at_ns0], [ping -s 1600 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([datapath - ping over vxlan6 tunnel]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_VXLAN_UDP6ZEROCSUM() OVS_TRAFFIC_VSWITCHD_START() @@ -302,6 +351,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over gre tunnel]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) OVS_CHECK_GRE() @@ -343,6 +393,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over ip6gre L2 tunnel]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) OVS_CHECK_GRE() OVS_CHECK_ERSPAN() @@ -383,6 +434,7 @@ AT_CLEANUP AT_SETUP([datapath - ping over erspan v1 tunnel]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) OVS_CHECK_GRE() OVS_CHECK_ERSPAN() @@ -419,6 +471,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over erspan v2 tunnel]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) 
OVS_CHECK_GRE() OVS_CHECK_ERSPAN() @@ -455,6 +508,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over ip6erspan v1 tunnel]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) OVS_CHECK_GRE() OVS_CHECK_ERSPAN() @@ -494,6 +548,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over ip6erspan v2 tunnel]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_KERNEL_EXCL(3, 10, 4, 15) OVS_CHECK_GRE() OVS_CHECK_ERSPAN() @@ -534,6 +589,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over geneve tunnel]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_GENEVE() OVS_TRAFFIC_VSWITCHD_START() @@ -575,6 +631,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over geneve tunnel, delete flow regression]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_GENEVE() OVS_TRAFFIC_VSWITCHD_START() @@ -629,6 +686,7 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/|ERR|/d AT_CLEANUP AT_SETUP([datapath - flow resume with geneve tun_metadata]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_GENEVE() OVS_TRAFFIC_VSWITCHD_START() @@ -680,6 +738,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over geneve6 tunnel]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_GENEVE_UDP6ZEROCSUM() OVS_TRAFFIC_VSWITCHD_START() @@ -723,6 +782,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over gre tunnel by simulated packets]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_MIN_KERNEL(3, 10) OVS_TRAFFIC_VSWITCHD_START() @@ -769,6 +829,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over erspan v1 tunnel by simulated packets]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_MIN_KERNEL(3, 10) OVS_TRAFFIC_VSWITCHD_START() @@ -817,6 +878,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over erspan v2 tunnel by simulated packets]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_MIN_KERNEL(3, 10) OVS_TRAFFIC_VSWITCHD_START() @@ -870,6 +932,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over ip6erspan v1 tunnel by simulated packets]) +OVS_CHECK_TUNNEL_TSO() 
OVS_CHECK_MIN_KERNEL(3, 10) OVS_TRAFFIC_VSWITCHD_START() @@ -925,6 +988,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([datapath - ping over ip6erspan v2 tunnel by simulated packets]) +OVS_CHECK_TUNNEL_TSO() OVS_CHECK_MIN_KERNEL(3, 10) OVS_TRAFFIC_VSWITCHD_START() @@ -4100,15 +4164,15 @@ action=normal AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) -AT_CHECK([ovs-ofctl packet-out br0 "packet=52540003287c525400444ab586dd6006f70605b02c4020010001000000000000000000000020200100010000000000000000000000101100000134e88debdnl +AT_CHECK([ovs-ofctl packet-out br0 "in_port=42,packet=52540003287c525400444ab586dd6006f70605b02c4020010001000000000000000000000020200100010000000000000000000000101100000134e88debdnl "16161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161616161"dnldnlactions=ct(table=1)"]) -AT_CHECK([ovs-ofctl packet-out br0 "packet=52540003287c525400444ab586dd6006f70602682c402001000100000000000000000000002020010001000000000000000000000010110005a834e88debdnl +AT_CHECK([ovs-ofctl packet-out br0 "in_port=42,packet=52540003287c525400444ab586dd6006f70602682c402001000100000000000000000000002020010001000000000000000000000010110005a834e88debdnlactions=ct(table=1)"]) -AT_CHECK([ovs-ofctl packet-out br0 
"packet=52540003287c525400444ab586dd6006f706033d1140200100010000000000000000000000202001000100000000000000000000001013891389033ddnl +AT_CHECK([ovs-ofctl packet-out br0 "in_port=42,packet=52540003287c525400444ab586dd6006f706033d1140200100010000000000000000000000202001000100000000000000000000001013891389033ddnla, actions=ct(table=1)"]) AT_CHECK([ovs-appctl dpctl/dump-flows | head -2 | tail -1 | grep -q -e ["]udp[(]src=5001["]]) @@ -6454,7 +6518,7 @@ on_exit 'ovs-appctl revalidator/purge' on_exit 'ovs-appctl dpif/dump-flows br0' dnl Should work with the virtual IP address through NAT -for i in 1 2 3 4 5 6 7 8 9 10 11 12; do +for i in $(seq 1 50); do echo Request $i NS_CHECK_EXEC([at_ns1], [wget 10.1.1.64 -t 5 -T 1 --retry-connrefused -v -o wget$i.log]) done @@ -6743,6 +6807,132 @@ AT_CHECK([ovs-ofctl dump-flows br0 | grep table=2, | OFPROTO_CLEAR_DURATION_IDLE OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([conntrack - can match and clear ct_state from outside OVS]) +CHECK_CONNTRACK_LOCAL_STACK() +OVS_CHECK_TUNNEL_TSO() +OVS_CHECK_GENEVE() + +OVS_TRAFFIC_VSWITCHD_START() +ADD_BR([br-underlay], [set bridge br-underlay other-config:hwaddr=\"f0:00:00:01:01:02\"]) + +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"]) +AT_CHECK([ovs-ofctl add-flow br-underlay "priority=100,ct_state=+trk,actions=ct_clear,resubmit(,0)"]) +AT_CHECK([ovs-ofctl add-flow br-underlay "priority=10,actions=normal"]) + +ADD_NAMESPACES(at_ns0) + +dnl Set up underlay link from host into the namespace using veth pair. +ADD_VETH(p0, at_ns0, br-underlay, "172.31.1.1/24", "f0:00:00:01:01:01") +AT_CHECK([ip addr add dev br-underlay "172.31.1.100/24"]) +AT_CHECK([ip link set dev br-underlay up]) + +dnl Set up tunnel endpoints on OVS outside the namespace and with a native +dnl linux device inside the namespace. 
+ADD_OVS_TUNNEL([geneve], [br0], [at_gnv0], [172.31.1.1], [10.1.1.100/24]) +ADD_NATIVE_TUNNEL([geneve], [ns_gnv0], [at_ns0], [172.31.1.100], [10.1.1.1/24], + [vni 0]) + +dnl First, check the underlay +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 172.31.1.100 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +dnl Okay, now check the overlay +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.100 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +dnl Confirm that the ct_state and ct_clear action found its way to the dp +AT_CHECK([ovs-appctl dpctl/dump-flows --names | grep ct_clear | sort | dnl + grep 'eth(src=f0:00:00:01:01:02,dst=f0:00:00:01:01:01)' | dnl + strip_stats | strip_used | dnl + sed 's/,packet_type(ns=[[0-9]]*,id=[[0-9]]*),/,/'], + [0], [dnl +recirc_id(0),in_port(br-underlay),ct_state(+trk),eth(src=f0:00:00:01:01:02,dst=f0:00:00:01:01:01),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:0.0s, actions:ct_clear,ovs-p0 +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + +AT_BANNER([IGMP]) + +AT_SETUP([IGMP - flood under normal action]) + +OVS_TRAFFIC_VSWITCHD_START() +ADD_NAMESPACES(at_ns0, at_ns1) + +ADD_VETH(p1, at_ns0, br0, "10.1.1.1/24", "f0:00:00:01:01:01") +ADD_VETH(p2, at_ns1, br0, "10.1.1.2/24", "f0:00:00:01:01:02") + +AT_CHECK([ovs-ofctl add-flow br0 "actions=NORMAL"]) + +NS_CHECK_EXEC([at_ns0], [$PYTHON3 $srcdir/sendpkt.py p1 01 00 5e 01 01 03 dnl +f0 00 00 01 01 01 08 00 46 c0 00 28 00 00 40 00 01 02 d3 49 45 65 eb 4a e0 dnl +00 00 16 94 04 00 00 22 00 f9 02 00 00 00 01 04 00 00 00 e0 00 00 fb 00 00 dnl +00 00 00 00 > /dev/null]) + +AT_CHECK([ovs-appctl dpctl/dump-flows --names | grep -e .*ipv4 | sort | dnl + strip_stats | strip_used | strip_recirc | dnl + sed 's/,packet_type(ns=[[0-9]]*,id=[[0-9]]*),/,/'], + [0], [dnl +recirc_id(),in_port(ovs-p1),eth(src=f0:00:00:01:01:01,dst=01:00:5e:01:01:03),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, 
used:never, actions:br0,ovs-p2 +]) +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + +AT_SETUP([IGMP - forward with ICMP]) + +OVS_TRAFFIC_VSWITCHD_START() +ADD_NAMESPACES(at_ns0, at_ns1) + +ADD_VETH(p1, at_ns0, br0, "10.1.1.1/24", "f0:00:00:01:01:01") +ADD_VETH(p2, at_ns1, br0, "10.1.1.2/24", "f0:00:00:01:01:02") + +AT_DATA([flows.txt], [dnl +table=0, arp actions=NORMAL +table=0, ip,in_port=1 actions=ct(table=1,zone=64000) +table=0, in_port=2 actions=output:1 +table=1, ip,ct_state=+trk+inv actions=drop +table=1 ip,in_port=1,icmp,ct_state=+trk+new actions=output:2 +table=1, in_port=1,ip,ct_state=+trk+new actions=controller(userdata=00.de.ad.be.ef.ca.fe.01) +table=1, in_port=1,ip,ct_state=+trk+est actions=output:2 +]) +AT_CHECK([ovs-ofctl del-flows br0]) +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) + +dnl Send the IGMP, followed by a unicast ICMP - ensure we won't black hole + +NS_CHECK_EXEC([at_ns0], [$PYTHON3 $srcdir/sendpkt.py p1 f0 00 00 01 01 02 dnl +f0 00 00 01 01 01 08 00 46 c0 00 28 00 00 40 00 01 02 d3 49 45 65 eb 4a e0 dnl +00 00 16 94 04 00 00 22 00 f9 02 00 00 00 01 04 00 00 00 e0 00 00 fb 00 00 dnl +00 00 00 00 > /dev/null]) + +NS_CHECK_EXEC([at_ns0], [$PYTHON3 $srcdir/sendpkt.py p1 f0 00 00 01 01 02 dnl +f0 00 00 01 01 01 08 00 45 00 00 1c 00 01 00 00 40 01 64 dc 0a 01 01 01 0a dnl +01 01 02 08 00 f7 ff ff ff ff ff > /dev/null]) + +sleep 1 + +dnl Prefer the OpenFlow rules, because different datapaths will behave slightly +dnl differently with respect to the exact dp rules. 
+dnl +dnl This is also why we clear n_bytes / n_packets - some kernels with ipv6 +dnl enabled will bump some of these counters non-deterministically + +AT_CHECK([ovs-ofctl dump-flows br0 | grep -v NXST | dnl + strip_duration | grep -v arp | grep -v n_packets=0 | dnl + grep -v 'in_port=2 actions=output:1' | dnl + sed 's/n_bytes=[[0-9]]*/n_bytes=0/ + s/idle_age=[[0-9]]*/idle_age=0/ + s/n_packets=[[1-9]]/n_packets=0/' | sort], [0], [dnl + cookie=0x0, table=0, n_packets=0, n_bytes=0, idle_age=0, ip,in_port=1 actions=ct(table=1,zone=64000) + cookie=0x0, table=1, n_packets=0, n_bytes=0, idle_age=0, ct_state=+new+trk,icmp,in_port=1 actions=output:2 + cookie=0x0, table=1, n_packets=0, n_bytes=0, idle_age=0, ct_state=+new+trk,ip,in_port=1 actions=controller(userdata=00.de.ad.be.ef.ca.fe.01) +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + AT_BANNER([802.1ad]) AT_SETUP([802.1ad - vlan_limit]) diff --git a/tests/system-tso-macros.at b/tests/system-tso-macros.at index 406334f3e0..1a80047619 100644 --- a/tests/system-tso-macros.at +++ b/tests/system-tso-macros.at @@ -29,3 +29,5 @@ m4_define([CONFIGURE_VETH_OFFLOADS], [AT_CHECK([ethtool -K $1 sg on], [0], [ignore], [ignore])] [AT_CHECK([ethtool -K $1 tso on], [0], [ignore], [ignore])] ) + +m4_define([CHECK_SYSTEM_TSO], []) diff --git a/tests/test-cmap.c b/tests/test-cmap.c index 0705475606..588a5dea63 100644 --- a/tests/test-cmap.c +++ b/tests/test-cmap.c @@ -74,6 +74,7 @@ check_cmap(struct cmap *cmap, const int values[], size_t n, cmap_values[i++] = e->value; } assert(i == n); + assert(e == NULL); /* Here we test iteration with cmap_next_position() */ i = 0; @@ -107,6 +108,7 @@ check_cmap(struct cmap *cmap, const int values[], size_t n, count += e->value == values[i]; } assert(count == 1); + assert(e == NULL); } /* Check that all the values are there in batched lookup. 
*/ @@ -130,6 +132,7 @@ check_cmap(struct cmap *cmap, const int values[], size_t n, CMAP_NODE_FOR_EACH (e, node, nodes[k]) { count += e->value == values[i + k]; } + assert(e == NULL); } assert(count == j); /* j elements in a batch. */ } @@ -584,7 +587,7 @@ benchmark_hmap(void) { struct helement *elements; struct hmap hmap; - struct helement *e, *next; + struct helement *e; struct timeval start; pthread_t *threads; struct hmap_aux aux; @@ -622,7 +625,7 @@ benchmark_hmap(void) /* Destruction. */ xgettimeofday(&start); - HMAP_FOR_EACH_SAFE (e, next, node, &hmap) { + HMAP_FOR_EACH_SAFE (e, node, &hmap) { hmap_remove(&hmap, &e->node); } hmap_destroy(&hmap); diff --git a/tests/test-hash.c b/tests/test-hash.c index 5d3f8ea43f..aec5f580bb 100644 --- a/tests/test-hash.c +++ b/tests/test-hash.c @@ -55,6 +55,9 @@ set_bit128(ovs_u128 *values, int bit, int n_bits) static uint64_t get_range128(ovs_u128 *value, int ofs, uint64_t mask) { + if (ofs == 0) { + return value->u64.lo & mask; + } return ((ofs < 64 ? (value->u64.lo >> ofs) : 0) & mask) | ((ofs <= 64 ? 
(value->u64.hi << (64 - ofs)) : (value->u64.hi >> (ofs - 64)) & mask)); } diff --git a/tests/test-hindex.c b/tests/test-hindex.c index af06be5fcc..cc2b1b8bd9 100644 --- a/tests/test-hindex.c +++ b/tests/test-hindex.c @@ -265,6 +265,43 @@ test_hindex_for_each_safe(hash_func *hash) i = 0; n_remaining = n; HINDEX_FOR_EACH_SAFE (e, next, node, &hindex) { + if (hindex_next(&hindex, &e->node) == NULL) { + assert(next == NULL); + } else { + assert(&next->node == hindex_next(&hindex, &e->node)); + } + assert(i < n); + if (pattern & (1ul << e->value)) { + size_t j; + hindex_remove(&hindex, &e->node); + for (j = 0; ; j++) { + assert(j < n_remaining); + if (values[j] == e->value) { + values[j] = values[--n_remaining]; + break; + } + } + } + check_hindex(&hindex, values, n_remaining, hash); + i++; + } + assert(i == n); + assert(next == NULL); + + for (i = 0; i < n; i++) { + if (pattern & (1ul << i)) { + n_remaining++; + } + } + assert(n == n_remaining); + hindex_destroy(&hindex); + + /* Test short version (without the next variable). */ + make_hindex(&hindex, elements, values, n, hash); + + i = 0; + n_remaining = n; + HINDEX_FOR_EACH_SAFE (e, node, &hindex) { assert(i < n); if (pattern & (1ul << e->value)) { size_t j; diff --git a/tests/test-hmap.c b/tests/test-hmap.c index 9259b0b3fc..e50c7c3807 100644 --- a/tests/test-hmap.c +++ b/tests/test-hmap.c @@ -62,6 +62,7 @@ check_hmap(struct hmap *hmap, const int values[], size_t n, hmap_values[i++] = e->value; } assert(i == n); + assert(e == NULL); memcpy(sort_values, values, sizeof *sort_values * n); qsort(sort_values, n, sizeof *sort_values, compare_ints); @@ -82,6 +83,7 @@ check_hmap(struct hmap *hmap, const int values[], size_t n, count += e->value == values[i]; } assert(count == 1); + assert(e == NULL); } /* Check counters. 
*/ @@ -243,6 +245,44 @@ test_hmap_for_each_safe(hash_func *hash) i = 0; n_remaining = n; HMAP_FOR_EACH_SAFE (e, next, node, &hmap) { + if (hmap_next(&hmap, &e->node) == NULL) { + assert(next == NULL); + } else { + assert(&next->node == hmap_next(&hmap, &e->node)); + } + assert(i < n); + if (pattern & (1ul << e->value)) { + size_t j; + hmap_remove(&hmap, &e->node); + for (j = 0; ; j++) { + assert(j < n_remaining); + if (values[j] == e->value) { + values[j] = values[--n_remaining]; + break; + } + } + } + check_hmap(&hmap, values, n_remaining, hash); + i++; + } + assert(i == n); + assert(next == NULL); + assert(e == NULL); + + for (i = 0; i < n; i++) { + if (pattern & (1ul << i)) { + n_remaining++; + } + } + assert(n == n_remaining); + hmap_destroy(&hmap); + + /* Test short version (without next variable). */ + make_hmap(&hmap, elements, values, n, hash); + + i = 0; + n_remaining = n; + HMAP_FOR_EACH_SAFE (e, node, &hmap) { assert(i < n); if (pattern & (1ul << e->value)) { size_t j; @@ -259,6 +299,7 @@ test_hmap_for_each_safe(hash_func *hash) i++; } assert(i == n); + assert(e == NULL); for (i = 0; i < n; i++) { if (pattern & (1ul << i)) { @@ -308,6 +349,7 @@ test_hmap_for_each_pop(hash_func *hash) i++; } assert(i == n); + assert(e == NULL); hmap_destroy(&hmap); } diff --git a/tests/test-list.c b/tests/test-list.c index 6f1fb059bc..2c6c444488 100644 --- a/tests/test-list.c +++ b/tests/test-list.c @@ -61,7 +61,7 @@ check_list(struct ovs_list *list, const int values[], size_t n) assert(e->value == values[i]); i++; } - assert(&e->node == list); + assert(e == NULL); assert(i == n); i = 0; @@ -70,7 +70,7 @@ check_list(struct ovs_list *list, const int values[], size_t n) assert(e->value == values[n - i - 1]); i++; } - assert(&e->node == list); + assert(e == NULL); assert(i == n); assert(ovs_list_is_empty(list) == !n); @@ -135,6 +135,13 @@ test_list_for_each_safe(void) values_idx = 0; n_remaining = n; LIST_FOR_EACH_SAFE (e, next, node, &list) { + /* "next" is valid as long as 
it's not pointing to &list. */ + if (&e->node == list.prev) { + assert(next == NULL); + } else { + assert(&next->node == e->node.next); + } + assert(i < n); if (pattern & (1ul << i)) { ovs_list_remove(&e->node); @@ -148,7 +155,8 @@ test_list_for_each_safe(void) i++; } assert(i == n); - assert(&e->node == &list); + assert(e == NULL); + assert(next == NULL); for (i = 0; i < n; i++) { if (pattern & (1ul << i)) { @@ -156,6 +164,35 @@ test_list_for_each_safe(void) } } assert(n == n_remaining); + + /* Test short version (without next variable). */ + make_list(&list, elements, values, n); + + i = 0; + values_idx = 0; + n_remaining = n; + LIST_FOR_EACH_SAFE (e, node, &list) { + assert(i < n); + if (pattern & (1ul << i)) { + ovs_list_remove(&e->node); + n_remaining--; + memmove(&values[values_idx], &values[values_idx + 1], + sizeof *values * (n_remaining - values_idx)); + } else { + values_idx++; + } + + check_list(&list, values, n_remaining); + i++; + } + assert(i == n); + assert(e == NULL); + + for (i = 0; i < n; i++) { + if (pattern & (1ul << i)) { + n_remaining++; + } + } } } } diff --git a/tests/test-rcu.c b/tests/test-rcu.c index 965f3c49f3..bb17092bf0 100644 --- a/tests/test-rcu.c +++ b/tests/test-rcu.c @@ -35,7 +35,7 @@ quiescer_main(void *aux OVS_UNUSED) } static void -test_rcu_quiesce(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) +test_rcu_quiesce(void) { pthread_t quiescer; @@ -48,4 +48,29 @@ test_rcu_quiesce(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) xpthread_join(quiescer, NULL); } -OVSTEST_REGISTER("test-rcu-quiesce", test_rcu_quiesce); +static void +add_count(void *_count) +{ + unsigned *count = (unsigned *)_count; + (*count) ++; +} + +static void +test_rcu_barrier(void) +{ + unsigned count = 0; + for (int i = 0; i < 10; i ++) { + ovsrcu_postpone(add_count, &count); + } + + ovsrcu_barrier(); + ovs_assert(count == 10); +} + +static void +test_rcu(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { + test_rcu_quiesce(); + test_rcu_barrier(); +} + 
+OVSTEST_REGISTER("test-rcu", test_rcu); diff --git a/tests/test-util.c b/tests/test-util.c index f0fd042108..7d899fbbfd 100644 --- a/tests/test-util.c +++ b/tests/test-util.c @@ -43,17 +43,16 @@ check_log_2_floor(uint32_t x, int n) static void test_log_2_floor(struct ovs_cmdl_context *ctx OVS_UNUSED) { - int n; - - for (n = 0; n < 32; n++) { + for (uint32_t n = 0; n < 32; n++) { /* Check minimum x such that f(x) == n. */ - check_log_2_floor(1 << n, n); + check_log_2_floor(UINT32_C(1) << n, n); /* Check maximum x such that f(x) == n. */ - check_log_2_floor((1 << n) | ((1 << n) - 1), n); + check_log_2_floor((UINT32_C(1) << n) | ((UINT32_C(1) << n) - 1), n); /* Check a random value in the middle. */ - check_log_2_floor((random_uint32() & ((1 << n) - 1)) | (1 << n), n); + check_log_2_floor((random_uint32() & ((UINT32_C(1) << n) - 1)) + | (UINT32_C(1) << n), n); } /* log_2_floor(0) is undefined, so don't check it. */ @@ -86,7 +85,7 @@ test_ctz(struct ovs_cmdl_context *ctx OVS_UNUSED) for (n = 0; n < 32; n++) { /* Check minimum x such that f(x) == n. */ - check_ctz32(1 << n, n); + check_ctz32(UINT32_C(1) << n, n); /* Check maximum x such that f(x) == n. */ check_ctz32(UINT32_MAX << n, n); diff --git a/tests/tunnel-push-pop.at b/tests/tunnel-push-pop.at index 57589758f4..c63344196b 100644 --- a/tests/tunnel-push-pop.at +++ b/tests/tunnel-push-pop.at @@ -546,6 +546,28 @@ AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port [[37]]' | sort], [0], [dnl port 7: rx pkts=5, bytes=434, drop=?, errs=?, frame=?, over=?, crc=? 
]) +dnl Send out packets received from L3GRE tunnel back to L3GRE tunnel +AT_CHECK([ovs-ofctl del-flows int-br]) +AT_CHECK([ovs-ofctl add-flow int-br "in_port=7,actions=set_field:3->in_port,7"]) +AT_CHECK([ovs-vsctl -- set Interface br0 options:pcap=br0.pcap]) + +AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637']) +AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637']) +AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637']) + +ovs-appctl time/warp 1000 + +AT_CHECK([ovs-pcap p0.pcap > p0.pcap.txt 2>&1]) +AT_CHECK([tail -6 p0.pcap.txt], [0], [dnl +aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637 +001b213cab64aa55aa55000008004500007000004000402f33aa010102580101025c20000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637 
+aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637 +001b213cab64aa55aa55000008004500007000004000402f33aa010102580101025c20000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637 +aa55aa550000001b213cab6408004500007079464000402fba630101025c0101025820000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637 +001b213cab64aa55aa55000008004500007000004000402f33aa010102580101025c20000800000001c845000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637 +]) + + dnl Check decapsulation of Geneve packet with options AT_CAPTURE_FILE([ofctl_monitor.log]) AT_CHECK([ovs-ofctl monitor int-br 65534 --detach --no-chdir --pidfile 2> ofctl_monitor.log]) @@ -565,8 +587,8 @@ icmp,vlan_tci=0x0000,dl_src=be:b6:f4:e1:49:4a,dl_dst=fe:71:d8:83:72:4f,nw_src=30 AT_CHECK([ovs-ofctl dump-ports int-br | grep 'port 5'], [0], [dnl port 5: rx pkts=1, bytes=98, drop=?, errs=?, frame=?, over=?, crc=? 
]) -AT_CHECK([ovs-appctl dpif/dump-flows int-br | grep 'in_port(6081)'], [0], [dnl -tunnel(tun_id=0x7b,src=1.1.2.92,dst=1.1.2.88,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(-df-csum+key)),recirc_id(0),in_port(6081),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=2,rule_cookie=0,controller_id=0,max_len=65535)) +AT_CHECK([ovs-appctl dpif/dump-flows int-br | grep 'in_port(6081)' | sed -e 's/recirc_id=[[0-9]]*/recirc_id=/g'], [0], [dnl +tunnel(tun_id=0x7b,src=1.1.2.92,dst=1.1.2.88,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(-df-csum+key)),recirc_id(0),in_port(6081),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=,rule_cookie=0,controller_id=0,max_len=65535)) ]) dnl Receive VXLAN with different MAC and verify that the neigh cache gets updated @@ -842,3 +864,54 @@ Datapath actions: 7 OVS_VSWITCHD_STOP AT_CLEANUP + +AT_SETUP([tunnel_push_pop - VXLAN access port]) + +dnl Create bridge that has a MAC address. +OVS_VSWITCHD_START([set bridge br0 datapath_type=dummy dnl + -- set Interface br0 other-config:hwaddr=aa:55:aa:55:00:00]) +AT_CHECK([ovs-vsctl add-port br0 p8 dnl + -- set Interface p8 type=dummy ofport_request=8]) + +dnl Create another bridge. +AT_CHECK([ovs-vsctl add-br ovs-tun0 -- set bridge ovs-tun0 datapath_type=dummy]) + +dnl Add VXLAN port to this bridge. +AT_CHECK([ovs-vsctl add-port ovs-tun0 tun0 dnl + -- set int tun0 type=vxlan options:remote_ip=10.0.0.11 dnl + -- add-port ovs-tun0 p7 dnl + -- set interface p7 type=dummy ofport_request=7]) + +dnl Set VLAN tags, so that br0 and its port p8 have the same tag, +dnl but ovs-tun0's port p7 has a different tag. 
+AT_CHECK([ovs-vsctl set port p8 tag=42 dnl + -- set port br0 tag=42 dnl + -- set port p7 tag=200]) + +dnl Set IP address and route for br0. +AT_CHECK([ovs-appctl netdev-dummy/ip4addr br0 10.0.0.2/24], [0], [OK +]) +AT_CHECK([ovs-appctl ovs/route/add 10.0.0.11/24 br0], [0], [OK +]) + +dnl Send an ARP reply to port p8 on br0, so that packets will be forwarded +dnl to learned port. +AT_CHECK([ovs-ofctl add-flow br0 action=normal]) + +AT_CHECK([ovs-appctl netdev-dummy/receive p8 'in_port(8),dnl + eth(src=aa:55:aa:66:00:00,dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),dnl + arp(sip=10.0.0.11,tip=10.0.0.2,op=2,sha=aa:55:aa:66:00:00,tha=00:00:00:00:00:00)']) + +AT_CHECK([ovs-appctl ofproto/trace ovs-tun0 in_port=p7], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], [dnl +Megaflow: recirc_id=0,eth,in_port=7,dl_src=00:00:00:00:00:00,dnl +dl_dst=00:00:00:00:00:00,dl_type=0x0000 +Datapath actions: push_vlan(vid=200,pcp=0),1,clone(tnl_push(tnl_port(4789),dnl +header(size=50,type=4,eth(dst=aa:55:aa:66:00:00,src=aa:55:aa:55:00:00,dnl +dl_type=0x0800),ipv4(src=10.0.0.2,dst=10.0.0.11,proto=17,tos=0,ttl=64,dnl +frag=0x4000),udp(src=0,dst=4789,csum=0x0),vxlan(flags=0x8000000,vni=0x0)),dnl +out_port(100)),8) +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP diff --git a/tests/tunnel.at b/tests/tunnel.at index b8ae7caa9b..fd482aa872 100644 --- a/tests/tunnel.at +++ b/tests/tunnel.at @@ -126,7 +126,7 @@ AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl AT_CHECK([ovs-appctl dpctl/add-flow "tunnel(dst=1.1.1.1,src=3.3.3.200/255.255.255.0,tp_dst=123,tp_src=1,ttl=64),recirc_id(0),in_port(1),eth(),eth_type(0x0800),ipv4()" "2"]) AT_CHECK([ovs-appctl dpctl/dump-flows | tail -1], [0], [dnl -tunnel(src=3.3.3.200/255.255.255.0,dst=1.1.1.1,ttl=64,tp_src=1,tp_dst=123),recirc_id(0),in_port(1),eth_type(0x0800), packets:0, bytes:0, used:never, actions:2 +tunnel(src=3.3.3.200/255.255.255.0,dst=1.1.1.1,ttl=64,tp_src=1,tp_dst=123),recirc_id(0),in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, 
actions:2 ]) OVS_VSWITCHD_STOP diff --git a/utilities/gdb/ovs_gdb.py b/utilities/gdb/ovs_gdb.py index 62928d50fc..763ece2a78 100644 --- a/utilities/gdb/ovs_gdb.py +++ b/utilities/gdb/ovs_gdb.py @@ -1391,7 +1391,8 @@ class CmdDumpPackets(gdb.Command): print("Error, unsupported argument type: {}".format(str(val.type))) return - tcpdump(pkt_list, args=tcpdump_args) + stdout = tcpdump(pkt_list, args=tcpdump_args, getfd=True, quiet=True) + gdb.write(stdout.read().decode("utf8", "replace")) def extract_pkt(self, pkt): pkt_fields = pkt.type.keys() diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c index ede7f1e61a..6771973ae9 100644 --- a/utilities/ovs-ofctl.c +++ b/utilities/ovs-ofctl.c @@ -730,12 +730,12 @@ static void bundle_print_errors(struct ovs_list *errors, struct ovs_list *requests, const char *vconn_name) { - struct ofpbuf *error, *next; + struct ofpbuf *error; struct ofpbuf *bmsg; INIT_CONTAINER(bmsg, requests, list_node); - LIST_FOR_EACH_SAFE (error, next, list_node, errors) { + LIST_FOR_EACH_SAFE (error, list_node, errors) { const struct ofp_header *error_oh = error->data; ovs_be32 error_xid = error_oh->xid; enum ofperr ofperr; diff --git a/utilities/ovs-save b/utilities/ovs-save index fb2025b765..a190902f4d 100755 --- a/utilities/ovs-save +++ b/utilities/ovs-save @@ -102,7 +102,7 @@ save_interfaces () { get_highest_ofp_version() { ovs-vsctl get bridge "$1" protocols | \ sed 's/[][]//g' | sed 's/\ //g' | \ - awk -F ',' '{ print (NF>1)? $(NF) : "OpenFlow14" }' + awk -F ',' '{ print (NF>0)? 
$(NF) : "OpenFlow14" }' } save_flows () { diff --git a/utilities/ovs-tcpdump.in b/utilities/ovs-tcpdump.in index 82d1bedfa6..7fd26e4055 100755 --- a/utilities/ovs-tcpdump.in +++ b/utilities/ovs-tcpdump.in @@ -165,6 +165,9 @@ class OVSDB(object): self._idl_conn = idl.Idl(db_sock, schema) OVSDB.wait_for_db_change(self._idl_conn) # Initial Sync with DB + def close_idl(self): + self._idl_conn.close() + def _get_schema(self): error, strm = Stream.open_block(Stream.open(self._db_sock)) if error: @@ -403,7 +406,8 @@ def py_which(executable): def main(): - db_sock = 'unix:@RUNDIR@/db.sock' + rundir = os.environ.get('OVS_RUNDIR', '@RUNDIR@') + db_sock = 'unix:%s' % os.path.join(rundir, "db.sock") interface = None tcpdargs = [] @@ -500,6 +504,8 @@ def main(): pass sys.exit(1) + ovsdb.close_idl() + pipes = _doexec(*([dump_cmd, '-i', mirror_interface] + tcpdargs)) try: while pipes.poll() is None: @@ -512,6 +518,7 @@ def main(): if pipes.poll() is None: pipes.terminate() + ovsdb = OVSDB(db_sock) ovsdb.destroy_mirror(interface, ovsdb.port_bridge(interface)) ovsdb.destroy_port(mirror_interface, ovsdb.port_bridge(interface)) if tap_created is True: diff --git a/utilities/ovs-vsctl.c b/utilities/ovs-vsctl.c index 37cc72d401..1032089fc2 100644 --- a/utilities/ovs-vsctl.c +++ b/utilities/ovs-vsctl.c @@ -1100,14 +1100,14 @@ cmd_emer_reset(struct ctl_context *ctx) const struct ovsrec_bridge *br; const struct ovsrec_port *port; const struct ovsrec_interface *iface; - const struct ovsrec_mirror *mirror, *next_mirror; - const struct ovsrec_controller *ctrl, *next_ctrl; - const struct ovsrec_manager *mgr, *next_mgr; - const struct ovsrec_netflow *nf, *next_nf; - const struct ovsrec_ssl *ssl, *next_ssl; - const struct ovsrec_sflow *sflow, *next_sflow; - const struct ovsrec_ipfix *ipfix, *next_ipfix; - const struct ovsrec_flow_sample_collector_set *fscset, *next_fscset; + const struct ovsrec_mirror *mirror; + const struct ovsrec_controller *ctrl; + const struct ovsrec_manager *mgr; + const 
struct ovsrec_netflow *nf; + const struct ovsrec_ssl *ssl; + const struct ovsrec_sflow *sflow; + const struct ovsrec_ipfix *ipfix; + const struct ovsrec_flow_sample_collector_set *fscset; /* Reset the Open_vSwitch table. */ ovsrec_open_vswitch_set_manager_options(vsctl_ctx->ovs, NULL, 0); @@ -1145,35 +1145,35 @@ cmd_emer_reset(struct ctl_context *ctx) ovsrec_interface_set_ingress_policing_burst(iface, 0); } - OVSREC_MIRROR_FOR_EACH_SAFE (mirror, next_mirror, idl) { + OVSREC_MIRROR_FOR_EACH_SAFE (mirror, idl) { ovsrec_mirror_delete(mirror); } - OVSREC_CONTROLLER_FOR_EACH_SAFE (ctrl, next_ctrl, idl) { + OVSREC_CONTROLLER_FOR_EACH_SAFE (ctrl, idl) { ovsrec_controller_delete(ctrl); } - OVSREC_MANAGER_FOR_EACH_SAFE (mgr, next_mgr, idl) { + OVSREC_MANAGER_FOR_EACH_SAFE (mgr, idl) { ovsrec_manager_delete(mgr); } - OVSREC_NETFLOW_FOR_EACH_SAFE (nf, next_nf, idl) { + OVSREC_NETFLOW_FOR_EACH_SAFE (nf, idl) { ovsrec_netflow_delete(nf); } - OVSREC_SSL_FOR_EACH_SAFE (ssl, next_ssl, idl) { + OVSREC_SSL_FOR_EACH_SAFE (ssl, idl) { ovsrec_ssl_delete(ssl); } - OVSREC_SFLOW_FOR_EACH_SAFE (sflow, next_sflow, idl) { + OVSREC_SFLOW_FOR_EACH_SAFE (sflow, idl) { ovsrec_sflow_delete(sflow); } - OVSREC_IPFIX_FOR_EACH_SAFE (ipfix, next_ipfix, idl) { + OVSREC_IPFIX_FOR_EACH_SAFE (ipfix, idl) { ovsrec_ipfix_delete(ipfix); } - OVSREC_FLOW_SAMPLE_COLLECTOR_SET_FOR_EACH_SAFE (fscset, next_fscset, idl) { + OVSREC_FLOW_SAMPLE_COLLECTOR_SET_FOR_EACH_SAFE (fscset, idl) { ovsrec_flow_sample_collector_set_delete(fscset); } @@ -1510,13 +1510,13 @@ cmd_add_br(struct ctl_context *ctx) static void del_port(struct vsctl_context *vsctl_ctx, struct vsctl_port *port) { - struct vsctl_iface *iface, *next_iface; + struct vsctl_iface *iface; bridge_delete_port((port->bridge->parent ? 
port->bridge->parent->br_cfg : port->bridge->br_cfg), port->port_cfg); - LIST_FOR_EACH_SAFE (iface, next_iface, ifaces_node, &port->ifaces) { + LIST_FOR_EACH_SAFE (iface, ifaces_node, &port->ifaces) { del_cached_iface(vsctl_ctx, iface); } del_cached_port(vsctl_ctx, port); @@ -1525,19 +1525,19 @@ del_port(struct vsctl_context *vsctl_ctx, struct vsctl_port *port) static void del_bridge(struct vsctl_context *vsctl_ctx, struct vsctl_bridge *br) { - struct vsctl_bridge *child, *next_child; - struct vsctl_port *port, *next_port; - const struct ovsrec_flow_sample_collector_set *fscset, *next_fscset; + struct vsctl_bridge *child; + struct vsctl_port *port; + const struct ovsrec_flow_sample_collector_set *fscset; - HMAP_FOR_EACH_SAFE (child, next_child, children_node, &br->children) { + HMAP_FOR_EACH_SAFE (child, children_node, &br->children) { del_bridge(vsctl_ctx, child); } - LIST_FOR_EACH_SAFE (port, next_port, ports_node, &br->ports) { + LIST_FOR_EACH_SAFE (port, ports_node, &br->ports) { del_port(vsctl_ctx, port); } - OVSREC_FLOW_SAMPLE_COLLECTOR_SET_FOR_EACH_SAFE (fscset, next_fscset, + OVSREC_FLOW_SAMPLE_COLLECTOR_SET_FOR_EACH_SAFE (fscset, vsctl_ctx->base.idl) { if (fscset->bridge == br->br_cfg) { ovsrec_flow_sample_collector_set_delete(fscset); diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 5223aa8970..e328d8ead1 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -543,13 +543,13 @@ bridge_exit(bool delete_datapath) if_notifier_destroy(ifnotifier); seq_destroy(ifaces_changed); - struct datapath *dp, *next; - HMAP_FOR_EACH_SAFE (dp, next, node, &all_datapaths) { + struct datapath *dp; + HMAP_FOR_EACH_SAFE (dp, node, &all_datapaths) { datapath_destroy(dp); } - struct bridge *br, *next_br; - HMAP_FOR_EACH_SAFE (br, next_br, node, &all_bridges) { + struct bridge *br; + HMAP_FOR_EACH_SAFE (br, node, &all_bridges) { bridge_destroy(br, delete_datapath); } @@ -716,8 +716,8 @@ static void datapath_destroy(struct datapath *dp) { if (dp) { - struct ct_zone 
*ct_zone, *next; - HMAP_FOR_EACH_SAFE (ct_zone, next, node, &dp->ct_zones) { + struct ct_zone *ct_zone; + HMAP_FOR_EACH_SAFE (ct_zone, node, &dp->ct_zones) { ofproto_ct_del_zone_timeout_policy(dp->type, ct_zone->zone_id); ct_zone_remove_and_destroy(dp, ct_zone); } @@ -733,7 +733,7 @@ datapath_destroy(struct datapath *dp) static void ct_zones_reconfigure(struct datapath *dp, struct ovsrec_datapath *dp_cfg) { - struct ct_zone *ct_zone, *next; + struct ct_zone *ct_zone; /* Add new 'ct_zone's or update existing 'ct_zone's based on the database * state. */ @@ -760,7 +760,7 @@ ct_zones_reconfigure(struct datapath *dp, struct ovsrec_datapath *dp_cfg) } /* Purge 'ct_zone's no longer found in the database. */ - HMAP_FOR_EACH_SAFE (ct_zone, next, node, &dp->ct_zones) { + HMAP_FOR_EACH_SAFE (ct_zone, node, &dp->ct_zones) { if (ct_zone->last_used != idl_seqno) { ofproto_ct_del_zone_timeout_policy(dp->type, ct_zone->zone_id); ct_zone_remove_and_destroy(dp, ct_zone); @@ -788,7 +788,7 @@ dp_capability_reconfigure(struct datapath *dp, static void datapath_reconfigure(const struct ovsrec_open_vswitch *cfg) { - struct datapath *dp, *next; + struct datapath *dp; /* Add new 'datapath's or update existing ones. */ for (size_t i = 0; i < cfg->n_datapaths; i++) { @@ -805,7 +805,7 @@ datapath_reconfigure(const struct ovsrec_open_vswitch *cfg) } /* Purge deleted 'datapath's. */ - HMAP_FOR_EACH_SAFE (dp, next, node, &all_datapaths) { + HMAP_FOR_EACH_SAFE (dp, node, &all_datapaths) { if (dp->last_used != idl_seqno) { datapath_destroy(dp); } @@ -816,7 +816,7 @@ static void bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) { struct sockaddr_in *managers; - struct bridge *br, *next; + struct bridge *br; int sflow_bridge_number; size_t n_managers; @@ -875,7 +875,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) * - Create ofprotos that are missing. * * - Add ports that are missing. 
*/ - HMAP_FOR_EACH_SAFE (br, next, node, &all_bridges) { + HMAP_FOR_EACH_SAFE (br, node, &all_bridges) { if (!br->ofproto) { int error; @@ -1020,7 +1020,7 @@ bridge_delete_or_reconfigure_ports(struct bridge *br) struct ofproto_port_dump dump; struct sset ofproto_ports; - struct port *port, *port_next; + struct port *port; /* List of "ofp_port"s to delete. We make a list instead of deleting them * right away because ofproto implementations aren't necessarily able to @@ -1132,10 +1132,10 @@ bridge_delete_or_reconfigure_ports(struct bridge *br) * device destroyed via "tunctl -d", a physical Ethernet device * whose module was just unloaded via "rmmod", or a virtual NIC for a * VM whose VM was just terminated. */ - HMAP_FOR_EACH_SAFE (port, port_next, hmap_node, &br->ports) { - struct iface *iface, *iface_next; + HMAP_FOR_EACH_SAFE (port, hmap_node, &br->ports) { + struct iface *iface; - LIST_FOR_EACH_SAFE (iface, iface_next, port_elem, &port->ifaces) { + LIST_FOR_EACH_SAFE (iface, port_elem, &port->ifaces) { if (!sset_contains(&ofproto_ports, iface->name)) { iface_destroy__(iface); } @@ -1967,7 +1967,7 @@ port_is_bond_fake_iface(const struct port *port) static void add_del_bridges(const struct ovsrec_open_vswitch *cfg) { - struct bridge *br, *next; + struct bridge *br; struct shash_node *node; struct shash new_br; size_t i; @@ -1993,7 +1993,7 @@ add_del_bridges(const struct ovsrec_open_vswitch *cfg) /* Get rid of deleted bridges or those whose types have changed. * Update 'cfg' of bridges that still exist. 
*/ - HMAP_FOR_EACH_SAFE (br, next, node, &all_bridges) { + HMAP_FOR_EACH_SAFE (br, node, &all_bridges) { br->cfg = shash_find_data(&new_br, br->name); if (!br->cfg || strcmp(br->type, ofproto_normalize_type( br->cfg->datapath_type))) { @@ -3266,13 +3266,13 @@ bridge_run(void) if (ovsdb_idl_is_lock_contended(idl)) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); - struct bridge *br, *next_br; + struct bridge *br; VLOG_ERR_RL(&rl, "another ovs-vswitchd process is running, " "disabling this process (pid %ld) until it goes away", (long int) getpid()); - HMAP_FOR_EACH_SAFE (br, next_br, node, &all_bridges) { + HMAP_FOR_EACH_SAFE (br, node, &all_bridges) { bridge_destroy(br, false); } /* Since we will not be running system_stats_run() in this process @@ -3594,13 +3594,13 @@ static void bridge_destroy(struct bridge *br, bool del) { if (br) { - struct mirror *mirror, *next_mirror; - struct port *port, *next_port; + struct mirror *mirror; + struct port *port; - HMAP_FOR_EACH_SAFE (port, next_port, hmap_node, &br->ports) { + HMAP_FOR_EACH_SAFE (port, hmap_node, &br->ports) { port_destroy(port); } - HMAP_FOR_EACH_SAFE (mirror, next_mirror, hmap_node, &br->mirrors) { + HMAP_FOR_EACH_SAFE (mirror, hmap_node, &br->mirrors) { mirror_destroy(mirror); } @@ -3746,11 +3746,11 @@ static void bridge_del_ports(struct bridge *br, const struct shash *wanted_ports) { struct shash_node *port_node; - struct port *port, *next; + struct port *port; /* Get rid of deleted ports. * Get rid of deleted interfaces on ports that still exist. 
*/ - HMAP_FOR_EACH_SAFE (port, next, hmap_node, &br->ports) { + HMAP_FOR_EACH_SAFE (port, hmap_node, &br->ports) { port->cfg = shash_find_data(wanted_ports, port->name); if (!port->cfg) { port_destroy(port); @@ -4211,7 +4211,7 @@ bridge_configure_aa(struct bridge *br) const struct ovsdb_datum *mc; struct ovsrec_autoattach *auto_attach = br->cfg->auto_attach; struct aa_settings aa_s; - struct aa_mapping *m, *next; + struct aa_mapping *m; size_t i; if (!auto_attach) { @@ -4227,7 +4227,7 @@ bridge_configure_aa(struct bridge *br) mc = ovsrec_autoattach_get_mappings(auto_attach, OVSDB_TYPE_INTEGER, OVSDB_TYPE_INTEGER); - HMAP_FOR_EACH_SAFE (m, next, hmap_node, &br->mappings) { + HMAP_FOR_EACH_SAFE (m, hmap_node, &br->mappings) { union ovsdb_atom atom; atom.integer = m->isid; @@ -4341,12 +4341,12 @@ static void bridge_aa_refresh_queued(struct bridge *br) { struct ovs_list *list = xmalloc(sizeof *list); - struct bridge_aa_vlan *node, *next; + struct bridge_aa_vlan *node; ovs_list_init(list); ofproto_aa_vlan_get_queued(br->ofproto, list); - LIST_FOR_EACH_SAFE (node, next, list_node, list) { + LIST_FOR_EACH_SAFE (node, list_node, list) { struct port *port; VLOG_INFO("ifname=%s, vlan=%u, oper=%u", node->port_name, node->vlan, @@ -4387,7 +4387,7 @@ port_create(struct bridge *br, const struct ovsrec_port *cfg) static void port_del_ifaces(struct port *port) { - struct iface *iface, *next; + struct iface *iface; struct sset new_ifaces; size_t i; @@ -4398,7 +4398,7 @@ port_del_ifaces(struct port *port) } /* Get rid of deleted interfaces. 
*/ - LIST_FOR_EACH_SAFE (iface, next, port_elem, &port->ifaces) { + LIST_FOR_EACH_SAFE (iface, port_elem, &port->ifaces) { if (!sset_contains(&new_ifaces, iface->name)) { iface_destroy(iface); } @@ -4412,13 +4412,13 @@ port_destroy(struct port *port) { if (port) { struct bridge *br = port->bridge; - struct iface *iface, *next; + struct iface *iface; if (br->ofproto) { ofproto_bundle_unregister(br->ofproto, port); } - LIST_FOR_EACH_SAFE (iface, next, port_elem, &port->ifaces) { + LIST_FOR_EACH_SAFE (iface, port_elem, &port->ifaces) { iface_destroy__(iface); } @@ -5013,12 +5013,12 @@ bridge_configure_mirrors(struct bridge *br) { const struct ovsdb_datum *mc; unsigned long *flood_vlans; - struct mirror *m, *next; + struct mirror *m; size_t i; /* Get rid of deleted mirrors. */ mc = ovsrec_bridge_get_mirrors(br->cfg, OVSDB_TYPE_UUID); - HMAP_FOR_EACH_SAFE (m, next, hmap_node, &br->mirrors) { + HMAP_FOR_EACH_SAFE (m, hmap_node, &br->mirrors) { union ovsdb_atom atom; atom.uuid = m->uuid; diff --git a/vtep/vtep-ctl.c b/vtep/vtep-ctl.c index ab552457d9..99c4adcd53 100644 --- a/vtep/vtep-ctl.c +++ b/vtep/vtep-ctl.c @@ -801,16 +801,16 @@ vtep_ctl_context_invalidate_cache(struct ctl_context *ctx) SHASH_FOR_EACH (node, &vtepctl_ctx->lswitches) { struct vtep_ctl_lswitch *ls = node->data; - struct shash_node *node2, *next_node2; + struct shash_node *node2; shash_destroy(&ls->ucast_local); shash_destroy(&ls->ucast_remote); - SHASH_FOR_EACH_SAFE (node2, next_node2, &ls->mcast_local) { + SHASH_FOR_EACH_SAFE (node2, &ls->mcast_local) { struct vtep_ctl_mcast_mac *mcast_mac = node2->data; - struct vtep_ctl_ploc *ploc, *next_ploc; + struct vtep_ctl_ploc *ploc; - LIST_FOR_EACH_SAFE (ploc, next_ploc, locators_node, + LIST_FOR_EACH_SAFE (ploc, locators_node, &mcast_mac->locators) { free(ploc); } @@ -818,11 +818,11 @@ vtep_ctl_context_invalidate_cache(struct ctl_context *ctx) } shash_destroy(&ls->mcast_local); - SHASH_FOR_EACH_SAFE (node2, next_node2, &ls->mcast_remote) { + 
SHASH_FOR_EACH_SAFE (node2, &ls->mcast_remote) { struct vtep_ctl_mcast_mac *mcast_mac = node2->data; - struct vtep_ctl_ploc *ploc, *next_ploc; + struct vtep_ctl_ploc *ploc; - LIST_FOR_EACH_SAFE (ploc, next_ploc, locators_node, + LIST_FOR_EACH_SAFE (ploc, locators_node, &mcast_mac->locators) { free(ploc); } @@ -1229,9 +1229,9 @@ del_port(struct vtep_ctl_context *vtepctl_ctx, struct vtep_ctl_port *port) static void del_pswitch(struct vtep_ctl_context *vtepctl_ctx, struct vtep_ctl_pswitch *ps) { - struct vtep_ctl_port *port, *next_port; + struct vtep_ctl_port *port; - LIST_FOR_EACH_SAFE (port, next_port, ports_node, &ps->ports) { + LIST_FOR_EACH_SAFE (port, ports_node, &ps->ports) { del_port(vtepctl_ctx, port); } diff --git a/xenserver/openvswitch-xen.spec.in b/xenserver/openvswitch-xen.spec.in index 4d21c6364f..ae22f2f5c4 100644 --- a/xenserver/openvswitch-xen.spec.in +++ b/xenserver/openvswitch-xen.spec.in @@ -457,6 +457,7 @@ exit 0 /usr/share/openvswitch/scripts/ovs-lib /usr/share/openvswitch/scripts/ovs-vtep /usr/share/openvswitch/vswitch.ovsschema +/usr/share/openvswitch/local-config.ovsschema /usr/share/openvswitch/vtep.ovsschema /usr/sbin/ovs-bugtool /usr/sbin/ovs-vswitchd @@ -479,6 +480,7 @@ exit 0 /usr/share/man/man1/ovsdb-client.1.gz /usr/share/man/man1/ovsdb-server.1.gz /usr/share/man/man1/ovsdb-tool.1.gz +/usr/share/man/man5/ovsdb.local-config.5.gz /usr/share/man/man5/ovsdb-server.5.gz /usr/share/man/man5/ovs-vswitchd.conf.db.5.gz /usr/share/man/man5/vtep.5.gz