diff --git a/.ci/linux-build.sh b/.ci/linux-build.sh
index 863f023888..1e7565b8bb 100755
--- a/.ci/linux-build.sh
+++ b/.ci/linux-build.sh
@@ -246,8 +246,8 @@ if [ "$ASAN" ]; then
export ASAN_OPTIONS='detect_leaks=1'
# -O2 generates few false-positive memory leak reports in test-ovsdb
# application, so lowering optimizations to -O1 here.
- CLFAGS_ASAN="-O1 -fno-omit-frame-pointer -fno-common -fsanitize=address"
- CFLAGS_FOR_OVS="${CFLAGS_FOR_OVS} ${CLFAGS_ASAN}"
+ CFLAGS_ASAN="-O1 -fno-omit-frame-pointer -fno-common -fsanitize=address"
+ CFLAGS_FOR_OVS="${CFLAGS_FOR_OVS} ${CFLAGS_ASAN}"
fi
save_OPTS="${OPTS} $*"
diff --git a/.ci/linux-prepare.sh b/.ci/linux-prepare.sh
index c55125cf78..c0b7473eda 100755
--- a/.ci/linux-prepare.sh
+++ b/.ci/linux-prepare.sh
@@ -20,9 +20,13 @@ cd sparse
make -j4 HAVE_LLVM= HAVE_SQLITE= install
cd ..
+# Installing wheel separately because it may be needed to build some
+# of the packages during dependency backtracking and pip >= 22.0 will
+# abort backtracking on build failures:
+# https://github.com/pypa/pip/issues/10655
+pip3 install --disable-pip-version-check --user wheel
pip3 install --disable-pip-version-check --user \
- flake8 hacking sphinx pyOpenSSL wheel setuptools
-pip3 install --user --upgrade docutils
+ flake8 'hacking>=3.0' sphinx setuptools
pip3 install --user 'meson==0.47.1'
if [ "$M32" ]; then
diff --git a/.cirrus.yml b/.cirrus.yml
index 358f2ba256..a7ae793bc4 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -5,11 +5,11 @@ freebsd_build_task:
image_family: freebsd-12-2-snap
image_family: freebsd-11-4-snap
cpu: 4
- memory: 8G
+ memory: 4G
env:
DEPENDENCIES: automake libtool gmake gcc wget openssl python3
- PY_DEPS: sphinx|openssl
+ PY_DEPS: sphinx
matrix:
COMPILER: gcc
COMPILER: clang
diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml
index e2350c6d9d..7434ad18ec 100644
--- a/.github/workflows/build-and-test.yml
+++ b/.github/workflows/build-and-test.yml
@@ -127,7 +127,7 @@ jobs:
- name: set up python
uses: actions/setup-python@v2
with:
- python-version: '3.x'
+ python-version: '3.9'
- name: create ci signature file for the dpdk cache key
if: matrix.dpdk != '' || matrix.dpdk_shared != ''
@@ -215,7 +215,7 @@ jobs:
- name: set up python
uses: actions/setup-python@v2
with:
- python-version: '3.x'
+ python-version: '3.9'
- name: install dependencies
run: brew install automake libtool
- name: prepare
diff --git a/.travis.yml b/.travis.yml
index 51d0511080..c7aeede06e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -17,7 +17,6 @@ addons:
- libjemalloc-dev
- libnuma-dev
- libpcap-dev
- - python3-openssl
- python3-pip
- python3-sphinx
- libelf-dev
diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst
index 68c9867b19..64bc577e0b 100644
--- a/Documentation/faq/releases.rst
+++ b/Documentation/faq/releases.rst
@@ -205,8 +205,8 @@ Q: What DPDK version does each Open vSwitch release work with?
2.10.x 17.11.10
2.11.x 18.11.9
2.12.x 18.11.9
- 2.13.x 19.11.8
- 2.14.x 19.11.8
+ 2.13.x 19.11.10
+ 2.14.x 19.11.10
2.15.x 20.11.1
2.16.x 20.11.1
============ ========
diff --git a/Documentation/intro/install/dpdk.rst b/Documentation/intro/install/dpdk.rst
index d8fa931fab..1dbead3276 100644
--- a/Documentation/intro/install/dpdk.rst
+++ b/Documentation/intro/install/dpdk.rst
@@ -219,7 +219,7 @@ To verify hugepage configuration::
Mount the hugepages, if not already mounted by default::
- $ mount -t hugetlbfs none /dev/hugepages``
+ $ mount -t hugetlbfs none /dev/hugepages
.. note::
diff --git a/Documentation/topics/dpdk/pmd.rst b/Documentation/topics/dpdk/pmd.rst
index 95fa7af128..c1a35eb13a 100644
--- a/Documentation/topics/dpdk/pmd.rst
+++ b/Documentation/topics/dpdk/pmd.rst
@@ -31,17 +31,19 @@ input ports for packets, classifying packets once received, and executing
actions on the packets once they are classified.
PMD threads utilize Receive (Rx) and Transmit (Tx) queues, commonly known as
-*rxq*\s and *txq*\s. While Tx queue configuration happens automatically, Rx
-queues can be configured by the user. This can happen in one of two ways:
+*rxq*\s and *txq*\s to receive and send packets from/to an interface.
-- For physical interfaces, configuration is done using the
- :program:`ovs-appctl` utility.
+- For physical interfaces, the number of Tx Queues is automatically configured
+ based on the number of PMD thread cores. The number of Rx queues can be
+ configured with::
-- For virtual interfaces, configuration is done using the :program:`ovs-appctl`
- utility, but this configuration must be reflected in the guest configuration
- (e.g. QEMU command line arguments).
+ $ ovs-vsctl set Interface <interface_name> options:n_rxq=N
-The :program:`ovs-appctl` utility also provides a number of commands for
+- For virtual interfaces, the number of Tx and Rx queues are configured by
+ libvirt/QEMU and enabled/disabled in the guest. Refer to :doc:`vhost-user`
+ for more information.
+
+The :program:`ovs-appctl` utility provides a number of commands for
querying PMD threads and their respective queues. This, and all of the above,
is discussed here.
diff --git a/NEWS b/NEWS
index 559a51ba3f..ecaf9ffd78 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,17 @@
+v2.16.3 - xx xxx xxxx
+---------------------
+ - Python:
+ * For SSL support, the use of the pyOpenSSL library has been replaced
+ with the native 'ssl' module.
+
+v2.16.2 - 17 Dec 2021
+---------------------
+ - Bug fixes
+
+v2.16.1 - 21 Oct 2021
+---------------------
+ - Bug fixes
+
v2.16.0 - 16 Aug 2021
---------------------
- Removed support for 1024-bit Diffie-Hellman key exchange, which is now
diff --git a/acinclude.m4 b/acinclude.m4
index dba365ea1a..1a43d7feaa 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -77,7 +77,7 @@ dnl Checks if compiler and binutils supports AVX512.
AC_DEFUN([OVS_CHECK_AVX512], [
OVS_CHECK_BINUTILS_AVX512
OVS_CHECK_CC_OPTION(
- [-mavx512f], [ovs_have_cc_mavx512f=yes], [ovs_have_cc_mavx512f=no])
+ [-mavx512f -mavx512vpopcntdq], [ovs_have_cc_mavx512f=yes], [ovs_have_cc_mavx512f=no])
AM_CONDITIONAL([HAVE_AVX512F], [test $ovs_have_cc_mavx512f = yes])
if test "$ovs_have_cc_mavx512f" = yes; then
AC_DEFINE([HAVE_AVX512F], [1],
diff --git a/configure.ac b/configure.ac
index 16b32be965..92817b62e1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -13,7 +13,7 @@
# limitations under the License.
AC_PREREQ(2.63)
-AC_INIT(openvswitch, 2.16.0, bugs@openvswitch.org)
+AC_INIT(openvswitch, 2.16.3, bugs@openvswitch.org)
AC_CONFIG_SRCDIR([datapath/datapath.c])
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_AUX_DIR([build-aux])
diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c
index e130c2f966..218e7db814 100644
--- a/datapath-windows/ovsext/Actions.c
+++ b/datapath-windows/ovsext/Actions.c
@@ -1112,9 +1112,9 @@ OvsPopFieldInPacketBuf(OvsForwardingContext *ovsFwdCtx,
* should split the function and refactor. */
if (!bufferData) {
EthHdr *ethHdr = (EthHdr *)bufferStart;
- /* If the frame is not VLAN make it a no op */
if (ethHdr->Type != ETH_TYPE_802_1PQ_NBO) {
- return NDIS_STATUS_SUCCESS;
+ OVS_LOG_ERROR("Invalid ethHdr type %u, nbl %p", ethHdr->Type, ovsFwdCtx->curNbl);
+ return NDIS_STATUS_INVALID_PACKET;
}
}
RtlMoveMemory(bufferStart + shiftLength, bufferStart, shiftOffset);
@@ -1137,6 +1137,9 @@ OvsPopFieldInPacketBuf(OvsForwardingContext *ovsFwdCtx,
static __inline NDIS_STATUS
OvsPopVlanInPktBuf(OvsForwardingContext *ovsFwdCtx)
{
+ NDIS_STATUS status;
+ OVS_PACKET_HDR_INFO* layers = &ovsFwdCtx->layers;
+
/*
* Declare a dummy vlanTag structure since we need to compute the size
* of shiftLength. The NDIS one is a unionized structure.
@@ -1145,7 +1148,15 @@ OvsPopVlanInPktBuf(OvsForwardingContext *ovsFwdCtx)
UINT32 shiftLength = sizeof(vlanTag.TagHeader);
UINT32 shiftOffset = sizeof(DL_EUI48) + sizeof(DL_EUI48);
- return OvsPopFieldInPacketBuf(ovsFwdCtx, shiftOffset, shiftLength, NULL);
+ status = OvsPopFieldInPacketBuf(ovsFwdCtx, shiftOffset, shiftLength,
+ NULL);
+
+ if (status == NDIS_STATUS_SUCCESS) {
+ layers->l3Offset -= (UINT16) shiftLength;
+ layers->l4Offset -= (UINT16) shiftLength;
+ }
+
+ return status;
}
@@ -1516,6 +1527,7 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx,
csumInfo.Value = NET_BUFFER_LIST_INFO(ovsFwdCtx->curNbl,
TcpIpChecksumNetBufferListInfo);
+
/*
* Adjust the IP header inline as dictated by the action, and also update
* the IP and the TCP checksum for the data modified.
@@ -1524,6 +1536,7 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx,
* ChecksumUpdate32(). Ignoring this for now, since for the most common
* case, we only update the TTL.
*/
+ /* Only in the tx direction is the checksum reset to the pseudo checksum. */
if (isSource) {
addrField = &ipHdr->saddr;
@@ -1540,7 +1553,7 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx,
((BOOLEAN)csumInfo.Receive.UdpChecksumSucceeded ||
(BOOLEAN)csumInfo.Receive.UdpChecksumFailed);
}
- if (l4Offload) {
+ if (isTx && l4Offload) {
*checkField = IPPseudoChecksum(&newAddr, &ipHdr->daddr,
tcpHdr ? IPPROTO_TCP : IPPROTO_UDP,
ntohs(ipHdr->tot_len) - ipHdr->ihl * 4);
@@ -1561,7 +1574,7 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx,
(BOOLEAN)csumInfo.Receive.UdpChecksumFailed);
}
- if (l4Offload) {
+ if (isTx && l4Offload) {
*checkField = IPPseudoChecksum(&ipHdr->saddr, &newAddr,
tcpHdr ? IPPROTO_TCP : IPPROTO_UDP,
ntohs(ipHdr->tot_len) - ipHdr->ihl * 4);
@@ -1570,7 +1583,7 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx,
if (*addrField != newAddr) {
UINT32 oldAddr = *addrField;
- if (checkField && *checkField != 0 && !l4Offload) {
+ if ((checkField && *checkField != 0) && (!l4Offload || !isTx)) {
/* Recompute total checksum. */
*checkField = ChecksumUpdate32(*checkField, oldAddr,
newAddr);
@@ -1579,11 +1592,12 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx,
ipHdr->check = ChecksumUpdate32(ipHdr->check, oldAddr,
newAddr);
}
+
*addrField = newAddr;
}
if (portField && *portField != newPort) {
- if (checkField && !l4Offload) {
+ if ((checkField) && (!l4Offload || !isTx)) {
/* Recompute total checksum. */
*checkField = ChecksumUpdate16(*checkField, *portField,
newPort);
@@ -1698,6 +1712,15 @@ OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx,
ipHdr->ttl = ipAttr->ipv4_ttl;
key->ipKey.nwTtl = ipAttr->ipv4_ttl;
}
+ if (ipHdr->dscp != (ipAttr->ipv4_tos & 0xfc)) {
+ /* ECN + DSCP */
+ UINT8 newTos = (ipHdr->tos & 0x3) | (ipAttr->ipv4_tos & 0xfc);
+ if (ipHdr->check != 0) {
+ ipHdr->check = ChecksumUpdate16(ipHdr->check, ipHdr->tos, newTos);
+ }
+ ipHdr->tos = newTos;
+ key->ipKey.nwTos = newTos;
+ }
return NDIS_STATUS_SUCCESS;
}
@@ -1792,9 +1815,11 @@ OvsExecuteRecirc(OvsForwardingContext *ovsFwdCtx,
}
if (newNbl) {
- deferredAction = OvsAddDeferredActions(newNbl, key, NULL);
+ deferredAction = OvsAddDeferredActions(newNbl, key, &(ovsFwdCtx->layers),
+ NULL);
} else {
- deferredAction = OvsAddDeferredActions(ovsFwdCtx->curNbl, key, NULL);
+ deferredAction = OvsAddDeferredActions(ovsFwdCtx->curNbl, key,
+ &(ovsFwdCtx->layers), NULL);
}
if (deferredAction) {
@@ -1964,7 +1989,7 @@ OvsExecuteSampleAction(OvsForwardingContext *ovsFwdCtx,
return STATUS_SUCCESS;
}
- if (!OvsAddDeferredActions(newNbl, key, a)) {
+ if (!OvsAddDeferredActions(newNbl, key, &(ovsFwdCtx->layers), a)) {
OVS_LOG_INFO(
"Deferred actions limit reached, dropping sample action.");
OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE);
@@ -2100,6 +2125,7 @@ OvsDoExecuteActions(POVS_SWITCH_CONTEXT switchContext,
*/
status = OvsPopVlanInPktBuf(&ovsFwdCtx);
if (status != NDIS_STATUS_SUCCESS) {
+ OVS_LOG_ERROR("OVS-pop vlan action failed status = %lu", status);
dropReason = L"OVS-pop vlan action failed";
goto dropit;
}
@@ -2349,7 +2375,7 @@ OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext,
if (status == STATUS_SUCCESS) {
status = OvsProcessDeferredActions(switchContext, completionList,
- portNo, sendFlags, layers);
+ portNo, sendFlags);
}
return status;
diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c
index 2610d626a0..7f1d2fb412 100644
--- a/datapath-windows/ovsext/Conntrack.c
+++ b/datapath-windows/ovsext/Conntrack.c
@@ -493,15 +493,32 @@ static __inline NDIS_STATUS
OvsDetectCtPacket(OvsForwardingContext *fwdCtx,
OvsFlowKey *key)
{
+ NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+ OvsFlowKey newFlowKey = { 0 };
+
switch (ntohs(key->l2.dlType)) {
case ETH_TYPE_IPV4:
if (key->ipKey.nwFrag != OVS_FRAG_TYPE_NONE) {
- return OvsProcessIpv4Fragment(fwdCtx->switchContext,
+ status = OvsProcessIpv4Fragment(fwdCtx->switchContext,
&fwdCtx->curNbl,
fwdCtx->completionList,
fwdCtx->fwdDetail->SourcePortId,
&fwdCtx->layers,
key->tunKey.tunnelId);
+ if (status == NDIS_STATUS_SUCCESS) {
+ /* After IPv4 reassembly, re-extract the flow key, as the
+ * old L3 and L4 header information is no longer correct. */
+ status =
+ OvsExtractFlow(fwdCtx->curNbl, fwdCtx->srcVportNo,
+ &newFlowKey, &fwdCtx->layers,
+ fwdCtx->tunKey.dst != 0 ? &fwdCtx->tunKey : NULL);
+ if (status != NDIS_STATUS_SUCCESS) {
+ OVS_LOG_ERROR("Extract flow failed Nbl %p", fwdCtx->curNbl);
+ return status;
+ }
+ *key = newFlowKey;
+ }
+ return status;
}
if (key->ipKey.nwProto == IPPROTO_TCP
|| key->ipKey.nwProto == IPPROTO_UDP
@@ -609,6 +626,31 @@ OvsReverseIcmpType(UINT8 type)
}
}
+static __inline void
+OvsPickupCtTupleAsLookupKey(POVS_CT_KEY ctKey, UINT16 zone, OvsFlowKey *flowKey)
+{
+ UINT32 ipAddrSrc = 0, ipAddrDst = 0;
+
+ if (!flowKey || !ctKey) return;
+
+ if (flowKey->l2.dlType == htons(ETH_TYPE_IPV4)) {
+ ipAddrSrc = flowKey->ct.tuple_ipv4.ipv4_src;
+ ipAddrDst = flowKey->ct.tuple_ipv4.ipv4_dst;
+
+ if ((ipAddrSrc > 0 && ipAddrDst > 0) &&
+ (zone == flowKey->ct.zone)) {
+ /* If the ct tuple_ipv4 addresses in flowKey are non-zero and
+ * ct.zone matches the zone parameter, pick up the tuple_ipv4
+ * values as the lookup key. */
+ ctKey->src.addr.ipv4 = flowKey->ct.tuple_ipv4.ipv4_src;
+ ctKey->dst.addr.ipv4 = flowKey->ct.tuple_ipv4.ipv4_dst;
+ ctKey->nw_proto = flowKey->ct.tuple_ipv4.ipv4_proto;
+ ctKey->src.port = flowKey->ct.tuple_ipv4.src_port;
+ ctKey->dst.port = flowKey->ct.tuple_ipv4.dst_port;
+ }
+ }
+}
+
static __inline NDIS_STATUS
OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
UINT16 zone,
@@ -629,6 +671,7 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
ctx->key.src.port = flowKey->ipKey.l4.tpSrc;
ctx->key.dst.port = flowKey->ipKey.l4.tpDst;
+
if (flowKey->ipKey.nwProto == IPPROTO_ICMP) {
ICMPHdr icmpStorage;
const ICMPHdr *icmp;
@@ -683,6 +726,10 @@ OvsCtSetupLookupCtx(OvsFlowKey *flowKey,
/* Translate address first for reverse NAT */
ctx->key = natEntry->ctEntry->key;
OvsCtKeyReverse(&ctx->key);
+ } else {
+ if (flowKey->l2.dlType == htons(ETH_TYPE_IPV4)) {
+ OvsPickupCtTupleAsLookupKey(&(ctx->key), zone, flowKey);
+ }
}
ctx->hash = OvsCtHashKey(&ctx->key);
diff --git a/datapath-windows/ovsext/Recirc.c b/datapath-windows/ovsext/Recirc.c
index 2febf060dd..7a688c8742 100644
--- a/datapath-windows/ovsext/Recirc.c
+++ b/datapath-windows/ovsext/Recirc.c
@@ -277,16 +277,23 @@ OvsDeferredActionsQueuePush(POVS_DEFERRED_ACTION_QUEUE queue)
POVS_DEFERRED_ACTION
OvsAddDeferredActions(PNET_BUFFER_LIST nbl,
OvsFlowKey *key,
+ POVS_PACKET_HDR_INFO layers,
const PNL_ATTR actions)
{
POVS_DEFERRED_ACTION_QUEUE queue = OvsDeferredActionsQueueGet();
POVS_DEFERRED_ACTION deferredAction = NULL;
+ OVS_PACKET_HDR_INFO layersInit = { 0 };
deferredAction = OvsDeferredActionsQueuePush(queue);
if (deferredAction) {
deferredAction->nbl = nbl;
deferredAction->actions = actions;
deferredAction->key = *key;
+ if (layers) {
+ deferredAction->layers = *layers;
+ } else {
+ deferredAction->layers = layersInit;
+ }
}
return deferredAction;
@@ -303,15 +310,17 @@ NDIS_STATUS
OvsProcessDeferredActions(POVS_SWITCH_CONTEXT switchContext,
OvsCompletionList *completionList,
UINT32 portNo,
- ULONG sendFlags,
- OVS_PACKET_HDR_INFO *layers)
+ ULONG sendFlags)
{
NDIS_STATUS status = NDIS_STATUS_SUCCESS;
POVS_DEFERRED_ACTION_QUEUE queue = OvsDeferredActionsQueueGet();
POVS_DEFERRED_ACTION deferredAction = NULL;
+ POVS_PACKET_HDR_INFO layersDeferred = NULL;
/* Process all deferred actions. */
while ((deferredAction = OvsDeferredActionsQueuePop(queue)) != NULL) {
+ layersDeferred = &(deferredAction->layers);
+
if (deferredAction->actions) {
status = OvsDoExecuteActions(switchContext,
completionList,
@@ -319,7 +328,7 @@ OvsProcessDeferredActions(POVS_SWITCH_CONTEXT switchContext,
portNo,
sendFlags,
&deferredAction->key, NULL,
- layers, deferredAction->actions,
+ layersDeferred, deferredAction->actions,
NlAttrGetSize(deferredAction->actions));
} else {
status = OvsDoRecirc(switchContext,
@@ -327,7 +336,7 @@ OvsProcessDeferredActions(POVS_SWITCH_CONTEXT switchContext,
deferredAction->nbl,
&deferredAction->key,
portNo,
- layers);
+ layersDeferred);
}
}
diff --git a/datapath-windows/ovsext/Recirc.h b/datapath-windows/ovsext/Recirc.h
index 2b314ce274..b2d02a65c2 100644
--- a/datapath-windows/ovsext/Recirc.h
+++ b/datapath-windows/ovsext/Recirc.h
@@ -18,6 +18,7 @@
#define __RECIRC_H_ 1
#include "Actions.h"
+#include "NetProto.h"
#define DEFERRED_ACTION_QUEUE_SIZE 10
#define DEFERRED_ACTION_EXEC_LEVEL 4
@@ -26,6 +27,7 @@ typedef struct _OVS_DEFERRED_ACTION {
PNET_BUFFER_LIST nbl;
PNL_ATTR actions;
OvsFlowKey key;
+ OVS_PACKET_HDR_INFO layers;
} OVS_DEFERRED_ACTION, *POVS_DEFERRED_ACTION;
/*
@@ -39,8 +41,7 @@ NDIS_STATUS
OvsProcessDeferredActions(POVS_SWITCH_CONTEXT switchContext,
OvsCompletionList *completionList,
UINT32 portNo,
- ULONG sendFlags,
- OVS_PACKET_HDR_INFO *layers);
+ ULONG sendFlags);
/*
* --------------------------------------------------------------------------
@@ -52,6 +53,7 @@ OvsProcessDeferredActions(POVS_SWITCH_CONTEXT switchContext,
POVS_DEFERRED_ACTION
OvsAddDeferredActions(PNET_BUFFER_LIST packet,
OvsFlowKey *key,
+ POVS_PACKET_HDR_INFO layers,
const PNL_ATTR actions);
/*
diff --git a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h b/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
index 4cce92f66c..bc18c56b81 100644
--- a/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
+++ b/datapath/linux/compat/include/net/netfilter/nf_conntrack_core.h
@@ -108,7 +108,14 @@ static inline bool rpl_nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
static inline unsigned int
rpl_nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
{
- return nf_conntrack_in(state->net, state->pf, state->hook, skb);
+ int err;
+
+ /* Repeat if requested, see nf_iterate(). */
+ do {
+ err = nf_conntrack_in(state->net, state->pf, state->hook, skb);
+ } while (err == NF_REPEAT);
+
+ return err;
}
#define nf_conntrack_in rpl_nf_conntrack_in
#endif /* HAVE_NF_CONNTRACK_IN_TAKES_NF_HOOK_STATE */
diff --git a/debian/changelog b/debian/changelog
index 239d210b96..9fa140c6ff 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,21 @@
+openvswitch (2.16.3-1) unstable; urgency=low
+ [ Open vSwitch team ]
+ * New upstream version
+
+ -- Open vSwitch team <dev@openvswitch.org> Fri, 17 Dec 2021 22:14:03 +0100
+
+openvswitch (2.16.2-1) unstable; urgency=low
+ [ Open vSwitch team ]
+ * New upstream version
+
+ -- Open vSwitch team <dev@openvswitch.org> Fri, 17 Dec 2021 22:14:03 +0100
+
+openvswitch (2.16.1-1) unstable; urgency=low
+ [ Open vSwitch team ]
+ * New upstream version
+
+ -- Open vSwitch team <dev@openvswitch.org> Thu, 21 Oct 2021 23:58:12 +0200
+
openvswitch (2.16.0-1) unstable; urgency=low
* New upstream version
diff --git a/include/openvswitch/json.h b/include/openvswitch/json.h
index 73b562e03d..0831a9cee1 100644
--- a/include/openvswitch/json.h
+++ b/include/openvswitch/json.h
@@ -50,7 +50,9 @@ enum json_type {
JSON_INTEGER, /* 123. */
JSON_REAL, /* 123.456. */
JSON_STRING, /* "..." */
- JSON_N_TYPES
+ JSON_N_TYPES,
+ JSON_SERIALIZED_OBJECT, /* Internal type to hold serialized version of
+ * data of other types. */
};
const char *json_type_to_string(enum json_type);
@@ -70,7 +72,7 @@ struct json {
struct json_array array;
long long int integer;
double real;
- char *string;
+ char *string; /* JSON_STRING or JSON_SERIALIZED_OBJECT. */
};
};
@@ -78,6 +80,7 @@ struct json *json_null_create(void);
struct json *json_boolean_create(bool);
struct json *json_string_create(const char *);
struct json *json_string_create_nocopy(char *);
+struct json *json_serialized_object_create(const struct json *);
struct json *json_integer_create(long long int);
struct json *json_real_create(double);
@@ -99,6 +102,7 @@ void json_object_put_format(struct json *,
OVS_PRINTF_FORMAT(3, 4);
const char *json_string(const struct json *);
+const char *json_serialized_object(const struct json *);
struct json_array *json_array(const struct json *);
struct shash *json_object(const struct json *);
bool json_boolean(const struct json *);
@@ -125,6 +129,7 @@ struct json *json_parser_finish(struct json_parser *);
void json_parser_abort(struct json_parser *);
struct json *json_from_string(const char *string);
+struct json *json_from_serialized_object(const struct json *);
struct json *json_from_file(const char *file_name);
struct json *json_from_stream(FILE *stream);
diff --git a/include/openvswitch/meta-flow.h b/include/openvswitch/meta-flow.h
index 95e52e3587..045dce8f5f 100644
--- a/include/openvswitch/meta-flow.h
+++ b/include/openvswitch/meta-flow.h
@@ -2305,6 +2305,7 @@ void mf_set_flow_value_masked(const struct mf_field *,
const union mf_value *mask,
struct flow *);
bool mf_is_tun_metadata(const struct mf_field *);
+bool mf_is_frozen_metadata(const struct mf_field *);
bool mf_is_pipeline_field(const struct mf_field *);
bool mf_is_set(const struct mf_field *, const struct flow *);
void mf_mask_field(const struct mf_field *, struct flow_wildcards *);
diff --git a/ipsec/ovs-monitor-ipsec.in b/ipsec/ovs-monitor-ipsec.in
index 89a36fe17b..a8b0705d9f 100755
--- a/ipsec/ovs-monitor-ipsec.in
+++ b/ipsec/ovs-monitor-ipsec.in
@@ -202,18 +202,18 @@ conn prevent_unencrypted_vxlan
"""
auth_tmpl = {"psk": Template("""\
- left=0.0.0.0
+ left=%any
right=$remote_ip
authby=psk"""),
"pki_remote": Template("""\
- left=0.0.0.0
+ left=%any
right=$remote_ip
leftid=$local_name
rightid=$remote_name
leftcert=$certificate
rightcert=$remote_cert"""),
"pki_ca": Template("""\
- left=0.0.0.0
+ left=%any
right=$remote_ip
leftid=$local_name
rightid=$remote_name
@@ -299,11 +299,11 @@ conn prevent_unencrypted_vxlan
def config_tunnel(self, tunnel):
if tunnel.conf["psk"]:
- self.secrets_file.write('0.0.0.0 %s : PSK "%s"\n' %
+ self.secrets_file.write('%%any %s : PSK "%s"\n' %
(tunnel.conf["remote_ip"], tunnel.conf["psk"]))
auth_section = self.auth_tmpl["psk"].substitute(tunnel.conf)
else:
- self.secrets_file.write("0.0.0.0 %s : RSA %s\n" %
+ self.secrets_file.write("%%any %s : RSA %s\n" %
(tunnel.conf["remote_ip"],
tunnel.conf["private_key"]))
if tunnel.conf["remote_cert"]:
diff --git a/lib/bfd.c b/lib/bfd.c
index 3c965699ac..9698576d07 100644
--- a/lib/bfd.c
+++ b/lib/bfd.c
@@ -131,16 +131,17 @@ enum diag {
* | Required Min Echo RX Interval |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */
struct msg {
- uint8_t vers_diag; /* Version and diagnostic. */
- uint8_t flags; /* 2bit State field followed by flags. */
- uint8_t mult; /* Fault detection multiplier. */
- uint8_t length; /* Length of this BFD message. */
- ovs_be32 my_disc; /* My discriminator. */
- ovs_be32 your_disc; /* Your discriminator. */
- ovs_be32 min_tx; /* Desired minimum tx interval. */
- ovs_be32 min_rx; /* Required minimum rx interval. */
- ovs_be32 min_rx_echo; /* Required minimum echo rx interval. */
+ uint8_t vers_diag; /* Version and diagnostic. */
+ uint8_t flags; /* 2bit State field followed by flags. */
+ uint8_t mult; /* Fault detection multiplier. */
+ uint8_t length; /* Length of this BFD message. */
+ ovs_16aligned_be32 my_disc; /* My discriminator. */
+ ovs_16aligned_be32 your_disc; /* Your discriminator. */
+ ovs_16aligned_be32 min_tx; /* Desired minimum tx interval. */
+ ovs_16aligned_be32 min_rx; /* Required minimum rx interval. */
+ ovs_16aligned_be32 min_rx_echo; /* Required minimum echo rx interval. */
};
+
BUILD_ASSERT_DECL(BFD_PACKET_LEN == sizeof(struct msg));
#define DIAG_MASK 0x1f
@@ -634,9 +635,9 @@ bfd_put_packet(struct bfd *bfd, struct dp_packet *p,
msg->mult = bfd->mult;
msg->length = BFD_PACKET_LEN;
- msg->my_disc = htonl(bfd->disc);
- msg->your_disc = htonl(bfd->rmt_disc);
- msg->min_rx_echo = htonl(0);
+ put_16aligned_be32(&msg->my_disc, htonl(bfd->disc));
+ put_16aligned_be32(&msg->your_disc, htonl(bfd->rmt_disc));
+ put_16aligned_be32(&msg->min_rx_echo, htonl(0));
if (bfd_in_poll(bfd)) {
min_tx = bfd->poll_min_tx;
@@ -646,8 +647,8 @@ bfd_put_packet(struct bfd *bfd, struct dp_packet *p,
min_rx = bfd->min_rx;
}
- msg->min_tx = htonl(min_tx * 1000);
- msg->min_rx = htonl(min_rx * 1000);
+ put_16aligned_be32(&msg->min_tx, htonl(min_tx * 1000));
+ put_16aligned_be32(&msg->min_rx, htonl(min_rx * 1000));
bfd->flags &= ~FLAG_FINAL;
*oam = bfd->oam;
@@ -781,12 +782,12 @@ bfd_process_packet(struct bfd *bfd, const struct flow *flow,
goto out;
}
- if (!msg->my_disc) {
+ if (!get_16aligned_be32(&msg->my_disc)) {
log_msg(VLL_WARN, msg, "NULL my_disc", bfd);
goto out;
}
- pkt_your_disc = ntohl(msg->your_disc);
+ pkt_your_disc = ntohl(get_16aligned_be32(&msg->your_disc));
if (pkt_your_disc) {
/* Technically, we should use the your discriminator field to figure
* out which 'struct bfd' this packet is destined towards. That way a
@@ -806,7 +807,7 @@ bfd_process_packet(struct bfd *bfd, const struct flow *flow,
bfd_status_changed(bfd);
}
- bfd->rmt_disc = ntohl(msg->my_disc);
+ bfd->rmt_disc = ntohl(get_16aligned_be32(&msg->my_disc));
bfd->rmt_state = rmt_state;
bfd->rmt_flags = flags;
bfd->rmt_diag = msg->vers_diag & DIAG_MASK;
@@ -834,7 +835,7 @@ bfd_process_packet(struct bfd *bfd, const struct flow *flow,
bfd->rmt_mult = msg->mult;
}
- rmt_min_rx = MAX(ntohl(msg->min_rx) / 1000, 1);
+ rmt_min_rx = MAX(ntohl(get_16aligned_be32(&msg->min_rx)) / 1000, 1);
if (bfd->rmt_min_rx != rmt_min_rx) {
bfd->rmt_min_rx = rmt_min_rx;
if (bfd->next_tx) {
@@ -843,7 +844,7 @@ bfd_process_packet(struct bfd *bfd, const struct flow *flow,
log_msg(VLL_INFO, msg, "New remote min_rx", bfd);
}
- bfd->rmt_min_tx = MAX(ntohl(msg->min_tx) / 1000, 1);
+ bfd->rmt_min_tx = MAX(ntohl(get_16aligned_be32(&msg->min_tx)) / 1000, 1);
bfd->detect_time = bfd_rx_interval(bfd) * bfd->rmt_mult + time_msec();
if (bfd->state == STATE_ADMIN_DOWN) {
@@ -1105,10 +1106,14 @@ log_msg(enum vlog_level level, const struct msg *p, const char *message,
bfd_diag_str(p->vers_diag & DIAG_MASK),
bfd_state_str(p->flags & STATE_MASK),
p->mult, p->length, bfd_flag_str(p->flags & FLAGS_MASK),
- ntohl(p->my_disc), ntohl(p->your_disc),
- ntohl(p->min_tx), ntohl(p->min_tx) / 1000,
- ntohl(p->min_rx), ntohl(p->min_rx) / 1000,
- ntohl(p->min_rx_echo), ntohl(p->min_rx_echo) / 1000);
+ ntohl(get_16aligned_be32(&p->my_disc)),
+ ntohl(get_16aligned_be32(&p->your_disc)),
+ ntohl(get_16aligned_be32(&p->min_tx)),
+ ntohl(get_16aligned_be32(&p->min_tx)) / 1000,
+ ntohl(get_16aligned_be32(&p->min_rx)),
+ ntohl(get_16aligned_be32(&p->min_rx)) / 1000,
+ ntohl(get_16aligned_be32(&p->min_rx_echo)),
+ ntohl(get_16aligned_be32(&p->min_rx_echo)) / 1000);
bfd_put_details(&ds, bfd);
VLOG(level, "%s", ds_cstr(&ds));
ds_destroy(&ds);
diff --git a/lib/db-ctl-base.c b/lib/db-ctl-base.c
index 77cc76a9f6..7074561588 100644
--- a/lib/db-ctl-base.c
+++ b/lib/db-ctl-base.c
@@ -247,15 +247,15 @@ record_id_equals(const union ovsdb_atom *name, enum ovsdb_atomic_type type,
const char *record_id)
{
if (type == OVSDB_TYPE_STRING) {
- if (!strcmp(name->string, record_id)) {
+ if (!strcmp(name->s->string, record_id)) {
return true;
}
struct uuid uuid;
size_t len = strlen(record_id);
if (len >= 4
- && uuid_from_string(&uuid, name->string)
- && !strncmp(name->string, record_id, len)) {
+ && uuid_from_string(&uuid, name->s->string)
+ && !strncmp(name->s->string, record_id, len)) {
return true;
}
@@ -314,15 +314,19 @@ get_row_by_id(struct ctl_context *ctx,
row, id->name_column, key, value);
/* Extract the name from the column. */
- const union ovsdb_atom *name;
+ const union ovsdb_atom *name = NULL;
if (!id->key) {
name = datum->n == 1 ? &datum->keys[0] : NULL;
} else {
- const union ovsdb_atom key_atom
- = { .string = CONST_CAST(char *, id->key) };
- unsigned int i = ovsdb_datum_find_key(datum, &key_atom,
- OVSDB_TYPE_STRING);
- name = i == UINT_MAX ? NULL : &datum->values[i];
+ union ovsdb_atom key_atom = {
+ .s = ovsdb_atom_string_create(CONST_CAST(char *, id->key)) };
+ unsigned int i;
+
+ if (ovsdb_datum_find_key(datum, &key_atom,
+ OVSDB_TYPE_STRING, &i)) {
+ name = &datum->values[i];
+ }
+ ovsdb_atom_destroy(&key_atom, OVSDB_TYPE_STRING);
}
if (!name) {
continue;
@@ -819,14 +823,14 @@ check_condition(const struct ovsdb_idl_table_class *table,
goto out;
}
- idx = ovsdb_datum_find_key(have_datum,
- &want_key, column->type.key.type);
- if (idx == UINT_MAX && !is_set_operator(operator)) {
+ bool found = ovsdb_datum_find_key(have_datum, &want_key,
+ column->type.key.type, &idx);
+ if (!found && !is_set_operator(operator)) {
retval = false;
} else {
struct ovsdb_datum a;
- if (idx != UINT_MAX) {
+ if (found) {
a.n = 1;
a.keys = &have_datum->values[idx];
a.values = NULL;
@@ -992,9 +996,8 @@ cmd_get(struct ctl_context *ctx)
return;
}
- idx = ovsdb_datum_find_key(datum, &key,
- column->type.key.type);
- if (idx == UINT_MAX) {
+ if (!ovsdb_datum_find_key(datum, &key,
+ column->type.key.type, &idx)) {
if (must_exist) {
ctl_error(
ctx, "no key \"%s\" in %s record \"%s\" column %s",
@@ -1375,7 +1378,7 @@ set_column(const struct ovsdb_idl_table_class *table,
ovsdb_atom_destroy(&value, column->type.value.type);
ovsdb_datum_union(&datum, ovsdb_idl_read(row, column),
- &column->type, false);
+ &column->type);
ovsdb_idl_txn_verify(row, column);
ovsdb_idl_txn_write(row, column, &datum);
} else {
@@ -1514,7 +1517,7 @@ cmd_add(struct ctl_context *ctx)
ovsdb_datum_destroy(&old, &column->type);
return;
}
- ovsdb_datum_union(&old, &add, type, false);
+ ovsdb_datum_union(&old, &add, type);
ovsdb_datum_destroy(&add, type);
}
if (old.n > type->n_max) {
diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index 72f6d09ac7..35c72542a2 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -294,7 +294,7 @@ dp_packet_resize(struct dp_packet *b, size_t new_headroom, size_t new_tailroom)
void
dp_packet_prealloc_tailroom(struct dp_packet *b, size_t size)
{
- if (size > dp_packet_tailroom(b)) {
+ if ((size && !dp_packet_base(b)) || (size > dp_packet_tailroom(b))) {
dp_packet_resize(b, dp_packet_headroom(b), MAX(size, 64));
}
}
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 08d93c2779..3dc582fbfd 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -199,6 +199,7 @@ struct dp_packet *dp_packet_clone_data_with_headroom(const void *, size_t,
void dp_packet_resize(struct dp_packet *b, size_t new_headroom,
size_t new_tailroom);
static inline void dp_packet_delete(struct dp_packet *);
+static inline void dp_packet_swap(struct dp_packet *, struct dp_packet *);
static inline void *dp_packet_at(const struct dp_packet *, size_t offset,
size_t size);
@@ -256,6 +257,18 @@ dp_packet_delete(struct dp_packet *b)
}
}
+/* Swaps content of two packets. */
+static inline void
+dp_packet_swap(struct dp_packet *a, struct dp_packet *b)
+{
+ ovs_assert(a->source == DPBUF_MALLOC || a->source == DPBUF_STUB);
+ ovs_assert(b->source == DPBUF_MALLOC || b->source == DPBUF_STUB);
+ struct dp_packet c = *a;
+
+ *a = *b;
+ *b = c;
+}
+
/* If 'b' contains at least 'offset + size' bytes of data, returns a pointer to
* byte 'offset'. Otherwise, returns a null pointer. */
static inline void *
diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c
index b7d577870d..fe24f9abdf 100644
--- a/lib/dpdk-stub.c
+++ b/lib/dpdk-stub.c
@@ -83,7 +83,7 @@ bool
dpdk_get_cpu_has_isa(const char *arch OVS_UNUSED,
const char *feature OVS_UNUSED)
{
- VLOG_ERR_ONCE("DPDK not supported in this version of Open vSwitch, "
+ VLOG_DBG_ONCE("DPDK not supported in this version of Open vSwitch, "
"cannot use CPU flag based optimizations");
return false;
}
diff --git a/lib/dpif-netdev-extract-avx512.c b/lib/dpif-netdev-extract-avx512.c
index ec64419e38..77f28810c7 100644
--- a/lib/dpif-netdev-extract-avx512.c
+++ b/lib/dpif-netdev-extract-avx512.c
@@ -157,10 +157,19 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, __m512i idx, __m512i a)
0, 0, 0, 0, /* Src IP */ \
0, 0, 0, 0, /* Dst IP */
-#define PATTERN_IPV4_MASK PATTERN_IPV4_GEN(0xFF, 0xFE, 0xFF, 0xFF)
+#define PATTERN_IPV4_MASK PATTERN_IPV4_GEN(0xFF, 0xBF, 0xFF, 0xFF)
#define PATTERN_IPV4_UDP PATTERN_IPV4_GEN(0x45, 0, 0, 0x11)
#define PATTERN_IPV4_TCP PATTERN_IPV4_GEN(0x45, 0, 0, 0x06)
+#define PATTERN_TCP_GEN(data_offset) \
+ 0, 0, 0, 0, /* sport, dport */ \
+ 0, 0, 0, 0, /* sequence number */ \
+ 0, 0, 0, 0, /* ack number */ \
+ data_offset, /* data offset: used to verify = 5, options not supported */
+
+#define PATTERN_TCP_MASK PATTERN_TCP_GEN(0xF0)
+#define PATTERN_TCP PATTERN_TCP_GEN(0x50)
+
#define NU 0
#define PATTERN_IPV4_UDP_SHUFFLE \
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, NU, NU, /* Ether */ \
@@ -217,6 +226,25 @@ _mm512_maskz_permutexvar_epi8_wrap(__mmask64 kmask, __m512i idx, __m512i a)
#define PATTERN_DT1Q_IPV4_TCP_KMASK \
(KMASK_ETHER | (KMASK_DT1Q << 16) | (KMASK_IPV4 << 24) | (KMASK_TCP << 40))
+/* Miniflow Strip post-processing masks.
+ * This allows unsetting specific bits from the resulting miniflow. It is used
+ * for e.g. IPv4 where the "DF" bit is never pushed to the miniflow itself.
+ * The NC define is for "No Change", allowing the bits to pass through.
+ */
+#define NC 0xFF
+
+#define PATTERN_STRIP_IPV4_MASK \
+ NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, \
+ NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, 0xBF, NC, NC, NC, \
+ NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, \
+ NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC
+
+#define PATTERN_STRIP_DOT1Q_IPV4_MASK \
+ NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, \
+ NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, \
+ NC, NC, NC, NC, 0xBF, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, \
+ NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC, NC
+
/* This union allows initializing static data as u8, but easily loading it
* into AVX512 registers too. The union ensures proper alignment for the zmm.
*/
@@ -241,8 +269,9 @@ struct mfex_profile {
union mfex_data probe_mask;
union mfex_data probe_data;
- /* Required for reshaping packet into miniflow. */
+ /* Required for reshaping packet into miniflow and post-processing it. */
union mfex_data store_shuf;
+ union mfex_data strip_mask;
__mmask64 store_kmsk;
/* Constant data to set in mf.bits and dp_packet data on hit. */
@@ -310,6 +339,7 @@ static const struct mfex_profile mfex_profiles[PROFILE_COUNT] =
.probe_data.u8_data = { PATTERN_ETHERTYPE_IPV4 PATTERN_IPV4_UDP},
.store_shuf.u8_data = { PATTERN_IPV4_UDP_SHUFFLE },
+ .strip_mask.u8_data = { PATTERN_STRIP_IPV4_MASK },
.store_kmsk = PATTERN_IPV4_UDP_KMASK,
.mf_bits = { 0x18a0000000000000, 0x0000000000040401},
@@ -320,10 +350,19 @@ static const struct mfex_profile mfex_profiles[PROFILE_COUNT] =
},
[PROFILE_ETH_IPV4_TCP] = {
- .probe_mask.u8_data = { PATTERN_ETHERTYPE_MASK PATTERN_IPV4_MASK },
- .probe_data.u8_data = { PATTERN_ETHERTYPE_IPV4 PATTERN_IPV4_TCP},
+ .probe_mask.u8_data = {
+ PATTERN_ETHERTYPE_MASK
+ PATTERN_IPV4_MASK
+ PATTERN_TCP_MASK
+ },
+ .probe_data.u8_data = {
+ PATTERN_ETHERTYPE_IPV4
+ PATTERN_IPV4_TCP
+ PATTERN_TCP
+ },
.store_shuf.u8_data = { PATTERN_IPV4_TCP_SHUFFLE },
+ .strip_mask.u8_data = { PATTERN_STRIP_IPV4_MASK },
.store_kmsk = PATTERN_IPV4_TCP_KMASK,
.mf_bits = { 0x18a0000000000000, 0x0000000000044401},
@@ -342,6 +381,7 @@ static const struct mfex_profile mfex_profiles[PROFILE_COUNT] =
},
.store_shuf.u8_data = { PATTERN_DT1Q_IPV4_UDP_SHUFFLE },
+ .strip_mask.u8_data = { PATTERN_STRIP_DOT1Q_IPV4_MASK },
.store_kmsk = PATTERN_DT1Q_IPV4_UDP_KMASK,
.mf_bits = { 0x38a0000000000000, 0x0000000000040401},
@@ -353,20 +393,27 @@ static const struct mfex_profile mfex_profiles[PROFILE_COUNT] =
[PROFILE_ETH_VLAN_IPV4_TCP] = {
.probe_mask.u8_data = {
- PATTERN_ETHERTYPE_MASK PATTERN_DT1Q_MASK PATTERN_IPV4_MASK
+ PATTERN_ETHERTYPE_MASK
+ PATTERN_DT1Q_MASK
+ PATTERN_IPV4_MASK
+ PATTERN_TCP_MASK
},
.probe_data.u8_data = {
- PATTERN_ETHERTYPE_DT1Q PATTERN_DT1Q_IPV4 PATTERN_IPV4_TCP
+ PATTERN_ETHERTYPE_DT1Q
+ PATTERN_DT1Q_IPV4
+ PATTERN_IPV4_TCP
+ PATTERN_TCP
},
.store_shuf.u8_data = { PATTERN_DT1Q_IPV4_TCP_SHUFFLE },
+ .strip_mask.u8_data = { PATTERN_STRIP_DOT1Q_IPV4_MASK },
.store_kmsk = PATTERN_DT1Q_IPV4_TCP_KMASK,
.mf_bits = { 0x38a0000000000000, 0x0000000000044401},
.dp_pkt_offs = {
14, UINT16_MAX, 18, 38,
},
- .dp_pkt_min_size = 46,
+ .dp_pkt_min_size = 58,
},
};
@@ -374,16 +421,31 @@ static const struct mfex_profile mfex_profiles[PROFILE_COUNT] =
/* Protocol specific helper functions, for calculating offsets/lenghts. */
static int32_t
mfex_ipv4_set_l2_pad_size(struct dp_packet *pkt, struct ip_header *nh,
- uint32_t len_from_ipv4)
+ uint32_t len_from_ipv4, uint32_t next_proto_len)
{
- /* Handle dynamic l2_pad_size. */
- uint16_t tot_len = ntohs(nh->ip_tot_len);
- if (OVS_UNLIKELY(tot_len > len_from_ipv4 ||
- (len_from_ipv4 - tot_len) > UINT16_MAX)) {
- return -1;
- }
- dp_packet_set_l2_pad_size(pkt, len_from_ipv4 - tot_len);
- return 0;
+ /* Handle dynamic l2_pad_size; note that avx512 has already validated
+ * the IP->ihl field to be 5, so 20 bytes of IP header (no options).
+ */
+ uint16_t ip_tot_len = ntohs(nh->ip_tot_len);
+
+ /* Error if IP total length is greater than remaining packet size. */
+ bool err_ip_tot_len_too_high = ip_tot_len > len_from_ipv4;
+
+ /* Error if IP total length is less than the size of the IP header
+ * itself, and the size of the next-protocol this profile matches on.
+ */
+ bool err_ip_tot_len_too_low =
+ (IP_HEADER_LEN + next_proto_len) > ip_tot_len;
+
+ /* Ensure the l2 pad size will not overflow. */
+ bool err_len_u16_overflow = (len_from_ipv4 - ip_tot_len) > UINT16_MAX;
+
+ if (OVS_UNLIKELY(err_ip_tot_len_too_high || err_ip_tot_len_too_low ||
+ err_len_u16_overflow)) {
+ return -1;
+ }
+ dp_packet_set_l2_pad_size(pkt, len_from_ipv4 - ip_tot_len);
+ return 0;
}
/* Fixup the VLAN CFI and PCP, reading the PCP from the input to this function,
@@ -433,6 +495,7 @@ mfex_avx512_process(struct dp_packet_batch *packets,
__m512i v_vals = _mm512_loadu_si512(&profile->probe_data);
__m512i v_mask = _mm512_loadu_si512(&profile->probe_mask);
__m512i v_shuf = _mm512_loadu_si512(&profile->store_shuf);
+ __m512i v_strp = _mm512_loadu_si512(&profile->strip_mask);
__mmask64 k_shuf = profile->store_kmsk;
__m128i v_bits = _mm_loadu_si128((void *) &profile->mf_bits);
@@ -450,10 +513,17 @@ mfex_avx512_process(struct dp_packet_batch *packets,
/* Load packet data and probe with AVX512 mask & compare. */
const uint8_t *pkt = dp_packet_data(packet);
- __m512i v_pkt0 = _mm512_loadu_si512(pkt);
+ __m512i v_pkt0;
+ if (size >= 64) {
+ v_pkt0 = _mm512_loadu_si512(pkt);
+ } else {
+ uint64_t load_kmask = (1ULL << size) - 1;
+ v_pkt0 = _mm512_maskz_loadu_epi8(load_kmask, pkt);
+ }
+
__m512i v_pkt0_masked = _mm512_and_si512(v_pkt0, v_mask);
__mmask64 k_cmp = _mm512_cmpeq_epi8_mask(v_pkt0_masked, v_vals);
- if (k_cmp != UINT64_MAX) {
+ if (OVS_UNLIKELY(k_cmp != UINT64_MAX)) {
continue;
}
@@ -481,8 +551,9 @@ mfex_avx512_process(struct dp_packet_batch *packets,
v_blk0 = _mm512_maskz_permutex2var_epi8_skx(k_shuf, v_pkt0,
v_shuf, v512_zeros);
}
- _mm512_storeu_si512(&blocks[2], v_blk0);
+ __m512i v_blk0_strip = _mm512_and_si512(v_blk0, v_strp);
+ _mm512_storeu_si512(&blocks[2], v_blk0_strip);
/* Perform "post-processing" per profile, handling details not easily
* handled in the above generic AVX512 code. Examples include TCP flag
@@ -498,7 +569,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
uint32_t size_from_ipv4 = size - VLAN_ETH_HEADER_LEN;
struct ip_header *nh = (void *)&pkt[VLAN_ETH_HEADER_LEN];
- if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) {
+ if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4,
+ TCP_HEADER_LEN)) {
continue;
}
@@ -512,7 +584,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
uint32_t size_from_ipv4 = size - VLAN_ETH_HEADER_LEN;
struct ip_header *nh = (void *)&pkt[VLAN_ETH_HEADER_LEN];
- if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) {
+ if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4,
+ UDP_HEADER_LEN)) {
continue;
}
} break;
@@ -525,7 +598,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
/* Handle dynamic l2_pad_size. */
uint32_t size_from_ipv4 = size - sizeof(struct eth_header);
struct ip_header *nh = (void *)&pkt[sizeof(struct eth_header)];
- if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) {
+ if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4,
+ TCP_HEADER_LEN)) {
continue;
}
} break;
@@ -534,7 +608,8 @@ mfex_avx512_process(struct dp_packet_batch *packets,
/* Handle dynamic l2_pad_size. */
uint32_t size_from_ipv4 = size - sizeof(struct eth_header);
struct ip_header *nh = (void *)&pkt[sizeof(struct eth_header)];
- if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4)) {
+ if (mfex_ipv4_set_l2_pad_size(packet, nh, size_from_ipv4,
+ UDP_HEADER_LEN)) {
continue;
}
diff --git a/lib/dpif-netdev-private-dfc.h b/lib/dpif-netdev-private-dfc.h
index 92092ebec9..3dfc91f0fe 100644
--- a/lib/dpif-netdev-private-dfc.h
+++ b/lib/dpif-netdev-private-dfc.h
@@ -59,7 +59,8 @@ extern "C" {
* Thread-safety
* =============
*
- * Each pmd_thread has its own private exact match cache.
+ * Each pmd_thread has its own private exact match cache and signature match
+ * cache.
* If dp_netdev_input is not called from a pmd thread, a mutex is used.
*/
diff --git a/lib/dpif-netdev-private-dpcls.h b/lib/dpif-netdev-private-dpcls.h
index 7c4a840cb1..0d5da73c7a 100644
--- a/lib/dpif-netdev-private-dpcls.h
+++ b/lib/dpif-netdev-private-dpcls.h
@@ -83,8 +83,10 @@ struct dpcls_subtable {
/* The lookup function to use for this subtable. If there is a known
* property of the subtable (eg: only 3 bits of miniflow metadata is
* used for the lookup) then this can point at an optimized version of
- * the lookup function for this particular subtable. */
- dpcls_subtable_lookup_func lookup_func;
+ * the lookup function for this particular subtable. The lookup function
+ * can be used at any time by a PMD thread, so it's declared as an atomic
+ * here to prevent garbage from being read. */
+ ATOMIC(dpcls_subtable_lookup_func) lookup_func;
/* Caches the masks to match a packet to, reducing runtime calculations. */
uint64_t *mf_masks;
diff --git a/lib/dpif-netdev-private-thread.h b/lib/dpif-netdev-private-thread.h
index a782d9678a..ac4885538c 100644
--- a/lib/dpif-netdev-private-thread.h
+++ b/lib/dpif-netdev-private-thread.h
@@ -78,10 +78,10 @@ struct dp_netdev_pmd_thread {
struct ovs_refcount ref_cnt; /* Every reference must be refcount'ed. */
struct cmap_node node; /* In 'dp->poll_threads'. */
- /* Per thread exact-match cache. Note, the instance for cpu core
- * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly
- * need to be protected by 'non_pmd_mutex'. Every other instance
- * will only be accessed by its own pmd thread. */
+ /* Per thread exact match cache and signature match cache. Note, the
+ * instance for cpu core NON_PMD_CORE_ID can be accessed by multiple
+ * threads, and thusly need to be protected by 'non_pmd_mutex'. Every
+ * other instance will only be accessed by its own pmd thread. */
OVS_ALIGNED_VAR(CACHE_LINE_SIZE) struct dfc_cache flow_cache;
/* Flow-Table and classifiers
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index bddce75b63..0b47c1a486 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -984,7 +984,9 @@ dpif_netdev_subtable_lookup_set(struct unixctl_conn *conn, int argc OVS_UNUSED,
if (!cls) {
continue;
}
+ ovs_mutex_lock(&pmd->flow_mutex);
uint32_t subtbl_changes = dpcls_subtable_lookup_reprobe(cls);
+ ovs_mutex_unlock(&pmd->flow_mutex);
if (subtbl_changes) {
lookup_dpcls_changed++;
lookup_subtable_changed += subtbl_changes;
@@ -4061,7 +4063,10 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
flow_hash_5tuple(execute->flow, 0));
}
- dp_packet_batch_init_packet(&pp, execute->packet);
+ /* Making a copy because the packet might be stolen during the execution
+ * and caller might still need it. */
+ struct dp_packet *packet_clone = dp_packet_clone(execute->packet);
+ dp_packet_batch_init_packet(&pp, packet_clone);
dp_netdev_execute_actions(pmd, &pp, false, execute->flow,
execute->actions, execute->actions_len);
dp_netdev_pmd_flush_output_packets(pmd, true);
@@ -4071,6 +4076,24 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
dp_netdev_pmd_unref(pmd);
}
+ if (dp_packet_batch_size(&pp) == 1) {
+ /* Packet wasn't dropped during the execution. Swapping content with
+ * the original packet, because the caller might expect actions to
+ * modify it. Using the packet from a batch instead of 'packet_clone'
+ * because it may be stolen and replaced by other packet, e.g. by
+ * the fragmentation engine. */
+ dp_packet_swap(execute->packet, pp.packets[0]);
+ dp_packet_delete_batch(&pp, true);
+ } else if (dp_packet_batch_size(&pp)) {
+ /* FIXME: We have more packets than expected. Likely, we got IP
+ * fragments of the reassembled packet. Dropping them here as we have
+ * no way to get them to the caller. It might be that all the required
+ * actions with them are already executed, but it also might not be the
+ * case, e.g. if dpif_netdev_execute() is called to execute a single
+ * tunnel push. */
+ dp_packet_delete_batch(&pp, true);
+ }
+
return 0;
}
@@ -8942,9 +8965,12 @@ dpcls_create_subtable(struct dpcls *cls, const struct netdev_flow_key *mask)
/* Get the preferred subtable search function for this (u0,u1) subtable.
* The function is guaranteed to always return a valid implementation, and
- * possibly an ISA optimized, and/or specialized implementation.
+ * possibly an ISA optimized, and/or specialized implementation. Initialize
+ * the subtable search function atomically to avoid garbage data being read
+ * by the PMD thread.
*/
- subtable->lookup_func = dpcls_subtable_get_best_impl(unit0, unit1);
+ atomic_init(&subtable->lookup_func,
+ dpcls_subtable_get_best_impl(unit0, unit1));
cmap_insert(&cls->subtables_map, &subtable->cmap_node, mask->hash);
/* Add the new subtable at the end of the pvector (with no hits yet) */
@@ -8973,6 +8999,10 @@ dpcls_find_subtable(struct dpcls *cls, const struct netdev_flow_key *mask)
/* Checks for the best available implementation for each subtable lookup
* function, and assigns it as the lookup function pointer for each subtable.
* Returns the number of subtables that have changed lookup implementation.
+ * This function requires holding a flow_mutex when called. This is to make
+ * sure modifications done by this function are not overwritten. This could
+ * happen if dpcls_sort_subtable_vector() is called at the same time as this
+ * function.
*/
static uint32_t
dpcls_subtable_lookup_reprobe(struct dpcls *cls)
@@ -8985,10 +9015,13 @@ dpcls_subtable_lookup_reprobe(struct dpcls *cls)
uint32_t u0_bits = subtable->mf_bits_set_unit0;
uint32_t u1_bits = subtable->mf_bits_set_unit1;
void *old_func = subtable->lookup_func;
- subtable->lookup_func = dpcls_subtable_get_best_impl(u0_bits, u1_bits);
+
+ /* Set the subtable lookup function atomically to avoid garbage data
+ * being read by the PMD thread. */
+ atomic_store_relaxed(&subtable->lookup_func,
+ dpcls_subtable_get_best_impl(u0_bits, u1_bits));
subtables_changed += (old_func != subtable->lookup_func);
}
- pvector_publish(pvec);
return subtables_changed;
}
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 34fc042373..5f4b60c5a6 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -84,6 +84,8 @@ enum { MAX_PORTS = USHRT_MAX };
#define EPOLLEXCLUSIVE (1u << 28)
#endif
+#define OVS_DP_F_UNSUPPORTED (1u << 31) /* Not supported by any kernel. */
+
/* This PID is not used by the kernel datapath when using dispatch per CPU,
* but it is required to be set (not zero). */
#define DPIF_NETLINK_PER_CPU_PID UINT32_MAX
@@ -382,36 +384,62 @@ dpif_netlink_open(const struct dpif_class *class OVS_UNUSED, const char *name,
dp_request.cmd = OVS_DP_CMD_SET;
}
- /* The Open vSwitch kernel module has two modes for dispatching upcalls:
- * per-vport and per-cpu.
- *
- * When dispatching upcalls per-vport, the kernel will
- * send the upcall via a Netlink socket that has been selected based on the
- * vport that received the packet that is causing the upcall.
- *
- * When dispatching upcall per-cpu, the kernel will send the upcall via
- * a Netlink socket that has been selected based on the cpu that received
- * the packet that is causing the upcall.
- *
- * First we test to see if the kernel module supports per-cpu dispatching
- * (the preferred method). If it does not support per-cpu dispatching, we
- * fall back to the per-vport dispatch mode.
+ /* Some older kernels will not reject unknown features. This will cause
+ * 'ovs-vswitchd' to incorrectly assume a feature is supported. In order to
+ * test for that, we attempt to set a feature that we know is not supported
+ * by any kernel. If this feature is not rejected, we can assume we are
+ * running on one of these older kernels.
*/
dp_request.user_features |= OVS_DP_F_UNALIGNED;
- dp_request.user_features &= ~OVS_DP_F_VPORT_PIDS;
- dp_request.user_features |= OVS_DP_F_DISPATCH_UPCALL_PER_CPU;
+ dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
+ dp_request.user_features |= OVS_DP_F_UNSUPPORTED;
error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
if (error) {
- dp_request.user_features &= ~OVS_DP_F_DISPATCH_UPCALL_PER_CPU;
+ /* The Open vSwitch kernel module has two modes for dispatching
+ * upcalls: per-vport and per-cpu.
+ *
+ * When dispatching upcalls per-vport, the kernel will
+ * send the upcall via a Netlink socket that has been selected based on
+ * the vport that received the packet that is causing the upcall.
+ *
+ * When dispatching upcall per-cpu, the kernel will send the upcall via
+ * a Netlink socket that has been selected based on the cpu that
+ * received the packet that is causing the upcall.
+ *
+ * First we test to see if the kernel module supports per-cpu
+ * dispatching (the preferred method). If it does not support per-cpu
+ * dispatching, we fall back to the per-vport dispatch mode.
+ */
+ dp_request.user_features &= ~OVS_DP_F_UNSUPPORTED;
+ dp_request.user_features |= OVS_DP_F_UNALIGNED;
+ dp_request.user_features &= ~OVS_DP_F_VPORT_PIDS;
+ dp_request.user_features |= OVS_DP_F_DISPATCH_UPCALL_PER_CPU;
+ error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
+ if (error) {
+ dp_request.user_features &= ~OVS_DP_F_DISPATCH_UPCALL_PER_CPU;
+ dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
+ error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
+ }
+ if (error) {
+ return error;
+ }
+
+ error = open_dpif(&dp, dpifp);
+ dpif_netlink_set_features(*dpifp, OVS_DP_F_TC_RECIRC_SHARING);
+ } else {
+ VLOG_INFO("Kernel does not correctly support feature negotiation. "
+ "Using standard features.");
+ dp_request.cmd = OVS_DP_CMD_SET;
+ dp_request.user_features = 0;
+ dp_request.user_features |= OVS_DP_F_UNALIGNED;
dp_request.user_features |= OVS_DP_F_VPORT_PIDS;
error = dpif_netlink_dp_transact(&dp_request, &dp, &buf);
- }
- if (error) {
- return error;
+ if (error) {
+ return error;
+ }
+ error = open_dpif(&dp, dpifp);
}
- error = open_dpif(&dp, dpifp);
- dpif_netlink_set_features(*dpifp, OVS_DP_F_TC_RECIRC_SHARING);
ofpbuf_delete(buf);
if (create) {
diff --git a/lib/flow.c b/lib/flow.c
index 89837de95d..a021bc0eba 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -1006,14 +1006,18 @@ miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
if (OVS_LIKELY(nw_proto == IPPROTO_TCP)) {
if (OVS_LIKELY(size >= TCP_HEADER_LEN)) {
const struct tcp_header *tcp = data;
-
- miniflow_push_be32(mf, arp_tha.ea[2], 0);
- miniflow_push_be32(mf, tcp_flags,
- TCP_FLAGS_BE32(tcp->tcp_ctl));
- miniflow_push_be16(mf, tp_src, tcp->tcp_src);
- miniflow_push_be16(mf, tp_dst, tcp->tcp_dst);
- miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
- miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
+ size_t tcp_hdr_len = TCP_OFFSET(tcp->tcp_ctl) * 4;
+
+ if (OVS_LIKELY(tcp_hdr_len >= TCP_HEADER_LEN)
+ && OVS_LIKELY(size >= tcp_hdr_len)) {
+ miniflow_push_be32(mf, arp_tha.ea[2], 0);
+ miniflow_push_be32(mf, tcp_flags,
+ TCP_FLAGS_BE32(tcp->tcp_ctl));
+ miniflow_push_be16(mf, tp_src, tcp->tcp_src);
+ miniflow_push_be16(mf, tp_dst, tcp->tcp_dst);
+ miniflow_push_be16(mf, ct_tp_src, ct_tp_src);
+ miniflow_push_be16(mf, ct_tp_dst, ct_tp_dst);
+ }
}
} else if (OVS_LIKELY(nw_proto == IPPROTO_UDP)) {
if (OVS_LIKELY(size >= UDP_HEADER_LEN)) {
diff --git a/lib/ipf.c b/lib/ipf.c
index d9f781147a..507db2aea2 100644
--- a/lib/ipf.c
+++ b/lib/ipf.c
@@ -943,6 +943,8 @@ ipf_extract_frags_from_batch(struct ipf *ipf, struct dp_packet_batch *pb,
ovs_mutex_lock(&ipf->ipf_lock);
if (!ipf_handle_frag(ipf, pkt, dl_type, zone, now, hash_basis)) {
dp_packet_batch_refill(pb, pkt, pb_idx);
+ } else {
+ dp_packet_delete(pkt);
}
ovs_mutex_unlock(&ipf->ipf_lock);
} else {
@@ -1152,52 +1154,56 @@ ipf_post_execute_reass_pkts(struct ipf *ipf,
* NETDEV_MAX_BURST. */
DP_PACKET_BATCH_REFILL_FOR_EACH (pb_idx, pb_cnt, pkt, pb) {
if (rp && pkt == rp->list->reass_execute_ctx) {
+ const struct ipf_frag *frag_0 = &rp->list->frag_list[0];
+ void *l4_frag = dp_packet_l4(frag_0->pkt);
+ void *l4_reass = dp_packet_l4(pkt);
+ memcpy(l4_frag, l4_reass, dp_packet_l4_size(frag_0->pkt));
+
for (int i = 0; i <= rp->list->last_inuse_idx; i++) {
- rp->list->frag_list[i].pkt->md.ct_label = pkt->md.ct_label;
- rp->list->frag_list[i].pkt->md.ct_mark = pkt->md.ct_mark;
- rp->list->frag_list[i].pkt->md.ct_state = pkt->md.ct_state;
- rp->list->frag_list[i].pkt->md.ct_zone = pkt->md.ct_zone;
- rp->list->frag_list[i].pkt->md.ct_orig_tuple_ipv6 =
+ const struct ipf_frag *frag_i = &rp->list->frag_list[i];
+
+ frag_i->pkt->md.ct_label = pkt->md.ct_label;
+ frag_i->pkt->md.ct_mark = pkt->md.ct_mark;
+ frag_i->pkt->md.ct_state = pkt->md.ct_state;
+ frag_i->pkt->md.ct_zone = pkt->md.ct_zone;
+ frag_i->pkt->md.ct_orig_tuple_ipv6 =
pkt->md.ct_orig_tuple_ipv6;
if (pkt->md.ct_orig_tuple_ipv6) {
- rp->list->frag_list[i].pkt->md.ct_orig_tuple.ipv6 =
+ frag_i->pkt->md.ct_orig_tuple.ipv6 =
pkt->md.ct_orig_tuple.ipv6;
} else {
- rp->list->frag_list[i].pkt->md.ct_orig_tuple.ipv4 =
+ frag_i->pkt->md.ct_orig_tuple.ipv4 =
pkt->md.ct_orig_tuple.ipv4;
}
- }
-
- const struct ipf_frag *frag_0 = &rp->list->frag_list[0];
- void *l4_frag = dp_packet_l4(frag_0->pkt);
- void *l4_reass = dp_packet_l4(pkt);
- memcpy(l4_frag, l4_reass, dp_packet_l4_size(frag_0->pkt));
-
- if (v6) {
- struct ovs_16aligned_ip6_hdr *l3_frag
- = dp_packet_l3(frag_0->pkt);
- struct ovs_16aligned_ip6_hdr *l3_reass = dp_packet_l3(pkt);
- l3_frag->ip6_src = l3_reass->ip6_src;
- l3_frag->ip6_dst = l3_reass->ip6_dst;
- } else {
- struct ip_header *l3_frag = dp_packet_l3(frag_0->pkt);
- struct ip_header *l3_reass = dp_packet_l3(pkt);
- if (!dp_packet_hwol_is_ipv4(frag_0->pkt)) {
- ovs_be32 reass_ip =
- get_16aligned_be32(&l3_reass->ip_src);
- ovs_be32 frag_ip =
- get_16aligned_be32(&l3_frag->ip_src);
-
- l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum,
- frag_ip, reass_ip);
- reass_ip = get_16aligned_be32(&l3_reass->ip_dst);
- frag_ip = get_16aligned_be32(&l3_frag->ip_dst);
- l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum,
- frag_ip, reass_ip);
+ if (v6) {
+ struct ovs_16aligned_ip6_hdr *l3_frag
+ = dp_packet_l3(frag_i->pkt);
+ struct ovs_16aligned_ip6_hdr *l3_reass
+ = dp_packet_l3(pkt);
+ l3_frag->ip6_src = l3_reass->ip6_src;
+ l3_frag->ip6_dst = l3_reass->ip6_dst;
+ } else {
+ struct ip_header *l3_frag = dp_packet_l3(frag_i->pkt);
+ struct ip_header *l3_reass = dp_packet_l3(pkt);
+ if (!dp_packet_hwol_is_ipv4(frag_i->pkt)) {
+ ovs_be32 reass_ip =
+ get_16aligned_be32(&l3_reass->ip_src);
+ ovs_be32 frag_ip =
+ get_16aligned_be32(&l3_frag->ip_src);
+
+ l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum,
+ frag_ip,
+ reass_ip);
+ reass_ip = get_16aligned_be32(&l3_reass->ip_dst);
+ frag_ip = get_16aligned_be32(&l3_frag->ip_dst);
+ l3_frag->ip_csum = recalc_csum32(l3_frag->ip_csum,
+ frag_ip,
+ reass_ip);
+ }
+
+ l3_frag->ip_src = l3_reass->ip_src;
+ l3_frag->ip_dst = l3_reass->ip_dst;
}
-
- l3_frag->ip_src = l3_reass->ip_src;
- l3_frag->ip_dst = l3_reass->ip_dst;
}
ipf_completed_list_add(&ipf->frag_complete_list, rp->list);
diff --git a/lib/json.c b/lib/json.c
index 32d25003b8..0baf7c622c 100644
--- a/lib/json.c
+++ b/lib/json.c
@@ -146,6 +146,7 @@ json_type_to_string(enum json_type type)
case JSON_STRING:
return "string";
+ case JSON_SERIALIZED_OBJECT:
case JSON_N_TYPES:
default:
return "<invalid>";
@@ -180,6 +181,14 @@ json_string_create(const char *s)
return json_string_create_nocopy(xstrdup(s));
}
+struct json *
+json_serialized_object_create(const struct json *src)
+{
+ struct json *json = json_create(JSON_SERIALIZED_OBJECT);
+ json->string = json_to_string(src, JSSF_SORT);
+ return json;
+}
+
struct json *
json_array_create_empty(void)
{
@@ -309,6 +318,13 @@ json_string(const struct json *json)
return json->string;
}
+const char *
+json_serialized_object(const struct json *json)
+{
+ ovs_assert(json->type == JSON_SERIALIZED_OBJECT);
+ return json->string;
+}
+
struct json_array *
json_array(const struct json *json)
{
@@ -362,6 +378,7 @@ json_destroy(struct json *json)
break;
case JSON_STRING:
+ case JSON_SERIALIZED_OBJECT:
free(json->string);
break;
@@ -422,6 +439,9 @@ json_deep_clone(const struct json *json)
case JSON_STRING:
return json_string_create(json->string);
+ case JSON_SERIALIZED_OBJECT:
+ return json_serialized_object_create(json);
+
case JSON_NULL:
case JSON_FALSE:
case JSON_TRUE:
@@ -521,6 +541,7 @@ json_hash(const struct json *json, size_t basis)
return json_hash_array(&json->array, basis);
case JSON_STRING:
+ case JSON_SERIALIZED_OBJECT:
return hash_string(json->string, basis);
case JSON_NULL:
@@ -596,6 +617,7 @@ json_equal(const struct json *a, const struct json *b)
return json_equal_array(&a->array, &b->array);
case JSON_STRING:
+ case JSON_SERIALIZED_OBJECT:
return !strcmp(a->string, b->string);
case JSON_NULL:
@@ -1072,6 +1094,14 @@ json_from_string(const char *string)
return json_parser_finish(p);
}
+/* Parses data of JSON_SERIALIZED_OBJECT to the real JSON. */
+struct json *
+json_from_serialized_object(const struct json *json)
+{
+ ovs_assert(json->type == JSON_SERIALIZED_OBJECT);
+ return json_from_string(json->string);
+}
+
/* Reads the file named 'file_name', parses its contents as a JSON object or
* array, and returns a newly allocated 'struct json'. The caller must free
* the returned structure with json_destroy() when it is no longer needed.
@@ -1563,6 +1593,10 @@ json_serialize(const struct json *json, struct json_serializer *s)
json_serialize_string(json->string, ds);
break;
+ case JSON_SERIALIZED_OBJECT:
+ ds_put_cstr(ds, json->string);
+ break;
+
case JSON_N_TYPES:
default:
OVS_NOT_REACHED();
@@ -1696,14 +1730,30 @@ json_serialize_string(const char *string, struct ds *ds)
{
uint8_t c;
uint8_t c2;
+ size_t count;
const char *escape;
+ const char *start;
ds_put_char(ds, '"');
+ count = 0;
+ start = string;
while ((c = *string++) != '\0') {
- escape = chars_escaping[c];
- while ((c2 = *escape++) != '\0') {
- ds_put_char(ds, c2);
+ if (c >= ' ' && c != '"' && c != '\\') {
+ count++;
+ } else {
+ if (count) {
+ ds_put_buffer(ds, start, count);
+ count = 0;
+ }
+ start = string;
+ escape = chars_escaping[c];
+ while ((c2 = *escape++) != '\0') {
+ ds_put_char(ds, c2);
+ }
}
}
+ if (count) {
+ ds_put_buffer(ds, start, count);
+ }
ds_put_char(ds, '"');
}
diff --git a/lib/lldp/lldp.c b/lib/lldp/lldp.c
index 18afbab9a7..dfeb2a8002 100644
--- a/lib/lldp/lldp.c
+++ b/lib/lldp/lldp.c
@@ -146,7 +146,9 @@ static void
lldp_tlv_end(struct dp_packet *p, unsigned int start)
{
ovs_be16 *tlv = dp_packet_at_assert(p, start, 2);
- *tlv |= htons((dp_packet_size(p) - (start + 2)) & 0x1ff);
+ put_unaligned_be16(tlv,
+ get_unaligned_be16(tlv)
+ | htons((dp_packet_size(p) - (start + 2)) & 0x1ff));
}
int
diff --git a/lib/meta-flow.c b/lib/meta-flow.c
index c808d205d5..e03cd8d0c5 100644
--- a/lib/meta-flow.c
+++ b/lib/meta-flow.c
@@ -1788,6 +1788,19 @@ mf_is_tun_metadata(const struct mf_field *mf)
mf->id < MFF_TUN_METADATA0 + TUN_METADATA_NUM_OPTS;
}
+bool
+mf_is_frozen_metadata(const struct mf_field *mf)
+{
+ if (mf->id >= MFF_TUN_ID && mf->id <= MFF_IN_PORT_OXM) {
+ return true;
+ }
+
+ if (mf->id >= MFF_REG0 && mf->id < MFF_ETH_SRC) {
+ return true;
+ }
+ return false;
+}
+
bool
mf_is_pipeline_field(const struct mf_field *mf)
{
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 45a96b9be2..ca92c947a2 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -961,14 +961,6 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
rte_eth_dev_info_get(dev->port_id, &info);
- /* As of DPDK 19.11, it is not allowed to set a mq_mode for
- * virtio PMD driver. */
- if (!strcmp(info.driver_name, "net_virtio")) {
- conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
- } else {
- conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
- }
-
/* As of DPDK 17.11.1 a few PMDs require to explicitly enable
* scatter to support jumbo RX.
* Setting scatter for the device is done after checking for
@@ -1000,6 +992,11 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
/* Limit configured rss hash functions to only those supported
* by the eth device. */
conf.rx_adv_conf.rss_conf.rss_hf &= info.flow_type_rss_offloads;
+ if (conf.rx_adv_conf.rss_conf.rss_hf == 0) {
+ conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
+ } else {
+ conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
+ }
/* A device may report more queues than it makes available (this has
* been observed for Intel xl710, which reserves some of them for
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 60dd138914..97bd21be4a 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -627,6 +627,7 @@ netdev_linux_notify_sock(void)
if (!error) {
size_t i;
+ nl_sock_listen_all_nsid(sock, true);
for (i = 0; i < ARRAY_SIZE(mcgroups); i++) {
error = nl_sock_join_mcgroup(sock, mcgroups[i]);
if (error) {
@@ -636,7 +637,6 @@ netdev_linux_notify_sock(void)
}
}
}
- nl_sock_listen_all_nsid(sock, true);
ovsthread_once_done(&once);
}
diff --git a/lib/odp-util.c b/lib/odp-util.c
index 7729a90608..65f028ba02 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -2941,7 +2941,7 @@ odp_nsh_key_from_attr__(const struct nlattr *attr, bool is_mask,
const struct ovs_nsh_key_md1 *md1 = nl_attr_get(a);
has_md1 = true;
memcpy(nsh->context, md1->context, sizeof md1->context);
- if (len == 2 * sizeof(*md1)) {
+ if (nsh_mask && (len == 2 * sizeof *md1)) {
const struct ovs_nsh_key_md1 *md1_mask = md1 + 1;
memcpy(nsh_mask->context, md1_mask->context,
sizeof(*md1_mask));
@@ -3212,7 +3212,7 @@ tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key,
opts.flags = tun_key->gtpu_flags;
opts.msgtype = tun_key->gtpu_msgtype;
- nl_msg_put_unspec(a, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
+ nl_msg_put_unspec(a, OVS_TUNNEL_KEY_ATTR_GTPU_OPTS,
&opts, sizeof(opts));
}
nl_msg_end_nested(a, tun_key_ofs);
@@ -4618,7 +4618,7 @@ odp_flow_format(const struct nlattr *key, size_t key_len,
}
ds_put_char(ds, ')');
}
- if (!has_ethtype_key) {
+ if (!has_ethtype_key && mask) {
const struct nlattr *ma = nl_attr_find__(mask, mask_len,
OVS_KEY_ATTR_ETHERTYPE);
if (ma) {
diff --git a/lib/ofp-flow.c b/lib/ofp-flow.c
index ff0396845a..3bc744f78f 100644
--- a/lib/ofp-flow.c
+++ b/lib/ofp-flow.c
@@ -1254,7 +1254,16 @@ ofputil_append_flow_stats_reply(const struct ofputil_flow_stats *fs,
OVS_NOT_REACHED();
}
- ofpmp_postappend(replies, start_ofs);
+ if ((reply->size - start_ofs) > (UINT16_MAX - ofpbuf_headersize(reply))) {
+ /* When this happens, the reply will not fit in a single OFP message,
+ * and we should not append it to the queue. We will log a warning
+ * and continue with the next flow stat entry. */
+ reply->size = start_ofs;
+ VLOG_WARN_RL(&rl, "Flow exceeded the maximum flow statistics reply "
+ "size and was excluded from the response set");
+ } else {
+ ofpmp_postappend(replies, start_ofs);
+ }
fs_->match.flow.tunnel.metadata.tab = orig_tun_table;
}
diff --git a/lib/ovsdb-cs.c b/lib/ovsdb-cs.c
index 659d49dbf7..dead31275d 100644
--- a/lib/ovsdb-cs.c
+++ b/lib/ovsdb-cs.c
@@ -1109,6 +1109,23 @@ ovsdb_cs_db_sync_condition(struct ovsdb_cs_db *db)
}
table->req_cond = NULL;
db->cond_changed = true;
+
+ /* There are two cases:
+ * a. either the server already processed the requested monitor
+ * condition change but the FSM was restarted before the
+ * client was notified. In this case the client should
+ * clear its local cache because it's out of sync with the
+ * monitor view on the server side.
+ *
+ * b. OR the server hasn't processed the requested monitor
+ * condition change yet.
+ *
+ * As there's no easy way to differentiate between the two,
+ * and given that this condition should be rare, reset the
+ * 'last_id', essentially flushing the local cached DB
+ * contents.
+ */
+ db->last_id = UUID_ZERO;
}
}
}
@@ -1539,12 +1556,11 @@ ovsdb_cs_db_parse_monitor_reply(struct ovsdb_cs_db *db,
const struct json *table_updates;
bool clear;
if (version == 3) {
- struct uuid last_id;
if (result->type != JSON_ARRAY || result->array.n != 3
|| (result->array.elems[0]->type != JSON_TRUE &&
result->array.elems[0]->type != JSON_FALSE)
|| result->array.elems[1]->type != JSON_STRING
- || !uuid_from_string(&last_id,
+ || !uuid_from_string(&db->last_id,
json_string(result->array.elems[1]))) {
struct ovsdb_error *error = ovsdb_syntax_error(
result, NULL, "bad monitor_cond_since reply format");
@@ -1833,7 +1849,7 @@ server_column_get_string(const struct server_row *row,
{
ovs_assert(server_columns[index].type.key.type == OVSDB_TYPE_STRING);
const struct ovsdb_datum *d = &row->data[index];
- return d->n == 1 ? d->keys[0].string : default_value;
+ return d->n == 1 ? d->keys[0].s->string : default_value;
}
static bool
diff --git a/lib/ovsdb-data.c b/lib/ovsdb-data.c
index c145f5ad97..6654ed6deb 100644
--- a/lib/ovsdb-data.c
+++ b/lib/ovsdb-data.c
@@ -74,7 +74,7 @@ ovsdb_atom_init_default(union ovsdb_atom *atom, enum ovsdb_atomic_type type)
break;
case OVSDB_TYPE_STRING:
- atom->string = xmemdup("", 1);
+ atom->s = ovsdb_atom_string_create_nocopy(xmemdup("", 1));
break;
case OVSDB_TYPE_UUID:
@@ -136,7 +136,7 @@ ovsdb_atom_is_default(const union ovsdb_atom *atom,
return atom->boolean == false;
case OVSDB_TYPE_STRING:
- return atom->string[0] == '\0';
+ return atom->s->string[0] == '\0';
case OVSDB_TYPE_UUID:
return uuid_is_zero(&atom->uuid);
@@ -172,7 +172,8 @@ ovsdb_atom_clone(union ovsdb_atom *new, const union ovsdb_atom *old,
break;
case OVSDB_TYPE_STRING:
- new->string = xstrdup(old->string);
+ new->s = old->s;
+ new->s->n_refs++;
break;
case OVSDB_TYPE_UUID:
@@ -214,7 +215,7 @@ ovsdb_atom_hash(const union ovsdb_atom *atom, enum ovsdb_atomic_type type,
return hash_boolean(atom->boolean, basis);
case OVSDB_TYPE_STRING:
- return hash_string(atom->string, basis);
+ return hash_string(atom->s->string, basis);
case OVSDB_TYPE_UUID:
return hash_int(uuid_hash(&atom->uuid), basis);
@@ -246,7 +247,7 @@ ovsdb_atom_compare_3way(const union ovsdb_atom *a,
return a->boolean - b->boolean;
case OVSDB_TYPE_STRING:
- return strcmp(a->string, b->string);
+ return a->s == b->s ? 0 : strcmp(a->s->string, b->s->string);
case OVSDB_TYPE_UUID:
return uuid_compare_3way(&a->uuid, &b->uuid);
@@ -404,7 +405,7 @@ ovsdb_atom_from_json__(union ovsdb_atom *atom,
case OVSDB_TYPE_STRING:
if (json->type == JSON_STRING) {
- atom->string = xstrdup(json->string);
+ atom->s = ovsdb_atom_string_create(json->string);
return NULL;
}
break;
@@ -473,7 +474,7 @@ ovsdb_atom_to_json(const union ovsdb_atom *atom, enum ovsdb_atomic_type type)
return json_boolean_create(atom->boolean);
case OVSDB_TYPE_STRING:
- return json_string_create(atom->string);
+ return json_string_create(atom->s->string);
case OVSDB_TYPE_UUID:
return wrap_json("uuid", json_string_create_nocopy(
@@ -551,14 +552,18 @@ ovsdb_atom_from_string__(union ovsdb_atom *atom,
if (s_len < 2 || s[s_len - 1] != '"') {
return xasprintf("%s: missing quote at end of "
"quoted string", s);
- } else if (!json_string_unescape(s + 1, s_len - 2,
- &atom->string)) {
- char *error = xasprintf("%s: %s", s, atom->string);
- free(atom->string);
- return error;
+ } else {
+ char *res;
+ if (json_string_unescape(s + 1, s_len - 2, &res)) {
+ atom->s = ovsdb_atom_string_create_nocopy(res);
+ } else {
+ char *error = xasprintf("%s: %s", s, res);
+ free(res);
+ return error;
+ }
}
} else {
- atom->string = xstrdup(s);
+ atom->s = ovsdb_atom_string_create(s);
}
break;
@@ -721,14 +726,14 @@ ovsdb_atom_to_string(const union ovsdb_atom *atom, enum ovsdb_atomic_type type,
break;
case OVSDB_TYPE_STRING:
- if (string_needs_quotes(atom->string)) {
+ if (string_needs_quotes(atom->s->string)) {
struct json json;
json.type = JSON_STRING;
- json.string = atom->string;
+ json.string = atom->s->string;
json_to_ds(&json, 0, out);
} else {
- ds_put_cstr(out, atom->string);
+ ds_put_cstr(out, atom->s->string);
}
break;
@@ -750,7 +755,7 @@ ovsdb_atom_to_bare(const union ovsdb_atom *atom, enum ovsdb_atomic_type type,
struct ds *out)
{
if (type == OVSDB_TYPE_STRING) {
- ds_put_cstr(out, atom->string);
+ ds_put_cstr(out, atom->s->string);
} else {
ovsdb_atom_to_string(atom, type, out);
}
@@ -799,7 +804,7 @@ ovsdb_atom_check_constraints(const union ovsdb_atom *atom,
const struct ovsdb_base_type *base)
{
if (base->enum_
- && ovsdb_datum_find_key(base->enum_, atom, base->type) == UINT_MAX) {
+ && !ovsdb_datum_find_key(base->enum_, atom, base->type, NULL)) {
struct ovsdb_error *error;
struct ds actual = DS_EMPTY_INITIALIZER;
struct ds valid = DS_EMPTY_INITIALIZER;
@@ -877,7 +882,7 @@ ovsdb_atom_check_constraints(const union ovsdb_atom *atom,
return NULL;
case OVSDB_TYPE_STRING:
- return check_string_constraints(atom->string, &base->string);
+ return check_string_constraints(atom->s->string, &base->string);
case OVSDB_TYPE_UUID:
return NULL;
@@ -1691,8 +1696,8 @@ ovsdb_datum_from_smap(struct ovsdb_datum *datum, const struct smap *smap)
struct smap_node *node;
size_t i = 0;
SMAP_FOR_EACH (node, smap) {
- datum->keys[i].string = xstrdup(node->key);
- datum->values[i].string = xstrdup(node->value);
+ datum->keys[i].s = ovsdb_atom_string_create(node->key);
+ datum->values[i].s = ovsdb_atom_string_create(node->value);
i++;
}
ovs_assert(i == datum->n);
@@ -1784,14 +1789,16 @@ ovsdb_datum_compare_3way(const struct ovsdb_datum *a,
a->n));
}
-/* If 'key' is one of the keys in 'datum', returns its index within 'datum',
- * otherwise UINT_MAX. 'key.type' must be the type of the atoms stored in the
- * 'keys' array in 'datum'.
+/* If 'key' is one of the keys in 'datum', returns 'true' and sets '*pos' to
+ * its index within 'datum', otherwise returns 'false' and sets '*pos' to the
+ * index where 'key' should have been. 'key.type' must be the type of the
+ * atoms stored in the 'keys' array in 'datum'.
*/
-unsigned int
+bool
ovsdb_datum_find_key(const struct ovsdb_datum *datum,
const union ovsdb_atom *key,
- enum ovsdb_atomic_type key_type)
+ enum ovsdb_atomic_type key_type,
+ unsigned int *pos)
{
unsigned int low = 0;
unsigned int high = datum->n;
@@ -1803,10 +1810,16 @@ ovsdb_datum_find_key(const struct ovsdb_datum *datum,
} else if (cmp > 0) {
low = idx + 1;
} else {
- return idx;
+ if (pos) {
+ *pos = idx;
+ }
+ return true;
}
}
- return UINT_MAX;
+ if (pos) {
+ *pos = low;
+ }
+ return false;
}
/* If 'key' and 'value' is one of the key-value pairs in 'datum', returns its
@@ -1821,10 +1834,11 @@ ovsdb_datum_find_key_value(const struct ovsdb_datum *datum,
const union ovsdb_atom *value,
enum ovsdb_atomic_type value_type)
{
- unsigned int idx = ovsdb_datum_find_key(datum, key, key_type);
- if (idx != UINT_MAX
- && value_type != OVSDB_TYPE_VOID
- && !ovsdb_atom_equals(&datum->values[idx], value, value_type)) {
+ unsigned int idx;
+
+ if (!ovsdb_datum_find_key(datum, key, key_type, &idx)
+ || (value_type != OVSDB_TYPE_VOID
+ && !ovsdb_atom_equals(&datum->values[idx], value, value_type))) {
idx = UINT_MAX;
}
return idx;
@@ -1948,38 +1962,68 @@ ovsdb_datum_add_unsafe(struct ovsdb_datum *datum,
}
}
+/* Appends the 'n' atoms of 'src' that begin at index 'start_idx' to the tail
+ * of 'dst' and grows 'dst->n' accordingly.  Values are copied as well unless
+ * the value type of 'type' is OVSDB_TYPE_VOID.
+ *
+ * This is a raw memcpy(): atoms are not cloned, so 'dst' ends up referencing
+ * the same data as 'src'.  'dst' must already have room for the extra 'n'
+ * atoms, and keeping the result sorted is the caller's responsibility. */
+static void
+ovsdb_datum_push_unsafe(struct ovsdb_datum *dst,
+                        const struct ovsdb_datum *src,
+                        unsigned int start_idx, unsigned int n,
+                        const struct ovsdb_type *type)
+{
+    const union ovsdb_atom *first_key = &src->keys[start_idx];
+
+    memcpy(&dst->keys[dst->n], first_key, n * sizeof *first_key);
+    if (type->value.type != OVSDB_TYPE_VOID) {
+        const union ovsdb_atom *first_value = &src->values[start_idx];
+
+        memcpy(&dst->values[dst->n], first_value, n * sizeof *first_value);
+    }
+    dst->n += n;
+}
+
+/* Adds to 'a' every entry of 'b' whose key is not already one of the keys in
+ * 'a'.  Entries of 'a' are never modified: when a key exists in both datums,
+ * the entry from 'a' is kept as is.
+ *
+ * The merged result is assembled in a temporary datum: atoms of 'a' are moved
+ * over without cloning, atoms taken from 'b' are cloned, and the result is
+ * then swapped into 'a'.  If 'b' contributes nothing, 'a' is left untouched.
+ *
+ * NOTE(review): this relies on both 'a' and 'b' being sorted by key, since
+ * insert positions come from ovsdb_datum_find_key() -- confirm at callers. */
void
ovsdb_datum_union(struct ovsdb_datum *a, const struct ovsdb_datum *b,
- const struct ovsdb_type *type, bool replace)
+ const struct ovsdb_type *type)
{
- unsigned int n;
- size_t bi;
+ struct ovsdb_datum result;
+ unsigned int copied, pos;
- n = a->n;
- for (bi = 0; bi < b->n; bi++) {
- unsigned int ai;
+ ovsdb_datum_init_empty(&result);
- ai = ovsdb_datum_find_key(a, &b->keys[bi], type->key.type);
- if (ai == UINT_MAX) {
- if (n == a->n) {
- ovsdb_datum_reallocate(a, type, a->n + (b->n - bi));
- }
- ovsdb_atom_clone(&a->keys[n], &b->keys[bi], type->key.type);
- if (type->value.type != OVSDB_TYPE_VOID) {
- ovsdb_atom_clone(&a->values[n], &b->values[bi],
- type->value.type);
- }
- n++;
- } else if (replace && type->value.type != OVSDB_TYPE_VOID) {
- ovsdb_atom_destroy(&a->values[ai], type->value.type);
- ovsdb_atom_clone(&a->values[ai], &b->values[bi],
+ copied = 0;
+ for (size_t bi = 0; bi < b->n; bi++) {
+ if (ovsdb_datum_find_key(a, &b->keys[bi], type->key.type, &pos)) {
+ /* Atom with the same key already exists. */
+ continue;
+ }
+ if (!result.keys) {
+ ovsdb_datum_reallocate(&result, type, a->n + (b->n - bi));
+ }
+ if (pos > copied) {
+ /* Need to copy some atoms from 'a' first. */
+ ovsdb_datum_push_unsafe(&result, a, copied, pos - copied, type);
+ copied = pos;
+ }
+ /* Inserting new atom from 'b'. */
+ ovsdb_atom_clone(&result.keys[result.n], &b->keys[bi], type->key.type);
+ if (type->value.type != OVSDB_TYPE_VOID) {
+ ovsdb_atom_clone(&result.values[result.n], &b->values[bi],
type->value.type);
}
+ result.n++;
}
- if (n != a->n) {
- a->n = n;
- ovs_assert(!ovsdb_datum_sort(a, type->key.type));
+ if (!result.keys) {
+ /* 'a' doesn't need to be changed. */
+ return;
+ }
+ if (a->n > copied) {
+ /* Copying remaining atoms. */
+ ovsdb_datum_push_unsafe(&result, a, copied, a->n - copied, type);
}
+ /* All atoms are copied now. */
+ a->n = 0;
+
+ ovsdb_datum_swap(&result, a);
+ ovsdb_datum_destroy(&result, type);
}
void
@@ -1987,26 +2031,55 @@ ovsdb_datum_subtract(struct ovsdb_datum *a, const struct ovsdb_type *a_type,
const struct ovsdb_datum *b,
const struct ovsdb_type *b_type)
{
- bool changed = false;
- size_t i;
+ unsigned int *idx, ai;
+ size_t n_idx;
ovs_assert(a_type->key.type == b_type->key.type);
ovs_assert(a_type->value.type == b_type->value.type
|| b_type->value.type == OVSDB_TYPE_VOID);
- /* XXX The big-O of this could easily be improved. */
- for (i = 0; i < a->n; ) {
- unsigned int idx = ovsdb_datum_find(a, i, b, b_type);
- if (idx != UINT_MAX) {
- changed = true;
- ovsdb_datum_remove_unsafe(a, i, a_type);
- } else {
- i++;
+ idx = xmalloc(b->n * sizeof *idx);
+ n_idx = 0;
+ for (size_t bi = 0; bi < b->n; bi++) {
+ ai = ovsdb_datum_find(b, bi, a, b_type);
+ if (ai == UINT_MAX) {
+ /* No such atom in 'a'. */
+ continue;
}
+ /* Not destroying right away since ovsdb_datum_find() will use them. */
+ idx[n_idx++] = ai;
}
- if (changed) {
- ovsdb_datum_sort_assert(a, a_type->key.type);
+ if (!n_idx) {
+ free(idx);
+ return;
+ }
+
+ struct ovsdb_datum result;
+
+ ovsdb_datum_init_empty(&result);
+ ovsdb_datum_reallocate(&result, a_type, a->n - n_idx);
+
+ unsigned int start_idx = 0;
+ for (size_t i = 0; i < n_idx; i++) {
+ ai = idx[i];
+
+ /* Destroying atom. */
+ ovsdb_atom_destroy(&a->keys[ai], a_type->key.type);
+ if (a_type->value.type != OVSDB_TYPE_VOID) {
+ ovsdb_atom_destroy(&a->values[ai], a_type->value.type);
+ }
+
+ /* Copy non-removed atoms from 'a' to result. */
+ ovsdb_datum_push_unsafe(&result, a, start_idx, ai - start_idx, a_type);
+ start_idx = idx[i] + 1;
}
+ /* Copying remaining atoms. */
+ ovsdb_datum_push_unsafe(&result, a, start_idx, a->n - start_idx, a_type);
+ a->n = 0;
+
+ ovsdb_datum_swap(&result, a);
+ ovsdb_datum_destroy(&result, a_type);
+ free(idx);
}
struct ovsdb_symbol_table *
@@ -2067,6 +2140,64 @@ ovsdb_symbol_table_insert(struct ovsdb_symbol_table *symtab,
/* APIs for Generating and apply diffs. */
+/* Find what needs to be added to and removed from 'old' to construct 'new'.
+ *
+ * Initializes 'added' and 'removed' and fills them with the result.  For a
+ * type that is not composite, 'removed' is simply a clone of 'old' and
+ * 'added' a clone of 'new'.  Otherwise, a key that exists in both datums but
+ * with a different value is reported in both 'removed' (with the old value)
+ * and 'added' (with the new value); a key with an equal value is reported in
+ * neither.
+ *
+ * The 'added' and 'removed' datums are always safe; the orders of keys are
+ * maintained since they are added in order.
+ *
+ * NOTE(review): the merge walk below assumes 'old' and 'new' are sorted by
+ * key -- confirm at callers. */
+void
+ovsdb_datum_added_removed(struct ovsdb_datum *added,
+ struct ovsdb_datum *removed,
+ const struct ovsdb_datum *old,
+ const struct ovsdb_datum *new,
+ const struct ovsdb_type *type)
+{
+ size_t oi, ni;
+
+ ovsdb_datum_init_empty(added);
+ ovsdb_datum_init_empty(removed);
+ if (!ovsdb_type_is_composite(type)) {
+ ovsdb_datum_clone(removed, old, type);
+ ovsdb_datum_clone(added, new, type);
+ return;
+ }
+
+ /* Generate the diff in O(n) time. */
+ for (oi = ni = 0; oi < old->n && ni < new->n;) {
+ int c = ovsdb_atom_compare_3way(&old->keys[oi], &new->keys[ni],
+ type->key.type);
+ if (c < 0) {
+ /* Key is only in 'old'. */
+ ovsdb_datum_add_unsafe(removed, &old->keys[oi], &old->values[oi],
+ type, NULL);
+ oi++;
+ } else if (c > 0) {
+ /* Key is only in 'new'. */
+ ovsdb_datum_add_unsafe(added, &new->keys[ni], &new->values[ni],
+ type, NULL);
+ ni++;
+ } else {
+ /* Same key in both: only a changed value is a difference. */
+ if (type->value.type != OVSDB_TYPE_VOID &&
+ ovsdb_atom_compare_3way(&old->values[oi], &new->values[ni],
+ type->value.type)) {
+ ovsdb_datum_add_unsafe(removed, &old->keys[oi],
+ &old->values[oi], type, NULL);
+ ovsdb_datum_add_unsafe(added, &new->keys[ni], &new->values[ni],
+ type, NULL);
+ }
+ oi++; ni++;
+ }
+ }
+
+ /* Whatever is left in 'old' has no counterpart in 'new'. */
+ for (; oi < old->n; oi++) {
+ ovsdb_datum_add_unsafe(removed, &old->keys[oi], &old->values[oi],
+ type, NULL);
+ }
+
+ /* Whatever is left in 'new' has no counterpart in 'old'. */
+ for (; ni < new->n; ni++) {
+ ovsdb_datum_add_unsafe(added, &new->keys[ni], &new->values[ni],
+ type, NULL);
+ }
+}
+
+
/* Generate a difference ovsdb_dataum between 'old' and 'new'.
* 'new' can be regenerated by applying the difference to the 'old'.
*
@@ -2127,6 +2258,106 @@ ovsdb_datum_diff(struct ovsdb_datum *diff,
}
}
+/* Apply 'diff' to 'a'.
+ *
+ * For a type that is not composite, 'a' is simply replaced with a clone of
+ * 'diff'.  Otherwise every entry of 'diff' is interpreted against 'a':
+ *
+ *   - key not present in 'a': the entry is added;
+ *   - key present, but with a different value: the value is replaced;
+ *   - key present with an equal value (or the type has no values): the
+ *     entry is removed.
+ *
+ * Return NULL if the 'a' is successfully updated, otherwise, return
+ * ovsdb_error.  On error 'a' is left unchanged. */
+struct ovsdb_error *
+ovsdb_datum_apply_diff_in_place(struct ovsdb_datum *a,
+                                const struct ovsdb_datum *diff,
+                                const struct ovsdb_type *type)
+{
+    struct ovsdb_error *error = NULL;
+    struct ovsdb_datum result;
+    size_t i, new_size;
+    unsigned int *idx, pos;
+    enum {
+        DIFF_OP_ADD,
+        DIFF_OP_REMOVE,
+        DIFF_OP_UPDATE,
+    } *operation;
+
+    if (!ovsdb_type_is_composite(type)) {
+        ovsdb_datum_destroy(a, type);
+        ovsdb_datum_clone(a, diff, type);
+        return NULL;
+    }
+
+    /* First pass: classify every entry of 'diff' and remember where it
+     * applies in 'a', i.e. the index of the matching key or, for a new key,
+     * the position where it has to be inserted.  'a' is only read here. */
+    operation = xmalloc(diff->n * sizeof *operation);
+    idx = xmalloc(diff->n * sizeof *idx);
+    new_size = a->n;
+    for (i = 0; i < diff->n; i++) {
+        if (!ovsdb_datum_find_key(a, &diff->keys[i], type->key.type, &pos)) {
+            operation[i] = DIFF_OP_ADD;
+            new_size++;
+        } else if (type->value.type != OVSDB_TYPE_VOID
+                   && !ovsdb_atom_equals(&diff->values[i], &a->values[pos],
+                                         type->value.type)) {
+            operation[i] = DIFF_OP_UPDATE;
+        } else {
+            operation[i] = DIFF_OP_REMOVE;
+            new_size--;
+        }
+        idx[i] = pos;
+    }
+
+    /* Make sure the size of the resulting datum conforms to type. */
+    if (new_size < type->n_min || new_size > type->n_max) {
+        error = ovsdb_error(NULL, "Datum created by diff has size error");
+        goto exit;
+    }
+
+    ovsdb_datum_init_empty(&result);
+    ovsdb_datum_reallocate(&result, type, new_size);
+
+    /* Second pass: build 'result' from the untouched atoms of 'a' (moved,
+     * not cloned) interleaved with the atoms cloned from 'diff'. */
+    unsigned int copied = 0;
+    for (i = 0; i < diff->n; i++) {
+        pos = idx[i];
+
+        if (copied < pos) {
+            /* Copying all atoms that should go before the current one. */
+            ovsdb_datum_push_unsafe(&result, a, copied, pos - copied, type);
+            copied = pos;
+        }
+
+        switch (operation[i]) {
+        case DIFF_OP_UPDATE:
+        case DIFF_OP_ADD:
+            /* Inserting new atom from 'diff'. */
+            ovsdb_atom_clone(&result.keys[result.n],
+                             &diff->keys[i], type->key.type);
+            if (type->value.type != OVSDB_TYPE_VOID) {
+                ovsdb_atom_clone(&result.values[result.n],
+                                 &diff->values[i], type->value.type);
+            }
+            result.n++;
+            if (operation[i] != DIFF_OP_UPDATE) {
+                break;
+            }
+            /* An update also has to drop the old atom from 'a'. */
+            /* fall through */
+
+        case DIFF_OP_REMOVE:
+            /* Destroying atom. */
+            ovsdb_atom_destroy(&a->keys[pos], type->key.type);
+            if (type->value.type != OVSDB_TYPE_VOID) {
+                ovsdb_atom_destroy(&a->values[pos], type->value.type);
+            }
+            copied++; /* Skipping removed atom. */
+            break;
+        }
+    }
+    /* Copying remaining atoms. */
+    ovsdb_datum_push_unsafe(&result, a, copied, a->n - copied, type);
+    /* Every atom of 'a' was either moved to 'result' or destroyed, so only
+     * the arrays of 'a' are left to be freed after the swap. */
+    a->n = 0;
+
+    ovsdb_datum_swap(&result, a);
+    ovsdb_datum_destroy(&result, type);
+exit:
+    free(operation);
+    free(idx);
+    return error;
+}
+
/* Apply 'diff' to 'old' to regenerate 'new'.
*
* Return NULL if the 'new' is successfully generated, otherwise, return
diff --git a/lib/ovsdb-data.h b/lib/ovsdb-data.h
index c5a80ee39f..f66ed3472c 100644
--- a/lib/ovsdb-data.h
+++ b/lib/ovsdb-data.h
@@ -20,6 +20,7 @@
#include "compiler.h"
#include "ovsdb-types.h"
#include "openvswitch/shash.h"
+#include "util.h"
#ifdef __cplusplus
extern "C" {
@@ -31,12 +32,33 @@ struct ds;
struct ovsdb_symbol_table;
struct smap;
+/* A string that can be shared by several atoms.  'n_refs' counts the atoms
+ * currently pointing at this structure; 'string' and the structure itself
+ * are freed when the last reference is dropped. */
+struct ovsdb_atom_string {
+ char *string;
+ size_t n_refs;
+};
+
+/* Wraps 'str' in a newly allocated ovsdb_atom_string that holds a single
+ * reference.  'str' is not copied: the returned structure stores the
+ * pointer as is. */
+static inline struct ovsdb_atom_string *
+ovsdb_atom_string_create_nocopy(char *str)
+{
+    struct ovsdb_atom_string *atom_str = xzalloc(sizeof *atom_str);
+
+    atom_str->n_refs = 1;
+    atom_str->string = str;
+    return atom_str;
+}
+
+/* Returns a new single-reference ovsdb_atom_string that holds its own
+ * xstrdup()'ed copy of 'str'. */
+static inline struct ovsdb_atom_string *
+ovsdb_atom_string_create(const char *str)
+{
+    char *copy = xstrdup(str);
+
+    return ovsdb_atom_string_create_nocopy(copy);
+}
+
/* One value of an atomic type (given by enum ovs_atomic_type). */
union ovsdb_atom {
int64_t integer;
double real;
bool boolean;
- char *string;
+ /* Reference-counted string, possibly shared with other atoms. */
+ struct ovsdb_atom_string *s;
struct uuid uuid;
};
@@ -66,8 +88,9 @@ ovsdb_atom_needs_destruction(enum ovsdb_atomic_type type)
static inline void
ovsdb_atom_destroy(union ovsdb_atom *atom, enum ovsdb_atomic_type type)
{
- if (type == OVSDB_TYPE_STRING) {
- free(atom->string);
+ /* String atoms are reference-counted: drop this atom's reference and free
+ * the shared string only once no reference is left. */
+ if (type == OVSDB_TYPE_STRING && !--atom->s->n_refs) {
+ free(atom->s->string);
+ free(atom->s);
}
}
@@ -209,9 +232,10 @@ bool ovsdb_datum_equals(const struct ovsdb_datum *,
const struct ovsdb_type *);
/* Search. */
-unsigned int ovsdb_datum_find_key(const struct ovsdb_datum *,
- const union ovsdb_atom *key,
- enum ovsdb_atomic_type key_type);
+bool ovsdb_datum_find_key(const struct ovsdb_datum *,
+ const union ovsdb_atom *key,
+ enum ovsdb_atomic_type key_type,
+ unsigned int *pos);
unsigned int ovsdb_datum_find_key_value(const struct ovsdb_datum *,
const union ovsdb_atom *key,
enum ovsdb_atomic_type key_type,
@@ -227,14 +251,19 @@ bool ovsdb_datum_excludes_all(const struct ovsdb_datum *,
const struct ovsdb_type *);
void ovsdb_datum_union(struct ovsdb_datum *,
const struct ovsdb_datum *,
- const struct ovsdb_type *,
- bool replace);
+ const struct ovsdb_type *);
void ovsdb_datum_subtract(struct ovsdb_datum *a,
const struct ovsdb_type *a_type,
const struct ovsdb_datum *b,
const struct ovsdb_type *b_type);
/* Generate and apply diffs */
+void ovsdb_datum_added_removed(struct ovsdb_datum *added,
+ struct ovsdb_datum *removed,
+ const struct ovsdb_datum *old,
+ const struct ovsdb_datum *new,
+ const struct ovsdb_type *type);
+
void ovsdb_datum_diff(struct ovsdb_datum *diff,
const struct ovsdb_datum *old_datum,
const struct ovsdb_datum *new_datum,
@@ -246,6 +275,12 @@ struct ovsdb_error *ovsdb_datum_apply_diff(struct ovsdb_datum *new_datum,
const struct ovsdb_type *type)
OVS_WARN_UNUSED_RESULT;
+struct ovsdb_error * ovsdb_datum_apply_diff_in_place(
+ struct ovsdb_datum *a,
+ const struct ovsdb_datum *diff,
+ const struct ovsdb_type *type)
+OVS_WARN_UNUSED_RESULT;
+
/* Raw operations that may not maintain the invariants. */
void ovsdb_datum_remove_unsafe(struct ovsdb_datum *, size_t idx,
const struct ovsdb_type *);
diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
index 2198c69c60..496ec490d3 100644
--- a/lib/ovsdb-idl.c
+++ b/lib/ovsdb-idl.c
@@ -1898,8 +1898,7 @@ ovsdb_idl_index_destroy_row(const struct ovsdb_idl_row *row_)
BITMAP_FOR_EACH_1 (i, class->n_columns, row->written) {
c = &class->columns[i];
(c->unparse) (row);
- free(row->new_datum[i].values);
- free(row->new_datum[i].keys);
+ ovsdb_datum_destroy(&row->new_datum[i], &c->type);
}
free(row->new_datum);
free(row->written);
@@ -2787,9 +2786,8 @@ ovsdb_idl_txn_extract_mutations(struct ovsdb_idl_row *row,
struct ovsdb_datum *new_datum;
unsigned int pos;
new_datum = map_op_datum(map_op);
- pos = ovsdb_datum_find_key(old_datum,
- &new_datum->keys[0],
- key_type);
+ ovsdb_datum_find_key(old_datum, &new_datum->keys[0],
+ key_type, &pos);
if (ovsdb_atom_equals(&new_datum->values[0],
&old_datum->values[pos],
value_type)) {
@@ -2798,11 +2796,9 @@ ovsdb_idl_txn_extract_mutations(struct ovsdb_idl_row *row,
}
} else if (map_op_type(map_op) == MAP_OP_DELETE){
/* Verify that there is a key to delete. */
- unsigned int pos;
- pos = ovsdb_datum_find_key(old_datum,
- &map_op_datum(map_op)->keys[0],
- key_type);
- if (pos == UINT_MAX) {
+ if (!ovsdb_datum_find_key(old_datum,
+ &map_op_datum(map_op)->keys[0],
+ key_type, NULL)) {
/* No key to delete. Move on to next update. */
VLOG_WARN("Trying to delete a key that doesn't "
"exist in the map.");
@@ -2897,11 +2893,9 @@ ovsdb_idl_txn_extract_mutations(struct ovsdb_idl_row *row,
any_ins = true;
} else { /* SETP_OP_DELETE */
/* Verify that there is a key to delete. */
- unsigned int pos;
- pos = ovsdb_datum_find_key(old_datum,
- &set_op_datum(set_op)->keys[0],
- key_type);
- if (pos == UINT_MAX) {
+ if (!ovsdb_datum_find_key(old_datum,
+ &set_op_datum(set_op)->keys[0],
+ key_type, NULL)) {
/* No key to delete. Move on to next update. */
VLOG_WARN("Trying to delete a key that doesn't "
"exist in the set.");
@@ -4066,7 +4060,6 @@ ovsdb_idl_txn_write_partial_map(const struct ovsdb_idl_row *row_,
struct ovsdb_idl_row *row = CONST_CAST(struct ovsdb_idl_row *, row_);
enum ovsdb_atomic_type key_type;
enum map_op_type op_type;
- unsigned int pos;
const struct ovsdb_datum *old_datum;
if (!is_valid_partial_update(row, column, datum)) {
@@ -4078,8 +4071,11 @@ ovsdb_idl_txn_write_partial_map(const struct ovsdb_idl_row *row_,
/* Find out if this is an insert or an update. */
key_type = column->type.key.type;
old_datum = ovsdb_idl_read(row, column);
- pos = ovsdb_datum_find_key(old_datum, &datum->keys[0], key_type);
- op_type = pos == UINT_MAX ? MAP_OP_INSERT : MAP_OP_UPDATE;
+ if (ovsdb_datum_find_key(old_datum, &datum->keys[0], key_type, NULL)) {
+ op_type = MAP_OP_UPDATE;
+ } else {
+ op_type = MAP_OP_INSERT;
+ }
ovsdb_idl_txn_add_map_op(row, column, datum, op_type);
}
@@ -4112,6 +4108,9 @@ void
ovsdb_idl_loop_destroy(struct ovsdb_idl_loop *loop)
{
if (loop) {
+ if (loop->committing_txn) {
+ ovsdb_idl_txn_destroy(loop->committing_txn);
+ }
ovsdb_idl_destroy(loop->idl);
}
}
@@ -4121,8 +4120,8 @@ ovsdb_idl_loop_run(struct ovsdb_idl_loop *loop)
{
ovsdb_idl_run(loop->idl);
- /* See if we can commit the loop->committing_txn. */
- if (loop->committing_txn) {
+ /* See if the 'committing_txn' succeeded in the meantime. */
+ if (loop->committing_txn && loop->committing_txn->status == TXN_SUCCESS) {
ovsdb_idl_try_commit_loop_txn(loop, NULL);
}
diff --git a/lib/pcap-file.c b/lib/pcap-file.c
index b30a11c24b..41835f6f4d 100644
--- a/lib/pcap-file.c
+++ b/lib/pcap-file.c
@@ -89,6 +89,7 @@ ovs_pcap_open(const char *file_name, const char *mode)
: mode[0] == 'w' ? "writing"
: "appending"),
ovs_strerror(errno));
+ free(p_file);
return NULL;
}
diff --git a/lib/reconnect.c b/lib/reconnect.c
index a929ddfd2d..89a0bcaf95 100644
--- a/lib/reconnect.c
+++ b/lib/reconnect.c
@@ -75,7 +75,8 @@ struct reconnect {
static void reconnect_transition__(struct reconnect *, long long int now,
enum state state);
-static long long int reconnect_deadline__(const struct reconnect *);
+static long long int reconnect_deadline__(const struct reconnect *,
+ long long int now);
static bool reconnect_may_retry(struct reconnect *);
static const char *
@@ -539,7 +540,7 @@ reconnect_transition__(struct reconnect *fsm, long long int now,
}
static long long int
-reconnect_deadline__(const struct reconnect *fsm)
+reconnect_deadline__(const struct reconnect *fsm, long long int now)
{
ovs_assert(fsm->state_entered != LLONG_MIN);
switch (fsm->state) {
@@ -557,8 +558,18 @@ reconnect_deadline__(const struct reconnect *fsm)
if (fsm->probe_interval) {
long long int base = MAX(fsm->last_activity, fsm->state_entered);
long long int expiration = base + fsm->probe_interval;
- if (fsm->last_receive_attempt >= expiration) {
+ if (now < expiration || fsm->last_receive_attempt >= expiration) {
+ /* We still have time before the expiration or the time has
+ * already passed and there was no activity. In the first case
+ * we need to wait for the expiration, in the second - we're
+ * already past the deadline. */
return expiration;
+ } else {
+ /* Time has already passed, but we didn't attempt to receive
+ * anything. We need to wake up and try to receive even if
+ * nothing is pending, so we can update the expiration time or
+ * transition to a different state. */
+ return now + 1;
}
}
return LLONG_MAX;
@@ -566,8 +577,10 @@ reconnect_deadline__(const struct reconnect *fsm)
case S_IDLE:
if (fsm->probe_interval) {
long long int expiration = fsm->state_entered + fsm->probe_interval;
- if (fsm->last_receive_attempt >= expiration) {
+ if (now < expiration || fsm->last_receive_attempt >= expiration) {
return expiration;
+ } else {
+ return now + 1;
}
}
return LLONG_MAX;
@@ -618,7 +631,7 @@ reconnect_deadline__(const struct reconnect *fsm)
enum reconnect_action
reconnect_run(struct reconnect *fsm, long long int now)
{
- if (now >= reconnect_deadline__(fsm)) {
+ if (now >= reconnect_deadline__(fsm, now)) {
switch (fsm->state) {
case S_VOID:
return 0;
@@ -671,7 +684,7 @@ reconnect_wait(struct reconnect *fsm, long long int now)
int
reconnect_timeout(struct reconnect *fsm, long long int now)
{
- long long int deadline = reconnect_deadline__(fsm);
+ long long int deadline = reconnect_deadline__(fsm, now);
if (deadline != LLONG_MAX) {
long long int remaining = deadline - now;
return MAX(0, MIN(INT_MAX, remaining));
diff --git a/lib/stopwatch.c b/lib/stopwatch.c
index f5602163bc..1c71df1a12 100644
--- a/lib/stopwatch.c
+++ b/lib/stopwatch.c
@@ -114,7 +114,6 @@ static void
calc_percentile(unsigned long long n_samples, struct percentile *pctl,
unsigned long long new_sample)
{
-
if (n_samples < P_SQUARE_MIN) {
pctl->samples[n_samples - 1] = new_sample;
}
@@ -228,13 +227,12 @@ add_sample(struct stopwatch *sw, unsigned long long new_sample)
sw->min = new_sample;
}
- calc_percentile(sw->n_samples, &sw->pctl, new_sample);
-
if (sw->n_samples++ == 0) {
sw->short_term.average = sw->long_term.average = new_sample;
return;
}
+ calc_percentile(sw->n_samples, &sw->pctl, new_sample);
calc_average(&sw->short_term, new_sample);
calc_average(&sw->long_term, new_sample);
}
diff --git a/lib/stp.c b/lib/stp.c
index 809b405a52..a869b5f390 100644
--- a/lib/stp.c
+++ b/lib/stp.c
@@ -737,7 +737,7 @@ void
stp_received_bpdu(struct stp_port *p, const void *bpdu, size_t bpdu_size)
{
struct stp *stp = p->stp;
- const struct stp_bpdu_header *header;
+ struct stp_bpdu_header header;
ovs_mutex_lock(&mutex);
if (p->state == STP_DISABLED) {
@@ -750,19 +750,19 @@ stp_received_bpdu(struct stp_port *p, const void *bpdu, size_t bpdu_size)
goto out;
}
- header = bpdu;
- if (header->protocol_id != htons(STP_PROTOCOL_ID)) {
+ memcpy(&header, bpdu, sizeof header);
+ if (header.protocol_id != htons(STP_PROTOCOL_ID)) {
VLOG_WARN("%s: received BPDU with unexpected protocol ID %"PRIu16,
- stp->name, ntohs(header->protocol_id));
+ stp->name, ntohs(header.protocol_id));
p->error_count++;
goto out;
}
- if (header->protocol_version != STP_PROTOCOL_VERSION) {
+ if (header.protocol_version != STP_PROTOCOL_VERSION) {
VLOG_DBG("%s: received BPDU with unexpected protocol version %"PRIu8,
- stp->name, header->protocol_version);
+ stp->name, header.protocol_version);
}
- switch (header->bpdu_type) {
+ switch (header.bpdu_type) {
case STP_TYPE_CONFIG:
if (bpdu_size < sizeof(struct stp_config_bpdu)) {
VLOG_WARN("%s: received config BPDU with invalid size %"PRIuSIZE,
@@ -785,7 +785,7 @@ stp_received_bpdu(struct stp_port *p, const void *bpdu, size_t bpdu_size)
default:
VLOG_WARN("%s: received BPDU of unexpected type %"PRIu8,
- stp->name, header->bpdu_type);
+ stp->name, header.bpdu_type);
p->error_count++;
goto out;
}
diff --git a/lib/tc.c b/lib/tc.c
index 38a1dfc0eb..adb2d3182a 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -2545,6 +2545,17 @@ nl_msg_put_flower_rewrite_pedits(struct ofpbuf *request,
return 0;
}
+/* Appends to 'request' one nested attribute of type 'act_index' whose
+ * payload is written by nl_msg_put_act_tunnel_key_release() followed by
+ * nl_msg_put_act_flags(). */
+static void
+nl_msg_put_flower_acts_release(struct ofpbuf *request, uint16_t act_index)
+{
+    size_t nest_ofs = nl_msg_start_nested(request, act_index);
+
+    nl_msg_put_act_tunnel_key_release(request);
+    nl_msg_put_act_flags(request);
+
+    nl_msg_end_nested(request, nest_ofs);
+}
+
static int
nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower)
{
@@ -2579,6 +2590,11 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower)
}
break;
case TC_ACT_ENCAP: {
+ if (!released && flower->tunnel) {
+ nl_msg_put_flower_acts_release(request, act_index++);
+ released = true;
+ }
+
act_offset = nl_msg_start_nested(request, act_index++);
nl_msg_put_act_tunnel_key_set(request, action->encap.id_present,
action->encap.id,
@@ -2636,10 +2652,7 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower)
break;
case TC_ACT_OUTPUT: {
if (!released && flower->tunnel) {
- act_offset = nl_msg_start_nested(request, act_index++);
- nl_msg_put_act_tunnel_key_release(request);
- nl_msg_put_act_flags(request);
- nl_msg_end_nested(request, act_offset);
+ nl_msg_put_flower_acts_release(request, act_index++);
released = true;
}
diff --git a/lib/tnl-neigh-cache.c b/lib/tnl-neigh-cache.c
index 5bda4af7e0..995c88bf17 100644
--- a/lib/tnl-neigh-cache.c
+++ b/lib/tnl-neigh-cache.c
@@ -32,6 +32,7 @@
#include "errno.h"
#include "flow.h"
#include "netdev.h"
+#include "ovs-atomic.h"
#include "ovs-thread.h"
#include "packets.h"
#include "openvswitch/poll-loop.h"
@@ -44,14 +45,13 @@
#include "openvswitch/vlog.h"
-/* In seconds */
-#define NEIGH_ENTRY_DEFAULT_IDLE_TIME (15 * 60)
+#define NEIGH_ENTRY_DEFAULT_IDLE_TIME_MS (15 * 60 * 1000)
struct tnl_neigh_entry {
struct cmap_node cmap_node;
struct in6_addr ip;
struct eth_addr mac;
- time_t expires; /* Expiration time. */
+ atomic_llong expires; /* Expiration time in ms. */
char br_name[IFNAMSIZ];
};
@@ -64,6 +64,16 @@ tnl_neigh_hash(const struct in6_addr *ip)
return hash_bytes(ip->s6_addr, 16, 0);
}
+/* Returns true if the expiration time of 'neigh', read atomically with
+ * acquire ordering, is not later than the current time_msec(). */
+static bool
+tnl_neigh_expired(struct tnl_neigh_entry *neigh)
+{
+    long long deadline;
+
+    atomic_read_explicit(&neigh->expires, &deadline, memory_order_acquire);
+    return time_msec() >= deadline;
+}
+
static struct tnl_neigh_entry *
tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const struct in6_addr *dst)
{
@@ -73,11 +83,13 @@ tnl_neigh_lookup__(const char br_name[IFNAMSIZ], const struct in6_addr *dst)
hash = tnl_neigh_hash(dst);
CMAP_FOR_EACH_WITH_HASH (neigh, cmap_node, hash, &table) {
if (ipv6_addr_equals(&neigh->ip, dst) && !strcmp(neigh->br_name, br_name)) {
- if (neigh->expires <= time_now()) {
+ if (tnl_neigh_expired(neigh)) {
return NULL;
}
- neigh->expires = time_now() + NEIGH_ENTRY_DEFAULT_IDLE_TIME;
+ atomic_store_explicit(&neigh->expires, time_msec() +
+ NEIGH_ENTRY_DEFAULT_IDLE_TIME_MS,
+ memory_order_release);
return neigh;
}
}
@@ -113,15 +125,16 @@ tnl_neigh_delete(struct tnl_neigh_entry *neigh)
ovsrcu_postpone(neigh_entry_free, neigh);
}
-static void
-tnl_neigh_set__(const char name[IFNAMSIZ], const struct in6_addr *dst,
- const struct eth_addr mac)
+void
+tnl_neigh_set(const char name[IFNAMSIZ], const struct in6_addr *dst,
+ const struct eth_addr mac)
{
ovs_mutex_lock(&mutex);
struct tnl_neigh_entry *neigh = tnl_neigh_lookup__(name, dst);
if (neigh) {
if (eth_addr_equals(neigh->mac, mac)) {
- neigh->expires = time_now() + NEIGH_ENTRY_DEFAULT_IDLE_TIME;
+ atomic_store_relaxed(&neigh->expires, time_msec() +
+ NEIGH_ENTRY_DEFAULT_IDLE_TIME_MS);
ovs_mutex_unlock(&mutex);
return;
}
@@ -133,7 +146,8 @@ tnl_neigh_set__(const char name[IFNAMSIZ], const struct in6_addr *dst,
neigh->ip = *dst;
neigh->mac = mac;
- neigh->expires = time_now() + NEIGH_ENTRY_DEFAULT_IDLE_TIME;
+ atomic_store_relaxed(&neigh->expires, time_msec() +
+ NEIGH_ENTRY_DEFAULT_IDLE_TIME_MS);
ovs_strlcpy(neigh->br_name, name, sizeof neigh->br_name);
cmap_insert(&table, &neigh->cmap_node, tnl_neigh_hash(&neigh->ip));
ovs_mutex_unlock(&mutex);
@@ -144,12 +158,12 @@ tnl_arp_set(const char name[IFNAMSIZ], ovs_be32 dst,
const struct eth_addr mac)
{
struct in6_addr dst6 = in6_addr_mapped_ipv4(dst);
- tnl_neigh_set__(name, &dst6, mac);
+ tnl_neigh_set(name, &dst6, mac);
}
static int
tnl_arp_snoop(const struct flow *flow, struct flow_wildcards *wc,
- const char name[IFNAMSIZ])
+ const char name[IFNAMSIZ], bool allow_update)
{
/* Snoop normal ARP replies and gratuitous ARP requests/replies only */
if (!is_arp(flow)
@@ -159,13 +173,17 @@ tnl_arp_snoop(const struct flow *flow, struct flow_wildcards *wc,
return EINVAL;
}
- tnl_arp_set(name, FLOW_WC_GET_AND_MASK_WC(flow, wc, nw_src), flow->arp_sha);
+ memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
+
+ if (allow_update) {
+ tnl_arp_set(name, flow->nw_src, flow->arp_sha);
+ }
return 0;
}
static int
tnl_nd_snoop(const struct flow *flow, struct flow_wildcards *wc,
- const char name[IFNAMSIZ])
+ const char name[IFNAMSIZ], bool allow_update)
{
if (!is_nd(flow, wc) || flow->tp_src != htons(ND_NEIGHBOR_ADVERT)) {
return EINVAL;
@@ -184,20 +202,22 @@ tnl_nd_snoop(const struct flow *flow, struct flow_wildcards *wc,
memset(&wc->masks.ipv6_dst, 0xff, sizeof wc->masks.ipv6_dst);
memset(&wc->masks.nd_target, 0xff, sizeof wc->masks.nd_target);
- tnl_neigh_set__(name, &flow->nd_target, flow->arp_tha);
+ if (allow_update) {
+ tnl_neigh_set(name, &flow->nd_target, flow->arp_tha);
+ }
return 0;
}
int
tnl_neigh_snoop(const struct flow *flow, struct flow_wildcards *wc,
- const char name[IFNAMSIZ])
+ const char name[IFNAMSIZ], bool allow_update)
{
int res;
- res = tnl_arp_snoop(flow, wc, name);
+ res = tnl_arp_snoop(flow, wc, name, allow_update);
if (res != EINVAL) {
return res;
}
- return tnl_nd_snoop(flow, wc, name);
+ return tnl_nd_snoop(flow, wc, name, allow_update);
}
void
@@ -208,7 +228,7 @@ tnl_neigh_cache_run(void)
ovs_mutex_lock(&mutex);
CMAP_FOR_EACH(neigh, cmap_node, &table) {
- if (neigh->expires <= time_now()) {
+ if (tnl_neigh_expired(neigh)) {
tnl_neigh_delete(neigh);
changed = true;
}
@@ -294,7 +314,7 @@ tnl_neigh_cache_add(struct unixctl_conn *conn, int argc OVS_UNUSED,
return;
}
- tnl_neigh_set__(br_name, &ip6, mac);
+ tnl_neigh_set(br_name, &ip6, mac);
unixctl_command_reply(conn, "OK");
}
@@ -319,7 +339,7 @@ tnl_neigh_cache_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
ds_put_format(&ds, ETH_ADDR_FMT" %s",
ETH_ADDR_ARGS(neigh->mac), neigh->br_name);
- if (neigh->expires <= time_now()) {
+ if (tnl_neigh_expired(neigh)) {
ds_put_format(&ds, " STALE");
}
ds_put_char(&ds, '\n');
diff --git a/lib/tnl-neigh-cache.h b/lib/tnl-neigh-cache.h
index e4b42b0594..877bca3127 100644
--- a/lib/tnl-neigh-cache.h
+++ b/lib/tnl-neigh-cache.h
@@ -32,7 +32,9 @@
#include "util.h"
int tnl_neigh_snoop(const struct flow *flow, struct flow_wildcards *wc,
- const char dev_name[IFNAMSIZ]);
+ const char dev_name[IFNAMSIZ], bool allow_update);
+void tnl_neigh_set(const char name[IFNAMSIZ], const struct in6_addr *dst,
+ const struct eth_addr mac);
int tnl_neigh_lookup(const char dev_name[IFNAMSIZ], const struct in6_addr *dst,
struct eth_addr *mac);
void tnl_neigh_cache_init(void);
diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
index 1c9c720f04..57f94df544 100644
--- a/ofproto/ofproto-dpif-upcall.c
+++ b/ofproto/ofproto-dpif-upcall.c
@@ -2971,11 +2971,11 @@ upcall_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
}
ds_put_char(&ds, '\n');
- for (i = 0; i < n_revalidators; i++) {
+ for (i = 0; i < udpif->n_revalidators; i++) {
struct revalidator *revalidator = &udpif->revalidators[i];
int j, elements = 0;
- for (j = i; j < N_UMAPS; j += n_revalidators) {
+ for (j = i; j < N_UMAPS; j += udpif->n_revalidators) {
elements += cmap_count(&udpif->ukeys[j].cmap);
}
ds_put_format(&ds, " %u: (keys %d)\n", revalidator->id, elements);
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index a426fcfeb6..aeff3b2679 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -460,7 +460,7 @@ static void xlate_commit_actions(struct xlate_ctx *ctx);
static void
patch_port_output(struct xlate_ctx *ctx, const struct xport *in_dev,
- struct xport *out_dev);
+ struct xport *out_dev, bool is_last_action);
static void
ctx_trigger_freeze(struct xlate_ctx *ctx)
@@ -3272,7 +3272,9 @@ compose_ipfix_action(struct xlate_ctx *ctx, odp_port_t output_odp_port)
struct dpif_ipfix *ipfix = ctx->xbridge->ipfix;
odp_port_t tunnel_out_port = ODPP_NONE;
- if (!ipfix || ctx->xin->flow.in_port.ofp_port == OFPP_NONE) {
+ if (!ipfix ||
+ (output_odp_port == ODPP_NONE &&
+ ctx->xin->flow.in_port.ofp_port == OFPP_NONE)) {
return;
}
@@ -3598,7 +3600,7 @@ propagate_tunnel_data_to_flow(struct xlate_ctx *ctx, struct eth_addr dmac,
static int
native_tunnel_output(struct xlate_ctx *ctx, const struct xport *xport,
const struct flow *flow, odp_port_t tunnel_odp_port,
- bool truncate)
+ bool truncate, bool is_last_action)
{
struct netdev_tnl_build_header_params tnl_params;
struct ovs_action_push_tnl tnl_push_data;
@@ -3728,7 +3730,7 @@ native_tunnel_output(struct xlate_ctx *ctx, const struct xport *xport,
entry->tunnel_hdr.hdr_size = tnl_push_data.header_len;
entry->tunnel_hdr.operation = ADD;
- patch_port_output(ctx, xport, out_dev);
+ patch_port_output(ctx, xport, out_dev, is_last_action);
/* Similar to the stats update in revalidation, the x_cache entries
* are populated by the previous translation are used to update the
@@ -3822,7 +3824,7 @@ xlate_flow_is_protected(const struct xlate_ctx *ctx, const struct flow *flow, co
*/
static void
patch_port_output(struct xlate_ctx *ctx, const struct xport *in_dev,
- struct xport *out_dev)
+ struct xport *out_dev, bool is_last_action)
{
struct flow *flow = &ctx->xin->flow;
struct flow old_flow = ctx->xin->flow;
@@ -3864,8 +3866,9 @@ patch_port_output(struct xlate_ctx *ctx, const struct xport *in_dev,
if (!process_special(ctx, out_dev) && may_receive(out_dev, ctx)) {
if (xport_stp_forward_state(out_dev) &&
xport_rstp_forward_state(out_dev)) {
+
xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true,
- false, true, clone_xlate_actions);
+ false, is_last_action, clone_xlate_actions);
if (!ctx->freezing) {
xlate_action_set(ctx);
}
@@ -3880,7 +3883,7 @@ patch_port_output(struct xlate_ctx *ctx, const struct xport *in_dev,
mirror_mask_t old_mirrors2 = ctx->mirrors;
xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true,
- false, true, clone_xlate_actions);
+ false, is_last_action, clone_xlate_actions);
ctx->mirrors = old_mirrors2;
ctx->base_flow = old_base_flow;
ctx->odp_actions->size = old_size;
@@ -4097,7 +4100,21 @@ terminate_native_tunnel(struct xlate_ctx *ctx, struct flow *flow,
(flow->dl_type == htons(ETH_TYPE_ARP) ||
flow->nw_proto == IPPROTO_ICMPV6) &&
is_neighbor_reply_correct(ctx, flow)) {
- tnl_neigh_snoop(flow, wc, ctx->xbridge->name);
+ tnl_neigh_snoop(flow, wc, ctx->xbridge->name,
+ ctx->xin->allow_side_effects);
+ } else if (*tnl_port != ODPP_NONE &&
+ ctx->xin->allow_side_effects &&
+ dl_type_is_ip_any(flow->dl_type)) {
+ struct eth_addr mac = flow->dl_src;
+ struct in6_addr s_ip6;
+
+ if (flow->dl_type == htons(ETH_TYPE_IP)) {
+ in6_addr_set_mapped_ipv4(&s_ip6, flow->nw_src);
+ } else {
+ s_ip6 = flow->ipv6_src;
+ }
+
+ tnl_neigh_set(ctx->xbridge->name, &s_ip6, mac);
}
}
@@ -4107,7 +4124,7 @@ terminate_native_tunnel(struct xlate_ctx *ctx, struct flow *flow,
static void
compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
const struct xlate_bond_recirc *xr, bool check_stp,
- bool is_last_action OVS_UNUSED, bool truncate)
+ bool is_last_action, bool truncate)
{
const struct xport *xport = get_ofp_port(ctx->xbridge, ofp_port);
struct flow_wildcards *wc = ctx->wc;
@@ -4144,7 +4161,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
if (truncate) {
xlate_report_error(ctx, "Cannot truncate output to patch port");
}
- patch_port_output(ctx, xport, xport->peer);
+ patch_port_output(ctx, xport, xport->peer, is_last_action);
return;
}
@@ -4239,7 +4256,8 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
xr->recirc_id);
} else if (is_native_tunnel) {
/* Output to native tunnel port. */
- native_tunnel_output(ctx, xport, flow, odp_port, truncate);
+ native_tunnel_output(ctx, xport, flow, odp_port, truncate,
+ is_last_action);
flow->tunnel = flow_tnl; /* Restore tunnel metadata */
} else if (terminate_native_tunnel(ctx, flow, wc,
@@ -6177,11 +6195,32 @@ static void
compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc,
bool is_last_action)
{
- ovs_u128 old_ct_label_mask = ctx->wc->masks.ct_label;
- uint32_t old_ct_mark_mask = ctx->wc->masks.ct_mark;
- size_t ct_offset;
uint16_t zone;
+ if (ofc->zone_src.field) {
+ union mf_subvalue value;
+ memset(&value, 0xff, sizeof(value));
+ zone = mf_get_subfield(&ofc->zone_src, &ctx->xin->flow);
+ if (ctx->xin->frozen_state) {
+ /* If the upcall is a resume of a recirculation, we only need to
+ * unwildcard the fields that are not in the frozen_metadata, as
+ * when the rules update, OVS will generate a new recirc_id,
+ * which will invalidate the megaflow with the old recirc_id.
+ */
+ if (!mf_is_frozen_metadata(ofc->zone_src.field)) {
+ mf_write_subfield_flow(&ofc->zone_src, &value,
+ &ctx->wc->masks);
+ }
+ } else {
+ mf_write_subfield_flow(&ofc->zone_src, &value, &ctx->wc->masks);
+ }
+ } else {
+ zone = ofc->zone_imm;
+ }
+
+ size_t ct_offset;
+ ovs_u128 old_ct_label_mask = ctx->wc->masks.ct_label;
+ uint32_t old_ct_mark_mask = ctx->wc->masks.ct_mark;
/* Ensure that any prior actions are applied before composing the new
* conntrack action. */
xlate_commit_actions(ctx);
@@ -6193,11 +6232,6 @@ compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc,
do_xlate_actions(ofc->actions, ofpact_ct_get_action_len(ofc), ctx,
is_last_action, false);
- if (ofc->zone_src.field) {
- zone = mf_get_subfield(&ofc->zone_src, &ctx->xin->flow);
- } else {
- zone = ofc->zone_imm;
- }
ct_offset = nl_msg_start_nested(ctx->odp_actions, OVS_ACTION_ATTR_CT);
if (ofc->flags & NX_CT_F_COMMIT) {
@@ -6333,6 +6367,7 @@ xlate_check_pkt_larger(struct xlate_ctx *ctx,
* then ctx->exit would be true. Reset to false so that we can
* do flow translation for 'IF_LESS_EQUAL' case. finish_freezing()
* would have taken care of Undoing the changes done for freeze. */
+ bool old_exit = ctx->exit;
ctx->exit = false;
offset_attr = nl_msg_start_nested(
@@ -6357,7 +6392,7 @@ xlate_check_pkt_larger(struct xlate_ctx *ctx,
ctx->was_mpls = old_was_mpls;
ctx->conntracked = old_conntracked;
ctx->xin->flow = old_flow;
- ctx->exit = true;
+ ctx->exit = old_exit;
}
static void
@@ -6738,13 +6773,14 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
return;
}
+ bool exit = false;
OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
struct ofpact_controller *controller;
const struct ofpact_metadata *metadata;
const struct ofpact_set_field *set_field;
const struct mf_field *mf;
bool last = is_last_action && ofpact_last(a, ofpacts, ofpacts_len)
- && ctx->action_set.size;
+ && !ctx->action_set.size;
if (ctx->error) {
break;
@@ -6752,7 +6788,7 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
recirc_for_mpls(a, ctx);
- if (ctx->exit) {
+ if (ctx->exit || exit) {
/* Check if need to store the remaining actions for later
* execution. */
if (ctx->freezing) {
@@ -7149,17 +7185,18 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
break;
case OFPACT_CHECK_PKT_LARGER: {
- if (last) {
- /* If this is last action, then there is no need to
- * translate the action. */
- break;
- }
const struct ofpact *remaining_acts = ofpact_next(a);
size_t remaining_acts_len = ofpact_remaining_len(remaining_acts,
ofpacts,
ofpacts_len);
xlate_check_pkt_larger(ctx, ofpact_get_CHECK_PKT_LARGER(a),
remaining_acts, remaining_acts_len);
+ if (ctx->xbridge->support.check_pkt_len) {
+ /* If datapath supports check_pkt_len, then
+ * xlate_check_pkt_larger() does the translation for the
+ * ofpacts following 'a'. */
+ exit = true;
+ }
break;
}
}
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index cba49a99e1..c012cebae1 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -6496,6 +6496,7 @@ ofproto_unixctl_dpif_show_dp_features(struct unixctl_conn *conn,
dpif_show_support(&ofproto->backer->bt_support, &ds);
unixctl_command_reply(conn, ds_cstr(&ds));
+ ds_destroy(&ds);
}
static void
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index 57c7d17cb2..14b909973d 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -66,6 +66,7 @@ struct bfd_cfg;
struct meter;
struct ofoperation;
struct ofproto_packet_out;
+struct rule_collection;
struct smap;
extern struct ovs_mutex ofproto_mutex;
@@ -115,6 +116,9 @@ struct ofproto {
/* List of expirable flows, in all flow tables. */
struct ovs_list expirable OVS_GUARDED_BY(ofproto_mutex);
+ /* List of flows to remove from flow tables. */
+ struct rule_collection *to_remove OVS_GUARDED_BY(ofproto_mutex);
+
/* Meter table. */
struct ofputil_meter_features meter_features;
struct hmap meters; /* uint32_t indexed 'struct meter *'. */
@@ -1962,6 +1966,7 @@ struct ofproto_flow_mod {
bool modify_may_add_flow;
bool modify_keep_counts;
enum nx_flow_update_event event;
+ uint8_t table_id;
/* These are only used during commit execution.
* ofproto_flow_mod_uninit() does NOT clean these up. */
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index bd6103b1c8..56aeac7209 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -213,6 +213,8 @@ static void ofproto_rule_insert__(struct ofproto *, struct rule *)
OVS_REQUIRES(ofproto_mutex);
static void ofproto_rule_remove__(struct ofproto *, struct rule *)
OVS_REQUIRES(ofproto_mutex);
+static void remove_rules_postponed(struct rule_collection *)
+ OVS_REQUIRES(ofproto_mutex);
/* The source of an OpenFlow request.
*
@@ -530,6 +532,8 @@ ofproto_create(const char *datapath_name, const char *datapath_type,
hindex_init(&ofproto->cookies);
hmap_init(&ofproto->learned_cookies);
ovs_list_init(&ofproto->expirable);
+ ofproto->to_remove = xzalloc(sizeof *ofproto->to_remove);
+ rule_collection_init(ofproto->to_remove);
ofproto->connmgr = connmgr_create(ofproto, datapath_name, datapath_name);
ofproto->min_mtu = INT_MAX;
cmap_init(&ofproto->groups);
@@ -1631,6 +1635,7 @@ ofproto_flush__(struct ofproto *ofproto, bool del)
}
ofproto_group_delete_all__(ofproto);
meter_delete_all(ofproto);
+ remove_rules_postponed(ofproto->to_remove);
/* XXX: Concurrent handler threads may insert new learned flows based on
* learn actions of the now deleted flows right after we release
* 'ofproto_mutex'. */
@@ -1682,6 +1687,11 @@ ofproto_destroy__(struct ofproto *ofproto)
ovs_assert(hmap_is_empty(&ofproto->learned_cookies));
hmap_destroy(&ofproto->learned_cookies);
+ ovs_mutex_lock(&ofproto_mutex);
+ rule_collection_destroy(ofproto->to_remove);
+ free(ofproto->to_remove);
+ ovs_mutex_unlock(&ofproto_mutex);
+
ofproto->ofproto_class->dealloc(ofproto);
}
@@ -1878,6 +1888,9 @@ ofproto_run(struct ofproto *p)
connmgr_run(p->connmgr, handle_openflow);
+ ovs_mutex_lock(&ofproto_mutex);
+ remove_rules_postponed(p->to_remove);
+ ovs_mutex_unlock(&ofproto_mutex);
return error;
}
@@ -4437,6 +4450,20 @@ rule_criteria_destroy(struct rule_criteria *criteria)
criteria->version = OVS_VERSION_NOT_REMOVED; /* Mark as destroyed. */
}
+/* Adds rules to the 'to_remove' collection, so they can be destroyed
+ * later all together. Destroys 'rules'. */
+static void
+rules_mark_for_removal(struct ofproto *ofproto, struct rule_collection *rules)
+ OVS_REQUIRES(ofproto_mutex)
+{
+ struct rule *rule;
+
+ RULE_COLLECTION_FOR_EACH (rule, rules) {
+ rule_collection_add(ofproto->to_remove, rule);
+ }
+ rule_collection_destroy(rules);
+}
+
/* Schedules postponed removal of rules, destroys 'rules'. */
static void
remove_rules_postponed(struct rule_collection *rules)
@@ -5833,7 +5860,7 @@ modify_flows_finish(struct ofproto *ofproto, struct ofproto_flow_mod *ofm,
}
}
learned_cookies_flush(ofproto, &dead_cookies);
- remove_rules_postponed(old_rules);
+ rules_mark_for_removal(ofproto, old_rules);
}
return error;
@@ -5941,7 +5968,7 @@ delete_flows_finish__(struct ofproto *ofproto,
learned_cookies_dec(ofproto, rule_get_actions(rule),
&dead_cookies);
}
- remove_rules_postponed(rules);
+ rules_mark_for_removal(ofproto, rules);
learned_cookies_flush(ofproto, &dead_cookies);
}
@@ -7967,6 +7994,7 @@ ofproto_flow_mod_init(struct ofproto *ofproto, struct ofproto_flow_mod *ofm,
ofm->criteria.version = OVS_VERSION_NOT_REMOVED;
ofm->conjs = NULL;
ofm->n_conjs = 0;
+ ofm->table_id = fm->table_id;
bool check_buffer_id = false;
@@ -8104,6 +8132,33 @@ ofproto_flow_mod_finish(struct ofproto *ofproto, struct ofproto_flow_mod *ofm,
return error;
}
+static void
+ofproto_table_classifier_defer(struct ofproto *ofproto,
+ const struct ofproto_flow_mod *ofm)
+{
+ if (check_table_id(ofproto, ofm->table_id)) {
+ if (ofm->table_id == OFPTT_ALL) {
+ struct oftable *table;
+
+ OFPROTO_FOR_EACH_TABLE (table, ofproto) {
+ classifier_defer(&table->cls);
+ }
+ } else {
+ classifier_defer(&ofproto->tables[ofm->table_id].cls);
+ }
+ }
+}
+
+static void
+ofproto_publish_classifiers(struct ofproto *ofproto)
+{
+ struct oftable *table;
+
+ OFPROTO_FOR_EACH_TABLE (table, ofproto) {
+ classifier_publish(&table->cls);
+ }
+}
+
/* Commit phases (all while locking ofproto_mutex):
*
* 1. Begin: Gather resources and make changes visible in the next version.
@@ -8165,6 +8220,10 @@ do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags)
/* Store the version in which the changes should take
* effect. */
be->ofm.version = version;
+ /* Publishing of the classifier update for every flow
+ * modification in a bundle separately is expensive in
+ * CPU time and memory. Deferring. */
+ ofproto_table_classifier_defer(ofproto, &be->ofm);
error = ofproto_flow_mod_start(ofproto, &be->ofm);
} else if (be->type == OFPTYPE_GROUP_MOD) {
/* Store the version in which the changes should take
@@ -8173,6 +8232,9 @@ do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags)
error = ofproto_group_mod_start(ofproto, &be->ogm);
} else if (be->type == OFPTYPE_PACKET_OUT) {
be->opo.version = version;
+ /* Need to use current version of flows for packet-out,
+ * so publishing all classifiers now. */
+ ofproto_publish_classifiers(ofproto);
error = ofproto_packet_out_start(ofproto, &be->opo);
} else {
OVS_NOT_REACHED();
@@ -8183,6 +8245,9 @@ do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags)
}
}
+ /* Publishing all changes made to classifiers. */
+ ofproto_publish_classifiers(ofproto);
+
if (error) {
/* Send error referring to the original message. */
ofconn_send_error(ofconn, be->msg, error);
@@ -8191,14 +8256,23 @@ do_bundle_commit(struct ofconn *ofconn, uint32_t id, uint16_t flags)
/* 2. Revert. Undo all the changes made above. */
LIST_FOR_EACH_REVERSE_CONTINUE(be, node, &bundle->msg_list) {
if (be->type == OFPTYPE_FLOW_MOD) {
+ /* Publishing of the classifier update for every flow
+ * modification in a bundle separately is expensive in
+ * CPU time and memory. Deferring. */
+ ofproto_table_classifier_defer(ofproto, &be->ofm);
ofproto_flow_mod_revert(ofproto, &be->ofm);
} else if (be->type == OFPTYPE_GROUP_MOD) {
ofproto_group_mod_revert(ofproto, &be->ogm);
} else if (be->type == OFPTYPE_PACKET_OUT) {
+ /* Need to use current version of flows for packet-out,
+ * so publishing all classifiers now. */
+ ofproto_publish_classifiers(ofproto);
ofproto_packet_out_revert(ofproto, &be->opo);
}
/* Nothing needs to be reverted for a port mod. */
}
+ /* Publishing all changes made to classifiers. */
+ ofproto_publish_classifiers(ofproto);
} else {
/* 4. Finish. */
LIST_FOR_EACH (be, node, &bundle->msg_list) {
diff --git a/ovsdb/file.c b/ovsdb/file.c
index 59220824fa..9f44007d97 100644
--- a/ovsdb/file.c
+++ b/ovsdb/file.c
@@ -113,19 +113,17 @@ ovsdb_file_update_row_from_json(struct ovsdb_row *row, bool converting,
if (row_contains_diff
&& !ovsdb_datum_is_default(&row->fields[column->index],
&column->type)) {
- struct ovsdb_datum new_datum;
-
- error = ovsdb_datum_apply_diff(&new_datum,
+ error = ovsdb_datum_apply_diff_in_place(
&row->fields[column->index],
&datum, &column->type);
ovsdb_datum_destroy(&datum, &column->type);
if (error) {
return error;
}
- ovsdb_datum_swap(&datum, &new_datum);
+ } else {
+ ovsdb_datum_swap(&row->fields[column->index], &datum);
+ ovsdb_datum_destroy(&datum, &column->type);
}
- ovsdb_datum_swap(&row->fields[column->index], &datum);
- ovsdb_datum_destroy(&datum, &column->type);
}
return NULL;
diff --git a/ovsdb/monitor.c b/ovsdb/monitor.c
index 532dedcb64..ab814cf20e 100644
--- a/ovsdb/monitor.c
+++ b/ovsdb/monitor.c
@@ -1231,6 +1231,15 @@ ovsdb_monitor_get_update(
condition,
ovsdb_monitor_compose_row_update2);
if (!condition || !condition->conditional) {
+ if (json) {
+ struct json *json_serialized;
+
+ /* Pre-serializing the object to avoid doing this
+ * for every client. */
+ json_serialized = json_serialized_object_create(json);
+ json_destroy(json);
+ json = json_serialized;
+ }
ovsdb_monitor_json_cache_insert(dbmon, version, mcs,
json);
}
diff --git a/ovsdb/mutation.c b/ovsdb/mutation.c
index 56edc5f000..03d1c3499e 100644
--- a/ovsdb/mutation.c
+++ b/ovsdb/mutation.c
@@ -383,7 +383,7 @@ ovsdb_mutation_set_execute(struct ovsdb_row *row,
break;
case OVSDB_M_INSERT:
- ovsdb_datum_union(dst, arg, dst_type, false);
+ ovsdb_datum_union(dst, arg, dst_type);
error = ovsdb_mutation_check_count(dst, dst_type);
break;
diff --git a/ovsdb/ovsdb-idlc.in b/ovsdb/ovsdb-idlc.in
index 61cded16d3..a2ee10af1b 100755
--- a/ovsdb/ovsdb-idlc.in
+++ b/ovsdb/ovsdb-idlc.in
@@ -551,20 +551,20 @@ static void
print(" smap_init(&row->%s);" % columnName)
print(" for (size_t i = 0; i < datum->n; i++) {")
print(" smap_add(&row->%s," % columnName)
- print(" datum->keys[i].string,")
- print(" datum->values[i].string);")
+ print(" datum->keys[i].s->string,")
+ print(" datum->values[i].s->string);")
print(" }")
elif (type.n_min == 1 and type.n_max == 1) or type.is_optional_pointer():
print("")
print(" if (datum->n >= 1) {")
if not type.key.ref_table:
- print(" %s = datum->keys[0].%s;" % (keyVar, type.key.type.to_string()))
+ print(" %s = datum->keys[0].%s;" % (keyVar, type.key.type.to_rvalue_string()))
else:
print(" %s = %s%s_cast(ovsdb_idl_get_row_arc(row_, &%stable_%s, &datum->keys[0].uuid));" % (keyVar, prefix, type.key.ref_table.name.lower(), prefix, type.key.ref_table.name.lower()))
if valueVar:
if not type.value.ref_table:
- print(" %s = datum->values[0].%s;" % (valueVar, type.value.type.to_string()))
+ print(" %s = datum->values[0].%s;" % (valueVar, type.value.type.to_rvalue_string()))
else:
print(" %s = %s%s_cast(ovsdb_idl_get_row_arc(row_, &%stable_%s, &datum->values[0].uuid));" % (valueVar, prefix, type.value.ref_table.name.lower(), prefix, type.value.ref_table.name.lower()))
print(" } else {")
@@ -592,7 +592,7 @@ static void
""" % (prefix, type.key.ref_table.name.lower(), prefix, type.key.ref_table.name.lower(), prefix, type.key.ref_table.name.lower()))
keySrc = "keyRow"
else:
- keySrc = "datum->keys[i].%s" % type.key.type.to_string()
+ keySrc = "datum->keys[i].%s" % type.key.type.to_rvalue_string()
if type.value and type.value.ref_table:
print("""\
struct %s%s *valueRow = %s%s_cast(ovsdb_idl_get_row_arc(row_, &%stable_%s, &datum->values[i].uuid));
@@ -602,7 +602,7 @@ static void
""" % (prefix, type.value.ref_table.name.lower(), prefix, type.value.ref_table.name.lower(), prefix, type.value.ref_table.name.lower()))
valueSrc = "valueRow"
elif valueVar:
- valueSrc = "datum->values[i].%s" % type.value.type.to_string()
+ valueSrc = "datum->values[i].%s" % type.value.type.to_rvalue_string()
print(" if (!row->n_%s) {" % (columnName))
print(" %s = xmalloc(%s * sizeof *%s);" % (
@@ -910,45 +910,45 @@ void
'args': ', '.join(['%(type)s%(name)s'
% m for m in members])})
if type.n_min == 1 and type.n_max == 1:
- print(" union ovsdb_atom key;")
+ print(" union ovsdb_atom *key = xmalloc(sizeof *key);")
if type.value:
- print(" union ovsdb_atom value;")
+ print(" union ovsdb_atom *value = xmalloc(sizeof *value);")
print("")
print(" datum.n = 1;")
- print(" datum.keys = &key;")
- print(" " + type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), keyVar))
+ print(" datum.keys = key;")
+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), keyVar))
if type.value:
- print(" datum.values = &value;")
- print(" "+ type.value.assign_c_value_casting_away_const("value.%s" % type.value.type.to_string(), valueVar))
+ print(" datum.values = value;")
+ print(" " + type.value.copyCValue("value->%s" % type.value.type.to_lvalue_string(), valueVar))
else:
print(" datum.values = NULL;")
- txn_write_func = "ovsdb_idl_txn_write_clone"
+ txn_write_func = "ovsdb_idl_txn_write"
elif type.is_optional_pointer():
- print(" union ovsdb_atom key;")
print("")
print(" if (%s) {" % keyVar)
+ print(" union ovsdb_atom *key = xmalloc(sizeof *key);")
print(" datum.n = 1;")
- print(" datum.keys = &key;")
- print(" " + type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), keyVar))
+ print(" datum.keys = key;")
+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), keyVar))
print(" } else {")
print(" datum.n = 0;")
print(" datum.keys = NULL;")
print(" }")
print(" datum.values = NULL;")
- txn_write_func = "ovsdb_idl_txn_write_clone"
+ txn_write_func = "ovsdb_idl_txn_write"
elif type.n_max == 1:
- print(" union ovsdb_atom key;")
print("")
print(" if (%s) {" % nVar)
+ print(" union ovsdb_atom *key = xmalloc(sizeof *key);")
print(" datum.n = 1;")
- print(" datum.keys = &key;")
- print(" " + type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), "*" + keyVar))
+ print(" datum.keys = key;")
+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), "*" + keyVar))
print(" } else {")
print(" datum.n = 0;")
print(" datum.keys = NULL;")
print(" }")
print(" datum.values = NULL;")
- txn_write_func = "ovsdb_idl_txn_write_clone"
+ txn_write_func = "ovsdb_idl_txn_write"
else:
print("")
print(" datum.n = %s;" % nVar)
@@ -958,9 +958,9 @@ void
else:
print(" datum.values = NULL;")
print(" for (size_t i = 0; i < %s; i++) {" % nVar)
- print(" " + type.key.copyCValue("datum.keys[i].%s" % type.key.type.to_string(), "%s[i]" % keyVar))
+ print(" " + type.key.copyCValue("datum.keys[i].%s" % type.key.type.to_lvalue_string(), "%s[i]" % keyVar))
if type.value:
- print(" " + type.value.copyCValue("datum.values[i].%s" % type.value.type.to_string(), "%s[i]" % valueVar))
+ print(" " + type.value.copyCValue("datum.values[i].%s" % type.value.type.to_lvalue_string(), "%s[i]" % valueVar))
print(" }")
if type.value:
valueType = type.value.toAtomicType()
@@ -996,9 +996,8 @@ void
''' % {'s': structName, 'c': columnName,'coltype':column.type.key.to_const_c_type(prefix),
'valtype':column.type.value.to_const_c_type(prefix), 'S': structName.upper(),
'C': columnName.upper(), 't': tableName})
-
- print(" "+ type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_string(), "new_key"))
- print(" "+ type.value.copyCValue("datum->values[0].%s" % type.value.type.to_string(), "new_value"))
+ print(" " + type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_lvalue_string(), "new_key"))
+ print(" " + type.value.copyCValue("datum->values[0].%s" % type.value.type.to_lvalue_string(), "new_value"))
print('''
ovsdb_idl_txn_write_partial_map(&row->header_,
&%(s)s_col_%(c)s,
@@ -1022,8 +1021,7 @@ void
''' % {'s': structName, 'c': columnName,'coltype':column.type.key.to_const_c_type(prefix),
'valtype':column.type.value.to_const_c_type(prefix), 'S': structName.upper(),
'C': columnName.upper(), 't': tableName})
-
- print(" "+ type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_string(), "delete_key"))
+ print(" " + type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_lvalue_string(), "delete_key"))
print('''
ovsdb_idl_txn_delete_partial_map(&row->header_,
&%(s)s_col_%(c)s,
@@ -1049,8 +1047,7 @@ void
datum->values = NULL;
''' % {'s': structName, 'c': columnName,
'valtype':column.type.key.to_const_c_type(prefix), 't': tableName})
-
- print(" "+ type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_string(), "new_value"))
+ print(" " + type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_lvalue_string(), "new_value"))
print('''
ovsdb_idl_txn_write_partial_set(&row->header_,
&%(s)s_col_%(c)s,
@@ -1074,8 +1071,7 @@ void
''' % {'s': structName, 'c': columnName,'coltype':column.type.key.to_const_c_type(prefix),
'valtype':column.type.key.to_const_c_type(prefix), 'S': structName.upper(),
'C': columnName.upper(), 't': tableName})
-
- print(" "+ type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_string(), "delete_value"))
+ print(" " + type.key.copyCValue("datum->keys[0].%s" % type.key.type.to_lvalue_string(), "delete_value"))
print('''
ovsdb_idl_txn_delete_partial_set(&row->header_,
&%(s)s_col_%(c)s,
@@ -1143,37 +1139,37 @@ void
print(" struct ovsdb_datum datum;")
free = []
if type.n_min == 1 and type.n_max == 1:
- print(" union ovsdb_atom key;")
+ print(" union ovsdb_atom *key = xmalloc(sizeof *key);")
if type.value:
- print(" union ovsdb_atom value;")
+ print(" union ovsdb_atom *value = xmalloc(sizeof *value);")
print("")
print(" datum.n = 1;")
- print(" datum.keys = &key;")
- print(" " + type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), keyVar, refTable=False))
+ print(" datum.keys = key;")
+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), keyVar, refTable=False))
if type.value:
- print(" datum.values = &value;")
- print(" "+ type.value.assign_c_value_casting_away_const("value.%s" % type.value.type.to_string(), valueVar, refTable=False))
+ print(" datum.values = value;")
+ print(" " + type.value.copyCValue("value->%s" % type.value.type.to_lvalue_string(), valueVar, refTable=False))
else:
print(" datum.values = NULL;")
elif type.is_optional_pointer():
- print(" union ovsdb_atom key;")
print("")
print(" if (%s) {" % keyVar)
+ print(" union ovsdb_atom *key = xmalloc(sizeof *key);")
print(" datum.n = 1;")
- print(" datum.keys = &key;")
- print(" " + type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), keyVar, refTable=False))
+ print(" datum.keys = key;")
+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), keyVar, refTable=False))
print(" } else {")
print(" datum.n = 0;")
print(" datum.keys = NULL;")
print(" }")
print(" datum.values = NULL;")
elif type.n_max == 1:
- print(" union ovsdb_atom key;")
print("")
print(" if (%s) {" % nVar)
+ print(" union ovsdb_atom *key = xmalloc(sizeof *key);")
print(" datum.n = 1;")
- print(" datum.keys = &key;")
- print(" " + type.key.assign_c_value_casting_away_const("key.%s" % type.key.type.to_string(), "*" + keyVar, refTable=False))
+ print(" datum.keys = key;")
+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), "*" + keyVar, refTable=False))
print(" } else {")
print(" datum.n = 0;")
print(" datum.keys = NULL;")
@@ -1182,16 +1177,14 @@ void
else:
print(" datum.n = %s;" % nVar)
print(" datum.keys = %s ? xmalloc(%s * sizeof *datum.keys) : NULL;" % (nVar, nVar))
- free += ['datum.keys']
if type.value:
print(" datum.values = xmalloc(%s * sizeof *datum.values);" % nVar)
- free += ['datum.values']
else:
print(" datum.values = NULL;")
print(" for (size_t i = 0; i < %s; i++) {" % nVar)
- print(" " + type.key.assign_c_value_casting_away_const("datum.keys[i].%s" % type.key.type.to_string(), "%s[i]" % keyVar, refTable=False))
+ print(" " + type.key.copyCValue("datum.keys[i].%s" % type.key.type.to_lvalue_string(), "%s[i]" % keyVar, refTable=False))
if type.value:
- print(" " + type.value.assign_c_value_casting_away_const("datum.values[i].%s" % type.value.type.to_string(), "%s[i]" % valueVar, refTable=False))
+ print(" " + type.value.copyCValue("datum.values[i].%s" % type.value.type.to_lvalue_string(), "%s[i]" % valueVar, refTable=False))
print(" }")
if type.value:
valueType = type.value.toAtomicType()
@@ -1211,8 +1204,8 @@ void
's': structName,
'S': structName.upper(),
'c': columnName})
- for var in free:
- print(" free(%s);" % var)
+ print(" ovsdb_datum_destroy(&datum, &%(s)s_col_%(c)s.type);" \
+ % {'s': structName, 'c': columnName})
print("}")
# Index table related functions
@@ -1272,7 +1265,7 @@ struct ovsdb_idl_cursor
struct ovsdb_idl_index *index, const struct %(s)s *target)
{
ovs_assert(index->table->class_ == &%(p)stable_%(tl)s);
- return ovsdb_idl_cursor_first_ge(index, &target->header_);
+ return ovsdb_idl_cursor_first_ge(index, target ? &target->header_ : NULL);
}
struct %(s)s *
@@ -1309,8 +1302,8 @@ struct %(s)s *
i = 0;
SMAP_FOR_EACH (node, %(c)s) {
- datum->keys[i].string = node->key;
- datum->values[i].string = node->value;
+ datum->keys[i].s = ovsdb_atom_string_create(node->key);
+ datum->values[i].s = ovsdb_atom_string_create(node->value);
i++;
}
ovsdb_datum_sort_unique(datum, OVSDB_TYPE_STRING, OVSDB_TYPE_STRING);
@@ -1359,10 +1352,10 @@ struct %(s)s *
print()
print(" datum.n = 1;")
print(" datum.keys = key;")
- print(" " + type.key.assign_c_value_casting_away_const("key->%s" % type.key.type.to_string(), keyVar))
+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), keyVar))
if type.value:
print(" datum.values = value;")
- print(" "+ type.value.assign_c_value_casting_away_const("value->%s" % type.value.type.to_string(), valueVar))
+ print(" " + type.value.copyCValue("value->%s" % type.value.type.to_lvalue_string(), valueVar))
else:
print(" datum.values = NULL;")
txn_write_func = "ovsdb_idl_index_write"
@@ -1373,7 +1366,7 @@ struct %(s)s *
print(" key = xmalloc(sizeof (union ovsdb_atom));")
print(" datum.n = 1;")
print(" datum.keys = key;")
- print(" " + type.key.assign_c_value_casting_away_const("key->%s" % type.key.type.to_string(), keyVar))
+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), keyVar))
print(" } else {")
print(" datum.n = 0;")
print(" datum.keys = NULL;")
@@ -1387,7 +1380,7 @@ struct %(s)s *
print(" key = xmalloc(sizeof(union ovsdb_atom));")
print(" datum.n = 1;")
print(" datum.keys = key;")
- print(" " + type.key.assign_c_value_casting_away_const("key->%s" % type.key.type.to_string(), "*" + keyVar))
+ print(" " + type.key.copyCValue("key->%s" % type.key.type.to_lvalue_string(), "*" + keyVar))
print(" } else {")
print(" datum.n = 0;")
print(" datum.keys = NULL;")
@@ -1404,9 +1397,9 @@ struct %(s)s *
else:
print(" datum.values = NULL;")
print(" for (i = 0; i < %s; i++) {" % nVar)
- print(" " + type.key.copyCValue("datum.keys[i].%s" % type.key.type.to_string(), "%s[i]" % keyVar))
+ print(" " + type.key.copyCValue("datum.keys[i].%s" % type.key.type.to_lvalue_string(), "%s[i]" % keyVar))
if type.value:
- print(" " + type.value.copyCValue("datum.values[i].%s" % type.value.type.to_string(), "%s[i]" % valueVar))
+ print(" " + type.value.copyCValue("datum.values[i].%s" % type.value.type.to_lvalue_string(), "%s[i]" % valueVar))
print(" }")
if type.value:
valueType = type.value.toAtomicType()
diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c
index 0b3d2bb714..b34d97e291 100644
--- a/ovsdb/ovsdb-server.c
+++ b/ovsdb/ovsdb-server.c
@@ -904,8 +904,8 @@ query_db_string(const struct shash *all_dbs, const char *name,
datum = &row->fields[column->index];
for (i = 0; i < datum->n; i++) {
- if (datum->keys[i].string[0]) {
- return datum->keys[i].string;
+ if (datum->keys[i].s->string[0]) {
+ return datum->keys[i].s->string;
}
}
}
@@ -1018,7 +1018,7 @@ query_db_remotes(const char *name, const struct shash *all_dbs,
datum = &row->fields[column->index];
for (i = 0; i < datum->n; i++) {
- add_remote(remotes, datum->keys[i].string);
+ add_remote(remotes, datum->keys[i].s->string);
}
}
} else if (column->type.key.type == OVSDB_TYPE_UUID
diff --git a/ovsdb/ovsdb-tool.c b/ovsdb/ovsdb-tool.c
index 05a0223e71..d4a9e34cc4 100644
--- a/ovsdb/ovsdb-tool.c
+++ b/ovsdb/ovsdb-tool.c
@@ -919,7 +919,8 @@ print_raft_header(const struct raft_header *h,
if (!uuid_is_zero(&h->snap.eid)) {
printf(" prev_eid: %04x\n", uuid_prefix(&h->snap.eid, 4));
}
- print_data("prev_", h->snap.data, schemap, names);
+ print_data("prev_", raft_entry_get_parsed_data(&h->snap),
+ schemap, names);
}
}
@@ -973,11 +974,13 @@ raft_header_to_standalone_log(const struct raft_header *h,
struct ovsdb_log *db_log_data)
{
if (h->snap_index) {
- if (!h->snap.data || json_array(h->snap.data)->n != 2) {
+ const struct json *data = raft_entry_get_parsed_data(&h->snap);
+
+ if (!data || json_array(data)->n != 2) {
ovs_fatal(0, "Incorrect raft header data array length");
}
- struct json_array *pa = json_array(h->snap.data);
+ struct json_array *pa = json_array(data);
struct json *schema_json = pa->elems[0];
struct ovsdb_error *error = NULL;
@@ -1373,7 +1376,7 @@ do_check_cluster(struct ovs_cmdl_context *ctx)
}
struct raft_entry *e = &s->entries[log_idx];
e->term = r->term;
- e->data = r->entry.data;
+ raft_entry_set_parsed_data_nocopy(e, r->entry.data);
e->eid = r->entry.eid;
e->servers = r->entry.servers;
break;
diff --git a/ovsdb/ovsdb-util.c b/ovsdb/ovsdb-util.c
index c4075cdae3..6d7be066b6 100644
--- a/ovsdb/ovsdb-util.c
+++ b/ovsdb/ovsdb-util.c
@@ -111,13 +111,13 @@ ovsdb_util_read_map_string_column(const struct ovsdb_row *row,
for (i = 0; i < datum->n; i++) {
atom_key = &datum->keys[i];
- if (!strcmp(atom_key->string, key)) {
+ if (!strcmp(atom_key->s->string, key)) {
atom_value = &datum->values[i];
break;
}
}
- return atom_value ? atom_value->string : NULL;
+ return atom_value ? atom_value->s->string : NULL;
}
/* Read string-uuid key-values from a map. Returns the row associated with
@@ -143,7 +143,7 @@ ovsdb_util_read_map_string_uuid_column(const struct ovsdb_row *row,
const struct ovsdb_datum *datum = &row->fields[column->index];
for (size_t i = 0; i < datum->n; i++) {
union ovsdb_atom *atom_key = &datum->keys[i];
- if (!strcmp(atom_key->string, key)) {
+ if (!strcmp(atom_key->s->string, key)) {
const union ovsdb_atom *atom_value = &datum->values[i];
return ovsdb_table_get_row(ref_table, &atom_value->uuid);
}
@@ -181,7 +181,7 @@ ovsdb_util_read_string_column(const struct ovsdb_row *row,
const union ovsdb_atom *atom;
atom = ovsdb_util_read_column(row, column_name, OVSDB_TYPE_STRING);
- *stringp = atom ? atom->string : NULL;
+ *stringp = atom ? atom->s->string : NULL;
return atom != NULL;
}
@@ -269,8 +269,10 @@ ovsdb_util_write_string_column(struct ovsdb_row *row, const char *column_name,
const char *string)
{
if (string) {
- const union ovsdb_atom atom = { .string = CONST_CAST(char *, string) };
+ union ovsdb_atom atom = {
+ .s = ovsdb_atom_string_create(CONST_CAST(char *, string)) };
ovsdb_util_write_singleton(row, column_name, &atom, OVSDB_TYPE_STRING);
+ ovsdb_atom_destroy(&atom, OVSDB_TYPE_STRING);
} else {
ovsdb_util_clear_column(row, column_name);
}
@@ -305,8 +307,8 @@ ovsdb_util_write_string_string_column(struct ovsdb_row *row,
datum->values = xmalloc(n * sizeof *datum->values);
for (i = 0; i < n; ++i) {
- datum->keys[i].string = keys[i];
- datum->values[i].string = values[i];
+ datum->keys[i].s = ovsdb_atom_string_create_nocopy(keys[i]);
+ datum->values[i].s = ovsdb_atom_string_create_nocopy(values[i]);
}
/* Sort and check constraints. */
diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c
index 126d16a2f5..e6d866182c 100644
--- a/ovsdb/ovsdb.c
+++ b/ovsdb/ovsdb.c
@@ -422,6 +422,8 @@ ovsdb_create(struct ovsdb_schema *schema, struct ovsdb_storage *storage)
ovs_list_init(&db->triggers);
db->run_triggers_now = db->run_triggers = false;
+ db->n_atoms = 0;
+
db->is_relay = false;
ovs_list_init(&db->txn_forward_new);
hmap_init(&db->txn_forward_sent);
@@ -518,6 +520,9 @@ ovsdb_get_memory_usage(const struct ovsdb *db, struct simap *usage)
}
simap_increase(usage, "cells", cells);
+ simap_increase(usage, "atoms", db->n_atoms);
+ simap_increase(usage, "txn-history", db->n_txn_history);
+ simap_increase(usage, "txn-history-atoms", db->n_txn_history_atoms);
if (db->storage) {
ovsdb_storage_get_memory_usage(db->storage, usage);
diff --git a/ovsdb/ovsdb.h b/ovsdb/ovsdb.h
index 4a7bd0f0ec..ec2d235ec2 100644
--- a/ovsdb/ovsdb.h
+++ b/ovsdb/ovsdb.h
@@ -90,8 +90,11 @@ struct ovsdb {
/* History trasanctions for incremental monitor transfer. */
bool need_txn_history; /* Need to maintain history of transactions. */
unsigned int n_txn_history; /* Current number of history transactions. */
+ unsigned int n_txn_history_atoms; /* Total number of atoms in history. */
struct ovs_list txn_history; /* Contains "struct ovsdb_txn_history_node. */
+ size_t n_atoms; /* Total number of ovsdb atoms in the database. */
+
/* Relay mode. */
bool is_relay; /* True, if database is in relay mode. */
/* List that holds transactions waiting to be forwarded to the server. */
diff --git a/ovsdb/raft-private.c b/ovsdb/raft-private.c
index 26d39a087f..30760233ee 100644
--- a/ovsdb/raft-private.c
+++ b/ovsdb/raft-private.c
@@ -18,11 +18,14 @@
#include "raft-private.h"
+#include "coverage.h"
#include "openvswitch/dynamic-string.h"
#include "ovsdb-error.h"
#include "ovsdb-parser.h"
#include "socket-util.h"
#include "sset.h"
+
+COVERAGE_DEFINE(raft_entry_serialize);
/* Addresses of Raft servers. */
@@ -281,7 +284,8 @@ void
raft_entry_clone(struct raft_entry *dst, const struct raft_entry *src)
{
dst->term = src->term;
- dst->data = json_nullable_clone(src->data);
+ dst->data.full_json = json_nullable_clone(src->data.full_json);
+ dst->data.serialized = json_nullable_clone(src->data.serialized);
dst->eid = src->eid;
dst->servers = json_nullable_clone(src->servers);
dst->election_timer = src->election_timer;
@@ -291,7 +295,8 @@ void
raft_entry_uninit(struct raft_entry *e)
{
if (e) {
- json_destroy(e->data);
+ json_destroy(e->data.full_json);
+ json_destroy(e->data.serialized);
json_destroy(e->servers);
}
}
@@ -301,8 +306,9 @@ raft_entry_to_json(const struct raft_entry *e)
{
struct json *json = json_object_create();
raft_put_uint64(json, "term", e->term);
- if (e->data) {
- json_object_put(json, "data", json_clone(e->data));
+ if (raft_entry_has_data(e)) {
+ json_object_put(json, "data",
+ json_clone(raft_entry_get_serialized_data(e)));
json_object_put_format(json, "eid", UUID_FMT, UUID_ARGS(&e->eid));
}
if (e->servers) {
@@ -323,9 +329,10 @@ raft_entry_from_json(struct json *json, struct raft_entry *e)
struct ovsdb_parser p;
ovsdb_parser_init(&p, json, "raft log entry");
e->term = raft_parse_required_uint64(&p, "term");
- e->data = json_nullable_clone(
+ raft_entry_set_parsed_data(e,
ovsdb_parser_member(&p, "data", OP_OBJECT | OP_ARRAY | OP_OPTIONAL));
- e->eid = e->data ? raft_parse_required_uuid(&p, "eid") : UUID_ZERO;
+ e->eid = raft_entry_has_data(e)
+ ? raft_parse_required_uuid(&p, "eid") : UUID_ZERO;
e->servers = json_nullable_clone(
ovsdb_parser_member(&p, "servers", OP_OBJECT | OP_OPTIONAL));
if (e->servers) {
@@ -344,9 +351,72 @@ bool
raft_entry_equals(const struct raft_entry *a, const struct raft_entry *b)
{
return (a->term == b->term
- && json_equal(a->data, b->data)
&& uuid_equals(&a->eid, &b->eid)
- && json_equal(a->servers, b->servers));
+ && json_equal(a->servers, b->servers)
+ && json_equal(raft_entry_get_parsed_data(a),
+ raft_entry_get_parsed_data(b)));
+}
+
+bool
+raft_entry_has_data(const struct raft_entry *e)
+{
+ return e->data.full_json || e->data.serialized;
+}
+
+static void
+raft_entry_data_serialize(struct raft_entry *e)
+{
+ if (!raft_entry_has_data(e) || e->data.serialized) {
+ return;
+ }
+ COVERAGE_INC(raft_entry_serialize);
+ e->data.serialized = json_serialized_object_create(e->data.full_json);
+}
+
+void
+raft_entry_set_parsed_data_nocopy(struct raft_entry *e, struct json *json)
+{
+ ovs_assert(!json || json->type != JSON_SERIALIZED_OBJECT);
+ e->data.full_json = json;
+ e->data.serialized = NULL;
+}
+
+void
+raft_entry_set_parsed_data(struct raft_entry *e, const struct json *json)
+{
+ raft_entry_set_parsed_data_nocopy(e, json_nullable_clone(json));
+}
+
+/* Returns a pointer to the fully parsed json object of the data.
+ * Caller takes the ownership of the result.
+ *
+ * Entry will no longer contain a fully parsed json object.
+ * Subsequent calls for the same raft entry will return NULL. */
+struct json * OVS_WARN_UNUSED_RESULT
+raft_entry_steal_parsed_data(struct raft_entry *e)
+{
+ /* Ensure that serialized version exists. */
+ raft_entry_data_serialize(e);
+
+ struct json *json = e->data.full_json;
+ e->data.full_json = NULL;
+
+ return json;
+}
+
+/* Returns a pointer to the fully parsed json object of the data, if any. */
+const struct json *
+raft_entry_get_parsed_data(const struct raft_entry *e)
+{
+ return e->data.full_json;
+}
+
+/* Returns a pointer to the JSON_SERIALIZED_OBJECT of the data. */
+const struct json *
+raft_entry_get_serialized_data(const struct raft_entry *e)
+{
+ raft_entry_data_serialize(CONST_CAST(struct raft_entry *, e));
+ return e->data.serialized;
}
void
@@ -402,8 +472,8 @@ raft_header_from_json__(struct raft_header *h, struct ovsdb_parser *p)
* present, all of them must be. */
h->snap_index = raft_parse_optional_uint64(p, "prev_index");
if (h->snap_index) {
- h->snap.data = json_nullable_clone(
- ovsdb_parser_member(p, "prev_data", OP_ANY));
+ raft_entry_set_parsed_data(
+ &h->snap, ovsdb_parser_member(p, "prev_data", OP_ANY));
h->snap.eid = raft_parse_required_uuid(p, "prev_eid");
h->snap.term = raft_parse_required_uint64(p, "prev_term");
h->snap.election_timer = raft_parse_optional_uint64(
@@ -455,8 +525,9 @@ raft_header_to_json(const struct raft_header *h)
if (h->snap_index) {
raft_put_uint64(json, "prev_index", h->snap_index);
raft_put_uint64(json, "prev_term", h->snap.term);
- if (h->snap.data) {
- json_object_put(json, "prev_data", json_clone(h->snap.data));
+ if (raft_entry_has_data(&h->snap)) {
+ json_object_put(json, "prev_data",
+ json_clone(raft_entry_get_serialized_data(&h->snap)));
}
json_object_put_format(json, "prev_eid",
UUID_FMT, UUID_ARGS(&h->snap.eid));
diff --git a/ovsdb/raft-private.h b/ovsdb/raft-private.h
index a69e37e5c2..48c6df511f 100644
--- a/ovsdb/raft-private.h
+++ b/ovsdb/raft-private.h
@@ -118,7 +118,10 @@ void raft_servers_format(const struct hmap *servers, struct ds *ds);
* entry. */
struct raft_entry {
uint64_t term;
- struct json *data;
+ struct {
+ struct json *full_json; /* Fully parsed JSON object. */
+ struct json *serialized; /* JSON_SERIALIZED_OBJECT version of data. */
+ } data;
struct uuid eid;
struct json *servers;
uint64_t election_timer;
@@ -130,6 +133,13 @@ struct json *raft_entry_to_json(const struct raft_entry *);
struct ovsdb_error *raft_entry_from_json(struct json *, struct raft_entry *)
OVS_WARN_UNUSED_RESULT;
bool raft_entry_equals(const struct raft_entry *, const struct raft_entry *);
+bool raft_entry_has_data(const struct raft_entry *);
+void raft_entry_set_parsed_data(struct raft_entry *, const struct json *);
+void raft_entry_set_parsed_data_nocopy(struct raft_entry *, struct json *);
+struct json *raft_entry_steal_parsed_data(struct raft_entry *)
+ OVS_WARN_UNUSED_RESULT;
+const struct json *raft_entry_get_parsed_data(const struct raft_entry *);
+const struct json *raft_entry_get_serialized_data(const struct raft_entry *);
/* On disk data serialization and deserialization. */
diff --git a/ovsdb/raft.c b/ovsdb/raft.c
index 2fb5156519..1a3447a8dd 100644
--- a/ovsdb/raft.c
+++ b/ovsdb/raft.c
@@ -494,11 +494,11 @@ raft_create_cluster(const char *file_name, const char *name,
.snap_index = index++,
.snap = {
.term = term,
- .data = json_nullable_clone(data),
.eid = uuid_random(),
.servers = json_object_create(),
},
};
+ raft_entry_set_parsed_data(&h.snap, data);
shash_add_nocopy(json_object(h.snap.servers),
xasprintf(UUID_FMT, UUID_ARGS(&h.sid)),
json_string_create(local_address));
@@ -727,10 +727,10 @@ raft_add_entry(struct raft *raft,
uint64_t index = raft->log_end++;
struct raft_entry *entry = &raft->entries[index - raft->log_start];
entry->term = term;
- entry->data = data;
entry->eid = eid ? *eid : UUID_ZERO;
entry->servers = servers;
entry->election_timer = election_timer;
+ raft_entry_set_parsed_data_nocopy(entry, data);
return index;
}
@@ -741,13 +741,16 @@ raft_write_entry(struct raft *raft, uint64_t term, struct json *data,
const struct uuid *eid, struct json *servers,
uint64_t election_timer)
{
+ uint64_t index = raft_add_entry(raft, term, data, eid, servers,
+ election_timer);
+ const struct json *entry_data = raft_entry_get_serialized_data(
+ &raft->entries[index - raft->log_start]);
struct raft_record r = {
.type = RAFT_REC_ENTRY,
.term = term,
.entry = {
- .index = raft_add_entry(raft, term, data, eid, servers,
- election_timer),
- .data = data,
+ .index = index,
+ .data = CONST_CAST(struct json *, entry_data),
.servers = servers,
.election_timer = election_timer,
.eid = eid ? *eid : UUID_ZERO,
@@ -2161,7 +2164,7 @@ raft_get_eid(const struct raft *raft, uint64_t index)
{
for (; index >= raft->log_start; index--) {
const struct raft_entry *e = raft_get_entry(raft, index);
- if (e->data) {
+ if (raft_entry_has_data(e)) {
return &e->eid;
}
}
@@ -2826,8 +2829,8 @@ raft_truncate(struct raft *raft, uint64_t new_end)
return servers_changed;
}
-static const struct json *
-raft_peek_next_entry(struct raft *raft, struct uuid *eid)
+static const struct raft_entry *
+raft_peek_next_entry(struct raft *raft)
{
/* Invariant: log_start - 2 <= last_applied <= commit_index < log_end. */
ovs_assert(raft->log_start <= raft->last_applied + 2);
@@ -2839,32 +2842,20 @@ raft_peek_next_entry(struct raft *raft, struct uuid *eid)
}
if (raft->log_start == raft->last_applied + 2) {
- *eid = raft->snap.eid;
- return raft->snap.data;
+ return &raft->snap;
}
while (raft->last_applied < raft->commit_index) {
const struct raft_entry *e = raft_get_entry(raft,
raft->last_applied + 1);
- if (e->data) {
- *eid = e->eid;
- return e->data;
+ if (raft_entry_has_data(e)) {
+ return e;
}
raft->last_applied++;
}
return NULL;
}
-static const struct json *
-raft_get_next_entry(struct raft *raft, struct uuid *eid)
-{
- const struct json *data = raft_peek_next_entry(raft, eid);
- if (data) {
- raft->last_applied++;
- }
- return data;
-}
-
/* Updates commit index in raft log. If commit index is already up-to-date
* it does nothing and return false, otherwise, returns true. */
static bool
@@ -2878,7 +2869,7 @@ raft_update_commit_index(struct raft *raft, uint64_t new_commit_index)
while (raft->commit_index < new_commit_index) {
uint64_t index = ++raft->commit_index;
const struct raft_entry *e = raft_get_entry(raft, index);
- if (e->data) {
+ if (raft_entry_has_data(e)) {
struct raft_command *cmd
= raft_find_command_by_eid(raft, &e->eid);
if (cmd) {
@@ -3059,7 +3050,9 @@ raft_handle_append_entries(struct raft *raft,
for (; i < n_entries; i++) {
const struct raft_entry *e = &entries[i];
error = raft_write_entry(raft, e->term,
- json_nullable_clone(e->data), &e->eid,
+ json_nullable_clone(
+ raft_entry_get_parsed_data(e)),
+ &e->eid,
json_nullable_clone(e->servers),
e->election_timer);
if (error) {
@@ -3314,20 +3307,29 @@ bool
raft_has_next_entry(const struct raft *raft_)
{
struct raft *raft = CONST_CAST(struct raft *, raft_);
- struct uuid eid;
- return raft_peek_next_entry(raft, &eid) != NULL;
+ return raft_peek_next_entry(raft) != NULL;
}
/* Returns the next log entry or snapshot from 'raft', or NULL if there are
- * none left to read. Stores the entry ID of the log entry in '*eid'. Stores
- * true in '*is_snapshot' if the returned data is a snapshot, false if it is a
- * log entry. */
-const struct json *
-raft_next_entry(struct raft *raft, struct uuid *eid, bool *is_snapshot)
+ * none left to read. Stores the entry ID of the log entry in '*eid'.
+ *
+ * The caller takes ownership of the result. */
+struct json * OVS_WARN_UNUSED_RESULT
+raft_next_entry(struct raft *raft, struct uuid *eid)
{
- const struct json *data = raft_get_next_entry(raft, eid);
- *is_snapshot = data == raft->snap.data;
- return data;
+ const struct raft_entry *e = raft_peek_next_entry(raft);
+
+ if (!e) {
+ return NULL;
+ }
+
+ raft->last_applied++;
+ *eid = e->eid;
+
+ /* DB will only read each entry once, so we don't need to store the fully
+ * parsed json object any longer. The serialized version is sufficient
+ * for sending to other cluster members or writing to the log. */
+ return raft_entry_steal_parsed_data(CONST_CAST(struct raft_entry *, e));
}
/* Returns the log index of the last-read snapshot or log entry. */
@@ -3420,6 +3422,7 @@ raft_send_install_snapshot_request(struct raft *raft,
const struct raft_server *s,
const char *comment)
{
+ const struct json *data = raft_entry_get_serialized_data(&raft->snap);
union raft_rpc rpc = {
.install_snapshot_request = {
.common = {
@@ -3432,7 +3435,7 @@ raft_send_install_snapshot_request(struct raft *raft,
.last_term = raft->snap.term,
.last_servers = raft->snap.servers,
.last_eid = raft->snap.eid,
- .data = raft->snap.data,
+ .data = CONST_CAST(struct json *, data),
.election_timer = raft->election_timer, /* use latest value */
}
};
@@ -3980,6 +3983,10 @@ raft_write_snapshot(struct raft *raft, struct ovsdb_log *log,
uint64_t new_log_start,
const struct raft_entry *new_snapshot)
{
+ /* Ensure that new snapshot contains serialized data object, so it will
+ * not be allocated while serializing the on-stack raft header object. */
+ ovs_assert(raft_entry_get_serialized_data(new_snapshot));
+
struct raft_header h = {
.sid = raft->sid,
.cid = raft->cid,
@@ -3998,12 +4005,13 @@ raft_write_snapshot(struct raft *raft, struct ovsdb_log *log,
/* Write log records. */
for (uint64_t index = new_log_start; index < raft->log_end; index++) {
const struct raft_entry *e = &raft->entries[index - raft->log_start];
+ const struct json *log_data = raft_entry_get_serialized_data(e);
struct raft_record r = {
.type = RAFT_REC_ENTRY,
.term = e->term,
.entry = {
.index = index,
- .data = e->data,
+ .data = CONST_CAST(struct json *, log_data),
.servers = e->servers,
.election_timer = e->election_timer,
.eid = e->eid,
@@ -4093,19 +4101,21 @@ raft_handle_install_snapshot_request__(
/* Case 3: The new snapshot starts past the end of our current log, so
* discard all of our current log. */
- const struct raft_entry new_snapshot = {
+ struct raft_entry new_snapshot = {
.term = rq->last_term,
- .data = rq->data,
.eid = rq->last_eid,
- .servers = rq->last_servers,
+ .servers = json_clone(rq->last_servers),
.election_timer = rq->election_timer,
};
+ raft_entry_set_parsed_data(&new_snapshot, rq->data);
+
struct ovsdb_error *error = raft_save_snapshot(raft, new_log_start,
&new_snapshot);
if (error) {
char *error_s = ovsdb_error_to_string_free(error);
VLOG_WARN("could not save snapshot: %s", error_s);
free(error_s);
+ raft_entry_uninit(&new_snapshot);
return false;
}
@@ -4120,7 +4130,7 @@ raft_handle_install_snapshot_request__(
}
raft_entry_uninit(&raft->snap);
- raft_entry_clone(&raft->snap, &new_snapshot);
+ raft->snap = new_snapshot;
raft_get_servers_from_log(raft, VLL_INFO);
raft_get_election_timer_from_log(raft);
@@ -4216,7 +4226,7 @@ raft_may_snapshot(const struct raft *raft)
&& !raft->leaving
&& !raft->left
&& !raft->failed
- && raft->role != RAFT_LEADER
+ && (raft->role == RAFT_FOLLOWER || hmap_count(&raft->servers) == 1)
&& raft->last_applied >= raft->log_start);
}
@@ -4265,11 +4275,12 @@ raft_store_snapshot(struct raft *raft, const struct json *new_snapshot_data)
uint64_t new_log_start = raft->last_applied + 1;
struct raft_entry new_snapshot = {
.term = raft_get_term(raft, new_log_start - 1),
- .data = json_clone(new_snapshot_data),
.eid = *raft_get_eid(raft, new_log_start - 1),
.servers = json_clone(raft_servers_for_index(raft, new_log_start - 1)),
.election_timer = raft->election_timer,
};
+ raft_entry_set_parsed_data(&new_snapshot, new_snapshot_data);
+
struct ovsdb_error *error = raft_save_snapshot(raft, new_log_start,
&new_snapshot);
if (error) {
@@ -4286,6 +4297,9 @@ raft_store_snapshot(struct raft *raft, const struct json *new_snapshot_data)
memmove(&raft->entries[0], &raft->entries[new_log_start - raft->log_start],
(raft->log_end - new_log_start) * sizeof *raft->entries);
raft->log_start = new_log_start;
+ /* It's a snapshot of the current database state, ovsdb-server will not
+ * read it back. Destroying the parsed json object to not waste memory. */
+ json_destroy(raft_entry_steal_parsed_data(&raft->snap));
return NULL;
}
diff --git a/ovsdb/raft.h b/ovsdb/raft.h
index 3545c41c2c..599bc0ae86 100644
--- a/ovsdb/raft.h
+++ b/ovsdb/raft.h
@@ -132,8 +132,8 @@ bool raft_left(const struct raft *);
bool raft_failed(const struct raft *);
/* Reading snapshots and log entries. */
-const struct json *raft_next_entry(struct raft *, struct uuid *eid,
- bool *is_snapshot);
+struct json *raft_next_entry(struct raft *, struct uuid *eid)
+ OVS_WARN_UNUSED_RESULT;
bool raft_has_next_entry(const struct raft *);
uint64_t raft_get_applied_index(const struct raft *);
diff --git a/ovsdb/rbac.c b/ovsdb/rbac.c
index 2986027c90..ff411675f0 100644
--- a/ovsdb/rbac.c
+++ b/ovsdb/rbac.c
@@ -53,8 +53,8 @@ ovsdb_find_row_by_string_key(const struct ovsdb_table *table,
HMAP_FOR_EACH (row, hmap_node, &table->rows) {
const struct ovsdb_datum *datum = &row->fields[column->index];
for (size_t i = 0; i < datum->n; i++) {
- if (datum->keys[i].string[0] &&
- !strcmp(key, datum->keys[i].string)) {
+ if (datum->keys[i].s->string[0] &&
+ !strcmp(key, datum->keys[i].s->string)) {
return row;
}
}
@@ -113,7 +113,7 @@ ovsdb_rbac_authorized(const struct ovsdb_row *perms,
}
for (i = 0; i < datum->n; i++) {
- const char *name = datum->keys[i].string;
+ const char *name = datum->keys[i].s->string;
const char *value = NULL;
bool is_map;
@@ -271,7 +271,7 @@ rbac_column_modification_permitted(const struct ovsdb_column *column,
size_t i;
for (i = 0; i < modifiable->n; i++) {
- char *name = modifiable->keys[i].string;
+ char *name = modifiable->keys[i].s->string;
if (!strcmp(name, column->name)) {
return true;
diff --git a/ovsdb/row.c b/ovsdb/row.c
index 65a0546211..e83c60a218 100644
--- a/ovsdb/row.c
+++ b/ovsdb/row.c
@@ -38,8 +38,7 @@ allocate_row(const struct ovsdb_table *table)
struct ovsdb_row *row = xmalloc(row_size);
row->table = CONST_CAST(struct ovsdb_table *, table);
row->txn_row = NULL;
- ovs_list_init(&row->src_refs);
- ovs_list_init(&row->dst_refs);
+ hmap_init(&row->dst_refs);
row->n_refs = 0;
return row;
}
@@ -61,6 +60,78 @@ ovsdb_row_create(const struct ovsdb_table *table)
return row;
}
+static struct ovsdb_weak_ref *
+ovsdb_weak_ref_clone(struct ovsdb_weak_ref *src)
+{
+ struct ovsdb_weak_ref *weak = xzalloc(sizeof *weak);
+
+ hmap_node_nullify(&weak->dst_node);
+ ovs_list_init(&weak->src_node);
+ weak->src_table = src->src_table;
+ weak->src = src->src;
+ weak->dst_table = src->dst_table;
+ weak->dst = src->dst;
+ ovsdb_atom_clone(&weak->key, &src->key, src->type.key.type);
+ if (src->type.value.type != OVSDB_TYPE_VOID) {
+ ovsdb_atom_clone(&weak->value, &src->value, src->type.value.type);
+ }
+ ovsdb_type_clone(&weak->type, &src->type);
+ weak->column_idx = src->column_idx;
+ weak->by_key = src->by_key;
+ return weak;
+}
+
+uint32_t
+ovsdb_weak_ref_hash(const struct ovsdb_weak_ref *weak)
+{
+ return uuid_hash(&weak->src);
+}
+
+static bool
+ovsdb_weak_ref_equals(const struct ovsdb_weak_ref *a,
+ const struct ovsdb_weak_ref *b)
+{
+ if (a == b) {
+ return true;
+ }
+ return a->src_table == b->src_table
+ && a->dst_table == b->dst_table
+ && uuid_equals(&a->src, &b->src)
+ && uuid_equals(&a->dst, &b->dst)
+ && a->column_idx == b->column_idx
+ && a->by_key == b->by_key
+ && ovsdb_atom_equals(&a->key, &b->key, a->type.key.type);
+}
+
+struct ovsdb_weak_ref *
+ovsdb_row_find_weak_ref(const struct ovsdb_row *row,
+ const struct ovsdb_weak_ref *ref)
+{
+ struct ovsdb_weak_ref *weak;
+ HMAP_FOR_EACH_WITH_HASH (weak, dst_node,
+ ovsdb_weak_ref_hash(ref), &row->dst_refs) {
+ if (ovsdb_weak_ref_equals(weak, ref)) {
+ return weak;
+ }
+ }
+ return NULL;
+}
+
+void
+ovsdb_weak_ref_destroy(struct ovsdb_weak_ref *weak)
+{
+ if (!weak) {
+ return;
+ }
+ ovs_assert(ovs_list_is_empty(&weak->src_node));
+ ovsdb_atom_destroy(&weak->key, weak->type.key.type);
+ if (weak->type.value.type != OVSDB_TYPE_VOID) {
+ ovsdb_atom_destroy(&weak->value, weak->type.value.type);
+ }
+ ovsdb_type_destroy(&weak->type);
+ free(weak);
+}
+
struct ovsdb_row *
ovsdb_row_clone(const struct ovsdb_row *old)
{
@@ -75,6 +146,13 @@ ovsdb_row_clone(const struct ovsdb_row *old)
&old->fields[column->index],
&column->type);
}
+
+ struct ovsdb_weak_ref *weak, *clone;
+ HMAP_FOR_EACH (weak, dst_node, &old->dst_refs) {
+ clone = ovsdb_weak_ref_clone(weak);
+ hmap_insert(&new->dst_refs, &clone->dst_node,
+ ovsdb_weak_ref_hash(clone));
+ }
return new;
}
@@ -85,20 +163,13 @@ ovsdb_row_destroy(struct ovsdb_row *row)
{
if (row) {
const struct ovsdb_table *table = row->table;
- struct ovsdb_weak_ref *weak, *next;
+ struct ovsdb_weak_ref *weak;
const struct shash_node *node;
- LIST_FOR_EACH_SAFE (weak, next, dst_node, &row->dst_refs) {
- ovs_list_remove(&weak->src_node);
- ovs_list_remove(&weak->dst_node);
- free(weak);
- }
-
- LIST_FOR_EACH_SAFE (weak, next, src_node, &row->src_refs) {
- ovs_list_remove(&weak->src_node);
- ovs_list_remove(&weak->dst_node);
- free(weak);
+ HMAP_FOR_EACH_POP (weak, dst_node, &row->dst_refs) {
+ ovsdb_weak_ref_destroy(weak);
}
+ hmap_destroy(&row->dst_refs);
SHASH_FOR_EACH (node, &table->schema->columns) {
const struct ovsdb_column *column = node->data;
diff --git a/ovsdb/row.h b/ovsdb/row.h
index 394ac8eb49..fe04555d0c 100644
--- a/ovsdb/row.h
+++ b/ovsdb/row.h
@@ -36,11 +36,28 @@ struct ovsdb_column_set;
* ovsdb_weak_ref" structures are created for them.
*/
struct ovsdb_weak_ref {
- struct ovs_list src_node; /* In src->src_refs list. */
- struct ovs_list dst_node; /* In destination row's dst_refs list. */
- struct ovsdb_row *src; /* Source row. */
- struct ovsdb_table *dst_table; /* Destination table. */
+ struct hmap_node dst_node; /* In ovsdb_row's 'dst_refs' hmap. */
+ struct ovs_list src_node; /* In txn_row's 'deleted/added_refs'. */
+
+ struct ovsdb_table *src_table; /* Source row table. */
+ struct uuid src; /* Source row uuid. */
+
+ struct ovsdb_table *dst_table; /* Destination row table. */
struct uuid dst; /* Destination row uuid. */
+
+ /* Source row's key-value pair that created this reference.
+ * This information is needed in order to find and delete the reference
+ * from the source row. We need both key and value in order to avoid
+ * accidental deletion of updated data, i.e. if value in datum got
+ * updated and the reference was created by the old value.
+ * Storing column index in order to remove references from the correct
+ * column. 'by_key' flag allows to distinguish 2 references in a corner
+ * case where key and value are the same. */
+ union ovsdb_atom key;
+ union ovsdb_atom value;
+ struct ovsdb_type type; /* Datum type of the key-value pair. */
+ unsigned int column_idx; /* Row column index for this pair. */
+ bool by_key; /* 'true' if reference is a 'key'. */
};
/* A row in a database table. */
@@ -50,8 +67,7 @@ struct ovsdb_row {
struct ovsdb_txn_row *txn_row; /* Transaction that row is in, if any. */
/* Weak references. Updated and checked only at transaction commit. */
- struct ovs_list src_refs; /* Weak references from this row. */
- struct ovs_list dst_refs; /* Weak references to this row. */
+ struct hmap dst_refs; /* Weak references to this row. */
/* Number of strong refs to this row from other rows, in this table or
* other tables, through 'uuid' columns that have a 'refTable' constraint
@@ -69,6 +85,12 @@ struct ovsdb_row {
* index 'i' is contained in hmap table->indexes[i]. */
};
+uint32_t ovsdb_weak_ref_hash(const struct ovsdb_weak_ref *);
+struct ovsdb_weak_ref * ovsdb_row_find_weak_ref(const struct ovsdb_row *,
+ const struct ovsdb_weak_ref *);
+void ovsdb_weak_ref_destroy(struct ovsdb_weak_ref *);
+
+
struct ovsdb_row *ovsdb_row_create(const struct ovsdb_table *);
struct ovsdb_row *ovsdb_row_clone(const struct ovsdb_row *);
void ovsdb_row_destroy(struct ovsdb_row *);
diff --git a/ovsdb/storage.c b/ovsdb/storage.c
index d727b1eacd..d4984be250 100644
--- a/ovsdb/storage.c
+++ b/ovsdb/storage.c
@@ -268,9 +268,7 @@ ovsdb_storage_read(struct ovsdb_storage *storage,
struct json *schema_json = NULL;
struct json *txn_json = NULL;
if (storage->raft) {
- bool is_snapshot;
- json = json_nullable_clone(
- raft_next_entry(storage->raft, txnid, &is_snapshot));
+ json = raft_next_entry(storage->raft, txnid);
if (!json) {
return NULL;
} else if (json->type != JSON_ARRAY || json->array.n != 2) {
@@ -509,7 +507,11 @@ schedule_next_snapshot(struct ovsdb_storage *storage, bool quick)
long long int now = time_msec();
storage->next_snapshot_min = now + base + random_range(range);
- storage->next_snapshot_max = now + 60LL * 60 * 24 * 1000; /* 1 day */
+ if (!quick) {
+ long long int one_day = 60LL * 60 * 24 * 1000;
+
+ storage->next_snapshot_max = now + one_day;
+ }
} else {
storage->next_snapshot_min = LLONG_MAX;
storage->next_snapshot_max = LLONG_MAX;
@@ -517,7 +519,7 @@ schedule_next_snapshot(struct ovsdb_storage *storage, bool quick)
}
bool
-ovsdb_storage_should_snapshot(const struct ovsdb_storage *storage)
+ovsdb_storage_should_snapshot(struct ovsdb_storage *storage)
{
if (storage->raft || storage->log) {
/* If we haven't reached the minimum snapshot time, don't snapshot. */
@@ -546,6 +548,15 @@ ovsdb_storage_should_snapshot(const struct ovsdb_storage *storage)
}
if (!snapshot_recommended) {
+ if (storage->raft) {
+ /* Re-scheduling with a quick retry in order to avoid condition
+ * where all the raft servers passed the minimal time already,
+ * but the log didn't grow a lot, so they are all checking on
+ * every iteration. This will randomize the time of the next
+ * attempt, so all the servers will not start snapshotting at
+ * the same time when the log reaches a critical size. */
+ schedule_next_snapshot(storage, true);
+ }
return false;
}
diff --git a/ovsdb/storage.h b/ovsdb/storage.h
index e120094d7a..ff026b77fa 100644
--- a/ovsdb/storage.h
+++ b/ovsdb/storage.h
@@ -76,7 +76,7 @@ uint64_t ovsdb_write_get_commit_index(const struct ovsdb_write *);
void ovsdb_write_wait(const struct ovsdb_write *);
void ovsdb_write_destroy(struct ovsdb_write *);
-bool ovsdb_storage_should_snapshot(const struct ovsdb_storage *);
+bool ovsdb_storage_should_snapshot(struct ovsdb_storage *);
struct ovsdb_error *ovsdb_storage_store_snapshot(struct ovsdb_storage *storage,
const struct json *schema,
const struct json *snapshot)
diff --git a/ovsdb/transaction.c b/ovsdb/transaction.c
index 8ffefcf7c9..db86d847c3 100644
--- a/ovsdb/transaction.c
+++ b/ovsdb/transaction.c
@@ -41,6 +41,9 @@ struct ovsdb_txn {
struct ovs_list txn_tables; /* Contains "struct ovsdb_txn_table"s. */
struct ds comment;
struct uuid txnid; /* For clustered mode only. It is the eid. */
+ size_t n_atoms; /* Number of atoms in all transaction rows. */
+ ssize_t n_atoms_diff; /* Difference between number of added and
+ * removed atoms. */
};
/* A table modified by a transaction. */
@@ -86,6 +89,10 @@ struct ovsdb_txn_row {
struct uuid uuid;
struct ovsdb_table *table;
+ /* Weak refs that need to be added/deleted to/from destination rows. */
+ struct ovs_list added_refs;
+ struct ovs_list deleted_refs;
+
/* Used by for_each_txn_row(). */
unsigned int serial; /* Serial number of in-progress commit. */
@@ -151,6 +158,23 @@ ovsdb_txn_row_abort(struct ovsdb_txn *txn OVS_UNUSED,
} else {
hmap_replace(&new->table->rows, &new->hmap_node, &old->hmap_node);
}
+
+ struct ovsdb_weak_ref *weak, *next;
+ LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->deleted_refs) {
+ ovs_list_remove(&weak->src_node);
+ ovs_list_init(&weak->src_node);
+ if (hmap_node_is_null(&weak->dst_node)) {
+ ovsdb_weak_ref_destroy(weak);
+ }
+ }
+ LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->added_refs) {
+ ovs_list_remove(&weak->src_node);
+ ovs_list_init(&weak->src_node);
+ if (hmap_node_is_null(&weak->dst_node)) {
+ ovsdb_weak_ref_destroy(weak);
+ }
+ }
+
ovsdb_row_destroy(new);
free(txn_row);
@@ -266,9 +290,9 @@ ovsdb_txn_adjust_atom_refs(struct ovsdb_txn *txn, const struct ovsdb_row *r,
static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
ovsdb_txn_adjust_row_refs(struct ovsdb_txn *txn, const struct ovsdb_row *r,
- const struct ovsdb_column *column, int delta)
+ const struct ovsdb_column *column,
+ const struct ovsdb_datum *field, int delta)
{
- const struct ovsdb_datum *field = &r->fields[column->index];
struct ovsdb_error *error;
error = ovsdb_txn_adjust_atom_refs(txn, r, column, &column->type.key,
@@ -291,14 +315,39 @@ update_row_ref_count(struct ovsdb_txn *txn, struct ovsdb_txn_row *r)
struct ovsdb_error *error;
if (bitmap_is_set(r->changed, column->index)) {
- if (r->old) {
- error = ovsdb_txn_adjust_row_refs(txn, r->old, column, -1);
+ if (r->old && !r->new) {
+ error = ovsdb_txn_adjust_row_refs(
+ txn, r->old, column,
+ &r->old->fields[column->index], -1);
if (error) {
return OVSDB_WRAP_BUG("error decreasing refcount", error);
}
- }
- if (r->new) {
- error = ovsdb_txn_adjust_row_refs(txn, r->new, column, 1);
+ } else if (!r->old && r->new) {
+ error = ovsdb_txn_adjust_row_refs(
+ txn, r->new, column,
+ &r->new->fields[column->index], 1);
+ if (error) {
+ return error;
+ }
+ } else if (r->old && r->new) {
+ struct ovsdb_datum added, removed;
+
+ ovsdb_datum_added_removed(&added, &removed,
+ &r->old->fields[column->index],
+ &r->new->fields[column->index],
+ &column->type);
+
+ error = ovsdb_txn_adjust_row_refs(
+ txn, r->old, column, &removed, -1);
+ ovsdb_datum_destroy(&removed, &column->type);
+ if (error) {
+ ovsdb_datum_destroy(&added, &column->type);
+ return OVSDB_WRAP_BUG("error decreasing refcount", error);
+ }
+
+ error = ovsdb_txn_adjust_row_refs(
+ txn, r->new, column, &added, 1);
+ ovsdb_datum_destroy(&added, &column->type);
if (error) {
return error;
}
@@ -459,93 +508,125 @@ static struct ovsdb_error *
ovsdb_txn_update_weak_refs(struct ovsdb_txn *txn OVS_UNUSED,
struct ovsdb_txn_row *txn_row)
{
- struct ovsdb_weak_ref *weak, *next;
+ struct ovsdb_weak_ref *weak, *next, *dst_weak;
+ struct ovsdb_row *dst_row;
- /* Remove the weak references originating in the old version of the row. */
- if (txn_row->old) {
- LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->old->src_refs) {
- ovs_list_remove(&weak->src_node);
- ovs_list_remove(&weak->dst_node);
- free(weak);
+ /* Find and clean up deleted references from destination rows. */
+ LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->deleted_refs) {
+ dst_row = CONST_CAST(struct ovsdb_row *,
+ ovsdb_table_get_row(weak->dst_table, &weak->dst));
+ if (dst_row) {
+ dst_weak = ovsdb_row_find_weak_ref(dst_row, weak);
+ hmap_remove(&dst_row->dst_refs, &dst_weak->dst_node);
+ ovs_assert(ovs_list_is_empty(&dst_weak->src_node));
+ ovsdb_weak_ref_destroy(dst_weak);
+ }
+ ovs_list_remove(&weak->src_node);
+ ovs_list_init(&weak->src_node);
+ if (hmap_node_is_null(&weak->dst_node)) {
+ ovsdb_weak_ref_destroy(weak);
}
}
- /* Although the originating rows have the responsibility of updating the
- * weak references in the dst, it is possible that some source rows aren't
- * part of the transaction. In that situation this row needs to move the
- * list of incoming weak references from the old row into the new one.
- */
- if (txn_row->old && txn_row->new) {
- /* Move the incoming weak references from old to new. */
- ovs_list_push_back_all(&txn_row->new->dst_refs,
- &txn_row->old->dst_refs);
- }
-
- /* Insert the weak references originating in the new version of the row. */
- struct ovsdb_row *dst_row;
- if (txn_row->new) {
- LIST_FOR_EACH (weak, src_node, &txn_row->new->src_refs) {
- /* dst_row MUST exist. */
- dst_row = CONST_CAST(struct ovsdb_row *,
+ /* Insert the weak references added in the new version of the row. */
+ LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->added_refs) {
+ dst_row = CONST_CAST(struct ovsdb_row *,
ovsdb_table_get_row(weak->dst_table, &weak->dst));
- ovs_list_insert(&dst_row->dst_refs, &weak->dst_node);
- }
+
+ ovs_assert(!ovsdb_row_find_weak_ref(dst_row, weak));
+ hmap_insert(&dst_row->dst_refs, &weak->dst_node,
+ ovsdb_weak_ref_hash(weak));
+ ovs_list_remove(&weak->src_node);
+ ovs_list_init(&weak->src_node);
}
return NULL;
}
static void
-add_weak_ref(const struct ovsdb_row *src_, const struct ovsdb_row *dst_)
+add_weak_ref(struct ovsdb_txn_row *txn_row, const struct ovsdb_row *dst_,
+ struct ovs_list *ref_list,
+ const union ovsdb_atom *key, const union ovsdb_atom *value,
+ bool by_key, const struct ovsdb_column *column)
{
- struct ovsdb_row *src = CONST_CAST(struct ovsdb_row *, src_);
struct ovsdb_row *dst = CONST_CAST(struct ovsdb_row *, dst_);
struct ovsdb_weak_ref *weak;
- if (src == dst) {
+ if (txn_row->new == dst) {
return;
}
- if (!ovs_list_is_empty(&dst->dst_refs)) {
- /* Omit duplicates. */
- weak = CONTAINER_OF(ovs_list_back(&dst->dst_refs),
- struct ovsdb_weak_ref, dst_node);
- if (weak->src == src) {
- return;
- }
- }
-
- weak = xmalloc(sizeof *weak);
- weak->src = src;
+ weak = xzalloc(sizeof *weak);
+ weak->src_table = txn_row->new->table;
+ weak->src = *ovsdb_row_get_uuid(txn_row->new);
weak->dst_table = dst->table;
weak->dst = *ovsdb_row_get_uuid(dst);
- /* The dst_refs list is updated at commit time. */
- ovs_list_init(&weak->dst_node);
- ovs_list_push_back(&src->src_refs, &weak->src_node);
+ ovsdb_type_clone(&weak->type, &column->type);
+ ovsdb_atom_clone(&weak->key, key, column->type.key.type);
+ if (column->type.value.type != OVSDB_TYPE_VOID) {
+ ovsdb_atom_clone(&weak->value, value, column->type.value.type);
+ }
+ weak->by_key = by_key;
+ weak->column_idx = column->index;
+ hmap_node_nullify(&weak->dst_node);
+ ovs_list_push_back(ref_list, &weak->src_node);
+}
+
+static void
+find_and_add_weak_ref(struct ovsdb_txn_row *txn_row,
+ const union ovsdb_atom *key,
+ const union ovsdb_atom *value,
+ const struct ovsdb_column *column,
+ bool by_key, struct ovs_list *ref_list,
+ struct ovsdb_datum *not_found, bool *zero)
+{
+ const struct ovsdb_row *row = by_key
+ ? ovsdb_table_get_row(column->type.key.uuid.refTable, &key->uuid)
+ : ovsdb_table_get_row(column->type.value.uuid.refTable, &value->uuid);
+
+ if (row) {
+ add_weak_ref(txn_row, row, ref_list, key, value, by_key, column);
+ } else if (not_found) {
+ if (uuid_is_zero(by_key ? &key->uuid : &value->uuid)) {
+ *zero = true;
+ }
+ ovsdb_datum_add_unsafe(not_found, key, value, &column->type, NULL);
+ }
}
static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
assess_weak_refs(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row)
{
+ struct ovsdb_weak_ref *weak, *next;
struct ovsdb_table *table;
struct shash_node *node;
if (txn_row->old && !txn_row->new) {
/* Mark rows that have weak references to 'txn_row' as modified, so
- * that their weak references will get reassessed. */
- struct ovsdb_weak_ref *weak, *next;
-
- LIST_FOR_EACH_SAFE (weak, next, dst_node, &txn_row->old->dst_refs) {
- if (!weak->src->txn_row) {
- ovsdb_txn_row_modify(txn, weak->src);
+ * that their weak references will get reassessed. Adding all weak
+ * refs to 'deleted_refs' lists of their source rows, so they will be
+ * cleaned up from datums and deleted on commit. */
+
+ HMAP_FOR_EACH (weak, dst_node, &txn_row->old->dst_refs) {
+ struct ovsdb_txn_row *src_txn_row;
+
+ src_txn_row = find_or_make_txn_row(txn, weak->src_table,
+ &weak->src);
+ if (!src_txn_row) {
+ /* Source row is also removed. */
+ continue;
}
+ ovs_assert(src_txn_row);
+ ovs_assert(ovs_list_is_empty(&weak->src_node));
+ ovs_list_insert(&src_txn_row->deleted_refs, &weak->src_node);
}
}
if (!txn_row->new) {
- /* We don't have to do anything about references that originate at
- * 'txn_row', because ovsdb_row_destroy() will remove those weak
- * references. */
+ /* Since all the atoms will be destroyed by the ovsdb_row_destroy(),
+ * there is no need to check them here. Source references queued
+ * into 'deleted_refs' while removing other rows will be cleaned up at
+ * commit time. */
return NULL;
}
@@ -553,50 +634,94 @@ assess_weak_refs(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row)
SHASH_FOR_EACH (node, &table->schema->columns) {
const struct ovsdb_column *column = node->data;
struct ovsdb_datum *datum = &txn_row->new->fields[column->index];
+ struct ovsdb_datum added, removed, deleted_refs;
unsigned int orig_n, i;
bool zero = false;
orig_n = datum->n;
+ /* Collecting all key-value pairs that reference deleted rows. */
+ ovsdb_datum_init_empty(&deleted_refs);
+ LIST_FOR_EACH_SAFE (weak, next, src_node, &txn_row->deleted_refs) {
+ if (column->index == weak->column_idx) {
+ ovsdb_datum_add_unsafe(&deleted_refs, &weak->key, &weak->value,
+ &column->type, NULL);
+ ovs_list_remove(&weak->src_node);
+ ovs_list_init(&weak->src_node);
+ }
+ }
+ ovsdb_datum_sort_unique(&deleted_refs, column->type.key.type,
+ column->type.value.type);
+
+ /* Removing elements that reference deleted rows. */
+ ovsdb_datum_subtract(datum, &column->type,
+ &deleted_refs, &column->type);
+ ovsdb_datum_destroy(&deleted_refs, &column->type);
+
+ /* Generating the difference between old and new data. */
+ if (txn_row->old) {
+ ovsdb_datum_added_removed(&added, &removed,
+ &txn_row->old->fields[column->index],
+ datum, &column->type);
+ } else {
+ ovsdb_datum_init_empty(&removed);
+ ovsdb_datum_clone(&added, datum, &column->type);
+ }
+
+ /* Checking added data and creating new references. */
+ ovsdb_datum_init_empty(&deleted_refs);
if (ovsdb_base_type_is_weak_ref(&column->type.key)) {
- for (i = 0; i < datum->n; ) {
- const struct ovsdb_row *row;
-
- row = ovsdb_table_get_row(column->type.key.uuid.refTable,
- &datum->keys[i].uuid);
- if (row) {
- add_weak_ref(txn_row->new, row);
- i++;
- } else {
- if (uuid_is_zero(&datum->keys[i].uuid)) {
- zero = true;
- }
- ovsdb_datum_remove_unsafe(datum, i, &column->type);
- }
+ for (i = 0; i < added.n; i++) {
+ find_and_add_weak_ref(txn_row, &added.keys[i],
+ added.values ? &added.values[i] : NULL,
+ column, true, &txn_row->added_refs,
+ &deleted_refs, &zero);
}
}
if (ovsdb_base_type_is_weak_ref(&column->type.value)) {
- for (i = 0; i < datum->n; ) {
- const struct ovsdb_row *row;
-
- row = ovsdb_table_get_row(column->type.value.uuid.refTable,
- &datum->values[i].uuid);
- if (row) {
- add_weak_ref(txn_row->new, row);
- i++;
- } else {
- if (uuid_is_zero(&datum->values[i].uuid)) {
- zero = true;
- }
- ovsdb_datum_remove_unsafe(datum, i, &column->type);
- }
+ for (i = 0; i < added.n; i++) {
+ find_and_add_weak_ref(txn_row, &added.keys[i],
+ &added.values[i],
+ column, false, &txn_row->added_refs,
+ &deleted_refs, &zero);
+ }
+ }
+ if (deleted_refs.n) {
+ /* Removing all the references that don't point to valid rows. */
+ ovsdb_datum_sort_unique(&deleted_refs, column->type.key.type,
+ column->type.value.type);
+ ovsdb_datum_subtract(datum, &column->type,
+ &deleted_refs, &column->type);
+ ovsdb_datum_destroy(&deleted_refs, &column->type);
+ }
+ ovsdb_datum_destroy(&added, &column->type);
+
+ /* Creating refs that need to be removed on commit. This includes
+ * both: the references that got directly removed from the datum and
+ * references removed due to deletion of a referenced row. */
+ if (ovsdb_base_type_is_weak_ref(&column->type.key)) {
+ for (i = 0; i < removed.n; i++) {
+ find_and_add_weak_ref(txn_row, &removed.keys[i],
+ removed.values
+ ? &removed.values[i] : NULL,
+ column, true, &txn_row->deleted_refs,
+ NULL, NULL);
}
}
+ if (ovsdb_base_type_is_weak_ref(&column->type.value)) {
+ for (i = 0; i < removed.n; i++) {
+ find_and_add_weak_ref(txn_row, &removed.keys[i],
+ &removed.values[i],
+ column, false, &txn_row->deleted_refs,
+ NULL, NULL);
+ }
+ }
+ ovsdb_datum_destroy(&removed, &column->type);
+
if (datum->n != orig_n) {
bitmap_set1(txn_row->changed, column->index);
- ovsdb_datum_sort_assert(datum, column->type.key.type);
if (datum->n < column->type.n_min) {
const struct uuid *row_uuid = ovsdb_row_get_uuid(txn_row->new);
if (zero && !txn_row->old) {
@@ -817,6 +942,37 @@ check_index_uniqueness(struct ovsdb_txn *txn OVS_UNUSED,
return NULL;
}
+static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
+count_atoms(struct ovsdb_txn *txn, struct ovsdb_txn_row *txn_row)
+{
+ struct ovsdb_table *table = txn_row->table;
+ ssize_t n_atoms_old = 0, n_atoms_new = 0;
+ struct shash_node *node;
+
+ SHASH_FOR_EACH (node, &table->schema->columns) {
+ const struct ovsdb_column *column = node->data;
+ const struct ovsdb_type *type = &column->type;
+ unsigned int idx = column->index;
+
+ if (txn_row->old) {
+ n_atoms_old += txn_row->old->fields[idx].n;
+ if (type->value.type != OVSDB_TYPE_VOID) {
+ n_atoms_old += txn_row->old->fields[idx].n;
+ }
+ }
+ if (txn_row->new) {
+ n_atoms_new += txn_row->new->fields[idx].n;
+ if (type->value.type != OVSDB_TYPE_VOID) {
+ n_atoms_new += txn_row->new->fields[idx].n;
+ }
+ }
+ }
+
+ txn->n_atoms += n_atoms_old + n_atoms_new;
+ txn->n_atoms_diff += n_atoms_new - n_atoms_old;
+ return NULL;
+}
+
static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
update_version(struct ovsdb_txn *txn OVS_UNUSED, struct ovsdb_txn_row *txn_row)
{
@@ -885,6 +1041,12 @@ ovsdb_txn_precommit(struct ovsdb_txn *txn)
return error;
}
+ /* Count atoms. */
+ error = for_each_txn_row(txn, count_atoms);
+ if (error) {
+ return OVSDB_WRAP_BUG("can't happen", error);
+ }
+
/* Update _version for rows that changed. */
error = for_each_txn_row(txn, update_version);
if (error) {
@@ -900,6 +1062,8 @@ ovsdb_txn_clone(const struct ovsdb_txn *txn)
struct ovsdb_txn *txn_cloned = xzalloc(sizeof *txn_cloned);
ovs_list_init(&txn_cloned->txn_tables);
txn_cloned->txnid = txn->txnid;
+ txn_cloned->n_atoms = txn->n_atoms;
+ txn_cloned->n_atoms_diff = txn->n_atoms_diff;
struct ovsdb_txn_table *t;
LIST_FOR_EACH (t, node, &txn->txn_tables) {
@@ -958,6 +1122,7 @@ ovsdb_txn_add_to_history(struct ovsdb_txn *txn)
node->txn = ovsdb_txn_clone(txn);
ovs_list_push_back(&txn->db->txn_history, &node->node);
txn->db->n_txn_history++;
+ txn->db->n_txn_history_atoms += txn->n_atoms;
}
}
@@ -968,6 +1133,7 @@ ovsdb_txn_complete(struct ovsdb_txn *txn)
if (!ovsdb_txn_is_empty(txn)) {
txn->db->run_triggers_now = txn->db->run_triggers = true;
+ txn->db->n_atoms += txn->n_atoms_diff;
ovsdb_monitors_commit(txn->db, txn);
ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_update_weak_refs));
ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_row_commit));
@@ -1215,6 +1381,9 @@ ovsdb_txn_row_create(struct ovsdb_txn *txn, struct ovsdb_table *table,
txn_row->n_refs = old ? old->n_refs : 0;
txn_row->serial = serial - 1;
+ ovs_list_init(&txn_row->added_refs);
+ ovs_list_init(&txn_row->deleted_refs);
+
if (old) {
old->txn_row = txn_row;
}
@@ -1423,12 +1592,20 @@ ovsdb_txn_history_run(struct ovsdb *db)
if (!db->need_txn_history) {
return;
}
- /* Remove old histories to limit the size of the history */
- while (db->n_txn_history > 100) {
+ /* Remove old histories to limit the size of the history. Removing until
+ * the number of ovsdb atoms in history becomes less than the number of
+ * atoms in the database, because it will be faster to just get a database
+ * snapshot than re-constructing changes from the history that big.
+ * Keeping at least one transaction to avoid sending UUID_ZERO as a last id
+ * if all entries got removed due to the size limit. */
+ while (db->n_txn_history > 1 &&
+ (db->n_txn_history > 100 ||
+ db->n_txn_history_atoms > db->n_atoms)) {
struct ovsdb_txn_history_node *txn_h_node = CONTAINER_OF(
ovs_list_pop_front(&db->txn_history),
struct ovsdb_txn_history_node, node);
+ db->n_txn_history_atoms -= txn_h_node->txn->n_atoms;
ovsdb_txn_destroy_cloned(txn_h_node->txn);
free(txn_h_node);
db->n_txn_history--;
@@ -1440,6 +1617,7 @@ ovsdb_txn_history_init(struct ovsdb *db, bool need_txn_history)
{
db->need_txn_history = need_txn_history;
db->n_txn_history = 0;
+ db->n_txn_history_atoms = 0;
ovs_list_init(&db->txn_history);
}
@@ -1458,4 +1636,5 @@ ovsdb_txn_history_destroy(struct ovsdb *db)
free(txn_h_node);
}
db->n_txn_history = 0;
+ db->n_txn_history_atoms = 0;
}
diff --git a/python/ovs/db/data.py b/python/ovs/db/data.py
index 2a2102d6be..99bf80ed62 100644
--- a/python/ovs/db/data.py
+++ b/python/ovs/db/data.py
@@ -204,7 +204,7 @@ class Atom(object):
else:
return '.boolean = false'
elif self.type == ovs.db.types.StringType:
- return '.string = "%s"' % escapeCString(self.value)
+ return '.s = %s' % escapeCString(self.value)
elif self.type == ovs.db.types.UuidType:
return '.uuid = %s' % ovs.ovsuuid.to_c_assignment(self.value)
@@ -563,16 +563,41 @@ class Datum(object):
if n == 0:
return ["static struct ovsdb_datum %s = { .n = 0 };"]
- s = ["static union ovsdb_atom %s_keys[%d] = {" % (name, n)]
- for key in sorted(self.values):
- s += [" { %s }," % key.cInitAtom(key)]
- s += ["};"]
+ s = []
+ if self.type.key.type == ovs.db.types.StringType:
+ s += ["static struct ovsdb_atom_string %s_key_strings[%d] = {"
+ % (name, n)]
+ for key in sorted(self.values):
+ s += [' { .string = "%s", .n_refs = 2 },'
+ % escapeCString(key.value)]
+ s += ["};"]
+ s += ["static union ovsdb_atom %s_keys[%d] = {" % (name, n)]
+ for i in range(n):
+ s += [" { .s = &%s_key_strings[%d] }," % (name, i)]
+ s += ["};"]
+ else:
+ s = ["static union ovsdb_atom %s_keys[%d] = {" % (name, n)]
+ for key in sorted(self.values):
+ s += [" { %s }," % key.cInitAtom(key)]
+ s += ["};"]
if self.type.value:
- s = ["static union ovsdb_atom %s_values[%d] = {" % (name, n)]
- for k, v in sorted(self.values.items()):
- s += [" { %s }," % v.cInitAtom(v)]
- s += ["};"]
+ if self.type.value.type == ovs.db.types.StringType:
+ s += ["static struct ovsdb_atom_string %s_val_strings[%d] = {"
+ % (name, n)]
+ for k, v in sorted(self.values.items()):
+ s += [' { .string = "%s", .n_refs = 2 },'
+ % escapeCString(v.value)]
+ s += ["};"]
+ s += ["static union ovsdb_atom %s_values[%d] = {" % (name, n)]
+ for i in range(n):
+ s += [" { .s = &%s_val_strings[%d] }," % (name, i)]
+ s += ["};"]
+ else:
+ s += ["static union ovsdb_atom %s_values[%d] = {" % (name, n)]
+ for k, v in sorted(self.values.items()):
+ s += [" { %s }," % v.cInitAtom(v)]
+ s += ["};"]
s += ["static struct ovsdb_datum %s = {" % name]
s += [" .n = %d," % n]
diff --git a/python/ovs/db/idl.py b/python/ovs/db/idl.py
index ecae5e1432..87ee06cdef 100644
--- a/python/ovs/db/idl.py
+++ b/python/ovs/db/idl.py
@@ -1505,6 +1505,11 @@ class Transaction(object):
if self != self.idl.txn:
return self._status
+ if self.idl.state != Idl.IDL_S_MONITORING:
+ self._status = Transaction.TRY_AGAIN
+ self.__disassemble()
+ return self._status
+
# If we need a lock but don't have it, give up quickly.
if self.idl.lock_name and not self.idl.has_lock:
self._status = Transaction.NOT_LOCKED
diff --git a/python/ovs/db/types.py b/python/ovs/db/types.py
index 626ae8fc44..3318a3b6f8 100644
--- a/python/ovs/db/types.py
+++ b/python/ovs/db/types.py
@@ -48,6 +48,16 @@ class AtomicType(object):
def to_string(self):
return self.name
+ def to_rvalue_string(self):
+ if self == StringType:
+ return 's->' + self.name
+ return self.name
+
+ def to_lvalue_string(self):
+ if self == StringType:
+ return 's'
+ return self.name
+
def to_json(self):
return self.name
@@ -373,18 +383,7 @@ class BaseType(object):
return "%(dst)s = *%(src)s;" % args
return ("%(dst)s = %(src)s->header_.uuid;") % args
elif self.type == StringType:
- return "%(dst)s = xstrdup(%(src)s);" % args
- else:
- return "%(dst)s = %(src)s;" % args
-
- def assign_c_value_casting_away_const(self, dst, src, refTable=True):
- args = {'dst': dst, 'src': src}
- if self.ref_table_name:
- if not refTable:
- return "%(dst)s = *%(src)s;" % args
- return ("%(dst)s = %(src)s->header_.uuid;") % args
- elif self.type == StringType:
- return "%(dst)s = CONST_CAST(char *, %(src)s);" % args
+ return "%(dst)s = ovsdb_atom_string_create(%(src)s);" % args
else:
return "%(dst)s = %(src)s;" % args
diff --git a/python/ovs/poller.py b/python/ovs/poller.py
index 3624ec8655..157719c3a4 100644
--- a/python/ovs/poller.py
+++ b/python/ovs/poller.py
@@ -26,9 +26,9 @@ if sys.platform == "win32":
import ovs.winutils as winutils
try:
- from OpenSSL import SSL
+ import ssl
except ImportError:
- SSL = None
+ ssl = None
try:
from eventlet import patcher as eventlet_patcher
@@ -73,7 +73,7 @@ class _SelectSelect(object):
def register(self, fd, events):
if isinstance(fd, socket.socket):
fd = fd.fileno()
- if SSL and isinstance(fd, SSL.Connection):
+ if ssl and isinstance(fd, ssl.SSLSocket):
fd = fd.fileno()
if sys.platform != 'win32':
diff --git a/python/ovs/reconnect.py b/python/ovs/reconnect.py
index c4c6c87e9f..6b0d023ae3 100644
--- a/python/ovs/reconnect.py
+++ b/python/ovs/reconnect.py
@@ -44,7 +44,7 @@ class Reconnect(object):
is_connected = False
@staticmethod
- def deadline(fsm):
+ def deadline(fsm, now):
return None
@staticmethod
@@ -56,7 +56,7 @@ class Reconnect(object):
is_connected = False
@staticmethod
- def deadline(fsm):
+ def deadline(fsm, now):
return None
@staticmethod
@@ -68,7 +68,7 @@ class Reconnect(object):
is_connected = False
@staticmethod
- def deadline(fsm):
+ def deadline(fsm, now):
return fsm.state_entered + fsm.backoff
@staticmethod
@@ -80,7 +80,7 @@ class Reconnect(object):
is_connected = False
@staticmethod
- def deadline(fsm):
+ def deadline(fsm, now):
return fsm.state_entered + max(1000, fsm.backoff)
@staticmethod
@@ -92,13 +92,24 @@ class Reconnect(object):
is_connected = True
@staticmethod
- def deadline(fsm):
+ def deadline(fsm, now):
if fsm.probe_interval:
base = max(fsm.last_activity, fsm.state_entered)
expiration = base + fsm.probe_interval
- if (fsm.last_receive_attempt is None or
+ if (now < expiration or
+ fsm.last_receive_attempt is None or
fsm.last_receive_attempt >= expiration):
+ # We still have time before the expiration or the time has
+ # already passed and there was no activity. In the first
+ # case we need to wait for the expiration, in the second -
+ # we're already past the deadline.
return expiration
+ else:
+ # Time has already passed, but we didn't attempt to receive
+ # anything. We need to wake up and try to receive even if
+ # nothing is pending, so we can update the expiration time
+ # or transition to a different state.
+ return now + 1
return None
@staticmethod
@@ -114,12 +125,15 @@ class Reconnect(object):
is_connected = True
@staticmethod
- def deadline(fsm):
+ def deadline(fsm, now):
if fsm.probe_interval:
expiration = fsm.state_entered + fsm.probe_interval
- if (fsm.last_receive_attempt is None or
+ if (now < expiration or
+ fsm.last_receive_attempt is None or
fsm.last_receive_attempt >= expiration):
return expiration
+ else:
+ return now + 1
return None
@staticmethod
@@ -134,7 +148,7 @@ class Reconnect(object):
is_connected = False
@staticmethod
- def deadline(fsm):
+ def deadline(fsm, now):
return fsm.state_entered
@staticmethod
@@ -545,7 +559,7 @@ class Reconnect(object):
returned if the "probe interval" is nonzero--see
self.set_probe_interval())."""
- deadline = self.state.deadline(self)
+ deadline = self.state.deadline(self, now)
if deadline is not None and now >= deadline:
return self.state.run(self, now)
else:
@@ -562,7 +576,7 @@ class Reconnect(object):
"""Returns the number of milliseconds after which self.run() should be
called if nothing else notable happens in the meantime, or None if this
is currently unnecessary."""
- deadline = self.state.deadline(self)
+ deadline = self.state.deadline(self, now)
if deadline is not None:
remaining = deadline - now
return max(0, remaining)
diff --git a/python/ovs/socket_util.py b/python/ovs/socket_util.py
index 3faa64e9d7..651012bf06 100644
--- a/python/ovs/socket_util.py
+++ b/python/ovs/socket_util.py
@@ -222,8 +222,7 @@ def inet_parse_active(target, default_port):
return (host_name, port)
-def inet_open_active(style, target, default_port, dscp):
- address = inet_parse_active(target, default_port)
+def inet_create_socket_active(style, address):
try:
is_addr_inet = is_valid_ipv4_address(address[0])
if is_addr_inet:
@@ -235,23 +234,32 @@ def inet_open_active(style, target, default_port, dscp):
except socket.error as e:
return get_exception_errno(e), None
+ return family, sock
+
+
+def inet_connect_active(sock, address, family, dscp):
try:
set_nonblocking(sock)
set_dscp(sock, family, dscp)
- try:
- sock.connect(address)
- except socket.error as e:
- error = get_exception_errno(e)
- if sys.platform == 'win32' and error == errno.WSAEWOULDBLOCK:
- # WSAEWOULDBLOCK would be the equivalent on Windows
- # for EINPROGRESS on Unix.
- error = errno.EINPROGRESS
- if error != errno.EINPROGRESS:
- raise
- return 0, sock
+ error = sock.connect_ex(address)
+ if error not in (0, errno.EINPROGRESS, errno.EWOULDBLOCK):
+ sock.close()
+ return error
+ return 0
except socket.error as e:
sock.close()
- return get_exception_errno(e), None
+ return get_exception_errno(e)
+
+
+def inet_open_active(style, target, default_port, dscp):
+ address = inet_parse_active(target, default_port)
+ family, sock = inet_create_socket_active(style, address)
+ if sock is None:
+ return family, sock
+ error = inet_connect_active(sock, address, family, dscp)
+ if error:
+ return error, None
+ return 0, sock
def get_exception_errno(e):
diff --git a/python/ovs/stream.py b/python/ovs/stream.py
index f5a520862c..ac5b0fd0c6 100644
--- a/python/ovs/stream.py
+++ b/python/ovs/stream.py
@@ -22,9 +22,9 @@ import ovs.socket_util
import ovs.vlog
try:
- from OpenSSL import SSL
+ import ssl
except ImportError:
- SSL = None
+ ssl = None
if sys.platform == 'win32':
import ovs.winutils as winutils
@@ -322,6 +322,12 @@ class Stream(object):
The recv function will not block waiting for data to arrive. If no
data have been received, it returns (errno.EAGAIN, "") immediately."""
+ try:
+ return self._recv(n)
+ except socket.error as e:
+ return (ovs.socket_util.get_exception_errno(e), "")
+
+ def _recv(self, n):
retval = self.connect()
if retval != 0:
return (retval, "")
@@ -331,10 +337,7 @@ class Stream(object):
if sys.platform == 'win32' and self.socket is None:
return self.__recv_windows(n)
- try:
- return (0, self.socket.recv(n))
- except socket.error as e:
- return (ovs.socket_util.get_exception_errno(e), "")
+ return (0, self.socket.recv(n))
def __recv_windows(self, n):
if self._read_pending:
@@ -396,6 +399,12 @@ class Stream(object):
Will not block. If no bytes can be immediately accepted for
transmission, returns -errno.EAGAIN immediately."""
+ try:
+ return self._send(buf)
+ except socket.error as e:
+ return -ovs.socket_util.get_exception_errno(e)
+
+ def _send(self, buf):
retval = self.connect()
if retval != 0:
return -retval
@@ -409,10 +418,7 @@ class Stream(object):
if sys.platform == 'win32' and self.socket is None:
return self.__send_windows(buf)
- try:
- return self.socket.send(buf)
- except socket.error as e:
- return -ovs.socket_util.get_exception_errno(e)
+ return self.socket.send(buf)
def __send_windows(self, buf):
if self._write_pending:
@@ -769,35 +775,42 @@ class SSLStream(Stream):
def check_connection_completion(sock):
try:
return Stream.check_connection_completion(sock)
- except SSL.SysCallError as e:
+ except ssl.SSLSyscallError as e:
return ovs.socket_util.get_exception_errno(e)
@staticmethod
def needs_probes():
return True
- @staticmethod
- def verify_cb(conn, cert, errnum, depth, ok):
- return ok
-
@staticmethod
def _open(suffix, dscp):
- error, sock = TCPStream._open(suffix, dscp)
- if error:
- return error, None
+ address = ovs.socket_util.inet_parse_active(suffix, 0)
+ family, sock = ovs.socket_util.inet_create_socket_active(
+ socket.SOCK_STREAM, address)
+ if sock is None:
+ return family, sock
# Create an SSL context
- ctx = SSL.Context(SSL.SSLv23_METHOD)
- ctx.set_verify(SSL.VERIFY_PEER, SSLStream.verify_cb)
- ctx.set_options(SSL.OP_NO_SSLv2 | SSL.OP_NO_SSLv3)
+ ctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+ ctx.verify_mode = ssl.CERT_REQUIRED
+ ctx.options |= ssl.OP_NO_SSLv2
+ ctx.options |= ssl.OP_NO_SSLv3
# If the client has not set the SSL configuration files
# exception would be raised.
- ctx.use_privatekey_file(Stream._SSL_private_key_file)
- ctx.use_certificate_file(Stream._SSL_certificate_file)
ctx.load_verify_locations(Stream._SSL_ca_cert_file)
+ ctx.load_cert_chain(Stream._SSL_certificate_file,
+ Stream._SSL_private_key_file)
+ ssl_sock = ctx.wrap_socket(sock, do_handshake_on_connect=False)
- ssl_sock = SSL.Connection(ctx, sock)
- ssl_sock.set_connect_state()
+ # Connect
+ error = ovs.socket_util.inet_connect_active(ssl_sock, address, family,
+ dscp)
+ if not error:
+ try:
+ ssl_sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
+ except socket.error as e:
+ ssl_sock.close()
+ return ovs.socket_util.get_exception_errno(e), None
return error, ssl_sock
def connect(self):
@@ -809,40 +822,44 @@ class SSLStream(Stream):
# TCP Connection is successful. Now do the SSL handshake
try:
self.socket.do_handshake()
- except SSL.WantReadError:
+ except ssl.SSLWantReadError:
return errno.EAGAIN
- except SSL.SysCallError as e:
+ except ssl.SSLSyscallError as e:
return ovs.socket_util.get_exception_errno(e)
return 0
def recv(self, n):
try:
- return super(SSLStream, self).recv(n)
- except SSL.WantReadError:
+ return super(SSLStream, self)._recv(n)
+ except ssl.SSLWantReadError:
return (errno.EAGAIN, "")
- except SSL.SysCallError as e:
+ except ssl.SSLSyscallError as e:
return (ovs.socket_util.get_exception_errno(e), "")
- except SSL.ZeroReturnError:
+ except ssl.SSLZeroReturnError:
return (0, "")
+ except socket.error as e:
+ return (ovs.socket_util.get_exception_errno(e), "")
def send(self, buf):
try:
- return super(SSLStream, self).send(buf)
- except SSL.WantWriteError:
+ return super(SSLStream, self)._send(buf)
+ except ssl.SSLWantWriteError:
return -errno.EAGAIN
- except SSL.SysCallError as e:
+ except ssl.SSLSyscallError as e:
+ return -ovs.socket_util.get_exception_errno(e)
+ except socket.error as e:
return -ovs.socket_util.get_exception_errno(e)
def close(self):
if self.socket:
try:
- self.socket.shutdown()
- except SSL.Error:
+ self.socket.shutdown(socket.SHUT_RDWR)
+ except socket.error:
pass
return super(SSLStream, self).close()
-if SSL:
+if ssl:
# Register SSL only if the OpenSSL module is available
Stream.register_method("ssl", SSLStream)
diff --git a/tests/flowgen.py b/tests/flowgen.py
index 7ef32d13cb..cb0e9df388 100755
--- a/tests/flowgen.py
+++ b/tests/flowgen.py
@@ -135,7 +135,7 @@ def output(attrs):
12893) # urgent pointer
if attrs['TP_PROTO'] == 'TCP+options':
tcp = (tcp[:12]
- + struct.pack('H', (6 << 12) | 0x02 | 0x10)
+ + struct.pack('>H', (6 << 12) | 0x02 | 0x10)
+ tcp[14:])
tcp += struct.pack('>BBH', 2, 4, 1975) # MSS option
tcp += b'payload'
@@ -166,15 +166,15 @@ def output(attrs):
ip = ip[:2] + struct.pack('>H', len(ip)) + ip[4:]
packet += ip
if attrs['DL_HEADER'].startswith('802.2'):
- packet_len = len(packet)
+ packet_len = len(packet) - 14
if flow['DL_VLAN'] != 0xffff:
packet_len -= 4
packet = (packet[:len_ofs]
+ struct.pack('>H', packet_len)
+ packet[len_ofs + 2:])
- print(' '.join(['%s=%s' for k, v in attrs.items()]))
- print(' '.join(['%s=%s' for k, v in flow.items()]))
+ print(' '.join(['%s=%s' % (k, v) for k, v in attrs.items()]))
+ print(' '.join(['%s=%s' % (k, v) for k, v in flow.items()]))
print()
flows.write(struct.pack('>LH',
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index 956a69e1fa..df62bb9e8a 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -4862,6 +4862,54 @@ recirc_id(0),in_port(90),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(proto=6,fr
OVS_VSWITCHD_STOP
AT_CLEANUP
+AT_SETUP([ofproto-dpif - handling of malformed TCP packets])
+OVS_VSWITCHD_START
+add_of_ports br0 1 90
+
+dnl Drop TCP packets with destination port 0-15, but allow other TCP packets.
+AT_DATA([flows.txt], [dnl
+priority=75 tcp tp_dst=0/0xfff0 actions=drop
+priority=50 tcp actions=output:1
+])
+AT_CHECK([ovs-ofctl replace-flows br0 flows.txt])
+
+dnl good tcp pkt, tcp(sport=100,dport=16)
+pkt1="be95df40fb57fa163e5ee3570800450000280001000040063e940a0a0a0a141414140064001000000000000000005002200053330000"
+
+dnl malformed tcp pkt(tcp_hdr < 20 byte), tcp(sport=100,dport=16,dataofs=1)
+pkt2="be95df40fb57fa163e5ee3570800450000280001000040063e940a0a0a0a141414140064001000000000000000001002200093330000"
+
+dnl malformed tcp pkt(tcp_hdr > pkt_len), tcp(sport=100,dport=16,dataofs=15)
+pkt3="be95df40fb57fa163e5ee3570800450000280001000040063e940a0a0a0a14141414006400100000000000000000f002200093330000"
+
+AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg])
+
+AT_CHECK([ovs-appctl netdev-dummy/receive p90 "$pkt1"], [0], [stdout])
+dnl for good tcp pkt, ovs can extract the tp_dst=16
+AT_CHECK([ovs-appctl dpctl/dump-flows filter=in_port\(90\),tcp], [0], [dnl
+flow-dump from the main thread:
+recirc_id(0),in_port(90),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(proto=6,frag=no),tcp(dst=16/0xfff0), packets:0, bytes:0, used:never, actions:1
+])
+
+AT_CHECK([ovs-appctl revalidator/purge], [0], [stdout])
+AT_CHECK([ovs-appctl netdev-dummy/receive p90 "$pkt2"], [0], [stdout])
+dnl for malformed tcp pkt(tcp_hdr < 20 byte), ovs uses default value tp_dst=0
+AT_CHECK([ovs-appctl dpctl/dump-flows filter=in_port\(90\),tcp], [0], [dnl
+flow-dump from the main thread:
+recirc_id(0),in_port(90),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(proto=6,frag=no),tcp(dst=0/0xfff0), packets:0, bytes:0, used:never, actions:drop
+])
+
+AT_CHECK([ovs-appctl revalidator/purge], [0], [stdout])
+AT_CHECK([ovs-appctl netdev-dummy/receive p90 "$pkt3"], [0], [stdout])
+dnl for malformed tcp pkt(tcp_hdr > pkt_len), ovs uses default value tp_dst=0
+AT_CHECK([ovs-appctl dpctl/dump-flows filter=in_port\(90\),tcp], [0], [dnl
+flow-dump from the main thread:
+recirc_id(0),in_port(90),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(proto=6,frag=no),tcp(dst=0/0xfff0), packets:0, bytes:0, used:never, actions:drop
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
AT_SETUP([ofproto-dpif - exit])
OVS_VSWITCHD_START
add_of_ports br0 1 2 3 10 11 12 13 14
@@ -7524,7 +7572,7 @@ dnl configure bridge IPFIX and ensure that sample action generation works at the
dnl datapath level.
AT_SETUP([ofproto-dpif - Bridge IPFIX sanity check])
OVS_VSWITCHD_START
-add_of_ports br0 1 2
+add_of_ports br0 1 2 3
dnl Sample every packet using bridge-based sampling.
AT_CHECK([ovs-vsctl -- set bridge br0 ipfix=@fix -- \
@@ -7540,6 +7588,28 @@ flow-dump from the main thread:
packets:2, bytes:68, used:0.001s, actions:userspace(pid=0,ipfix(output_port=4294967295))
])
+AT_CHECK([ovs-appctl revalidator/purge])
+
+dnl Check sample is performed even if only one of the ports is present.
+AT_DATA([flows.txt], [dnl
+table=0,in_port=3,tcp actions=load:0xffff->NXM_OF_IN_PORT[],ct(zone=1,table=1)
+table=1,tcp, actions=output:2
+])
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+for i in `seq 1 3`; do
+ AT_CHECK([ovs-appctl netdev-dummy/receive p3 'in_port(3),eth(src=50:54:00:00:00:08,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=192.168.0.2,dst=192.168.0.1,proto=6,tos=0,ttl=64,frag=no)'])
+done
+
+AT_CHECK([ovs-appctl dpctl/dump-flows | sed 's/.*\(packets:\)/\1/' | sed 's/used:[[0-9]].[[0-9]]*s/used:0.001s/'], [0], [dnl
+flow-dump from the main thread:
+packets:2, bytes:236, used:0.001s, actions:userspace(pid=0,ipfix(output_port=2)),2
+packets:2, bytes:236, used:0.001s, actions:userspace(pid=0,ipfix(output_port=4294967295)),ct(zone=1),recirc(0x1)
+])
+
+AT_CHECK([ovs-ofctl del-flows br0 in_port=3])
+AT_CHECK([ovs-ofctl del-flows br0 table=1])
+
AT_CHECK([ovs-appctl revalidator/purge])
dnl
dnl Add a slowpath meter. The userspace action should be metered.
@@ -8591,6 +8661,34 @@ AT_CHECK([sed -n 's/=[[0-9]][[0-9]]\(\.[[0-9]][[0-9]]*\)\{0,1\}s/=?s/p' stdout],
OVS_VSWITCHD_STOP
AT_CLEANUP
+
+AT_SETUP([ofproto-dpif - patch ports - meter (clone)])
+
+OVS_VSWITCHD_START(
+ [add-port br0 p0 -- set Interface p0 type=dummy ofport_request=1 -- \
+ add-port br0 p1 -- set Interface p1 type=patch \
+ options:peer=p2 ofport_request=2 -- \
+ add-br br1 -- \
+ set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- \
+ set bridge br1 datapath-type=dummy other-config:datapath-id=1234 \
+ fail-mode=secure -- \
+ add-port br1 p2 -- set Interface p2 type=patch \
+ options:peer=p1 -- \
+ add-port br1 p3 -- set Interface p3 type=dummy ofport_request=3])
+
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br1 'meter=1 pktps stats bands=type=drop rate=2'])
+AT_CHECK([ovs-ofctl del-flows br0])
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 in_port=local,ip,actions=2,1])
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br1 in_port=1,ip,actions=meter:1,3])
+
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(100),eth(src=f8:bc:12:44:34:b6,dst=f8:bc:12:46:58:e0),eth_type(0x0800),ipv4(src=10.1.1.22,dst=10.0.0.3,proto=6,tos=0,ttl=64,frag=no),tcp(src=53295,dst=8080)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+ [Datapath actions: clone(meter(0),3),1
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
dnl ----------------------------------------------------------------------
AT_BANNER([ofproto-dpif -- megaflows])
@@ -9695,6 +9793,26 @@ OFPST_TABLE reply (OF1.3) (xid=0x2):
OVS_VSWITCHD_STOP
AT_CLEANUP
+AT_SETUP([ofproto-dpif packet-out table meter drop])
+OVS_VSWITCHD_START
+add_of_ports br0 1 2
+
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br0 'meter=1 pktps bands=type=drop rate=1'])
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 'in_port=1 action=meter:1,output:2'])
+
+ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000400080000 actions=resubmit(,0)"
+ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000400080000 actions=resubmit(,0)"
+
+# Check that vswitchd hasn't crashed by dumping the meter added above
+AT_CHECK([ovs-ofctl -O OpenFlow13 dump-meters br0 | ofctl_strip], [0], [dnl
+OFPST_METER_CONFIG reply (OF1.3):
+meter=1 pktps bands=
+type=drop rate=1
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
AT_SETUP([ofproto-dpif - ICMPv6])
OVS_VSWITCHD_START
add_of_ports br0 1
@@ -11404,6 +11522,23 @@ Megaflow: recirc_id=0x3,eth,ip,in_port=1,nw_frag=no
Datapath actions: 4
])
+ovs-ofctl del-flows br0
+
+AT_DATA([flows.txt], [dnl
+table=0,in_port=1 actions=load:0x1->NXM_NX_REG1[[]],resubmit(,1),load:0x2->NXM_NX_REG1[[]],resubmit(,1),load:0x3->NXM_NX_REG1[[]],resubmit(,1)
+table=1,in_port=1,reg1=0x1 actions=check_pkt_larger(200)->NXM_NX_REG0[[0]],resubmit(,4)
+table=1,in_port=1,reg1=0x2 actions=output:2
+table=1,in_port=1,reg1=0x3 actions=output:4
+table=4,in_port=1 actions=output:3
+])
+
+AT_CHECK([ovs-ofctl --protocols=OpenFlow10 add-flows br0 flows.txt])
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.10.10.2,dst=10.10.10.1,proto=1,tos=1,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout])
+AT_CHECK([cat stdout | grep Datapath -B1], [0], [dnl
+Megaflow: recirc_id=0,eth,ip,in_port=1,nw_frag=no
+Datapath actions: check_pkt_len(size=200,gt(3),le(3)),2,4
+])
+
OVS_VSWITCHD_STOP
AT_CLEANUP
diff --git a/tests/ovs-ofctl.at b/tests/ovs-ofctl.at
index 604f15c2d1..c93cb9f16c 100644
--- a/tests/ovs-ofctl.at
+++ b/tests/ovs-ofctl.at
@@ -3246,3 +3246,22 @@ dnl because we need ovs-vswitchd to have the controller config before starting
dnl the controller to 'snoop' the OpenFlow messages from beginning
OVS_VSWITCHD_STOP(["/connection failed (No such file or directory)/d"])
AT_CLEANUP
+
+
+AT_SETUP([ovs-ofctl show-flows - Oversized flow])
+OVS_VSWITCHD_START
+
+printf " priority=90,icmp,reg15=0x8005,metadata=0x1,nw_dst=11.0.0.1,icmp_type=8,icmp_code=0 actions=" > flow.txt
+for i in `seq 1 1022`; do printf "set_field:0x399->reg13,set_field:0x$i->reg15,resubmit(,39),"; done >> flow.txt
+printf "resubmit(,39)\n" >> flow.txt
+
+AT_CHECK([ovs-ofctl -O OpenFlow15 add-flows br0 flow.txt])
+
+AT_CHECK([ovs-ofctl -O OpenFlow10 dump-flows br0 | ofctl_strip | sed '/NXST_FLOW/d' | sort], [0], [])
+OVS_WAIT_UNTIL([grep -q "ofp_flow|WARN|Flow exceeded the maximum flow statistics reply size and was excluded from the response set" ovs-vswitchd.log])
+
+cat flow.txt > expout
+AT_CHECK([ovs-ofctl -O OpenFlow15 dump-flows br0 | ofctl_strip | sed '/OFPST_FLOW/d' | sort], [0], [expout])
+
+OVS_VSWITCHD_STOP(["/Flow exceeded the maximum flow statistics reply size and was excluded from the response set/d"])
+AT_CLEANUP
diff --git a/tests/ovsdb-data.at b/tests/ovsdb-data.at
index 8cd2a26cb3..25c6acdac6 100644
--- a/tests/ovsdb-data.at
+++ b/tests/ovsdb-data.at
@@ -846,18 +846,21 @@ OVSDB_CHECK_POSITIVE([generate and apply diff -- integer],
[[diff-data '["integer"]' '[0]' '[2]']],
[[diff: 2
apply diff: 2
+apply diff in place: 2
OK]])
OVSDB_CHECK_POSITIVE([generate and apply diff -- boolean],
[[diff-data '["boolean"]' '[true]' '[false]']],
[[diff: false
apply diff: false
+apply diff in place: false
OK]])
OVSDB_CHECK_POSITIVE([generate and apply diff -- string],
[[diff-data '["string"]' '["AAA"]' '["BBB"]']],
[[diff: "BBB"
apply diff: "BBB"
+apply diff in place: "BBB"
OK]])
dnl Test set modifications.
@@ -870,15 +873,19 @@ OVSDB_CHECK_POSITIVE([generate and apply diff -- set],
]],
[[diff: ["set",[0,2]]
apply diff: ["set",[1,2]]
+apply diff in place: ["set",[1,2]]
OK
diff: 0
apply diff: 1
+apply diff in place: 1
OK
diff: ["set",[0,1]]
apply diff: ["set",[0,1]]
+apply diff in place: ["set",[0,1]]
OK
diff: ["set",[0,1]]
apply diff: ["set",[]]
+apply diff in place: ["set",[]]
OK]])
dnl Test set modifications causes data to violate set size constrain.
@@ -898,18 +905,23 @@ OVSDB_CHECK_POSITIVE([generate and apply diff -- map],
]],
[[diff: ["map",[["2 gills","1 chopin"],["2 pints","1 quart"]]]
apply diff: ["map",[["2 pints","1 quart"]]]
+apply diff in place: ["map",[["2 pints","1 quart"]]]
OK
diff: ["map",[]]
apply diff: ["map",[["2 gills","1 chopin"]]]
+apply diff in place: ["map",[["2 gills","1 chopin"]]]
OK
diff: ["map",[["2 gills","1 chopin"]]]
apply diff: ["map",[]]
+apply diff in place: ["map",[]]
OK
diff: ["map",[["2 pints","1 quart"]]]
apply diff: ["map",[["2 pints","1 quart"]]]
+apply diff in place: ["map",[["2 pints","1 quart"]]]
OK
diff: ["map",[["2 gills","1 gallon"]]]
apply diff: ["map",[["2 gills","1 gallon"]]]
+apply diff in place: ["map",[["2 gills","1 gallon"]]]
OK]])
OVSDB_CHECK_NEGATIVE([generate and apply diff with map -- size error],
diff --git a/tests/ovsdb-idl.at b/tests/ovsdb-idl.at
index 1386f13770..91d34d0de6 100644
--- a/tests/ovsdb-idl.at
+++ b/tests/ovsdb-idl.at
@@ -225,7 +225,7 @@ m4_define([OVSDB_CHECK_IDL_TCP6_MULTIPLE_REMOTES_PY],
m4_define([OVSDB_CHECK_IDL_SSL_PY],
[AT_SETUP([$1 - Python3 - SSL])
AT_SKIP_IF([test "$HAVE_OPENSSL" = no])
- $PYTHON3 -c "import OpenSSL.SSL"
+ $PYTHON3 -c "import ssl"
SSL_PRESENT=$?
AT_SKIP_IF([test $SSL_PRESENT != 0])
AT_KEYWORDS([ovsdb server idl positive Python with ssl socket $5])
@@ -2309,7 +2309,7 @@ OVSDB_CHECK_CLUSTER_IDL_C([simple idl, monitor_cond_since, cluster disconnect],
'condition simple [["i","==",2]]' \
'condition simple [["i","==",1]]' \
'+reconnect' \
- '["idltest",
+ '?["idltest",
{"op": "update",
"table": "simple",
"where": [["i", "==", 1]],
@@ -2320,7 +2320,7 @@ OVSDB_CHECK_CLUSTER_IDL_C([simple idl, monitor_cond_since, cluster disconnect],
003: table simple: i=2 r=1 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1>
004: change conditions
005: reconnect
-006: table simple: i=2 r=1 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1>
+006: table simple
007: {"error":null,"result":[{"count":1}]}
008: table simple: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2>
009: done
diff --git a/tests/ovsdb-server.at b/tests/ovsdb-server.at
index ac243d6a79..876cb836cd 100644
--- a/tests/ovsdb-server.at
+++ b/tests/ovsdb-server.at
@@ -1228,6 +1228,71 @@ AT_CHECK([test $logged_updates -lt $logged_nonblock_updates])
AT_CHECK_UNQUOTED([ovs-vsctl get open_vswitch . system_version], [0],
[xyzzy$counter
])
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+AT_CLEANUP
+
+AT_SETUP([ovsdb-server transaction history size])
+on_exit 'kill `cat *.pid`'
+
+dnl Start an ovsdb-server with the clustered vswitchd schema.
+AT_CHECK([ovsdb-tool create-cluster db dnl
+ $abs_top_srcdir/vswitchd/vswitch.ovsschema unix:s1.raft],
+ [0], [ignore], [ignore])
+AT_CHECK([ovsdb-server --detach --no-chdir --pidfile dnl
+ --log-file --remote=punix:db.sock db],
+ [0], [ignore], [ignore])
+AT_CHECK([ovs-vsctl --no-wait init])
+
+dnl Create a bridge with N ports per transaction, increasing N every 4
+dnl iterations, and then remove the bridges. Growing the transactions
+dnl ensures that they take up a significant percentage of the total
+dnl database size, so the transaction history will not be able to hold
+dnl all of them.
+dnl
+dnl The test verifies that the number of atoms in the transaction history
+dnl never exceeds the number of atoms in the database, except for the
+dnl case where there is only one transaction in the history.
+get_memory_value () {    # Prints the memory/show counter named $1, or 0.
+    n=$(ovs-appctl -t ovsdb-server memory/show dnl
+           | tr ' ' '\n' | grep "^$1:" | cut -d ':' -f 2)
+    if test X"$n" = "X"; then    # Counter not reported; '=' is POSIX.
+        n=0
+    fi
+    echo $n
+}
+
+check_atoms () {    # Checks that history atoms do not exceed database atoms.
+    if test $(get_memory_value txn-history) -eq 1; then return; fi  # Skip.
+    n_db_atoms=$(get_memory_value atoms)
+    n_txn_history_atoms=$(get_memory_value txn-history-atoms)
+    echo "n_db_atoms:          $n_db_atoms"
+    echo "n_txn_history_atoms: $n_txn_history_atoms"
+    AT_CHECK([test $n_txn_history_atoms -le $n_db_atoms])
+}
+
+add_ports () {    # Prints " -- add-port br$1 p$1-J" for each J in 1..$2.
+    for j in $(seq 1 $2); do
+        printf " -- add-port br$1 p$1-%d" $j
+    done
+}
+
+initial_db_atoms=$(get_memory_value atoms)
+
+for i in $(seq 1 100); do
+ cmd=$(add_ports $i $(($i / 4 + 1)))
+ AT_CHECK([ovs-vsctl --no-wait add-br br$i $cmd])
+ check_atoms
+done
+
+for i in $(seq 1 100); do
+ AT_CHECK([ovs-vsctl --no-wait del-br br$i])
+ check_atoms
+done
+
+dnl After removing all the bridges, the number of atoms in the database
+dnl should return to its initial value.
+AT_CHECK([test $(get_memory_value atoms) -eq $initial_db_atoms])
+
OVS_APP_EXIT_AND_WAIT([ovsdb-server])
AT_CLEANUP
diff --git a/tests/reconnect.at b/tests/reconnect.at
index 0f74709f5a..5bca84351c 100644
--- a/tests/reconnect.at
+++ b/tests/reconnect.at
@@ -39,8 +39,19 @@ run
connected
# Try timeout without noting that we tried to receive.
-# (This does nothing since we never timeout in this case.)
+# Timeout should be scheduled to the next probe interval.
timeout
+run
+
+# Once we reached the timeout, it should not expire until the receive actually
+# attempted. However, we still need to wake up as soon as possible in order to
+# have a chance to mark the receive attempt even if nothing was received.
+timeout
+run
+
+# Short time advance past the original probe interval, but not expired still.
+timeout
+run
# Now disable the receive-attempted feature and timeout again.
receive-attempted LLONG_MAX
@@ -67,18 +78,37 @@ connected
last connected 0 ms ago, connected 0 ms total
# Try timeout without noting that we tried to receive.
-# (This does nothing since we never timeout in this case.)
-timeout
- no timeout
-
-# Now disable the receive-attempted feature and timeout again.
-receive-attempted LLONG_MAX
+# Timeout should be scheduled to the next probe interval.
timeout
advance 5000 ms
### t=6000 ###
in ACTIVE for 5000 ms (0 ms backoff)
run
+
+# Once we reached the timeout, it should not expire until the receive actually
+# attempted. However, we still need to wake up as soon as possible in order to
+# have a chance to mark the receive attempt even if nothing was received.
+timeout
+ advance 1 ms
+
+### t=6001 ###
+ in ACTIVE for 5001 ms (0 ms backoff)
+run
+
+# Short time advance past the original probe interval, but not expired still.
+timeout
+ advance 1 ms
+
+### t=6002 ###
+ in ACTIVE for 5002 ms (0 ms backoff)
+run
+
+# Now disable the receive-attempted feature and timeout again.
+receive-attempted LLONG_MAX
+timeout
+ advance 0 ms
+run
should send probe
in IDLE for 0 ms (0 ms backoff)
@@ -86,7 +116,7 @@ run
timeout
advance 5000 ms
-### t=11000 ###
+### t=11002 ###
in IDLE for 5000 ms (0 ms backoff)
run
should disconnect
@@ -94,7 +124,7 @@ disconnected
in BACKOFF for 0 ms (1000 ms backoff)
1 successful connections out of 1 attempts, seqno 2
disconnected
- disconnected at 11000 ms (0 ms ago)
+ disconnected at 11002 ms (0 ms ago)
])
######################################################################
@@ -111,8 +141,19 @@ run
connected
# Try timeout without noting that we tried to receive.
-# (This does nothing since we never timeout in this case.)
+# Timeout should be scheduled to the next probe interval.
+timeout
+run
+
+# Once we reached the timeout, it should not expire until the receive actually
+# attempted. However, we still need to wake up as soon as possible in order to
+# have a chance to mark the receive attempt even if nothing was received.
+timeout
+run
+
+# Short time advance past the original probe interval, but not expired still.
timeout
+run
# Now disable the receive-attempted feature and timeout again.
receive-attempted LLONG_MAX
@@ -148,18 +189,37 @@ connected
last connected 0 ms ago, connected 0 ms total
# Try timeout without noting that we tried to receive.
-# (This does nothing since we never timeout in this case.)
-timeout
- no timeout
-
-# Now disable the receive-attempted feature and timeout again.
-receive-attempted LLONG_MAX
+# Timeout should be scheduled to the next probe interval.
timeout
advance 5000 ms
### t=6500 ###
in ACTIVE for 5000 ms (0 ms backoff)
run
+
+# Once we reached the timeout, it should not expire until the receive actually
+# attempted. However, we still need to wake up as soon as possible in order to
+# have a chance to mark the receive attempt even if nothing was received.
+timeout
+ advance 1 ms
+
+### t=6501 ###
+ in ACTIVE for 5001 ms (0 ms backoff)
+run
+
+# Short time advance past the original probe interval, but not expired still.
+timeout
+ advance 1 ms
+
+### t=6502 ###
+ in ACTIVE for 5002 ms (0 ms backoff)
+run
+
+# Now disable the receive-attempted feature and timeout again.
+receive-attempted LLONG_MAX
+timeout
+ advance 0 ms
+run
should send probe
in IDLE for 0 ms (0 ms backoff)
@@ -167,7 +227,7 @@ run
timeout
advance 5000 ms
-### t=11500 ###
+### t=11502 ###
in IDLE for 5000 ms (0 ms backoff)
run
should disconnect
@@ -175,7 +235,7 @@ disconnected
in BACKOFF for 0 ms (1000 ms backoff)
1 successful connections out of 1 attempts, seqno 2
disconnected
- disconnected at 11500 ms (0 ms ago)
+ disconnected at 11502 ms (0 ms ago)
])
######################################################################
@@ -1271,14 +1331,14 @@ activity
created 1000, last activity 3000, last connected 2000
# Connection times out.
-timeout
- no timeout
-receive-attempted LLONG_MAX
timeout
advance 5000 ms
### t=8000 ###
in ACTIVE for 6000 ms (1000 ms backoff)
+receive-attempted LLONG_MAX
+timeout
+ advance 0 ms
run
should send probe
in IDLE for 0 ms (1000 ms backoff)
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index f400cfabc9..092de308be 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -1981,6 +1981,111 @@ tcp,orig=(src=10.1.1.3,dst=10.1.1.4,sport=<cleared>,dport=<cleared>),reply=(src=
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
+AT_SETUP([conntrack - zones from other field])
+CHECK_CONNTRACK()
+OVS_TRAFFIC_VSWITCHD_START()
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+
+dnl Allow any traffic from ns0->ns1. Only allow nd, return traffic from ns1->ns0.
+AT_DATA([flows.txt], [dnl
+priority=1,action=drop
+priority=10,arp,action=normal
+priority=10,icmp,action=normal
+priority=100,in_port=1,tcp,ct_state=-trk,action=ct(zone=5,table=0)
+priority=100,in_port=1,tcp,ct_state=+trk,action=ct(commit,zone=NXM_NX_CT_ZONE[]),2
+priority=100,in_port=2,ct_state=-trk,tcp,action=ct(table=0,zone=5)
+priority=100,in_port=2,ct_state=+trk,ct_zone=5,tcp,action=1
+])
+
+AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
+
+OVS_START_L7([at_ns1], [http])
+
+dnl HTTP requests from p0->p1 should work fine.
+NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl
+tcp,dnl
+orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),dnl
+reply=(src=10.1.1.2,dst=10.1.1.1,sport=<cleared>,dport=<cleared>),dnl
+zone=5,protoinfo=(state=<cleared>)
+])
+
+dnl This tests that when the zone id is set from a field such as
+dnl NXM_NX_CT_ZONE, the OVS xlate generates a megaflow of the form
+dnl "ct_zone(5), ... actions: ct(commit, zone=5)". The match "ct_zone(5)"
+dnl is needed because, once we change the zone id to 15 below, the old
+dnl "ct_zone(5), ... actions: ct(commit, zone=5)" megaflow must not be hit;
+dnl instead OVS must generate a new megaflow with the match "ct_zone(0xf)".
+dnl This makes sure that the new packets are committed to zone id 15
+dnl rather than the old zone id 5.
+AT_CHECK([ovs-appctl dpctl/dump-flows --names filter=in_port=ovs-p0 dnl
+ | grep "+trk" | grep -q "ct_zone(0x5)" ], [0], [])
+
+AT_CHECK([ovs-ofctl mod-flows br0 dnl
+ 'priority=100,ct_state=-trk,tcp,in_port="ovs-p0" actions=ct(table=0,zone=15)'])
+
+NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log])
+
+AT_CHECK([ovs-appctl dpctl/dump-flows --names filter=in_port=ovs-p0 dnl
+ | grep "+trk" | grep -q "ct_zone(0xf)" ], [0], [])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([conntrack - zones from other field, more tests])
+CHECK_CONNTRACK()
+OVS_TRAFFIC_VSWITCHD_START()
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+
+dnl Allow any traffic from ns0->ns1. Only allow nd, return traffic from ns1->ns0.
+AT_DATA([flows.txt], [dnl
+priority=1,action=drop
+priority=10,arp,action=normal
+priority=10,icmp,action=normal
+priority=100,in_port=1,tcp,ct_state=-trk,action=ct(zone=5,table=0,commit,exec(load:0xffff0005->NXM_NX_CT_LABEL[[0..31]]))
+priority=100,in_port=1,tcp,ct_state=+trk,action=ct(commit,zone=NXM_NX_CT_LABEL[[0..15]]),2
+priority=100,in_port=2,ct_state=-trk,tcp,action=ct(table=0,zone=5)
+priority=100,in_port=2,ct_state=+trk,ct_zone=5,tcp,action=1
+])
+
+AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
+
+OVS_START_L7([at_ns1], [http])
+
+dnl HTTP requests from p0->p1 should work fine.
+NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl
+tcp,dnl
+orig=(src=10.1.1.1,dst=10.1.1.2,sport=<cleared>,dport=<cleared>),dnl
+reply=(src=10.1.1.2,dst=10.1.1.1,sport=<cleared>,dport=<cleared>),dnl
+zone=5,labels=0xffff0005,protoinfo=(state=<cleared>)
+])
+
+AT_CHECK([ovs-appctl dpctl/dump-flows --names filter=in_port=ovs-p0 dnl
+ | grep "+trk" | sed 's/0xffff0005\/0xffff/0x5\/0xffff/' dnl
+ | grep -q "ct_label(0x5/0xffff)" ], [0], [])
+
+AT_CHECK([ovs-ofctl mod-flows br0 'priority=100,ct_state=-trk,tcp,in_port="ovs-p0" actions=ct(table=0,zone=15,commit,exec(load:0xffff000f->NXM_NX_CT_LABEL[[0..31]]))'])
+
+NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log])
+
+AT_CHECK([ovs-appctl dpctl/dump-flows --names filter=in_port=ovs-p0 dnl
+ | grep "+trk" | sed 's/0xffff000f\/0xffff/0xf\/0xffff/' dnl
+ | grep -q "ct_label(0xf/0xffff)" ], [0], [])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
AT_SETUP([conntrack - multiple bridges])
CHECK_CONNTRACK()
OVS_TRAFFIC_VSWITCHD_START(
@@ -3305,6 +3410,46 @@ NS_CHECK_EXEC([at_ns0], [ping6 -s 3200 -q -c 3 -i 0.3 -w 2 fc00::2 | FORMAT_PING
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
+AT_SETUP([conntrack - IPv4 Fragmentation + NAT])
+AT_SKIP_IF([test $HAVE_TCPDUMP = no])
+CHECK_CONNTRACK()
+
+OVS_TRAFFIC_VSWITCHD_START(
+ [set-fail-mode br0 secure -- ])
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "10.2.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.2.1.2/24")
+
+dnl Create a dummy route for NAT
+NS_CHECK_EXEC([at_ns1], [ip addr add 10.1.1.2/32 dev lo])
+NS_CHECK_EXEC([at_ns0], [ip route add 10.1.1.0/24 via 10.2.1.2])
+NS_CHECK_EXEC([at_ns1], [ip route add 10.1.1.0/24 via 10.2.1.1])
+
+dnl Solely for debugging when things go wrong
+NS_EXEC([at_ns0], [tcpdump -l -n -xx -U -i p0 -w p0.pcap >tcpdump.out 2>/dev/null &])
+NS_EXEC([at_ns1], [tcpdump -l -n -xx -U -i p1 -w p1.pcap >tcpdump.out 2>/dev/null &])
+
+AT_DATA([flows.txt], [dnl
+table=0,arp,actions=normal
+table=0,ct_state=-trk,ip,in_port=ovs-p0, actions=ct(table=1, nat)
+table=0,ct_state=-trk,ip,in_port=ovs-p1, actions=ct(table=1, nat)
+table=1,ct_state=+trk+new,ip,in_port=ovs-p0, actions=ct(commit, nat(src=10.1.1.1)),ovs-p1
+table=1,ct_state=+trk+est,ip,in_port=ovs-p0, actions=ovs-p1
+table=1,ct_state=+trk+est,ip,in_port=ovs-p1, actions=ovs-p0
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+dnl Check connectivity
+NS_CHECK_EXEC([at_ns0], [ping -c 1 10.1.1.2 -M dont -s 4500 | FORMAT_PING], [0], [dnl
+1 packets transmitted, 1 received, 0% packet loss, time 0ms
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
AT_SETUP([conntrack - resubmit to ct multiple times])
CHECK_CONNTRACK()
diff --git a/tests/test-json.c b/tests/test-json.c
index a7ee595e0b..072a537252 100644
--- a/tests/test-json.c
+++ b/tests/test-json.c
@@ -22,6 +22,8 @@
#include <getopt.h>
#include <stdio.h>
#include "ovstest.h"
+#include "random.h"
+#include "timeval.h"
#include "util.h"
/* --pretty: If set, the JSON output is pretty-printed, instead of printed as
@@ -157,3 +159,69 @@ test_json_main(int argc, char *argv[])
}
OVSTEST_REGISTER("test-json", test_json_main);
+
+/* Measures the average time json_to_string() takes on large random strings. */
+static void
+json_string_benchmark_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
+{
+    struct {
+        int n;                    /* Size of the string buffer, in bytes. */
+        int quote_probability;    /* Percent of chars forced to '"'. */
+        int special_probability;  /* Percent of chars below ' '. */
+        int iter;                 /* Number of json_to_string() calls. */
+    } configs[] = {
+        { 100000, 0, 0, 1000, },
+        { 100000, 2, 1, 1000, },
+        { 100000, 10, 1, 1000, },
+        { 10000000, 0, 0, 100, },
+        { 10000000, 2, 1, 100, },
+        { 10000000, 10, 1, 100, },
+        { 100000000, 0, 0, 10, },
+        { 100000000, 2, 1, 10, },
+        { 100000000, 10, 1, 10, },
+    };
+
+    printf(" SIZE Q S TIME\n");
+    printf("--------------------------------------\n");
+
+    for (int i = 0; i < ARRAY_SIZE(configs); i++) {
+        int iter = configs[i].iter;
+        int n = configs[i].n;
+        char *str = xzalloc(n);
+
+        for (int j = 0; j < n - 1; j++) {
+            int r = random_range(100);
+
+            if (r < configs[i].special_probability) {
+                str[j] = random_range(' ' - 1) + 1;
+            } else if (r < (configs[i].special_probability
+                            + configs[i].quote_probability)) {
+                str[j] = '"';
+            } else {
+                str[j] = random_range(256 - ' ') + ' ';
+            }
+        }
+
+        printf("%-11d %-2d %-2d: ", n, configs[i].quote_probability,
+               configs[i].special_probability);
+        fflush(stdout);
+
+        struct json *json = json_string_create_nocopy(str);
+        uint64_t start = time_msec();
+
+        char **res = xzalloc(iter * sizeof *res);
+        for (int j = 0; j < iter; j++) {
+            res[j] = json_to_string(json, 0);
+        }
+
+        printf("%16.3lf ms\n", (double) (time_msec() - start) / iter);
+        json_destroy(json);
+        for (int j = 0; j < iter; j++) {
+            free(res[j]);
+        }
+        free(res);
+    }
+    exit(0);
+}
+
+OVSTEST_REGISTER("json-string-benchmark", json_string_benchmark_main);
diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c
index daa55dab7b..57572cd3ed 100644
--- a/tests/test-ovsdb.c
+++ b/tests/test-ovsdb.c
@@ -512,6 +512,18 @@ do_diff_data(struct ovs_cmdl_context *ctx)
ovs_fatal(0, "failed to apply diff");
}
+ /* Apply diff to 'old' in place. */
+ error = ovsdb_datum_apply_diff_in_place(&old, &diff, &type);
+ if (error) {
+ char *string = ovsdb_error_to_string_free(error);
+ ovs_fatal(0, "%s", string);
+ }
+
+ /* Test to make sure 'old' equals 'new' now. */
+ if (!ovsdb_datum_equals(&new, &old, &type)) {
+ ovs_fatal(0, "failed to apply diff in place");
+ }
+
/* Print diff */
json = ovsdb_datum_to_json(&diff, &type);
printf ("diff: ");
@@ -522,6 +534,11 @@ do_diff_data(struct ovs_cmdl_context *ctx)
printf ("apply diff: ");
print_and_free_json(json);
+ /* Print updated 'old' */
+ json = ovsdb_datum_to_json(&old, &type);
+ printf ("apply diff in place: ");
+ print_and_free_json(json);
+
ovsdb_datum_destroy(&new, &type);
ovsdb_datum_destroy(&old, &type);
ovsdb_datum_destroy(&diff, &type);
@@ -1862,7 +1879,8 @@ print_and_log(const char *format, ...)
}
static char *
-format_idl_row(const struct ovsdb_idl_row *row, int step, const char *contents)
+format_idl_row(const struct ovsdb_idl_row *row, int step, const char *contents,
+ bool terse)
{
const char *change_str =
!ovsdb_idl_track_is_set(row->table)
@@ -1873,9 +1891,13 @@ format_idl_row(const struct ovsdb_idl_row *row, int step, const char *contents)
? "deleted row: "
: "";
- return xasprintf("%03d: table %s: %s%s uuid=" UUID_FMT,
- step, row->table->class_->name, change_str, contents,
- UUID_ARGS(&row->uuid));
+ if (terse) {
+ return xasprintf("%03d: table %s", step, row->table->class_->name);
+ } else {
+ return xasprintf("%03d: table %s: %s%s uuid=" UUID_FMT,
+ step, row->table->class_->name, change_str,
+ contents, UUID_ARGS(&row->uuid));
+ }
}
static void
@@ -1998,7 +2020,7 @@ print_idl_row_updated_singleton(const struct idltest_singleton *sng, int step)
}
static void
-print_idl_row_simple(const struct idltest_simple *s, int step)
+print_idl_row_simple(const struct idltest_simple *s, int step, bool terse)
{
struct ds msg = DS_EMPTY_INITIALIZER;
ds_put_format(&msg, "i=%"PRId64" r=%g b=%s s=%s u="UUID_FMT" ia=[",
@@ -2025,7 +2047,7 @@ print_idl_row_simple(const struct idltest_simple *s, int step)
}
ds_put_cstr(&msg, "]");
- char *row_msg = format_idl_row(&s->header_, step, ds_cstr(&msg));
+ char *row_msg = format_idl_row(&s->header_, step, ds_cstr(&msg), terse);
print_and_log("%s", row_msg);
ds_destroy(&msg);
free(row_msg);
@@ -2034,7 +2056,7 @@ print_idl_row_simple(const struct idltest_simple *s, int step)
}
static void
-print_idl_row_link1(const struct idltest_link1 *l1, int step)
+print_idl_row_link1(const struct idltest_link1 *l1, int step, bool terse)
{
struct ds msg = DS_EMPTY_INITIALIZER;
ds_put_format(&msg, "i=%"PRId64" k=", l1->i);
@@ -2053,7 +2075,7 @@ print_idl_row_link1(const struct idltest_link1 *l1, int step)
ds_put_format(&msg, "%"PRId64, l1->l2->i);
}
- char *row_msg = format_idl_row(&l1->header_, step, ds_cstr(&msg));
+ char *row_msg = format_idl_row(&l1->header_, step, ds_cstr(&msg), terse);
print_and_log("%s", row_msg);
ds_destroy(&msg);
free(row_msg);
@@ -2062,7 +2084,7 @@ print_idl_row_link1(const struct idltest_link1 *l1, int step)
}
static void
-print_idl_row_link2(const struct idltest_link2 *l2, int step)
+print_idl_row_link2(const struct idltest_link2 *l2, int step, bool terse)
{
struct ds msg = DS_EMPTY_INITIALIZER;
ds_put_format(&msg, "i=%"PRId64" l1=", l2->i);
@@ -2070,7 +2092,7 @@ print_idl_row_link2(const struct idltest_link2 *l2, int step)
ds_put_format(&msg, "%"PRId64, l2->l1->i);
}
- char *row_msg = format_idl_row(&l2->header_, step, ds_cstr(&msg));
+ char *row_msg = format_idl_row(&l2->header_, step, ds_cstr(&msg), terse);
print_and_log("%s", row_msg);
ds_destroy(&msg);
free(row_msg);
@@ -2079,7 +2101,7 @@ print_idl_row_link2(const struct idltest_link2 *l2, int step)
}
static void
-print_idl_row_simple3(const struct idltest_simple3 *s3, int step)
+print_idl_row_simple3(const struct idltest_simple3 *s3, int step, bool terse)
{
struct ds msg = DS_EMPTY_INITIALIZER;
size_t i;
@@ -2098,7 +2120,7 @@ print_idl_row_simple3(const struct idltest_simple3 *s3, int step)
}
ds_put_cstr(&msg, "]");
- char *row_msg = format_idl_row(&s3->header_, step, ds_cstr(&msg));
+ char *row_msg = format_idl_row(&s3->header_, step, ds_cstr(&msg), terse);
print_and_log("%s", row_msg);
ds_destroy(&msg);
free(row_msg);
@@ -2107,12 +2129,12 @@ print_idl_row_simple3(const struct idltest_simple3 *s3, int step)
}
static void
-print_idl_row_simple4(const struct idltest_simple4 *s4, int step)
+print_idl_row_simple4(const struct idltest_simple4 *s4, int step, bool terse)
{
struct ds msg = DS_EMPTY_INITIALIZER;
ds_put_format(&msg, "name=%s", s4->name);
- char *row_msg = format_idl_row(&s4->header_, step, ds_cstr(&msg));
+ char *row_msg = format_idl_row(&s4->header_, step, ds_cstr(&msg), terse);
print_and_log("%s", row_msg);
ds_destroy(&msg);
free(row_msg);
@@ -2121,7 +2143,7 @@ print_idl_row_simple4(const struct idltest_simple4 *s4, int step)
}
static void
-print_idl_row_simple6(const struct idltest_simple6 *s6, int step)
+print_idl_row_simple6(const struct idltest_simple6 *s6, int step, bool terse)
{
struct ds msg = DS_EMPTY_INITIALIZER;
ds_put_format(&msg, "name=%s ", s6->name);
@@ -2132,7 +2154,7 @@ print_idl_row_simple6(const struct idltest_simple6 *s6, int step)
}
ds_put_cstr(&msg, "]");
- char *row_msg = format_idl_row(&s6->header_, step, ds_cstr(&msg));
+ char *row_msg = format_idl_row(&s6->header_, step, ds_cstr(&msg), terse);
print_and_log("%s", row_msg);
ds_destroy(&msg);
free(row_msg);
@@ -2141,12 +2163,13 @@ print_idl_row_simple6(const struct idltest_simple6 *s6, int step)
}
static void
-print_idl_row_singleton(const struct idltest_singleton *sng, int step)
+print_idl_row_singleton(const struct idltest_singleton *sng, int step,
+ bool terse)
{
struct ds msg = DS_EMPTY_INITIALIZER;
ds_put_format(&msg, "name=%s", sng->name);
- char *row_msg = format_idl_row(&sng->header_, step, ds_cstr(&msg));
+ char *row_msg = format_idl_row(&sng->header_, step, ds_cstr(&msg), terse);
print_and_log("%s", row_msg);
ds_destroy(&msg);
free(row_msg);
@@ -2155,7 +2178,7 @@ print_idl_row_singleton(const struct idltest_singleton *sng, int step)
}
static void
-print_idl(struct ovsdb_idl *idl, int step)
+print_idl(struct ovsdb_idl *idl, int step, bool terse)
{
const struct idltest_simple3 *s3;
const struct idltest_simple4 *s4;
@@ -2167,31 +2190,31 @@ print_idl(struct ovsdb_idl *idl, int step)
int n = 0;
IDLTEST_SIMPLE_FOR_EACH (s, idl) {
- print_idl_row_simple(s, step);
+ print_idl_row_simple(s, step, terse);
n++;
}
IDLTEST_LINK1_FOR_EACH (l1, idl) {
- print_idl_row_link1(l1, step);
+ print_idl_row_link1(l1, step, terse);
n++;
}
IDLTEST_LINK2_FOR_EACH (l2, idl) {
- print_idl_row_link2(l2, step);
+ print_idl_row_link2(l2, step, terse);
n++;
}
IDLTEST_SIMPLE3_FOR_EACH (s3, idl) {
- print_idl_row_simple3(s3, step);
+ print_idl_row_simple3(s3, step, terse);
n++;
}
IDLTEST_SIMPLE4_FOR_EACH (s4, idl) {
- print_idl_row_simple4(s4, step);
+ print_idl_row_simple4(s4, step, terse);
n++;
}
IDLTEST_SIMPLE6_FOR_EACH (s6, idl) {
- print_idl_row_simple6(s6, step);
+ print_idl_row_simple6(s6, step, terse);
n++;
}
IDLTEST_SINGLETON_FOR_EACH (sng, idl) {
- print_idl_row_singleton(sng, step);
+ print_idl_row_singleton(sng, step, terse);
n++;
}
if (!n) {
@@ -2200,7 +2223,7 @@ print_idl(struct ovsdb_idl *idl, int step)
}
static void
-print_idl_track(struct ovsdb_idl *idl, int step)
+print_idl_track(struct ovsdb_idl *idl, int step, bool terse)
{
const struct idltest_simple3 *s3;
const struct idltest_simple4 *s4;
@@ -2211,27 +2234,27 @@ print_idl_track(struct ovsdb_idl *idl, int step)
int n = 0;
IDLTEST_SIMPLE_FOR_EACH_TRACKED (s, idl) {
- print_idl_row_simple(s, step);
+ print_idl_row_simple(s, step, terse);
n++;
}
IDLTEST_LINK1_FOR_EACH_TRACKED (l1, idl) {
- print_idl_row_link1(l1, step);
+ print_idl_row_link1(l1, step, terse);
n++;
}
IDLTEST_LINK2_FOR_EACH_TRACKED (l2, idl) {
- print_idl_row_link2(l2, step);
+ print_idl_row_link2(l2, step, terse);
n++;
}
IDLTEST_SIMPLE3_FOR_EACH_TRACKED (s3, idl) {
- print_idl_row_simple3(s3, step);
+ print_idl_row_simple3(s3, step, terse);
n++;
}
IDLTEST_SIMPLE4_FOR_EACH_TRACKED (s4, idl) {
- print_idl_row_simple4(s4, step);
+ print_idl_row_simple4(s4, step, terse);
n++;
}
IDLTEST_SIMPLE6_FOR_EACH_TRACKED (s6, idl) {
- print_idl_row_simple6(s6, step);
+ print_idl_row_simple6(s6, step, terse);
n++;
}
@@ -2634,6 +2657,13 @@ do_idl(struct ovs_cmdl_context *ctx)
char *arg = ctx->argv[i];
struct jsonrpc_msg *request, *reply;
+ bool terse = false;
+ if (*arg == '?') {
+ /* We're only interested in terse table contents. */
+ terse = true;
+ arg++;
+ }
+
if (*arg == '+') {
/* The previous transaction didn't change anything. */
arg++;
@@ -2654,10 +2684,10 @@ do_idl(struct ovs_cmdl_context *ctx)
/* Print update. */
if (track) {
- print_idl_track(idl, step++);
+ print_idl_track(idl, step++, terse);
ovsdb_idl_track_clear(idl);
} else {
- print_idl(idl, step++);
+ print_idl(idl, step++, terse);
}
}
seqno = ovsdb_idl_get_seqno(idl);
@@ -2710,7 +2740,7 @@ do_idl(struct ovs_cmdl_context *ctx)
ovsdb_idl_wait(idl);
poll_block();
}
- print_idl(idl, step++);
+ print_idl(idl, step++, false);
ovsdb_idl_track_clear(idl);
ovsdb_idl_destroy(idl);
print_and_log("%03d: done", step);
@@ -2727,13 +2757,15 @@ print_idl_row_simple2(const struct idltest_simple2 *s, int step)
printf("%03d: name=%s smap=[",
step, s->name);
for (i = 0; i < smap->n; i++) {
- printf("[%s : %s]%s", smap->keys[i].string, smap->values[i].string,
- i < smap->n-1? ",": "");
+ printf("[%s : %s]%s",
+ smap->keys[i].s->string, smap->values[i].s->string,
+ i < smap->n - 1 ? "," : "");
}
printf("] imap=[");
for (i = 0; i < imap->n; i++) {
- printf("[%"PRId64" : %s]%s", imap->keys[i].integer, imap->values[i].string,
- i < imap->n-1? ",":"");
+ printf("[%"PRId64" : %s]%s",
+ imap->keys[i].integer, imap->values[i].s->string,
+ i < imap->n - 1 ? "," : "");
}
printf("]\n");
}
@@ -2802,8 +2834,8 @@ do_idl_partial_update_map_column(struct ovs_cmdl_context *ctx)
myTxn = ovsdb_idl_txn_create(idl);
smap = idltest_simple2_get_smap(myRow, OVSDB_TYPE_STRING,
OVSDB_TYPE_STRING);
- strcpy(key_to_delete, smap->keys[0].string);
- idltest_simple2_update_smap_delkey(myRow, smap->keys[0].string);
+ ovs_strlcpy(key_to_delete, smap->keys[0].s->string, sizeof key_to_delete);
+ idltest_simple2_update_smap_delkey(myRow, smap->keys[0].s->string);
ovsdb_idl_txn_commit_block(myTxn);
ovsdb_idl_txn_destroy(myTxn);
ovsdb_idl_get_initial_snapshot(idl);
@@ -2829,7 +2861,7 @@ dump_simple3(struct ovsdb_idl *idl,
int step)
{
IDLTEST_SIMPLE3_FOR_EACH(myRow, idl) {
- print_idl_row_simple3(myRow, step);
+ print_idl_row_simple3(myRow, step, false);
}
}
@@ -2971,7 +3003,7 @@ do_idl_compound_index_with_ref(struct ovs_cmdl_context *ctx)
idltest_simple3_index_set_uref(equal, &myRow2, 1);
printf("%03d: Query using index with reference\n", step++);
IDLTEST_SIMPLE3_FOR_EACH_EQUAL (myRow, equal, index) {
- print_idl_row_simple3(myRow, step++);
+ print_idl_row_simple3(myRow, step++, false);
}
idltest_simple3_index_destroy_row(equal);
diff --git a/tests/test-ovsdb.py b/tests/test-ovsdb.py
index 5bc0bf6814..853264f22b 100644
--- a/tests/test-ovsdb.py
+++ b/tests/test-ovsdb.py
@@ -232,75 +232,87 @@ def get_singleton_table_printable_row(row):
return "name=%s" % row.name
-def print_row(table, row, step, contents):
-    s = "%03d: table %s: %s " % (step, table, contents)
-    s += get_simple_printable_row_string(row, ["uuid"])
+def print_row(table, row, step, contents, terse):
+    """Print one row; a terse row shows only the step and the table."""
+    line = "%03d: table %s" % (step, table)
+    if not terse:
+        line += ": %s " % (contents,)
+        line += get_simple_printable_row_string(row, ["uuid"])
+    print(line)
-def print_idl(idl, step):
+def print_idl(idl, step, terse=False):
n = 0
if "simple" in idl.tables:
simple = idl.tables["simple"].rows
for row in simple.values():
print_row("simple", row, step,
- get_simple_table_printable_row(row))
+ get_simple_table_printable_row(row),
+ terse)
n += 1
if "simple2" in idl.tables:
simple2 = idl.tables["simple2"].rows
for row in simple2.values():
print_row("simple2", row, step,
- get_simple2_table_printable_row(row))
+ get_simple2_table_printable_row(row),
+ terse)
n += 1
if "simple3" in idl.tables:
simple3 = idl.tables["simple3"].rows
for row in simple3.values():
print_row("simple3", row, step,
- get_simple3_table_printable_row(row))
+ get_simple3_table_printable_row(row),
+ terse)
n += 1
if "simple4" in idl.tables:
simple4 = idl.tables["simple4"].rows
for row in simple4.values():
print_row("simple4", row, step,
- get_simple4_table_printable_row(row))
+ get_simple4_table_printable_row(row),
+ terse)
n += 1
if "simple5" in idl.tables:
simple5 = idl.tables["simple5"].rows
for row in simple5.values():
print_row("simple5", row, step,
- get_simple5_table_printable_row(row))
+ get_simple5_table_printable_row(row),
+ terse)
n += 1
if "simple6" in idl.tables:
simple6 = idl.tables["simple6"].rows
for row in simple6.values():
print_row("simple6", row, step,
- get_simple6_table_printable_row(row))
+ get_simple6_table_printable_row(row),
+ terse)
n += 1
if "link1" in idl.tables:
l1 = idl.tables["link1"].rows
for row in l1.values():
print_row("link1", row, step,
- get_link1_table_printable_row(row))
+ get_link1_table_printable_row(row),
+ terse)
n += 1
if "link2" in idl.tables:
l2 = idl.tables["link2"].rows
for row in l2.values():
print_row("link2", row, step,
- get_link2_table_printable_row(row))
+ get_link2_table_printable_row(row),
+ terse)
n += 1
if "singleton" in idl.tables:
sng = idl.tables["singleton"].rows
for row in sng.values():
print_row("singleton", row, step,
- get_singleton_table_printable_row(row))
+ get_singleton_table_printable_row(row),
+ terse)
n += 1
if not n:
@@ -701,6 +713,12 @@ def do_idl(schema_file, remote, *commands):
step += 1
for command in commands:
+ terse = False
+ if command.startswith("?"):
+ # We're only interested in terse table contents.
+ terse = True
+ command = command[1:]
+
if command.startswith("+"):
# The previous transaction didn't change anything.
command = command[1:]
@@ -714,7 +732,7 @@ def do_idl(schema_file, remote, *commands):
rpc.wait(poller)
poller.block()
- print_idl(idl, step)
+ print_idl(idl, step, terse)
step += 1
seqno = idl.change_seqno
diff --git a/tests/tunnel-push-pop-ipv6.at b/tests/tunnel-push-pop-ipv6.at
index 59723e63b8..c7665a1aeb 100644
--- a/tests/tunnel-push-pop-ipv6.at
+++ b/tests/tunnel-push-pop-ipv6.at
@@ -432,6 +432,42 @@ AT_CHECK([ovs-appctl dpif/dump-flows int-br | grep 'in_port(6081)'], [0], [dnl
tunnel(tun_id=0x7b,ipv6_src=2001:cafe::92,ipv6_dst=2001:cafe::88,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(-df-csum+key)),recirc_id(0),in_port(6081),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=3,rule_cookie=0,controller_id=0,max_len=65535))
])
+dnl Receive VXLAN with different MAC and verify that the neigh cache gets updated
+AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000f8bc1244cafe86dd60000000003a11402001cafe0000000000000000000000922001cafe000000000000000000000088c85312b5003abc700c00000300007b00ffffffffffff00000000000008004500001c0001000040117cce7f0000017f0000010035003500080172'])
+
+ovs-appctl time/warp 1000
+ovs-appctl time/warp 1000
+
+dnl Check VXLAN tunnel push
+AT_CHECK([ovs-ofctl add-flow int-br action=2])
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=36:b1:ee:7c:01:01,dst=36:b1:ee:7c:01:02),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+ [Datapath actions: clone(tnl_push(tnl_port(4789),header(size=70,type=4,eth(dst=f8:bc:12:44:ca:fe,src=aa:55:aa:55:00:00,dl_type=0x86dd),ipv6(src=2001:cafe::88,dst=2001:cafe::92,label=0,proto=17,tclass=0x0,hlimit=64),udp(src=0,dst=4789,csum=0xffff),vxlan(flags=0x8000000,vni=0x7b)),out_port(100)),1)
+])
+
+AT_CHECK([ovs-appctl tnl/arp/show | tail -n+3 | sort], [0], [dnl
+2001:cafe::92 f8:bc:12:44:ca:fe br0
+2001:cafe::93 f8:bc:12:44:34:b7 br0
+])
+
+dnl Restore and check the cache entries
+AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000f8bc124434b686dd60000000003a11402001cafe0000000000000000000000922001cafe000000000000000000000088c85312b5003abc700c00000300007b00ffffffffffff00000000000008004500001c0001000040117cce7f0000017f0000010035003500080172'])
+
+ovs-appctl time/warp 1000
+ovs-appctl time/warp 1000
+
+dnl Check VXLAN tunnel push
+AT_CHECK([ovs-ofctl add-flow int-br action=2])
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=36:b1:ee:7c:01:01,dst=36:b1:ee:7c:01:02),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+ [Datapath actions: clone(tnl_push(tnl_port(4789),header(size=70,type=4,eth(dst=f8:bc:12:44:34:b6,src=aa:55:aa:55:00:00,dl_type=0x86dd),ipv6(src=2001:cafe::88,dst=2001:cafe::92,label=0,proto=17,tclass=0x0,hlimit=64),udp(src=0,dst=4789,csum=0xffff),vxlan(flags=0x8000000,vni=0x7b)),out_port(100)),1)
+])
+
+AT_CHECK([ovs-appctl tnl/arp/show | tail -n+3 | sort], [0], [dnl
+2001:cafe::92 f8:bc:12:44:34:b6 br0
+2001:cafe::93 f8:bc:12:44:34:b7 br0
+])
+
ovs-appctl time/warp 10000
AT_CHECK([ovs-vsctl del-port int-br t3 \
diff --git a/tests/tunnel-push-pop.at b/tests/tunnel-push-pop.at
index 48c5de9d19..6a597488e6 100644
--- a/tests/tunnel-push-pop.at
+++ b/tests/tunnel-push-pop.at
@@ -522,6 +522,41 @@ AT_CHECK([ovs-appctl dpif/dump-flows int-br | grep 'in_port(6081)'], [0], [dnl
tunnel(tun_id=0x7b,src=1.1.2.92,dst=1.1.2.88,geneve({class=0xffff,type=0x80,len=4,0xa/0xf}{class=0xffff,type=0,len=4}),flags(-df-csum+key)),recirc_id(0),in_port(6081),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=2,rule_cookie=0,controller_id=0,max_len=65535))
])
+dnl Receive VXLAN with different MAC and verify that the neigh cache gets updated
+AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000f8bc1244cafe08004500004e00010000401173e90101025c01010258c85312b5003a8cd40c00000300007b00ffffffffffff00000000000008004500001c0001000040117cce7f0000017f0000010035003500080172'])
+
+ovs-appctl time/warp 1000
+ovs-appctl time/warp 1000
+
+dnl Check VXLAN tunnel push
+AT_CHECK([ovs-ofctl add-flow int-br action=2])
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=36:b1:ee:7c:01:01,dst=36:b1:ee:7c:01:02),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+ [Datapath actions: clone(tnl_push(tnl_port(4789),header(size=50,type=4,eth(dst=f8:bc:12:44:ca:fe,src=aa:55:aa:55:00:00,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=17,tos=0,ttl=64,frag=0x4000),udp(src=0,dst=4789,csum=0x0),vxlan(flags=0x8000000,vni=0x7b)),out_port(100)),1)
+])
+
+AT_CHECK([ovs-appctl tnl/neigh/show | tail -n+3 | sort], [0], [dnl
+1.1.2.92 f8:bc:12:44:ca:fe br0
+1.1.2.93 f8:bc:12:44:34:b7 br0
+])
+
+dnl Restore and check the cache entries
+AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000f8bc124434b608004500004e00010000401173e90101025c01010258c85312b5003a8cd40c00000300007b00ffffffffffff00000000000008004500001c0001000040117cce7f0000017f0000010035003500080172'])
+
+ovs-appctl time/warp 1000
+ovs-appctl time/warp 1000
+
+dnl Check VXLAN tunnel push
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=36:b1:ee:7c:01:01,dst=36:b1:ee:7c:01:02),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+ [Datapath actions: clone(tnl_push(tnl_port(4789),header(size=50,type=4,eth(dst=f8:bc:12:44:34:b6,src=aa:55:aa:55:00:00,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=17,tos=0,ttl=64,frag=0x4000),udp(src=0,dst=4789,csum=0x0),vxlan(flags=0x8000000,vni=0x7b)),out_port(100)),1)
+])
+
+AT_CHECK([ovs-appctl tnl/neigh/show | tail -n+3 | sort], [0], [dnl
+1.1.2.92 f8:bc:12:44:34:b6 br0
+1.1.2.93 f8:bc:12:44:34:b7 br0
+])
+
ovs-appctl time/warp 10000
AT_CHECK([ovs-vsctl del-port int-br t3 \
@@ -595,6 +630,64 @@ OVS_WAIT_UNTIL([test `ovs-pcap p0.pcap | grep 50540000000a5054000000091235 | wc
OVS_VSWITCHD_STOP
AT_CLEANUP
+AT_SETUP([tunnel_push_pop - packet_out debug_slow])
+
+dnl Checks that packets handled by a flow with the debug_slow action are
+dnl still encapsulated when output to a tunnel port, both for packets
+dnl received on the bridge and for packets sent with packet-out, and that
+dnl the datapath flows carry a slow_path action instead of tnl_push.
+
+OVS_VSWITCHD_START(
+    [add-port br0 p0 dnl
+     -- set Interface p0 type=dummy ofport_request=1 dnl
+                         other-config:hwaddr=aa:55:aa:55:00:00])
+AT_CHECK([ovs-appctl vlog/set dpif_netdev:dbg])
+AT_CHECK([ovs-vsctl add-br int-br -- set bridge int-br datapath_type=dummy])
+AT_CHECK([ovs-vsctl add-port int-br t2 dnl
+          -- set Interface t2 type=geneve options:remote_ip=1.1.2.92 dnl
+                              options:key=123 ofport_request=2])
+
+dnl First setup dummy interface IP address, then add the route
+dnl so that tnl-port table can get valid IP address for the device.
+AT_CHECK([ovs-appctl netdev-dummy/ip4addr br0 1.1.2.88/24], [0], [OK
+])
+AT_CHECK([ovs-appctl ovs/route/add 1.1.2.92/24 br0], [0], [OK
+])
+AT_CHECK([ovs-ofctl add-flow br0 action=normal])
+
+dnl This ARP reply from p0 has two effects:
+dnl 1. The ARP cache will learn that 1.1.2.92 is at f8:bc:12:44:34:b6.
+dnl 2. The br0 mac learning will learn that f8:bc:12:44:34:b6 is on p0.
+AT_CHECK([
+    ovs-appctl netdev-dummy/receive p0 dnl
+        'recirc_id(0),in_port(2),dnl
+         eth(src=f8:bc:12:44:34:b6,dst=ff:ff:ff:ff:ff:ff),eth_type(0x0806),dnl
+         arp(sip=1.1.2.92,tip=1.1.2.88,op=2,sha=f8:bc:12:44:34:b6,tha=00:00:00:00:00:00)'
+])
+
+AT_CHECK([ovs-vsctl -- set Interface p0 options:tx_pcap=p0.pcap])
+
+packet=50540000000a505400000009123
+dnl Source port is based on a packet hash, so it may differ depending on the
+dnl compiler flags and CPU type. Masked with '....'.
+encap=f8bc124434b6aa55aa5500000800450000320000400040113406010102580101025c....17c1001e00000000655800007b00
+
+dnl Output to tunnel from a int-br internal port.
+dnl Checking that the packet arrived and it was correctly encapsulated.
+AT_CHECK([ovs-ofctl add-flow int-br "in_port=LOCAL,actions=debug_slow,output:2"])
+AT_CHECK([ovs-appctl netdev-dummy/receive int-br "${packet}4"])
+OVS_WAIT_UNTIL([test `ovs-pcap p0.pcap | grep -E "${encap}${packet}4" | wc -l` -ge 1])
+dnl Sending again to exercise the non-miss upcall path.
+AT_CHECK([ovs-appctl netdev-dummy/receive int-br "${packet}4"])
+OVS_WAIT_UNTIL([test `ovs-pcap p0.pcap | grep -E "${encap}${packet}4" | wc -l` -ge 2])
+
+dnl Output to tunnel from the controller.
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out int-br CONTROLLER "debug_slow,output:2" "${packet}5"])
+OVS_WAIT_UNTIL([test `ovs-pcap p0.pcap | grep -E "${encap}${packet}5" | wc -l` -ge 1])
+
+dnl Datapath actions should not have tunnel push action.
+AT_CHECK([ovs-appctl dpctl/dump-flows | grep -q tnl_push], [1])
+dnl There should be slow_path action instead.
+AT_CHECK([ovs-appctl dpctl/dump-flows | grep -q 'slow_path(action)'], [0])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
AT_SETUP([tunnel_push_pop - underlay bridge match])
OVS_VSWITCHD_START([add-port br0 p0 -- set Interface p0 type=dummy ofport_request=1 other-config:hwaddr=aa:55:aa:55:00:00])
diff --git a/utilities/ovs-ctl.in b/utilities/ovs-ctl.in
index 71800795c0..e6e07f4763 100644
--- a/utilities/ovs-ctl.in
+++ b/utilities/ovs-ctl.in
@@ -421,7 +421,9 @@ Less important options for "start", "restart" and "force-reload-kmod":
--no-force-corefiles do not force on core dumps for OVS daemons
--no-mlockall do not lock all of ovs-vswitchd into memory
--ovsdb-server-priority=NICE set ovsdb-server's niceness (default: $OVSDB_SERVER_PRIORITY)
+ --ovsdb-server-options=OPTIONS additional options for ovsdb-server (example: '-vconsole:dbg -vfile:dbg')
--ovs-vswitchd-priority=NICE set ovs-vswitchd's niceness (default: $OVS_VSWITCHD_PRIORITY)
+ --ovs-vswitchd-options=OPTIONS additional options for ovs-vswitchd (example: '-vconsole:dbg -vfile:dbg')
--no-full-hostname set short hostname instead of full hostname
--no-record-hostname do not attempt to determine/record system
hostname as part of start command
diff --git a/utilities/ovs-lib.in b/utilities/ovs-lib.in
index 3eda01d3c1..13477a6a9e 100644
--- a/utilities/ovs-lib.in
+++ b/utilities/ovs-lib.in
@@ -519,13 +519,13 @@ join_cluster() {
LOCAL_ADDR="$3"
REMOTE_ADDR="$4"
- if test ! -e "$DB_FILE"; then
- ovsdb_tool join-cluster "$DB_FILE" "$SCHEMA_NAME" "$LOCAL_ADDR" "$REMOTE_ADDR"
- elif ovsdb_tool db-is-standalone "$DB_FILE"; then
- # Backup standalone database and join cluster.
+    if test -e "$DB_FILE" && ovsdb_tool db-is-standalone "$DB_FILE"; then
+        # Back up the standalone database and remove it, so that the
+        # existence check below finds no file and joins the cluster.
         backup_db || return 1
+        rm "$DB_FILE"
+    fi
+ if test ! -e "$DB_FILE"; then
action "Joining $DB_FILE to cluster" \
- ovsdb_tool join-cluster "$DB_FILE" "$SCHEMA_NAME" "$LOCAL_ADDR"
+ ovsdb_tool join-cluster "$DB_FILE" "$SCHEMA_NAME" "$LOCAL_ADDR" "$REMOTE_ADDR"
fi
}
diff --git a/utilities/ovs-save b/utilities/ovs-save
index 27ce3a9aad..fb2025b765 100755
--- a/utilities/ovs-save
+++ b/utilities/ovs-save
@@ -150,7 +150,10 @@ save_flows () {
ovs-ofctl -O $ofp_version dump-flows --no-names --no-stats "$bridge" | \
sed -e '/NXST_FLOW/d' \
-e '/OFPST_FLOW/d' \
- -e 's/\(idle\|hard\)_age=[^,]*,//g' > \
+ -e 's/\(idle\|hard\)_age=[^,]*,//g' \
+ -e 's/igmp_type/tp_src/g' \
+ -e 's/igmp_code/tp_dst/g' \
+ -e 's/igmp/ip,nw_proto=2/g' > \
"$workdir/$bridge.flows.dump"
done
echo "rm -rf \"$workdir\""
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index cb7c5cb769..c790a56adf 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -4229,7 +4229,7 @@ bridge_configure_aa(struct bridge *br)
union ovsdb_atom atom;
atom.integer = m->isid;
- if (ovsdb_datum_find_key(mc, &atom, OVSDB_TYPE_INTEGER) == UINT_MAX) {
+ if (!ovsdb_datum_find_key(mc, &atom, OVSDB_TYPE_INTEGER, NULL)) {
VLOG_INFO("Deleting isid=%"PRIu32", vlan=%"PRIu16,
m->isid, m->vlan);
bridge_aa_mapping_destroy(m);
@@ -4826,7 +4826,7 @@ queue_ids_include(const struct ovsdb_datum *queues, int64_t target)
union ovsdb_atom atom;
atom.integer = target;
- return ovsdb_datum_find_key(queues, &atom, OVSDB_TYPE_INTEGER) != UINT_MAX;
+ return ovsdb_datum_find_key(queues, &atom, OVSDB_TYPE_INTEGER, NULL);
}
static void
@@ -5020,7 +5020,7 @@ bridge_configure_mirrors(struct bridge *br)
union ovsdb_atom atom;
atom.uuid = m->uuid;
- if (ovsdb_datum_find_key(mc, &atom, OVSDB_TYPE_UUID) == UINT_MAX) {
+ if (!ovsdb_datum_find_key(mc, &atom, OVSDB_TYPE_UUID, NULL)) {
mirror_destroy(m);
}
}