diff --git a/.ci/dpdk-build.sh b/.ci/dpdk-build.sh new file mode 100755 index 000000000..02dcefef6 --- /dev/null +++ b/.ci/dpdk-build.sh @@ -0,0 +1,54 @@ +#!/bin/bash + +set -o errexit +set -x + +function build_dpdk() +{ + local VERSION_FILE="dpdk-dir/cached-version" + local DPDK_VER=$1 + local DPDK_OPTS="" + + rm -rf dpdk-dir + + if [ "${DPDK_VER##refs/*/}" != "${DPDK_VER}" ]; then + git clone --single-branch $DPDK_GIT dpdk-dir -b "${DPDK_VER##refs/*/}" + pushd dpdk-dir + git log -1 --oneline + else + wget https://fast.dpdk.org/rel/dpdk-$1.tar.xz + tar xvf dpdk-$1.tar.xz > /dev/null + DIR_NAME=$(tar -tf dpdk-$1.tar.xz | head -1 | cut -f1 -d"/") + mv ${DIR_NAME} dpdk-dir + pushd dpdk-dir + fi + + # Switching to 'default' machine to make dpdk-dir cache usable on + # different CPUs. We can't be sure that all CI machines are exactly same. + DPDK_OPTS="$DPDK_OPTS -Dmachine=default" + + # Disable building DPDK unit tests. Not needed for OVS build or tests. + DPDK_OPTS="$DPDK_OPTS -Dtests=false" + + # Disable DPDK developer mode, this results in less build checks and less + # meson verbose outputs. + DPDK_OPTS="$DPDK_OPTS -Ddeveloper_mode=disabled" + + # OVS compilation and "normal" unit tests (run in the CI) do not depend on + # any DPDK driver being present. + # We can disable all drivers to save compilation time. + DPDK_OPTS="$DPDK_OPTS -Ddisable_drivers=*/*" + + # Install DPDK using prefix. 
+ DPDK_OPTS="$DPDK_OPTS --prefix=$(pwd)/build" + + meson $DPDK_OPTS build + ninja -C build + ninja -C build install + + echo "Installed DPDK in $(pwd)" + popd + echo "${DPDK_VER}" > ${VERSION_FILE} +} + +build_dpdk $DPDK_VER diff --git a/.ci/dpdk-prepare.sh b/.ci/dpdk-prepare.sh new file mode 100755 index 000000000..f7e6215dd --- /dev/null +++ b/.ci/dpdk-prepare.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +set -ev + +# Installing wheel separately because it may be needed to build some +# of the packages during dependency backtracking and pip >= 22.0 will +# abort backtracking on build failures: +# https://github.com/pypa/pip/issues/10655 +pip3 install --disable-pip-version-check --user wheel +pip3 install --disable-pip-version-check --user pyelftools +pip3 install --user 'meson==0.53.2' diff --git a/.ci/linux-build.sh b/.ci/linux-build.sh index 10021fddb..8227a5748 100755 --- a/.ci/linux-build.sh +++ b/.ci/linux-build.sh @@ -9,9 +9,7 @@ EXTRA_OPTS="--enable-Werror" function install_dpdk() { - local DPDK_VER=$1 local VERSION_FILE="dpdk-dir/cached-version" - local DPDK_OPTS="" local DPDK_LIB=$(pwd)/dpdk-dir/build/lib/x86_64-linux-gnu if [ "$DPDK_SHARED" ]; then @@ -24,63 +22,14 @@ function install_dpdk() # Export the following path for pkg-config to find the .pc file. export PKG_CONFIG_PATH=$DPDK_LIB/pkgconfig/:$PKG_CONFIG_PATH - if [ "${DPDK_VER##refs/*/}" != "${DPDK_VER}" ]; then - # Avoid using cache for git tree build. - rm -rf dpdk-dir - - DPDK_GIT=${DPDK_GIT:-https://dpdk.org/git/dpdk} - git clone --single-branch $DPDK_GIT dpdk-dir -b "${DPDK_VER##refs/*/}" - pushd dpdk-dir - git log -1 --oneline - else - if [ -f "${VERSION_FILE}" ]; then - VER=$(cat ${VERSION_FILE}) - if [ "${VER}" = "${DPDK_VER}" ]; then - # Update the library paths. - sudo ldconfig - echo "Found cached DPDK ${VER} build in $(pwd)/dpdk-dir" - return - fi - fi - # No cache or version mismatch. 
- rm -rf dpdk-dir - wget https://fast.dpdk.org/rel/dpdk-$1.tar.xz - tar xvf dpdk-$1.tar.xz > /dev/null - DIR_NAME=$(tar -tf dpdk-$1.tar.xz | head -1 | cut -f1 -d"/") - mv ${DIR_NAME} dpdk-dir - pushd dpdk-dir + if [ ! -f "${VERSION_FILE}" ]; then + echo "Could not find DPDK in $(pwd)/dpdk-dir" + return 1 fi - # Switching to 'default' machine to make dpdk-dir cache usable on - # different CPUs. We can't be sure that all CI machines are exactly same. - DPDK_OPTS="$DPDK_OPTS -Dmachine=default" - - # Disable building DPDK unit tests. Not needed for OVS build or tests. - DPDK_OPTS="$DPDK_OPTS -Dtests=false" - - # Disable DPDK developer mode, this results in less build checks and less - # meson verbose outputs. - DPDK_OPTS="$DPDK_OPTS -Ddeveloper_mode=disabled" - - # OVS compilation and "normal" unit tests (run in the CI) do not depend on - # any DPDK driver being present. - # We can disable all drivers to save compilation time. - DPDK_OPTS="$DPDK_OPTS -Ddisable_drivers=*/*" - - # Install DPDK using prefix. - DPDK_OPTS="$DPDK_OPTS --prefix=$(pwd)/build" - - CC=gcc meson $DPDK_OPTS build - ninja -C build - ninja -C build install - # Update the library paths. sudo ldconfig - - - echo "Installed DPDK source in $(pwd)" - popd - echo "${DPDK_VER}" > ${VERSION_FILE} + echo "Found cached DPDK $(cat ${VERSION_FILE}) build in $(pwd)/dpdk-dir" } function configure_ovs() @@ -130,10 +79,11 @@ assert ovs.json.from_string('{\"a\": 42}') == {'a': 42}" fi if [ "$DPDK" ] || [ "$DPDK_SHARED" ]; then - if [ -z "$DPDK_VER" ]; then - DPDK_VER="22.11.1" - fi - install_dpdk $DPDK_VER + install_dpdk +fi + +if [ "$STD" ]; then + CFLAGS_FOR_OVS="${CFLAGS_FOR_OVS} -std=$STD" fi if [ "$CC" = "clang" ]; then diff --git a/.ci/linux-prepare.sh b/.ci/linux-prepare.sh index f414a879c..c28b6819a 100755 --- a/.ci/linux-prepare.sh +++ b/.ci/linux-prepare.sh @@ -23,8 +23,7 @@ cd .. 
# https://github.com/pypa/pip/issues/10655 pip3 install --disable-pip-version-check --user wheel pip3 install --disable-pip-version-check --user \ - flake8 'hacking>=3.0' netaddr pyparsing sphinx setuptools pyelftools -pip3 install --user 'meson==0.53.2' + flake8 'hacking>=3.0' netaddr pyparsing sphinx setuptools # Install python test dependencies pip3 install -r python/test_requirements.txt diff --git a/.ci/osx-build.sh b/.ci/osx-build.sh index 09df61826..b81744ec9 100755 --- a/.ci/osx-build.sh +++ b/.ci/osx-build.sh @@ -10,7 +10,7 @@ function configure_ovs() ./boot.sh && ./configure $* } -configure_ovs $EXTRA_OPTS $* +configure_ovs $EXTRA_OPTS $OPTS $* if [ "$CC" = "clang" ]; then make CFLAGS="$CFLAGS -Wno-error=unused-command-line-argument" diff --git a/.cirrus.yml b/.cirrus.yml index 952d96431..48931fa08 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -3,7 +3,7 @@ freebsd_build_task: freebsd_instance: matrix: image_family: freebsd-12-4-snap - image_family: freebsd-13-1-snap + image_family: freebsd-13-2-snap cpu: 4 memory: 4G diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 82675b973..80c449336 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -3,12 +3,80 @@ name: Build and Test on: [push, pull_request] jobs: + build-dpdk: + env: + dependencies: gcc libnuma-dev ninja-build + CC: gcc + DPDK_GIT: https://dpdk.org/git/dpdk-stable + DPDK_VER: 22.11.1 + name: dpdk gcc + outputs: + dpdk_key: ${{ steps.gen_dpdk_key.outputs.key }} + runs-on: ubuntu-20.04 + timeout-minutes: 30 + + steps: + - name: checkout + uses: actions/checkout@v3 + + - name: update PATH + run: | + echo "$HOME/bin" >> $GITHUB_PATH + echo "$HOME/.local/bin" >> $GITHUB_PATH + + - name: create ci signature file for the dpdk cache key + # This will collect most of DPDK related lines, so hash will be different + # if something changed in a way we're building DPDK including DPDK_VER. 
+ # This also allows us to use cache from any branch as long as version + # and a way we're building DPDK stays the same. + run: | + grep -irE 'RTE_|DPDK|meson|ninja' .ci/dpdk-* > dpdk-ci-signature + grep -rwE 'DPDK_GIT|DPDK_VER' .github/ >> dpdk-ci-signature + if [ "${DPDK_VER##refs/*/}" != "${DPDK_VER}" ]; then + git ls-remote --heads $DPDK_GIT $DPDK_VER >> dpdk-ci-signature + fi + cat dpdk-ci-signature + + - name: generate ci DPDK key + id: gen_dpdk_key + env: + ci_key: ${{ hashFiles('dpdk-ci-signature') }} + run: echo 'key=dpdk-${{ env.ci_key }}' >> $GITHUB_OUTPUT + + - name: cache + id: dpdk_cache + uses: actions/cache@v3 + with: + path: dpdk-dir + key: ${{ steps.gen_dpdk_key.outputs.key }} + + - name: set up python + if: steps.dpdk_cache.outputs.cache-hit != 'true' + uses: actions/setup-python@v4 + with: + python-version: '3.9' + + - name: update APT cache + if: steps.dpdk_cache.outputs.cache-hit != 'true' + run: sudo apt update || true + - name: install common dependencies + if: steps.dpdk_cache.outputs.cache-hit != 'true' + run: sudo apt install -y ${{ env.dependencies }} + + - name: prepare + if: steps.dpdk_cache.outputs.cache-hit != 'true' + run: ./.ci/dpdk-prepare.sh + + - name: build + if: steps.dpdk_cache.outputs.cache-hit != 'true' + run: ./.ci/dpdk-build.sh + build-linux: + needs: build-dpdk env: dependencies: | - automake libtool gcc bc libjemalloc2 libjemalloc-dev \ - libssl-dev llvm-dev libelf-dev libnuma-dev libpcap-dev \ - ninja-build selinux-policy-dev libbpf-dev + automake libtool gcc bc libjemalloc2 libjemalloc-dev libssl-dev \ + llvm-dev libnuma-dev libpcap-dev selinux-policy-dev libbpf-dev ASAN: ${{ matrix.asan }} UBSAN: ${{ matrix.ubsan }} CC: ${{ matrix.compiler }} @@ -17,6 +85,7 @@ jobs: LIBS: ${{ matrix.libs }} M32: ${{ matrix.m32 }} OPTS: ${{ matrix.opts }} + STD: ${{ matrix.std }} TESTSUITE: ${{ matrix.testsuite }} name: linux ${{ join(matrix.*, ' ') }} @@ -32,6 +101,11 @@ jobs: - compiler: clang opts: --disable-ssl + - compiler: gcc 
+ std: c99 + - compiler: clang + std: c99 + - compiler: gcc testsuite: test - compiler: clang @@ -104,25 +178,12 @@ jobs: with: python-version: '3.9' - - name: create ci signature file for the dpdk cache key - if: matrix.dpdk != '' || matrix.dpdk_shared != '' - # This will collect most of DPDK related lines, so hash will be different - # if something changed in a way we're building DPDK including DPDK_VER. - # This also allows us to use cache from any branch as long as version - # and a way we're building DPDK stays the same. - run: | - grep -irE 'RTE_|DPDK|meson|ninja' -r .ci/ > dpdk-ci-signature - cat dpdk-ci-signature - - name: cache if: matrix.dpdk != '' || matrix.dpdk_shared != '' uses: actions/cache@v3 - env: - matrix_key: ${{ matrix.dpdk }}${{ matrix.dpdk_shared }} - ci_key: ${{ hashFiles('dpdk-ci-signature') }} with: path: dpdk-dir - key: ${{ env.matrix_key }}-${{ env.ci_key }} + key: ${{ needs.build-dpdk.outputs.dpdk_key }} - name: update APT cache run: sudo apt update || true diff --git a/Documentation/ref/ovs-actions.7.rst b/Documentation/ref/ovs-actions.7.rst index b59b7634f..36adcc5db 100644 --- a/Documentation/ref/ovs-actions.7.rst +++ b/Documentation/ref/ovs-actions.7.rst @@ -694,7 +694,8 @@ encapsulated in an OpenFlow ``packet-in`` message. The supported options are: Limit to *max_len* the number of bytes of the packet to send in the ``packet-in.`` A *max_len* of 0 prevents any of the packet from being sent (thus, only metadata is included). By default, the entire packet is - sent, equivalent to a *max_len* of 65535. + sent, equivalent to a *max_len* of 65535. This option has no effect in + Open vSwitch 2.7 and later: the entire packet will always be sent. ``reason=``\ *reason* Specify *reason* as the reason for sending the message in the @@ -733,6 +734,12 @@ encapsulated in an OpenFlow ``packet-in`` message. The supported options are: options require the Open vSwitch ``NXAST_CONTROLLER`` extension action added in Open vSwitch 1.6. 
+ Open vSwitch 2.7 and later is configured to not buffer packets for the + packet-in event. As a result, the full packet is always sent to + controllers. This means that the ``max_len`` option has no effect on the + ``controller`` action, and all values (even 0) are equivalent to the default + value of 65535. + The ``enqueue`` action ---------------------- @@ -1380,7 +1387,7 @@ The ``delete_field`` action | ``delete_field:``\ *field* The ``delete_field`` action deletes a *field* in the syntax described under -`Field Specifications`_ above. Currently, only the ``tun_metadta`` fields are +`Field Specifications`_ above. Currently, only the ``tun_metadata`` fields are supported. This action was added in Open vSwitch 2.14. diff --git a/Makefile.am b/Makefile.am index e605187b8..eddb981ae 100644 --- a/Makefile.am +++ b/Makefile.am @@ -75,6 +75,8 @@ EXTRA_DIST = \ MAINTAINERS.rst \ README.rst \ NOTICE \ + .ci/dpdk-build.sh \ + .ci/dpdk-prepare.sh \ .ci/linux-build.sh \ .ci/linux-prepare.sh \ .ci/osx-build.sh \ @@ -365,7 +367,7 @@ ALL_LOCAL += manpage-check manpage-check: $(man_MANS) $(dist_man_MANS) $(noinst_man_MANS) @error=false; \ for manpage in $?; do \ - LANG=en_US.UTF-8 groff -w mac -w delim -w escape -w input -w missing -w tab -T utf8 -man -p -z $$manpage >$@.tmp 2>&1; \ + LANG=en_US.UTF-8 groff -t -w mac -w delim -w escape -w input -w missing -w tab -T utf8 -man -p -z $$manpage >$@.tmp 2>&1; \ if grep warning: $@.tmp; then error=:; fi; \ rm -f $@.tmp; \ done; \ @@ -412,7 +414,7 @@ endif CLEANFILES += flake8-check -include manpages.mk -manpages.mk: $(MAN_ROOTS) build-aux/sodepends.py python/build/soutil.py +manpages.mk: $(MAN_ROOTS) build-aux/sodepends.py python/ovs_build_helpers/soutil.py @PYTHONPATH=$$PYTHONPATH$(psep)$(srcdir)/python $(PYTHON3) $(srcdir)/build-aux/sodepends.py -I. 
-I$(srcdir) $(MAN_ROOTS) >$(@F).tmp @if cmp -s $(@F).tmp $@; then \ touch $@; \ diff --git a/NEWS b/NEWS index 37a01dea5..8fbf7219d 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,16 @@ +v3.1.3 - xx xxx xxxx +-------------------- + +v3.1.2 - 27 Jun 2023 +-------------------- + - Bug fixes + +v3.1.1 - 06 Apr 2023 +-------------------- + - Bug fixes + - Security: + * Fixed vulnerability CVE-2023-1668. + v3.1.0 - 16 Feb 2023 -------------------- - ovs-vswitchd now detects changes in CPU affinity and adjusts the number diff --git a/build-aux/extract-ofp-fields b/build-aux/extract-ofp-fields index efec59c25..05d3e1df3 100755 --- a/build-aux/extract-ofp-fields +++ b/build-aux/extract-ofp-fields @@ -4,9 +4,9 @@ import getopt import sys import os.path import xml.dom.minidom -import build.nroff -from build.extract_ofp_fields import ( +from ovs_build_helpers import nroff +from ovs_build_helpers.extract_ofp_fields import ( extract_ofp_fields, PREREQS, OXM_CLASSES, @@ -216,7 +216,7 @@ def field_to_xml(field_node, f, body, summary): """.PP \\fB%s Field\\fR .TS -tab(;); +tab(;),nowarn; l lx. """ % title @@ -297,7 +297,7 @@ l lx. 
body += [".TE\n"] body += [".PP\n"] - body += [build.nroff.block_xml_to_nroff(field_node.childNodes)] + body += [nroff.block_xml_to_nroff(field_node.childNodes)] def group_xml_to_nroff(group_node, fields): @@ -310,14 +310,14 @@ def group_xml_to_nroff(group_node, fields): id_ = node.attributes["id"].nodeValue field_to_xml(node, fields[id_], body, summary) else: - body += [build.nroff.block_xml_to_nroff([node])] + body += [nroff.block_xml_to_nroff([node])] content = [ ".bp\n", - '.SH "%s"\n' % build.nroff.text_to_nroff(title.upper() + " FIELDS"), + '.SH "%s"\n' % nroff.text_to_nroff(title.upper() + " FIELDS"), '.SS "Summary:"\n', ".TS\n", - "tab(;);\n", + "tab(;),nowarn;\n", "l l l l l l l.\n", "Name;Bytes;Mask;RW?;Prereqs;NXM/OXM Support\n", "\_;\_;\_;\_;\_;\_\n", @@ -329,7 +329,7 @@ def group_xml_to_nroff(group_node, fields): def make_oxm_classes_xml(document): - s = """tab(;); + s = """tab(;),nowarn; l l l. Prefix;Vendor;Class \_;\_;\_ @@ -422,7 +422,7 @@ ovs\-fields \- protocol header fields in OpenFlow and Open vSwitch elif node.nodeType == node.COMMENT_NODE: pass else: - s += build.nroff.block_xml_to_nroff([node]) + s += nroff.block_xml_to_nroff([node]) for f in fields: if "used" not in f: diff --git a/build-aux/gen_ofp_field_decoders b/build-aux/gen_ofp_field_decoders index 0b797ee8c..0cb6108c2 100755 --- a/build-aux/gen_ofp_field_decoders +++ b/build-aux/gen_ofp_field_decoders @@ -2,7 +2,7 @@ import argparse -import build.extract_ofp_fields as extract_fields +from ovs_build_helpers.extract_ofp_fields import extract_ofp_fields def main(): @@ -19,7 +19,7 @@ def main(): args = parser.parse_args() - fields = extract_fields.extract_ofp_fields(args.metaflow) + fields = extract_ofp_fields(args.metaflow) field_decoders = {} aliases = {} diff --git a/build-aux/sodepends.py b/build-aux/sodepends.py index 45812bcbd..ac8dd61a4 100755 --- a/build-aux/sodepends.py +++ b/build-aux/sodepends.py @@ -14,9 +14,10 @@ # See the License for the specific language governing 
permissions and # limitations under the License. -from build import soutil import sys +from ovs_build_helpers import soutil + def sodepends(include_dirs, filenames, dst): ok = True diff --git a/build-aux/soexpand.py b/build-aux/soexpand.py index 00adcf47a..7d4dc0486 100755 --- a/build-aux/soexpand.py +++ b/build-aux/soexpand.py @@ -14,9 +14,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from build import soutil import sys +from ovs_build_helpers import soutil + def soexpand(include_dirs, src, dst): ok = True diff --git a/build-aux/xml2nroff b/build-aux/xml2nroff index ee5553f45..3e937910b 100755 --- a/build-aux/xml2nroff +++ b/build-aux/xml2nroff @@ -18,7 +18,7 @@ import getopt import sys import xml.dom.minidom -import build.nroff +from ovs_build_helpers import nroff argv0 = sys.argv[0] @@ -90,10 +90,10 @@ def manpage_to_nroff(xml_file, subst, include_path, version=None): . I "\\$1" . RE .. -''' % (build.nroff.text_to_nroff(program), build.nroff.text_to_nroff(section), - build.nroff.text_to_nroff(title), build.nroff.text_to_nroff(version)) +''' % (nroff.text_to_nroff(program), nroff.text_to_nroff(section), + nroff.text_to_nroff(title), nroff.text_to_nroff(version)) - s += build.nroff.block_xml_to_nroff(doc.childNodes) + "\n" + s += nroff.block_xml_to_nroff(doc.childNodes) + "\n" return s @@ -139,7 +139,7 @@ if __name__ == "__main__": try: s = manpage_to_nroff(args[0], subst, include_path, version) - except build.nroff.error.Error as e: + except nroff.error.Error as e: sys.stderr.write("%s: %s\n" % (argv0, e.msg)) sys.exit(1) for line in s.splitlines(): diff --git a/configure.ac b/configure.ac index 9bf896c01..f4fb551ae 100644 --- a/configure.ac +++ b/configure.ac @@ -13,7 +13,7 @@ # limitations under the License. 
AC_PREREQ(2.63) -AC_INIT(openvswitch, 3.1.0, bugs@openvswitch.org) +AC_INIT(openvswitch, 3.1.3, bugs@openvswitch.org) AC_CONFIG_SRCDIR([vswitchd/ovs-vswitchd.c]) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_AUX_DIR([build-aux]) @@ -21,7 +21,11 @@ AC_CONFIG_HEADERS([config.h]) AC_CONFIG_TESTDIR([tests]) AM_INIT_AUTOMAKE([tar-pax]) -AC_PROG_CC_C99 +# AC_PROG_CC doesn't try enabling C99 in autoconf 2.69 and below, but +# AC_PROG_CC_C99 is deprecated in newer ones. In autoconf 2.70+ both +# will try enabling features up to C11. +m4_version_prereq([2.70], [AC_PROG_CC], [AC_PROG_CC_C99]) + AM_PROG_CC_C_O AC_PROG_CXX AC_PROG_CPP diff --git a/debian/changelog b/debian/changelog index a5ad222c4..b2f60255c 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,21 @@ +openvswitch (3.1.3-1) unstable; urgency=low + [ Open vSwitch team ] + * New upstream version + + -- Open vSwitch team Tue, 27 Jun 2023 14:09:46 +0200 + +openvswitch (3.1.2-1) unstable; urgency=low + [ Open vSwitch team ] + * New upstream version + + -- Open vSwitch team Tue, 27 Jun 2023 14:09:46 +0200 + +openvswitch (3.1.1-1) unstable; urgency=low + [ Open vSwitch team ] + * New upstream version + + -- Open vSwitch team Thu, 06 Apr 2023 15:10:30 +0200 + openvswitch (3.1.0-1) unstable; urgency=low * New upstream version diff --git a/include/openvswitch/compiler.h b/include/openvswitch/compiler.h index cf009f826..52614a5ac 100644 --- a/include/openvswitch/compiler.h +++ b/include/openvswitch/compiler.h @@ -37,6 +37,16 @@ #define OVS_NO_RETURN #endif +#ifndef typeof +#define typeof __typeof__ +#endif + +#ifndef __cplusplus +#ifndef asm +#define asm __asm__ +#endif +#endif + #if __GNUC__ && !__CHECKER__ #define OVS_UNUSED __attribute__((__unused__)) #define OVS_PRINTF_FORMAT(FMT, ARG1) __attribute__((__format__(printf, FMT, ARG1))) diff --git a/include/openvswitch/meta-flow.h b/include/openvswitch/meta-flow.h index 045dce8f5..3b0220aaa 100644 --- a/include/openvswitch/meta-flow.h +++ 
b/include/openvswitch/meta-flow.h @@ -2366,6 +2366,10 @@ void mf_format_subvalue(const union mf_subvalue *subvalue, struct ds *s); void field_array_set(enum mf_field_id id, const union mf_value *, struct field_array *); +/* Mask the required l3 prerequisites if a 'set' action occurs. */ +void mf_set_mask_l3_prereqs(const struct mf_field *, const struct flow *, + struct flow_wildcards *); + #ifdef __cplusplus } #endif diff --git a/lib/automake.mk b/lib/automake.mk index e64ee76ce..24b0ffefe 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -451,7 +451,7 @@ lib_libsflow_la_SOURCES = \ lib/sflow_poller.c \ lib/sflow_receiver.c lib_libsflow_la_CPPFLAGS = $(AM_CPPFLAGS) -lib_libsflow_la_CFLAGS = $(AM_CFLAGS) +lib_libsflow_la_CFLAGS = $(AM_CFLAGS) -D_BSD_SOURCE -D_DEFAULT_SOURCE if HAVE_WNO_UNUSED lib_libsflow_la_CFLAGS += -Wno-unused endif diff --git a/lib/classifier.c b/lib/classifier.c index 0a89626cc..18dbfc83a 100644 --- a/lib/classifier.c +++ b/lib/classifier.c @@ -1695,6 +1695,8 @@ find_match_wc(const struct cls_subtable *subtable, ovs_version_t version, const struct cls_match *rule = NULL; struct flowmap stages_map = FLOWMAP_EMPTY_INITIALIZER; unsigned int mask_offset = 0; + bool adjust_ports_mask = false; + ovs_be32 ports_mask; int i; /* Try to finish early by checking fields in segments. */ @@ -1722,6 +1724,9 @@ find_match_wc(const struct cls_subtable *subtable, ovs_version_t version, subtable->index_maps[i], flow, wc)) { goto no_match; } + /* Accumulate the map used so far. */ + stages_map = flowmap_or(stages_map, subtable->index_maps[i]); + hash = flow_hash_in_minimask_range(flow, &subtable->mask, subtable->index_maps[i], &mask_offset, &basis); @@ -1731,14 +1736,16 @@ find_match_wc(const struct cls_subtable *subtable, ovs_version_t version, * unwildcarding all the ports bits, use the ports trie to figure out a * smaller set of bits to unwildcard. 
*/ unsigned int mbits; - ovs_be32 value, plens, mask; + ovs_be32 value, plens; - mask = miniflow_get_ports(&subtable->mask.masks); - value = ((OVS_FORCE ovs_be32 *)flow)[TP_PORTS_OFS32] & mask; + ports_mask = miniflow_get_ports(&subtable->mask.masks); + value = ((OVS_FORCE ovs_be32 *) flow)[TP_PORTS_OFS32] & ports_mask; mbits = trie_lookup_value(&subtable->ports_trie, &value, &plens, 32); - ((OVS_FORCE ovs_be32 *)&wc->masks)[TP_PORTS_OFS32] |= - mask & be32_prefix_mask(mbits); + ports_mask &= be32_prefix_mask(mbits); + ports_mask |= ((OVS_FORCE ovs_be32 *) &wc->masks)[TP_PORTS_OFS32]; + + adjust_ports_mask = true; goto no_match; } @@ -1751,6 +1758,14 @@ no_match: /* Unwildcard the bits in stages so far, as they were used in determining * there is no match. */ flow_wildcards_fold_minimask_in_map(wc, &subtable->mask, stages_map); + if (adjust_ports_mask) { + /* This has to be done after updating flow wildcards to overwrite + * the ports mask back. We can't simply disable the corresponding bit + * in the stages map, because it has 64-bit resolution, i.e. one + * bit covers not only tp_src/dst, but also ct_tp_src/dst, which are + * not covered by the trie. */ + ((OVS_FORCE ovs_be32 *) &wc->masks)[TP_PORTS_OFS32] = ports_mask; + } return NULL; } diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h index fae8b3a9b..ba6e75dee 100644 --- a/lib/conntrack-private.h +++ b/lib/conntrack-private.h @@ -49,6 +49,12 @@ struct ct_endpoint { * hashing in ct_endpoint_hash_add(). */ BUILD_ASSERT_DECL(sizeof(struct ct_endpoint) == sizeof(union ct_addr) + 4); +enum key_dir { + CT_DIR_FWD = 0, + CT_DIR_REV, + CT_DIRS, +}; + /* Changes to this structure need to be reflected in conn_key_hash() * and conn_key_cmp(). 
*/ struct conn_key { @@ -112,20 +118,18 @@ enum ct_timeout { #define N_EXP_LISTS 100 -enum OVS_PACKED_ENUM ct_conn_type { - CT_CONN_TYPE_DEFAULT, - CT_CONN_TYPE_UN_NAT, +struct conn_key_node { + enum key_dir dir; + struct conn_key key; + struct cmap_node cm_node; }; struct conn { /* Immutable data. */ - struct conn_key key; - struct conn_key rev_key; + struct conn_key_node key_node[CT_DIRS]; struct conn_key parent_key; /* Only used for orig_tuple support. */ - struct cmap_node cm_node; uint16_t nat_action; char *alg; - struct conn *nat_conn; /* The NAT 'conn' context, if there is one. */ atomic_flag reclaimed; /* False during the lifetime of the connection, * True as soon as a thread has started freeing * its memory. */ @@ -150,7 +154,6 @@ struct conn { /* Immutable data. */ bool alg_related; /* True if alg data connection. */ - enum ct_conn_type conn_type; uint32_t tp_id; /* Timeout policy ID. */ }; diff --git a/lib/conntrack-tp.c b/lib/conntrack-tp.c index 89cb2704a..2149fdc73 100644 --- a/lib/conntrack-tp.c +++ b/lib/conntrack-tp.c @@ -253,7 +253,8 @@ conn_update_expiration(struct conntrack *ct, struct conn *conn, } VLOG_DBG_RL(&rl, "Update timeout %s zone=%u with policy id=%d " "val=%u sec.", - ct_timeout_str[tm], conn->key.zone, conn->tp_id, val); + ct_timeout_str[tm], conn->key_node[CT_DIR_FWD].key.zone, + conn->tp_id, val); atomic_store_relaxed(&conn->expiration, now + val * 1000); } @@ -273,7 +274,8 @@ conn_init_expiration(struct conntrack *ct, struct conn *conn, } VLOG_DBG_RL(&rl, "Init timeout %s zone=%u with policy id=%d val=%u sec.", - ct_timeout_str[tm], conn->key.zone, conn->tp_id, val); + ct_timeout_str[tm], conn->key_node[CT_DIR_FWD].key.zone, + conn->tp_id, val); conn->expiration = now + val * 1000; } diff --git a/lib/conntrack-tp.h b/lib/conntrack-tp.h index 4d411d19f..7ece2eae2 100644 --- a/lib/conntrack-tp.h +++ b/lib/conntrack-tp.h @@ -17,8 +17,15 @@ #ifndef CONNTRACK_TP_H #define CONNTRACK_TP_H 1 +#include + #define CT_DPIF_NETDEV_TP_MIN 30 + 
enum ct_timeout; +struct conn; +struct conntrack; +struct timeout_policy; + void timeout_policy_init(struct conntrack *ct); int timeout_policy_update(struct conntrack *ct, struct timeout_policy *tp); int timeout_policy_delete(struct conntrack *ct, uint32_t tp_id); diff --git a/lib/conntrack.c b/lib/conntrack.c index 524670e45..2470c1689 100644 --- a/lib/conntrack.c +++ b/lib/conntrack.c @@ -101,7 +101,7 @@ static enum ct_update_res conn_update(struct conntrack *ct, struct conn *conn, struct conn_lookup_ctx *ctx, long long now); static long long int conn_expiration(const struct conn *); -static bool conn_expired(struct conn *, long long now); +static bool conn_expired(const struct conn *, long long now); static void conn_expire_push_front(struct conntrack *ct, struct conn *conn); static void set_mark(struct dp_packet *, struct conn *, uint32_t val, uint32_t mask); @@ -111,8 +111,7 @@ static void set_label(struct dp_packet *, struct conn *, static void *clean_thread_main(void *f_); static bool -nat_get_unique_tuple(struct conntrack *ct, const struct conn *conn, - struct conn *nat_conn, +nat_get_unique_tuple(struct conntrack *ct, struct conn *conn, const struct nat_action_info_t *nat_info); static uint8_t @@ -206,7 +205,7 @@ static alg_helper alg_helpers[] = { #define ALG_WC_SRC_PORT 0 /* If the total number of connections goes above this value, no new connections - * are accepted; this is for CT_CONN_TYPE_DEFAULT connections. */ + * are accepted. */ #define DEFAULT_N_CONN_LIMIT 3000000 /* Does a member by member comparison of two conn_keys; this @@ -232,61 +231,6 @@ conn_key_cmp(const struct conn_key *key1, const struct conn_key *key2) return 1; } -static void -ct_print_conn_info(const struct conn *c, const char *log_msg, - enum vlog_level vll, bool force, bool rl_on) -{ -#define CT_VLOG(RL_ON, LEVEL, ...) 
\ - do { \ - if (RL_ON) { \ - static struct vlog_rate_limit rl_ = VLOG_RATE_LIMIT_INIT(5, 5); \ - vlog_rate_limit(&this_module, LEVEL, &rl_, __VA_ARGS__); \ - } else { \ - vlog(&this_module, LEVEL, __VA_ARGS__); \ - } \ - } while (0) - - if (OVS_UNLIKELY(force || vlog_is_enabled(&this_module, vll))) { - if (c->key.dl_type == htons(ETH_TYPE_IP)) { - CT_VLOG(rl_on, vll, "%s: src ip "IP_FMT" dst ip "IP_FMT" rev src " - "ip "IP_FMT" rev dst ip "IP_FMT" src/dst ports " - "%"PRIu16"/%"PRIu16" rev src/dst ports " - "%"PRIu16"/%"PRIu16" zone/rev zone " - "%"PRIu16"/%"PRIu16" nw_proto/rev nw_proto " - "%"PRIu8"/%"PRIu8, log_msg, - IP_ARGS(c->key.src.addr.ipv4), - IP_ARGS(c->key.dst.addr.ipv4), - IP_ARGS(c->rev_key.src.addr.ipv4), - IP_ARGS(c->rev_key.dst.addr.ipv4), - ntohs(c->key.src.port), ntohs(c->key.dst.port), - ntohs(c->rev_key.src.port), ntohs(c->rev_key.dst.port), - c->key.zone, c->rev_key.zone, c->key.nw_proto, - c->rev_key.nw_proto); - } else { - char ip6_s[INET6_ADDRSTRLEN]; - inet_ntop(AF_INET6, &c->key.src.addr.ipv6, ip6_s, sizeof ip6_s); - char ip6_d[INET6_ADDRSTRLEN]; - inet_ntop(AF_INET6, &c->key.dst.addr.ipv6, ip6_d, sizeof ip6_d); - char ip6_rs[INET6_ADDRSTRLEN]; - inet_ntop(AF_INET6, &c->rev_key.src.addr.ipv6, ip6_rs, - sizeof ip6_rs); - char ip6_rd[INET6_ADDRSTRLEN]; - inet_ntop(AF_INET6, &c->rev_key.dst.addr.ipv6, ip6_rd, - sizeof ip6_rd); - - CT_VLOG(rl_on, vll, "%s: src ip %s dst ip %s rev src ip %s" - " rev dst ip %s src/dst ports %"PRIu16"/%"PRIu16 - " rev src/dst ports %"PRIu16"/%"PRIu16" zone/rev zone " - "%"PRIu16"/%"PRIu16" nw_proto/rev nw_proto " - "%"PRIu8"/%"PRIu8, log_msg, ip6_s, ip6_d, ip6_rs, - ip6_rd, ntohs(c->key.src.port), ntohs(c->key.dst.port), - ntohs(c->rev_key.src.port), ntohs(c->rev_key.dst.port), - c->key.zone, c->rev_key.zone, c->key.nw_proto, - c->rev_key.nw_proto); - } - } -} - /* Initializes the connection tracker 'ct'. 
The caller is responsible for * calling 'conntrack_destroy()', when the instance is not needed anymore */ struct conntrack * @@ -474,28 +418,27 @@ conn_clean__(struct conntrack *ct, struct conn *conn) uint32_t hash; if (conn->alg) { - expectation_clean(ct, &conn->key); + expectation_clean(ct, &conn->key_node[CT_DIR_FWD].key); } - hash = conn_key_hash(&conn->key, ct->hash_basis); - cmap_remove(&ct->conns, &conn->cm_node, hash); + hash = conn_key_hash(&conn->key_node[CT_DIR_FWD].key, ct->hash_basis); + cmap_remove(&ct->conns, &conn->key_node[CT_DIR_FWD].cm_node, hash); - if (conn->nat_conn) { - hash = conn_key_hash(&conn->nat_conn->key, ct->hash_basis); - cmap_remove(&ct->conns, &conn->nat_conn->cm_node, hash); + if (conn->nat_action) { + hash = conn_key_hash(&conn->key_node[CT_DIR_REV].key, + ct->hash_basis); + cmap_remove(&ct->conns, &conn->key_node[CT_DIR_REV].cm_node, hash); } rculist_remove(&conn->node); } -/* Must be called with 'conn' of 'conn_type' CT_CONN_TYPE_DEFAULT. Also - * removes the associated nat 'conn' from the lookup datastructures. */ +/* Also removes the associated nat 'conn' from the lookup + datastructures. 
*/ static void conn_clean(struct conntrack *ct, struct conn *conn) OVS_EXCLUDED(conn->lock, ct->ct_lock) { - ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT); - if (atomic_flag_test_and_set(&conn->reclaimed)) { return; } @@ -582,34 +525,39 @@ conn_key_lookup(struct conntrack *ct, const struct conn_key *key, uint32_t hash, long long now, struct conn **conn_out, bool *reply) { - struct conn *conn; + struct conn_key_node *keyn; + struct conn *conn = NULL; bool found = false; - CMAP_FOR_EACH_WITH_HASH (conn, cm_node, hash, &ct->conns) { + CMAP_FOR_EACH_WITH_HASH (keyn, cm_node, hash, &ct->conns) { + if (keyn->dir == CT_DIR_FWD) { + conn = CONTAINER_OF(keyn, struct conn, key_node[CT_DIR_FWD]); + } else { + conn = CONTAINER_OF(keyn, struct conn, key_node[CT_DIR_REV]); + } + if (conn_expired(conn, now)) { continue; } - if (!conn_key_cmp(&conn->key, key)) { - found = true; - if (reply) { - *reply = false; - } - break; - } - if (!conn_key_cmp(&conn->rev_key, key)) { - found = true; - if (reply) { - *reply = true; + + for (int i = CT_DIR_FWD; i < CT_DIRS; i++) { + if (!conn_key_cmp(&conn->key_node[i].key, key)) { + found = true; + if (reply) { + *reply = (i == CT_DIR_REV); + } + goto out_found; } - break; } } +out_found: if (found && conn_out) { *conn_out = conn; } else if (conn_out) { *conn_out = NULL; } + return found; } @@ -643,7 +591,7 @@ write_ct_md(struct dp_packet *pkt, uint16_t zone, const struct conn *conn, if (conn->alg_related) { key = &conn->parent_key; } else { - key = &conn->key; + key = &conn->key_node[CT_DIR_FWD].key; } } else if (alg_exp) { pkt->md.ct_mark = alg_exp->parent_mark; @@ -872,7 +820,8 @@ nat_inner_packet(struct dp_packet *pkt, struct conn_key *key, static void nat_packet(struct dp_packet *pkt, struct conn *conn, bool reply, bool related) { - struct conn_key *key = reply ? &conn->key : &conn->rev_key; + enum key_dir dir = reply ? CT_DIR_FWD : CT_DIR_REV; + struct conn_key *key = &conn->key_node[dir].key; uint16_t nat_action = reply ? 
nat_action_reverse(conn->nat_action) : conn->nat_action; @@ -906,7 +855,7 @@ conn_seq_skew_set(struct conntrack *ct, const struct conn *conn_in, { struct conn *conn; - conn_lookup(ct, &conn_in->key, now, &conn, NULL); + conn_lookup(ct, &conn_in->key_node[CT_DIR_FWD].key, now, &conn, NULL); if (conn && seq_skew) { conn->seq_skew = seq_skew; conn->seq_skew_dir = seq_skew_dir; @@ -942,7 +891,6 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, OVS_REQUIRES(ct->ct_lock) { struct conn *nc = NULL; - struct conn *nat_conn = NULL; if (!valid_new(pkt, &ctx->key)) { pkt->md.ct_state = CS_INVALID; @@ -956,6 +904,7 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, } if (commit) { + struct conn_key_node *fwd_key_node, *rev_key_node; struct zone_limit *zl = zone_limit_lookup_or_default(ct, ctx->key.zone); if (zl && atomic_count_get(&zl->czl.count) >= zl->czl.limit) { @@ -970,9 +919,12 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, } nc = new_conn(ct, pkt, &ctx->key, now, tp_id); - memcpy(&nc->key, &ctx->key, sizeof nc->key); - memcpy(&nc->rev_key, &nc->key, sizeof nc->rev_key); - conn_key_reverse(&nc->rev_key); + fwd_key_node = &nc->key_node[CT_DIR_FWD]; + rev_key_node = &nc->key_node[CT_DIR_REV]; + memcpy(&fwd_key_node->key, &ctx->key, sizeof fwd_key_node->key); + memcpy(&rev_key_node->key, &fwd_key_node->key, + sizeof rev_key_node->key); + conn_key_reverse(&rev_key_node->key); if (ct_verify_helper(helper, ct_alg_ctl)) { nc->alg = nullable_xstrdup(helper); @@ -987,46 +939,33 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, if (nat_action_info) { nc->nat_action = nat_action_info->nat_action; - nat_conn = xzalloc(sizeof *nat_conn); if (alg_exp) { if (alg_exp->nat_rpl_dst) { - nc->rev_key.dst.addr = alg_exp->alg_nat_repl_addr; + rev_key_node->key.dst.addr = alg_exp->alg_nat_repl_addr; nc->nat_action = NAT_ACTION_SRC; } else { - nc->rev_key.src.addr = alg_exp->alg_nat_repl_addr; + rev_key_node->key.src.addr = 
alg_exp->alg_nat_repl_addr; nc->nat_action = NAT_ACTION_DST; } } else { - memcpy(nat_conn, nc, sizeof *nat_conn); - bool nat_res = nat_get_unique_tuple(ct, nc, nat_conn, - nat_action_info); - + bool nat_res = nat_get_unique_tuple(ct, nc, nat_action_info); if (!nat_res) { goto nat_res_exhaustion; } - - /* Update nc with nat adjustments made to nat_conn by - * nat_get_unique_tuple(). */ - memcpy(nc, nat_conn, sizeof *nc); } nat_packet(pkt, nc, false, ctx->icmp_related); - memcpy(&nat_conn->key, &nc->rev_key, sizeof nat_conn->key); - memcpy(&nat_conn->rev_key, &nc->key, sizeof nat_conn->rev_key); - nat_conn->conn_type = CT_CONN_TYPE_UN_NAT; - nat_conn->nat_action = 0; - nat_conn->alg = NULL; - nat_conn->nat_conn = NULL; - uint32_t nat_hash = conn_key_hash(&nat_conn->key, ct->hash_basis); - cmap_insert(&ct->conns, &nat_conn->cm_node, nat_hash); + uint32_t rev_hash = conn_key_hash(&rev_key_node->key, + ct->hash_basis); + cmap_insert(&ct->conns, &rev_key_node->cm_node, rev_hash); } - nc->nat_conn = nat_conn; ovs_mutex_init_adaptive(&nc->lock); - nc->conn_type = CT_CONN_TYPE_DEFAULT; atomic_flag_clear(&nc->reclaimed); - cmap_insert(&ct->conns, &nc->cm_node, ctx->hash); + fwd_key_node->dir = CT_DIR_FWD; + rev_key_node->dir = CT_DIR_REV; + cmap_insert(&ct->conns, &fwd_key_node->cm_node, ctx->hash); conn_expire_push_front(ct, nc); atomic_count_inc(&ct->n_conn); ctx->conn = nc; /* For completeness. */ @@ -1047,7 +986,6 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, * firewall rules or a separate firewall. Also using zone partitioning * can limit DoS impact. 
*/ nat_res_exhaustion: - free(nat_conn); delete_conn__(nc); static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); VLOG_WARN_RL(&rl, "Unable to NAT due to tuple space exhaustion - " @@ -1060,7 +998,6 @@ conn_update_state(struct conntrack *ct, struct dp_packet *pkt, struct conn_lookup_ctx *ctx, struct conn *conn, long long now) { - ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT); bool create_new_conn = false; if (ctx->icmp_related) { @@ -1087,7 +1024,8 @@ conn_update_state(struct conntrack *ct, struct dp_packet *pkt, pkt->md.ct_state = CS_INVALID; break; case CT_UPDATE_NEW: - if (conn_lookup(ct, &conn->key, now, NULL, NULL)) { + if (conn_lookup(ct, &conn->key_node[CT_DIR_FWD].key, + now, NULL, NULL)) { conn_force_expire(conn); } create_new_conn = true; @@ -1263,8 +1201,10 @@ initial_conn_lookup(struct conntrack *ct, struct conn_lookup_ctx *ctx, if (natted) { if (OVS_LIKELY(ctx->conn)) { + enum key_dir dir; ctx->reply = !ctx->reply; - ctx->key = ctx->reply ? ctx->conn->rev_key : ctx->conn->key; + dir = ctx->reply ? CT_DIR_REV : CT_DIR_FWD; + ctx->key = ctx->conn->key_node[dir].key; ctx->hash = conn_key_hash(&ctx->key, ct->hash_basis); } else { /* A lookup failure does not necessarily imply that an @@ -1297,31 +1237,13 @@ process_one(struct conntrack *ct, struct dp_packet *pkt, /* Delete found entry if in wrong direction. 'force' implies commit. */ if (OVS_UNLIKELY(force && ctx->reply && conn)) { - if (conn_lookup(ct, &conn->key, now, NULL, NULL)) { + if (conn_lookup(ct, &conn->key_node[CT_DIR_FWD].key, + now, NULL, NULL)) { conn_force_expire(conn); } conn = NULL; } - if (OVS_LIKELY(conn)) { - if (conn->conn_type == CT_CONN_TYPE_UN_NAT) { - - ctx->reply = true; - struct conn *rev_conn = conn; /* Save for debugging. 
*/ - uint32_t hash = conn_key_hash(&conn->rev_key, ct->hash_basis); - conn_key_lookup(ct, &ctx->key, hash, now, &conn, &ctx->reply); - - if (!conn) { - pkt->md.ct_state |= CS_INVALID; - write_ct_md(pkt, zone, NULL, NULL, NULL); - char *log_msg = xasprintf("Missing parent conn %p", rev_conn); - ct_print_conn_info(rev_conn, log_msg, VLL_INFO, true, true); - free(log_msg); - return; - } - } - } - enum ct_alg_ctl_type ct_alg_ctl = get_alg_ctl_type(pkt, tp_src, tp_dst, helper); @@ -1414,8 +1336,9 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch, struct conn *conn = packet->md.conn; if (OVS_UNLIKELY(packet->md.ct_state == CS_INVALID)) { write_ct_md(packet, zone, NULL, NULL, NULL); - } else if (conn && conn->key.zone == zone && !force - && !get_alg_ctl_type(packet, tp_src, tp_dst, helper)) { + } else if (conn && + conn->key_node[CT_DIR_FWD].key.zone == zone && !force && + !get_alg_ctl_type(packet, tp_src, tp_dst, helper)) { process_one_fast(zone, setmark, setlabel, nat_action_info, conn, packet); } else if (OVS_UNLIKELY(!conn_key_extract(ct, packet, dl_type, &ctx, @@ -1512,12 +1435,12 @@ conntrack_clean(struct conntrack *ct, long long now) clean_end = n_conn_limit / 64; for (i = ct->next_sweep; i < N_EXP_LISTS; i++) { - count += ct_sweep(ct, &ct->exp_lists[i], now); - if (count > clean_end) { next_wakeup = 0; break; } + + count += ct_sweep(ct, &ct->exp_lists[i], now); } ct->next_sweep = (i < N_EXP_LISTS) ? 
i : 0; @@ -2172,7 +2095,7 @@ nat_ipv6_addr_increment(struct in6_addr *ipv6, uint32_t increment) } static uint32_t -nat_range_hash(const struct conn *conn, uint32_t basis, +nat_range_hash(const struct conn_key *key, uint32_t basis, const struct nat_action_info_t *nat_info) { uint32_t hash = basis; @@ -2182,11 +2105,11 @@ nat_range_hash(const struct conn *conn, uint32_t basis, hash = hash_add(hash, ((uint32_t) nat_info->max_port << 16) | nat_info->min_port); - hash = ct_endpoint_hash_add(hash, &conn->key.src); - hash = ct_endpoint_hash_add(hash, &conn->key.dst); - hash = hash_add(hash, (OVS_FORCE uint32_t) conn->key.dl_type); - hash = hash_add(hash, conn->key.nw_proto); - hash = hash_add(hash, conn->key.zone); + hash = ct_endpoint_hash_add(hash, &key->src); + hash = ct_endpoint_hash_add(hash, &key->dst); + hash = hash_add(hash, (OVS_FORCE uint32_t) key->dl_type); + hash = hash_add(hash, key->nw_proto); + hash = hash_add(hash, key->zone); /* The purpose of the second parameter is to distinguish hashes of data of * different length; our data always has the same length so there is no @@ -2260,7 +2183,7 @@ get_addr_in_range(union ct_addr *min, union ct_addr *max, } static void -find_addr(const struct conn *conn, union ct_addr *min, +find_addr(const struct conn_key *key, union ct_addr *min, union ct_addr *max, union ct_addr *curr, uint32_t hash, bool ipv4, const struct nat_action_info_t *nat_info) @@ -2270,9 +2193,9 @@ find_addr(const struct conn *conn, union ct_addr *min, /* All-zero case. 
*/ if (!memcmp(min, &zero_ip, sizeof *min)) { if (nat_info->nat_action & NAT_ACTION_SRC) { - *curr = conn->key.src.addr; + *curr = key->src.addr; } else if (nat_info->nat_action & NAT_ACTION_DST) { - *curr = conn->key.dst.addr; + *curr = key->dst.addr; } } else { get_addr_in_range(min, max, curr, hash, ipv4); @@ -2291,7 +2214,7 @@ store_addr_to_key(union ct_addr *addr, struct conn_key *key, } static bool -nat_get_unique_l4(struct conntrack *ct, struct conn *nat_conn, +nat_get_unique_l4(struct conntrack *ct, struct conn_key *rev_key, ovs_be16 *port, uint16_t curr, uint16_t min, uint16_t max) { @@ -2314,8 +2237,7 @@ another_round: } *port = htons(curr); - if (!conn_lookup(ct, &nat_conn->rev_key, - time_msec(), NULL, NULL)) { + if (!conn_lookup(ct, rev_key, time_msec(), NULL, NULL)) { return true; } } @@ -2353,53 +2275,49 @@ another_round: * * If none can be found, return exhaustion to the caller. */ static bool -nat_get_unique_tuple(struct conntrack *ct, const struct conn *conn, - struct conn *nat_conn, +nat_get_unique_tuple(struct conntrack *ct, struct conn *conn, const struct nat_action_info_t *nat_info) { - uint32_t hash = nat_range_hash(conn, ct->hash_basis, nat_info); + struct conn_key *fwd_key = &conn->key_node[CT_DIR_FWD].key; + struct conn_key *rev_key = &conn->key_node[CT_DIR_REV].key; union ct_addr min_addr = {0}, max_addr = {0}, addr = {0}; - bool pat_proto = conn->key.nw_proto == IPPROTO_TCP || - conn->key.nw_proto == IPPROTO_UDP; + bool pat_proto = fwd_key->nw_proto == IPPROTO_TCP || + fwd_key->nw_proto == IPPROTO_UDP; uint16_t min_dport, max_dport, curr_dport; uint16_t min_sport, max_sport, curr_sport; + uint32_t hash; + hash = nat_range_hash(fwd_key, ct->hash_basis, nat_info); min_addr = nat_info->min_addr; max_addr = nat_info->max_addr; - find_addr(conn, &min_addr, &max_addr, &addr, hash, - (conn->key.dl_type == htons(ETH_TYPE_IP)), nat_info); + find_addr(fwd_key, &min_addr, &max_addr, &addr, hash, + (fwd_key->dl_type == htons(ETH_TYPE_IP)), 
nat_info); - set_sport_range(nat_info, &conn->key, hash, &curr_sport, + set_sport_range(nat_info, fwd_key, hash, &curr_sport, &min_sport, &max_sport); - set_dport_range(nat_info, &conn->key, hash, &curr_dport, + set_dport_range(nat_info, fwd_key, hash, &curr_dport, &min_dport, &max_dport); if (pat_proto) { - nat_conn->rev_key.src.port = htons(curr_dport); - nat_conn->rev_key.dst.port = htons(curr_sport); + rev_key->src.port = htons(curr_dport); + rev_key->dst.port = htons(curr_sport); } - store_addr_to_key(&addr, &nat_conn->rev_key, - nat_info->nat_action); + store_addr_to_key(&addr, rev_key, nat_info->nat_action); if (!pat_proto) { - if (!conn_lookup(ct, &nat_conn->rev_key, - time_msec(), NULL, NULL)) { - return true; - } - - return false; + return !conn_lookup(ct, rev_key, time_msec(), NULL, NULL); } bool found = false; if (nat_info->nat_action & NAT_ACTION_DST_PORT) { - found = nat_get_unique_l4(ct, nat_conn, &nat_conn->rev_key.src.port, + found = nat_get_unique_l4(ct, rev_key, &rev_key->src.port, curr_dport, min_dport, max_dport); } if (!found) { - found = nat_get_unique_l4(ct, nat_conn, &nat_conn->rev_key.dst.port, + found = nat_get_unique_l4(ct, rev_key, &rev_key->dst.port, curr_sport, min_sport, max_sport); } @@ -2415,9 +2333,9 @@ conn_update(struct conntrack *ct, struct conn *conn, struct dp_packet *pkt, struct conn_lookup_ctx *ctx, long long now) { ovs_mutex_lock(&conn->lock); + uint8_t nw_proto = conn->key_node[CT_DIR_FWD].key.nw_proto; enum ct_update_res update_res = - l4_protos[conn->key.nw_proto]->conn_update(ct, conn, pkt, ctx->reply, - now); + l4_protos[nw_proto]->conn_update(ct, conn, pkt, ctx->reply, now); ovs_mutex_unlock(&conn->lock); return update_res; } @@ -2443,12 +2361,9 @@ conn_expiration(const struct conn *conn) } static bool -conn_expired(struct conn *conn, long long now) +conn_expired(const struct conn *conn, long long now) { - if (conn->conn_type == CT_CONN_TYPE_DEFAULT) { - return now >= conn_expiration(conn); - } - return false; + 
return now >= conn_expiration(conn); } static bool @@ -2474,9 +2389,7 @@ delete_conn__(struct conn *conn) static void delete_conn(struct conn *conn) { - ovs_assert(conn->conn_type == CT_CONN_TYPE_DEFAULT); ovs_mutex_destroy(&conn->lock); - free(conn->nat_conn); delete_conn__(conn); } @@ -2569,15 +2482,18 @@ static void conn_to_ct_dpif_entry(const struct conn *conn, struct ct_dpif_entry *entry, long long now) { + const struct conn_key *rev_key = &conn->key_node[CT_DIR_REV].key; + const struct conn_key *key = &conn->key_node[CT_DIR_FWD].key; + memset(entry, 0, sizeof *entry); - conn_key_to_tuple(&conn->key, &entry->tuple_orig); - conn_key_to_tuple(&conn->rev_key, &entry->tuple_reply); + conn_key_to_tuple(key, &entry->tuple_orig); + conn_key_to_tuple(rev_key, &entry->tuple_reply); if (conn->alg_related) { conn_key_to_tuple(&conn->parent_key, &entry->tuple_parent); } - entry->zone = conn->key.zone; + entry->zone = key->zone; ovs_mutex_lock(&conn->lock); entry->mark = conn->mark; @@ -2585,7 +2501,7 @@ conn_to_ct_dpif_entry(const struct conn *conn, struct ct_dpif_entry *entry, long long expiration = conn_expiration(conn) - now; - struct ct_l4_proto *class = l4_protos[conn->key.nw_proto]; + struct ct_l4_proto *class = l4_protos[key->nw_proto]; if (class->conn_get_protoinfo) { class->conn_get_protoinfo(conn, &entry->protoinfo); } @@ -2633,15 +2549,20 @@ conntrack_dump_next(struct conntrack_dump *dump, struct ct_dpif_entry *entry) if (!cm_node) { break; } + struct conn_key_node *keyn; struct conn *conn; - INIT_CONTAINER(conn, cm_node, cm_node); + INIT_CONTAINER(keyn, cm_node, cm_node); + if (keyn->dir != CT_DIR_FWD) { + continue; + } + + conn = CONTAINER_OF(keyn, struct conn, key_node[CT_DIR_FWD]); if (conn_expired(conn, now)) { continue; } - if ((!dump->filter_zone || conn->key.zone == dump->zone) && - (conn->conn_type != CT_CONN_TYPE_UN_NAT)) { + if (!dump->filter_zone || keyn->key.zone == dump->zone) { conn_to_ct_dpif_entry(conn, entry, now); return 0; } @@ -2659,14 
+2580,15 @@ conntrack_dump_done(struct conntrack_dump *dump OVS_UNUSED) int conntrack_flush(struct conntrack *ct, const uint16_t *zone) { + struct conn_key_node *keyn; struct conn *conn; - CMAP_FOR_EACH (conn, cm_node, &ct->conns) { - if (conn->conn_type != CT_CONN_TYPE_DEFAULT) { + CMAP_FOR_EACH (keyn, cm_node, &ct->conns) { + if (keyn->dir != CT_DIR_FWD) { continue; } - - if (!zone || *zone == conn->key.zone) { + conn = CONTAINER_OF(keyn, struct conn, key_node[CT_DIR_FWD]); + if (!zone || *zone == keyn->key.zone) { conn_clean(ct, conn); } } @@ -2678,18 +2600,18 @@ int conntrack_flush_tuple(struct conntrack *ct, const struct ct_dpif_tuple *tuple, uint16_t zone) { - int error = 0; struct conn_key key; struct conn *conn; + int error = 0; memset(&key, 0, sizeof(key)); tuple_to_conn_key(tuple, zone, &key); conn_lookup(ct, &key, time_msec(), &conn, NULL); - if (conn && conn->conn_type == CT_CONN_TYPE_DEFAULT) { + if (conn) { conn_clean(ct, conn); } else { - VLOG_WARN("Must flush tuple using the original pre-NATed tuple"); + VLOG_WARN("Tuple not found"); error = ENOENT; } @@ -2832,50 +2754,54 @@ expectation_create(struct conntrack *ct, ovs_be16 dst_port, const struct conn *parent_conn, bool reply, bool src_ip_wc, bool skip_nat) { + const struct conn_key *pconn_key, *pconn_rev_key; union ct_addr src_addr; union ct_addr dst_addr; union ct_addr alg_nat_repl_addr; struct alg_exp_node *alg_exp_node = xzalloc(sizeof *alg_exp_node); + pconn_key = &parent_conn->key_node[CT_DIR_FWD].key; + pconn_rev_key = &parent_conn->key_node[CT_DIR_REV].key; + if (reply) { - src_addr = parent_conn->key.src.addr; - dst_addr = parent_conn->key.dst.addr; + src_addr = pconn_key->src.addr; + dst_addr = pconn_key->dst.addr; alg_exp_node->nat_rpl_dst = true; if (skip_nat) { alg_nat_repl_addr = dst_addr; } else if (parent_conn->nat_action & NAT_ACTION_DST) { - alg_nat_repl_addr = parent_conn->rev_key.src.addr; + alg_nat_repl_addr = pconn_rev_key->src.addr; alg_exp_node->nat_rpl_dst = false; } else { 
- alg_nat_repl_addr = parent_conn->rev_key.dst.addr; + alg_nat_repl_addr = pconn_rev_key->dst.addr; } } else { - src_addr = parent_conn->rev_key.src.addr; - dst_addr = parent_conn->rev_key.dst.addr; + src_addr = pconn_rev_key->src.addr; + dst_addr = pconn_rev_key->dst.addr; alg_exp_node->nat_rpl_dst = false; if (skip_nat) { alg_nat_repl_addr = src_addr; } else if (parent_conn->nat_action & NAT_ACTION_DST) { - alg_nat_repl_addr = parent_conn->key.dst.addr; + alg_nat_repl_addr = pconn_key->dst.addr; alg_exp_node->nat_rpl_dst = true; } else { - alg_nat_repl_addr = parent_conn->key.src.addr; + alg_nat_repl_addr = pconn_key->src.addr; } } if (src_ip_wc) { memset(&src_addr, 0, sizeof src_addr); } - alg_exp_node->key.dl_type = parent_conn->key.dl_type; - alg_exp_node->key.nw_proto = parent_conn->key.nw_proto; - alg_exp_node->key.zone = parent_conn->key.zone; + alg_exp_node->key.dl_type = pconn_key->dl_type; + alg_exp_node->key.nw_proto = pconn_key->nw_proto; + alg_exp_node->key.zone = pconn_key->zone; alg_exp_node->key.src.addr = src_addr; alg_exp_node->key.dst.addr = dst_addr; alg_exp_node->key.src.port = ALG_WC_SRC_PORT; alg_exp_node->key.dst.port = dst_port; alg_exp_node->parent_mark = parent_conn->mark; alg_exp_node->parent_label = parent_conn->label; - memcpy(&alg_exp_node->parent_key, &parent_conn->key, + memcpy(&alg_exp_node->parent_key, pconn_key, sizeof alg_exp_node->parent_key); /* Take the write lock here because it is almost 100% * likely that the lookup will fail and @@ -3127,12 +3053,16 @@ process_ftp_ctl_v4(struct conntrack *ct, switch (mode) { case CT_FTP_MODE_ACTIVE: - *v4_addr_rep = conn_for_expectation->rev_key.dst.addr.ipv4; - conn_ipv4_addr = conn_for_expectation->key.src.addr.ipv4; + *v4_addr_rep = + conn_for_expectation->key_node[CT_DIR_REV].key.dst.addr.ipv4; + conn_ipv4_addr = + conn_for_expectation->key_node[CT_DIR_FWD].key.src.addr.ipv4; break; case CT_FTP_MODE_PASSIVE: - *v4_addr_rep = conn_for_expectation->key.dst.addr.ipv4; - conn_ipv4_addr = 
conn_for_expectation->rev_key.src.addr.ipv4; + *v4_addr_rep = + conn_for_expectation->key_node[CT_DIR_FWD].key.dst.addr.ipv4; + conn_ipv4_addr = + conn_for_expectation->key_node[CT_DIR_REV].key.src.addr.ipv4; break; case CT_TFTP_MODE: default: @@ -3164,7 +3094,7 @@ skip_ipv6_digits(char *str) static enum ftp_ctl_pkt process_ftp_ctl_v6(struct conntrack *ct, struct dp_packet *pkt, - const struct conn *conn_for_expectation, + const struct conn *conn_for_exp, union ct_addr *v6_addr_rep, char **ftp_data_start, size_t *addr_offset_from_ftp_data_start, size_t *addr_size, enum ct_alg_mode *mode) @@ -3232,24 +3162,25 @@ process_ftp_ctl_v6(struct conntrack *ct, switch (*mode) { case CT_FTP_MODE_ACTIVE: - *v6_addr_rep = conn_for_expectation->rev_key.dst.addr; + *v6_addr_rep = conn_for_exp->key_node[CT_DIR_REV].key.dst.addr; /* Although most servers will block this exploit, there may be some * less well managed. */ if (memcmp(&ip6_addr, &v6_addr_rep->ipv6, sizeof ip6_addr) && - memcmp(&ip6_addr, &conn_for_expectation->key.src.addr.ipv6, + memcmp(&ip6_addr, + &conn_for_exp->key_node[CT_DIR_FWD].key.src.addr.ipv6, sizeof ip6_addr)) { return CT_FTP_CTL_INVALID; } break; case CT_FTP_MODE_PASSIVE: - *v6_addr_rep = conn_for_expectation->key.dst.addr; + *v6_addr_rep = conn_for_exp->key_node[CT_DIR_FWD].key.dst.addr; break; case CT_TFTP_MODE: default: OVS_NOT_REACHED(); } - expectation_create(ct, port, conn_for_expectation, + expectation_create(ct, port, conn_for_exp, !!(pkt->md.ct_state & CS_REPLY_DIR), false, false); return CT_FTP_CTL_INTEREST; } @@ -3403,7 +3334,8 @@ handle_tftp_ctl(struct conntrack *ct, long long now OVS_UNUSED, enum ftp_ctl_pkt ftp_ctl OVS_UNUSED, bool nat OVS_UNUSED) { - expectation_create(ct, conn_for_expectation->key.src.port, + expectation_create(ct, + conn_for_expectation->key_node[CT_DIR_FWD].key.src.port, conn_for_expectation, !!(pkt->md.ct_state & CS_REPLY_DIR), false, false); } diff --git a/lib/cpu.c b/lib/cpu.c index 0292f715e..fbbea4005 100644 --- 
a/lib/cpu.c +++ b/lib/cpu.c @@ -37,7 +37,9 @@ static bool x86_has_isa(uint32_t leaf, enum x86_reg reg, uint32_t bit) { uint32_t regs[4]; - ovs_assert(__get_cpuid_max(leaf & X86_LEAF_MASK, NULL) >= leaf); + if (__get_cpuid_max(leaf & X86_LEAF_MASK, NULL) < leaf) { + return false; + } __cpuid_count(leaf, 0, regs[EAX], regs[EBX], regs[ECX], regs[EDX]); return (regs[reg] & ((uint32_t) 1 << bit)) != 0; diff --git a/lib/db-ctl-base.c b/lib/db-ctl-base.c index 134496ef3..5d2635946 100644 --- a/lib/db-ctl-base.c +++ b/lib/db-ctl-base.c @@ -1492,7 +1492,7 @@ cmd_add(struct ctl_context *ctx) const struct ovsdb_idl_column *column; const struct ovsdb_idl_row *row; const struct ovsdb_type *type; - struct ovsdb_datum new; + struct ovsdb_datum old; int i; ctx->error = get_table(table_name, &table); @@ -1516,13 +1516,7 @@ cmd_add(struct ctl_context *ctx) } type = &column->type; - - if (ctx->last_command) { - ovsdb_datum_init_empty(&new); - } else { - ovsdb_datum_clone(&new, ovsdb_idl_read(row, column)); - } - + ovsdb_datum_clone(&old, ovsdb_idl_read(row, column)); for (i = 4; i < ctx->argc; i++) { struct ovsdb_type add_type; struct ovsdb_datum add; @@ -1533,41 +1527,23 @@ cmd_add(struct ctl_context *ctx) ctx->error = ovsdb_datum_from_string(&add, &add_type, ctx->argv[i], ctx->symtab); if (ctx->error) { - ovsdb_datum_destroy(&new, &column->type); + ovsdb_datum_destroy(&old, &column->type); return; } - ovsdb_datum_union(&new, &add, type); + ovsdb_datum_union(&old, &add, type); ovsdb_datum_destroy(&add, type); } - - if (!ctx->last_command && new.n > type->n_max) { + if (old.n > type->n_max) { ctl_error(ctx, "\"add\" operation would put %u %s in column %s of " "table %s but the maximum number is %u", - new.n, + old.n, type->value.type == OVSDB_TYPE_VOID ? 
"values" : "pairs", column->name, table->name, type->n_max); - ovsdb_datum_destroy(&new, &column->type); + ovsdb_datum_destroy(&old, &column->type); return; } - - if (ctx->last_command) { - /* Partial updates can only be made one by one. */ - for (i = 0; i < new.n; i++) { - struct ovsdb_datum *datum = xmalloc(sizeof *datum); - - ovsdb_datum_init_empty(datum); - ovsdb_datum_add_from_index_unsafe(datum, &new, i, type); - if (ovsdb_type_is_map(type)) { - ovsdb_idl_txn_write_partial_map(row, column, datum); - } else { - ovsdb_idl_txn_write_partial_set(row, column, datum); - } - } - ovsdb_datum_destroy(&new, &column->type); - } else { - ovsdb_idl_txn_verify(row, column); - ovsdb_idl_txn_write(row, column, &new); - } + ovsdb_idl_txn_verify(row, column); + ovsdb_idl_txn_write(row, column, &old); invalidate_cache(ctx); } diff --git a/lib/dpctl.c b/lib/dpctl.c index d12d9b8a5..41b23d8ae 100644 --- a/lib/dpctl.c +++ b/lib/dpctl.c @@ -673,7 +673,7 @@ show_dpif(struct dpif *dpif, struct dpctl_params *dpctl_p) } for (int i = 0; i < n_port_nos; i++) { - if (dpif_port_query_by_number(dpif, port_nos[i], &dpif_port)) { + if (dpif_port_query_by_number(dpif, port_nos[i], &dpif_port, true)) { continue; } @@ -1713,10 +1713,16 @@ dpctl_flush_conntrack(int argc, const char *argv[], uint16_t zone, *pzone = NULL; int error; int args = argc - 1; + int zone_pos = 1; + + if (dp_arg_exists(argc, argv)) { + args--; + zone_pos = 2; + } /* Parse zone. */ - if (args && !strncmp(argv[1], "zone=", 5)) { - if (!ovs_scan(argv[1], "zone=%"SCNu16, &zone)) { + if (args && !strncmp(argv[zone_pos], "zone=", 5)) { + if (!ovs_scan(argv[zone_pos], "zone=%"SCNu16, &zone)) { ds_put_cstr(&ds, "failed to parse zone"); error = EINVAL; goto error; @@ -1744,7 +1750,7 @@ dpctl_flush_conntrack(int argc, const char *argv[], } /* Report error if there is more than one unparsed argument. 
*/ - if (args > 1) { + if (args > 0) { ds_put_cstr(&ds, "invalid arguments"); error = EINVAL; goto error; @@ -2196,7 +2202,7 @@ parse_ct_limit_zones(const char *argv, struct ovs_list *zone_limits, argcopy = xstrdup(argv + 5); next_zone = strtok_r(argcopy, ",", &save_ptr); - do { + while (next_zone != NULL) { if (ovs_scan(next_zone, "%"SCNu16, &zone)) { ct_dpif_push_zone_limit(zone_limits, zone, 0, 0); } else { @@ -2204,7 +2210,8 @@ parse_ct_limit_zones(const char *argv, struct ovs_list *zone_limits, free(argcopy); return EINVAL; } - } while ((next_zone = strtok_r(NULL, ",", &save_ptr)) != NULL); + next_zone = strtok_r(NULL, ",", &save_ptr); + } free(argcopy); return 0; diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index c9f7179c3..10371359c 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -3365,14 +3365,13 @@ static inline void netdev_flow_key_init(struct netdev_flow_key *key, const struct flow *flow) { - uint64_t *dst = miniflow_values(&key->mf); uint32_t hash = 0; uint64_t value; miniflow_map_init(&key->mf, flow); miniflow_init(&key->mf, flow); - size_t n = dst - miniflow_get_values(&key->mf); + size_t n = miniflow_n_values(&key->mf); FLOW_FOR_EACH_IN_MAPS (value, flow, key->mf.map) { hash = hash_add64(hash, value); @@ -4191,7 +4190,7 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd, const struct dpif_flow_put *put, struct dpif_flow_stats *stats) { - struct dp_netdev_flow *netdev_flow; + struct dp_netdev_flow *netdev_flow = NULL; int error = 0; if (stats) { @@ -4199,16 +4198,35 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd, } ovs_mutex_lock(&pmd->flow_mutex); - netdev_flow = dp_netdev_pmd_lookup_flow(pmd, key, NULL); - if (!netdev_flow) { - if (put->flags & DPIF_FP_CREATE) { - dp_netdev_flow_add(pmd, match, ufid, put->actions, - put->actions_len, ODPP_NONE); + if (put->ufid) { + netdev_flow = dp_netdev_pmd_find_flow(pmd, put->ufid, + put->key, put->key_len); + } else { + /* Use key instead of the locally generated ufid + * to search 
netdev_flow. */ + netdev_flow = dp_netdev_pmd_lookup_flow(pmd, key, NULL); + } + + if (put->flags & DPIF_FP_CREATE) { + if (!netdev_flow) { + dp_netdev_flow_add(pmd, match, ufid, + put->actions, put->actions_len, ODPP_NONE); } else { - error = ENOENT; + error = EEXIST; } - } else { - if (put->flags & DPIF_FP_MODIFY) { + goto exit; + } + + if (put->flags & DPIF_FP_MODIFY) { + if (!netdev_flow) { + error = ENOENT; + } else { + if (!put->ufid && !flow_equal(&match->flow, &netdev_flow->flow)) { + /* Overlapping flow. */ + error = EINVAL; + goto exit; + } + struct dp_netdev_actions *new_actions; struct dp_netdev_actions *old_actions; @@ -4239,15 +4257,11 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd, * counter, and subtracting it before outputting the stats */ error = EOPNOTSUPP; } - ovsrcu_postpone(dp_netdev_actions_free, old_actions); - } else if (put->flags & DPIF_FP_CREATE) { - error = EEXIST; - } else { - /* Overlapping flow. */ - error = EINVAL; } } + +exit: ovs_mutex_unlock(&pmd->flow_mutex); return error; } @@ -9616,6 +9630,7 @@ dpif_netdev_bond_stats_get(struct dpif *dpif, uint32_t bond_id, const struct dpif_class dpif_netdev_class = { "netdev", true, /* cleanup_required */ + true, /* synced_dp_layers */ dpif_netdev_init, dpif_netdev_enumerate, dpif_netdev_port_open_type, diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 026b0daa8..dc56e863f 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -395,7 +395,7 @@ dpif_netlink_open(const struct dpif_class *class OVS_UNUSED, const char *name, dp_request.user_features |= OVS_DP_F_UNALIGNED; dp_request.user_features |= OVS_DP_F_VPORT_PIDS; dp_request.user_features |= OVS_DP_F_UNSUPPORTED; - error = dpif_netlink_dp_transact(&dp_request, &dp, &buf); + error = dpif_netlink_dp_transact(&dp_request, NULL, NULL); if (error) { /* The Open vSwitch kernel module has two modes for dispatching * upcalls: per-vport and per-cpu. 
@@ -2582,7 +2582,7 @@ dpif_netlink_calculate_n_handlers(void) n_handlers = MIN(next_prime_num, total_cores); } - return n_handlers; + return MAX(n_handlers, 1); } static int @@ -4515,6 +4515,7 @@ dpif_netlink_cache_set_size(struct dpif *dpif_, uint32_t level, uint32_t size) const struct dpif_class dpif_netlink_class = { "system", false, /* cleanup_required */ + false, /* synced_dp_layers */ NULL, /* init */ dpif_netlink_enumerate, NULL, diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h index 12477a24f..b8ead8a02 100644 --- a/lib/dpif-provider.h +++ b/lib/dpif-provider.h @@ -127,6 +127,14 @@ struct dpif_class { * datapaths that can not exist without it (e.g. netdev datapath). */ bool cleanup_required; + /* If 'true' the specific dpif implementation synchronizes the various + * datapath implementation layers, i.e., the dpif's layer in combination + * with the underlying netdev offload layers. For example, dpif-netlink + * does not sync its kernel flows with the tc ones, i.e., only one gets + * installed. On the other hand, dpif-netdev installs both flows, + * internally keeps track of both, and represents them as one. */ + bool synced_dp_layers; + /* Called when the dpif provider is registered, typically at program * startup. Returning an error from this function will prevent any * datapath with this class from being created. diff --git a/lib/dpif.c b/lib/dpif.c index fe4db83fb..4397aeaf4 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -701,13 +701,14 @@ dpif_port_set_config(struct dpif *dpif, odp_port_t port_no, * initializes '*port' appropriately; on failure, returns a positive errno * value. * - * Retuns ENODEV if the port doesn't exist. + * Retuns ENODEV if the port doesn't exist. Will not log a warning in this + * case unless 'warn_if_not_found' is true. * * The caller owns the data in 'port' and must free it with * dpif_port_destroy() when it is no longer needed. 
*/ int dpif_port_query_by_number(const struct dpif *dpif, odp_port_t port_no, - struct dpif_port *port) + struct dpif_port *port, bool warn_if_not_found) { int error = dpif->dpif_class->port_query_by_number(dpif, port_no, port); if (!error) { @@ -715,8 +716,13 @@ dpif_port_query_by_number(const struct dpif *dpif, odp_port_t port_no, dpif_name(dpif), port_no, port->name); } else { memset(port, 0, sizeof *port); - VLOG_WARN_RL(&error_rl, "%s: failed to query port %"PRIu32": %s", - dpif_name(dpif), port_no, ovs_strerror(error)); + if (error == ENODEV && !warn_if_not_found) { + VLOG_DBG_RL(&dpmsg_rl, "%s: failed to query port %"PRIu32": %s", + dpif_name(dpif), port_no, ovs_strerror(error)); + } else { + VLOG_WARN_RL(&error_rl, "%s: failed to query port %"PRIu32": %s", + dpif_name(dpif), port_no, ovs_strerror(error)); + } } return error; } @@ -784,7 +790,7 @@ dpif_port_get_name(struct dpif *dpif, odp_port_t port_no, ovs_assert(name_size > 0); - error = dpif_port_query_by_number(dpif, port_no, &port); + error = dpif_port_query_by_number(dpif, port_no, &port, true); if (!error) { ovs_strlcpy(name, port.name, name_size); dpif_port_destroy(&port); @@ -2109,3 +2115,9 @@ dpif_cache_set_size(struct dpif *dpif, uint32_t level, uint32_t size) ? 
dpif->dpif_class->cache_set_size(dpif, level, size) : EOPNOTSUPP; } + +bool +dpif_synced_dp_layers(struct dpif *dpif) +{ + return dpif->dpif_class->synced_dp_layers; +} diff --git a/lib/dpif.h b/lib/dpif.h index 6cb4dae6d..2a3649720 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -461,7 +461,7 @@ void dpif_port_clone(struct dpif_port *, const struct dpif_port *); void dpif_port_destroy(struct dpif_port *); bool dpif_port_exists(const struct dpif *dpif, const char *devname); int dpif_port_query_by_number(const struct dpif *, odp_port_t port_no, - struct dpif_port *); + struct dpif_port *, bool warn_if_not_found); int dpif_port_query_by_name(const struct dpif *, const char *devname, struct dpif_port *); int dpif_port_get_name(struct dpif *, odp_port_t port_no, @@ -939,6 +939,7 @@ int dpif_get_pmds_for_port(const struct dpif * dpif, odp_port_t port_no, char *dpif_get_dp_version(const struct dpif *); bool dpif_supports_tnl_push_pop(const struct dpif *); bool dpif_supports_explicit_drop_action(const struct dpif *); +bool dpif_synced_dp_layers(struct dpif *); /* Log functions. */ struct vlog_module; diff --git a/lib/fatal-signal.c b/lib/fatal-signal.c index bbb31ef27..7054f8e74 100644 --- a/lib/fatal-signal.c +++ b/lib/fatal-signal.c @@ -78,6 +78,39 @@ static void call_hooks(int sig_nr); static BOOL WINAPI ConsoleHandlerRoutine(DWORD dwCtrlType); #endif +/* Sets up a pipe or event handle that will be used to wake up the current + * process after signal is received, so it can be processed outside of the + * signal handler context in fatal_signal_run(). 
*/ +static void +fatal_signal_create_wakeup_events(void) +{ +#ifndef _WIN32 + xpipe_nonblocking(signal_fds); +#else + wevent = CreateEvent(NULL, TRUE, FALSE, NULL); + if (!wevent) { + char *msg_buf = ovs_lasterror_to_string(); + VLOG_FATAL("Failed to create a event (%s).", msg_buf); + } +#endif +} + +static void +fatal_signal_destroy_wakeup_events(void) +{ +#ifndef _WIN32 + close(signal_fds[0]); + signal_fds[0] = -1; + close(signal_fds[1]); + signal_fds[1] = -1; +#else + ResetEvent(wevent); + CloseHandle(wevent); + wevent = NULL; +#endif +} + + /* Initializes the fatal signal handling module. Calling this function is * optional, because calling any other function in the module will also * initialize it. However, in a multithreaded program, the module must be @@ -94,15 +127,10 @@ fatal_signal_init(void) inited = true; ovs_mutex_init_recursive(&mutex); -#ifndef _WIN32 - xpipe_nonblocking(signal_fds); -#else - wevent = CreateEvent(NULL, TRUE, FALSE, NULL); - if (!wevent) { - char *msg_buf = ovs_lasterror_to_string(); - VLOG_FATAL("Failed to create a event (%s).", msg_buf); - } + fatal_signal_create_wakeup_events(); + +#ifdef _WIN32 /* Register a function to handle Ctrl+C. */ SetConsoleCtrlHandler(ConsoleHandlerRoutine, true); #endif @@ -456,6 +484,9 @@ do_unlink_files(void) * hooks passed a 'cancel_cb' function to fatal_signal_add_hook(), then those * functions will be called, allowing them to free resources, etc. * + * Also re-creates wake-up events, so signals in one of the processes do not + * wake up the other one. + * * Following a fork, one of the resulting processes can call this function to * allow it to terminate without calling the hooks registered before calling * this function. 
New hooks registered after calling this function will take @@ -467,6 +498,9 @@ fatal_signal_fork(void) assert_single_threaded(); + fatal_signal_destroy_wakeup_events(); + fatal_signal_create_wakeup_events(); + for (i = 0; i < n_hooks; i++) { struct hook *h = &hooks[i]; if (h->cancel_cb) { diff --git a/lib/learning-switch.c b/lib/learning-switch.c index 8102475ca..cdf42935c 100644 --- a/lib/learning-switch.c +++ b/lib/learning-switch.c @@ -569,6 +569,7 @@ process_packet_in(struct lswitch *sw, const struct ofp_header *oh) } /* Prepare packet_out in case we need one. */ + match_init_catchall(&po.flow_metadata); po.buffer_id = buffer_id; if (buffer_id == UINT32_MAX) { po.packet = dp_packet_data(&pkt); diff --git a/lib/meta-flow.c b/lib/meta-flow.c index c576ae620..474344194 100644 --- a/lib/meta-flow.c +++ b/lib/meta-flow.c @@ -3676,3 +3676,28 @@ mf_bitmap_not(struct mf_bitmap x) bitmap_not(x.bm, MFF_N_IDS); return x; } + +void +mf_set_mask_l3_prereqs(const struct mf_field *mf, const struct flow *fl, + struct flow_wildcards *wc) +{ + if (is_ip_any(fl) && + ((mf->id == MFF_IPV4_SRC) || + (mf->id == MFF_IPV4_DST) || + (mf->id == MFF_IPV6_SRC) || + (mf->id == MFF_IPV6_DST) || + (mf->id == MFF_IPV6_LABEL) || + (mf->id == MFF_IP_DSCP) || + (mf->id == MFF_IP_ECN) || + (mf->id == MFF_IP_TTL))) { + WC_MASK_FIELD(wc, nw_proto); + } else if ((fl->dl_type == htons(ETH_TYPE_ARP)) && + ((mf->id == MFF_ARP_OP) || + (mf->id == MFF_ARP_SHA) || + (mf->id == MFF_ARP_THA) || + (mf->id == MFF_ARP_SPA) || + (mf->id == MFF_ARP_TPA))) { + /* mask only the lower 8 bits. */ + wc->masks.nw_proto = 0xff; + } +} diff --git a/lib/meta-flow.xml b/lib/meta-flow.xml index a1a20366d..ac72a44bc 100644 --- a/lib/meta-flow.xml +++ b/lib/meta-flow.xml @@ -3517,23 +3517,24 @@ actions=clone(load:0->NXM_OF_IN_PORT[],output:123)

+tab(;); r r r r r. -Criteria OpenFlow 1.0 OpenFlow 1.1 OpenFlow 1.2+ NXM -\_ \_ \_ \_ \_ -[1] \fL????\fR/\fL1\fR,\fL??\fR/\fL?\fR \fL????\fR/\fL1\fR,\fL??\fR/\fL?\fR \fL0000\fR/\fL0000\fR,\fL--\fR \fL0000\fR/\fL0000\fR -[2] \fLffff\fR/\fL0\fR,\fL??\fR/\fL?\fR \fLffff\fR/\fL0\fR,\fL??\fR/\fL?\fR \fL0000\fR/\fLffff\fR,\fL--\fR \fL0000\fR/\fLffff\fR -[3] \fL0xxx\fR/\fL0\fR,\fL??\fR/\fL1\fR \fL0xxx\fR/\fL0\fR,\fL??\fR/\fL1\fR \fL1xxx\fR/\fLffff\fR,\fL--\fR \fL1xxx\fR/\fL1fff\fR -[4] \fL????\fR/\fL1\fR,\fL0y\fR/\fL0\fR \fLfffe\fR/\fL0\fR,\fL0y\fR/\fL0\fR \fL1000\fR/\fL1000\fR,\fL0y\fR \fLz000\fR/\fLf000\fR -[5] \fL0xxx\fR/\fL0\fR,\fL0y\fR/\fL0\fR \fL0xxx\fR/\fL0\fR,\fL0y\fR/\fL0\fR \fL1xxx\fR/\fLffff\fR,\fL0y\fR \fLzxxx\fR/\fLffff\fR +Criteria;OpenFlow 1.0;OpenFlow 1.1;OpenFlow 1.2+;NXM +\_;\_;\_;\_;\_ +[1];\fL????\fR/\fL1\fR,\fL??\fR/\fL?\fR;\fL????\fR/\fL1\fR,\fL??\fR/\fL?\fR;\fL0000\fR/\fL0000\fR,\fL--\fR;\fL0000\fR/\fL0000\fR +[2];\fLffff\fR/\fL0\fR,\fL??\fR/\fL?\fR;\fLffff\fR/\fL0\fR,\fL??\fR/\fL?\fR;\fL0000\fR/\fLffff\fR,\fL--\fR;\fL0000\fR/\fLffff\fR +[3];\fL0xxx\fR/\fL0\fR,\fL??\fR/\fL1\fR;\fL0xxx\fR/\fL0\fR,\fL??\fR/\fL1\fR;\fL1xxx\fR/\fLffff\fR,\fL--\fR;\fL1xxx\fR/\fL1fff\fR +[4];\fL????\fR/\fL1\fR,\fL0y\fR/\fL0\fR;\fLfffe\fR/\fL0\fR,\fL0y\fR/\fL0\fR;\fL1000\fR/\fL1000\fR,\fL0y\fR;\fLz000\fR/\fLf000\fR +[5];\fL0xxx\fR/\fL0\fR,\fL0y\fR/\fL0\fR;\fL0xxx\fR/\fL0\fR,\fL0y\fR/\fL0\fR;\fL1xxx\fR/\fLffff\fR,\fL0y\fR;\fLzxxx\fR/\fLffff\fR .T& -r r c c r. -[6] (none) (none) \fL1001\fR/\fL1001\fR,\fL--\fR \fL1001\fR/\fL1001\fR +r c c r r. +[6];(none);(none);\fL1001\fR/\fL1001\fR,\fL--\fR;\fL1001\fR/\fL1001\fR .T& -r r c c c. -[7] (none) (none) (none) \fL3000\fR/\fL3000\fR -[8] (none) (none) (none) \fL0000\fR/\fL0fff\fR -[9] (none) (none) (none) \fL0000\fR/\fLf000\fR -[10] (none) (none) (none) \fL0000\fR/\fLefff\fR +r c c c r. 
+[7];(none);(none);(none);\fL3000\fR/\fL3000\fR +[8];(none);(none);(none);\fL0000\fR/\fL0fff\fR +[9];(none);(none);(none);\fL0000\fR/\fLf000\fR +[10];(none);(none);(none);\fL0000\fR/\fLefff\fR

@@ -4312,9 +4313,9 @@ r r c c c. - + - + diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index ab5b8223e..ab4a4e9d8 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -489,6 +489,9 @@ struct netdev_dpdk { /* Array of vhost rxq states, see vring_state_changed. */ bool *vhost_rxq_enabled; + + /* Ensures that Rx metadata delivery is configured only once. */ + bool rx_metadata_delivery_configured; ); PADDED_MEMBERS(CACHE_LINE_SIZE, @@ -840,7 +843,7 @@ dpdk_mp_create(struct netdev_dpdk *dev, int mtu) static struct dpdk_mp * dpdk_mp_get(struct netdev_dpdk *dev, int mtu) { - struct dpdk_mp *dmp, *next; + struct dpdk_mp *dmp = NULL, *next; bool reuse = false; ovs_mutex_lock(&dpdk_mp_mutex); @@ -1140,6 +1143,45 @@ dpdk_eth_flow_ctrl_setup(struct netdev_dpdk *dev) OVS_REQUIRES(dev->mutex) } } +static void +dpdk_eth_dev_init_rx_metadata(struct netdev_dpdk *dev) +{ + uint64_t rx_metadata = 0; + int ret; + + if (dev->rx_metadata_delivery_configured) { + return; + } + + /* For the fallback offload (non-"transfer" rules). */ + rx_metadata |= RTE_ETH_RX_METADATA_USER_MARK; + +#ifdef ALLOW_EXPERIMENTAL_API + /* For the tunnel offload. */ + rx_metadata |= RTE_ETH_RX_METADATA_TUNNEL_ID; +#endif /* ALLOW_EXPERIMENTAL_API */ + + ret = rte_eth_rx_metadata_negotiate(dev->port_id, &rx_metadata); + if (ret == 0) { + if (!(rx_metadata & RTE_ETH_RX_METADATA_USER_MARK)) { + VLOG_DBG("%s: The NIC will not provide per-packet USER_MARK", + netdev_get_name(&dev->up)); + } +#ifdef ALLOW_EXPERIMENTAL_API + if (!(rx_metadata & RTE_ETH_RX_METADATA_TUNNEL_ID)) { + VLOG_DBG("%s: The NIC will not provide per-packet TUNNEL_ID", + netdev_get_name(&dev->up)); + } +#endif /* ALLOW_EXPERIMENTAL_API */ + } else { + VLOG(ret == -ENOTSUP ? 
VLL_DBG : VLL_WARN, + "%s: Cannot negotiate Rx metadata: %s", + netdev_get_name(&dev->up), rte_strerror(-ret)); + } + + dev->rx_metadata_delivery_configured = true; +} + static int dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dev->mutex) @@ -1154,6 +1196,18 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) RTE_ETH_RX_OFFLOAD_TCP_CKSUM | RTE_ETH_RX_OFFLOAD_IPV4_CKSUM; + if (netdev_is_flow_api_enabled()) { + /* + * Full tunnel offload requires that tunnel ID metadata be + * delivered with "miss" packets from the hardware to the + * PMD. The same goes for megaflow mark metadata which is + * used in MARK + RSS offload scenario. + * + * Request delivery of such metadata. + */ + dpdk_eth_dev_init_rx_metadata(dev); + } + rte_eth_dev_info_get(dev->port_id, &info); if (strstr(info.driver_name, "vf") != NULL) { @@ -1320,6 +1374,8 @@ common_construct(struct netdev *netdev, dpdk_port_t port_no, /* Initilize the hardware offload flags to 0 */ dev->hw_ol_features = 0; + dev->rx_metadata_delivery_configured = false; + dev->flags = NETDEV_UP | NETDEV_PROMISC; ovs_list_push_back(&dpdk_list, &dev->list_node); diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 65bdd51db..94b99a7be 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -2712,8 +2712,16 @@ tc_add_matchall_policer(struct netdev *netdev, uint32_t kbits_rate, err = tc_transact(&request, &reply); if (!err) { - struct tcmsg *tc = - ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc); + struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size); + struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); + struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc); + + if (!nlmsg || !tc) { + VLOG_ERR_RL(&rl, + "Failed to add match all policer, malformed reply"); + ofpbuf_delete(reply); + return EPROTO; + } ofpbuf_delete(reply); } @@ -5732,26 +5740,27 @@ static int tc_update_policer_action_stats(struct ofpbuf *msg, struct ofputil_meter_stats *stats) { + struct ofpbuf b = 
ofpbuf_const_initializer(msg->data, msg->size); + struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); + struct tcamsg *tca = ofpbuf_try_pull(&b, sizeof *tca); struct ovs_flow_stats stats_dropped; struct ovs_flow_stats stats_hw; struct ovs_flow_stats stats_sw; const struct nlattr *act; struct nlattr *prio; - struct tcamsg *tca; int error = 0; if (!stats) { goto exit; } - if (NLMSG_HDRLEN + sizeof *tca > msg->size) { + if (!nlmsg || !tca) { VLOG_ERR_RL(&rl, "Failed to get action stats, size error"); error = EPROTO; goto exit; } - tca = ofpbuf_at_assert(msg, NLMSG_HDRLEN, sizeof *tca); - act = nl_attr_find(msg, NLMSG_HDRLEN + sizeof *tca, TCA_ACT_TAB); + act = nl_attr_find(&b, 0, TCA_ACT_TAB); if (!act) { VLOG_ERR_RL(&rl, "Failed to get action stats, can't find attribute"); error = EPROTO; @@ -6016,20 +6025,26 @@ static int tc_parse_class(const struct ofpbuf *msg, unsigned int *handlep, struct nlattr **options, struct netdev_queue_stats *stats) { + struct ofpbuf b = ofpbuf_const_initializer(msg->data, msg->size); + struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); + struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc); static const struct nl_policy tca_policy[] = { [TCA_OPTIONS] = { .type = NL_A_NESTED, .optional = false }, [TCA_STATS2] = { .type = NL_A_NESTED, .optional = false }, }; struct nlattr *ta[ARRAY_SIZE(tca_policy)]; - if (!nl_policy_parse(msg, NLMSG_HDRLEN + sizeof(struct tcmsg), - tca_policy, ta, ARRAY_SIZE(ta))) { + if (!nlmsg || !tc) { + VLOG_ERR_RL(&rl, "failed to parse class message, malformed reply"); + goto error; + } + + if (!nl_policy_parse(&b, 0, tca_policy, ta, ARRAY_SIZE(ta))) { VLOG_WARN_RL(&rl, "failed to parse class message"); goto error; } if (handlep) { - struct tcmsg *tc = ofpbuf_at_assert(msg, NLMSG_HDRLEN, sizeof *tc); *handlep = tc->tcm_handle; } diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c index b89dfdd52..6c9094638 100644 --- a/lib/netdev-native-tnl.c +++ b/lib/netdev-native-tnl.c @@ -320,7 +320,7 
@@ netdev_tnl_ip_build_header(struct ovs_action_push_tnl *data, } static void * -udp_build_header(struct netdev_tunnel_config *tnl_cfg, +udp_build_header(const struct netdev_tunnel_config *tnl_cfg, struct ovs_action_push_tnl *data, const struct netdev_tnl_build_header_params *params) { @@ -452,7 +452,6 @@ netdev_gre_push_header(const struct netdev *netdev, const struct ovs_action_push_tnl *data) { struct netdev_vport *dev = netdev_vport_cast(netdev); - struct netdev_tunnel_config *tnl_cfg; struct gre_base_hdr *greh; int ip_tot_size; @@ -468,8 +467,7 @@ netdev_gre_push_header(const struct netdev *netdev, int seq_ofs = gre_header_len(greh->flags) - 4; ovs_16aligned_be32 *seq_opt = ALIGNED_CAST(ovs_16aligned_be32 *, (char *)greh + seq_ofs); - tnl_cfg = &dev->tnl_cfg; - put_16aligned_be32(seq_opt, htonl(tnl_cfg->seqno++)); + put_16aligned_be32(seq_opt, htonl(atomic_count_inc(&dev->gre_seqno))); } } @@ -478,16 +476,11 @@ netdev_gre_build_header(const struct netdev *netdev, struct ovs_action_push_tnl *data, const struct netdev_tnl_build_header_params *params) { - struct netdev_vport *dev = netdev_vport_cast(netdev); - struct netdev_tunnel_config *tnl_cfg; + const struct netdev_tunnel_config *tnl_cfg; struct gre_base_hdr *greh; ovs_16aligned_be32 *options; unsigned int hlen; - /* XXX: RCUfy tnl_cfg. 
*/ - ovs_mutex_lock(&dev->mutex); - tnl_cfg = &dev->tnl_cfg; - greh = netdev_tnl_ip_build_header(data, params, IPPROTO_GRE); if (params->flow->packet_type == htonl(PT_ETH)) { @@ -495,8 +488,7 @@ netdev_gre_build_header(const struct netdev *netdev, } else if (pt_ns(params->flow->packet_type) == OFPHTN_ETHERTYPE) { greh->protocol = pt_ns_type_be(params->flow->packet_type); } else { - ovs_mutex_unlock(&dev->mutex); - return 1; + return EINVAL; } greh->flags = 0; @@ -507,6 +499,8 @@ netdev_gre_build_header(const struct netdev *netdev, options++; } + tnl_cfg = netdev_get_tunnel_config(netdev); + if (tnl_cfg->out_key_present) { greh->flags |= htons(GRE_KEY); put_16aligned_be32(options, be64_to_be32(params->flow->tunnel.tun_id)); @@ -519,8 +513,6 @@ netdev_gre_build_header(const struct netdev *netdev, options++; } - ovs_mutex_unlock(&dev->mutex); - hlen = (uint8_t *) options - (uint8_t *) greh; data->header_len += hlen; @@ -605,7 +597,6 @@ netdev_erspan_push_header(const struct netdev *netdev, const struct ovs_action_push_tnl *data) { struct netdev_vport *dev = netdev_vport_cast(netdev); - struct netdev_tunnel_config *tnl_cfg; struct erspan_base_hdr *ersh; struct gre_base_hdr *greh; struct erspan_md2 *md2; @@ -615,9 +606,8 @@ netdev_erspan_push_header(const struct netdev *netdev, data->header_len, &ip_tot_size); /* update GRE seqno */ - tnl_cfg = &dev->tnl_cfg; ovs_16aligned_be32 *seqno = (ovs_16aligned_be32 *) (greh + 1); - put_16aligned_be32(seqno, htonl(tnl_cfg->seqno++)); + put_16aligned_be32(seqno, htonl(atomic_count_inc(&dev->gre_seqno))); /* update v2 timestamp */ if (greh->protocol == htons(ETH_TYPE_ERSPAN2)) { @@ -632,8 +622,7 @@ netdev_erspan_build_header(const struct netdev *netdev, struct ovs_action_push_tnl *data, const struct netdev_tnl_build_header_params *params) { - struct netdev_vport *dev = netdev_vport_cast(netdev); - struct netdev_tunnel_config *tnl_cfg; + const struct netdev_tunnel_config *tnl_cfg; struct gre_base_hdr *greh; struct erspan_base_hdr 
*ersh; unsigned int hlen; @@ -641,21 +630,19 @@ netdev_erspan_build_header(const struct netdev *netdev, int erspan_ver; uint16_t sid; - /* XXX: RCUfy tnl_cfg. */ - ovs_mutex_lock(&dev->mutex); - tnl_cfg = &dev->tnl_cfg; greh = netdev_tnl_ip_build_header(data, params, IPPROTO_GRE); ersh = ERSPAN_HDR(greh); tun_id = ntohl(be64_to_be32(params->flow->tunnel.tun_id)); /* ERSPAN only has 10-bit session ID */ if (tun_id & ~ERSPAN_SID_MASK) { - ovs_mutex_unlock(&dev->mutex); - return 1; + return EINVAL; } else { sid = (uint16_t) tun_id; } + tnl_cfg = netdev_get_tunnel_config(netdev); + if (tnl_cfg->erspan_ver_flow) { erspan_ver = params->flow->tunnel.erspan_ver; } else { @@ -702,12 +689,9 @@ netdev_erspan_build_header(const struct netdev *netdev, hlen = ERSPAN_GREHDR_LEN + sizeof *ersh + ERSPAN_V2_MDSIZE; } else { VLOG_WARN_RL(&err_rl, "ERSPAN version error %d", tnl_cfg->erspan_ver); - ovs_mutex_unlock(&dev->mutex); - return 1; + return EINVAL; } - ovs_mutex_unlock(&dev->mutex); - data->header_len += hlen; if (params->is_ipv6) { @@ -786,7 +770,6 @@ netdev_gtpu_push_header(const struct netdev *netdev, const struct ovs_action_push_tnl *data) { struct netdev_vport *dev = netdev_vport_cast(netdev); - struct netdev_tunnel_config *tnl_cfg; struct udp_header *udp; struct gtpuhdr *gtpuh; int ip_tot_size; @@ -801,10 +784,9 @@ netdev_gtpu_push_header(const struct netdev *netdev, gtpuh = ALIGNED_CAST(struct gtpuhdr *, udp + 1); - tnl_cfg = &dev->tnl_cfg; - if (tnl_cfg->set_seq) { + if (gtpuh->md.flags & GTPU_S_MASK) { ovs_be16 *seqno = ALIGNED_CAST(ovs_be16 *, gtpuh + 1); - *seqno = htons(tnl_cfg->seqno++); + *seqno = htons(atomic_count_inc(&dev->gre_seqno)); payload_len += sizeof(struct gtpuhdr_opt); } gtpuh->len = htons(payload_len); @@ -815,13 +797,12 @@ netdev_gtpu_build_header(const struct netdev *netdev, struct ovs_action_push_tnl *data, const struct netdev_tnl_build_header_params *params) { - struct netdev_vport *dev = netdev_vport_cast(netdev); - struct netdev_tunnel_config 
*tnl_cfg; + const struct netdev_tunnel_config *tnl_cfg; struct gtpuhdr *gtph; unsigned int gtpu_hlen; - ovs_mutex_lock(&dev->mutex); - tnl_cfg = &dev->tnl_cfg; + tnl_cfg = netdev_get_tunnel_config(netdev); + gtph = udp_build_header(tnl_cfg, data, params); /* Set to default if not set in flow. */ @@ -837,7 +818,6 @@ netdev_gtpu_build_header(const struct netdev *netdev, gtph->md.flags |= GTPU_S_MASK; gtpu_hlen += sizeof(struct gtpuhdr_opt); } - ovs_mutex_unlock(&dev->mutex); data->header_len += gtpu_hlen; data->tnl_type = OVS_VPORT_TYPE_GTPU; @@ -920,13 +900,10 @@ netdev_vxlan_build_header(const struct netdev *netdev, struct ovs_action_push_tnl *data, const struct netdev_tnl_build_header_params *params) { - struct netdev_vport *dev = netdev_vport_cast(netdev); - struct netdev_tunnel_config *tnl_cfg; + const struct netdev_tunnel_config *tnl_cfg; struct vxlanhdr *vxh; - /* XXX: RCUfy tnl_cfg. */ - ovs_mutex_lock(&dev->mutex); - tnl_cfg = &dev->tnl_cfg; + tnl_cfg = netdev_get_tunnel_config(netdev); vxh = udp_build_header(tnl_cfg, data, params); @@ -951,10 +928,10 @@ netdev_vxlan_build_header(const struct netdev *netdev, vxh->vx_gpe.next_protocol = VXLAN_GPE_NP_ETHERNET; break; default: - goto drop; + return EINVAL; } } else { - goto drop; + return EINVAL; } } else { put_16aligned_be32(&vxh->vx_flags, htonl(VXLAN_FLAGS)); @@ -962,14 +939,9 @@ netdev_vxlan_build_header(const struct netdev *netdev, htonl(ntohll(params->flow->tunnel.tun_id) << 8)); } - ovs_mutex_unlock(&dev->mutex); data->header_len += sizeof *vxh; data->tnl_type = OVS_VPORT_TYPE_VXLAN; return 0; - -drop: - ovs_mutex_unlock(&dev->mutex); - return 1; } struct dp_packet * @@ -1033,22 +1005,14 @@ netdev_geneve_build_header(const struct netdev *netdev, struct ovs_action_push_tnl *data, const struct netdev_tnl_build_header_params *params) { - struct netdev_vport *dev = netdev_vport_cast(netdev); - struct netdev_tunnel_config *tnl_cfg; struct genevehdr *gnh; int opt_len; bool crit_opt; - /* XXX: RCUfy tnl_cfg. 
*/ - ovs_mutex_lock(&dev->mutex); - tnl_cfg = &dev->tnl_cfg; - - gnh = udp_build_header(tnl_cfg, data, params); + gnh = udp_build_header(netdev_get_tunnel_config(netdev), data, params); put_16aligned_be32(&gnh->vni, htonl(ntohll(params->flow->tunnel.tun_id) << 8)); - ovs_mutex_unlock(&dev->mutex); - opt_len = tun_metadata_to_geneve_header(¶ms->flow->tunnel, gnh->options, &crit_opt); diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index b3421c099..2d7858f51 100644 --- a/lib/netdev-offload-dpdk.c +++ b/lib/netdev-offload-dpdk.c @@ -2345,13 +2345,13 @@ netdev_offload_dpdk_flow_destroy(struct ufid_to_rte_flow_data *rte_flow_data) ovsrcu_get(void *, &netdev->hw_info.offload_data); data->rte_flow_counters[tid]--; - ufid_to_rte_flow_disassociate(rte_flow_data); VLOG_DBG_RL(&rl, "%s/%s: rte_flow 0x%"PRIxPTR " flow destroy %d ufid " UUID_FMT, netdev_get_name(netdev), netdev_get_name(physdev), (intptr_t) rte_flow, netdev_dpdk_get_port_id(physdev), UUID_ARGS((struct uuid *) ufid)); + ufid_to_rte_flow_disassociate(rte_flow_data); } else { VLOG_ERR("Failed flow: %s/%s: flow destroy %d ufid " UUID_FMT, netdev_get_name(netdev), netdev_get_name(physdev), diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index 4c78c4816..09f6393f8 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -276,8 +276,9 @@ del_filter_and_ufid_mapping(struct tcf_id *id, const ovs_u128 *ufid, } err = tc_del_flower_filter(id); - if (!err) { + if (!err || err == ENODEV) { del_ufid_tc_mapping(ufid); + return 0; } return err; } @@ -524,7 +525,11 @@ delete_chains_from_netdev(struct netdev *netdev, struct tcf_id *id) */ HMAP_FOR_EACH_POP (chain_node, node, &map) { id->chain = chain_node->chain; - tc_del_flower_filter(id); + /* Delete empty chain doesn't seem to work with + * tc_del_flower_filter() so use tc_del_filter() + * without specifying TCA_KIND. 
+ */ + tc_del_filter(id, NULL); free(chain_node); } } @@ -871,7 +876,7 @@ parse_tc_flower_to_actions__(struct tc_flower *flower, struct ofpbuf *buf, outport = netdev_ifindex_to_odp_port(action->out.ifindex_out); if (!outport) { - return ENOENT; + return -ENOENT; } } nl_msg_put_u32(buf, OVS_ACTION_ATTR_OUTPUT, odp_to_u32(outport)); @@ -964,7 +969,7 @@ parse_tc_flower_to_actions__(struct tc_flower *flower, struct ofpbuf *buf, uint32_t meter_id; if (police_idx_lookup(action->police.index, &meter_id)) { - return ENOENT; + return -ENOENT; } nl_msg_put_u32(buf, OVS_ACTION_ATTR_METER, meter_id); } @@ -983,6 +988,9 @@ parse_tc_flower_to_actions__(struct tc_flower *flower, struct ofpbuf *buf, buf, OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER); i = parse_tc_flower_to_actions__(flower, buf, i + 1, action->police.result_jump); + if (i < 0) { + return i; + } nl_msg_end_nested(buf, act_offset); act_offset = nl_msg_start_nested( @@ -994,6 +1002,9 @@ parse_tc_flower_to_actions__(struct tc_flower *flower, struct ofpbuf *buf, } if (jump != 0) { i = parse_tc_flower_to_actions__(flower, buf, i, jump); + if (i < 0) { + return i; + } } nl_msg_end_nested(buf, act_offset); @@ -1013,11 +1024,11 @@ parse_tc_flower_to_actions__(struct tc_flower *flower, struct ofpbuf *buf, return i; } -static void +static int parse_tc_flower_to_actions(struct tc_flower *flower, struct ofpbuf *buf) { - parse_tc_flower_to_actions__(flower, buf, 0, 0); + return parse_tc_flower_to_actions__(flower, buf, 0, 0); } static int @@ -1030,9 +1041,10 @@ parse_tc_flower_to_match(const struct netdev *netdev, struct ofpbuf *buf, bool terse) { - size_t act_off; struct tc_flower_key *key = &flower->key; struct tc_flower_key *mask = &flower->mask; + size_t act_off; + int err; if (terse) { return parse_tc_flower_terse_to_match(flower, match, stats, attrs); @@ -1229,7 +1241,10 @@ parse_tc_flower_to_match(const struct netdev *netdev, } act_off = nl_msg_start_nested(buf, OVS_FLOW_ATTR_ACTIONS); - parse_tc_flower_to_actions(flower, 
buf); + err = parse_tc_flower_to_actions(flower, buf); + if (err < 0) { + return -err; + } nl_msg_end_nested(buf, act_off); *actions = ofpbuf_at_assert(buf, act_off, sizeof(struct nlattr)); @@ -1272,8 +1287,8 @@ netdev_tc_flow_dump_next(struct netdev_flow_dump *dump, continue; } - if (flower.act_cookie.len) { - *ufid = *((ovs_u128 *) flower.act_cookie.data); + if (flower.act_cookie.len >= sizeof *ufid) { + *ufid = get_32aligned_u128(flower.act_cookie.data); } else if (!find_ufid(netdev, &id, ufid)) { continue; } @@ -2490,15 +2505,23 @@ netdev_tc_flow_get(struct netdev *netdev, err = tc_get_flower(&id, &flower); if (err) { - VLOG_ERR_RL(&error_rl, "flow get failed (dev %s prio %d handle %d): %s", + VLOG_ERR_RL(&error_rl, + "flow get failed (dev %s prio %d handle %d): %s", netdev_get_name(netdev), id.prio, id.handle, ovs_strerror(err)); return err; } in_port = netdev_ifindex_to_odp_port(id.ifindex); - parse_tc_flower_to_match(netdev, &flower, match, actions, - stats, attrs, buf, false); + err = parse_tc_flower_to_match(netdev, &flower, match, actions, + stats, attrs, buf, false); + if (err) { + VLOG_ERR_RL(&error_rl, + "flow get parse failed (dev %s prio %d handle %d): %s", + netdev_get_name(netdev), id.prio, id.handle, + ovs_strerror(err)); + return err; + } if (stats) { struct dpif_flow_stats adjust_stats; @@ -2860,8 +2883,9 @@ netdev_tc_init_flow_api(struct netdev *netdev) error = tc_add_del_qdisc(ifindex, true, block_id, hook); if (error && error != EEXIST) { - VLOG_INFO("failed adding ingress qdisc required for offloading: %s", - ovs_strerror(error)); + VLOG_INFO("failed adding ingress qdisc required for offloading " + "on %s: %s", + netdev_get_name(netdev), ovs_strerror(error)); return error; } diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c index 4592262bd..a5fa62487 100644 --- a/lib/netdev-offload.c +++ b/lib/netdev-offload.c @@ -485,11 +485,13 @@ netdev_set_hw_info(struct netdev *netdev, int type, int val) } /* Protects below port hashmaps. 
*/ -static struct ovs_rwlock netdev_hmap_rwlock = OVS_RWLOCK_INITIALIZER; +static struct ovs_rwlock ifindex_to_port_rwlock = OVS_RWLOCK_INITIALIZER; +static struct ovs_rwlock port_to_netdev_rwlock + OVS_ACQ_BEFORE(ifindex_to_port_rwlock) = OVS_RWLOCK_INITIALIZER; -static struct hmap port_to_netdev OVS_GUARDED_BY(netdev_hmap_rwlock) +static struct hmap port_to_netdev OVS_GUARDED_BY(port_to_netdev_rwlock) = HMAP_INITIALIZER(&port_to_netdev); -static struct hmap ifindex_to_port OVS_GUARDED_BY(netdev_hmap_rwlock) +static struct hmap ifindex_to_port OVS_GUARDED_BY(ifindex_to_port_rwlock) = HMAP_INITIALIZER(&ifindex_to_port); struct port_to_netdev_data { @@ -506,12 +508,12 @@ struct port_to_netdev_data { */ bool netdev_any_oor(void) - OVS_EXCLUDED(netdev_hmap_rwlock) + OVS_EXCLUDED(port_to_netdev_rwlock) { struct port_to_netdev_data *data; bool oor = false; - ovs_rwlock_rdlock(&netdev_hmap_rwlock); + ovs_rwlock_rdlock(&port_to_netdev_rwlock); HMAP_FOR_EACH (data, portno_node, &port_to_netdev) { struct netdev *dev = data->netdev; @@ -520,7 +522,7 @@ netdev_any_oor(void) break; } } - ovs_rwlock_unlock(&netdev_hmap_rwlock); + ovs_rwlock_unlock(&port_to_netdev_rwlock); return oor; } @@ -594,13 +596,13 @@ netdev_ports_flow_flush(const char *dpif_type) { struct port_to_netdev_data *data; - ovs_rwlock_rdlock(&netdev_hmap_rwlock); + ovs_rwlock_rdlock(&port_to_netdev_rwlock); HMAP_FOR_EACH (data, portno_node, &port_to_netdev) { if (netdev_get_dpif_type(data->netdev) == dpif_type) { netdev_flow_flush(data->netdev); } } - ovs_rwlock_unlock(&netdev_hmap_rwlock); + ovs_rwlock_unlock(&port_to_netdev_rwlock); } void @@ -610,7 +612,7 @@ netdev_ports_traverse(const char *dpif_type, { struct port_to_netdev_data *data; - ovs_rwlock_rdlock(&netdev_hmap_rwlock); + ovs_rwlock_rdlock(&port_to_netdev_rwlock); HMAP_FOR_EACH (data, portno_node, &port_to_netdev) { if (netdev_get_dpif_type(data->netdev) == dpif_type) { if (cb(data->netdev, data->dpif_port.port_no, aux)) { @@ -618,7 +620,7 @@ 
netdev_ports_traverse(const char *dpif_type, } } } - ovs_rwlock_unlock(&netdev_hmap_rwlock); + ovs_rwlock_unlock(&port_to_netdev_rwlock); } struct netdev_flow_dump ** @@ -629,7 +631,7 @@ netdev_ports_flow_dump_create(const char *dpif_type, int *ports, bool terse) int count = 0; int i = 0; - ovs_rwlock_rdlock(&netdev_hmap_rwlock); + ovs_rwlock_rdlock(&port_to_netdev_rwlock); HMAP_FOR_EACH (data, portno_node, &port_to_netdev) { if (netdev_get_dpif_type(data->netdev) == dpif_type) { count++; @@ -648,7 +650,7 @@ netdev_ports_flow_dump_create(const char *dpif_type, int *ports, bool terse) i++; } } - ovs_rwlock_unlock(&netdev_hmap_rwlock); + ovs_rwlock_unlock(&port_to_netdev_rwlock); *ports = i; return dumps; @@ -660,15 +662,15 @@ netdev_ports_flow_del(const char *dpif_type, const ovs_u128 *ufid, { struct port_to_netdev_data *data; - ovs_rwlock_rdlock(&netdev_hmap_rwlock); + ovs_rwlock_rdlock(&port_to_netdev_rwlock); HMAP_FOR_EACH (data, portno_node, &port_to_netdev) { if (netdev_get_dpif_type(data->netdev) == dpif_type && !netdev_flow_del(data->netdev, ufid, stats)) { - ovs_rwlock_unlock(&netdev_hmap_rwlock); + ovs_rwlock_unlock(&port_to_netdev_rwlock); return 0; } } - ovs_rwlock_unlock(&netdev_hmap_rwlock); + ovs_rwlock_unlock(&port_to_netdev_rwlock); return ENOENT; } @@ -681,16 +683,16 @@ netdev_ports_flow_get(const char *dpif_type, struct match *match, { struct port_to_netdev_data *data; - ovs_rwlock_rdlock(&netdev_hmap_rwlock); + ovs_rwlock_rdlock(&port_to_netdev_rwlock); HMAP_FOR_EACH (data, portno_node, &port_to_netdev) { if (netdev_get_dpif_type(data->netdev) == dpif_type && !netdev_flow_get(data->netdev, match, actions, ufid, stats, attrs, buf)) { - ovs_rwlock_unlock(&netdev_hmap_rwlock); + ovs_rwlock_unlock(&port_to_netdev_rwlock); return 0; } } - ovs_rwlock_unlock(&netdev_hmap_rwlock); + ovs_rwlock_unlock(&port_to_netdev_rwlock); return ENOENT; } @@ -702,7 +704,7 @@ netdev_ports_hash(odp_port_t port, const char *dpif_type) static struct port_to_netdev_data * 
netdev_ports_lookup(odp_port_t port_no, const char *dpif_type) - OVS_REQ_RDLOCK(netdev_hmap_rwlock) + OVS_REQ_RDLOCK(port_to_netdev_rwlock) { struct port_to_netdev_data *data; @@ -726,9 +728,9 @@ netdev_ports_insert(struct netdev *netdev, struct dpif_port *dpif_port) ovs_assert(dpif_type); - ovs_rwlock_wrlock(&netdev_hmap_rwlock); + ovs_rwlock_wrlock(&port_to_netdev_rwlock); if (netdev_ports_lookup(dpif_port->port_no, dpif_type)) { - ovs_rwlock_unlock(&netdev_hmap_rwlock); + ovs_rwlock_unlock(&port_to_netdev_rwlock); return EEXIST; } @@ -738,14 +740,16 @@ netdev_ports_insert(struct netdev *netdev, struct dpif_port *dpif_port) if (ifindex >= 0) { data->ifindex = ifindex; + ovs_rwlock_wrlock(&ifindex_to_port_rwlock); hmap_insert(&ifindex_to_port, &data->ifindex_node, ifindex); + ovs_rwlock_unlock(&ifindex_to_port_rwlock); } else { data->ifindex = -1; } hmap_insert(&port_to_netdev, &data->portno_node, netdev_ports_hash(dpif_port->port_no, dpif_type)); - ovs_rwlock_unlock(&netdev_hmap_rwlock); + ovs_rwlock_unlock(&port_to_netdev_rwlock); netdev_init_flow_api(netdev); @@ -758,12 +762,12 @@ netdev_ports_get(odp_port_t port_no, const char *dpif_type) struct port_to_netdev_data *data; struct netdev *ret = NULL; - ovs_rwlock_rdlock(&netdev_hmap_rwlock); + ovs_rwlock_rdlock(&port_to_netdev_rwlock); data = netdev_ports_lookup(port_no, dpif_type); if (data) { ret = netdev_ref(data->netdev); } - ovs_rwlock_unlock(&netdev_hmap_rwlock); + ovs_rwlock_unlock(&port_to_netdev_rwlock); return ret; } @@ -774,19 +778,21 @@ netdev_ports_remove(odp_port_t port_no, const char *dpif_type) struct port_to_netdev_data *data; int ret = ENOENT; - ovs_rwlock_wrlock(&netdev_hmap_rwlock); + ovs_rwlock_wrlock(&port_to_netdev_rwlock); data = netdev_ports_lookup(port_no, dpif_type); if (data) { dpif_port_destroy(&data->dpif_port); netdev_close(data->netdev); /* unref and possibly close */ hmap_remove(&port_to_netdev, &data->portno_node); if (data->ifindex >= 0) { + 
ovs_rwlock_wrlock(&ifindex_to_port_rwlock); hmap_remove(&ifindex_to_port, &data->ifindex_node); + ovs_rwlock_unlock(&ifindex_to_port_rwlock); } free(data); ret = 0; } - ovs_rwlock_unlock(&netdev_hmap_rwlock); + ovs_rwlock_unlock(&port_to_netdev_rwlock); return ret; } @@ -798,7 +804,7 @@ netdev_ports_get_n_flows(const char *dpif_type, odp_port_t port_no, struct port_to_netdev_data *data; int ret = EOPNOTSUPP; - ovs_rwlock_rdlock(&netdev_hmap_rwlock); + ovs_rwlock_rdlock(&port_to_netdev_rwlock); data = netdev_ports_lookup(port_no, dpif_type); if (data) { uint64_t thread_n_flows[MAX_OFFLOAD_THREAD_NB] = {0}; @@ -812,7 +818,7 @@ netdev_ports_get_n_flows(const char *dpif_type, odp_port_t port_no, } } } - ovs_rwlock_unlock(&netdev_hmap_rwlock); + ovs_rwlock_unlock(&port_to_netdev_rwlock); return ret; } @@ -822,14 +828,14 @@ netdev_ifindex_to_odp_port(int ifindex) struct port_to_netdev_data *data; odp_port_t ret = 0; - ovs_rwlock_rdlock(&netdev_hmap_rwlock); + ovs_rwlock_rdlock(&ifindex_to_port_rwlock); HMAP_FOR_EACH_WITH_HASH (data, ifindex_node, ifindex, &ifindex_to_port) { if (data->ifindex == ifindex) { ret = data->dpif_port.port_no; break; } } - ovs_rwlock_unlock(&netdev_hmap_rwlock); + ovs_rwlock_unlock(&ifindex_to_port_rwlock); return ret; } @@ -847,11 +853,11 @@ netdev_ports_flow_init(void) { struct port_to_netdev_data *data; - ovs_rwlock_rdlock(&netdev_hmap_rwlock); + ovs_rwlock_rdlock(&port_to_netdev_rwlock); HMAP_FOR_EACH (data, portno_node, &port_to_netdev) { netdev_init_flow_api(data->netdev); } - ovs_rwlock_unlock(&netdev_hmap_rwlock); + ovs_rwlock_unlock(&port_to_netdev_rwlock); } void diff --git a/lib/netdev-vport-private.h b/lib/netdev-vport-private.h index d89a28c66..586231057 100644 --- a/lib/netdev-vport-private.h +++ b/lib/netdev-vport-private.h @@ -22,11 +22,17 @@ #include "compiler.h" #include "netdev.h" #include "netdev-provider.h" +#include "ovs-atomic.h" #include "ovs-thread.h" struct netdev_vport { struct netdev up; + OVSRCU_TYPE(const struct 
netdev_tunnel_config *) tnl_cfg; + + /* Sequence number for outgoing GRE packets. */ + atomic_count gre_seqno; + /* Protects all members below. */ struct ovs_mutex mutex; @@ -34,7 +40,6 @@ struct netdev_vport { struct netdev_stats stats; /* Tunnels. */ - struct netdev_tunnel_config tnl_cfg; char egress_iface[IFNAMSIZ]; bool carrier_status; diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c index 3b3927865..4cfe15d5a 100644 --- a/lib/netdev-vport.c +++ b/lib/netdev-vport.c @@ -37,6 +37,7 @@ #include "netdev-provider.h" #include "netdev-vport-private.h" #include "openvswitch/dynamic-string.h" +#include "ovs-atomic.h" #include "ovs-router.h" #include "packets.h" #include "openvswitch/poll-loop.h" @@ -68,8 +69,8 @@ static int get_patch_config(const struct netdev *netdev, struct smap *args); static int get_tunnel_config(const struct netdev *, struct smap *args); static bool tunnel_check_status_change__(struct netdev_vport *); static void update_vxlan_global_cfg(struct netdev *, - struct netdev_tunnel_config *, - struct netdev_tunnel_config *); + const struct netdev_tunnel_config *, + const struct netdev_tunnel_config *); struct vport_class { const char *dpif_port; @@ -90,10 +91,16 @@ vport_class_cast(const struct netdev_class *class) return CONTAINER_OF(class, struct vport_class, netdev_class); } +static const struct netdev_tunnel_config * +vport_tunnel_config(struct netdev_vport *netdev) +{ + return ovsrcu_get(const struct netdev_tunnel_config *, &netdev->tnl_cfg); +} + static const struct netdev_tunnel_config * get_netdev_tunnel_config(const struct netdev *netdev) { - return &netdev_vport_cast(netdev)->tnl_cfg; + return vport_tunnel_config(netdev_vport_cast(netdev)); } bool @@ -134,8 +141,6 @@ netdev_vport_get_dpif_port(const struct netdev *netdev, } if (netdev_vport_needs_dst_port(netdev)) { - const struct netdev_vport *vport = netdev_vport_cast(netdev); - /* * Note: IFNAMSIZ is 16 bytes long. 
Implementations should choose * a dpif port name that is short enough to fit including any @@ -144,7 +149,7 @@ netdev_vport_get_dpif_port(const struct netdev *netdev, BUILD_ASSERT(NETDEV_VPORT_NAME_BUFSIZE >= IFNAMSIZ); ovs_assert(strlen(dpif_port) + 6 < IFNAMSIZ); snprintf(namebuf, bufsize, "%s_%d", dpif_port, - ntohs(vport->tnl_cfg.dst_port)); + ntohs(netdev_get_tunnel_config(netdev)->dst_port)); return namebuf; } else { return dpif_port; @@ -162,12 +167,14 @@ netdev_vport_route_changed(void) vports = netdev_get_vports(&n_vports); for (i = 0; i < n_vports; i++) { + const struct netdev_tunnel_config *tnl_cfg; struct netdev *netdev_ = vports[i]; struct netdev_vport *netdev = netdev_vport_cast(netdev_); ovs_mutex_lock(&netdev->mutex); /* Finds all tunnel vports. */ - if (ipv6_addr_is_set(&netdev->tnl_cfg.ipv6_dst)) { + tnl_cfg = netdev_get_tunnel_config(netdev_); + if (tnl_cfg && ipv6_addr_is_set(&tnl_cfg->ipv6_dst)) { if (tunnel_check_status_change__(netdev)) { netdev_change_seq_changed(netdev_); } @@ -198,6 +205,7 @@ netdev_vport_construct(struct netdev *netdev_) uint16_t port = 0; ovs_mutex_init(&dev->mutex); + atomic_count_init(&dev->gre_seqno, 0); eth_addr_random(&dev->etheraddr); if (name && dpif_port && (strlen(name) > strlen(dpif_port) + 1) && @@ -206,26 +214,31 @@ netdev_vport_construct(struct netdev *netdev_) port = atoi(p); } + struct netdev_tunnel_config *tnl_cfg = xzalloc(sizeof *tnl_cfg); + /* If a destination port for tunnel ports is specified in the netdev * name, use it instead of the default one. Otherwise, use the default * destination port */ if (!strcmp(type, "geneve")) { - dev->tnl_cfg.dst_port = port ? htons(port) : htons(GENEVE_DST_PORT); + tnl_cfg->dst_port = port ? htons(port) : htons(GENEVE_DST_PORT); } else if (!strcmp(type, "vxlan")) { - dev->tnl_cfg.dst_port = port ? htons(port) : htons(VXLAN_DST_PORT); - update_vxlan_global_cfg(netdev_, NULL, &dev->tnl_cfg); + tnl_cfg->dst_port = port ? 
htons(port) : htons(VXLAN_DST_PORT); + update_vxlan_global_cfg(netdev_, NULL, tnl_cfg); } else if (!strcmp(type, "lisp")) { - dev->tnl_cfg.dst_port = port ? htons(port) : htons(LISP_DST_PORT); + tnl_cfg->dst_port = port ? htons(port) : htons(LISP_DST_PORT); } else if (!strcmp(type, "stt")) { - dev->tnl_cfg.dst_port = port ? htons(port) : htons(STT_DST_PORT); + tnl_cfg->dst_port = port ? htons(port) : htons(STT_DST_PORT); } else if (!strcmp(type, "gtpu")) { - dev->tnl_cfg.dst_port = port ? htons(port) : htons(GTPU_DST_PORT); + tnl_cfg->dst_port = port ? htons(port) : htons(GTPU_DST_PORT); } else if (!strcmp(type, "bareudp")) { - dev->tnl_cfg.dst_port = htons(port); + tnl_cfg->dst_port = htons(port); } - dev->tnl_cfg.dont_fragment = true; - dev->tnl_cfg.ttl = DEFAULT_TTL; + tnl_cfg->dont_fragment = true; + tnl_cfg->ttl = DEFAULT_TTL; + + ovsrcu_set(&dev->tnl_cfg, tnl_cfg); + return 0; } @@ -233,12 +246,15 @@ static void netdev_vport_destruct(struct netdev *netdev_) { struct netdev_vport *netdev = netdev_vport_cast(netdev_); + const struct netdev_tunnel_config *tnl_cfg = vport_tunnel_config(netdev); const char *type = netdev_get_type(netdev_); if (!strcmp(type, "vxlan")) { - update_vxlan_global_cfg(netdev_, &netdev->tnl_cfg, NULL); + update_vxlan_global_cfg(netdev_, tnl_cfg, NULL); } + ovsrcu_set(&netdev->tnl_cfg, NULL); + ovsrcu_postpone(free, CONST_CAST(struct netdev_tunnel_config *, tnl_cfg)); free(netdev->peer); ovs_mutex_destroy(&netdev->mutex); } @@ -281,15 +297,16 @@ static bool tunnel_check_status_change__(struct netdev_vport *netdev) OVS_REQUIRES(netdev->mutex) { + const struct netdev_tunnel_config *tnl_cfg = vport_tunnel_config(netdev); + const struct in6_addr *route; char iface[IFNAMSIZ]; bool status = false; - struct in6_addr *route; struct in6_addr gw; uint32_t mark; iface[0] = '\0'; - route = &netdev->tnl_cfg.ipv6_dst; - mark = netdev->tnl_cfg.egress_pkt_mark; + route = &tnl_cfg->ipv6_dst; + mark = tnl_cfg->egress_pkt_mark; if (ovs_router_lookup(mark, 
route, iface, NULL, &gw)) { struct netdev *egress_netdev; @@ -465,8 +482,8 @@ vxlan_get_port_ext_gbp_str(uint16_t port, bool gbp, static void update_vxlan_global_cfg(struct netdev *netdev, - struct netdev_tunnel_config *old_cfg, - struct netdev_tunnel_config *new_cfg) + const struct netdev_tunnel_config *old_cfg, + const struct netdev_tunnel_config *new_cfg) { unsigned int count; char namebuf[20]; @@ -510,19 +527,20 @@ static bool is_concomitant_vxlan_tunnel_present(struct netdev_vport *dev, const struct netdev_tunnel_config *tnl_cfg) { - char namebuf[20]; - const char *type = netdev_get_type(&dev->up); + const struct netdev_tunnel_config *dev_tnl_cfg = vport_tunnel_config(dev); struct vport_class *vclass = vport_class_cast(netdev_get_class(&dev->up)); + const char *type = netdev_get_type(&dev->up); + char namebuf[20]; if (strcmp(type, "vxlan")) { return false; } - if (dev->tnl_cfg.dst_port == tnl_cfg->dst_port && - (dev->tnl_cfg.exts & (1 << OVS_VXLAN_EXT_GBP)) == + if (dev_tnl_cfg->dst_port == tnl_cfg->dst_port && + (dev_tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GBP)) == (tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GBP))) { - if (ntohs(dev->tnl_cfg.dst_port) == VXLAN_DST_PORT) { + if (ntohs(dev_tnl_cfg->dst_port) == VXLAN_DST_PORT) { /* Special case where we kept the default port/gbp, only ok if the opposite of the default does not exits */ vxlan_get_port_ext_gbp_str(ntohs(tnl_cfg->dst_port), @@ -538,9 +556,9 @@ is_concomitant_vxlan_tunnel_present(struct netdev_vport *dev, } /* Same port: ok if no one is left with the previous configuration */ - if (dev->tnl_cfg.dst_port == tnl_cfg->dst_port) { - vxlan_get_port_ext_gbp_str(ntohs(dev->tnl_cfg.dst_port), - dev->tnl_cfg.exts & + if (dev_tnl_cfg->dst_port == tnl_cfg->dst_port) { + vxlan_get_port_ext_gbp_str(ntohs(dev_tnl_cfg->dst_port), + dev_tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GBP), namebuf, sizeof(namebuf)); @@ -568,6 +586,7 @@ static int set_tunnel_config(struct netdev *dev_, const struct smap *args, char **errp) { struct 
netdev_vport *dev = netdev_vport_cast(dev_); + const struct netdev_tunnel_config *curr_tnl_cfg; const char *name = netdev_get_name(dev_); const char *type = netdev_get_type(dev_); struct ds errors = DS_EMPTY_INITIALIZER; @@ -858,11 +877,16 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args, char **errp) err = EEXIST; goto out; } - update_vxlan_global_cfg(dev_, &dev->tnl_cfg, &tnl_cfg); ovs_mutex_lock(&dev->mutex); - if (memcmp(&dev->tnl_cfg, &tnl_cfg, sizeof tnl_cfg)) { - dev->tnl_cfg = tnl_cfg; + + curr_tnl_cfg = vport_tunnel_config(dev); + update_vxlan_global_cfg(dev_, curr_tnl_cfg, &tnl_cfg); + + if (memcmp(curr_tnl_cfg, &tnl_cfg, sizeof tnl_cfg)) { + ovsrcu_set(&dev->tnl_cfg, xmemdup(&tnl_cfg, sizeof tnl_cfg)); + ovsrcu_postpone(free, CONST_CAST(struct netdev_tunnel_config *, + curr_tnl_cfg)); tunnel_check_status_change__(dev); netdev_change_seq_changed(dev_); } @@ -887,61 +911,60 @@ out: static int get_tunnel_config(const struct netdev *dev, struct smap *args) { - struct netdev_vport *netdev = netdev_vport_cast(dev); + const struct netdev_tunnel_config *tnl_cfg = netdev_get_tunnel_config(dev); const char *type = netdev_get_type(dev); - struct netdev_tunnel_config tnl_cfg; - ovs_mutex_lock(&netdev->mutex); - tnl_cfg = netdev->tnl_cfg; - ovs_mutex_unlock(&netdev->mutex); + if (!tnl_cfg) { + return 0; + } - if (ipv6_addr_is_set(&tnl_cfg.ipv6_dst)) { - smap_add_ipv6(args, "remote_ip", &tnl_cfg.ipv6_dst); - } else if (tnl_cfg.ip_dst_flow) { + if (ipv6_addr_is_set(&tnl_cfg->ipv6_dst)) { + smap_add_ipv6(args, "remote_ip", &tnl_cfg->ipv6_dst); + } else if (tnl_cfg->ip_dst_flow) { smap_add(args, "remote_ip", "flow"); } - if (ipv6_addr_is_set(&tnl_cfg.ipv6_src)) { - smap_add_ipv6(args, "local_ip", &tnl_cfg.ipv6_src); - } else if (tnl_cfg.ip_src_flow) { + if (ipv6_addr_is_set(&tnl_cfg->ipv6_src)) { + smap_add_ipv6(args, "local_ip", &tnl_cfg->ipv6_src); + } else if (tnl_cfg->ip_src_flow) { smap_add(args, "local_ip", "flow"); } - if (tnl_cfg.in_key_flow && 
tnl_cfg.out_key_flow) { + if (tnl_cfg->in_key_flow && tnl_cfg->out_key_flow) { smap_add(args, "key", "flow"); - } else if (tnl_cfg.in_key_present && tnl_cfg.out_key_present - && tnl_cfg.in_key == tnl_cfg.out_key) { - smap_add_format(args, "key", "%"PRIu64, ntohll(tnl_cfg.in_key)); + } else if (tnl_cfg->in_key_present && tnl_cfg->out_key_present + && tnl_cfg->in_key == tnl_cfg->out_key) { + smap_add_format(args, "key", "%"PRIu64, ntohll(tnl_cfg->in_key)); } else { - if (tnl_cfg.in_key_flow) { + if (tnl_cfg->in_key_flow) { smap_add(args, "in_key", "flow"); - } else if (tnl_cfg.in_key_present) { + } else if (tnl_cfg->in_key_present) { smap_add_format(args, "in_key", "%"PRIu64, - ntohll(tnl_cfg.in_key)); + ntohll(tnl_cfg->in_key)); } - if (tnl_cfg.out_key_flow) { + if (tnl_cfg->out_key_flow) { smap_add(args, "out_key", "flow"); - } else if (tnl_cfg.out_key_present) { + } else if (tnl_cfg->out_key_present) { smap_add_format(args, "out_key", "%"PRIu64, - ntohll(tnl_cfg.out_key)); + ntohll(tnl_cfg->out_key)); } } - if (tnl_cfg.ttl_inherit) { + if (tnl_cfg->ttl_inherit) { smap_add(args, "ttl", "inherit"); - } else if (tnl_cfg.ttl != DEFAULT_TTL) { - smap_add_format(args, "ttl", "%"PRIu8, tnl_cfg.ttl); + } else if (tnl_cfg->ttl != DEFAULT_TTL) { + smap_add_format(args, "ttl", "%"PRIu8, tnl_cfg->ttl); } - if (tnl_cfg.tos_inherit) { + if (tnl_cfg->tos_inherit) { smap_add(args, "tos", "inherit"); - } else if (tnl_cfg.tos) { - smap_add_format(args, "tos", "0x%x", tnl_cfg.tos); + } else if (tnl_cfg->tos) { + smap_add_format(args, "tos", "0x%x", tnl_cfg->tos); } - if (tnl_cfg.dst_port) { - uint16_t dst_port = ntohs(tnl_cfg.dst_port); + if (tnl_cfg->dst_port) { + uint16_t dst_port = ntohs(tnl_cfg->dst_port); if ((!strcmp("geneve", type) && dst_port != GENEVE_DST_PORT) || (!strcmp("vxlan", type) && dst_port != VXLAN_DST_PORT) || @@ -953,33 +976,33 @@ get_tunnel_config(const struct netdev *dev, struct smap *args) } } - if (tnl_cfg.csum) { + if (tnl_cfg->csum) { smap_add(args, 
"csum", "true"); } - if (tnl_cfg.set_seq) { + if (tnl_cfg->set_seq) { smap_add(args, "seq", "true"); } - enum tunnel_layers layers = tunnel_supported_layers(type, &tnl_cfg); - if (tnl_cfg.pt_mode != default_pt_mode(layers)) { + enum tunnel_layers layers = tunnel_supported_layers(type, tnl_cfg); + if (tnl_cfg->pt_mode != default_pt_mode(layers)) { smap_add(args, "packet_type", - tnl_cfg.pt_mode == NETDEV_PT_LEGACY_L2 ? "legacy_l2" - : tnl_cfg.pt_mode == NETDEV_PT_LEGACY_L3 ? "legacy_l3" + tnl_cfg->pt_mode == NETDEV_PT_LEGACY_L2 ? "legacy_l2" + : tnl_cfg->pt_mode == NETDEV_PT_LEGACY_L3 ? "legacy_l3" : "ptap"); } - if (!tnl_cfg.dont_fragment) { + if (!tnl_cfg->dont_fragment) { smap_add(args, "df_default", "false"); } - if (tnl_cfg.set_egress_pkt_mark) { + if (tnl_cfg->set_egress_pkt_mark) { smap_add_format(args, "egress_pkt_mark", - "%"PRIu32, tnl_cfg.egress_pkt_mark); + "%"PRIu32, tnl_cfg->egress_pkt_mark); } if (!strcmp("erspan", type) || !strcmp("ip6erspan", type)) { - if (tnl_cfg.erspan_ver_flow) { + if (tnl_cfg->erspan_ver_flow) { /* since version number is not determined, * assume print all other as flow */ @@ -988,27 +1011,27 @@ get_tunnel_config(const struct netdev *dev, struct smap *args) smap_add(args, "erspan_dir", "flow"); smap_add(args, "erspan_hwid", "flow"); } else { - smap_add_format(args, "erspan_ver", "%d", tnl_cfg.erspan_ver); + smap_add_format(args, "erspan_ver", "%d", tnl_cfg->erspan_ver); - if (tnl_cfg.erspan_ver == 1) { - if (tnl_cfg.erspan_idx_flow) { + if (tnl_cfg->erspan_ver == 1) { + if (tnl_cfg->erspan_idx_flow) { smap_add(args, "erspan_idx", "flow"); } else { smap_add_format(args, "erspan_idx", "0x%x", - tnl_cfg.erspan_idx); + tnl_cfg->erspan_idx); } - } else if (tnl_cfg.erspan_ver == 2) { - if (tnl_cfg.erspan_dir_flow) { + } else if (tnl_cfg->erspan_ver == 2) { + if (tnl_cfg->erspan_dir_flow) { smap_add(args, "erspan_dir", "flow"); } else { smap_add_format(args, "erspan_dir", "%d", - tnl_cfg.erspan_dir); + tnl_cfg->erspan_dir); } - if 
(tnl_cfg.erspan_hwid_flow) { + if (tnl_cfg->erspan_hwid_flow) { smap_add(args, "erspan_hwid", "flow"); } else { smap_add_format(args, "erspan_hwid", "0x%x", - tnl_cfg.erspan_hwid); + tnl_cfg->erspan_hwid); } } } @@ -1138,9 +1161,11 @@ netdev_vport_get_stats(const struct netdev *netdev, struct netdev_stats *stats) static enum netdev_pt_mode netdev_vport_get_pt_mode(const struct netdev *netdev) { - struct netdev_vport *dev = netdev_vport_cast(netdev); + const struct netdev_tunnel_config *tnl_cfg; + + tnl_cfg = netdev_get_tunnel_config(netdev); - return dev->tnl_cfg.pt_mode; + return tnl_cfg ? tnl_cfg->pt_mode : NETDEV_PT_UNKNOWN; } diff --git a/lib/netdev-windows.c b/lib/netdev-windows.c index 4ad45ffa1..3fad501e3 100644 --- a/lib/netdev-windows.c +++ b/lib/netdev-windows.c @@ -156,6 +156,7 @@ netdev_windows_system_construct(struct netdev *netdev_) struct netdev_windows_netdev_info info; struct ofpbuf *buf; int ret; + const char *type = NULL; /* Query the attributes and runtime status of the netdev. */ ret = query_netdev(netdev_get_name(&netdev->up), &info, &buf); @@ -167,6 +168,16 @@ netdev_windows_system_construct(struct netdev *netdev_) } ofpbuf_delete(buf); + /* Don't create netdev if ovs-type is "internal" + * but the type of netdev->up is "system". */ + type = netdev_get_type(&netdev->up); + if (type && !strcmp(type, "system") && + (info.ovs_type == OVS_VPORT_TYPE_INTERNAL)) { + VLOG_DBG("construct device %s, ovs_type: %u failed", + netdev_get_name(&netdev->up), info.ovs_type); + return 1; + } + netdev->change_seq = 1; netdev->dev_type = info.ovs_type; netdev->port_no = info.port_no; diff --git a/lib/netdev.h b/lib/netdev.h index acf174927..47c15bde7 100644 --- a/lib/netdev.h +++ b/lib/netdev.h @@ -72,6 +72,9 @@ struct sset; struct ovs_action_push_tnl; enum netdev_pt_mode { + /* Unknown mode. The netdev is not configured yet. */ + NETDEV_PT_UNKNOWN = 0, + /* The netdev is packet type aware. It can potentially carry any kind of * packet. 
This "modern" mode is appropriate for both netdevs that handle * only a single kind of packet (such as a virtual or physical Ethernet @@ -130,7 +133,6 @@ struct netdev_tunnel_config { enum netdev_pt_mode pt_mode; bool set_seq; - uint32_t seqno; uint32_t erspan_idx; uint8_t erspan_ver; uint8_t erspan_dir; diff --git a/lib/netlink-conntrack.c b/lib/netlink-conntrack.c index 4fcde9ba1..492bfcffb 100644 --- a/lib/netlink-conntrack.c +++ b/lib/netlink-conntrack.c @@ -579,7 +579,8 @@ nl_ct_put_tuple_proto(struct ofpbuf *buf, const struct ct_dpif_tuple *tuple) nl_msg_put_u8(buf, CTA_PROTO_ICMPV6_TYPE, tuple->icmp_type); nl_msg_put_u8(buf, CTA_PROTO_ICMPV6_CODE, tuple->icmp_code); } else if (tuple->ip_proto == IPPROTO_TCP || - tuple->ip_proto == IPPROTO_UDP) { + tuple->ip_proto == IPPROTO_UDP || + tuple->ip_proto == IPPROTO_SCTP) { nl_msg_put_be16(buf, CTA_PROTO_SRC_PORT, tuple->src_port); nl_msg_put_be16(buf, CTA_PROTO_DST_PORT, tuple->dst_port); } else { diff --git a/lib/ofp-parse.c b/lib/ofp-parse.c index a90b926ef..102b183a8 100644 --- a/lib/ofp-parse.c +++ b/lib/ofp-parse.c @@ -71,16 +71,13 @@ str_to_u16(const char *str, const char *name, uint16_t *valuep) char * OVS_WARN_UNUSED_RESULT str_to_u32(const char *str, uint32_t *valuep) { - char *tail; - uint32_t value; + unsigned long long value; if (!str[0]) { return xstrdup("missing required numeric argument"); } - errno = 0; - value = strtoul(str, &tail, 0); - if (errno == EINVAL || errno == ERANGE || *tail) { + if (!str_to_ullong(str, 0, &value) || value > UINT32_MAX) { return xasprintf("invalid numeric format %s", str); } *valuep = value; diff --git a/lib/ovs-thread.c b/lib/ovs-thread.c index 2d382f1e8..ac5d2c3d0 100644 --- a/lib/ovs-thread.c +++ b/lib/ovs-thread.c @@ -674,7 +674,7 @@ count_cpu_cores(void) static int cpu_cores; ovs_mutex_lock(&cpu_cores_mutex); - if (now - last_updated >= COUNT_CPU_UPDATE_TIME_MS) { + if (!last_updated || now - last_updated >= COUNT_CPU_UPDATE_TIME_MS) { last_updated = now; cpu_cores 
= count_cpu_cores__(); } diff --git a/lib/ovs.tmac b/lib/ovs.tmac index 5f8f20afa..97b6fa3df 100644 --- a/lib/ovs.tmac +++ b/lib/ovs.tmac @@ -175,7 +175,7 @@ . nr mE \\n(.f . nf . nh -. ft CW +. ft CR .. . . diff --git a/lib/smap.c b/lib/smap.c index c1633e2a1..47fb34502 100644 --- a/lib/smap.c +++ b/lib/smap.c @@ -100,7 +100,7 @@ smap_add_format(struct smap *smap, const char *key, const char *format, ...) /* Adds 'key' paired with a string representation of 'addr'. It is the * caller's responsibility to avoid duplicate keys if desirable. */ void -smap_add_ipv6(struct smap *smap, const char *key, struct in6_addr *addr) +smap_add_ipv6(struct smap *smap, const char *key, const struct in6_addr *addr) { char buf[INET6_ADDRSTRLEN]; ipv6_string_mapped(buf, addr); diff --git a/lib/smap.h b/lib/smap.h index 2fe6c540a..d1d2ae6f2 100644 --- a/lib/smap.h +++ b/lib/smap.h @@ -100,7 +100,7 @@ struct smap_node *smap_add_nocopy(struct smap *, char *, char *); bool smap_add_once(struct smap *, const char *, const char *); void smap_add_format(struct smap *, const char *key, const char *, ...) 
OVS_PRINTF_FORMAT(3, 4); -void smap_add_ipv6(struct smap *, const char *, struct in6_addr *); +void smap_add_ipv6(struct smap *, const char *, const struct in6_addr *); void smap_replace(struct smap *, const char *, const char *); void smap_replace_nocopy(struct smap *, const char *, char *); diff --git a/lib/stream-ssl.c b/lib/stream-ssl.c index 62da9febb..86747e58b 100644 --- a/lib/stream-ssl.c +++ b/lib/stream-ssl.c @@ -1075,7 +1075,13 @@ do_ssl_init(void) VLOG_ERR("SSL_CTX_new: %s", ERR_error_string(ERR_get_error(), NULL)); return ENOPROTOOPT; } - SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3); + + long options = SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3; +#ifdef SSL_OP_IGNORE_UNEXPECTED_EOF + options |= SSL_OP_IGNORE_UNEXPECTED_EOF; +#endif + SSL_CTX_set_options(ctx, options); + #if OPENSSL_VERSION_NUMBER < 0x3000000fL SSL_CTX_set_tmp_dh_callback(ctx, tmp_dh_callback); #else diff --git a/lib/tc.c b/lib/tc.c index 4c07e2216..270dc95ce 100644 --- a/lib/tc.c +++ b/lib/tc.c @@ -36,6 +36,7 @@ #include #include "byte-order.h" +#include "coverage.h" #include "netlink-socket.h" #include "netlink.h" #include "openvswitch/ofpbuf.h" @@ -67,6 +68,8 @@ VLOG_DEFINE_THIS_MODULE(tc); +COVERAGE_DEFINE(tc_netlink_malformed_reply); + static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5); static enum tc_offload_policy tc_policy = TC_POLICY_NONE; @@ -2190,18 +2193,19 @@ int parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id, struct tc_flower *flower, bool terse) { - struct tcmsg *tc; + struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size); + struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); + struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc); struct nlattr *ta[ARRAY_SIZE(tca_policy)]; const char *kind; - if (NLMSG_HDRLEN + sizeof *tc > reply->size) { + if (!nlmsg || !tc) { + COVERAGE_INC(tc_netlink_malformed_reply); return EPROTO; } memset(flower, 0, sizeof *flower); - tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof 
*tc); - flower->key.eth_type = (OVS_FORCE ovs_be16) tc_get_minor(tc->tcm_info); flower->mask.eth_type = OVS_BE16_MAX; id->prio = tc_get_major(tc->tcm_info); @@ -2215,8 +2219,7 @@ parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id, return EAGAIN; } - if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc, - tca_policy, ta, ARRAY_SIZE(ta))) { + if (!nl_policy_parse(&b, 0, tca_policy, ta, ARRAY_SIZE(ta))) { VLOG_ERR_RL(&error_rl, "failed to parse tca policy"); return EPROTO; } @@ -2237,13 +2240,17 @@ parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id, int parse_netlink_to_tc_chain(struct ofpbuf *reply, uint32_t *chain) { + struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size); + struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); + struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc); struct nlattr *ta[ARRAY_SIZE(tca_chain_policy)]; - struct tcmsg *tc; - tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc); + if (!nlmsg || !tc) { + COVERAGE_INC(tc_netlink_malformed_reply); + return EPROTO; + } - if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc, - tca_chain_policy, ta, ARRAY_SIZE(ta))) { + if (!nl_policy_parse(&b, 0, tca_chain_policy, ta, ARRAY_SIZE(ta))) { VLOG_ERR_RL(&error_rl, "failed to parse tca chain policy"); return EINVAL; } @@ -2307,21 +2314,27 @@ int parse_netlink_to_tc_policer(struct ofpbuf *reply, uint32_t police_idx[]) { static struct nl_policy actions_orders_policy[TCA_ACT_MAX_PRIO] = {}; + struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size); struct nlattr *actions_orders[ARRAY_SIZE(actions_orders_policy)]; + struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); const int max_size = ARRAY_SIZE(actions_orders_policy); + struct tcamsg *tca = ofpbuf_try_pull(&b, sizeof *tca); const struct nlattr *actions; struct tc_flower flower; - struct tcamsg *tca; int i, cnt = 0; int err; + if (!nlmsg || !tca) { + COVERAGE_INC(tc_netlink_malformed_reply); + return EPROTO; + } + for (i = 
0; i < max_size; i++) { actions_orders_policy[i].type = NL_A_NESTED; actions_orders_policy[i].optional = true; } - tca = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tca); - actions = nl_attr_find(reply, NLMSG_HDRLEN + sizeof *tca, TCA_ACT_TAB); + actions = nl_attr_find(&b, 0, TCA_ACT_TAB); if (!actions || !nl_parse_nested(actions, actions_orders_policy, actions_orders, max_size)) { VLOG_ERR_RL(&error_rl, @@ -2354,7 +2367,9 @@ tc_del_filter(struct tcf_id *id, const char *kind) struct ofpbuf request; request_from_tcf_id(id, 0, RTM_DELTFILTER, NLM_F_ACK, &request); - nl_msg_put_string(&request, TCA_KIND, kind); + if (kind) { + nl_msg_put_string(&request, TCA_KIND, kind); + } return tc_transact(&request, NULL); } @@ -3821,8 +3836,15 @@ tc_replace_flower(struct tcf_id *id, struct tc_flower *flower) error = tc_transact(&request, &reply); if (!error) { - struct tcmsg *tc = - ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc); + struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size); + struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); + struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc); + + if (!nlmsg || !tc) { + COVERAGE_INC(tc_netlink_malformed_reply); + ofpbuf_delete(reply); + return EPROTO; + } id->prio = tc_get_major(tc->tcm_info); id->handle = tc->tcm_handle; diff --git a/ofproto/connmgr.c b/ofproto/connmgr.c index 7b14cae77..b092e9e04 100644 --- a/ofproto/connmgr.c +++ b/ofproto/connmgr.c @@ -1649,6 +1649,8 @@ connmgr_send_table_status(struct connmgr *mgr, } } +COVERAGE_DEFINE(connmgr_async_unsent); + /* Given 'pin', sends an OFPT_PACKET_IN message to each OpenFlow controller as * necessary according to their individual configurations. 
*/ void @@ -1656,6 +1658,7 @@ connmgr_send_async_msg(struct connmgr *mgr, const struct ofproto_async_msg *am) { struct ofconn *ofconn; + bool sent = false; LIST_FOR_EACH (ofconn, connmgr_node, &mgr->conns) { enum ofputil_protocol protocol = ofconn_get_protocol(ofconn); @@ -1677,6 +1680,11 @@ connmgr_send_async_msg(struct connmgr *mgr, am->pin.up.base.flow_metadata.flow.in_port.ofp_port, msg, &txq); do_send_packet_ins(ofconn, &txq); + sent = true; + } + + if (!sent) { + COVERAGE_INC(connmgr_async_unsent); } } diff --git a/ofproto/ofproto-dpif-ipfix.c b/ofproto/ofproto-dpif-ipfix.c index 742eed399..f13478a88 100644 --- a/ofproto/ofproto-dpif-ipfix.c +++ b/ofproto/ofproto-dpif-ipfix.c @@ -124,11 +124,18 @@ struct dpif_ipfix_port { uint32_t ifindex; }; +struct dpif_ipfix_domain { + struct hmap_node hmap_node; /* In struct dpif_ipfix_exporter's domains. */ + time_t last_template_set_time; +}; + struct dpif_ipfix_exporter { uint32_t exporter_id; /* Exporting Process identifier */ - struct collectors *collectors; uint32_t seq_number; - time_t last_template_set_time; + struct collectors *collectors; + struct hmap domains; /* Contains struct dpif_ipfix_domain indexed by + observation domain id. */ + time_t last_stats_sent_time; struct hmap cache_flow_key_map; /* ipfix_flow_cache_entry. */ struct ovs_list cache_flow_start_timestamp_list; /* ipfix_flow_cache_entry. */ uint32_t cache_active_timeout; /* In seconds. 
*/ @@ -617,6 +624,9 @@ static void get_export_time_now(uint64_t *, uint32_t *); static void dpif_ipfix_cache_expire_now(struct dpif_ipfix_exporter *, bool); +static void dpif_ipfix_exporter_del_domain(struct dpif_ipfix_exporter *, + struct dpif_ipfix_domain *); + static bool ofproto_ipfix_bridge_exporter_options_equal( const struct ofproto_ipfix_bridge_exporter_options *a, @@ -697,13 +707,14 @@ dpif_ipfix_exporter_init(struct dpif_ipfix_exporter *exporter) exporter->exporter_id = ++exporter_total_count; exporter->collectors = NULL; exporter->seq_number = 1; - exporter->last_template_set_time = 0; + exporter->last_stats_sent_time = 0; hmap_init(&exporter->cache_flow_key_map); ovs_list_init(&exporter->cache_flow_start_timestamp_list); exporter->cache_active_timeout = 0; exporter->cache_max_flows = 0; exporter->virtual_obs_id = NULL; exporter->virtual_obs_len = 0; + hmap_init(&exporter->domains); memset(&exporter->ipfix_global_stats, 0, sizeof(struct dpif_ipfix_global_stats)); @@ -711,6 +722,7 @@ dpif_ipfix_exporter_init(struct dpif_ipfix_exporter *exporter) static void dpif_ipfix_exporter_clear(struct dpif_ipfix_exporter *exporter) + OVS_REQUIRES(mutex) { /* Flush the cache with flow end reason "forced end." 
*/ dpif_ipfix_cache_expire_now(exporter, true); @@ -719,22 +731,29 @@ dpif_ipfix_exporter_clear(struct dpif_ipfix_exporter *exporter) exporter->exporter_id = 0; exporter->collectors = NULL; exporter->seq_number = 1; - exporter->last_template_set_time = 0; + exporter->last_stats_sent_time = 0; exporter->cache_active_timeout = 0; exporter->cache_max_flows = 0; free(exporter->virtual_obs_id); exporter->virtual_obs_id = NULL; exporter->virtual_obs_len = 0; + struct dpif_ipfix_domain *dom; + HMAP_FOR_EACH_SAFE (dom, hmap_node, &exporter->domains) { + dpif_ipfix_exporter_del_domain(exporter, dom); + } + memset(&exporter->ipfix_global_stats, 0, sizeof(struct dpif_ipfix_global_stats)); } static void dpif_ipfix_exporter_destroy(struct dpif_ipfix_exporter *exporter) + OVS_REQUIRES(mutex) { dpif_ipfix_exporter_clear(exporter); hmap_destroy(&exporter->cache_flow_key_map); + hmap_destroy(&exporter->domains); } static bool @@ -742,7 +761,7 @@ dpif_ipfix_exporter_set_options(struct dpif_ipfix_exporter *exporter, const struct sset *targets, const uint32_t cache_active_timeout, const uint32_t cache_max_flows, - const char *virtual_obs_id) + const char *virtual_obs_id) OVS_REQUIRES(mutex) { size_t virtual_obs_len; collectors_destroy(exporter->collectors); @@ -769,6 +788,37 @@ dpif_ipfix_exporter_set_options(struct dpif_ipfix_exporter *exporter, return true; } +static struct dpif_ipfix_domain * +dpif_ipfix_exporter_find_domain(const struct dpif_ipfix_exporter *exporter, + uint32_t domain_id) OVS_REQUIRES(mutex) +{ + struct dpif_ipfix_domain *dom; + HMAP_FOR_EACH_WITH_HASH (dom, hmap_node, hash_int(domain_id, 0), + &exporter->domains) { + return dom; + } + return NULL; +} + +static struct dpif_ipfix_domain * +dpif_ipfix_exporter_insert_domain(struct dpif_ipfix_exporter *exporter, + const uint32_t domain_id) OVS_REQUIRES(mutex) +{ + struct dpif_ipfix_domain *dom = xmalloc(sizeof *dom); + dom->last_template_set_time = 0; + hmap_insert(&exporter->domains, &dom->hmap_node, 
hash_int(domain_id, 0)); + return dom; +} + +static void +dpif_ipfix_exporter_del_domain(struct dpif_ipfix_exporter *exporter, + struct dpif_ipfix_domain *dom) + OVS_REQUIRES(mutex) +{ + hmap_remove(&exporter->domains, &dom->hmap_node); + free(dom); +} + static struct dpif_ipfix_port * dpif_ipfix_find_port(const struct dpif_ipfix *di, odp_port_t odp_port) OVS_REQUIRES(mutex) @@ -909,6 +959,7 @@ dpif_ipfix_bridge_exporter_init(struct dpif_ipfix_bridge_exporter *exporter) static void dpif_ipfix_bridge_exporter_clear(struct dpif_ipfix_bridge_exporter *exporter) + OVS_REQUIRES(mutex) { dpif_ipfix_exporter_clear(&exporter->exporter); ofproto_ipfix_bridge_exporter_options_destroy(exporter->options); @@ -918,6 +969,7 @@ dpif_ipfix_bridge_exporter_clear(struct dpif_ipfix_bridge_exporter *exporter) static void dpif_ipfix_bridge_exporter_destroy(struct dpif_ipfix_bridge_exporter *exporter) + OVS_REQUIRES(mutex) { dpif_ipfix_bridge_exporter_clear(exporter); dpif_ipfix_exporter_destroy(&exporter->exporter); @@ -927,7 +979,7 @@ static void dpif_ipfix_bridge_exporter_set_options( struct dpif_ipfix_bridge_exporter *exporter, const struct ofproto_ipfix_bridge_exporter_options *options, - bool *options_changed) + bool *options_changed) OVS_REQUIRES(mutex) { if (!options || sset_is_empty(&options->targets)) { /* No point in doing any work if there are no targets. 
*/ @@ -1003,6 +1055,7 @@ dpif_ipfix_flow_exporter_init(struct dpif_ipfix_flow_exporter *exporter) static void dpif_ipfix_flow_exporter_clear(struct dpif_ipfix_flow_exporter *exporter) + OVS_REQUIRES(mutex) { dpif_ipfix_exporter_clear(&exporter->exporter); ofproto_ipfix_flow_exporter_options_destroy(exporter->options); @@ -1011,6 +1064,7 @@ dpif_ipfix_flow_exporter_clear(struct dpif_ipfix_flow_exporter *exporter) static void dpif_ipfix_flow_exporter_destroy(struct dpif_ipfix_flow_exporter *exporter) + OVS_REQUIRES(mutex) { dpif_ipfix_flow_exporter_clear(exporter); dpif_ipfix_exporter_destroy(&exporter->exporter); @@ -1020,7 +1074,7 @@ static bool dpif_ipfix_flow_exporter_set_options( struct dpif_ipfix_flow_exporter *exporter, const struct ofproto_ipfix_flow_exporter_options *options, - bool *options_changed) + bool *options_changed) OVS_REQUIRES(mutex) { if (sset_is_empty(&options->targets)) { /* No point in doing any work if there are no targets. */ @@ -1071,6 +1125,7 @@ dpif_ipfix_flow_exporter_set_options( static void remove_flow_exporter(struct dpif_ipfix *di, struct dpif_ipfix_flow_exporter_map_node *node) + OVS_REQUIRES(mutex) { hmap_remove(&di->flow_exporter_map, &node->node); dpif_ipfix_flow_exporter_destroy(&node->exporter); @@ -2000,6 +2055,7 @@ static void ipfix_cache_update(struct dpif_ipfix_exporter *exporter, struct ipfix_flow_cache_entry *entry, enum ipfix_sampled_packet_type sampled_pkt_type) + OVS_REQUIRES(mutex) { struct ipfix_flow_cache_entry *old_entry; size_t current_flows = 0; @@ -2811,14 +2867,36 @@ dpif_ipfix_flow_sample(struct dpif_ipfix *di, const struct dp_packet *packet, ovs_mutex_unlock(&mutex); } +static bool +dpif_ipfix_should_send_template(struct dpif_ipfix_exporter *exporter, + const uint32_t observation_domain_id, + const uint32_t export_time_sec) + OVS_REQUIRES(mutex) +{ + struct dpif_ipfix_domain *domain; + domain = dpif_ipfix_exporter_find_domain(exporter, + observation_domain_id); + if (!domain) { + /* First time we see this 
obs_domain_id. */ + domain = dpif_ipfix_exporter_insert_domain(exporter, + observation_domain_id); + } + + if ((domain->last_template_set_time + IPFIX_TEMPLATE_INTERVAL) + <= export_time_sec) { + domain->last_template_set_time = export_time_sec; + return true; + } + return false; +} + static void dpif_ipfix_cache_expire(struct dpif_ipfix_exporter *exporter, bool forced_end, const uint64_t export_time_usec, - const uint32_t export_time_sec) + const uint32_t export_time_sec) OVS_REQUIRES(mutex) { struct ipfix_flow_cache_entry *entry; uint64_t max_flow_start_timestamp_usec; - bool template_msg_sent = false; enum ipfix_flow_end_reason flow_end_reason; if (ovs_list_is_empty(&exporter->cache_flow_start_timestamp_list)) { @@ -2844,25 +2922,28 @@ dpif_ipfix_cache_expire(struct dpif_ipfix_exporter *exporter, break; } - ovs_list_remove(&entry->cache_flow_start_timestamp_list_node); - hmap_remove(&exporter->cache_flow_key_map, - &entry->flow_key_map_node); + /* XXX: Make frequency of the (Options) Template and Exporter Process + * Statistics transmission configurable. + * Cf. IETF RFC 5101 Section 4.3. and 10.3.6. */ + if ((exporter->last_stats_sent_time + IPFIX_TEMPLATE_INTERVAL) + <= export_time_sec) { + exporter->last_stats_sent_time = export_time_sec; + ipfix_send_exporter_data_msg(exporter, export_time_sec); + } - /* XXX: Make frequency of the (Options) Template and Exporter Process - * Statistics transmission configurable. - * Cf. IETF RFC 5101 Section 4.3. and 10.3.6. 
*/ - if (!template_msg_sent - && (exporter->last_template_set_time + IPFIX_TEMPLATE_INTERVAL) - <= export_time_sec) { + if (dpif_ipfix_should_send_template(exporter, + entry->flow_key.obs_domain_id, + export_time_sec)) { + VLOG_DBG("Sending templates for ObservationDomainID %"PRIu32, + entry->flow_key.obs_domain_id); ipfix_send_template_msgs(exporter, export_time_sec, entry->flow_key.obs_domain_id); - exporter->last_template_set_time = export_time_sec; - template_msg_sent = true; - - /* Send Exporter Process Statistics. */ - ipfix_send_exporter_data_msg(exporter, export_time_sec); } + ovs_list_remove(&entry->cache_flow_start_timestamp_list_node); + hmap_remove(&exporter->cache_flow_key_map, + &entry->flow_key_map_node); + /* XXX: Group multiple data records for the same obs domain id * into the same message. */ ipfix_send_data_msg(exporter, export_time_sec, entry, flow_end_reason); @@ -2883,7 +2964,7 @@ get_export_time_now(uint64_t *export_time_usec, uint32_t *export_time_sec) static void dpif_ipfix_cache_expire_now(struct dpif_ipfix_exporter *exporter, - bool forced_end) + bool forced_end) OVS_REQUIRES(mutex) { uint64_t export_time_usec; uint32_t export_time_sec; diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index e05ffe312..b44c72969 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -47,17 +47,20 @@ #define UPCALL_MAX_BATCH 64 #define REVALIDATE_MAX_BATCH 50 +#define UINT64_THREE_QUARTERS (UINT64_MAX / 4 * 3) VLOG_DEFINE_THIS_MODULE(ofproto_dpif_upcall); COVERAGE_DEFINE(dumped_duplicate_flow); COVERAGE_DEFINE(dumped_new_flow); COVERAGE_DEFINE(handler_duplicate_upcall); -COVERAGE_DEFINE(upcall_ukey_contention); -COVERAGE_DEFINE(upcall_ukey_replace); COVERAGE_DEFINE(revalidate_missed_dp_flow); +COVERAGE_DEFINE(ukey_dp_change); +COVERAGE_DEFINE(ukey_invalid_stat_reset); COVERAGE_DEFINE(upcall_flow_limit_hit); COVERAGE_DEFINE(upcall_flow_limit_kill); +COVERAGE_DEFINE(upcall_ukey_contention); 
+COVERAGE_DEFINE(upcall_ukey_replace); /* A thread that reads upcalls from dpif, forwards each upcall's packet, * and possibly sets up a kernel flow as a cache. */ @@ -287,6 +290,7 @@ struct udpif_key { struct ovs_mutex mutex; /* Guards the following. */ struct dpif_flow_stats stats OVS_GUARDED; /* Last known stats.*/ + const char *dp_layer OVS_GUARDED; /* Last known dp_layer. */ long long int created OVS_GUARDED; /* Estimate of creation time. */ uint64_t dump_seq OVS_GUARDED; /* Tracks udpif->dump_seq. */ uint64_t reval_seq OVS_GUARDED; /* Tracks udpif->reval_seq. */ @@ -780,6 +784,17 @@ udpif_get_n_flows(struct udpif *udpif) atomic_store_relaxed(&udpif->n_flows_timestamp, now); dpif_get_dp_stats(udpif->dpif, &stats); flow_count = stats.n_flows; + + if (!dpif_synced_dp_layers(udpif->dpif)) { + /* If the dpif layer does not sync the flows, we need to include + * the hardware offloaded flows separately. */ + uint64_t hw_flows; + + if (!dpif_get_n_offloaded_flows(udpif->dpif, &hw_flows)) { + flow_count += hw_flows; + } + } + atomic_store_relaxed(&udpif->n_flows, flow_count); ovs_mutex_unlock(&udpif->n_flows_mutex); } else { @@ -1766,6 +1781,7 @@ ukey_create__(const struct nlattr *key, size_t key_len, ukey->created = ukey->flow_time = time_msec(); memset(&ukey->stats, 0, sizeof ukey->stats); ukey->stats.used = used; + ukey->dp_layer = NULL; ukey->xcache = NULL; ukey->offloaded = false; @@ -2095,10 +2111,12 @@ ukey_delete(struct umap *umap, struct udpif_key *ukey) } static bool -should_revalidate(const struct udpif *udpif, uint64_t packets, - long long int used) +should_revalidate(const struct udpif *udpif, const struct udpif_key *ukey, + uint64_t packets) + OVS_REQUIRES(ukey->mutex) { long long int metric, now, duration; + long long int used = ukey->stats.used; if (!used) { /* Always revalidate the first time a flow is dumped. 
*/ @@ -2125,8 +2143,12 @@ should_revalidate(const struct udpif *udpif, uint64_t packets, duration = now - used; metric = duration / packets; - if (metric < 1000 / ofproto_min_revalidate_pps) { - /* The flow is receiving more than min-revalidate-pps, so keep it. */ + if (metric < 1000 / ofproto_min_revalidate_pps || + (ukey->offloaded && duration < ofproto_offloaded_stats_delay)) { + /* The flow is receiving more than min-revalidate-pps, so keep it. + * Or it's a hardware offloaded flow that might take up to X seconds + * to update its statistics. Until we are sure the statistics had a + * chance to be updated, also keep it. */ return true; } return false; @@ -2302,6 +2324,27 @@ exit: return result; } +static void +log_unexpected_stats_jump(struct udpif_key *ukey, + const struct dpif_flow_stats *stats) + OVS_REQUIRES(ukey->mutex) +{ + static struct vlog_rate_limit rll = VLOG_RATE_LIMIT_INIT(1, 5); + struct ds ds = DS_EMPTY_INITIALIZER; + struct ofpbuf *actions; + + odp_format_ufid(&ukey->ufid, &ds); + ds_put_cstr(&ds, ", "); + odp_flow_key_format(ukey->key, ukey->key_len, &ds); + ds_put_cstr(&ds, ", actions:"); + actions = ovsrcu_get(struct ofpbuf *, &ukey->actions); + format_odp_actions(&ds, actions->data, actions->size, NULL); + VLOG_WARN_RL(&rll, "Unexpected jump in packet stats from %"PRIu64 + " to %"PRIu64" when handling ukey %s", + ukey->stats.n_packets, stats->n_packets, ds_cstr(&ds)); + ds_destroy(&ds); +} + /* Verifies that the datapath actions of 'ukey' are still correct, and pushes * 'stats' for it. 
* @@ -2324,7 +2367,7 @@ static enum reval_result revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey, const struct dpif_flow_stats *stats, struct ofpbuf *odp_actions, uint64_t reval_seq, - struct recirc_refs *recircs, bool offloaded) + struct recirc_refs *recircs) OVS_REQUIRES(ukey->mutex) { bool need_revalidate = ukey->reval_seq != reval_seq; @@ -2335,15 +2378,19 @@ revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey, push.used = stats->used; push.tcp_flags = stats->tcp_flags; - push.n_packets = (stats->n_packets > ukey->stats.n_packets - ? stats->n_packets - ukey->stats.n_packets - : 0); - push.n_bytes = (stats->n_bytes > ukey->stats.n_bytes - ? stats->n_bytes - ukey->stats.n_bytes - : 0); + push.n_packets = stats->n_packets - ukey->stats.n_packets; + push.n_bytes = stats->n_bytes - ukey->stats.n_bytes; + + if (stats->n_packets < ukey->stats.n_packets && + ukey->stats.n_packets < UINT64_THREE_QUARTERS) { + /* Report cases where the packet counter is lower than the previous + * instance, but exclude the potential wrapping of an uint64_t. */ + COVERAGE_INC(ukey_invalid_stat_reset); + log_unexpected_stats_jump(ukey, stats); + } if (need_revalidate) { - if (should_revalidate(udpif, push.n_packets, ukey->stats.used)) { + if (should_revalidate(udpif, ukey, push.n_packets)) { if (!ukey->xcache) { ukey->xcache = xlate_cache_new(); } else { @@ -2359,7 +2406,7 @@ revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey, /* Stats for deleted flows will be attributed upon flow deletion. Skip. 
*/ if (result != UKEY_DELETE) { - xlate_push_stats(ukey->xcache, &push, offloaded); + xlate_push_stats(ukey->xcache, &push, ukey->offloaded); ukey->stats = *stats; ukey->reval_seq = reval_seq; } @@ -2455,6 +2502,15 @@ push_dp_ops(struct udpif *udpif, struct ukey_op *ops, size_t n_ops) push->tcp_flags = stats->tcp_flags | op->ukey->stats.tcp_flags; push->n_packets = stats->n_packets - op->ukey->stats.n_packets; push->n_bytes = stats->n_bytes - op->ukey->stats.n_bytes; + + if (stats->n_packets < op->ukey->stats.n_packets && + op->ukey->stats.n_packets < UINT64_THREE_QUARTERS) { + /* Report cases where the packet counter is lower than the + * previous instance, but exclude the potential wrapping of an + * uint64_t. */ + COVERAGE_INC(ukey_invalid_stat_reset); + } + ovs_mutex_unlock(&op->ukey->mutex); } else { push = stats; @@ -2759,6 +2815,22 @@ revalidate(struct revalidator *revalidator) continue; } + ukey->offloaded = f->attrs.offloaded; + if (!ukey->dp_layer + || (!dpif_synced_dp_layers(udpif->dpif) + && strcmp(ukey->dp_layer, f->attrs.dp_layer))) { + + if (ukey->dp_layer) { + /* The dp_layer has changed this is probably due to an + * earlier revalidate cycle moving it to/from hw offload. + * In this case we should reset the ukey stored statistics, + * as they are from the deleted DP flow. 
*/ + COVERAGE_INC(ukey_dp_change); + memset(&ukey->stats, 0, sizeof ukey->stats); + } + ukey->dp_layer = f->attrs.dp_layer; + } + already_dumped = ukey->dump_seq == dump_seq; if (already_dumped) { /* The flow has already been handled during this flow dump @@ -2790,8 +2862,7 @@ revalidate(struct revalidator *revalidator) result = UKEY_DELETE; } else { result = revalidate_ukey(udpif, ukey, &stats, &odp_actions, - reval_seq, &recircs, - f->attrs.offloaded); + reval_seq, &recircs); } ukey->dump_seq = dump_seq; @@ -2876,7 +2947,7 @@ revalidator_sweep__(struct revalidator *revalidator, bool purge) COVERAGE_INC(revalidate_missed_dp_flow); memcpy(&stats, &ukey->stats, sizeof stats); result = revalidate_ukey(udpif, ukey, &stats, &odp_actions, - reval_seq, &recircs, false); + reval_seq, &recircs); } if (result != UKEY_KEEP) { /* Clears 'recircs' if filled by revalidate_ukey(). */ diff --git a/ofproto/ofproto-dpif-xlate-cache.c b/ofproto/ofproto-dpif-xlate-cache.c index 9224ee2e6..2e1fcb3a6 100644 --- a/ofproto/ofproto-dpif-xlate-cache.c +++ b/ofproto/ofproto-dpif-xlate-cache.c @@ -125,7 +125,7 @@ xlate_push_stats_entry(struct xc_entry *entry, case XC_LEARN: { enum ofperr error; error = ofproto_flow_mod_learn(entry->learn.ofm, true, - entry->learn.limit, NULL); + entry->learn.limit, NULL, stats->used); if (error) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); VLOG_WARN_RL(&rl, "xcache LEARN action execution failed."); diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index a9cf3cbee..18ff9e5a4 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -66,6 +66,7 @@ #include "tunnel.h" #include "util.h" #include "uuid.h" +#include "vlan-bitmap.h" COVERAGE_DEFINE(xlate_actions); COVERAGE_DEFINE(xlate_actions_oversize); @@ -500,6 +501,84 @@ ctx_cancel_freeze(struct xlate_ctx *ctx) static void finish_freezing(struct xlate_ctx *ctx); +/* These functions and structure are used to save stack space in actions that + * 
need to retain a large amount of xlate_ctx state. */ +struct xretained_state { + union mf_subvalue new_stack[1024 / sizeof(union mf_subvalue)]; + uint64_t actset_stub[1024 / 8]; + struct ofpbuf old_stack; + struct ofpbuf old_action_set; + struct flow old_flow; + struct flow old_base; + struct flow_tnl flow_tnl_mask; +}; + +/* The return of this function must be freed by + * xretain_state_restore_and_free(). */ +static struct xretained_state * +xretain_state_save(struct xlate_ctx *ctx) +{ + struct xretained_state *retained = xmalloc(sizeof *retained); + + retained->old_flow = ctx->xin->flow; + retained->old_stack = ctx->stack; + retained->old_action_set = ctx->action_set; + ofpbuf_use_stub(&ctx->stack, retained->new_stack, + sizeof retained->new_stack); + ofpbuf_use_stub(&ctx->action_set, retained->actset_stub, + sizeof retained->actset_stub); + + return retained; +} + +static void +xretain_tunnel_mask_save(const struct xlate_ctx *ctx, + struct xretained_state *retained) +{ + retained->flow_tnl_mask = ctx->wc->masks.tunnel; +} + +static void +xretain_base_flow_save(const struct xlate_ctx *ctx, + struct xretained_state *retained) +{ + retained->old_base = ctx->base_flow; +} + +static void +xretain_base_flow_restore(struct xlate_ctx *ctx, + const struct xretained_state *retained) +{ + ctx->base_flow = retained->old_base; +} + +static void +xretain_flow_restore(struct xlate_ctx *ctx, + const struct xretained_state *retained) +{ + ctx->xin->flow = retained->old_flow; +} + +static void +xretain_tunnel_mask_restore(struct xlate_ctx *ctx, + const struct xretained_state *retained) +{ + ctx->wc->masks.tunnel = retained->flow_tnl_mask; +} + +static void +xretain_state_restore_and_free(struct xlate_ctx *ctx, + struct xretained_state *retained) +{ + ctx->xin->flow = retained->old_flow; + ofpbuf_uninit(&ctx->action_set); + ctx->action_set = retained->old_action_set; + ofpbuf_uninit(&ctx->stack); + ctx->stack = retained->old_stack; + + free(retained); +} + /* A controller may use 
OFPP_NONE as the ingress port to indicate that * it did not arrive on a "real" port. 'ofpp_none_bundle' exists for * when an input bundle is needed for validation (e.g., mirroring or @@ -1028,7 +1107,10 @@ xlate_xbundle_set(struct xbundle *xbundle, xbundle->qinq_ethtype = qinq_ethtype; xbundle->vlan = vlan; xbundle->trunks = trunks; - xbundle->cvlans = cvlans; + if (!vlan_bitmap_equal(xbundle->cvlans, cvlans)) { + free(xbundle->cvlans); + xbundle->cvlans = vlan_bitmap_clone(cvlans); + } xbundle->use_priority_tags = use_priority_tags; xbundle->floodable = floodable; xbundle->protected = protected; @@ -1380,6 +1462,7 @@ xlate_xbundle_remove(struct xlate_cfg *xcfg, struct xbundle *xbundle) ovs_list_remove(&xbundle->list_node); bond_unref(xbundle->bond); lacp_unref(xbundle->lacp); + free(xbundle->cvlans); free(xbundle->name); free(xbundle); } @@ -1532,7 +1615,8 @@ xlate_lookup_ofproto_(const struct dpif_backer *backer, } ofp_port_t in_port = recirc_id_node->state.metadata.in_port; - if (in_port != OFPP_NONE && in_port != OFPP_CONTROLLER) { + if (in_port != OFPP_NONE && in_port != OFPP_CONTROLLER && + !uuid_is_zero(&recirc_id_node->state.xport_uuid)) { struct uuid xport_uuid = recirc_id_node->state.xport_uuid; xport = xport_lookup_by_uuid(xcfg, &xport_uuid); if (xport && xport->xbridge && xport->xbridge->ofproto) { @@ -1543,11 +1627,19 @@ xlate_lookup_ofproto_(const struct dpif_backer *backer, * that the packet originated from the controller via an OpenFlow * "packet-out". The right thing to do is to find just the * ofproto. There is no xport, which is OK. + * Also a zeroed xport_uuid with a valid in_port, means that + * the packet originated from OFPP_CONTROLLER passed + * through a patch port. * * OFPP_NONE can also indicate that a bond caused recirculation. 
*/ struct uuid uuid = recirc_id_node->state.ofproto_uuid; const struct xbridge *bridge = xbridge_lookup_by_uuid(xcfg, &uuid); + if (bridge && bridge->ofproto) { + if (in_port != OFPP_CONTROLLER && in_port != OFPP_NONE && + !get_ofp_port(bridge, in_port)) { + goto xport_lookup; + } if (errorp) { *errorp = NULL; } @@ -1560,6 +1652,7 @@ xlate_lookup_ofproto_(const struct dpif_backer *backer, } } +xport_lookup: xport = xport_lookup(xcfg, tnl_port_should_receive(flow) ? tnl_port_receive(flow) : odp_port_to_ofport(backer, flow->in_port.odp_port)); @@ -3906,20 +3999,17 @@ static void patch_port_output(struct xlate_ctx *ctx, const struct xport *in_dev, struct xport *out_dev, bool is_last_action) { + bool old_was_mpls = ctx->was_mpls; struct flow *flow = &ctx->xin->flow; - struct flow old_flow = ctx->xin->flow; - struct flow_tnl old_flow_tnl_wc = ctx->wc->masks.tunnel; bool old_conntrack = ctx->conntracked; - bool old_was_mpls = ctx->was_mpls; - ovs_version_t old_version = ctx->xin->tables_version; - struct ofpbuf old_stack = ctx->stack; - uint8_t new_stack[1024]; - struct ofpbuf old_action_set = ctx->action_set; + struct xretained_state *retained_state; struct ovs_list *old_trace = ctx->xin->trace; - uint64_t actset_stub[1024 / 8]; + ovs_version_t old_version = ctx->xin->tables_version; + + retained_state = xretain_state_save(ctx); + + xretain_tunnel_mask_save(ctx, retained_state); - ofpbuf_use_stub(&ctx->stack, new_stack, sizeof new_stack); - ofpbuf_use_stub(&ctx->action_set, actset_stub, sizeof actset_stub); flow->in_port.ofp_port = out_dev->ofp_port; flow->metadata = htonll(0); memset(&flow->tunnel, 0, sizeof flow->tunnel); @@ -3958,14 +4048,15 @@ patch_port_output(struct xlate_ctx *ctx, const struct xport *in_dev, } else { /* Forwarding is disabled by STP and RSTP. Let OFPP_NORMAL and * the learning action look at the packet, then drop it. 
*/ - struct flow old_base_flow = ctx->base_flow; size_t old_size = ctx->odp_actions->size; + + xretain_base_flow_save(ctx, retained_state); mirror_mask_t old_mirrors2 = ctx->mirrors; xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true, false, is_last_action, clone_xlate_actions); ctx->mirrors = old_mirrors2; - ctx->base_flow = old_base_flow; + xretain_base_flow_restore(ctx, retained_state); ctx->odp_actions->size = old_size; /* Undo changes that may have been done for freezing. */ @@ -3977,18 +4068,15 @@ patch_port_output(struct xlate_ctx *ctx, const struct xport *in_dev, if (independent_mirrors) { ctx->mirrors = old_mirrors; } - ctx->xin->flow = old_flow; ctx->xbridge = in_dev->xbridge; - ofpbuf_uninit(&ctx->action_set); - ctx->action_set = old_action_set; - ofpbuf_uninit(&ctx->stack); - ctx->stack = old_stack; /* Restore calling bridge's lookup version. */ ctx->xin->tables_version = old_version; - /* Restore to calling bridge tunneling information */ - ctx->wc->masks.tunnel = old_flow_tnl_wc; + /* Restore to calling bridge tunneling information; the ctx flow, actions, + * and stack. And free the retained state. */ + xretain_tunnel_mask_restore(ctx, retained_state); + xretain_state_restore_and_free(ctx, retained_state); /* The out bridge popping MPLS should have no effect on the original * bridge. */ @@ -4238,7 +4326,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, const struct xport *xport = get_ofp_port(ctx->xbridge, ofp_port); struct flow_wildcards *wc = ctx->wc; struct flow *flow = &ctx->xin->flow; - struct flow_tnl flow_tnl; + struct flow_tnl *flow_tnl = NULL; union flow_vlan_hdr flow_vlans[FLOW_MAX_VLAN_HEADERS]; uint8_t flow_nw_tos; odp_port_t out_port, odp_port, odp_tnl_port; @@ -4252,7 +4340,6 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, /* If 'struct flow' gets additional metadata, we'll need to zero it out * before traversing a patch port. 
*/ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42); - memset(&flow_tnl, 0, sizeof flow_tnl); if (!check_output_prerequisites(ctx, xport, flow, check_stp)) { return; @@ -4296,7 +4383,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, * the Logical (tunnel) Port are not visible for any further * matches, while explicit set actions on tunnel metadata are. */ - flow_tnl = flow->tunnel; + flow_tnl = xmemdup(&flow->tunnel, sizeof *flow_tnl); odp_port = tnl_port_send(xport->ofport, flow, ctx->wc); if (odp_port == ODPP_NONE) { xlate_report(ctx, OFT_WARN, "Tunneling decided against output"); @@ -4327,7 +4414,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, tnl_type = tnl_port_get_type(xport->ofport); commit_odp_tunnel_action(flow, &ctx->base_flow, ctx->odp_actions, tnl_type); - flow->tunnel = flow_tnl; /* Restore tunnel metadata */ + flow->tunnel = *flow_tnl; /* Restore tunnel metadata. */ } } else { odp_port = xport->odp_port; @@ -4371,7 +4458,8 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, /* Output to native tunnel port. */ native_tunnel_output(ctx, xport, flow, odp_port, truncate, is_last_action); - flow->tunnel = flow_tnl; /* Restore tunnel metadata */ + ovs_assert(flow_tnl); + flow->tunnel = *flow_tnl; /* Restore tunnel metadata. 
*/ } else if (terminate_native_tunnel(ctx, xport, flow, wc, &odp_tnl_port)) { @@ -4414,7 +4502,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, xport->xbundle)); } - out: +out: /* Restore flow */ memcpy(flow->vlans, flow_vlans, sizeof flow->vlans); flow->nw_tos = flow_nw_tos; @@ -4422,6 +4510,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, flow->dl_src = flow_dl_src; flow->packet_type = flow_packet_type; flow->dl_type = flow_dl_type; + free(flow_tnl); } static void @@ -5211,6 +5300,7 @@ compose_dec_ttl(struct xlate_ctx *ctx, struct ofpact_cnt_ids *ids) } ctx->wc->masks.nw_ttl = 0xff; + WC_MASK_FIELD(ctx->wc, nw_proto); if (flow->nw_ttl > 1) { flow->nw_ttl--; return false; @@ -5399,15 +5489,15 @@ xlate_output_reg_action(struct xlate_ctx *ctx, { uint64_t port = mf_get_subfield(&or->src, &ctx->xin->flow); if (port <= UINT16_MAX) { - xlate_report(ctx, OFT_DETAIL, "output port is %"PRIu64, port); - - union mf_subvalue value; + union mf_subvalue *value = xmalloc(sizeof *value); - memset(&value, 0xff, sizeof value); - mf_write_subfield_flow(&or->src, &value, &ctx->wc->masks); + xlate_report(ctx, OFT_DETAIL, "output port is %"PRIu64, port); + memset(value, 0xff, sizeof *value); + mf_write_subfield_flow(&or->src, value, &ctx->wc->masks); xlate_output_action(ctx, u16_to_ofp(port), or->max_len, false, is_last_action, false, group_bucket_action); + free(value); } else { xlate_report(ctx, OFT_WARN, "output port %"PRIu64" is out of range", port); @@ -5616,8 +5706,16 @@ xlate_learn_action(struct xlate_ctx *ctx, const struct ofpact_learn *learn) if (!error) { bool success = true; if (ctx->xin->allow_side_effects) { + long long int last_used; + + if (ctx->xin->resubmit_stats) { + last_used = ctx->xin->resubmit_stats->used; + } else { + last_used = time_msec(); + } error = ofproto_flow_mod_learn(ofm, ctx->xin->xcache != NULL, - learn->limit, &success); + learn->limit, &success, + last_used); } else if (learn->limit) { if 
(!ofm->temp_rule || ofm->temp_rule->state != RULE_INSERTED) { @@ -5748,13 +5846,15 @@ xlate_sample_action(struct xlate_ctx *ctx, struct flow *flow = &ctx->xin->flow; tnl_port_send(xport->ofport, flow, ctx->wc); if (!ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) { - struct flow_tnl flow_tnl = flow->tunnel; + struct flow_tnl *flow_tnl; const char *tnl_type; + flow_tnl = xmemdup(&flow->tunnel, sizeof *flow_tnl); tnl_type = tnl_port_get_type(xport->ofport); commit_odp_tunnel_action(flow, &ctx->base_flow, ctx->odp_actions, tnl_type); - flow->tunnel = flow_tnl; + flow->tunnel = *flow_tnl; + free(flow_tnl); } } else { xlate_report_error(ctx, @@ -5864,21 +5964,12 @@ clone_xlate_actions(const struct ofpact *actions, size_t actions_len, struct xlate_ctx *ctx, bool is_last_action, bool group_bucket_action OVS_UNUSED) { - struct ofpbuf old_stack = ctx->stack; - union mf_subvalue new_stack[1024 / sizeof(union mf_subvalue)]; - ofpbuf_use_stub(&ctx->stack, new_stack, sizeof new_stack); - ofpbuf_put(&ctx->stack, old_stack.data, old_stack.size); - - struct ofpbuf old_action_set = ctx->action_set; - uint64_t actset_stub[1024 / 8]; - ofpbuf_use_stub(&ctx->action_set, actset_stub, sizeof actset_stub); - ofpbuf_put(&ctx->action_set, old_action_set.data, old_action_set.size); - + struct xretained_state *retained_state; size_t offset, ac_offset; - struct flow old_flow = ctx->xin->flow; + + retained_state = xretain_state_save(ctx); if (reversible_actions(actions, actions_len) || is_last_action) { - old_flow = ctx->xin->flow; do_xlate_actions(actions, actions_len, ctx, is_last_action, false); if (!ctx->freezing) { xlate_action_set(ctx); @@ -5893,7 +5984,8 @@ clone_xlate_actions(const struct ofpact *actions, size_t actions_len, * avoid emitting those actions twice. Once inside * the clone, another time for the action after clone. 
*/ xlate_commit_actions(ctx); - struct flow old_base = ctx->base_flow; + xretain_base_flow_save(ctx, retained_state); + bool old_was_mpls = ctx->was_mpls; bool old_conntracked = ctx->conntracked; @@ -5950,14 +6042,10 @@ dp_clone_done: ctx->was_mpls = old_was_mpls; /* Restore the 'base_flow' for the next action. */ - ctx->base_flow = old_base; + xretain_base_flow_restore(ctx, retained_state); xlate_done: - ofpbuf_uninit(&ctx->action_set); - ctx->action_set = old_action_set; - ofpbuf_uninit(&ctx->stack); - ctx->stack = old_stack; - ctx->xin->flow = old_flow; + xretain_state_restore_and_free(ctx, retained_state); } static void @@ -6333,8 +6421,8 @@ compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc, { uint16_t zone; if (ofc->zone_src.field) { - union mf_subvalue value; - memset(&value, 0xff, sizeof(value)); + union mf_subvalue *value = xmalloc(sizeof *value); + memset(value, 0xff, sizeof *value); zone = mf_get_subfield(&ofc->zone_src, &ctx->xin->flow); if (ctx->xin->frozen_state) { @@ -6344,12 +6432,13 @@ compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc, * which will invalidate the megaflow with old the recirc_id. 
*/ if (!mf_is_frozen_metadata(ofc->zone_src.field)) { - mf_write_subfield_flow(&ofc->zone_src, &value, + mf_write_subfield_flow(&ofc->zone_src, value, &ctx->wc->masks); } } else { - mf_write_subfield_flow(&ofc->zone_src, &value, &ctx->wc->masks); + mf_write_subfield_flow(&ofc->zone_src, value, &ctx->wc->masks); } + free(value); } else { zone = ofc->zone_imm; } @@ -6439,16 +6528,16 @@ xlate_check_pkt_larger(struct xlate_ctx *ctx, const struct ofpact *remaining_acts, size_t remaining_acts_len) { - union mf_subvalue value; - memset(&value, 0, sizeof value); + union mf_subvalue *value = xmalloc(sizeof *value); + memset(value, 0, sizeof *value); if (!ctx->xbridge->support.check_pkt_len) { uint8_t is_pkt_larger = 0; if (ctx->xin->packet) { is_pkt_larger = dp_packet_size(ctx->xin->packet) > check_pkt_larger->pkt_len; } - value.u8_val = is_pkt_larger; - mf_write_subfield_flow(&check_pkt_larger->dst, &value, + value->u8_val = is_pkt_larger; + mf_write_subfield_flow(&check_pkt_larger->dst, value, &ctx->xin->flow); /* If datapath doesn't support check_pkt_len action, then set the * SLOW_ACTION flag. If we don't set SLOW_ACTION, we @@ -6458,22 +6547,17 @@ xlate_check_pkt_larger(struct xlate_ctx *ctx, * the packet length. This results in wrong actions being applied. 
*/ ctx->xout->slow |= SLOW_ACTION; + free(value); return; } - struct ofpbuf old_stack = ctx->stack; - union mf_subvalue new_stack[1024 / sizeof(union mf_subvalue)]; - ofpbuf_use_stub(&ctx->stack, new_stack, sizeof new_stack); - ofpbuf_put(&ctx->stack, old_stack.data, old_stack.size); + struct xretained_state *retained_state; - struct ofpbuf old_action_set = ctx->action_set; - uint64_t actset_stub[1024 / 8]; - ofpbuf_use_stub(&ctx->action_set, actset_stub, sizeof actset_stub); - ofpbuf_put(&ctx->action_set, old_action_set.data, old_action_set.size); + retained_state = xretain_state_save(ctx); - struct flow old_flow = ctx->xin->flow; xlate_commit_actions(ctx); - struct flow old_base = ctx->base_flow; + xretain_base_flow_save(ctx, retained_state); + bool old_was_mpls = ctx->was_mpls; bool old_conntracked = ctx->conntracked; @@ -6483,8 +6567,8 @@ xlate_check_pkt_larger(struct xlate_ctx *ctx, check_pkt_larger->pkt_len); size_t offset_attr = nl_msg_start_nested( ctx->odp_actions, OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER); - value.u8_val = 1; - mf_write_subfield_flow(&check_pkt_larger->dst, &value, &ctx->xin->flow); + value->u8_val = 1; + mf_write_subfield_flow(&check_pkt_larger->dst, value, &ctx->xin->flow); do_xlate_actions(remaining_acts, remaining_acts_len, ctx, true, false); if (!ctx->freezing) { xlate_action_set(ctx); @@ -6494,10 +6578,10 @@ xlate_check_pkt_larger(struct xlate_ctx *ctx, } nl_msg_end_nested(ctx->odp_actions, offset_attr); - ctx->base_flow = old_base; + xretain_base_flow_restore(ctx, retained_state); + xretain_flow_restore(ctx, retained_state); ctx->was_mpls = old_was_mpls; ctx->conntracked = old_conntracked; - ctx->xin->flow = old_flow; /* If the flow translation for the IF_GREATER case requires freezing, * then ctx->exit would be true. 
Reset to false so that we can @@ -6508,8 +6592,8 @@ xlate_check_pkt_larger(struct xlate_ctx *ctx, offset_attr = nl_msg_start_nested( ctx->odp_actions, OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL); - value.u8_val = 0; - mf_write_subfield_flow(&check_pkt_larger->dst, &value, &ctx->xin->flow); + value->u8_val = 0; + mf_write_subfield_flow(&check_pkt_larger->dst, value, &ctx->xin->flow); do_xlate_actions(remaining_acts, remaining_acts_len, ctx, true, false); if (!ctx->freezing) { xlate_action_set(ctx); @@ -6520,15 +6604,12 @@ xlate_check_pkt_larger(struct xlate_ctx *ctx, nl_msg_end_nested(ctx->odp_actions, offset_attr); nl_msg_end_nested(ctx->odp_actions, offset); - ofpbuf_uninit(&ctx->action_set); - ctx->action_set = old_action_set; - ofpbuf_uninit(&ctx->stack); - ctx->stack = old_stack; - ctx->base_flow = old_base; ctx->was_mpls = old_was_mpls; ctx->conntracked = old_conntracked; - ctx->xin->flow = old_flow; ctx->exit = old_exit; + xretain_base_flow_restore(ctx, retained_state); + xretain_state_restore_and_free(ctx, retained_state); + free(value); } static void @@ -6979,6 +7060,132 @@ xlate_ofpact_unroll_xlate(struct xlate_ctx *ctx, "cookie=%#"PRIx64, a->rule_table_id, a->rule_cookie); } +/* Reset the mirror context if we modify the packet and would like to mirror + * the new copy. 
*/ +static void +reset_mirror_ctx(struct xlate_ctx *ctx, const struct flow *flow, + const struct ofpact *a) +{ + switch (a->type) { + case OFPACT_STRIP_VLAN: + case OFPACT_PUSH_VLAN: + case OFPACT_SET_ETH_SRC: + case OFPACT_SET_ETH_DST: + case OFPACT_PUSH_MPLS: + case OFPACT_POP_MPLS: + case OFPACT_SET_MPLS_LABEL: + case OFPACT_SET_MPLS_TC: + case OFPACT_SET_MPLS_TTL: + case OFPACT_DEC_MPLS_TTL: + case OFPACT_DEC_NSH_TTL: + case OFPACT_DEC_TTL: + case OFPACT_SET_VLAN_VID: + case OFPACT_SET_VLAN_PCP: + case OFPACT_ENCAP: + case OFPACT_DECAP: + case OFPACT_NAT: + ctx->mirrors = 0; + return; + + case OFPACT_SET_FIELD: { + const struct ofpact_set_field *set_field; + const struct mf_field *mf; + + set_field = ofpact_get_SET_FIELD(a); + mf = set_field->field; + if (mf_are_prereqs_ok(mf, flow, NULL)) { + ctx->mirrors = 0; + } + return; + } + + case OFPACT_SET_IPV4_SRC: + case OFPACT_SET_IPV4_DST: + if (flow->dl_type == htons(ETH_TYPE_IP)) { + ctx->mirrors = 0; + } + return; + + case OFPACT_SET_IP_DSCP: + case OFPACT_SET_IP_ECN: + case OFPACT_SET_IP_TTL: + if (is_ip_any(flow)) { + ctx->mirrors = 0; + } + return; + + case OFPACT_SET_L4_SRC_PORT: + case OFPACT_SET_L4_DST_PORT: + if (is_ip_any(flow) && !(flow->nw_frag & FLOW_NW_FRAG_LATER)) { + ctx->mirrors = 0; + } + return; + + case OFPACT_OUTPUT_REG: + case OFPACT_OUTPUT_TRUNC: + case OFPACT_GROUP: + case OFPACT_OUTPUT: + case OFPACT_CONTROLLER: + case OFPACT_RESUBMIT: + case OFPACT_GOTO_TABLE: + case OFPACT_WRITE_METADATA: + case OFPACT_SET_TUNNEL: + case OFPACT_REG_MOVE: + case OFPACT_STACK_PUSH: + case OFPACT_STACK_POP: + case OFPACT_LEARN: + case OFPACT_ENQUEUE: + case OFPACT_SET_QUEUE: + case OFPACT_POP_QUEUE: + case OFPACT_MULTIPATH: + case OFPACT_BUNDLE: + case OFPACT_EXIT: + case OFPACT_UNROLL_XLATE: + case OFPACT_FIN_TIMEOUT: + case OFPACT_CLEAR_ACTIONS: + case OFPACT_WRITE_ACTIONS: + case OFPACT_METER: + case OFPACT_SAMPLE: + case OFPACT_CLONE: + case OFPACT_DEBUG_RECIRC: + case OFPACT_DEBUG_SLOW: + case 
OFPACT_CT: + case OFPACT_CT_CLEAR: + case OFPACT_CHECK_PKT_LARGER: + case OFPACT_DELETE_FIELD: + case OFPACT_NOTE: + case OFPACT_CONJUNCTION: + return; + } + + OVS_NOT_REACHED(); +} + +static void +xlate_trace(struct xlate_ctx *ctx, const struct ofpact *a) +{ + struct ofputil_port_map *map; + + map = xmalloc(sizeof *map); + ofputil_port_map_init(map); + + if (ctx->xin->names) { + struct ofproto_dpif *ofprotop; + + ofprotop = ofproto_dpif_lookup_by_name(ctx->xbridge->name); + ofproto_append_ports_to_map(map, ofprotop->up.ports); + } + + struct ds s = DS_EMPTY_INITIALIZER; + struct ofpact_format_params fp = { .s = &s, .port_map = map }; + + ofpacts_format(a, OFPACT_ALIGN(a->len), &fp); + xlate_report(ctx, OFT_ACTION, "%s", ds_cstr(&s)); + ds_destroy(&s); + ofputil_port_map_destroy(map); + free(map); +} + static void do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, struct xlate_ctx *ctx, bool is_last_action, @@ -7020,21 +7227,10 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, break; } - if (OVS_UNLIKELY(ctx->xin->trace)) { - struct ofputil_port_map map = OFPUTIL_PORT_MAP_INITIALIZER(&map); + reset_mirror_ctx(ctx, flow, a); - if (ctx->xin->names) { - struct ofproto_dpif *ofprotop; - ofprotop = ofproto_dpif_lookup_by_name(ctx->xbridge->name); - ofproto_append_ports_to_map(&map, ofprotop->up.ports); - } - - struct ds s = DS_EMPTY_INITIALIZER; - struct ofpact_format_params fp = { .s = &s, .port_map = &map }; - ofpacts_format(a, OFPACT_ALIGN(a->len), &fp); - xlate_report(ctx, OFT_ACTION, "%s", ds_cstr(&s)); - ds_destroy(&s); - ofputil_port_map_destroy(&map); + if (OVS_UNLIKELY(ctx->xin->trace)) { + xlate_trace(ctx, a); } switch (a->type) { @@ -7128,6 +7324,7 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, case OFPACT_SET_IPV4_SRC: if (flow->dl_type == htons(ETH_TYPE_IP)) { memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src); + WC_MASK_FIELD(wc, nw_proto); flow->nw_src = ofpact_get_SET_IPV4_SRC(a)->ipv4; } 
break; @@ -7135,12 +7332,14 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, case OFPACT_SET_IPV4_DST: if (flow->dl_type == htons(ETH_TYPE_IP)) { memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst); + WC_MASK_FIELD(wc, nw_proto); flow->nw_dst = ofpact_get_SET_IPV4_DST(a)->ipv4; } break; case OFPACT_SET_IP_DSCP: if (is_ip_any(flow)) { + WC_MASK_FIELD(wc, nw_proto); wc->masks.nw_tos |= IP_DSCP_MASK; flow->nw_tos &= ~IP_DSCP_MASK; flow->nw_tos |= ofpact_get_SET_IP_DSCP(a)->dscp; @@ -7149,6 +7348,7 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, case OFPACT_SET_IP_ECN: if (is_ip_any(flow)) { + WC_MASK_FIELD(wc, nw_proto); wc->masks.nw_tos |= IP_ECN_MASK; flow->nw_tos &= ~IP_ECN_MASK; flow->nw_tos |= ofpact_get_SET_IP_ECN(a)->ecn; @@ -7157,6 +7357,7 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, case OFPACT_SET_IP_TTL: if (is_ip_any(flow)) { + WC_MASK_FIELD(wc, nw_proto); wc->masks.nw_ttl = 0xff; flow->nw_ttl = ofpact_get_SET_IP_TTL(a)->ttl; } @@ -7224,6 +7425,7 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, /* Set the field only if the packet actually has it. */ if (mf_are_prereqs_ok(mf, flow, wc)) { + mf_set_mask_l3_prereqs(mf, flow, wc); mf_mask_field_masked(mf, ofpact_set_field_mask(set_field), wc); mf_set_flow_value_masked(mf, set_field->value, ofpact_set_field_mask(set_field), @@ -7280,6 +7482,7 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, case OFPACT_DEC_TTL: wc->masks.nw_ttl = 0xff; + WC_MASK_FIELD(wc, nw_proto); if (compose_dec_ttl(ctx, ofpact_get_DEC_TTL(a))) { return; } diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index f87e27a8c..ba5706f6a 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -714,12 +714,6 @@ close_dpif_backer(struct dpif_backer *backer, bool del) free(backer); } -/* Datapath port slated for removal from datapath. 
*/ -struct odp_garbage { - struct ovs_list list_node; - odp_port_t odp_port; -}; - static void check_support(struct dpif_backer *backer); static int @@ -729,8 +723,6 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp) struct dpif_port_dump port_dump; struct dpif_port port; struct shash_node *node; - struct ovs_list garbage_list; - struct odp_garbage *garbage; struct sset names; char *backer_name; @@ -792,25 +784,23 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp) dpif_flow_flush(backer->dpif); } - /* Loop through the ports already on the datapath and remove any - * that we don't need anymore. */ - ovs_list_init(&garbage_list); + /* Loop through the ports already on the datapath and find ones that are + * not on the initial OpenFlow ports list. These are stale ports, that we + * do not need anymore, or tunnel backing interfaces, that do not generally + * match the name of OpenFlow tunnel ports, or both. Add all of them to + * the list of tunnel backers. type_run() will garbage collect those that + * are not active tunnel backing interfaces during revalidation. 
*/ dpif_port_dump_start(&port_dump, backer->dpif); while (dpif_port_dump_next(&port_dump, &port)) { node = shash_find(&init_ofp_ports, port.name); if (!node && strcmp(port.name, dpif_base_name(backer->dpif))) { - garbage = xmalloc(sizeof *garbage); - garbage->odp_port = port.port_no; - ovs_list_push_front(&garbage_list, &garbage->list_node); + simap_put(&backer->tnl_backers, port.name, + odp_to_u32(port.port_no)); + backer->need_revalidate = REV_RECONFIGURE; } } dpif_port_dump_done(&port_dump); - LIST_FOR_EACH_POP (garbage, list_node, &garbage_list) { - dpif_port_del(backer->dpif, garbage->odp_port, false); - free(garbage); - } - shash_add(&all_dpif_backers, type, backer); check_support(backer); @@ -2171,8 +2161,7 @@ port_destruct(struct ofport *port_, bool del) struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto); const char *devname = netdev_get_name(port->up.netdev); const char *netdev_type = netdev_get_type(port->up.netdev); - char namebuf[NETDEV_VPORT_NAME_BUFSIZE]; - const char *dp_port_name; + struct dpif_port dpif_port; ofproto->backer->need_revalidate = REV_RECONFIGURE; xlate_txn_start(); @@ -2186,9 +2175,13 @@ port_destruct(struct ofport *port_, bool del) del = dpif_cleanup_required(ofproto->backer->dpif); } - dp_port_name = netdev_vport_get_dpif_port(port->up.netdev, namebuf, - sizeof namebuf); - if (del && dpif_port_exists(ofproto->backer->dpif, dp_port_name)) { + /* Don't try to delete ports that are not part of the datapath. */ + if (del && port->odp_port == ODPP_NONE) { + del = false; + } + + if (del && !dpif_port_query_by_number(ofproto->backer->dpif, + port->odp_port, &dpif_port, false)) { /* The underlying device is still there, so delete it. 
This * happens when the ofproto is being destroyed, since the caller * assumes that removal of attached ports will happen as part of @@ -2196,6 +2189,7 @@ port_destruct(struct ofport *port_, bool del) if (!port->is_tunnel) { dpif_port_del(ofproto->backer->dpif, port->odp_port, false); } + dpif_port_destroy(&dpif_port); } else if (del) { /* The underlying device is already deleted (e.g. tunctl -d). * Calling dpif_port_remove to do local cleanup for the netdev */ @@ -4886,7 +4880,7 @@ packet_xlate(struct ofproto *ofproto_, struct ofproto_packet_out *opo) if (entry->type == XC_LEARN) { struct ofproto_flow_mod *ofm = entry->learn.ofm; - error = ofproto_flow_mod_learn_refresh(ofm); + error = ofproto_flow_mod_learn_refresh(ofm, time_msec()); if (error) { goto error_out; } diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h index a84ddc1d0..9f7b8b6e8 100644 --- a/ofproto/ofproto-provider.h +++ b/ofproto/ofproto-provider.h @@ -541,6 +541,11 @@ extern unsigned ofproto_max_revalidator; * duration exceeds half of max-revalidator config variable. */ extern unsigned ofproto_min_revalidate_pps; +/* Worst case delay (in ms) it might take before statistics of offloaded flows + * are updated. Offloaded flows younger than this delay will always be + * revalidated regardless of ofproto_min_revalidate_pps. */ +extern unsigned ofproto_offloaded_stats_delay; + /* Number of upcall handler and revalidator threads. Only affects the * ofproto-dpif implementation. 
*/ extern uint32_t n_handlers, n_revalidators; @@ -2022,9 +2027,11 @@ enum ofperr ofproto_flow_mod_init_for_learn(struct ofproto *, struct ofproto_flow_mod *) OVS_EXCLUDED(ofproto_mutex); enum ofperr ofproto_flow_mod_learn(struct ofproto_flow_mod *, bool keep_ref, - unsigned limit, bool *below_limit) + unsigned limit, bool *below_limit, + long long int last_used) OVS_EXCLUDED(ofproto_mutex); -enum ofperr ofproto_flow_mod_learn_refresh(struct ofproto_flow_mod *ofm); +enum ofperr ofproto_flow_mod_learn_refresh(struct ofproto_flow_mod *ofm, + long long int last_used); enum ofperr ofproto_flow_mod_learn_start(struct ofproto_flow_mod *ofm) OVS_REQUIRES(ofproto_mutex); void ofproto_flow_mod_learn_revert(struct ofproto_flow_mod *ofm) diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 17f636ed9..9a2f7f801 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -311,6 +311,7 @@ unsigned ofproto_flow_limit = OFPROTO_FLOW_LIMIT_DEFAULT; unsigned ofproto_max_idle = OFPROTO_MAX_IDLE_DEFAULT; unsigned ofproto_max_revalidator = OFPROTO_MAX_REVALIDATOR_DEFAULT; unsigned ofproto_min_revalidate_pps = OFPROTO_MIN_REVALIDATE_PPS_DEFAULT; +unsigned ofproto_offloaded_stats_delay = OFPROTO_OFFLOADED_STATS_DELAY; uint32_t n_handlers, n_revalidators; @@ -727,6 +728,15 @@ ofproto_set_min_revalidate_pps(unsigned min_revalidate_pps) ofproto_min_revalidate_pps = min_revalidate_pps ? min_revalidate_pps : 1; } +/* Set worst case delay (in ms) it might take before statistics of offloaded + * flows are updated. Offloaded flows younger than this delay will always be + * revalidated regardless of ofproto_min_revalidate_pps. */ +void +ofproto_set_offloaded_stats_delay(unsigned offloaded_stats_delay) +{ + ofproto_offloaded_stats_delay = offloaded_stats_delay; +} + /* If forward_bpdu is true, the NORMAL action will forward frames with * reserved (e.g. STP) destination Ethernet addresses. if forward_bpdu is false, * the NORMAL action will drop these frames. 
*/ @@ -5456,7 +5466,8 @@ ofproto_flow_mod_init_for_learn(struct ofproto *ofproto, } enum ofperr -ofproto_flow_mod_learn_refresh(struct ofproto_flow_mod *ofm) +ofproto_flow_mod_learn_refresh(struct ofproto_flow_mod *ofm, + long long int last_used) { enum ofperr error = 0; @@ -5477,9 +5488,37 @@ ofproto_flow_mod_learn_refresh(struct ofproto_flow_mod *ofm) * this function is executed the rule will be reinstated. */ if (rule->state == RULE_REMOVED) { struct cls_rule cr; + struct oftable *table = &rule->ofproto->tables[rule->table_id]; + ovs_version_t tables_version = rule->ofproto->tables_version; + + if (!cls_rule_visible_in_version(&rule->cr, tables_version)) { + const struct cls_rule *curr_cls_rule; + + /* Only check for matching classifier rules and their modified + * time, instead of also checking all rule metadata, with the goal + * of suppressing a learn action update that would replace a more + * recent rule in the classifier. */ + curr_cls_rule = classifier_find_rule_exactly(&table->cls, + &rule->cr, + tables_version); + if (curr_cls_rule) { + struct rule *curr_rule = rule_from_cls_rule(curr_cls_rule); + long long int curr_last_used; + + ovs_mutex_lock(&curr_rule->mutex); + curr_last_used = curr_rule->modified; + ovs_mutex_unlock(&curr_rule->mutex); + + if (curr_last_used > last_used) { + /* In the case of a newer visible rule, don't recreate the + * current rule. */ + return 0; + } + } + } - cls_rule_clone(&cr, &rule->cr); ovs_mutex_lock(&rule->mutex); + cls_rule_clone(&cr, &rule->cr); error = ofproto_rule_create(rule->ofproto, &cr, rule->table_id, rule->flow_cookie, rule->idle_timeout, @@ -5490,6 +5529,7 @@ ofproto_flow_mod_learn_refresh(struct ofproto_flow_mod *ofm) rule->match_tlv_bitmap, rule->ofpacts_tlv_bitmap, &ofm->temp_rule); + ofm->temp_rule->modified = last_used; ovs_mutex_unlock(&rule->mutex); if (!error) { ofproto_rule_unref(rule); /* Release old reference. 
*/ @@ -5497,7 +5537,7 @@ ofproto_flow_mod_learn_refresh(struct ofproto_flow_mod *ofm) } else { /* Refresh the existing rule. */ ovs_mutex_lock(&rule->mutex); - rule->modified = time_msec(); + rule->modified = last_used; ovs_mutex_unlock(&rule->mutex); } return error; @@ -5549,10 +5589,16 @@ ofproto_flow_mod_learn_finish(struct ofproto_flow_mod *ofm, /* Refresh 'ofm->temp_rule', for which the caller holds a reference, if already * in the classifier, insert it otherwise. If the rule has already been - * removed from the classifier, a new rule is created using 'ofm->temp_rule' as - * a template and the reference to the old 'ofm->temp_rule' is freed. If - * 'keep_ref' is true, then a reference to the current rule is held, otherwise - * it is released and 'ofm->temp_rule' is set to NULL. + * removed from the classifier and replaced by another rule, the 'last_used' + * parameter is used to determine whether the newer rule is replaced or kept. + * If 'last_used' is greater than the last modified time of an identical rule + * in the classifier, then a new rule is created using 'ofm->temp_rule' as a + * template and the reference to the old 'ofm->temp_rule' is freed. If the + * rule has been removed but another identical rule doesn't exist in the + * classifier, then it will be recreated. If the rule hasn't been removed + * from the classifier, then 'last_used' is used to update the rules modified + * time. If 'keep_ref' is true, then a reference to the current rule is held, + * otherwise it is released and 'ofm->temp_rule' is set to NULL. * * If 'limit' != 0, insertion will fail if there are more than 'limit' rules * in the same table with the same cookie. If insertion succeeds, @@ -5563,10 +5609,11 @@ ofproto_flow_mod_learn_finish(struct ofproto_flow_mod *ofm, * during the call. 
*/ enum ofperr ofproto_flow_mod_learn(struct ofproto_flow_mod *ofm, bool keep_ref, - unsigned limit, bool *below_limitp) + unsigned limit, bool *below_limitp, + long long int last_used) OVS_EXCLUDED(ofproto_mutex) { - enum ofperr error = ofproto_flow_mod_learn_refresh(ofm); + enum ofperr error = ofproto_flow_mod_learn_refresh(ofm, last_used); struct rule *rule = ofm->temp_rule; bool below_limit = true; @@ -5599,6 +5646,11 @@ ofproto_flow_mod_learn(struct ofproto_flow_mod *ofm, bool keep_ref, error = ofproto_flow_mod_learn_start(ofm); if (!error) { + /* ofproto_flow_mod_learn_start may have overwritten + * modified with current time. */ + ovs_mutex_lock(&ofm->temp_rule->mutex); + ofm->temp_rule->modified = last_used; + ovs_mutex_unlock(&ofm->temp_rule->mutex); error = ofproto_flow_mod_learn_finish(ofm, NULL); } } else { diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h index 4e15167ab..fa7973ac7 100644 --- a/ofproto/ofproto.h +++ b/ofproto/ofproto.h @@ -311,6 +311,7 @@ int ofproto_port_dump_done(struct ofproto_port_dump *); #define OFPROTO_MAX_IDLE_DEFAULT 10000 /* ms */ #define OFPROTO_MAX_REVALIDATOR_DEFAULT 500 /* ms */ #define OFPROTO_MIN_REVALIDATE_PPS_DEFAULT 5 +#define OFPROTO_OFFLOADED_STATS_DELAY 2000 /* ms */ const char *ofproto_port_open_type(const struct ofproto *, const char *port_type); @@ -340,6 +341,7 @@ void ofproto_set_flow_limit(unsigned limit); void ofproto_set_max_idle(unsigned max_idle); void ofproto_set_max_revalidator(unsigned max_revalidator); void ofproto_set_min_revalidate_pps(unsigned min_revalidate_pps); +void ofproto_set_offloaded_stats_delay(unsigned offloaded_stats_delay); void ofproto_set_forward_bpdu(struct ofproto *, bool forward_bpdu); void ofproto_set_mac_table_config(struct ofproto *, unsigned idle_time, size_t max_entries); diff --git a/ovsdb/execution.c b/ovsdb/execution.c index f9b8067d0..5587ef96f 100644 --- a/ovsdb/execution.c +++ b/ovsdb/execution.c @@ -320,7 +320,7 @@ parse_row(const struct json *json, const struct 
ovsdb_table *table, } row = ovsdb_row_create(table); - error = ovsdb_row_from_json(row, json, symtab, columns); + error = ovsdb_row_from_json(row, json, symtab, columns, false); if (error) { ovsdb_row_destroy(row); return error; @@ -764,7 +764,7 @@ ovsdb_execute_wait(struct ovsdb_execution *x, struct ovsdb_parser *parser, row = ovsdb_row_create(table); error = ovsdb_row_from_json(row, rows->array.elems[i], x->symtab, - NULL); + NULL, false); if (error) { ovsdb_row_destroy(row); break; diff --git a/ovsdb/file.c b/ovsdb/file.c index fdc289ad1..a3273229b 100644 --- a/ovsdb/file.c +++ b/ovsdb/file.c @@ -107,13 +107,18 @@ ovsdb_file_update_row_from_json(struct ovsdb_row *row, bool converting, column_name, schema->name); } - error = ovsdb_datum_from_json(&datum, &column->type, node->data, NULL); + if (row_contains_diff) { + /* Diff may violate the type size rules. */ + error = ovsdb_transient_datum_from_json(&datum, &column->type, + node->data); + } else { + error = ovsdb_datum_from_json(&datum, &column->type, + node->data, NULL); + } if (error) { return error; } - if (row_contains_diff - && !ovsdb_datum_is_default(&row->fields[column->index], - &column->type)) { + if (row_contains_diff) { error = ovsdb_datum_apply_diff_in_place( &row->fields[column->index], &datum, &column->type); @@ -154,8 +159,7 @@ ovsdb_file_txn_row_from_json(struct ovsdb_txn *txn, struct ovsdb_table *table, new = ovsdb_row_create(table); *ovsdb_row_get_uuid_rw(new) = *row_uuid; - error = ovsdb_file_update_row_from_json(new, converting, - row_contains_diff, json); + error = ovsdb_file_update_row_from_json(new, converting, false, json); if (error) { ovsdb_row_destroy(new); } else { diff --git a/ovsdb/log.c b/ovsdb/log.c index e42f00246..fff7c6ba1 100644 --- a/ovsdb/log.c +++ b/ovsdb/log.c @@ -552,6 +552,23 @@ ovsdb_log_truncate(struct ovsdb_log *file) return error; } +/* Removes all the data from the log by moving current offset to zero and + * truncating the file to zero bytes. 
After this operation the file is empty + * and in a write state. */ +struct ovsdb_error * OVS_WARN_UNUSED_RESULT +ovsdb_log_reset(struct ovsdb_log *file) +{ + ovsdb_error_destroy(file->error); + file->offset = file->prev_offset = 0; + file->error = ovsdb_log_truncate(file); + if (file->error) { + file->state = OVSDB_LOG_WRITE_ERROR; + return ovsdb_error_clone(file->error); + } + file->state = OVSDB_LOG_WRITE; + return NULL; +} + /* Composes a log record for 'json' by filling 'header' with a header line and * 'data' with a data line (each ending with a new-line). To write the record * to a file, write 'header' followed by 'data'. diff --git a/ovsdb/log.h b/ovsdb/log.h index 90714ea13..63e5681a0 100644 --- a/ovsdb/log.h +++ b/ovsdb/log.h @@ -66,6 +66,9 @@ struct ovsdb_error *ovsdb_log_read(struct ovsdb_log *, struct json **) OVS_WARN_UNUSED_RESULT; void ovsdb_log_unread(struct ovsdb_log *); +struct ovsdb_error *ovsdb_log_reset(struct ovsdb_log *) + OVS_WARN_UNUSED_RESULT; + void ovsdb_log_compose_record(const struct json *, const char *magic, struct ds *header, struct ds *data); diff --git a/ovsdb/monitor.c b/ovsdb/monitor.c index 191befcae..bf5d083cc 100644 --- a/ovsdb/monitor.c +++ b/ovsdb/monitor.c @@ -474,6 +474,7 @@ ovsdb_monitor_add_column(struct ovsdb_monitor *dbmon, enum ovsdb_monitor_selection select, bool monitored) { + struct ovsdb_monitor_change_set *mcs; struct ovsdb_monitor_table *mt; struct ovsdb_monitor_column *c; @@ -484,6 +485,18 @@ ovsdb_monitor_add_column(struct ovsdb_monitor *dbmon, return column->name; } + mcs = dbmon->init_change_set; + if (mcs) { + /* A new column is going to be added to the monitor. Existing + * initial change set doesn't have it, so can no longer be used. + * Initial change set is never used by more than one session at + * the same time, so it's safe to destroy it here. 
*/ + ovs_assert(mcs->n_refs == 1); + ovsdb_monitor_json_cache_destroy(dbmon, mcs); + ovsdb_monitor_change_set_destroy(mcs); + dbmon->init_change_set = NULL; + } + if (mt->n_columns >= mt->allocated_columns) { mt->columns = x2nrealloc(mt->columns, &mt->allocated_columns, sizeof *mt->columns); @@ -609,7 +622,10 @@ ovsdb_monitor_untrack_change_set(struct ovsdb_monitor *dbmon, ovs_assert(mcs); if (--mcs->n_refs == 0) { if (mcs == dbmon->init_change_set) { - dbmon->init_change_set = NULL; + /* The initial change set should exist as long as the + * monitor doesn't change. */ + mcs->n_refs++; + return; } else if (mcs == dbmon->new_change_set) { dbmon->new_change_set = NULL; } diff --git a/ovsdb/ovsdb-doc b/ovsdb/ovsdb-doc index 10d0c0c13..099770d25 100755 --- a/ovsdb/ovsdb-doc +++ b/ovsdb/ovsdb-doc @@ -24,7 +24,7 @@ import ovs.json from ovs.db import error import ovs.db.schema -from build.nroff import * +from ovs_build_helpers.nroff import * argv0 = sys.argv[0] diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c index 33ca4910d..b7b4d1559 100644 --- a/ovsdb/ovsdb-server.c +++ b/ovsdb/ovsdb-server.c @@ -233,7 +233,7 @@ main_loop(struct server_config *config, SHASH_FOR_EACH_SAFE (node, all_dbs) { struct db *db = node->data; - ovsdb_txn_history_run(db->db); + ovsdb_storage_run(db->db->storage); read_db(config, db); /* Run triggers after storage_run and read_db to make sure new raft @@ -573,8 +573,9 @@ close_db(struct server_config *config, struct db *db, char *comment) } } -static void -update_schema(struct ovsdb *db, const struct ovsdb_schema *schema, void *aux) +static struct ovsdb_error * OVS_WARN_UNUSED_RESULT +update_schema(struct ovsdb *db, const struct ovsdb_schema *schema, + bool conversion_with_no_data, void *aux) { struct server_config *config = aux; @@ -586,13 +587,27 @@ update_schema(struct ovsdb *db, const struct ovsdb_schema *schema, void *aux) : xasprintf("database %s connected to storage", db->name))); } - ovsdb_replace(db, 
ovsdb_create(ovsdb_schema_clone(schema), NULL)); + if (db->schema && conversion_with_no_data) { + struct ovsdb *new_db = NULL; + struct ovsdb_error *error; + + error = ovsdb_convert(db, schema, &new_db); + if (error) { + /* Should never happen, because conversion should have been + * checked before writing the schema to the storage. */ + return error; + } + ovsdb_replace(db, new_db); + } else { + ovsdb_replace(db, ovsdb_create(ovsdb_schema_clone(schema), NULL)); + } /* Force update to schema in _Server database. */ struct db *dbp = shash_find_data(config->all_dbs, db->name); if (dbp) { dbp->row_uuid = UUID_ZERO; } + return NULL; } static struct ovsdb_error * OVS_WARN_UNUSED_RESULT @@ -600,23 +615,30 @@ parse_txn(struct server_config *config, struct db *db, const struct ovsdb_schema *schema, const struct json *txn_json, const struct uuid *txnid) { + struct ovsdb_error *error = NULL; + struct ovsdb_txn *txn = NULL; + if (schema) { - /* We're replacing the schema (and the data). Destroy the database - * (first grabbing its storage), then replace it with the new schema. - * The transaction must also include the replacement data. + /* We're replacing the schema (and the data). If transaction includes + * replacement data, destroy the database (first grabbing its storage), + * then replace it with the new schema. If not, it's a conversion + * without data specified. In this case, convert the current database + * to a new schema instead. * * Only clustered database schema changes and snapshot installs * go through this path. 
*/ - ovs_assert(txn_json); ovs_assert(ovsdb_storage_is_clustered(db->db->storage)); - struct ovsdb_error *error = ovsdb_schema_check_for_ephemeral_columns( - schema); + error = ovsdb_schema_check_for_ephemeral_columns(schema); + if (error) { + return error; + } + + error = update_schema(db->db, schema, txn_json == NULL, config); if (error) { return error; } - update_schema(db->db, schema, config); } if (txn_json) { @@ -624,24 +646,26 @@ parse_txn(struct server_config *config, struct db *db, return ovsdb_error(NULL, "%s: data without schema", db->filename); } - struct ovsdb_txn *txn; - struct ovsdb_error *error; - error = ovsdb_file_txn_from_json(db->db, txn_json, false, &txn); - if (!error) { - ovsdb_txn_set_txnid(txnid, txn); - log_and_free_error(ovsdb_txn_replay_commit(txn)); - } - if (!error && !uuid_is_zero(txnid)) { - db->db->prereq = *txnid; - } if (error) { ovsdb_storage_unread(db->db->storage); return error; } + } else if (schema) { + /* We just performed conversion without data. Transaction history + * was destroyed. Commit a dummy transaction to set the txnid. 
*/ + txn = ovsdb_txn_create(db->db); } - return NULL; + if (txn) { + ovsdb_txn_set_txnid(txnid, txn); + error = ovsdb_txn_replay_commit(txn); + if (!error && !uuid_is_zero(txnid)) { + db->db->prereq = *txnid; + } + ovsdb_txn_history_run(db->db); + } + return error; } static void diff --git a/ovsdb/ovsdb-tool.c b/ovsdb/ovsdb-tool.c index 60f353197..facd680ff 100644 --- a/ovsdb/ovsdb-tool.c +++ b/ovsdb/ovsdb-tool.c @@ -1006,7 +1006,8 @@ raft_header_to_standalone_log(const struct raft_header *h, } static void -raft_record_to_standalone_log(const struct raft_record *r, +raft_record_to_standalone_log(const char *db_file_name, + const struct raft_record *r, struct ovsdb_log *db_log_data) { if (r->type == RAFT_REC_ENTRY) { @@ -1018,7 +1019,40 @@ raft_record_to_standalone_log(const struct raft_record *r, if (pa->n != 2) { ovs_fatal(0, "Incorrect raft record array length"); } + + struct json *schema_json = pa->elems[0]; struct json *data_json = pa->elems[1]; + + if (schema_json->type != JSON_NULL) { + /* This is a database conversion record. Reset the log and + * write the new schema. */ + struct ovsdb_schema *schema; + + check_ovsdb_error(ovsdb_schema_from_json(schema_json, &schema)); + + if (data_json->type == JSON_NULL) { + /* We have a conversion request with no data. There is no + * other way as to read back what we have and convert. 
*/ + struct ovsdb *old_db, *new_db; + + check_ovsdb_error(ovsdb_log_commit_block(db_log_data)); + + old_db = ovsdb_file_read(db_file_name, false); + check_ovsdb_error(ovsdb_convert(old_db, schema, &new_db)); + ovsdb_destroy(old_db); + + pa->elems[1] = ovsdb_to_txn_json( + new_db, "converted by ovsdb-tool", true); + ovsdb_destroy(new_db); + + json_destroy(data_json); + data_json = pa->elems[1]; + } + + ovsdb_schema_destroy(schema); + check_ovsdb_error(ovsdb_log_reset(db_log_data)); + check_ovsdb_error(ovsdb_log_write(db_log_data, schema_json)); + } if (data_json->type != JSON_NULL) { check_ovsdb_error(ovsdb_log_write(db_log_data, data_json)); } @@ -1060,6 +1094,7 @@ do_show_log_cluster(struct ovsdb_log *log) free(s); } + json_destroy(json); putchar('\n'); } @@ -1636,7 +1671,8 @@ do_compare_versions(struct ovs_cmdl_context *ctx) } static void -do_convert_to_standalone(struct ovsdb_log *log, struct ovsdb_log *db_log_data) +do_convert_to_standalone(const char *db_file_name, + struct ovsdb_log *log, struct ovsdb_log *db_log_data) { for (unsigned int i = 0; ; i++) { struct json *json; @@ -1653,7 +1689,7 @@ do_convert_to_standalone(struct ovsdb_log *log, struct ovsdb_log *db_log_data) } else { struct raft_record r; check_ovsdb_error(raft_record_from_json(&r, json)); - raft_record_to_standalone_log(&r, db_log_data); + raft_record_to_standalone_log(db_file_name, &r, db_log_data); raft_record_uninit(&r); } json_destroy(json); @@ -1676,7 +1712,7 @@ do_cluster_standalone(struct ovs_cmdl_context *ctx) if (strcmp(ovsdb_log_get_magic(log), RAFT_MAGIC) != 0) { ovs_fatal(0, "Database is not clustered db.\n"); } - do_convert_to_standalone(log, db_log_data); + do_convert_to_standalone(db_file_name, log, db_log_data); check_ovsdb_error(ovsdb_log_commit_block(db_log_data)); ovsdb_log_close(db_log_data); ovsdb_log_close(log); diff --git a/ovsdb/relay.c b/ovsdb/relay.c index 9ff6ed8f3..5a2b4b3b6 100644 --- a/ovsdb/relay.c +++ b/ovsdb/relay.c @@ -301,6 +301,8 @@ static void 
ovsdb_relay_parse_update(struct relay_ctx *ctx, const struct ovsdb_cs_update_event *update) { + struct ovsdb_error *error = NULL; + if (!ctx->db) { return; } @@ -308,15 +310,27 @@ ovsdb_relay_parse_update(struct relay_ctx *ctx, if (update->monitor_reply && ctx->new_schema) { /* There was a schema change. Updating a database with a new schema * before processing monitor reply with the new data. */ - ctx->schema_change_cb(ctx->db, ctx->new_schema, - ctx->schema_change_aux); + error = ctx->schema_change_cb(ctx->db, ctx->new_schema, false, + ctx->schema_change_aux); + if (error) { + /* Should never happen, but handle this case anyway. */ + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + char *s = ovsdb_error_to_string_free(error); + + VLOG_ERR_RL(&rl, "%s", s); + free(s); + + ovsdb_cs_flag_inconsistency(ctx->cs); + return; + } ovsdb_schema_destroy(ctx->new_schema); ctx->new_schema = NULL; } struct ovsdb_cs_db_update *du; - struct ovsdb_error *error = ovsdb_cs_parse_db_update(update->table_updates, - update->version, &du); + + error = ovsdb_cs_parse_db_update(update->table_updates, + update->version, &du); if (!error) { if (update->clear) { error = ovsdb_relay_clear(ctx->db); @@ -386,6 +400,7 @@ ovsdb_relay_run(void) } ovsdb_cs_event_destroy(event); } + ovsdb_txn_history_run(ctx->db); } } diff --git a/ovsdb/relay.h b/ovsdb/relay.h index 390ea70c8..2d66b5e5f 100644 --- a/ovsdb/relay.h +++ b/ovsdb/relay.h @@ -23,8 +23,11 @@ struct json; struct ovsdb; struct ovsdb_schema; -typedef void (*schema_change_callback)(struct ovsdb *, - const struct ovsdb_schema *, void *aux); +typedef struct ovsdb_error *(*schema_change_callback)( + struct ovsdb *, + const struct ovsdb_schema *, + bool conversion_with_no_data, + void *aux); void ovsdb_relay_add_db(struct ovsdb *, const char *remote, schema_change_callback schema_change_cb, diff --git a/ovsdb/row.c b/ovsdb/row.c index d7bfbdd36..2b52b6816 100644 --- a/ovsdb/row.c +++ b/ovsdb/row.c @@ -302,12 +302,14 @@ 
ovsdb_row_columns_to_string(const struct ovsdb_row *row, struct ovsdb_error * ovsdb_row_from_json(struct ovsdb_row *row, const struct json *json, struct ovsdb_symbol_table *symtab, - struct ovsdb_column_set *included) + struct ovsdb_column_set *included, bool is_diff) { struct ovsdb_table_schema *schema = row->table->schema; struct ovsdb_error *error; struct shash_node *node; + ovs_assert(!is_diff || !symtab); + if (json->type != JSON_OBJECT) { return ovsdb_syntax_error(json, NULL, "row must be JSON object"); } @@ -324,8 +326,13 @@ ovsdb_row_from_json(struct ovsdb_row *row, const struct json *json, column_name, schema->name); } - error = ovsdb_datum_from_json(&datum, &column->type, node->data, - symtab); + if (is_diff) { + error = ovsdb_transient_datum_from_json(&datum, &column->type, + node->data); + } else { + error = ovsdb_datum_from_json(&datum, &column->type, node->data, + symtab); + } if (error) { return error; } diff --git a/ovsdb/row.h b/ovsdb/row.h index ff91288fe..59f498a20 100644 --- a/ovsdb/row.h +++ b/ovsdb/row.h @@ -114,7 +114,8 @@ void ovsdb_row_columns_to_string(const struct ovsdb_row *, struct ovsdb_error *ovsdb_row_from_json(struct ovsdb_row *, const struct json *, struct ovsdb_symbol_table *, - struct ovsdb_column_set *included) + struct ovsdb_column_set *included, + bool is_diff) OVS_WARN_UNUSED_RESULT; struct json *ovsdb_row_to_json(const struct ovsdb_row *, const struct ovsdb_column_set *include); diff --git a/ovsdb/storage.c b/ovsdb/storage.c index e8f95ce64..6c395106c 100644 --- a/ovsdb/storage.c +++ b/ovsdb/storage.c @@ -623,7 +623,7 @@ ovsdb_storage_store_snapshot(struct ovsdb_storage *storage, struct ovsdb_write * OVS_WARN_UNUSED_RESULT ovsdb_storage_write_schema_change(struct ovsdb_storage *storage, - const struct json *schema, + const struct ovsdb_schema *schema, const struct json *data, const struct uuid *prereq, struct uuid *resultp) @@ -633,13 +633,23 @@ ovsdb_storage_write_schema_change(struct ovsdb_storage *storage, if 
(storage->error) { w->error = ovsdb_error_clone(storage->error); } else if (storage->raft) { - struct json *txn_json = json_array_create_2(json_clone(schema), - json_clone(data)); - w->command = raft_command_execute(storage->raft, txn_json, - prereq, &result); - json_destroy(txn_json); + /* Clustered storage doesn't support ephemeral columns. */ + w->error = ovsdb_schema_check_for_ephemeral_columns(schema); + if (!w->error) { + struct json *schema_json, *txn_json; + + schema_json = ovsdb_schema_to_json(schema); + txn_json = json_array_create_2(schema_json, json_clone(data)); + w->command = raft_command_execute(storage->raft, txn_json, + prereq, &result); + json_destroy(txn_json); + } } else if (storage->log) { - w->error = ovsdb_storage_store_snapshot__(storage, schema, data, 0); + struct json *schema_json = ovsdb_schema_to_json(schema); + + w->error = ovsdb_storage_store_snapshot__(storage, schema_json, + data, 0); + json_destroy(schema_json); } else { /* When 'error' and 'command' are both null, it indicates that the * command is complete. 
This is fine since this unbacked storage drops diff --git a/ovsdb/storage.h b/ovsdb/storage.h index a1fdaa564..05f40ce93 100644 --- a/ovsdb/storage.h +++ b/ovsdb/storage.h @@ -85,7 +85,7 @@ struct ovsdb_error *ovsdb_storage_store_snapshot(struct ovsdb_storage *storage, struct ovsdb_write *ovsdb_storage_write_schema_change( struct ovsdb_storage *, - const struct json *schema, const struct json *data, + const struct ovsdb_schema *, const struct json *data, const struct uuid *prereq, struct uuid *result) OVS_WARN_UNUSED_RESULT; diff --git a/ovsdb/table.c b/ovsdb/table.c index 66071ce2f..0792e1580 100644 --- a/ovsdb/table.c +++ b/ovsdb/table.c @@ -368,7 +368,8 @@ ovsdb_table_execute_insert(struct ovsdb_txn *txn, const struct uuid *row_uuid, struct ovsdb_row *row = ovsdb_row_create(table); - struct ovsdb_error *error = ovsdb_row_from_json(row, json_row, NULL, NULL); + struct ovsdb_error *error = ovsdb_row_from_json(row, json_row, + NULL, NULL, false); if (!error) { *ovsdb_row_get_uuid_rw(row) = *row_uuid; ovsdb_txn_row_insert(txn, row); @@ -411,7 +412,7 @@ ovsdb_table_execute_update(struct ovsdb_txn *txn, const struct uuid *row_uuid, struct ovsdb_column_set columns = OVSDB_COLUMN_SET_INITIALIZER; struct ovsdb_row *update = ovsdb_row_create(table); struct ovsdb_error *error = ovsdb_row_from_json(update, json_row, - NULL, &columns); + NULL, &columns, xor); if (!error && (xor || !ovsdb_row_equal_columns(row, update, &columns))) { error = ovsdb_row_update_columns(ovsdb_txn_row_modify(txn, row), diff --git a/ovsdb/transaction.c b/ovsdb/transaction.c index 03541af85..f01de2a34 100644 --- a/ovsdb/transaction.c +++ b/ovsdb/transaction.c @@ -1251,7 +1251,7 @@ ovsdb_txn_precheck_prereq(const struct ovsdb *db) struct ovsdb_txn_progress * ovsdb_txn_propose_schema_change(struct ovsdb *db, - const struct json *schema, + const struct ovsdb_schema *schema, const struct json *data) { struct ovsdb_txn_progress *progress = xzalloc(sizeof *progress); diff --git a/ovsdb/transaction.h 
b/ovsdb/transaction.h index 6b5bb7f24..9991f34d2 100644 --- a/ovsdb/transaction.h +++ b/ovsdb/transaction.h @@ -21,6 +21,7 @@ struct json; struct ovsdb; +struct ovsdb_schema; struct ovsdb_table; struct uuid; @@ -41,7 +42,7 @@ struct ovsdb_error *ovsdb_txn_propose_commit_block(struct ovsdb_txn *, void ovsdb_txn_complete(struct ovsdb_txn *); struct ovsdb_txn_progress *ovsdb_txn_propose_schema_change( - struct ovsdb *, const struct json *schema, const struct json *data); + struct ovsdb *, const struct ovsdb_schema *, const struct json *data); bool ovsdb_txn_progress_is_complete(const struct ovsdb_txn_progress *); const struct ovsdb_error *ovsdb_txn_progress_get_error( diff --git a/ovsdb/trigger.c b/ovsdb/trigger.c index 01bb80e28..3c93ae580 100644 --- a/ovsdb/trigger.c +++ b/ovsdb/trigger.c @@ -274,8 +274,8 @@ ovsdb_trigger_try(struct ovsdb_trigger *t, long long int now) if (!error) { error = ovsdb_convert(t->db, new_schema, &newdb); } - ovsdb_schema_destroy(new_schema); if (error) { + ovsdb_schema_destroy(new_schema); trigger_convert_error(t, error); return false; } @@ -286,7 +286,8 @@ ovsdb_trigger_try(struct ovsdb_trigger *t, long long int now) /* Propose the change. */ t->progress = ovsdb_txn_propose_schema_change( - t->db, new_schema_json, txn_json); + t->db, new_schema, txn_json); + ovsdb_schema_destroy(new_schema); json_destroy(txn_json); t->reply = jsonrpc_create_reply(json_object_create(), t->request->id); diff --git a/python/automake.mk b/python/automake.mk index d00911828..b0f444169 100644 --- a/python/automake.mk +++ b/python/automake.mk @@ -64,10 +64,10 @@ ovs_pytests = \ # These python files are used at build time but not runtime, # so they are not installed. 
EXTRA_DIST += \ - python/build/__init__.py \ - python/build/extract_ofp_fields.py \ - python/build/nroff.py \ - python/build/soutil.py + python/ovs_build_helpers/__init__.py \ + python/ovs_build_helpers/extract_ofp_fields.py \ + python/ovs_build_helpers/nroff.py \ + python/ovs_build_helpers/soutil.py # PyPI support. EXTRA_DIST += \ @@ -86,10 +86,10 @@ PYCOV_CLEAN_FILES += $(PYFILES:.py=.py,cover) FLAKE8_PYFILES += \ $(filter-out python/ovs/compat/% python/ovs/dirs.py,$(PYFILES)) \ - python/build/__init__.py \ - python/build/extract_ofp_fields.py \ - python/build/nroff.py \ - python/build/soutil.py \ + python/ovs_build_helpers/__init__.py \ + python/ovs_build_helpers/extract_ofp_fields.py \ + python/ovs_build_helpers/nroff.py \ + python/ovs_build_helpers/soutil.py \ python/ovs/dirs.py.template \ python/setup.py @@ -110,11 +110,14 @@ ovs-install-data-local: $(INSTALL_DATA) python/ovs/dirs.py.tmp $(DESTDIR)$(pkgdatadir)/python/ovs/dirs.py rm python/ovs/dirs.py.tmp +.PHONY: python-sdist python-sdist: $(srcdir)/python/ovs/version.py $(ovs_pyfiles) python/ovs/dirs.py - (cd python/ && $(PYTHON3) setup.py sdist) + cd python/ && $(PYTHON3) -m build --sdist + +.PHONY: pypi-upload +pypi-upload: python-sdist + twine upload python/dist/ovs-$(VERSION).tar.gz -pypi-upload: $(srcdir)/python/ovs/version.py $(ovs_pyfiles) python/ovs/dirs.py - (cd python/ && $(PYTHON3) setup.py sdist upload) install-data-local: ovs-install-data-local UNINSTALL_LOCAL += ovs-uninstall-local diff --git a/python/ovs/db/idl.py b/python/ovs/db/idl.py index 9fc2159b0..16ece0334 100644 --- a/python/ovs/db/idl.py +++ b/python/ovs/db/idl.py @@ -494,6 +494,7 @@ class Idl(object): if not msg.result[0]: self.__clear() self.__parse_update(msg.result[2], OVSDB_UPDATE3) + self.last_id = msg.result[1] elif self.state == self.IDL_S_DATA_MONITOR_COND_REQUESTED: self.__clear() self.__parse_update(msg.result, OVSDB_UPDATE2) diff --git a/python/ovs/stream.py b/python/ovs/stream.py index ac5b0fd0c..b32341076 100644 --- 
a/python/ovs/stream.py +++ b/python/ovs/stream.py @@ -824,7 +824,8 @@ class SSLStream(Stream): self.socket.do_handshake() except ssl.SSLWantReadError: return errno.EAGAIN - except ssl.SSLSyscallError as e: + except (ssl.SSLSyscallError, ssl.SSLZeroReturnError, + ssl.SSLEOFError, OSError) as e: return ovs.socket_util.get_exception_errno(e) return 0 diff --git a/python/build/__init__.py b/python/ovs_build_helpers/__init__.py similarity index 100% rename from python/build/__init__.py rename to python/ovs_build_helpers/__init__.py diff --git a/python/build/extract_ofp_fields.py b/python/ovs_build_helpers/extract_ofp_fields.py similarity index 100% rename from python/build/extract_ofp_fields.py rename to python/ovs_build_helpers/extract_ofp_fields.py diff --git a/python/build/nroff.py b/python/ovs_build_helpers/nroff.py similarity index 100% rename from python/build/nroff.py rename to python/ovs_build_helpers/nroff.py diff --git a/python/build/soutil.py b/python/ovs_build_helpers/soutil.py similarity index 100% rename from python/build/soutil.py rename to python/ovs_build_helpers/soutil.py diff --git a/tests/.gitignore b/tests/.gitignore index 83b1cb3b4..3a8c45975 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -3,6 +3,7 @@ /Makefile.in /atconfig /atlocal +/clang-analyzer-results/ /idltest.c /idltest.h /idltest.ovsidl diff --git a/tests/automake.mk b/tests/automake.mk index c8de3fe28..e39453cd1 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -110,7 +110,8 @@ TESTSUITE_AT = \ tests/mcast-snooping.at \ tests/packet-type-aware.at \ tests/nsh.at \ - tests/drop-stats.at + tests/drop-stats.at \ + tests/learning-switch.at EXTRA_DIST += $(FUZZ_REGRESSION_TESTS) FUZZ_REGRESSION_TESTS = \ diff --git a/tests/classifier.at b/tests/classifier.at index f652b5983..de2705653 100644 --- a/tests/classifier.at +++ b/tests/classifier.at @@ -65,6 +65,94 @@ Datapath actions: 2 OVS_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([flow classifier - lookup segmentation - final stage]) 
+OVS_VSWITCHD_START +add_of_ports br0 1 2 3 +AT_DATA([flows.txt], [dnl +table=0 in_port=1 priority=33,tcp,tp_dst=80,tcp_flags=+psh,action=output(2) +table=0 in_port=1 priority=0,ip,action=drop +table=0 in_port=2 priority=16,icmp6,nw_ttl=255,icmp_type=135,icmp_code=0,nd_target=1000::1 ,action=output(1) +table=0 in_port=2 priority=0,ip,action=drop +table=0 in_port=3 action=resubmit(,1) +table=1 in_port=3 priority=45,ct_state=+trk+rpl,ct_nw_proto=6,ct_tp_src=3/0x1,tcp,tp_dst=80,tcp_flags=+psh,action=output(2) +table=1 in_port=3 priority=10,ip,action=drop +]) +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) + +AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80,tcp_flags=syn'], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,eth,tcp,in_port=1,nw_frag=no,tp_dst=80,tcp_flags=-psh +Datapath actions: drop +]) +AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80,tcp_flags=syn|ack'], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,eth,tcp,in_port=1,nw_frag=no,tp_dst=80,tcp_flags=-psh +Datapath actions: drop +]) +AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80,tcp_flags=ack|psh'], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,eth,tcp,in_port=1,nw_frag=no,tp_dst=80,tcp_flags=+psh +Datapath actions: 2 +]) +AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80'], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], 
+ [Megaflow: recirc_id=0,eth,tcp,in_port=1,nw_frag=no,tp_dst=80,tcp_flags=-psh +Datapath actions: drop +]) +AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79'], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,eth,tcp,in_port=1,nw_frag=no,tp_dst=0x40/0xfff0,tcp_flags=-psh +Datapath actions: drop +]) + +dnl Having both the port and the tcp flags in the resulting megaflow below +dnl is redundant, but that is how ports trie logic is implemented. +AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=81'], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,eth,tcp,in_port=1,nw_frag=no,tp_dst=81,tcp_flags=-psh +Datapath actions: drop +]) + +dnl nd_target is redundant in the megaflow below and it is also not relevant +dnl for an icmp reply. Datapath may discard that match, but it is OK as long +dnl as we have prerequisites (icmp_type) in the match as well. 
+AT_CHECK([ovs-appctl ofproto/trace br0 "in_port=2,eth_src=f6:d2:b0:19:5e:7b,eth_dst=d2:49:19:91:78:fe,dl_type=0x86dd,ipv6_src=1000::3,ipv6_dst=1000::4,nw_proto=58,nw_ttl=255,icmpv6_type=128,icmpv6_code=0"], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,eth,icmp6,in_port=2,nw_ttl=255,nw_frag=no,icmp_type=0x80/0xfc,nd_target=:: +Datapath actions: drop +]) + +AT_CHECK([ovs-appctl ofproto/trace br0 "in_port=2,eth_src=f6:d2:b0:19:5e:7b,eth_dst=d2:49:19:91:78:fe,dl_type=0x86dd,ipv6_src=1000::3,ipv6_dst=1000::4,nw_proto=58,nw_ttl=255,icmpv6_type=135,icmpv6_code=0"], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,eth,icmp6,in_port=2,nw_ttl=255,nw_frag=no,icmp_type=0x87/0xff,icmp_code=0x0/0xff,nd_target=:: +Datapath actions: drop +]) +AT_CHECK([ovs-appctl ofproto/trace br0 "in_port=2,eth_src=f6:d2:b0:19:5e:7b,eth_dst=d2:49:19:91:78:fe,dl_type=0x86dd,ipv6_src=1000::3,ipv6_dst=1000::4,nw_proto=58,nw_ttl=255,icmpv6_type=135,icmpv6_code=0,nd_target=1000::1"], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,eth,icmp6,in_port=2,nw_ttl=255,nw_frag=no,icmp_type=0x87/0xff,icmp_code=0x0/0xff,nd_target=1000::1 +Datapath actions: 1 +]) +AT_CHECK([ovs-appctl ofproto/trace br0 "in_port=2,eth_src=f6:d2:b0:19:5e:7b,eth_dst=d2:49:19:91:78:fe,dl_type=0x86dd,ipv6_src=1000::3,ipv6_dst=1000::4,nw_proto=58,nw_ttl=255,icmpv6_type=135,icmpv6_code=0,nd_target=1000::2"], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,eth,icmp6,in_port=2,nw_ttl=255,nw_frag=no,icmp_type=0x87/0xff,icmp_code=0x0/0xff,nd_target=1000::2 +Datapath actions: drop +]) + +dnl Check that ports' mask doesn't affect ct ports. 
+AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=3,ct_state=trk|rpl,ct_nw_proto=6,ct_tp_src=3,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=80,tcp_flags=psh'], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,ct_state=+rpl+trk,ct_nw_proto=6,ct_tp_src=0x1/0x1,eth,tcp,in_port=3,nw_frag=no,tp_dst=80,tcp_flags=+psh +Datapath actions: 2 +]) +AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=3,ct_state=trk|rpl,ct_nw_proto=6,ct_tp_src=3,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=6,nw_tos=0,nw_ttl=128,tp_src=8,tp_dst=79,tcp_flags=psh'], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,ct_state=+rpl+trk,ct_nw_proto=6,ct_tp_src=0x1/0x1,eth,tcp,in_port=3,nw_frag=no,tp_dst=0x40/0xfff0,tcp_flags=+psh +Datapath actions: drop +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + AT_BANNER([flow classifier prefix lookup]) AT_SETUP([flow classifier - prefix lookup]) OVS_VSWITCHD_START diff --git a/tests/dpctl.at b/tests/dpctl.at index 7454a51ec..d2f1046f8 100644 --- a/tests/dpctl.at +++ b/tests/dpctl.at @@ -135,3 +135,19 @@ AT_CHECK([ovs-appctl dpctl/dump-flows dummy@br0 | sort], [0], [dnl AT_CHECK([ovs-appctl dpctl/del-dp dummy@br0]) OVS_VSWITCHD_STOP AT_CLEANUP + +AT_SETUP([dpctl - ct-get-limits ct-del-limits]) +OVS_VSWITCHD_START +AT_CHECK([ovs-appctl dpctl/ct-get-limits], [0], [default limit=0 +]) +AT_CHECK([ovs-appctl dpctl/ct-get-limits zone=], [0], [default limit=0 +]) +AT_CHECK([ovs-appctl dpctl/ct-get-limits zone=,], [0], [default limit=0 +]) +AT_CHECK([ovs-appctl dpctl/ct-get-limits zone=x], [2], [], + [ovs-vswitchd: invalid zone (Invalid argument) +ovs-appctl: ovs-vswitchd: server returned an error +]) +AT_CHECK([ovs-appctl dpctl/ct-del-limits zone=]) +OVS_VSWITCHD_STOP +AT_CLEANUP \ No newline at end of file diff --git a/tests/learn.at b/tests/learn.at index 
d127fed34..d0bcc8363 100644 --- a/tests/learn.at +++ b/tests/learn.at @@ -836,3 +836,63 @@ AT_CHECK([ovs-vsctl add-br br1 -- set b br1 datapath_type=dummy]) OVS_VSWITCHD_STOP AT_CLEANUP + +AT_SETUP([learning action - flapping learn rule]) +OVS_VSWITCHD_START +add_of_ports br0 1 2 3 + +AT_CHECK([ovs-appctl time/stop], [0], [ignore]) +AT_CHECK([[ovs-ofctl add-flow br0 'table=0,priority=2,in_port=1,actions=resubmit(,2)']]) +AT_CHECK([[ovs-ofctl add-flow br0 'table=0,priority=2,in_port=2,actions=resubmit(,2)']]) +AT_CHECK([[ovs-ofctl add-flow br0 'table=2,actions=learn(table=0,hard_timeout=3,priority=1,cookie=0x123,NXM_OF_ETH_DST[]=NXM_OF_ETH_SRC[],output:OXM_OF_IN_PORT[]),output:3']]) + +packet="eth(src=50:54:00:00:00:06,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9)" + +dnl Run this test a few times in a loop to reduce the likelihood that it passes by chance. +for i in 1 2 3; do + AT_CHECK([ovs-appctl revalidator/pause], [0]) + AT_CHECK([ovs-appctl netdev-dummy/receive p2 $packet], [0]) + AT_CHECK([ovs-appctl time/warp 75], [0], [ignore]) + AT_CHECK([ovs-appctl netdev-dummy/receive p1 $packet], [0]) + AT_CHECK([ovs-appctl time/warp 75], [0], [ignore]) + AT_CHECK([ovs-appctl netdev-dummy/receive p2 $packet], [0]) + AT_CHECK([ovs-appctl time/warp 75], [0], [ignore]) + AT_CHECK([ovs-appctl netdev-dummy/receive p1 $packet], [0]) + AT_CHECK([ovs-appctl time/warp 75], [0], [ignore]) + + AT_CHECK([ovs-appctl revalidator/resume], [0]) + AT_CHECK([ovs-appctl revalidator/wait], [0]) + + AT_CHECK([ovs-ofctl --no-stats dump-flows br0 | ofctl_strip | sort | grep 0x123], [0], [dnl + cookie=0x123, hard_timeout=3, priority=1,dl_dst=50:54:00:00:00:06 actions=output:1 + table=2, actions=learn(table=0,hard_timeout=3,priority=1,cookie=0x123,NXM_OF_ETH_DST[[]]=NXM_OF_ETH_SRC[[]],output:OXM_OF_IN_PORT[[]]),output:3 +]) + + AT_CHECK([ovs-appctl revalidator/pause], [0]) + AT_CHECK([ovs-appctl netdev-dummy/receive
p1 $packet], [0]) + AT_CHECK([ovs-appctl time/warp 75], [0], [ignore]) + AT_CHECK([ovs-appctl netdev-dummy/receive p2 $packet], [0]) + AT_CHECK([ovs-appctl time/warp 75], [0], [ignore]) + AT_CHECK([ovs-appctl netdev-dummy/receive p1 $packet], [0]) + AT_CHECK([ovs-appctl time/warp 75], [0], [ignore]) + AT_CHECK([ovs-appctl netdev-dummy/receive p2 $packet], [0]) + AT_CHECK([ovs-appctl time/warp 75], [0], [ignore]) + + AT_CHECK([ovs-appctl revalidator/resume], [0]) + AT_CHECK([ovs-appctl revalidator/wait], [0]) + + AT_CHECK([ovs-ofctl --no-stats dump-flows br0 | ofctl_strip | sort | grep 0x123], [0], [dnl + cookie=0x123, hard_timeout=3, priority=1,dl_dst=50:54:00:00:00:06 actions=output:2 + table=2, actions=learn(table=0,hard_timeout=3,priority=1,cookie=0x123,NXM_OF_ETH_DST[[]]=NXM_OF_ETH_SRC[[]],output:OXM_OF_IN_PORT[[]]),output:3 +]) +done + +dnl Wait and check for learned rule eviction due to hard timeout. +AT_CHECK([ovs-appctl time/warp 3200], [0], [ignore]) + +AT_CHECK([ovs-ofctl --no-stats dump-flows br0 | ofctl_strip | grep 0x123], [0], [dnl + table=2, actions=learn(table=0,hard_timeout=3,priority=1,cookie=0x123,NXM_OF_ETH_DST[[]]=NXM_OF_ETH_SRC[[]],output:OXM_OF_IN_PORT[[]]),output:3 +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP diff --git a/tests/learning-switch.at b/tests/learning-switch.at new file mode 100644 index 000000000..ac2fc1b80 --- /dev/null +++ b/tests/learning-switch.at @@ -0,0 +1,23 @@ +AT_BANNER([learning switch]) + +### ----------------------------------------------------------------- +### learning switch OpenFlow15 test case +### ----------------------------------------------------------------- + +AT_SETUP([learning switch - OpenFlow15]) +dnl Start ovs-testcontroller +AT_CHECK([ovs-testcontroller --no-chdir --detach punix:controller --pidfile -v ptcp:], [0], [ignore]) +dnl Start ovs +OVS_VSWITCHD_START([dnl + set bridge br0 datapath_type=dummy \ + protocols=OpenFlow15 -- \ + add-port br0 p1 -- set Interface p1 type=dummy ofport_request=1 -- \ + 
set-controller br0 tcp:127.0.0.1:6653]) +AT_CHECK([ + ovs-appctl netdev-dummy/receive p1 1e2ce92a669e3a6dd2099cab0800450000548a53400040011addc0a80a0ac0a80a1e08006f200a4d0001fc509a58000000002715020000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637 +], [0], [ignore]) +AT_CHECK([kill `cat ovs-testcontroller.pid`]) + +OVS_WAIT_UNTIL([! test -e controller]) +OVS_VSWITCHD_STOP(["/cannot find route for controller/d"]) +AT_CLEANUP diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index fa6111c1e..450483bee 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -849,7 +849,7 @@ table=2 ip actions=set_field:192.168.3.91->ip_src,output(11) AT_CHECK([ovs-ofctl -O OpenFlow12 add-flows br0 flows.txt]) AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,dl_type=0x0800,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=1,nw_tos=0,nw_ttl=128,nw_frag=no,icmp_type=8,icmp_code=0'], [0], [stdout]) AT_CHECK([tail -2 stdout], [0], - [Megaflow: recirc_id=0,eth,ip,in_port=1,nw_src=192.168.0.1,nw_frag=no + [Megaflow: recirc_id=0,eth,icmp,in_port=1,nw_src=192.168.0.1,nw_frag=no Datapath actions: 10,set(ipv4(src=192.168.3.91)),11,set(ipv4(src=192.168.3.90)),13 ]) OVS_VSWITCHD_STOP @@ -912,7 +912,7 @@ AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_ds # Must match on the source address to be able to restore it's value for # the second bucket AT_CHECK([tail -2 stdout], [0], - [Megaflow: recirc_id=0,eth,ip,in_port=1,nw_src=192.168.0.1,nw_frag=no + [Megaflow: recirc_id=0,eth,icmp,in_port=1,nw_src=192.168.0.1,nw_frag=no Datapath actions: set(ipv4(src=192.168.3.90)),10,set(ipv4(src=192.168.0.1)),11 ]) OVS_VSWITCHD_STOP @@ -944,7 +944,7 @@ done AT_CHECK([ovs-appctl dpctl/dump-flows | sed 's/dp_hash(.*\/0xf)/dp_hash(0xXXXX\/0xf)/' | sed 's/packets.*actions:/actions:/' | strip_ufid | strip_used | sort], [0], [dnl flow-dump from the main thread: 
recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), actions:hash(sym_l4(0)),recirc(0x1) -recirc_id(0x1),dp_hash(0xXXXX/0xf),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(src=192.168.0.1,frag=no), actions:set(ipv4(src=192.168.3.90)),10,set(ipv4(src=192.168.0.1)),10 +recirc_id(0x1),dp_hash(0xXXXX/0xf),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(src=192.168.0.1,proto=1,frag=no), actions:set(ipv4(src=192.168.3.90)),10,set(ipv4(src=192.168.0.1)),10 ]) OVS_VSWITCHD_STOP @@ -959,7 +959,7 @@ AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=1,dl_src=50:54:00:00:00:05,dl_ds # Must match on the source address to be able to restore it's value for # the third bucket AT_CHECK([tail -2 stdout], [0], - [Megaflow: recirc_id=0,eth,ip,in_port=1,nw_src=192.168.0.1,nw_frag=no + [Megaflow: recirc_id=0,eth,icmp,in_port=1,nw_src=192.168.0.1,nw_frag=no Datapath actions: set(ipv4(src=192.168.3.90)),10,set(ipv4(src=192.168.0.1)),11 ]) OVS_VSWITCHD_STOP @@ -1536,17 +1536,17 @@ AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=111,tos=0,ttl=2,frag=no)' -generate], [0], [stdout]) AT_CHECK([tail -4 stdout], [0], [ Final flow: ip,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:07,nw_src=192.168.0.1,nw_dst=192.168.0.2,nw_proto=111,nw_tos=0,nw_ecn=0,nw_ttl=1,nw_frag=no -Megaflow: recirc_id=0,eth,ip,in_port=1,nw_ttl=2,nw_frag=no +Megaflow: recirc_id=0,eth,ip,in_port=1,nw_proto=111,nw_ttl=2,nw_frag=no Datapath actions: set(ipv4(ttl=1)),2,userspace(pid=0,controller(reason=2,dont_send=0,continuation=0,recirc_id=1,rule_cookie=0,controller_id=0,max_len=65535)),4 ]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=111,tos=0,ttl=3,frag=no)'], [0], [stdout]) 
AT_CHECK([tail -2 stdout], [0], - [Megaflow: recirc_id=0,eth,ip,in_port=1,nw_ttl=3,nw_frag=no + [Megaflow: recirc_id=0,eth,ip,in_port=1,nw_proto=111,nw_ttl=3,nw_frag=no Datapath actions: set(ipv4(ttl=2)),2,set(ipv4(ttl=1)),3,4 ]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x86dd),ipv6(src=::1,dst=::2,label=0,proto=10,tclass=0x70,hlimit=128,frag=no)'], [0], [stdout]) AT_CHECK([tail -2 stdout], [0], - [Megaflow: recirc_id=0,eth,ipv6,in_port=1,nw_ttl=128,nw_frag=no + [Megaflow: recirc_id=0,eth,ipv6,in_port=1,nw_proto=10,nw_ttl=128,nw_frag=no Datapath actions: set(ipv6(hlimit=127)),2,set(ipv6(hlimit=126)),3,4 ]) @@ -1656,7 +1656,7 @@ AT_CHECK([ovs-vsctl -- \ --id=@q2 create Queue dscp=2], [0], [ignore]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(9),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=1.1.1.1,dst=2.2.2.2,proto=1,tos=0xff,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout]) AT_CHECK([tail -2 stdout], [0], - [Megaflow: recirc_id=0,skb_priority=0,eth,ip,in_port=9,nw_tos=252,nw_frag=no + [Megaflow: recirc_id=0,skb_priority=0,eth,icmp,in_port=9,nw_tos=252,nw_frag=no Datapath actions: dnl 100,dnl set(ipv4(tos=0x4/0xfc)),set(skb_priority(0x1)),1,dnl @@ -5349,7 +5349,7 @@ AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) flow="in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)" AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow"], [0], [stdout]) AT_CHECK_UNQUOTED([tail -1 stdout], [0], - [Datapath actions: 3,push_vlan(vid=17,pcp=0),2 + [Datapath actions: 3,push_vlan(vid=17,pcp=0),2,3 ]) flow="in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)" @@ -5388,7 +5388,7 @@ 
flow="in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x080 AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow"], [0], [stdout]) actual=`tail -1 stdout | sed 's/Datapath actions: //'` -expected="push_vlan(vid=17,pcp=0),1,pop_vlan,push_vlan(vid=12,pcp=0),1,2,100" +expected="push_vlan(vid=12,pcp=0),100,2,1,pop_vlan,push_vlan(vid=17,pcp=0),1,pop_vlan,push_vlan(vid=12,pcp=0),100,2,1" AT_CHECK([ovs-dpctl normalize-actions "$flow" "$expected"], [0], [stdout]) mv stdout expout AT_CHECK([ovs-dpctl normalize-actions "$flow" "$actual"], [0], [expout]) @@ -5656,7 +5656,7 @@ AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) flow="in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)" AT_CHECK([ovs-appctl ofproto/trace ovs-dummy "$flow"], [0], [stdout]) AT_CHECK_UNQUOTED([tail -1 stdout], [0], - [Datapath actions: trunc(100),3,push_vlan(vid=17,pcp=0),2 + [Datapath actions: trunc(100),3,push_vlan(vid=17,pcp=0),2,trunc(100),3 ]) flow="in_port(2),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)" @@ -5854,6 +5854,40 @@ OVS_WAIT_UNTIL([check_flows], [ovs-ofctl dump-flows br0]) OVS_VSWITCHD_STOP AT_CLEANUP +dnl Checks for regression against a bug in which OVS dropped packets +dnl originating from a controller passing through a patch port. 
+AT_SETUP([ofproto-dpif - packet-out recirculation OFPP_CONTROLLER and patch port]) +OVS_VSWITCHD_START( + [add-port br0 patch-br1 -- \ + set interface patch-br1 type=patch options:peer=patch-br0 -- \ + add-br br1 -- set bridge br1 datapath-type=dummy fail-mode=secure -- \ + add-port br1 patch-br0 -- set interface patch-br0 type=patch options:peer=patch-br1 +]) + +add_of_ports --pcap br1 1 + +AT_DATA([flows-br0.txt], [dnl +table=0 icmp actions=output:patch-br1 +]) +AT_CHECK([ovs-ofctl add-flows br0 flows-br0.txt]) + +AT_DATA([flows-br1.txt], [dnl +table=0, icmp actions=ct(table=1,zone=1) +table=1, ct_state=+trk, icmp actions=p1 +]) +AT_CHECK([ovs-ofctl add-flows br1 flows-br1.txt]) + +packet=50540000000750540000000508004500005c000000008001b94dc0a80001c0a80002080013fc00000000000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f +AT_CHECK([ovs-ofctl packet-out br0 "in_port=CONTROLLER packet=$packet actions=table"]) + +OVS_WAIT_UNTIL_EQUAL([ovs-ofctl dump-flows -m br1 | grep "ct_state" | ofctl_strip], [dnl + table=1, n_packets=1, n_bytes=106, ct_state=+trk,icmp actions=output:2]) + +OVS_WAIT_UNTIL([ovs-pcap p1-tx.pcap | grep -q "$packet"]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([ofproto-dpif - debug_slow action]) OVS_VSWITCHD_START add_of_ports br0 1 2 3 @@ -11884,7 +11918,7 @@ ovs-ofctl dump-flows br0 AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.10.10.2,dst=10.10.10.1,proto=1,tos=1,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout]) AT_CHECK([tail -3 stdout], [0], [dnl -Megaflow: recirc_id=0,eth,ip,reg0=0/0x1,in_port=1,nw_src=10.10.10.2,nw_frag=no +Megaflow: recirc_id=0,eth,icmp,reg0=0/0x1,in_port=1,nw_src=10.10.10.2,nw_frag=no Datapath actions: drop Translation failed (Recursion too deep), packet is dropped. 
]) diff --git a/tests/ofproto.at b/tests/ofproto.at index a666bebca..2fa8486a8 100644 --- a/tests/ofproto.at +++ b/tests/ofproto.at @@ -6538,3 +6538,185 @@ verify_deleted OVS_VSWITCHD_STOP(["/nw_dst,output=2 +table=0 in_port=1 priority=83,ip,nw_dst=192.168.1.15,actions=set_field:192.168.21.26->nw_src,output=2 +table=0 in_port=1 priority=82,ip,nw_dst=192.168.1.14,actions=set_field:0x40->nw_tos,output=2 +table=0 in_port=1 priority=0,actions=drop +]) +AT_CHECK([ovs-ofctl del-flows br0]) +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) + +dnl send a proto 0 packet to try and poison the DP flow path +AT_CHECK([ovs-appctl netdev-dummy/receive p1 \ + '5054000000075054000000050800450000548de140004000289fc0a801c4c0a8011408003bf60002001bbf080a640000000032ad010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637']) + +AT_CHECK([ovs-appctl dpctl/dump-flows], [0], [dnl +flow-dump from the main thread: +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(dst=192.168.1.20,proto=0,frag=no), packets:0, bytes:0, used:never, actions:2 +]) + +dnl Send ICMP for mod nw_src and mod nw_dst +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=192.168.1.1,dst=192.168.1.21,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=192.168.1.1,dst=192.168.1.20,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) + +dnl send ICMP that will dec TTL +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=192.168.1.1,dst=192.168.1.10,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) + +dnl send ICMP that will mod TTL +AT_CHECK([ovs-appctl netdev-dummy/receive p1 
'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=192.168.1.1,dst=192.168.1.19,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) + +dnl send ICMP that will mod ECN +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=192.168.1.1,dst=192.168.1.18,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) + +dnl send ICMP that will mod TOS +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=192.168.1.1,dst=192.168.1.17,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) + +dnl send ICMP that will set DST +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=192.168.1.1,dst=192.168.1.16,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) + +dnl send ICMP that will set SRC +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=192.168.1.1,dst=192.168.1.15,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) + +dnl send ICMP that will set TOS +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=192.168.1.1,dst=192.168.1.14,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) + +AT_CHECK([ovs-appctl dpctl/dump-flows | sort], [0], [dnl +flow-dump from the main thread: +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(dst=192.168.1.10,proto=1,ttl=64,frag=no), packets:0, bytes:0, used:never, actions:set(ipv4(ttl=63)),2 +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(dst=192.168.1.14,proto=1,tos=0/0xfc,frag=no), packets:0, bytes:0, used:never, actions:set(ipv4(tos=0x40/0xfc)),2 +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(dst=192.168.1.16,proto=1,frag=no), packets:0, bytes:0, 
used:never, actions:set(ipv4(dst=192.168.20.26)),2 +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(dst=192.168.1.17,proto=1,tos=0/0xfc,frag=no), packets:0, bytes:0, used:never, actions:set(ipv4(tos=0x40/0xfc)),2 +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(dst=192.168.1.18,proto=1,tos=0/0x3,frag=no), packets:0, bytes:0, used:never, actions:set(ipv4(tos=0x2/0x3)),2 +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(dst=192.168.1.19,proto=1,ttl=64,frag=no), packets:0, bytes:0, used:never, actions:set(ipv4(ttl=8)),2 +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(dst=192.168.1.20,proto=0,frag=no), packets:0, bytes:0, used:never, actions:2 +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(dst=192.168.1.20,proto=1,frag=no), packets:0, bytes:0, used:never, actions:set(ipv4(dst=192.168.20.20)),2 +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(src=192.168.1.1,dst=192.168.1.15,proto=1,frag=no), packets:0, bytes:0, used:never, actions:set(ipv4(src=192.168.21.26)),2 +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(src=192.168.1.1,dst=192.168.1.21,proto=1,frag=no), packets:0, bytes:0, used:never, actions:set(ipv4(src=192.168.20.21)),2 +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + +AT_SETUP([ofproto - implicit mask of ipv6 proto with HOPOPT field]) +OVS_VSWITCHD_START +add_of_ports br0 1 2 + +AT_DATA([flows.txt], [dnl +table=0 in_port=1 priority=77,ip6,ipv6_dst=111:db8::3,actions=dec_ttl,output=2 +table=0 in_port=1 priority=76,ip6,ipv6_dst=111:db8::4,actions=mod_nw_ttl:8,output=2 +table=0 in_port=1 priority=75,ip6,ipv6_dst=111:db8::5,actions=mod_nw_ecn:2,output=2 +table=0 in_port=1 priority=74,ip6,ipv6_dst=111:db8::6,actions=mod_nw_tos:0x40,output=2 +table=0 in_port=1 priority=73,ip6,ipv6_dst=111:db8::7,actions=set_field:2112:db8::2->ipv6_dst,output=2 +table=0 in_port=1 
priority=72,ip6,ipv6_dst=111:db8::8,actions=set_field:2112:db8::3->ipv6_src,output=2 +table=0 in_port=1 priority=72,ip6,ipv6_dst=111:db8::9,actions=set_field:44->ipv6_label,output=2 +table=0 in_port=1 priority=0,actions=drop +]) +AT_CHECK([ovs-ofctl del-flows br0]) +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) + +dnl send a proto 0 packet to try and poison the DP flow path +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x86dd),ipv6(src=2001:db8::1,dst=111:db8::3,proto=0,tclass=0,hlimit=64,frag=no)']) + +AT_CHECK([ovs-appctl dpctl/dump-flows], [0], [dnl +flow-dump from the main thread: +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x86dd),ipv6(dst=111:db8::3,proto=0,hlimit=0,frag=no), packets:0, bytes:0, used:never, actions:userspace(pid=0,controller(reason=2,dont_send=0,continuation=0,recirc_id=1,rule_cookie=0,controller_id=0,max_len=65535)) +]) + +dnl Send ICMP for dec TTL and mod TTL +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x86dd),ipv6(src=2001:db8::1,dst=111:db8::3,proto=1,tclass=0,hlimit=64,frag=no),icmpv6(type=0,code=8)']) +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x86dd),ipv6(src=2001:db8::1,dst=111:db8::4,proto=1,tclass=0,hlimit=64,frag=no),icmpv6(type=0,code=8)']) + +dnl send ICMP that will mod ECN +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x86dd),ipv6(src=2001:db8::1,dst=111:db8::5,proto=1,tclass=0,hlimit=64,frag=no),icmpv6(type=0,code=8)']) + +dnl send ICMP that will mod TOS +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x86dd),ipv6(src=2001:db8::1,dst=111:db8::6,proto=1,tclass=0,hlimit=64,frag=no),icmpv6(type=0,code=8)']) + +dnl send ICMP that will set DST +AT_CHECK([ovs-appctl
netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x86dd),ipv6(src=2001:db8::1,dst=111:db8::7,proto=1,tclass=0,hlimit=64,frag=no),icmpv6(type=0,code=8)']) + +dnl send ICMP that will set SRC +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x86dd),ipv6(src=2001:db8::1,dst=111:db8::8,proto=1,tclass=0,hlimit=64,frag=no),icmpv6(type=0,code=8)']) + +dnl send ICMP that will set LABEL +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x86dd),ipv6(src=2001:db8::1,dst=111:db8::9,proto=1,tclass=0,hlimit=64,frag=no),icmpv6(type=0,code=8)']) + +AT_CHECK([ovs-appctl dpctl/dump-flows | sort], [0], [dnl +flow-dump from the main thread: +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x86dd),ipv6(dst=111:db8::3,proto=0,hlimit=0,frag=no), packets:0, bytes:0, used:never, actions:userspace(pid=0,controller(reason=2,dont_send=0,continuation=0,recirc_id=1,rule_cookie=0,controller_id=0,max_len=65535)) +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x86dd),ipv6(dst=111:db8::3,proto=1,hlimit=64,frag=no), packets:0, bytes:0, used:never, actions:set(ipv6(hlimit=63)),2 +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x86dd),ipv6(dst=111:db8::4,proto=1,hlimit=64,frag=no), packets:0, bytes:0, used:never, actions:set(ipv6(hlimit=8)),2 +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x86dd),ipv6(dst=111:db8::5,proto=1,tclass=0/0x3,frag=no), packets:0, bytes:0, used:never, actions:set(ipv6(tclass=0x2/0x3)),2 +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x86dd),ipv6(dst=111:db8::6,proto=1,tclass=0/0xfc,frag=no), packets:0, bytes:0, used:never, actions:set(ipv6(tclass=0x40/0xfc)),2 +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x86dd),ipv6(dst=111:db8::7,proto=1,frag=no), packets:0, bytes:0, used:never, actions:set(ipv6(dst=2112:db8::2)),2
+recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x86dd),ipv6(dst=111:db8::9,label=0,proto=1,frag=no), packets:0, bytes:0, used:never, actions:set(ipv6(label=0x2c)),2 +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x86dd),ipv6(src=2001:db8::1,dst=111:db8::8,proto=1,frag=no), packets:0, bytes:0, used:never, actions:set(ipv6(src=2112:db8::3)),2 +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + +AT_SETUP([ofproto - implicit mask of ARP OPer field]) +OVS_VSWITCHD_START +add_of_ports br0 1 2 + +AT_DATA([flows.txt], [dnl +table=0 in_port=1 priority=77,arp,arp_sha=00:01:02:03:04:06,actions=set_field:0x1->arp_op,2 +table=0 in_port=1 priority=76,arp,arp_sha=00:01:02:03:04:07,actions=set_field:00:02:03:04:05:06->arp_sha,2 +table=0 in_port=1 priority=75,arp,arp_sha=00:01:02:03:04:08,actions=set_field:ff:00:00:00:00:ff->arp_tha,2 +table=0 in_port=1 priority=74,arp,arp_sha=00:01:02:03:04:09,actions=set_field:172.31.110.26->arp_spa,2 +table=0 in_port=1 priority=73,arp,arp_sha=00:01:02:03:04:0a,actions=set_field:172.31.110.10->arp_tpa,2 +table=0 in_port=1 priority=1,actions=drop +]) + +AT_CHECK([ovs-ofctl del-flows br0]) +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) + +dnl Send op == 0 packet +AT_CHECK([ovs-appctl netdev-dummy/receive p1 \ + 'ffffffffffffaa55aa550000080600010800060400000001020304070c0a00010000000000000c0a0002']) + +AT_CHECK([ovs-appctl dpctl/dump-flows], [0], [dnl +flow-dump from the main thread: +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0806),arp(op=0,sha=00:01:02:03:04:07), packets:0, bytes:0, used:never, actions:2 +]) + +dnl Send op 2 -> set op +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0806),arp(sip=172.31.110.1,tip=172.31.110.25,op=2,sha=00:01:02:03:04:06,tha=ff:ff:ff:ff:ff:ff)']) + +dnl Send op 1 -> set SHA +AT_CHECK([ovs-appctl netdev-dummy/receive p1 
'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0806),arp(sip=172.31.110.1,tip=172.31.110.25,op=1,sha=00:01:02:03:04:07,tha=ff:ff:ff:ff:ff:ff)']) + +dnl Send op 1 -> set THA +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0806),arp(sip=172.31.110.1,tip=172.31.110.25,op=1,sha=00:01:02:03:04:08,tha=ff:ff:ff:ff:ff:ff)']) + +dnl Send op 1 -> set SIP +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0806),arp(sip=172.31.110.1,tip=172.31.110.25,op=1,sha=00:01:02:03:04:09,tha=ff:ff:ff:ff:ff:ff)']) + +dnl Send op 1 -> set TIP +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0806),arp(sip=172.31.110.1,tip=172.31.110.25,op=1,sha=00:01:02:03:04:0a,tha=ff:ff:ff:ff:ff:ff)']) + +AT_CHECK([ovs-appctl dpctl/dump-flows | sort], [0], [dnl +flow-dump from the main thread: +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0806),arp(op=0,sha=00:01:02:03:04:07), packets:0, bytes:0, used:never, actions:2 +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0806),arp(op=1,sha=00:01:02:03:04:07), packets:0, bytes:0, used:never, actions:userspace(pid=0,slow_path(action)) +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0806),arp(op=1,sha=00:01:02:03:04:08,tha=ff:ff:ff:ff:ff:ff), packets:0, bytes:0, used:never, actions:userspace(pid=0,slow_path(action)) +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0806),arp(op=2,sha=00:01:02:03:04:06), packets:0, bytes:0, used:never, actions:userspace(pid=0,slow_path(action)) +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0806),arp(sip=172.31.110.1,op=1,sha=00:01:02:03:04:09), packets:0, bytes:0, used:never, actions:userspace(pid=0,slow_path(action)) +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0806),arp(tip=172.31.110.25,op=1,sha=00:01:02:03:04:0a), packets:0, 
bytes:0, used:never, actions:userspace(pid=0,slow_path(action)) +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP diff --git a/tests/ovs-vsctl.at b/tests/ovs-vsctl.at index a92156f00..a368bff6e 100644 --- a/tests/ovs-vsctl.at +++ b/tests/ovs-vsctl.at @@ -425,6 +425,7 @@ AT_CHECK([RUN_OVS_VSCTL_ONELINE( [add-port a a1], [add-bond a bond0 a2 a3], [br-set-external-id a key0 value0], + [add Bridge a external_ids key0=value1], [set port a1 external-ids:key1=value1], [set interface a2 external-ids:key2=value2], [set interface a2 external-ids:key3=value3], @@ -446,6 +447,7 @@ AT_CHECK([RUN_OVS_VSCTL_ONELINE( + key0=value0 value0 @@ -1071,13 +1073,9 @@ AT_CHECK([RUN_OVS_VSCTL([set controller br1 'connection-mode=xyz'])], AT_CHECK([RUN_OVS_VSCTL([set controller br1 connection-mode:x=y])], [1], [], [ovs-vsctl: cannot specify key to set for non-map column connection_mode ]) -AT_CHECK([RUN_OVS_VSCTL([add bridge br1 datapath_id x y -- show])], +AT_CHECK([RUN_OVS_VSCTL([add bridge br1 datapath_id x y])], [1], [], [ovs-vsctl: "add" operation would put 2 values in column datapath_id of table Bridge but the maximum number is 1 ]) -AT_CHECK([RUN_OVS_VSCTL([add bridge br1 datapath_id x y])], [1], [], [stderr]) -AT_CHECK([sed "/^.*|WARN|.*/d" < stderr], [0], [dnl -ovs-vsctl: transaction error: {"details":"set must have 0 to 1 members but 2 are present","error":"syntax error","syntax":"[[\"set\",[\"x\",\"y\"]]]"} -]) AT_CHECK([RUN_OVS_VSCTL([remove netflow `cat netflow-uuid` targets '"1.2.3.4:567"'])], [1], [], [ovs-vsctl: "remove" operation would put 0 values in column targets of table NetFlow but the minimum number is 1 ]) diff --git a/tests/ovsdb-execution.at b/tests/ovsdb-execution.at index e72bf0606..fd1c7a239 100644 --- a/tests/ovsdb-execution.at +++ b/tests/ovsdb-execution.at @@ -728,6 +728,53 @@ dnl collide (only) with their previous values (succeeds). 
[{"count":2},{"uuid":["uuid","<6>"]},{"uuid":["uuid","<7>"]},{"rows":[{"name":"new one","number":1},{"name":"new two","number":2},{"name":"old one","number":10},{"name":"old two","number":20}]}] ]]) +OVSDB_CHECK_EXECUTION([size constraints on sets], + [constraint_schema], + [ + [[["constraints", + {"op": "insert", + "table": "b", + "row": {"b": 1} + }]]], + [[["constraints", + {"op": "mutate", + "table": "b", + "where": [], + "mutations": [["x", "delete", 0]] + }]]], + [[["constraints", + {"op": "mutate", + "table": "b", + "where": [], + "mutations": [["x", "insert", 1]] + }]]], + [[["constraints", + {"op": "update", + "table": "b", + "where": [], + "row": {"x": ["set", [3, 4]]} + }]]], + [[["constraints", + {"op": "mutate", + "table": "b", + "where": [], + "mutations": [["x", "insert", 5]] + }]]], + [[["constraints", + {"op": "mutate", + "table": "b", + "where": [], + "mutations": [["x", "delete", 4], ["x", "insert", 5]] + }]]] + ], + [[[{"uuid":["uuid","<0>"]}] +[{"details":"Attempted to store 0 elements in set of 1 to 2 integers.","error":"constraint violation"}] +[{"count":1}] +[{"count":1}] +[{"details":"Attempted to store 3 elements in set of 1 to 2 integers.","error":"constraint violation"}] +[{"count":1}] +]]) + OVSDB_CHECK_EXECUTION([referential integrity -- simple], [constraint_schema], [[[["constraints", @@ -751,12 +798,6 @@ OVSDB_CHECK_EXECUTION([referential integrity -- simple], {"op": "delete", "table": "b", "where": []}]]], -dnl Check that "mutate" honors number-of-elements constraints on sets and maps. - [[["constraints", - {"op": "mutate", - "table": "b", - "where": [], - "mutations": [["x", "delete", 0]]}]]], [[["constraints", {"op": "delete", "table": "a", @@ -783,7 +824,6 @@ dnl Check that "mutate" honors number-of-elements constraints on sets and maps. 
"where": []}]]]], [[[{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]},{"uuid":["uuid","<2>"]},{"uuid":["uuid","<3>"]}] [{"count":1},{"details":"cannot delete b row <0> because of 3 remaining reference(s)","error":"referential integrity violation"}] -[{"details":"Attempted to store 0 elements in set of 1 to 2 integers.","error":"constraint violation"}] [{"count":1}] [{"count":1},{"details":"cannot delete b row <0> because of 2 remaining reference(s)","error":"referential integrity violation"}] [{"count":1}] diff --git a/tests/ovsdb-idl.at b/tests/ovsdb-idl.at index 5a7e76eaa..1dd334325 100644 --- a/tests/ovsdb-idl.at +++ b/tests/ovsdb-idl.at @@ -94,7 +94,7 @@ m4_define([OVSDB_CHECK_IDL_WRITE_CHANGED_ONLY_C], AT_CHECK([ovsdb_start_idltest]) m4_if([$2], [], [], [AT_CHECK([ovsdb-client transact unix:socket $2], [0], [ignore], [ignore])]) - AT_CHECK([test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -vjsonrpc -t10 idl unix:socket $3], + AT_CHECK([test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -vjsonrpc -t10 --write-changed-only idl unix:socket $3], [0], [stdout], [ignore]) AT_CHECK([sort stdout | uuidfilt]m4_if([$6],,, [[| $6]]), [0], [$4]) @@ -1216,7 +1216,7 @@ m4_define([OVSDB_CHECK_IDL_TRACK_WRITE_CHANGED_ONLY_C], AT_CHECK([ovsdb_start_idltest]) m4_if([$2], [], [], [AT_CHECK([ovsdb-client transact unix:socket $2], [0], [ignore], [ignore])]) - AT_CHECK([test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -vjsonrpc -t10 -c -w idl unix:socket $3], + AT_CHECK([test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -vjsonrpc -t10 -c --write-changed-only idl unix:socket $3], [0], [stdout], [ignore]) AT_CHECK([sort stdout | uuidfilt]m4_if([$6],,, [[| $6]]), [0], [$4]) @@ -2296,6 +2296,23 @@ CHECK_STREAM_OPEN_BLOCK([Python3], [$PYTHON3 $srcdir/test-stream.py], CHECK_STREAM_OPEN_BLOCK([Python3], [$PYTHON3 $srcdir/test-stream.py], [tcp6], [[[::1]]]) +dnl OVSDB_CLUSTER_CHECK_MONITOR_COND_SINCE_TXN_IDS(LOG) +dnl +dnl Looks up transaction IDs in the log of OVSDB client application. 
+dnl All-zero UUID should not be sent within a monitor request more than once, +dnl unless some database requests were lost (not replied). +m4_define([OVSDB_CLUSTER_CHECK_MONITOR_COND_SINCE_TXN_IDS], +[ + requests=$(grep -c 'send request' $1) + replies=$(grep -c 'received reply' $1) + + if test "$requests" -eq "$replies"; then + AT_CHECK([grep 'monitor_cond_since' $1 \ + | grep -c "00000000-0000-0000-0000-000000000000" | tr -d '\n'], + [0], [1]) + fi +]) + # same as OVSDB_CHECK_IDL but uses Python IDL implementation with tcp # with multiple remotes to assert the idl connects to the leader of the Raft cluster m4_define([OVSDB_CHECK_IDL_LEADER_ONLY_PY], @@ -2311,10 +2328,11 @@ m4_define([OVSDB_CHECK_IDL_LEADER_ONLY_PY], pids=$(cat s2.pid s3.pid s1.pid | tr '\n' ',') echo $pids AT_CHECK([$PYTHON3 $srcdir/test-ovsdb.py -t30 idl-cluster $srcdir/idltest.ovsschema $remotes $pids $3], - [0], [stdout], [ignore]) + [0], [stdout], [stderr]) remote=$(ovsdb_cluster_leader $remotes "idltest") leader=$(echo $remote | cut -d'|' -f 1) AT_CHECK([grep -F -- "${leader}" stdout], [0], [ignore]) + OVSDB_CLUSTER_CHECK_MONITOR_COND_SINCE_TXN_IDS([stderr]) AT_CLEANUP]) OVSDB_CHECK_IDL_LEADER_ONLY_PY([Check Python IDL connects to leader], 3, ['remote']) @@ -2357,6 +2375,7 @@ m4_define([OVSDB_CHECK_CLUSTER_IDL_C], AT_CHECK([sort stdout | uuidfilt]m4_if([$7],,, [[| $7]]), [0], [$5]) m4_ifval([$8], [AT_CHECK([grep '$8' stderr], [1])], [], []) + OVSDB_CLUSTER_CHECK_MONITOR_COND_SINCE_TXN_IDS([stderr]) AT_CLEANUP]) # Same as OVSDB_CHECK_CLUSTER_IDL_C but uses the Python IDL implementation. 
@@ -2377,6 +2396,7 @@ m4_define([OVSDB_CHECK_CLUSTER_IDL_PY], AT_CHECK([sort stdout | uuidfilt]m4_if([$7],,, [[| $7]]), [0], [$5]) m4_if([$8], [AT_CHECK([grep '$8' stderr], [1])], [], []) + OVSDB_CLUSTER_CHECK_MONITOR_COND_SINCE_TXN_IDS([stderr]) AT_CLEANUP]) m4_define([OVSDB_CHECK_CLUSTER_IDL], diff --git a/tests/ovsdb-monitor.at b/tests/ovsdb-monitor.at index 3b622b3ec..82b0e9362 100644 --- a/tests/ovsdb-monitor.at +++ b/tests/ovsdb-monitor.at @@ -1011,3 +1011,69 @@ row,action,name,number,_version ]], [ignore]) AT_CLEANUP +AT_SETUP([monitor-cond initial reply with condition on non-monitored column]) +AT_KEYWORDS([ovsdb server monitor monitor-cond positive initial non-monitored]) + +ordinal_schema > schema +AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore]) +on_exit 'kill `cat ovsdb-server.pid`' +AT_CAPTURE_FILE([ovsdb-server.log]) +AT_CHECK([ovsdb-server --detach --no-chdir --pidfile \ + --remote=punix:socket --log-file db], [0], [ignore], [ignore]) + +dnl Initialize the database content. +for txn in m4_foreach([txn], [[[["ordinals", + {"op": "insert", + "table": "ordinals", + "row": {"number": 0, "name": "zero"}}, + {"op": "insert", + "table": "ordinals", + "row": {"number": 1, "name": "one"}}, + {"op": "insert", + "table": "ordinals", + "row": {"number": 2, "name": "two"}}]]]], ['txn' ]); do + AT_CHECK([ovsdb-client transact unix:socket "$txn"], [0], [ignore], [ignore]) +done + +dnl Start a first client that monitors only the column 'name'. +on_exit 'kill `cat client-1.pid`' +AT_CAPTURE_FILE([client-1.out]) +AT_CHECK([ovsdb-client -vjsonrpc --pidfile=client-1.pid --detach --no-chdir \ + -d json monitor-cond --format=csv unix:socket \ + ordinals '[[true]]' ordinals ["name"] \ + > client-1.out 2> client-1.err], [0], [ignore], [ignore]) +dnl Wait for the initial monitor reply. +OVS_WAIT_UNTIL([grep -q 'initial' client-1.out]) + +dnl Start a second client that monitors the column 'name', but has a condition +dnl on column 'number'. 
+on_exit 'kill `cat client-2.pid`' +AT_CAPTURE_FILE([client-2.out]) +AT_CHECK([ovsdb-client -vjsonrpc --pidfile=client-2.pid --detach --no-chdir \ + -d json monitor-cond --format=csv unix:socket \ + ordinals '[[["number", "!=", 1]]]' ordinals ["name"] \ + > client-2.out 2> client-2.err], [0], [ignore], [ignore]) +dnl Wait for the initial monitor reply. +OVS_WAIT_UNTIL([grep -q 'initial' client-2.out]) + +OVSDB_SERVER_SHUTDOWN +OVS_WAIT_UNTIL([test ! -e ovsdb-server.pid && \ + test ! -e client-1.pid && test ! -e client-2.pid]) + +dnl The first client should have all the names. +AT_CHECK([$PYTHON3 $srcdir/ovsdb-monitor-sort.py < client-1.out | uuidfilt], + [0], [dnl +row,action,name +<0>,initial,"""one""" +<1>,initial,"""two""" +<2>,initial,"""zero""" +]) + +dnl The second client should not have the name 'one'. +AT_CHECK([$PYTHON3 $srcdir/ovsdb-monitor-sort.py < client-2.out | uuidfilt], + [0], [dnl +row,action,name +<0>,initial,"""two""" +<1>,initial,"""zero""" +]) +AT_CLEANUP diff --git a/tests/ovsdb-server.at b/tests/ovsdb-server.at index bf539b6e5..b9c3bf203 100644 --- a/tests/ovsdb-server.at +++ b/tests/ovsdb-server.at @@ -24,6 +24,9 @@ m4_define([OVSDB_SERVER_SHUTDOWN2], # If a given UUID appears more than once it is always replaced by the # same marker. # +# Additionally, checks that records written to a database file can be +# read back producing the same in-memory database content. +# # TITLE is provided to AT_SETUP and KEYWORDS to AT_KEYWORDS. 
m4_define([OVSDB_CHECK_EXECUTION], [AT_SETUP([$1]) @@ -31,12 +34,22 @@ m4_define([OVSDB_CHECK_EXECUTION], $2 > schema AT_CHECK([ovsdb-tool create db schema], [0], [stdout], [ignore]) on_exit 'kill `cat *.pid`' - AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=punix:socket db], [0], [ignore], [ignore]) + AT_CHECK([ovsdb-server --detach --no-chdir --log-file --pidfile \ + --remote=punix:socket db], [0], [ignore], [ignore]) m4_foreach([txn], [$3], [AT_CHECK([ovsdb-client transact unix:socket 'txn'], [0], [stdout], [ignore]) cat stdout >> output ]) AT_CHECK([uuidfilt output], [0], [$4], [ignore]) + + AT_CHECK([ovsdb-client dump unix:socket], [0], [stdout], [ignore]) + + OVSDB_SERVER_SHUTDOWN + + AT_CHECK([ovsdb-server --detach --no-chdir --log-file --pidfile \ + --remote=punix:socket db], [0], [ignore], [ignore]) + OVS_WAIT_UNTIL([ovsdb-client dump unix:socket > dump2; diff stdout dump2]) + OVSDB_SERVER_SHUTDOWN AT_CLEANUP]) diff --git a/tests/ovsdb-tool.at b/tests/ovsdb-tool.at index 12ad6fb3f..5496ccda7 100644 --- a/tests/ovsdb-tool.at +++ b/tests/ovsdb-tool.at @@ -465,6 +465,7 @@ AT_SETUP([ovsdb-tool convert-to-standalone]) AT_KEYWORDS([ovsdb file positive]) ordinal_schema > schema AT_CHECK([ovsdb-tool create-cluster db schema unix:s1.raft], [0], [stdout], [ignore]) +on_exit 'kill `cat ovsdb-server.pid`' AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=punix:socket --log-file db >/dev/null 2>&1]) for txn in m4_foreach([txn], [[[["ordinals", {"op": "insert", @@ -498,3 +499,71 @@ OVS_APP_EXIT_AND_WAIT([ovsdb-server]) # Make sure both standalone and cluster db data matches. 
AT_CHECK([diff standalonedump clusterdump]) AT_CLEANUP + +AT_SETUP([ovsdb-tool convert-to-standalone after schema conversion]) +AT_KEYWORDS([ovsdb file positive]) +ordinal_schema > schema +AT_CHECK([ovsdb-tool create-cluster db schema unix:s1.raft], [0], [stdout], [ignore]) +on_exit 'kill `cat ovsdb-server.pid`' +AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --remote=punix:socket dnl + --log-file db >/dev/null 2>&1]) +for txn in m4_foreach([txn], [[[["ordinals", + {"op": "insert", + "table": "ordinals", + "row": {"number": 0, "name": "zero"}}, + {"op": "insert", + "table": "ordinals", + "row": {"number": 1, "name": "one"}}, + {"op": "insert", + "table": "ordinals", + "row": {"number": 2, "name": "two"}}]]]], ['txn' ]); do + AT_CHECK([ovsdb-client transact unix:socket "$txn"], [0], [ignore], [ignore]) +done + +dnl Change the schema. +AT_CHECK([sed 's/5\.1\.3/5.1.4/' < schema > schema2]) +AT_CHECK([sed -i'back' -e '/.*"number":.*/a \ + "is_seven": {"type": "boolean"}, + ' schema2]) + +dnl Convert the database. +AT_CHECK([ovsdb-client convert unix:socket schema2]) + +dnl Add a new row with a new column. +AT_CHECK([ovsdb-client transact unix:socket dnl + '[["ordinals", + {"op": "insert", + "table": "ordinals", + "row": {"number": 7, "name": "seven", "is_seven": true} + }]]'], [0], [ignore], [ignore]) + +AT_CHECK([ovsdb-client dump unix:socket > clusterdump]) + +AT_CHECK([uuidfilt clusterdump], [0], [dnl +ordinals table +_uuid is_seven name number +------------------------------------ -------- ----- ------ +<0> false one 1 +<1> false two 2 +<2> false zero 0 +<3> true seven 7 +]) + +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +dnl Convert to standalone database from clustered database. +AT_CHECK(ovsdb-tool cluster-to-standalone db1 db) + +dnl Check it's a standalone db. +AT_CHECK([ovsdb-tool db-is-standalone db1]) + +dnl Dump the standalone db data. 
+AT_CHECK([ovsdb-server -vconsole:off -vfile -vvlog:off --detach --no-chdir dnl + --pidfile --log-file --remote=punix:db.sock db1]) +AT_CHECK([ovsdb_client_wait ordinals connected]) +AT_CHECK([ovsdb-client dump > standalonedump]) +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) + +dnl Make sure both standalone and cluster db data matches. +AT_CHECK([diff standalonedump clusterdump]) +AT_CLEANUP diff --git a/tests/packet-type-aware.at b/tests/packet-type-aware.at index 3b5c66fe5..d63528e69 100644 --- a/tests/packet-type-aware.at +++ b/tests/packet-type-aware.at @@ -1021,7 +1021,7 @@ AT_CHECK([ ], [0], [flow-dump from the main thread: recirc_id(0),in_port(p0),packet_type(ns=0,id=0),eth(src=aa:bb:cc:00:00:02,dst=aa:bb:cc:00:00:01),eth_type(0x0800),ipv4(dst=20.0.0.1,proto=47,frag=no), packets:3, bytes:378, used:0.0s, actions:tnl_pop(gre_sys) tunnel(src=20.0.0.2,dst=20.0.0.1,flags(-df-csum)),recirc_id(0),in_port(gre_sys),packet_type(ns=1,id=0x8847),eth_type(0x8847),mpls(label=999/0x0,tc=0/0,ttl=64/0x0,bos=1/1), packets:3, bytes:264, used:0.0s, actions:push_eth(src=00:00:00:00:00:00,dst=00:00:00:00:00:00),pop_mpls(eth_type=0x800),recirc(0x1) -tunnel(src=20.0.0.2,dst=20.0.0.1,flags(-df-csum)),recirc_id(0x1),in_port(gre_sys),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(ttl=64,frag=no), packets:3, bytes:294, used:0.0s, actions:set(ipv4(ttl=63)),int-br +tunnel(src=20.0.0.2,dst=20.0.0.1,flags(-df-csum)),recirc_id(0x1),in_port(gre_sys),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(proto=1,ttl=64,frag=no), packets:3, bytes:294, used:0.0s, actions:set(ipv4(ttl=63)),int-br ]) ovs-appctl time/warp 1000 diff --git a/tests/pmd.at b/tests/pmd.at index c707f762c..5546e063b 100644 --- a/tests/pmd.at +++ b/tests/pmd.at @@ -1300,3 +1300,54 @@ OVS_WAIT_UNTIL([tail -n +$LINENUM ovs-vswitchd.log | grep "PMD load based sleeps OVS_VSWITCHD_STOP AT_CLEANUP + +AT_SETUP([PMD - revalidator modify overlapping flows]) + +OVS_VSWITCHD_START( +[add-port br0 p1 \ + -- set bridge br0 datapath-type=dummy \ + -- 
set interface p1 type=dummy-pmd \ + -- add-port br0 p2 \ + -- set interface p2 type=dummy-pmd +], [], [], [DUMMY_NUMA]) + +dnl Add one OpenFlow rule and generate a megaflow. +AT_CHECK([ovs-ofctl add-flow br0 'table=0,in_port=p1,ip,nw_dst=10.1.2.0/24,actions=p2']) +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'ipv4(src=10.0.0.1,dst=10.1.2.2,proto=6),tcp(src=1,dst=2)']) + +OVS_WAIT_UNTIL_EQUAL([ovs-appctl dpctl/dump-flows | sed 's/.*core: [[0-9]]*//'], [ +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(dst=10.1.2.2/255.255.255.0,frag=no), packets:0, bytes:0, used:never, actions:2]) + +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'ipv4(src=10.0.0.1,dst=10.1.2.2,proto=6),tcp(src=1,dst=2)']) +dnl Replace OpenFlow rules, trigger the revalidation. +AT_CHECK([echo 'table=0,in_port=p1,ip,nw_dst=10.1.0.0/16 actions=ct(commit)' | dnl + ovs-ofctl --bundle replace-flows br0 -]) +AT_CHECK([ovs-appctl revalidator/wait]) + +dnl Prevent flows from expiring. +AT_CHECK([ovs-appctl time/stop]) + +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'ipv4(src=10.0.0.1,dst=10.1.0.2,proto=6),tcp(src=1,dst=2)']) +OVS_WAIT_UNTIL_EQUAL([ovs-appctl dpctl/dump-flows | sed 's/.*core: [[0-9]]*//' | strip_xout_keep_actions], [ +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(dst=10.1.0.2/255.255.0.0,frag=no), packets:0, bytes:0, used:never, actions:ct(commit) +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(dst=10.1.2.2/255.255.255.0,frag=no), packets:0, bytes:0, used:0.0s, actions:ct(commit)]) + +dnl Send more 10.1.0.2 to make 10.1.0.0/16 tuple precede 10.1.2.0/24 tuple in the pvector of subtables. +for i in $(seq 0 256); do + AT_CHECK([ovs-appctl netdev-dummy/receive p1 'ipv4(src=10.0.0.1,dst=10.1.0.2,proto=6),tcp(src=1,dst=2)']) +done + +dnl Warp time enough to trigger subtable optimization.
+AT_CHECK([ovs-appctl time/warp 500 2000], [0], [ignore]) + +AT_CHECK([echo 'table=0,in_port=p1,ip,nw_dst=10.1.0.0/16 actions=p2' | dnl + ovs-ofctl --bundle replace-flows br0 -]) + +AT_CHECK([ovs-appctl revalidator/wait]) +AT_CHECK([ovs-appctl dpctl/dump-flows | sed 's/.*core: [[0-9]]*//' | strip_xout_keep_actions], [0], [ +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(dst=10.1.0.2/255.255.0.0,frag=no), packets:0, bytes:0, used:0.0s, actions:2 +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(dst=10.1.2.2/255.255.255.0,frag=no), packets:0, bytes:0, used:0.0s, actions:2 +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP diff --git a/tests/system-dpdk-macros.at b/tests/system-dpdk-macros.at index 53fbc1320..3920f08a5 100644 --- a/tests/system-dpdk-macros.at +++ b/tests/system-dpdk-macros.at @@ -42,7 +42,7 @@ m4_define([OVS_DPDK_START], OVS_DPDK_START_OVSDB() dnl Enable DPDK functionality AT_CHECK([ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-init=true]) - OVS_DPDK_START_VSWITCHD() + OVS_DPDK_START_VSWITCHD($1) ]) # OVS_DPDK_START_OVSDB() @@ -72,7 +72,7 @@ m4_define([OVS_DPDK_START_OVSDB], # m4_define([OVS_DPDK_START_VSWITCHD], [dnl Change DPDK drivers log levels so that tests only catch errors - AT_CHECK([ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-extra=--log-level=pmd.*:error]) + AT_CHECK([ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-extra="--log-level=pmd.*:error $1"]) dnl Start ovs-vswitchd. AT_CHECK([ovs-vswitchd --detach --no-chdir --pidfile --log-file -vvconn -vofproto_dpif -vunixctl], [0], [stdout], [stderr]) diff --git a/tests/system-dpdk.at b/tests/system-dpdk.at index cb6c6d590..0f58e8574 100644 --- a/tests/system-dpdk.at +++ b/tests/system-dpdk.at @@ -32,7 +32,7 @@ dnl Check if EAL init is successful AT_SETUP([OVS-DPDK - EAL init]) AT_KEYWORDS([dpdk]) OVS_DPDK_PRE_CHECK() -OVS_DPDK_START() +OVS_DPDK_START([--no-pci]) AT_CHECK([grep "DPDK Enabled - initializing..." 
ovs-vswitchd.log], [], [stdout]) AT_CHECK([grep "EAL" ovs-vswitchd.log], [], [stdout]) AT_CHECK([grep "DPDK Enabled - initialized" ovs-vswitchd.log], [], [stdout]) @@ -69,7 +69,7 @@ dnl Add vhost-user-client port AT_SETUP([OVS-DPDK - add vhost-user-client port]) AT_KEYWORDS([dpdk]) OVS_DPDK_PRE_CHECK() -OVS_DPDK_START() +OVS_DPDK_START([--no-pci]) dnl Add userspace bridge and attach it to OVS AT_CHECK([ovs-vsctl add-br br10 -- set bridge br10 datapath_type=netdev]) @@ -98,7 +98,7 @@ AT_SETUP([OVS-DPDK - ping vhost-user ports]) AT_KEYWORDS([dpdk]) OVS_DPDK_PRE_CHECK() AT_SKIP_IF([! which dpdk-testpmd >/dev/null 2>/dev/null]) -OVS_DPDK_START() +OVS_DPDK_START([--no-pci]) dnl Find number of sockets AT_CHECK([lscpu], [], [stdout]) @@ -174,7 +174,7 @@ AT_SETUP([OVS-DPDK - ping vhost-user-client ports]) AT_KEYWORDS([dpdk]) OVS_DPDK_PRE_CHECK() AT_SKIP_IF([! which dpdk-testpmd >/dev/null 2>/dev/null]) -OVS_DPDK_START() +OVS_DPDK_START([--no-pci]) dnl Find number of sockets AT_CHECK([lscpu], [], [stdout]) @@ -309,7 +309,7 @@ AT_SETUP([OVS-DPDK - Ingress policing create delete vport port]) AT_KEYWORDS([dpdk]) OVS_DPDK_PRE_CHECK() -OVS_DPDK_START() +OVS_DPDK_START([--no-pci]) dnl Add userspace bridge and attach it to OVS and add ingress policer AT_CHECK([ovs-vsctl add-br br10 -- set bridge br10 datapath_type=netdev]) @@ -352,7 +352,7 @@ AT_SETUP([OVS-DPDK - Ingress policing no policing rate]) AT_KEYWORDS([dpdk]) OVS_DPDK_PRE_CHECK() -OVS_DPDK_START() +OVS_DPDK_START([--no-pci]) dnl Add userspace bridge and attach it to OVS and add ingress policer AT_CHECK([ovs-vsctl add-br br10 -- set bridge br10 datapath_type=netdev]) @@ -393,7 +393,7 @@ AT_SETUP([OVS-DPDK - Ingress policing no policing burst]) AT_KEYWORDS([dpdk]) OVS_DPDK_PRE_CHECK() -OVS_DPDK_START() +OVS_DPDK_START([--no-pci]) dnl Add userspace bridge and attach it to OVS and add ingress policer AT_CHECK([ovs-vsctl add-br br10 -- set bridge br10 datapath_type=netdev]) @@ -465,7 +465,7 @@ AT_SETUP([OVS-DPDK - QoS create 
delete vport port]) AT_KEYWORDS([dpdk]) OVS_DPDK_PRE_CHECK() -OVS_DPDK_START() +OVS_DPDK_START([--no-pci]) dnl Add userspace bridge and attach it to OVS and add egress policer AT_CHECK([ovs-vsctl add-br br10 -- set bridge br10 datapath_type=netdev]) @@ -506,7 +506,7 @@ AT_SETUP([OVS-DPDK - QoS no cir]) AT_KEYWORDS([dpdk]) OVS_DPDK_PRE_CHECK() -OVS_DPDK_START() +OVS_DPDK_START([--no-pci]) dnl Add userspace bridge and attach it to OVS and add egress policer AT_CHECK([ovs-vsctl add-br br10 -- set bridge br10 datapath_type=netdev]) @@ -541,7 +541,7 @@ AT_SETUP([OVS-DPDK - QoS no cbs]) AT_KEYWORDS([dpdk]) OVS_DPDK_PRE_CHECK() -OVS_DPDK_START() +OVS_DPDK_START([--no-pci]) dnl Add userspace bridge and attach it to OVS and add egress policer AT_CHECK([ovs-vsctl add-br br10 -- set bridge br10 datapath_type=netdev]) @@ -661,7 +661,7 @@ AT_KEYWORDS([dpdk]) AT_SKIP_IF([! which dpdk-testpmd >/dev/null 2>/dev/null]) OVS_DPDK_PRE_CHECK() -OVS_DPDK_START() +OVS_DPDK_START([--no-pci]) dnl Find number of sockets AT_CHECK([lscpu], [], [stdout]) @@ -717,7 +717,7 @@ AT_KEYWORDS([dpdk]) AT_SKIP_IF([! which dpdk-testpmd >/dev/null 2>/dev/null]) OVS_DPDK_PRE_CHECK() -OVS_DPDK_START() +OVS_DPDK_START([--no-pci]) dnl Find number of sockets AT_CHECK([lscpu], [], [stdout]) @@ -856,7 +856,7 @@ AT_KEYWORDS([dpdk]) AT_SKIP_IF([! which dpdk-testpmd >/dev/null 2>/dev/null]) OVS_DPDK_PRE_CHECK() -OVS_DPDK_START() +OVS_DPDK_START([--no-pci]) dnl Find number of sockets AT_CHECK([lscpu], [], [stdout]) @@ -908,7 +908,7 @@ AT_KEYWORDS([dpdk]) AT_SKIP_IF([! which dpdk-testpmd >/dev/null 2>/dev/null]) OVS_DPDK_PRE_CHECK() -OVS_DPDK_START() +OVS_DPDK_START([--no-pci]) dnl Find number of sockets AT_CHECK([lscpu], [], [stdout]) @@ -963,7 +963,7 @@ dnl MFEX Autovalidator AT_SETUP([OVS-DPDK - MFEX Autovalidator]) AT_KEYWORDS([dpdk]) OVS_DPDK_PRE_CHECK() -OVS_DPDK_START() +OVS_DPDK_START([--no-pci]) AT_CHECK([ovs-vsctl add-br br0 -- set bridge br0 datapath_type=netdev]) AT_SKIP_IF([! 
ovs-appctl dpif-netdev/miniflow-parser-get | sed 1,4d | grep "True"], [], [dnl ]) @@ -996,7 +996,7 @@ dnl MFEX Autovalidator Fuzzy AT_SETUP([OVS-DPDK - MFEX Autovalidator Fuzzy]) AT_KEYWORDS([dpdk]) OVS_DPDK_PRE_CHECK() -OVS_DPDK_START() +OVS_DPDK_START([--no-pci]) AT_CHECK([ovs-vsctl add-br br0 -- set bridge br0 datapath_type=netdev]) AT_SKIP_IF([! ovs-appctl dpif-netdev/miniflow-parser-get | sed 1,4d | grep "True"], [], [dnl ]) @@ -1032,7 +1032,7 @@ AT_KEYWORDS([dpdk]) OVS_DPDK_PRE_CHECK() AT_SKIP_IF([! $PYTHON3 -c "import scapy"], [], []) AT_CHECK([$PYTHON3 $srcdir/mfex_fuzzy.py test_traffic.pcap 1], [], [stdout]) -OVS_DPDK_START() +OVS_DPDK_START([--no-pci]) AT_CHECK([ovs-vsctl --no-wait set Open_vSwitch . other_config:pmd-cpu-mask=0x1]) dnl Add userspace bridge and attach it to OVS AT_CHECK([ovs-vsctl add-br br0 -- set bridge br0 datapath_type=netdev]) @@ -1153,7 +1153,7 @@ AT_SETUP([OVS-DPDK - user configured mempool]) AT_KEYWORDS([dpdk]) OVS_DPDK_PRE_CHECK() OVS_DPDK_START_OVSDB() -OVS_DPDK_START_VSWITCHD() +OVS_DPDK_START_VSWITCHD([--no-pci]) AT_CHECK([ovs-vsctl --no-wait set Open_vSwitch . other_config:shared-mempool-config=8000,6000,1500]) AT_CHECK([ovs-vsctl --no-wait set Open_vSwitch . other_config:dpdk-init=true]) diff --git a/tests/system-interface.at b/tests/system-interface.at index 784bada12..15e789a24 100644 --- a/tests/system-interface.at +++ b/tests/system-interface.at @@ -63,3 +63,119 @@ AT_CHECK([ [stdout], [Device "br-p1" does not exist.] ) AT_CLEANUP + +AT_SETUP([interface - datapath ports garbage collection]) +OVS_CHECK_GENEVE() +OVS_TRAFFIC_VSWITCHD_START() + +dnl Not relevant for userspace datapath. +AT_SKIP_IF([! 
ovs-appctl dpctl/show | grep -q ovs-system]) + +AT_CHECK([ovs-vsctl add-port br0 tunnel_port dnl + -- set Interface tunnel_port dnl + type=geneve options:remote_ip=flow options:key=123]) + +AT_CHECK([ip link add ovs-veth0 type veth peer name ovs-veth1]) +on_exit 'ip link del ovs-veth0' + +AT_CHECK([ovs-vsctl add-port br0 ovs-veth0]) + +OVS_WAIT_UNTIL([ip link show | grep -q " genev_sys_[[0-9]]*: .* ovs-system "]) + +dnl Store the output of ip link for geneve port to compare ifindex later. +AT_CHECK([ip link show | grep " genev_sys_[[0-9]]*: .* ovs-system " > geneve.0]) + +AT_CHECK([ovs-appctl dpctl/show | grep port], [0], [dnl + port 0: ovs-system (internal) + port 1: br0 (internal) + port 2: genev_sys_6081 (geneve: packet_type=ptap) + port 3: ovs-veth0 +]) + +OVS_APP_EXIT_AND_WAIT_BY_TARGET([ovs-vswitchd], [ovs-vswitchd.pid]) + +dnl Check that geneve backing interface is still in the datapath. +AT_CHECK([ip link show | grep " genev_sys_[[0-9]]*: .* ovs-system " | diff -u - geneve.0]) + +dnl Remove the veth port from the database while ovs-vswitchd is down. +AT_CHECK([ovs-vsctl --no-wait del-port ovs-veth0]) + +dnl Check that it is still tied to the OVS datapath. +AT_CHECK([ip link show ovs-veth0 | grep -q ovs-system]) + +dnl Bring ovs-vswitchd back up. +AT_CHECK([ovs-vswitchd --detach --no-chdir --pidfile --log-file -vdpif:dbg], + [0], [], [stderr]) + +dnl Wait for the veth port to be removed from the datapath. +OVS_WAIT_WHILE([ip link show ovs-veth0 | grep -q ovs-system]) + +AT_CHECK([ovs-appctl dpctl/show | grep port], [0], [dnl + port 0: ovs-system (internal) + port 1: br0 (internal) + port 2: genev_sys_6081 (geneve: packet_type=ptap) +]) + +dnl Check that geneve backing interface is still in the datapath and it wasn't +dnl re-created, i.e. the ifindex is the same. 
+AT_CHECK([ip link show | grep " genev_sys_[[0-9]]*: .* ovs-system " | diff -u - geneve.0]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + +AT_SETUP([interface - datapath port rename]) +OVS_TRAFFIC_VSWITCHD_START() + +dnl Not relevant for userspace datapath. +AT_SKIP_IF([! ovs-appctl dpctl/show | grep -q ovs-system]) + +AT_CHECK([ip link add ovs-veth0 type veth peer name ovs-veth1]) +dnl We will rename ovs-veth0, so removing the peer on exit. +on_exit 'ip link del ovs-veth1' + +AT_CHECK([ovs-vsctl add-port br0 ovs-veth0]) + +OVS_WAIT_UNTIL([ip link show | grep -q "ovs-veth0.* ovs-system "]) + +AT_CHECK([ovs-appctl dpctl/show | grep port], [0], [dnl + port 0: ovs-system (internal) + port 1: br0 (internal) + port 2: ovs-veth0 +]) + +dnl Rename the interface while attached to OVS. +AT_CHECK([ip l set ovs-veth0 name ovs-new-port]) + +dnl Wait for the port to be detached from the OVS datapath. +OVS_WAIT_UNTIL([ip link show | grep "ovs-new-port" | grep -v "ovs-system"]) + +dnl Check that database indicates the error. +AT_CHECK([ovs-vsctl get interface ovs-veth0 error], [0], [dnl +"could not open network device ovs-veth0 (No such device)" +]) + +dnl Check that the port is no longer in the datapath. +AT_CHECK([ovs-appctl dpctl/show | grep port], [0], [dnl + port 0: ovs-system (internal) + port 1: br0 (internal) +]) + +dnl Rename the interface back and check that it is in use again. 
+AT_CHECK([ip l set ovs-new-port name ovs-veth0]) + +OVS_WAIT_UNTIL([ip link show | grep -q "ovs-veth0.* ovs-system "]) + +AT_CHECK([ovs-vsctl get interface ovs-veth0 error], [0], [dnl +[[]] +]) + +AT_CHECK([ovs-appctl dpctl/show | grep port], [0], [dnl + port 0: ovs-system (internal) + port 1: br0 (internal) + port 2: ovs-veth0 +]) + +OVS_TRAFFIC_VSWITCHD_STOP([" + /could not open network device ovs-veth0 (No such device)/d +"]) +AT_CLEANUP diff --git a/tests/system-kmod-macros.at b/tests/system-kmod-macros.at index 11920e60b..cbec8de02 100644 --- a/tests/system-kmod-macros.at +++ b/tests/system-kmod-macros.at @@ -112,6 +112,17 @@ m4_define([CHECK_CONNTRACK_ZEROIP_SNAT], AT_SKIP_IF([test "$IS_WIN32" = "yes"]) ]) +# CHECK_CONNTRACK_SCTP() +# +# Perform requirements checks for running conntrack SCTP. The kernel +# optionally support nf proto sctp. +# +m4_define([CHECK_CONNTRACK_SCTP], +[ + AT_SKIP_IF([test "$IS_WIN32" = "yes"]) + AT_SKIP_IF([! test -e /proc/sys/net/netfilter/nf_conntrack_sctp_timeout_closed]) +]) + # CHECK_CONNTRACK_TIMEOUT() # # Perform requirements checks for running conntrack customized timeout tests. @@ -224,3 +235,13 @@ m4_define([VSCTL_ADD_DATAPATH_TABLE], # or necessary for the userspace datapath as it is checking for a kernel # specific regression. m4_define([CHECK_L3L4_CONNTRACK_REASM]) + +# OVS_CHECK_BAREUDP() +# +# The feature needs to be enabled in the kernel configuration (CONFIG_BAREUDP) +# to work. +m4_define([OVS_CHECK_BAREUDP], +[ + AT_SKIP_IF([! 
ip link add dev ovs_bareudp0 type bareudp dstport 6635 ethertype mpls_uc 2>&1 >/dev/null]) + AT_CHECK([ip link del dev ovs_bareudp0]) +]) diff --git a/tests/system-layer3-tunnels.at b/tests/system-layer3-tunnels.at index c37852b21..81123f730 100644 --- a/tests/system-layer3-tunnels.at +++ b/tests/system-layer3-tunnels.at @@ -154,7 +154,7 @@ OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([layer3 - ping over MPLS Bareudp]) -OVS_CHECK_MIN_KERNEL(5, 7) +OVS_CHECK_BAREUDP() OVS_TRAFFIC_VSWITCHD_START([_ADD_BR([br1])]) ADD_NAMESPACES(at_ns0, at_ns1) @@ -202,7 +202,7 @@ OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP AT_SETUP([layer3 - ping over Bareudp]) -OVS_CHECK_MIN_KERNEL(5, 7) +OVS_CHECK_BAREUDP() OVS_TRAFFIC_VSWITCHD_START([_ADD_BR([br1])]) ADD_NAMESPACES(at_ns0, at_ns1) diff --git a/tests/system-offloads-traffic.at b/tests/system-offloads-traffic.at index d36da0580..8dd3bdf88 100644 --- a/tests/system-offloads-traffic.at +++ b/tests/system-offloads-traffic.at @@ -742,3 +742,118 @@ recirc_id(),in_port(3),eth_type(0x0800),ipv4(frag=no), packets:29, bytes OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP + + +AT_SETUP([offloads - offload flow to none-offload]) +OVS_TRAFFIC_VSWITCHD_START([], [], [-- set Open_vSwitch . 
other_config:hw-offload=true]) + +ADD_NAMESPACES(at_ns0, at_ns1) + +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24") + +AT_DATA([flows.txt], [dnl +add in_port=ovs-p0,actions=ovs-p1 +add in_port=ovs-p1,actions=ovs-p0 +]) +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) + +NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl +10 packets transmitted, 10 received, 0% packet loss, time 0ms +]) + +AT_CHECK([ovs-appctl dpctl/dump-flows type=tc | grep "eth_type(0x0800)" | sort | strip_recirc | strip_used], [0], [dnl +recirc_id(),in_port(2),eth(),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:756, used:0.0s, actions:3 +recirc_id(),in_port(3),eth(),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:756, used:0.0s, actions:2 +]) + +dnl Here we use an output action with truncate, which will force a kernel flow. +AT_DATA([flows2.txt], [dnl +modify in_port=ovs-p0,actions=output(port=ovs-p1, max_len=128) +modify in_port=ovs-p1,actions=output(port=ovs-p0, max_len=128) +]) +AT_CHECK([ovs-ofctl add-flows br0 flows2.txt]) +AT_CHECK([ovs-appctl revalidator/wait], [0]) + +NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl +10 packets transmitted, 10 received, 0% packet loss, time 0ms +]) + +AT_CHECK([ovs-appctl dpctl/dump-flows type=ovs | grep "eth_type(0x0800)" | sort | strip_recirc | strip_used], [0], [dnl +recirc_id(),in_port(2),eth(),eth_type(0x0800),ipv4(frag=no), packets:10, bytes:980, used:0.0s, actions:trunc(128),3 +recirc_id(),in_port(3),eth(),eth_type(0x0800),ipv4(frag=no), packets:10, bytes:980, used:0.0s, actions:trunc(128),2 +]) + +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) +AT_CHECK([ovs-appctl revalidator/wait], [0]) + +NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl +10 packets transmitted, 10 received, 0% packet loss, time 0ms +]) + +AT_CHECK([ovs-appctl dpctl/dump-flows type=tc | grep "eth_type(0x0800)" | sort | strip_recirc | 
strip_used], [0], [dnl +recirc_id(),in_port(2),eth(),eth_type(0x0800),ipv4(frag=no), packets:10, bytes:840, used:0.0s, actions:3 +recirc_id(),in_port(3),eth(),eth_type(0x0800),ipv4(frag=no), packets:10, bytes:840, used:0.0s, actions:2 +]) + +AT_CHECK([ovs-appctl coverage/read-counter ukey_invalid_stat_reset], [0], [dnl +0 +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + +AT_SETUP([offloads - delete ufid mapping if device not exist - offloads enabled]) +OVS_TRAFFIC_VSWITCHD_START([], [], [-- set Open_vSwitch . other_config:hw-offload=true]) + +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"]) + +ADD_NAMESPACES(at_ns0, at_ns1, at_ns2) + +dnl Disable IPv6 to skip unexpected flow +AT_CHECK([sysctl -w net.ipv6.conf.br0.disable_ipv6=1], [0], [ignore]) +NS_CHECK_EXEC([at_ns0], [sysctl -w net.ipv6.conf.all.disable_ipv6=1], [0], [ignore]) +NS_CHECK_EXEC([at_ns1], [sysctl -w net.ipv6.conf.all.disable_ipv6=1], [0], [ignore]) +NS_CHECK_EXEC([at_ns2], [sysctl -w net.ipv6.conf.all.disable_ipv6=1], [0], [ignore]) + +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24", "aa:1a:54:e9:c5:56") +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24") + +NS_CHECK_EXEC([at_ns0], [ping -q -c 2 -i 0.2 10.1.1.2 | FORMAT_PING], [0], [dnl +2 packets transmitted, 2 received, 0% packet loss, time 0ms +]) + +dnl Delete and add interface ovs-p0/p0 +AT_CHECK([ip link del dev ovs-p0]) +AT_CHECK([ip link add p0 type veth peer name ovs-p0 || return 77]) +AT_CHECK([ip link set p0 netns at_ns0]) +AT_CHECK([ip link set dev ovs-p0 up]) +NS_CHECK_EXEC([at_ns0], [ip addr add dev p0 "10.1.1.1/24"]) +NS_CHECK_EXEC([at_ns0], [ip link set dev p0 up]) +NS_CHECK_EXEC([at_ns0], [ip link set dev p0 address "aa:1a:54:e9:c5:56"]) + +AT_CHECK([ovs-appctl revalidator/purge], [0]) + +dnl Generate flows to trigger the hmap expand once +ADD_VETH(p2, at_ns2, br0, "10.1.1.3/24") +NS_CHECK_EXEC([at_ns0], [ping -q -c 2 -i 0.2 10.1.1.2 | FORMAT_PING], [0], [dnl +2 packets transmitted, 2 received, 0% packet loss, time 0ms +]) +NS_CHECK_EXEC([at_ns0], 
[ping -q -c 2 -i 0.2 10.1.1.3 | FORMAT_PING], [0], [dnl +2 packets transmitted, 2 received, 0% packet loss, time 0ms +]) + +AT_CHECK([ovs-appctl revalidator/purge], [0]) +dnl Fix purge fail occasionally +AT_CHECK([ovs-appctl revalidator/purge], [0]) + +AT_CHECK([test $(ovs-appctl dpctl/dump-flows | grep -c "eth_type(0x0800)") -eq 0], [0], [ignore]) + +OVS_TRAFFIC_VSWITCHD_STOP(["/could not open network device ovs-p0/d +/on nonexistent port/d +/failed to flow_get/d +/Failed to acquire udpif_key/d +/No such device/d +/failed to offload flow/d +"]) +AT_CLEANUP diff --git a/tests/system-traffic.at b/tests/system-traffic.at index 221d96aef..0f0970a31 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -2343,6 +2343,7 @@ AT_CLEANUP AT_SETUP([conntrack - ct flush]) CHECK_CONNTRACK() +CHECK_CONNTRACK_SCTP() OVS_TRAFFIC_VSWITCHD_START() ADD_NAMESPACES(at_ns0, at_ns1) @@ -2353,15 +2354,15 @@ ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24") AT_DATA([flows.txt], [dnl priority=1,action=drop priority=10,arp,action=normal -priority=100,in_port=1,udp,action=ct(commit),2 -priority=100,in_port=2,udp,action=ct(zone=5,commit),1 -priority=100,in_port=1,icmp,action=ct(commit),2 -priority=100,in_port=2,icmp,action=ct(zone=5,commit),1 +priority=100,in_port=1,ip,action=ct(commit),2 +priority=100,in_port=2,ip,action=ct(zone=5,commit),1 ]) AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) +dp=$(ovs-appctl dpctl/dump-dps) m4_foreach([FLUSH_CMD], [[ovs-appctl dpctl/flush-conntrack], + [ovs-appctl dpctl/flush-conntrack $dp], [ovs-ofctl ct-flush br0]], [ AS_BOX([Testing with FLUSH_CMD]) @@ -2503,9 +2504,68 @@ udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10. 
AT_CHECK([FLUSH_CMD zone=5 '' 'ct_nw_src=10.1.1.1']) +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1]) + +dnl Test UDP from port 1 and 2, flush without arguments +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"]) +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101020a0101010002000100080000 actions=resubmit(,0)"]) + + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sort], [0], [dnl +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1) +udp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 +]) + +AT_CHECK([FLUSH_CMD]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1]) + +dnl Test SCTP flush based on port. +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500003400010000408464410a0101010a01010200010002000000009178f7d30100001470e18ccc00000000000a000a00000000 actions=resubmit(,0)"]) +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000950540000000a08004500003400010000408464410a0101020a010101000200010000000098f29e470100001470e18ccc00000000000a000a00000000 actions=resubmit(,0)"]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sed "s/,protoinfo=.*$//" | sort], [0], [dnl +sctp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),reply=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1) +sctp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 +]) + +AT_CHECK([FLUSH_CMD 'ct_nw_src=10.1.1.1,ct_nw_proto=132,ct_tp_src=1,ct_tp_dst=2']) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1" | sed "s/,protoinfo=.*$//" | sort], [0], [dnl 
+sctp,orig=(src=10.1.1.2,dst=10.1.1.1,sport=2,dport=1),reply=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=2),zone=5 +]) + +AT_CHECK([FLUSH_CMD 'ct_nw_src=10.1.1.2,ct_nw_proto=132,ct_tp_src=2,ct_tp_dst=1']) + AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "10\.1\.1\.1"], [1]) ]) +dnl Test flush with invalid arguments + +AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=invalid 'ct_nw_src=10.1.1.1' 'ct_nw_dst=10.1.1.1'], [2], [ignore], [stderr]) +AT_CHECK([grep -q "failed to parse zone" stderr]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=1 'ct_nw_src=10.1.1.1,invalid=invalid' 'ct_nw_dst=10.1.1.1'], [2], [ignore], [stderr]) +AT_CHECK([grep -q "invalid conntrack tuple field: invalid" stderr]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=1 'ct_nw_src=invalid' 'ct_nw_dst=10.1.1.1'], [2], [ignore], [stderr]) +AT_CHECK([grep -q "failed to parse field ct_nw_src" stderr]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=1 'ct_nw_src=10.1.1.1' 'ct_nw_dst=10.1.1.1' invalid], [2], [ignore], [stderr]) +AT_CHECK([grep -q "invalid arguments" stderr]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack $dp zone=1 'ct_nw_src=10.1.1.1' 'ct_nw_dst=10.1.1.1' invalid], [2], [ignore], [stderr]) +AT_CHECK([grep -q "command takes at most 4 arguments" stderr]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack $dp 'ct_nw_src=10.1.1.1' 'ct_nw_dst=10.1.1.1' invalid], [2], [ignore], [stderr]) +AT_CHECK([grep -q "invalid arguments" stderr]) + +AT_CHECK([ovs-ofctl ct-flush br0 zone=1 'ct_nw_src=10.1.1.1' 'ct_nw_dst=10.1.1.1' invalid], [1], [ignore], [stderr]) +AT_CHECK([grep -q "command takes at most 4 arguments" stderr]) + +AT_CHECK([ovs-ofctl ct-flush br0 'ct_nw_src=10.1.1.1' 'ct_nw_dst=10.1.1.1' invalid], [1], [ignore], [stderr]) +AT_CHECK([grep -q "Invalid arguments" stderr]) + OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP @@ -7220,7 +7280,7 @@ table=2,in_port=ovs-server,ip,ct_state=+trk+rpl,actions=output:ovs-client AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) rm server.pcap 
-OVS_DAEMONIZE([tcpdump -l -U -i ovs-server -w server.pcap 2>tcpdump0_err], [tcpdump0.pid]) +NETNS_DAEMONIZE([server], [tcpdump -l -U -i server -w server.pcap 2>tcpdump0_err], [tcpdump0.pid]) OVS_WAIT_UNTIL([grep "listening" tcpdump0_err]) dnl Send UDP client->server @@ -7262,7 +7322,7 @@ dnl Check the ICMP error in reply direction AT_CHECK([ovs-appctl dpctl/flush-conntrack zone=42]) rm client.pcap -OVS_DAEMONIZE([tcpdump -l -U -i ovs-client -w client.pcap 2>tcpdump1_err], [tcpdump1.pid]) +NETNS_DAEMONIZE([client], [tcpdump -l -U -i client -w client.pcap 2>tcpdump1_err], [tcpdump1.pid]) OVS_WAIT_UNTIL([grep "listening" tcpdump1_err]) dnl Send UDP client->server diff --git a/tests/system-userspace-macros.at b/tests/system-userspace-macros.at index b34a84775..2db62bf8d 100644 --- a/tests/system-userspace-macros.at +++ b/tests/system-userspace-macros.at @@ -106,6 +106,16 @@ m4_define([CHECK_CONNTRACK_NAT]) # m4_define([CHECK_CONNTRACK_ZEROIP_SNAT]) +# CHECK_CONNTRACK_SCTP() +# +# Perform requirements checks for running conntrack SCTP. The userspace +# datapath does not support SCTP. +# +m4_define([CHECK_CONNTRACK_SCTP], +[ + AT_SKIP_IF([:]) +]) + # CHECK_CONNTRACK_TIMEOUT() # # Perform requirements checks for running conntrack customized timeout tests. @@ -325,3 +335,11 @@ m4_define([CHECK_L3L4_CONNTRACK_REASM], [ AT_SKIP_IF([:]) ]) + +# OVS_CHECK_BAREUDP() +# +# The userspace datapath does not support bareudp tunnels. +m4_define([OVS_CHECK_BAREUDP], +[ + AT_SKIP_IF([:]) +]) diff --git a/tests/test-barrier.c b/tests/test-barrier.c index 3bc5291cc..fb0ab0e69 100644 --- a/tests/test-barrier.c +++ b/tests/test-barrier.c @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#include - #include +#undef NDEBUG +#include -#include "ovs-thread.h" -#include "ovs-rcu.h" #include "ovstest.h" +#include "ovs-rcu.h" +#include "ovs-thread.h" #include "random.h" #include "util.h" diff --git a/tests/test-id-fpool.c b/tests/test-id-fpool.c index 25275d9ae..27800aa9b 100644 --- a/tests/test-id-fpool.c +++ b/tests/test-id-fpool.c @@ -14,12 +14,12 @@ * limitations under the License. */ +#include #undef NDEBUG #include #include #include - -#include +#include #include "command-line.h" #include "id-fpool.h" diff --git a/tests/test-mpsc-queue.c b/tests/test-mpsc-queue.c index a38bf9e6d..16aa804a0 100644 --- a/tests/test-mpsc-queue.c +++ b/tests/test-mpsc-queue.c @@ -14,12 +14,12 @@ * limitations under the License. */ +#include #undef NDEBUG #include #include #include - -#include +#include #include "command-line.h" #include "guarded-list.h" diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c index 1bc5ac17a..c761822e6 100644 --- a/tests/test-ovsdb.c +++ b/tests/test-ovsdb.c @@ -870,7 +870,8 @@ do_parse_rows(struct ovs_cmdl_context *ctx) row = ovsdb_row_create(table); json = unbox_json(parse_json(ctx->argv[i])); - check_ovsdb_error(ovsdb_row_from_json(row, json, NULL, &columns)); + check_ovsdb_error(ovsdb_row_from_json(row, json, NULL, + &columns, false)); json_destroy(json); print_and_free_json(ovsdb_row_to_json(row, &all_columns)); @@ -937,7 +938,7 @@ do_compare_rows(struct ovs_cmdl_context *ctx) } names[i] = xstrdup(json->array.elems[0]->string); check_ovsdb_error(ovsdb_row_from_json(rows[i], json->array.elems[1], - NULL, NULL)); + NULL, NULL, false)); json_destroy(json); } for (i = 0; i < n_rows; i++) { @@ -1050,7 +1051,7 @@ do_evaluate_condition__(struct ovs_cmdl_context *ctx, int mode) for (i = 0; i < n_rows; i++) { rows[i] = ovsdb_row_create(table); check_ovsdb_error(ovsdb_row_from_json(rows[i], json->array.elems[i], - NULL, NULL)); + NULL, NULL, false)); } json_destroy(json); @@ -1224,7 +1225,7 @@ do_execute_mutations(struct ovs_cmdl_context 
*ctx) for (i = 0; i < n_rows; i++) { rows[i] = ovsdb_row_create(table); check_ovsdb_error(ovsdb_row_from_json(rows[i], json->array.elems[i], - NULL, NULL)); + NULL, NULL, false)); } json_destroy(json); @@ -1338,7 +1339,7 @@ do_query(struct ovs_cmdl_context *ctx) struct ovsdb_row *row = ovsdb_row_create(table); uuid_generate(ovsdb_row_get_uuid_rw(row)); check_ovsdb_error(ovsdb_row_from_json(row, json->array.elems[i], - NULL, NULL)); + NULL, NULL, false)); if (ovsdb_table_get_row(table, ovsdb_row_get_uuid(row))) { ovs_fatal(0, "duplicate UUID "UUID_FMT" in table", UUID_ARGS(ovsdb_row_get_uuid(row))); @@ -1445,7 +1446,7 @@ do_query_distinct(struct ovs_cmdl_context *ctx) row = ovsdb_row_create(table); uuid_generate(ovsdb_row_get_uuid_rw(row)); check_ovsdb_error(ovsdb_row_from_json(row, json->array.elems[i], - NULL, NULL)); + NULL, NULL, false)); /* Initialize row and find equivalence class. */ rows[i].uuid = *ovsdb_row_get_uuid(row); diff --git a/tests/testsuite.at b/tests/testsuite.at index cf4e3eadf..9d77a9f51 100644 --- a/tests/testsuite.at +++ b/tests/testsuite.at @@ -77,3 +77,4 @@ m4_include([tests/packet-type-aware.at]) m4_include([tests/nsh.at]) m4_include([tests/drop-stats.at]) m4_include([tests/pytest.at]) +m4_include([tests/learning-switch.at]) diff --git a/utilities/ovs-appctl-bashcomp.bash b/utilities/ovs-appctl-bashcomp.bash index 4384be8ae..0a9af1a18 100644 --- a/utilities/ovs-appctl-bashcomp.bash +++ b/utilities/ovs-appctl-bashcomp.bash @@ -223,6 +223,13 @@ printf_stderr() { # The code below is taken from Peter Amidon. His change makes it more # robust. 
extract_bash_prompt() { + # On Bash 4.4+ just use the @P expansion + if ((BASH_VERSINFO[0] > 4 || + (BASH_VERSINFO[0] == 4 && BASH_VERSINFO[1] >= 4))); then + _BASH_PROMPT="${PS1@P}" + return + fi + local myPS1 v myPS1="$(sed 's/Begin prompt/\\Begin prompt/; s/End prompt/\\End prompt/' <<< "$PS1")" diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c index eabec18a3..3ce4e82ec 100644 --- a/utilities/ovs-ofctl.c +++ b/utilities/ovs-ofctl.c @@ -3089,6 +3089,10 @@ ofctl_ct_flush(struct ovs_cmdl_context *ctx) args--; } + if (args > 0) { + ovs_fatal(0, "Invalid arguments"); + } + open_vconn(ctx->argv[1], &vconn); enum ofp_version version = vconn_get_version(vconn); struct ofpbuf *msg = ofp_ct_match_encode(&match, pzone, version); diff --git a/utilities/ovs-tcpdump.in b/utilities/ovs-tcpdump.in index a49ec9f94..4cbd9a5d3 100755 --- a/utilities/ovs-tcpdump.in +++ b/utilities/ovs-tcpdump.in @@ -96,6 +96,10 @@ def _install_dst_if_linux(tap_name, mtu_value=None): *(['ip', 'link', 'set', 'dev', str(tap_name), 'up'])) pipe.wait() + pipe = _doexec( + *(['ip', '-6', 'addr', 'flush', 'dev', str(tap_name)])) + pipe.wait() + def _remove_dst_if_linux(tap_name): _doexec( @@ -538,6 +542,17 @@ def main(): print(data.decode('utf-8')) raise KeyboardInterrupt except KeyboardInterrupt: + # If there is a pipe behind ovs-tcpdump (such as ovs-tcpdump + # -i eth0 | grep "192.168.1.1"), the pipe is no longer available + # after received Ctrl+C. + # If we write data to an unavailable pipe, a pipe error will be + # reported, so we turn off stdout to avoid subsequent flushing + # of data into the pipe. 
+ try: + sys.stdout.close() + except IOError: + pass + if pipes.poll() is None: pipes.terminate() diff --git a/utilities/ovs-vsctl-bashcomp.bash b/utilities/ovs-vsctl-bashcomp.bash index fc8245bfb..c5ad24fb7 100644 --- a/utilities/ovs-vsctl-bashcomp.bash +++ b/utilities/ovs-vsctl-bashcomp.bash @@ -413,6 +413,13 @@ _ovs_vsctl_get_PS1 () { return; fi + # On Bash 4.4+ just use the @P expansion + if ((BASH_VERSINFO[0] > 4 || + (BASH_VERSINFO[0] == 4 && BASH_VERSINFO[1] >= 4))); then + printf '%s\n' "${PS1@P}" + return + fi + # Original inspiration from # http://stackoverflow.com/questions/10060500/bash-how-to-evaluate-ps1-ps2, # but changed quite a lot to make it more robust. diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index bfb2adef1..0deca14b9 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -832,6 +832,9 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) ofproto_set_min_revalidate_pps( smap_get_uint(&ovs_cfg->other_config, "min-revalidate-pps", OFPROTO_MIN_REVALIDATE_PPS_DEFAULT)); + ofproto_set_offloaded_stats_delay( + smap_get_uint(&ovs_cfg->other_config, "offloaded-stats-delay", + OFPROTO_OFFLOADED_STATS_DELAY)); ofproto_set_vlan_limit(smap_get_int(&ovs_cfg->other_config, "vlan-limit", LEGACY_MAX_VLAN_HEADERS)); ofproto_set_bundle_idle_timeout(smap_get_uint(&ovs_cfg->other_config, diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c index 407bfc60e..2b2afb44c 100644 --- a/vswitchd/ovs-vswitchd.c +++ b/vswitchd/ovs-vswitchd.c @@ -65,19 +65,19 @@ static unixctl_cb_func ovs_vswitchd_exit; static char *parse_options(int argc, char *argv[], char **unixctl_path); OVS_NO_RETURN static void usage(void); -struct ovs_vswitchd_exit_args { - bool *exiting; - bool *cleanup; -}; +static struct ovs_vswitchd_exit_args { + struct unixctl_conn **conns; + size_t n_conns; + bool exiting; + bool cleanup; +} exit_args; int main(int argc, char *argv[]) { - char *unixctl_path = NULL; struct unixctl_server *unixctl; + char *unixctl_path = NULL; 
char *remote; - bool exiting, cleanup; - struct ovs_vswitchd_exit_args exit_args = {&exiting, &cleanup}; int retval; set_program_name(argv[0]); @@ -108,14 +108,12 @@ main(int argc, char *argv[]) exit(EXIT_FAILURE); } unixctl_command_register("exit", "[--cleanup]", 0, 1, - ovs_vswitchd_exit, &exit_args); + ovs_vswitchd_exit, NULL); bridge_init(remote); free(remote); - exiting = false; - cleanup = false; - while (!exiting) { + while (!exit_args.exiting) { OVS_USDT_PROBE(main, run_start); memory_run(); if (memory_should_report()) { @@ -134,16 +132,22 @@ main(int argc, char *argv[]) bridge_wait(); unixctl_server_wait(unixctl); netdev_wait(); - if (exiting) { + if (exit_args.exiting) { poll_immediate_wake(); } OVS_USDT_PROBE(main, poll_block); poll_block(); if (should_service_stop()) { - exiting = true; + exit_args.exiting = true; } } - bridge_exit(cleanup); + bridge_exit(exit_args.cleanup); + + for (size_t i = 0; i < exit_args.n_conns; i++) { + unixctl_command_reply(exit_args.conns[i], NULL); + } + free(exit_args.conns); + unixctl_server_destroy(unixctl); service_stop(); vlog_disable_async(); @@ -295,10 +299,14 @@ usage(void) static void ovs_vswitchd_exit(struct unixctl_conn *conn, int argc, - const char *argv[], void *exit_args_) + const char *argv[], void *args OVS_UNUSED) { - struct ovs_vswitchd_exit_args *exit_args = exit_args_; - *exit_args->exiting = true; - *exit_args->cleanup = argc == 2 && !strcmp(argv[1], "--cleanup"); - unixctl_command_reply(conn, NULL); + exit_args.n_conns++; + exit_args.conns = xrealloc(exit_args.conns, + exit_args.n_conns * sizeof *exit_args.conns); + exit_args.conns[exit_args.n_conns - 1] = conn; + exit_args.exiting = true; + if (!exit_args.cleanup) { + exit_args.cleanup = argc == 2 && !strcmp(argv[1], "--cleanup"); + } } diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index 2b57fc0e3..1204805b1 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -215,6 +215,19 @@

+ +

+ Set the worst-case delay (in ms) it might take before statistics of + offloaded flows are updated. Offloaded flows younger than this + delay will always be revalidated regardless of + <ref column="other_config" key="min-revalidate-pps"/>.

+

+ The default is 2000. +

+
+

@@ -6296,6 +6309,12 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \ translated to an ephemeral port. If there is no collision, no SNAT is performed. + + True if the datapath supports the CT flush OpenFlow Nicira extension + called NXT_CT_FLUSH. The NXT_CT_FLUSH + extension allows flushing CT entries based on specified parameters. + Submodule dpdk 9dae7a15a..3812e23f6: diff --git a/dpdk/drivers/net/i40e/i40e_ethdev.c b/dpdk/drivers/net/i40e/i40e_ethdev.c index 7726a89d99..a982e42264 100644 --- a/dpdk/drivers/net/i40e/i40e_ethdev.c +++ b/dpdk/drivers/net/i40e/i40e_ethdev.c @@ -387,7 +387,6 @@ static int i40e_set_default_mac_addr(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr); static int i40e_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu); -static void i40e_set_mac_max_frame(struct rte_eth_dev *dev, uint16_t size); static int i40e_ethertype_filter_convert( const struct rte_eth_ethertype_filter *input, @@ -1711,6 +1710,11 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused) */ i40e_add_tx_flow_control_drop_filter(pf); + /* Set the max frame size to 0x2600 by default, + * in case other drivers changed the default value. 
+ */ + i40e_aq_set_mac_config(hw, I40E_FRAME_SIZE_MAX, TRUE, false, 0, NULL); + /* initialize RSS rule list */ TAILQ_INIT(&pf->rss_config_list); @@ -2328,7 +2332,6 @@ i40e_dev_start(struct rte_eth_dev *dev) uint32_t intr_vector = 0; struct i40e_vsi *vsi; uint16_t nb_rxq, nb_txq; - uint16_t max_frame_size; hw->adapter_stopped = 0; @@ -2467,9 +2470,6 @@ i40e_dev_start(struct rte_eth_dev *dev) "please call hierarchy_commit() " "before starting the port"); - max_frame_size = dev->data->mtu + I40E_ETH_OVERHEAD; - i40e_set_mac_max_frame(dev, max_frame_size); - return I40E_SUCCESS; tx_err: @@ -2809,9 +2809,6 @@ i40e_dev_set_link_down(struct rte_eth_dev *dev) return i40e_phy_conf_link(hw, abilities, speed, false); } -#define CHECK_INTERVAL 100 /* 100ms */ -#define MAX_REPEAT_TIME 10 /* 1s (10 * 100ms) in total */ - static __rte_always_inline void update_link_reg(struct i40e_hw *hw, struct rte_eth_link *link) { @@ -2878,6 +2875,8 @@ static __rte_always_inline void update_link_aq(struct i40e_hw *hw, struct rte_eth_link *link, bool enable_lse, int wait_to_complete) { +#define CHECK_INTERVAL 100 /* 100ms */ +#define MAX_REPEAT_TIME 10 /* 1s (10 * 100ms) in total */ uint32_t rep_cnt = MAX_REPEAT_TIME; struct i40e_link_status link_status; int status; @@ -6738,7 +6737,6 @@ i40e_dev_handle_aq_msg(struct rte_eth_dev *dev) if (!ret) rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); - break; default: PMD_DRV_LOG(DEBUG, "Request %u is not supported yet", @@ -12123,40 +12121,6 @@ i40e_cloud_filter_qinq_create(struct i40e_pf *pf) return ret; } -static void -i40e_set_mac_max_frame(struct rte_eth_dev *dev, uint16_t size) -{ - struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); - uint32_t rep_cnt = MAX_REPEAT_TIME; - struct rte_eth_link link; - enum i40e_status_code status; - bool can_be_set = true; - - /* - * I40E_MEDIA_TYPE_BASET link up can be ignored - * I40E_MEDIA_TYPE_BASET link down that hw->phy.media_type - * is I40E_MEDIA_TYPE_UNKNOWN - */ - if 
(hw->phy.media_type != I40E_MEDIA_TYPE_BASET && - hw->phy.media_type != I40E_MEDIA_TYPE_UNKNOWN) { - do { - update_link_reg(hw, &link); - if (link.link_status) - break; - rte_delay_ms(CHECK_INTERVAL); - } while (--rep_cnt); - can_be_set = !!link.link_status; - } - - if (can_be_set) { - status = i40e_aq_set_mac_config(hw, size, TRUE, 0, false, NULL); - if (status != I40E_SUCCESS) - PMD_DRV_LOG(ERR, "Failed to set max frame size at port level"); - } else { - PMD_DRV_LOG(ERR, "Set max frame size at port level not applicable on link down"); - } -} - RTE_LOG_REGISTER_SUFFIX(i40e_logtype_init, init, NOTICE); RTE_LOG_REGISTER_SUFFIX(i40e_logtype_driver, driver, NOTICE); #ifdef RTE_ETHDEV_DEBUG_RX